aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fb.com>2014-05-13 20:30:47 -0400
committerChris Mason <clm@fb.com>2014-06-09 20:20:48 -0400
commitfcebe4562dec83b3f8d3088d77584727b09130b2 (patch)
tree80cf5cf51b8ccbada232486acf57c4bb1cbcf3b4
parent5dca6eea91653e9949ce6eb9e9acab6277e2f2c4 (diff)
Btrfs: rework qgroup accounting
Currently qgroups account for space by intercepting delayed ref updates to fs trees. It does this by adding sequence numbers to delayed ref updates so that it can figure out how the tree looked before the update so we can adjust the counters properly. The problem with this is that it does not allow delayed refs to be merged, so if, say, you are defragging an extent with 5k snapshots pointing to it we will thrash the delayed ref lock because we need to go back and manually merge these things together. Instead we want to process quota changes when we know they are going to happen, like when we first allocate an extent, we free a reference for an extent, we add new references, etc. This patch accomplishes this by only adding qgroup operations for real ref changes. We only modify the sequence number when we need to look up roots for bytenrs, this reduces the amount of churn on the sequence number and allows us to merge delayed refs as we add them most of the time. This patch encompasses a bunch of architectural changes: 1) qgroup ref operations: instead of tracking qgroup operations through the delayed refs we simply add new ref operations whenever we notice that we need to when we've modified the refs themselves. 2) tree mod seq: we no longer have this separation of major/minor counters. This makes the sequence number stuff much more sane and we can remove some locking that was needed to protect the counter. 3) delayed ref seq: we now read the tree mod seq number and use that as our sequence. This means each new delayed ref doesn't have its own unique sequence number, rather whenever we go to look up backrefs we inc the sequence number so we can make sure to keep any new operations from screwing up our world view at that given point. This allows us to merge delayed refs during runtime. With all of these changes the delayed ref stuff is a little saner and the qgroup accounting stuff no longer goes negative in some cases like it was before.
Thanks,
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/ctree.c45
-rw-r--r--fs/btrfs/ctree.h59
-rw-r--r--fs/btrfs/delayed-ref.c39
-rw-r--r--fs/btrfs/delayed-ref.h24
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/extent-tree.c206
-rw-r--r--fs/btrfs/file.c5
-rw-r--r--fs/btrfs/ioctl.c63
-rw-r--r--fs/btrfs/qgroup.c915
-rw-r--r--fs/btrfs/qgroup.h107
-rw-r--r--fs/btrfs/transaction.c52
12 files changed, 1044 insertions, 479 deletions
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index a910b27a8ad9..94e94429f3e9 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
55int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); 55int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
56 56
57int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 57int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
58 struct btrfs_fs_info *fs_info, u64 bytenr, 58 struct btrfs_fs_info *fs_info, u64 bytenr,
59 u64 time_seq, struct ulist **roots); 59 u64 time_seq, struct ulist **roots);
60char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 60char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
61 u32 name_len, unsigned long name_off, 61 u32 name_len, unsigned long name_off,
62 struct extent_buffer *eb_in, u64 parent, 62 struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2f10e12ae94c..bbbe4f1c5086 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -356,44 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
356} 356}
357 357
358/* 358/*
359 * Increment the upper half of tree_mod_seq, set lower half zero. 359 * Pull a new tree mod seq number for our operation.
360 *
361 * Must be called with fs_info->tree_mod_seq_lock held.
362 */
363static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
364{
365 u64 seq = atomic64_read(&fs_info->tree_mod_seq);
366 seq &= 0xffffffff00000000ull;
367 seq += 1ull << 32;
368 atomic64_set(&fs_info->tree_mod_seq, seq);
369 return seq;
370}
371
372/*
373 * Increment the lower half of tree_mod_seq.
374 *
375 * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
376 * are generated should not technically require a spin lock here. (Rationale:
377 * incrementing the minor while incrementing the major seq number is between its
378 * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
379 * just returns a unique sequence number as usual.) We have decided to leave
380 * that requirement in here and rethink it once we notice it really imposes a
381 * problem on some workload.
382 */ 360 */
383static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info) 361static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
384{ 362{
385 return atomic64_inc_return(&fs_info->tree_mod_seq); 363 return atomic64_inc_return(&fs_info->tree_mod_seq);
386} 364}
387 365
388/* 366/*
389 * return the last minor in the previous major tree_mod_seq number
390 */
391u64 btrfs_tree_mod_seq_prev(u64 seq)
392{
393 return (seq & 0xffffffff00000000ull) - 1ull;
394}
395
396/*
397 * This adds a new blocker to the tree mod log's blocker list if the @elem 367 * This adds a new blocker to the tree mod log's blocker list if the @elem
398 * passed does not already have a sequence number set. So when a caller expects 368 * passed does not already have a sequence number set. So when a caller expects
399 * to record tree modifications, it should ensure to set elem->seq to zero 369 * to record tree modifications, it should ensure to set elem->seq to zero
@@ -404,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
404u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, 374u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
405 struct seq_list *elem) 375 struct seq_list *elem)
406{ 376{
407 u64 seq;
408
409 tree_mod_log_write_lock(fs_info); 377 tree_mod_log_write_lock(fs_info);
410 spin_lock(&fs_info->tree_mod_seq_lock); 378 spin_lock(&fs_info->tree_mod_seq_lock);
411 if (!elem->seq) { 379 if (!elem->seq) {
412 elem->seq = btrfs_inc_tree_mod_seq_major(fs_info); 380 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
413 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); 381 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
414 } 382 }
415 seq = btrfs_inc_tree_mod_seq_minor(fs_info);
416 spin_unlock(&fs_info->tree_mod_seq_lock); 383 spin_unlock(&fs_info->tree_mod_seq_lock);
417 tree_mod_log_write_unlock(fs_info); 384 tree_mod_log_write_unlock(fs_info);
418 385
419 return seq; 386 return elem->seq;
420} 387}
421 388
422void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 389void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -489,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
489 456
490 BUG_ON(!tm); 457 BUG_ON(!tm);
491 458
492 spin_lock(&fs_info->tree_mod_seq_lock); 459 tm->seq = btrfs_inc_tree_mod_seq(fs_info);
493 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
494 spin_unlock(&fs_info->tree_mod_seq_lock);
495 460
496 tm_root = &fs_info->tree_mod_log; 461 tm_root = &fs_info->tree_mod_log;
497 new = &tm_root->rb_node; 462 new = &tm_root->rb_node;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index efd3bf61696d..06cc384933cc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1648,7 +1648,10 @@ struct btrfs_fs_info {
1648 1648
1649 /* holds configuration and tracking. Protected by qgroup_lock */ 1649 /* holds configuration and tracking. Protected by qgroup_lock */
1650 struct rb_root qgroup_tree; 1650 struct rb_root qgroup_tree;
1651 struct rb_root qgroup_op_tree;
1651 spinlock_t qgroup_lock; 1652 spinlock_t qgroup_lock;
1653 spinlock_t qgroup_op_lock;
1654 atomic_t qgroup_op_seq;
1652 1655
1653 /* 1656 /*
1654 * used to avoid frequently calling ulist_alloc()/ulist_free() 1657 * used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -3300,9 +3303,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3300 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3303 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3301 struct btrfs_key *ins, int is_data); 3304 struct btrfs_key *ins, int is_data);
3302int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3305int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3303 struct extent_buffer *buf, int full_backref, int for_cow); 3306 struct extent_buffer *buf, int full_backref, int no_quota);
3304int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3307int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3305 struct extent_buffer *buf, int full_backref, int for_cow); 3308 struct extent_buffer *buf, int full_backref, int no_quota);
3306int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3309int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3307 struct btrfs_root *root, 3310 struct btrfs_root *root,
3308 u64 bytenr, u64 num_bytes, u64 flags, 3311 u64 bytenr, u64 num_bytes, u64 flags,
@@ -3310,7 +3313,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3310int btrfs_free_extent(struct btrfs_trans_handle *trans, 3313int btrfs_free_extent(struct btrfs_trans_handle *trans,
3311 struct btrfs_root *root, 3314 struct btrfs_root *root,
3312 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3315 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
3313 u64 owner, u64 offset, int for_cow); 3316 u64 owner, u64 offset, int no_quota);
3314 3317
3315int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 3318int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
3316int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 3319int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@@ -3322,7 +3325,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3322int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 3325int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3323 struct btrfs_root *root, 3326 struct btrfs_root *root,
3324 u64 bytenr, u64 num_bytes, u64 parent, 3327 u64 bytenr, u64 num_bytes, u64 parent,
3325 u64 root_objectid, u64 owner, u64 offset, int for_cow); 3328 u64 root_objectid, u64 owner, u64 offset, int no_quota);
3326 3329
3327int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3330int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3328 struct btrfs_root *root); 3331 struct btrfs_root *root);
@@ -3410,7 +3413,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
3410int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, 3413int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
3411 struct btrfs_fs_info *fs_info); 3414 struct btrfs_fs_info *fs_info);
3412int __get_raid_index(u64 flags); 3415int __get_raid_index(u64 flags);
3413
3414int btrfs_start_nocow_write(struct btrfs_root *root); 3416int btrfs_start_nocow_write(struct btrfs_root *root);
3415void btrfs_end_nocow_write(struct btrfs_root *root); 3417void btrfs_end_nocow_write(struct btrfs_root *root);
3416/* ctree.c */ 3418/* ctree.c */
@@ -3586,7 +3588,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
3586 struct seq_list *elem); 3588 struct seq_list *elem);
3587void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 3589void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
3588 struct seq_list *elem); 3590 struct seq_list *elem);
3589u64 btrfs_tree_mod_seq_prev(u64 seq);
3590int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); 3591int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
3591 3592
3592/* root-item.c */ 3593/* root-item.c */
@@ -4094,52 +4095,6 @@ void btrfs_reada_detach(void *handle);
4094int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 4095int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
4095 u64 start, int err); 4096 u64 start, int err);
4096 4097
4097/* qgroup.c */
4098struct qgroup_update {
4099 struct list_head list;
4100 struct btrfs_delayed_ref_node *node;
4101 struct btrfs_delayed_extent_op *extent_op;
4102};
4103
4104int btrfs_quota_enable(struct btrfs_trans_handle *trans,
4105 struct btrfs_fs_info *fs_info);
4106int btrfs_quota_disable(struct btrfs_trans_handle *trans,
4107 struct btrfs_fs_info *fs_info);
4108int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
4109void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
4110int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
4111int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
4112 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
4113int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
4114 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
4115int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
4116 struct btrfs_fs_info *fs_info, u64 qgroupid,
4117 char *name);
4118int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
4119 struct btrfs_fs_info *fs_info, u64 qgroupid);
4120int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
4121 struct btrfs_fs_info *fs_info, u64 qgroupid,
4122 struct btrfs_qgroup_limit *limit);
4123int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
4124void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
4125struct btrfs_delayed_extent_op;
4126int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
4127 struct btrfs_delayed_ref_node *node,
4128 struct btrfs_delayed_extent_op *extent_op);
4129int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
4130 struct btrfs_fs_info *fs_info,
4131 struct btrfs_delayed_ref_node *node,
4132 struct btrfs_delayed_extent_op *extent_op);
4133int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
4134 struct btrfs_fs_info *fs_info);
4135int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
4136 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
4137 struct btrfs_qgroup_inherit *inherit);
4138int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
4139void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
4140
4141void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
4142
4143static inline int is_fstree(u64 rootid) 4098static inline int is_fstree(u64 rootid)
4144{ 4099{
4145 if (rootid == BTRFS_FS_TREE_OBJECTID || 4100 if (rootid == BTRFS_FS_TREE_OBJECTID ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 31299646024d..6d16bea94e1c 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
106 return -1; 106 return -1;
107 if (ref1->type > ref2->type) 107 if (ref1->type > ref2->type)
108 return 1; 108 return 1;
109 if (ref1->no_quota > ref2->no_quota)
110 return 1;
111 if (ref1->no_quota < ref2->no_quota)
112 return -1;
109 /* merging of sequenced refs is not allowed */ 113 /* merging of sequenced refs is not allowed */
110 if (compare_seq) { 114 if (compare_seq) {
111 if (ref1->seq < ref2->seq) 115 if (ref1->seq < ref2->seq)
@@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
635 struct btrfs_delayed_ref_head *head_ref, 639 struct btrfs_delayed_ref_head *head_ref,
636 struct btrfs_delayed_ref_node *ref, u64 bytenr, 640 struct btrfs_delayed_ref_node *ref, u64 bytenr,
637 u64 num_bytes, u64 parent, u64 ref_root, int level, 641 u64 num_bytes, u64 parent, u64 ref_root, int level,
638 int action, int for_cow) 642 int action, int no_quota)
639{ 643{
640 struct btrfs_delayed_ref_node *existing; 644 struct btrfs_delayed_ref_node *existing;
641 struct btrfs_delayed_tree_ref *full_ref; 645 struct btrfs_delayed_tree_ref *full_ref;
@@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
645 if (action == BTRFS_ADD_DELAYED_EXTENT) 649 if (action == BTRFS_ADD_DELAYED_EXTENT)
646 action = BTRFS_ADD_DELAYED_REF; 650 action = BTRFS_ADD_DELAYED_REF;
647 651
652 if (is_fstree(ref_root))
653 seq = atomic64_read(&fs_info->tree_mod_seq);
648 delayed_refs = &trans->transaction->delayed_refs; 654 delayed_refs = &trans->transaction->delayed_refs;
649 655
650 /* first set the basic ref node struct up */ 656 /* first set the basic ref node struct up */
@@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
655 ref->action = action; 661 ref->action = action;
656 ref->is_head = 0; 662 ref->is_head = 0;
657 ref->in_tree = 1; 663 ref->in_tree = 1;
658 664 ref->no_quota = no_quota;
659 if (need_ref_seq(for_cow, ref_root))
660 seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
661 ref->seq = seq; 665 ref->seq = seq;
662 666
663 full_ref = btrfs_delayed_node_to_tree_ref(ref); 667 full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
697 struct btrfs_delayed_ref_head *head_ref, 701 struct btrfs_delayed_ref_head *head_ref,
698 struct btrfs_delayed_ref_node *ref, u64 bytenr, 702 struct btrfs_delayed_ref_node *ref, u64 bytenr,
699 u64 num_bytes, u64 parent, u64 ref_root, u64 owner, 703 u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
700 u64 offset, int action, int for_cow) 704 u64 offset, int action, int no_quota)
701{ 705{
702 struct btrfs_delayed_ref_node *existing; 706 struct btrfs_delayed_ref_node *existing;
703 struct btrfs_delayed_data_ref *full_ref; 707 struct btrfs_delayed_data_ref *full_ref;
@@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
709 713
710 delayed_refs = &trans->transaction->delayed_refs; 714 delayed_refs = &trans->transaction->delayed_refs;
711 715
716 if (is_fstree(ref_root))
717 seq = atomic64_read(&fs_info->tree_mod_seq);
718
712 /* first set the basic ref node struct up */ 719 /* first set the basic ref node struct up */
713 atomic_set(&ref->refs, 1); 720 atomic_set(&ref->refs, 1);
714 ref->bytenr = bytenr; 721 ref->bytenr = bytenr;
@@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
717 ref->action = action; 724 ref->action = action;
718 ref->is_head = 0; 725 ref->is_head = 0;
719 ref->in_tree = 1; 726 ref->in_tree = 1;
720 727 ref->no_quota = no_quota;
721 if (need_ref_seq(for_cow, ref_root))
722 seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
723 ref->seq = seq; 728 ref->seq = seq;
724 729
725 full_ref = btrfs_delayed_node_to_data_ref(ref); 730 full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
762 u64 bytenr, u64 num_bytes, u64 parent, 767 u64 bytenr, u64 num_bytes, u64 parent,
763 u64 ref_root, int level, int action, 768 u64 ref_root, int level, int action,
764 struct btrfs_delayed_extent_op *extent_op, 769 struct btrfs_delayed_extent_op *extent_op,
765 int for_cow) 770 int no_quota)
766{ 771{
767 struct btrfs_delayed_tree_ref *ref; 772 struct btrfs_delayed_tree_ref *ref;
768 struct btrfs_delayed_ref_head *head_ref; 773 struct btrfs_delayed_ref_head *head_ref;
769 struct btrfs_delayed_ref_root *delayed_refs; 774 struct btrfs_delayed_ref_root *delayed_refs;
770 775
776 if (!is_fstree(ref_root) || !fs_info->quota_enabled)
777 no_quota = 0;
778
771 BUG_ON(extent_op && extent_op->is_data); 779 BUG_ON(extent_op && extent_op->is_data);
772 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); 780 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
773 if (!ref) 781 if (!ref)
@@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
793 801
794 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, 802 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
795 num_bytes, parent, ref_root, level, action, 803 num_bytes, parent, ref_root, level, action,
796 for_cow); 804 no_quota);
797 spin_unlock(&delayed_refs->lock); 805 spin_unlock(&delayed_refs->lock);
798 if (need_ref_seq(for_cow, ref_root))
799 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
800 806
801 return 0; 807 return 0;
802} 808}
@@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
810 u64 parent, u64 ref_root, 816 u64 parent, u64 ref_root,
811 u64 owner, u64 offset, int action, 817 u64 owner, u64 offset, int action,
812 struct btrfs_delayed_extent_op *extent_op, 818 struct btrfs_delayed_extent_op *extent_op,
813 int for_cow) 819 int no_quota)
814{ 820{
815 struct btrfs_delayed_data_ref *ref; 821 struct btrfs_delayed_data_ref *ref;
816 struct btrfs_delayed_ref_head *head_ref; 822 struct btrfs_delayed_ref_head *head_ref;
817 struct btrfs_delayed_ref_root *delayed_refs; 823 struct btrfs_delayed_ref_root *delayed_refs;
818 824
825 if (!is_fstree(ref_root) || !fs_info->quota_enabled)
826 no_quota = 0;
827
819 BUG_ON(extent_op && !extent_op->is_data); 828 BUG_ON(extent_op && !extent_op->is_data);
820 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); 829 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
821 if (!ref) 830 if (!ref)
@@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
841 850
842 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, 851 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
843 num_bytes, parent, ref_root, owner, offset, 852 num_bytes, parent, ref_root, owner, offset,
844 action, for_cow); 853 action, no_quota);
845 spin_unlock(&delayed_refs->lock); 854 spin_unlock(&delayed_refs->lock);
846 if (need_ref_seq(for_cow, ref_root))
847 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
848 855
849 return 0; 856 return 0;
850} 857}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 4ba9b93022ff..a764e2340d48 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {
52 52
53 unsigned int action:8; 53 unsigned int action:8;
54 unsigned int type:8; 54 unsigned int type:8;
55 unsigned int no_quota:1;
55 /* is this node still in the rbtree? */ 56 /* is this node still in the rbtree? */
56 unsigned int is_head:1; 57 unsigned int is_head:1;
57 unsigned int in_tree:1; 58 unsigned int in_tree:1;
@@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
196 u64 bytenr, u64 num_bytes, u64 parent, 197 u64 bytenr, u64 num_bytes, u64 parent,
197 u64 ref_root, int level, int action, 198 u64 ref_root, int level, int action,
198 struct btrfs_delayed_extent_op *extent_op, 199 struct btrfs_delayed_extent_op *extent_op,
199 int for_cow); 200 int no_quota);
200int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, 201int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
201 struct btrfs_trans_handle *trans, 202 struct btrfs_trans_handle *trans,
202 u64 bytenr, u64 num_bytes, 203 u64 bytenr, u64 num_bytes,
203 u64 parent, u64 ref_root, 204 u64 parent, u64 ref_root,
204 u64 owner, u64 offset, int action, 205 u64 owner, u64 offset, int action,
205 struct btrfs_delayed_extent_op *extent_op, 206 struct btrfs_delayed_extent_op *extent_op,
206 int for_cow); 207 int no_quota);
207int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 208int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
208 struct btrfs_trans_handle *trans, 209 struct btrfs_trans_handle *trans,
209 u64 bytenr, u64 num_bytes, 210 u64 bytenr, u64 num_bytes,
@@ -231,25 +232,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
231 u64 seq); 232 u64 seq);
232 233
233/* 234/*
234 * delayed refs with a ref_seq > 0 must be held back during backref walking.
235 * this only applies to items in one of the fs-trees. for_cow items never need
236 * to be held back, so they won't get a ref_seq number.
237 */
238static inline int need_ref_seq(int for_cow, u64 rootid)
239{
240 if (for_cow)
241 return 0;
242
243 if (rootid == BTRFS_FS_TREE_OBJECTID)
244 return 1;
245
246 if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
247 return 1;
248
249 return 0;
250}
251
252/*
253 * a node might live in a head or a regular ref, this lets you 235 * a node might live in a head or a regular ref, this lets you
254 * test for the proper type to use. 236 * test for the proper type to use.
255 */ 237 */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e54f0cd5cdf6..77f92a32e230 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -49,6 +49,7 @@
49#include "dev-replace.h" 49#include "dev-replace.h"
50#include "raid56.h" 50#include "raid56.h"
51#include "sysfs.h" 51#include "sysfs.h"
52#include "qgroup.h"
52 53
53#ifdef CONFIG_X86 54#ifdef CONFIG_X86
54#include <asm/cpufeature.h> 55#include <asm/cpufeature.h>
@@ -2219,6 +2220,7 @@ int open_ctree(struct super_block *sb,
2219 spin_lock_init(&fs_info->free_chunk_lock); 2220 spin_lock_init(&fs_info->free_chunk_lock);
2220 spin_lock_init(&fs_info->tree_mod_seq_lock); 2221 spin_lock_init(&fs_info->tree_mod_seq_lock);
2221 spin_lock_init(&fs_info->super_lock); 2222 spin_lock_init(&fs_info->super_lock);
2223 spin_lock_init(&fs_info->qgroup_op_lock);
2222 spin_lock_init(&fs_info->buffer_lock); 2224 spin_lock_init(&fs_info->buffer_lock);
2223 rwlock_init(&fs_info->tree_mod_log_lock); 2225 rwlock_init(&fs_info->tree_mod_log_lock);
2224 mutex_init(&fs_info->reloc_mutex); 2226 mutex_init(&fs_info->reloc_mutex);
@@ -2244,6 +2246,7 @@ int open_ctree(struct super_block *sb,
2244 atomic_set(&fs_info->async_submit_draining, 0); 2246 atomic_set(&fs_info->async_submit_draining, 0);
2245 atomic_set(&fs_info->nr_async_bios, 0); 2247 atomic_set(&fs_info->nr_async_bios, 0);
2246 atomic_set(&fs_info->defrag_running, 0); 2248 atomic_set(&fs_info->defrag_running, 0);
2249 atomic_set(&fs_info->qgroup_op_seq, 0);
2247 atomic64_set(&fs_info->tree_mod_seq, 0); 2250 atomic64_set(&fs_info->tree_mod_seq, 0);
2248 fs_info->sb = sb; 2251 fs_info->sb = sb;
2249 fs_info->max_inline = 8192 * 1024; 2252 fs_info->max_inline = 8192 * 1024;
@@ -2353,6 +2356,7 @@ int open_ctree(struct super_block *sb,
2353 spin_lock_init(&fs_info->qgroup_lock); 2356 spin_lock_init(&fs_info->qgroup_lock);
2354 mutex_init(&fs_info->qgroup_ioctl_lock); 2357 mutex_init(&fs_info->qgroup_ioctl_lock);
2355 fs_info->qgroup_tree = RB_ROOT; 2358 fs_info->qgroup_tree = RB_ROOT;
2359 fs_info->qgroup_op_tree = RB_ROOT;
2356 INIT_LIST_HEAD(&fs_info->dirty_qgroups); 2360 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2357 fs_info->qgroup_seq = 1; 2361 fs_info->qgroup_seq = 1;
2358 fs_info->quota_enabled = 0; 2362 fs_info->quota_enabled = 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aff579df5f47..343eb10230a1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,6 +35,7 @@
35#include "free-space-cache.h" 35#include "free-space-cache.h"
36#include "math.h" 36#include "math.h"
37#include "sysfs.h" 37#include "sysfs.h"
38#include "qgroup.h"
38 39
39#undef SCRAMBLE_DELAYED_REFS 40#undef SCRAMBLE_DELAYED_REFS
40 41
@@ -80,7 +81,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
80 u64 bytenr, u64 num_bytes, u64 parent, 81 u64 bytenr, u64 num_bytes, u64 parent,
81 u64 root_objectid, u64 owner_objectid, 82 u64 root_objectid, u64 owner_objectid,
82 u64 owner_offset, int refs_to_drop, 83 u64 owner_offset, int refs_to_drop,
83 struct btrfs_delayed_extent_op *extra_op); 84 struct btrfs_delayed_extent_op *extra_op,
85 int no_quota);
84static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 86static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
85 struct extent_buffer *leaf, 87 struct extent_buffer *leaf,
86 struct btrfs_extent_item *ei); 88 struct btrfs_extent_item *ei);
@@ -93,7 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
93 struct btrfs_root *root, 95 struct btrfs_root *root,
94 u64 parent, u64 root_objectid, 96 u64 parent, u64 root_objectid,
95 u64 flags, struct btrfs_disk_key *key, 97 u64 flags, struct btrfs_disk_key *key,
96 int level, struct btrfs_key *ins); 98 int level, struct btrfs_key *ins,
99 int no_quota);
97static int do_chunk_alloc(struct btrfs_trans_handle *trans, 100static int do_chunk_alloc(struct btrfs_trans_handle *trans,
98 struct btrfs_root *extent_root, u64 flags, 101 struct btrfs_root *extent_root, u64 flags,
99 int force); 102 int force);
@@ -1270,7 +1273,7 @@ fail:
1270static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1273static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1271 struct btrfs_root *root, 1274 struct btrfs_root *root,
1272 struct btrfs_path *path, 1275 struct btrfs_path *path,
1273 int refs_to_drop) 1276 int refs_to_drop, int *last_ref)
1274{ 1277{
1275 struct btrfs_key key; 1278 struct btrfs_key key;
1276 struct btrfs_extent_data_ref *ref1 = NULL; 1279 struct btrfs_extent_data_ref *ref1 = NULL;
@@ -1306,6 +1309,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1306 1309
1307 if (num_refs == 0) { 1310 if (num_refs == 0) {
1308 ret = btrfs_del_item(trans, root, path); 1311 ret = btrfs_del_item(trans, root, path);
1312 *last_ref = 1;
1309 } else { 1313 } else {
1310 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1314 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1311 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1315 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1763,7 +1767,8 @@ void update_inline_extent_backref(struct btrfs_root *root,
1763 struct btrfs_path *path, 1767 struct btrfs_path *path,
1764 struct btrfs_extent_inline_ref *iref, 1768 struct btrfs_extent_inline_ref *iref,
1765 int refs_to_mod, 1769 int refs_to_mod,
1766 struct btrfs_delayed_extent_op *extent_op) 1770 struct btrfs_delayed_extent_op *extent_op,
1771 int *last_ref)
1767{ 1772{
1768 struct extent_buffer *leaf; 1773 struct extent_buffer *leaf;
1769 struct btrfs_extent_item *ei; 1774 struct btrfs_extent_item *ei;
@@ -1807,6 +1812,7 @@ void update_inline_extent_backref(struct btrfs_root *root,
1807 else 1812 else
1808 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1813 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1809 } else { 1814 } else {
1815 *last_ref = 1;
1810 size = btrfs_extent_inline_ref_size(type); 1816 size = btrfs_extent_inline_ref_size(type);
1811 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1817 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1812 ptr = (unsigned long)iref; 1818 ptr = (unsigned long)iref;
@@ -1838,7 +1844,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1838 if (ret == 0) { 1844 if (ret == 0) {
1839 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1845 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1840 update_inline_extent_backref(root, path, iref, 1846 update_inline_extent_backref(root, path, iref,
1841 refs_to_add, extent_op); 1847 refs_to_add, extent_op, NULL);
1842 } else if (ret == -ENOENT) { 1848 } else if (ret == -ENOENT) {
1843 setup_inline_extent_backref(root, path, iref, parent, 1849 setup_inline_extent_backref(root, path, iref, parent,
1844 root_objectid, owner, offset, 1850 root_objectid, owner, offset,
@@ -1871,17 +1877,19 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1871 struct btrfs_root *root, 1877 struct btrfs_root *root,
1872 struct btrfs_path *path, 1878 struct btrfs_path *path,
1873 struct btrfs_extent_inline_ref *iref, 1879 struct btrfs_extent_inline_ref *iref,
1874 int refs_to_drop, int is_data) 1880 int refs_to_drop, int is_data, int *last_ref)
1875{ 1881{
1876 int ret = 0; 1882 int ret = 0;
1877 1883
1878 BUG_ON(!is_data && refs_to_drop != 1); 1884 BUG_ON(!is_data && refs_to_drop != 1);
1879 if (iref) { 1885 if (iref) {
1880 update_inline_extent_backref(root, path, iref, 1886 update_inline_extent_backref(root, path, iref,
1881 -refs_to_drop, NULL); 1887 -refs_to_drop, NULL, last_ref);
1882 } else if (is_data) { 1888 } else if (is_data) {
1883 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1889 ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
1890 last_ref);
1884 } else { 1891 } else {
1892 *last_ref = 1;
1885 ret = btrfs_del_item(trans, root, path); 1893 ret = btrfs_del_item(trans, root, path);
1886 } 1894 }
1887 return ret; 1895 return ret;
@@ -1945,7 +1953,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1945int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1953int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1946 struct btrfs_root *root, 1954 struct btrfs_root *root,
1947 u64 bytenr, u64 num_bytes, u64 parent, 1955 u64 bytenr, u64 num_bytes, u64 parent,
1948 u64 root_objectid, u64 owner, u64 offset, int for_cow) 1956 u64 root_objectid, u64 owner, u64 offset,
1957 int no_quota)
1949{ 1958{
1950 int ret; 1959 int ret;
1951 struct btrfs_fs_info *fs_info = root->fs_info; 1960 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1957,12 +1966,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1957 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 1966 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
1958 num_bytes, 1967 num_bytes,
1959 parent, root_objectid, (int)owner, 1968 parent, root_objectid, (int)owner,
1960 BTRFS_ADD_DELAYED_REF, NULL, for_cow); 1969 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
1961 } else { 1970 } else {
1962 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 1971 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
1963 num_bytes, 1972 num_bytes,
1964 parent, root_objectid, owner, offset, 1973 parent, root_objectid, owner, offset,
1965 BTRFS_ADD_DELAYED_REF, NULL, for_cow); 1974 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
1966 } 1975 }
1967 return ret; 1976 return ret;
1968} 1977}
@@ -1972,31 +1981,64 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1972 u64 bytenr, u64 num_bytes, 1981 u64 bytenr, u64 num_bytes,
1973 u64 parent, u64 root_objectid, 1982 u64 parent, u64 root_objectid,
1974 u64 owner, u64 offset, int refs_to_add, 1983 u64 owner, u64 offset, int refs_to_add,
1984 int no_quota,
1975 struct btrfs_delayed_extent_op *extent_op) 1985 struct btrfs_delayed_extent_op *extent_op)
1976{ 1986{
1987 struct btrfs_fs_info *fs_info = root->fs_info;
1977 struct btrfs_path *path; 1988 struct btrfs_path *path;
1978 struct extent_buffer *leaf; 1989 struct extent_buffer *leaf;
1979 struct btrfs_extent_item *item; 1990 struct btrfs_extent_item *item;
1991 struct btrfs_key key;
1980 u64 refs; 1992 u64 refs;
1981 int ret; 1993 int ret;
1994 enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
1982 1995
1983 path = btrfs_alloc_path(); 1996 path = btrfs_alloc_path();
1984 if (!path) 1997 if (!path)
1985 return -ENOMEM; 1998 return -ENOMEM;
1986 1999
2000 if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
2001 no_quota = 1;
2002
1987 path->reada = 1; 2003 path->reada = 1;
1988 path->leave_spinning = 1; 2004 path->leave_spinning = 1;
1989 /* this will setup the path even if it fails to insert the back ref */ 2005 /* this will setup the path even if it fails to insert the back ref */
1990 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 2006 ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
1991 path, bytenr, num_bytes, parent, 2007 bytenr, num_bytes, parent,
1992 root_objectid, owner, offset, 2008 root_objectid, owner, offset,
1993 refs_to_add, extent_op); 2009 refs_to_add, extent_op);
1994 if (ret != -EAGAIN) 2010 if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
1995 goto out; 2011 goto out;
2012 /*
2013 * Ok we were able to insert an inline extent and it appears to be a new
2014 * reference, deal with the qgroup accounting.
2015 */
2016 if (!ret && !no_quota) {
2017 ASSERT(root->fs_info->quota_enabled);
2018 leaf = path->nodes[0];
2019 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2020 item = btrfs_item_ptr(leaf, path->slots[0],
2021 struct btrfs_extent_item);
2022 if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
2023 type = BTRFS_QGROUP_OPER_ADD_SHARED;
2024 btrfs_release_path(path);
1996 2025
2026 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
2027 bytenr, num_bytes, type, 0);
2028 goto out;
2029 }
2030
2031 /*
2032 * Ok we had -EAGAIN which means we didn't have space to insert an
2033 * inline extent ref, so just update the reference count and add a
2034 * normal backref.
2035 */
1997 leaf = path->nodes[0]; 2036 leaf = path->nodes[0];
2037 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1998 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2038 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1999 refs = btrfs_extent_refs(leaf, item); 2039 refs = btrfs_extent_refs(leaf, item);
2040 if (refs)
2041 type = BTRFS_QGROUP_OPER_ADD_SHARED;
2000 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 2042 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2001 if (extent_op) 2043 if (extent_op)
2002 __run_delayed_extent_op(extent_op, leaf, item); 2044 __run_delayed_extent_op(extent_op, leaf, item);
@@ -2004,9 +2046,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2004 btrfs_mark_buffer_dirty(leaf); 2046 btrfs_mark_buffer_dirty(leaf);
2005 btrfs_release_path(path); 2047 btrfs_release_path(path);
2006 2048
2049 if (!no_quota) {
2050 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
2051 bytenr, num_bytes, type, 0);
2052 if (ret)
2053 goto out;
2054 }
2055
2007 path->reada = 1; 2056 path->reada = 1;
2008 path->leave_spinning = 1; 2057 path->leave_spinning = 1;
2009
2010 /* now insert the actual backref */ 2058 /* now insert the actual backref */
2011 ret = insert_extent_backref(trans, root->fs_info->extent_root, 2059 ret = insert_extent_backref(trans, root->fs_info->extent_root,
2012 path, bytenr, parent, root_objectid, 2060 path, bytenr, parent, root_objectid,
@@ -2040,8 +2088,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2040 2088
2041 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 2089 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2042 parent = ref->parent; 2090 parent = ref->parent;
2043 else 2091 ref_root = ref->root;
2044 ref_root = ref->root;
2045 2092
2046 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2093 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2047 if (extent_op) 2094 if (extent_op)
@@ -2055,13 +2102,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2055 node->num_bytes, parent, 2102 node->num_bytes, parent,
2056 ref_root, ref->objectid, 2103 ref_root, ref->objectid,
2057 ref->offset, node->ref_mod, 2104 ref->offset, node->ref_mod,
2058 extent_op); 2105 node->no_quota, extent_op);
2059 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2106 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2060 ret = __btrfs_free_extent(trans, root, node->bytenr, 2107 ret = __btrfs_free_extent(trans, root, node->bytenr,
2061 node->num_bytes, parent, 2108 node->num_bytes, parent,
2062 ref_root, ref->objectid, 2109 ref_root, ref->objectid,
2063 ref->offset, node->ref_mod, 2110 ref->offset, node->ref_mod,
2064 extent_op); 2111 extent_op, node->no_quota);
2065 } else { 2112 } else {
2066 BUG(); 2113 BUG();
2067 } 2114 }
@@ -2198,8 +2245,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2198 2245
2199 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2246 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2200 parent = ref->parent; 2247 parent = ref->parent;
2201 else 2248 ref_root = ref->root;
2202 ref_root = ref->root;
2203 2249
2204 ins.objectid = node->bytenr; 2250 ins.objectid = node->bytenr;
2205 if (skinny_metadata) { 2251 if (skinny_metadata) {
@@ -2217,15 +2263,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2217 parent, ref_root, 2263 parent, ref_root,
2218 extent_op->flags_to_set, 2264 extent_op->flags_to_set,
2219 &extent_op->key, 2265 &extent_op->key,
2220 ref->level, &ins); 2266 ref->level, &ins,
2267 node->no_quota);
2221 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2268 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2222 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 2269 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
2223 node->num_bytes, parent, ref_root, 2270 node->num_bytes, parent, ref_root,
2224 ref->level, 0, 1, extent_op); 2271 ref->level, 0, 1, node->no_quota,
2272 extent_op);
2225 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2273 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2226 ret = __btrfs_free_extent(trans, root, node->bytenr, 2274 ret = __btrfs_free_extent(trans, root, node->bytenr,
2227 node->num_bytes, parent, ref_root, 2275 node->num_bytes, parent, ref_root,
2228 ref->level, 0, 1, extent_op); 2276 ref->level, 0, 1, extent_op,
2277 node->no_quota);
2229 } else { 2278 } else {
2230 BUG(); 2279 BUG();
2231 } 2280 }
@@ -2573,42 +2622,6 @@ static u64 find_middle(struct rb_root *root)
2573} 2622}
2574#endif 2623#endif
2575 2624
2576int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2577 struct btrfs_fs_info *fs_info)
2578{
2579 struct qgroup_update *qgroup_update;
2580 int ret = 0;
2581
2582 if (list_empty(&trans->qgroup_ref_list) !=
2583 !trans->delayed_ref_elem.seq) {
2584 /* list without seq or seq without list */
2585 btrfs_err(fs_info,
2586 "qgroup accounting update error, list is%s empty, seq is %#x.%x",
2587 list_empty(&trans->qgroup_ref_list) ? "" : " not",
2588 (u32)(trans->delayed_ref_elem.seq >> 32),
2589 (u32)trans->delayed_ref_elem.seq);
2590 BUG();
2591 }
2592
2593 if (!trans->delayed_ref_elem.seq)
2594 return 0;
2595
2596 while (!list_empty(&trans->qgroup_ref_list)) {
2597 qgroup_update = list_first_entry(&trans->qgroup_ref_list,
2598 struct qgroup_update, list);
2599 list_del(&qgroup_update->list);
2600 if (!ret)
2601 ret = btrfs_qgroup_account_ref(
2602 trans, fs_info, qgroup_update->node,
2603 qgroup_update->extent_op);
2604 kfree(qgroup_update);
2605 }
2606
2607 btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
2608
2609 return ret;
2610}
2611
2612static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) 2625static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2613{ 2626{
2614 u64 num_bytes; 2627 u64 num_bytes;
@@ -2697,8 +2710,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2697 if (root == root->fs_info->extent_root) 2710 if (root == root->fs_info->extent_root)
2698 root = root->fs_info->tree_root; 2711 root = root->fs_info->tree_root;
2699 2712
2700 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2701
2702 delayed_refs = &trans->transaction->delayed_refs; 2713 delayed_refs = &trans->transaction->delayed_refs;
2703 if (count == 0) { 2714 if (count == 0) {
2704 count = atomic_read(&delayed_refs->num_entries) * 2; 2715 count = atomic_read(&delayed_refs->num_entries) * 2;
@@ -2757,6 +2768,9 @@ again:
2757 goto again; 2768 goto again;
2758 } 2769 }
2759out: 2770out:
2771 ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
2772 if (ret)
2773 return ret;
2760 assert_qgroups_uptodate(trans); 2774 assert_qgroups_uptodate(trans);
2761 return 0; 2775 return 0;
2762} 2776}
@@ -2963,7 +2977,7 @@ out:
2963static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2977static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2964 struct btrfs_root *root, 2978 struct btrfs_root *root,
2965 struct extent_buffer *buf, 2979 struct extent_buffer *buf,
2966 int full_backref, int inc, int for_cow) 2980 int full_backref, int inc, int no_quota)
2967{ 2981{
2968 u64 bytenr; 2982 u64 bytenr;
2969 u64 num_bytes; 2983 u64 num_bytes;
@@ -3013,7 +3027,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3013 key.offset -= btrfs_file_extent_offset(buf, fi); 3027 key.offset -= btrfs_file_extent_offset(buf, fi);
3014 ret = process_func(trans, root, bytenr, num_bytes, 3028 ret = process_func(trans, root, bytenr, num_bytes,
3015 parent, ref_root, key.objectid, 3029 parent, ref_root, key.objectid,
3016 key.offset, for_cow); 3030 key.offset, no_quota);
3017 if (ret) 3031 if (ret)
3018 goto fail; 3032 goto fail;
3019 } else { 3033 } else {
@@ -3021,7 +3035,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3021 num_bytes = btrfs_level_size(root, level - 1); 3035 num_bytes = btrfs_level_size(root, level - 1);
3022 ret = process_func(trans, root, bytenr, num_bytes, 3036 ret = process_func(trans, root, bytenr, num_bytes,
3023 parent, ref_root, level - 1, 0, 3037 parent, ref_root, level - 1, 0,
3024 for_cow); 3038 no_quota);
3025 if (ret) 3039 if (ret)
3026 goto fail; 3040 goto fail;
3027 } 3041 }
@@ -3032,15 +3046,15 @@ fail:
3032} 3046}
3033 3047
3034int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3048int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3035 struct extent_buffer *buf, int full_backref, int for_cow) 3049 struct extent_buffer *buf, int full_backref, int no_quota)
3036{ 3050{
3037 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); 3051 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
3038} 3052}
3039 3053
3040int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3054int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3041 struct extent_buffer *buf, int full_backref, int for_cow) 3055 struct extent_buffer *buf, int full_backref, int no_quota)
3042{ 3056{
3043 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); 3057 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
3044} 3058}
3045 3059
3046static int write_one_cache_group(struct btrfs_trans_handle *trans, 3060static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -5723,7 +5737,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5723 u64 bytenr, u64 num_bytes, u64 parent, 5737 u64 bytenr, u64 num_bytes, u64 parent,
5724 u64 root_objectid, u64 owner_objectid, 5738 u64 root_objectid, u64 owner_objectid,
5725 u64 owner_offset, int refs_to_drop, 5739 u64 owner_offset, int refs_to_drop,
5726 struct btrfs_delayed_extent_op *extent_op) 5740 struct btrfs_delayed_extent_op *extent_op,
5741 int no_quota)
5727{ 5742{
5728 struct btrfs_key key; 5743 struct btrfs_key key;
5729 struct btrfs_path *path; 5744 struct btrfs_path *path;
@@ -5739,9 +5754,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5739 int num_to_del = 1; 5754 int num_to_del = 1;
5740 u32 item_size; 5755 u32 item_size;
5741 u64 refs; 5756 u64 refs;
5757 int last_ref = 0;
5758 enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
5742 bool skinny_metadata = btrfs_fs_incompat(root->fs_info, 5759 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
5743 SKINNY_METADATA); 5760 SKINNY_METADATA);
5744 5761
5762 if (!info->quota_enabled || !is_fstree(root_objectid))
5763 no_quota = 1;
5764
5745 path = btrfs_alloc_path(); 5765 path = btrfs_alloc_path();
5746 if (!path) 5766 if (!path)
5747 return -ENOMEM; 5767 return -ENOMEM;
@@ -5789,7 +5809,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5789 BUG_ON(iref); 5809 BUG_ON(iref);
5790 ret = remove_extent_backref(trans, extent_root, path, 5810 ret = remove_extent_backref(trans, extent_root, path,
5791 NULL, refs_to_drop, 5811 NULL, refs_to_drop,
5792 is_data); 5812 is_data, &last_ref);
5793 if (ret) { 5813 if (ret) {
5794 btrfs_abort_transaction(trans, extent_root, ret); 5814 btrfs_abort_transaction(trans, extent_root, ret);
5795 goto out; 5815 goto out;
@@ -5916,6 +5936,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5916 refs -= refs_to_drop; 5936 refs -= refs_to_drop;
5917 5937
5918 if (refs > 0) { 5938 if (refs > 0) {
5939 type = BTRFS_QGROUP_OPER_SUB_SHARED;
5919 if (extent_op) 5940 if (extent_op)
5920 __run_delayed_extent_op(extent_op, leaf, ei); 5941 __run_delayed_extent_op(extent_op, leaf, ei);
5921 /* 5942 /*
@@ -5931,7 +5952,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5931 if (found_extent) { 5952 if (found_extent) {
5932 ret = remove_extent_backref(trans, extent_root, path, 5953 ret = remove_extent_backref(trans, extent_root, path,
5933 iref, refs_to_drop, 5954 iref, refs_to_drop,
5934 is_data); 5955 is_data, &last_ref);
5935 if (ret) { 5956 if (ret) {
5936 btrfs_abort_transaction(trans, extent_root, ret); 5957 btrfs_abort_transaction(trans, extent_root, ret);
5937 goto out; 5958 goto out;
@@ -5952,6 +5973,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5952 } 5973 }
5953 } 5974 }
5954 5975
5976 last_ref = 1;
5955 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 5977 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
5956 num_to_del); 5978 num_to_del);
5957 if (ret) { 5979 if (ret) {
@@ -5974,6 +5996,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5974 goto out; 5996 goto out;
5975 } 5997 }
5976 } 5998 }
5999 btrfs_release_path(path);
6000
6001 /* Deal with the quota accounting */
6002 if (!ret && last_ref && !no_quota) {
6003 int mod_seq = 0;
6004
6005 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
6006 type == BTRFS_QGROUP_OPER_SUB_SHARED)
6007 mod_seq = 1;
6008
6009 ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
6010 bytenr, num_bytes, type,
6011 mod_seq);
6012 }
5977out: 6013out:
5978 btrfs_free_path(path); 6014 btrfs_free_path(path);
5979 return ret; 6015 return ret;
@@ -6110,7 +6146,7 @@ out:
6110/* Can return -ENOMEM */ 6146/* Can return -ENOMEM */
6111int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, 6147int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6112 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 6148 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
6113 u64 owner, u64 offset, int for_cow) 6149 u64 owner, u64 offset, int no_quota)
6114{ 6150{
6115 int ret; 6151 int ret;
6116 struct btrfs_fs_info *fs_info = root->fs_info; 6152 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6130,13 +6166,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6130 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 6166 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
6131 num_bytes, 6167 num_bytes,
6132 parent, root_objectid, (int)owner, 6168 parent, root_objectid, (int)owner,
6133 BTRFS_DROP_DELAYED_REF, NULL, for_cow); 6169 BTRFS_DROP_DELAYED_REF, NULL, no_quota);
6134 } else { 6170 } else {
6135 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 6171 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
6136 num_bytes, 6172 num_bytes,
6137 parent, root_objectid, owner, 6173 parent, root_objectid, owner,
6138 offset, BTRFS_DROP_DELAYED_REF, 6174 offset, BTRFS_DROP_DELAYED_REF,
6139 NULL, for_cow); 6175 NULL, no_quota);
6140 } 6176 }
6141 return ret; 6177 return ret;
6142} 6178}
@@ -6842,6 +6878,13 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6842 btrfs_mark_buffer_dirty(path->nodes[0]); 6878 btrfs_mark_buffer_dirty(path->nodes[0]);
6843 btrfs_free_path(path); 6879 btrfs_free_path(path);
6844 6880
6881 /* Always set parent to 0 here since it's exclusive anyway. */
6882 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
6883 ins->objectid, ins->offset,
6884 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
6885 if (ret)
6886 return ret;
6887
6845 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6888 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6846 if (ret) { /* -ENOENT, logic error */ 6889 if (ret) { /* -ENOENT, logic error */
6847 btrfs_err(fs_info, "update block group failed for %llu %llu", 6890 btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6856,7 +6899,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6856 struct btrfs_root *root, 6899 struct btrfs_root *root,
6857 u64 parent, u64 root_objectid, 6900 u64 parent, u64 root_objectid,
6858 u64 flags, struct btrfs_disk_key *key, 6901 u64 flags, struct btrfs_disk_key *key,
6859 int level, struct btrfs_key *ins) 6902 int level, struct btrfs_key *ins,
6903 int no_quota)
6860{ 6904{
6861 int ret; 6905 int ret;
6862 struct btrfs_fs_info *fs_info = root->fs_info; 6906 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6866,6 +6910,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6866 struct btrfs_path *path; 6910 struct btrfs_path *path;
6867 struct extent_buffer *leaf; 6911 struct extent_buffer *leaf;
6868 u32 size = sizeof(*extent_item) + sizeof(*iref); 6912 u32 size = sizeof(*extent_item) + sizeof(*iref);
6913 u64 num_bytes = ins->offset;
6869 bool skinny_metadata = btrfs_fs_incompat(root->fs_info, 6914 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6870 SKINNY_METADATA); 6915 SKINNY_METADATA);
6871 6916
@@ -6899,6 +6944,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6899 6944
6900 if (skinny_metadata) { 6945 if (skinny_metadata) {
6901 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 6946 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
6947 num_bytes = root->leafsize;
6902 } else { 6948 } else {
6903 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 6949 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
6904 btrfs_set_tree_block_key(leaf, block_info, key); 6950 btrfs_set_tree_block_key(leaf, block_info, key);
@@ -6920,6 +6966,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6920 btrfs_mark_buffer_dirty(leaf); 6966 btrfs_mark_buffer_dirty(leaf);
6921 btrfs_free_path(path); 6967 btrfs_free_path(path);
6922 6968
6969 if (!no_quota) {
6970 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
6971 ins->objectid, num_bytes,
6972 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
6973 if (ret)
6974 return ret;
6975 }
6976
6923 ret = update_block_group(root, ins->objectid, root->leafsize, 1); 6977 ret = update_block_group(root, ins->objectid, root->leafsize, 1);
6924 if (ret) { /* -ENOENT, logic error */ 6978 if (ret) { /* -ENOENT, logic error */
6925 btrfs_err(fs_info, "update block group failed for %llu %llu", 6979 btrfs_err(fs_info, "update block group failed for %llu %llu",
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5c6947dbc948..8accf94ef220 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,7 @@
40#include "tree-log.h" 40#include "tree-log.h"
41#include "locking.h" 41#include "locking.h"
42#include "volumes.h" 42#include "volumes.h"
43#include "qgroup.h"
43 44
44static struct kmem_cache *btrfs_inode_defrag_cachep; 45static struct kmem_cache *btrfs_inode_defrag_cachep;
45/* 46/*
@@ -849,7 +850,7 @@ next_slot:
849 disk_bytenr, num_bytes, 0, 850 disk_bytenr, num_bytes, 0,
850 root->root_key.objectid, 851 root->root_key.objectid,
851 new_key.objectid, 852 new_key.objectid,
852 start - extent_offset, 0); 853 start - extent_offset, 1);
853 BUG_ON(ret); /* -ENOMEM */ 854 BUG_ON(ret); /* -ENOMEM */
854 } 855 }
855 key.offset = start; 856 key.offset = start;
@@ -1206,7 +1207,7 @@ again:
1206 1207
1207 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 1208 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
1208 root->root_key.objectid, 1209 root->root_key.objectid,
1209 ino, orig_offset, 0); 1210 ino, orig_offset, 1);
1210 BUG_ON(ret); /* -ENOMEM */ 1211 BUG_ON(ret); /* -ENOMEM */
1211 1212
1212 if (split == start) { 1213 if (split == start) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 242a37cd26b2..a21a4ac537b7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -58,6 +58,7 @@
58#include "dev-replace.h" 58#include "dev-replace.h"
59#include "props.h" 59#include "props.h"
60#include "sysfs.h" 60#include "sysfs.h"
61#include "qgroup.h"
61 62
62#ifdef CONFIG_64BIT 63#ifdef CONFIG_64BIT
63/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI 64/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -2941,6 +2942,41 @@ out:
2941 return ret; 2942 return ret;
2942} 2943}
2943 2944
2945/* Helper to check and see if this root currently has a ref on the given disk
2946 * bytenr. If it does then we need to update the quota for this root. This
2947 * doesn't do anything if quotas aren't enabled.
2948 */
2949static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2950 u64 disko)
2951{
2952 struct seq_list tree_mod_seq_elem = {};
2953 struct ulist *roots;
2954 struct ulist_iterator uiter;
2955 struct ulist_node *root_node = NULL;
2956 int ret;
2957
2958 if (!root->fs_info->quota_enabled)
2959 return 1;
2960
2961 btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
2962 ret = btrfs_find_all_roots(trans, root->fs_info, disko,
2963 tree_mod_seq_elem.seq, &roots);
2964 if (ret < 0)
2965 goto out;
2966 ret = 0;
2967 ULIST_ITER_INIT(&uiter);
2968 while ((root_node = ulist_next(roots, &uiter))) {
2969 if (root_node->val == root->objectid) {
2970 ret = 1;
2971 break;
2972 }
2973 }
2974 ulist_free(roots);
2975out:
2976 btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
2977 return ret;
2978}
2979
2944/** 2980/**
2945 * btrfs_clone() - clone a range from inode file to another 2981 * btrfs_clone() - clone a range from inode file to another
2946 * 2982 *
@@ -2964,7 +3000,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2964 u32 nritems; 3000 u32 nritems;
2965 int slot; 3001 int slot;
2966 int ret; 3002 int ret;
3003 int no_quota;
2967 u64 len = olen_aligned; 3004 u64 len = olen_aligned;
3005 u64 last_disko = 0;
2968 3006
2969 ret = -ENOMEM; 3007 ret = -ENOMEM;
2970 buf = vmalloc(btrfs_level_size(root, 0)); 3008 buf = vmalloc(btrfs_level_size(root, 0));
@@ -2996,6 +3034,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2996 3034
2997 nritems = btrfs_header_nritems(path->nodes[0]); 3035 nritems = btrfs_header_nritems(path->nodes[0]);
2998process_slot: 3036process_slot:
3037 no_quota = 1;
2999 if (path->slots[0] >= nritems) { 3038 if (path->slots[0] >= nritems) {
3000 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 3039 ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
3001 if (ret < 0) 3040 if (ret < 0)
@@ -3128,6 +3167,28 @@ process_slot:
3128 datao); 3167 datao);
3129 btrfs_set_file_extent_num_bytes(leaf, extent, 3168 btrfs_set_file_extent_num_bytes(leaf, extent,
3130 datal); 3169 datal);
3170
3171 /*
3172 * We need to look up the roots that point at
3173 * this bytenr and see if the new root does. If
3174 * it does not we need to make sure we update
3175 * quotas appropriately.
3176 */
3177 if (disko && root != BTRFS_I(src)->root &&
3178 disko != last_disko) {
3179 no_quota = check_ref(trans, root,
3180 disko);
3181 if (no_quota < 0) {
3182 btrfs_abort_transaction(trans,
3183 root,
3184 ret);
3185 btrfs_end_transaction(trans,
3186 root);
3187 ret = no_quota;
3188 goto out;
3189 }
3190 }
3191
3131 if (disko) { 3192 if (disko) {
3132 inode_add_bytes(inode, datal); 3193 inode_add_bytes(inode, datal);
3133 ret = btrfs_inc_extent_ref(trans, root, 3194 ret = btrfs_inc_extent_ref(trans, root,
@@ -3135,7 +3196,7 @@ process_slot:
3135 root->root_key.objectid, 3196 root->root_key.objectid,
3136 btrfs_ino(inode), 3197 btrfs_ino(inode),
3137 new_key.offset - datao, 3198 new_key.offset - datao,
3138 0); 3199 no_quota);
3139 if (ret) { 3200 if (ret) {
3140 btrfs_abort_transaction(trans, 3201 btrfs_abort_transaction(trans,
3141 root, 3202 root,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2cf905877aaf..09b8cc83965c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -32,6 +32,7 @@
32#include "ulist.h" 32#include "ulist.h"
33#include "backref.h" 33#include "backref.h"
34#include "extent_io.h" 34#include "extent_io.h"
35#include "qgroup.h"
35 36
36/* TODO XXX FIXME 37/* TODO XXX FIXME
37 * - subvol delete -> delete when ref goes to 0? delete limits also? 38 * - subvol delete -> delete when ref goes to 0? delete limits also?
@@ -84,8 +85,8 @@ struct btrfs_qgroup {
84 /* 85 /*
85 * temp variables for accounting operations 86 * temp variables for accounting operations
86 */ 87 */
87 u64 tag; 88 u64 old_refcnt;
88 u64 refcnt; 89 u64 new_refcnt;
89}; 90};
90 91
91/* 92/*
@@ -98,6 +99,9 @@ struct btrfs_qgroup_list {
98 struct btrfs_qgroup *member; 99 struct btrfs_qgroup *member;
99}; 100};
100 101
102#define ptr_to_u64(x) ((u64)(uintptr_t)x)
103#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)
104
101static int 105static int
102qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 106qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103 int init_flags); 107 int init_flags);
@@ -1174,33 +1178,198 @@ out:
1174 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1178 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1175 return ret; 1179 return ret;
1176} 1180}
1181static int comp_oper(struct btrfs_qgroup_operation *oper1,
1182 struct btrfs_qgroup_operation *oper2)
1183{
1184 if (oper1->bytenr < oper2->bytenr)
1185 return -1;
1186 if (oper1->bytenr > oper2->bytenr)
1187 return 1;
1188 if (oper1->seq < oper2->seq)
1189 return -1;
1190 if (oper1->seq > oper2->seq)
1191 return -1;
1192 if (oper1->ref_root < oper2->ref_root)
1193 return -1;
1194 if (oper1->ref_root > oper2->ref_root)
1195 return 1;
1196 if (oper1->type < oper2->type)
1197 return -1;
1198 if (oper1->type > oper2->type)
1199 return 1;
1200 return 0;
1201}
1202
1203static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
1204 struct btrfs_qgroup_operation *oper)
1205{
1206 struct rb_node **p;
1207 struct rb_node *parent = NULL;
1208 struct btrfs_qgroup_operation *cur;
1209 int cmp;
1210
1211 spin_lock(&fs_info->qgroup_op_lock);
1212 p = &fs_info->qgroup_op_tree.rb_node;
1213 while (*p) {
1214 parent = *p;
1215 cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
1216 cmp = comp_oper(cur, oper);
1217 if (cmp < 0) {
1218 p = &(*p)->rb_right;
1219 } else if (cmp) {
1220 p = &(*p)->rb_left;
1221 } else {
1222 spin_unlock(&fs_info->qgroup_op_lock);
1223 return -EEXIST;
1224 }
1225 }
1226 rb_link_node(&oper->n, parent, p);
1227 rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
1228 spin_unlock(&fs_info->qgroup_op_lock);
1229 return 0;
1230}
1177 1231
1178/* 1232/*
1179 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts 1233 * Record a quota operation for processing later on.
1180 * the modification into a list that's later used by btrfs_end_transaction to 1234 * @trans: the transaction we are adding the delayed op to.
1181 * pass the recorded modifications on to btrfs_qgroup_account_ref. 1235 * @fs_info: the fs_info for this fs.
1236 * @ref_root: the root of the reference we are acting on,
1237 * @bytenr: the bytenr we are acting on.
1238 * @num_bytes: the number of bytes in the reference.
1239 * @type: the type of operation this is.
1240 * @mod_seq: do we need to get a sequence number for looking up roots.
1241 *
1242 * We just add it to our trans qgroup_ref_list and carry on and process these
1243 * operations in order at some later point. If the reference root isn't a fs
1244 * root then we don't bother with doing anything.
1245 *
1246 * MUST BE HOLDING THE REF LOCK.
1182 */ 1247 */
1183int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, 1248int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1184 struct btrfs_delayed_ref_node *node, 1249 struct btrfs_fs_info *fs_info, u64 ref_root,
1185 struct btrfs_delayed_extent_op *extent_op) 1250 u64 bytenr, u64 num_bytes,
1251 enum btrfs_qgroup_operation_type type, int mod_seq)
1186{ 1252{
1187 struct qgroup_update *u; 1253 struct btrfs_qgroup_operation *oper;
1254 int ret;
1188 1255
1189 BUG_ON(!trans->delayed_ref_elem.seq); 1256 if (!is_fstree(ref_root) || !fs_info->quota_enabled)
1190 u = kmalloc(sizeof(*u), GFP_NOFS); 1257 return 0;
1191 if (!u) 1258
1259 oper = kmalloc(sizeof(*oper), GFP_NOFS);
1260 if (!oper)
1192 return -ENOMEM; 1261 return -ENOMEM;
1193 1262
1194 u->node = node; 1263 oper->ref_root = ref_root;
1195 u->extent_op = extent_op; 1264 oper->bytenr = bytenr;
1196 list_add_tail(&u->list, &trans->qgroup_ref_list); 1265 oper->num_bytes = num_bytes;
1266 oper->type = type;
1267 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1268 INIT_LIST_HEAD(&oper->elem.list);
1269 oper->elem.seq = 0;
1270 ret = insert_qgroup_oper(fs_info, oper);
1271 if (ret) {
1272 /* Shouldn't happen so have an assert for developers */
1273 ASSERT(0);
1274 kfree(oper);
1275 return ret;
1276 }
1277 list_add_tail(&oper->list, &trans->qgroup_ref_list);
1278
1279 if (mod_seq)
1280 btrfs_get_tree_mod_seq(fs_info, &oper->elem);
1197 1281
1198 return 0; 1282 return 0;
1199} 1283}
1200 1284
1201static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, 1285/*
1202 struct ulist *roots, struct ulist *tmp, 1286 * The easy accounting, if we are adding/removing the only ref for an extent
1203 u64 seq) 1287 * then this qgroup and all of the parent qgroups get their refrence and
1288 * exclusive counts adjusted.
1289 */
1290static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1291 struct btrfs_qgroup_operation *oper)
1292{
1293 struct btrfs_qgroup *qgroup;
1294 struct ulist *tmp;
1295 struct btrfs_qgroup_list *glist;
1296 struct ulist_node *unode;
1297 struct ulist_iterator uiter;
1298 int sign = 0;
1299 int ret = 0;
1300
1301 tmp = ulist_alloc(GFP_NOFS);
1302 if (!tmp)
1303 return -ENOMEM;
1304
1305 spin_lock(&fs_info->qgroup_lock);
1306 if (!fs_info->quota_root)
1307 goto out;
1308 qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1309 if (!qgroup)
1310 goto out;
1311 switch (oper->type) {
1312 case BTRFS_QGROUP_OPER_ADD_EXCL:
1313 sign = 1;
1314 break;
1315 case BTRFS_QGROUP_OPER_SUB_EXCL:
1316 sign = -1;
1317 break;
1318 default:
1319 ASSERT(0);
1320 }
1321 qgroup->rfer += sign * oper->num_bytes;
1322 qgroup->rfer_cmpr += sign * oper->num_bytes;
1323
1324 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1325 qgroup->excl += sign * oper->num_bytes;
1326 qgroup->excl_cmpr += sign * oper->num_bytes;
1327
1328 qgroup_dirty(fs_info, qgroup);
1329
1330 /* Get all of the parent groups that contain this qgroup */
1331 list_for_each_entry(glist, &qgroup->groups, next_group) {
1332 ret = ulist_add(tmp, glist->group->qgroupid,
1333 ptr_to_u64(glist->group), GFP_ATOMIC);
1334 if (ret < 0)
1335 goto out;
1336 }
1337
1338 /* Iterate all of the parents and adjust their reference counts */
1339 ULIST_ITER_INIT(&uiter);
1340 while ((unode = ulist_next(tmp, &uiter))) {
1341 qgroup = u64_to_ptr(unode->aux);
1342 qgroup->rfer += sign * oper->num_bytes;
1343 qgroup->rfer_cmpr += sign * oper->num_bytes;
1344 qgroup->excl += sign * oper->num_bytes;
1345 if (sign < 0)
1346 WARN_ON(qgroup->excl < oper->num_bytes);
1347 qgroup->excl_cmpr += sign * oper->num_bytes;
1348 qgroup_dirty(fs_info, qgroup);
1349
1350 /* Add any parents of the parents */
1351 list_for_each_entry(glist, &qgroup->groups, next_group) {
1352 ret = ulist_add(tmp, glist->group->qgroupid,
1353 ptr_to_u64(glist->group), GFP_ATOMIC);
1354 if (ret < 0)
1355 goto out;
1356 }
1357 }
1358 ret = 0;
1359out:
1360 spin_unlock(&fs_info->qgroup_lock);
1361 ulist_free(tmp);
1362 return ret;
1363}
1364
1365/*
1366 * Walk all of the roots that pointed to our bytenr and adjust their refcnts
1367 * accordingly.
1368 */
1369static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1370 u64 root_to_skip, struct ulist *tmp,
1371 struct ulist *roots, struct ulist *qgroups,
1372 u64 seq, int *old_roots, int rescan)
1204{ 1373{
1205 struct ulist_node *unode; 1374 struct ulist_node *unode;
1206 struct ulist_iterator uiter; 1375 struct ulist_iterator uiter;
@@ -1211,256 +1380,549 @@ static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1211 1380
1212 ULIST_ITER_INIT(&uiter); 1381 ULIST_ITER_INIT(&uiter);
1213 while ((unode = ulist_next(roots, &uiter))) { 1382 while ((unode = ulist_next(roots, &uiter))) {
1383 /* We don't count our current root here */
1384 if (unode->val == root_to_skip)
1385 continue;
1214 qg = find_qgroup_rb(fs_info, unode->val); 1386 qg = find_qgroup_rb(fs_info, unode->val);
1215 if (!qg) 1387 if (!qg)
1216 continue; 1388 continue;
1389 /*
1390 * We could have a pending removal of this same ref so we may
1391 * not have actually found our ref root when doing
1392 * btrfs_find_all_roots, so we need to keep track of how many
1393 * old roots we find in case we removed ours and added a
1394 * different one at the same time. I don't think this could
1395 * happen in practice but that sort of thinking leads to pain
1396 * and suffering and to the dark side.
1397 */
1398 (*old_roots)++;
1217 1399
1218 ulist_reinit(tmp); 1400 ulist_reinit(tmp);
1219 /* XXX id not needed */ 1401 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1220 ret = ulist_add(tmp, qg->qgroupid, 1402 GFP_ATOMIC);
1221 (u64)(uintptr_t)qg, GFP_ATOMIC); 1403 if (ret < 0)
1404 return ret;
1405 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1222 if (ret < 0) 1406 if (ret < 0)
1223 return ret; 1407 return ret;
1224 ULIST_ITER_INIT(&tmp_uiter); 1408 ULIST_ITER_INIT(&tmp_uiter);
1225 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1409 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1226 struct btrfs_qgroup_list *glist; 1410 struct btrfs_qgroup_list *glist;
1227 1411
1228 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; 1412 qg = u64_to_ptr(tmp_unode->aux);
1229 if (qg->refcnt < seq) 1413 /*
1230 qg->refcnt = seq + 1; 1414 * We use this sequence number to keep from having to
1415 * run the whole list and 0 out the refcnt every time.
1416 * We basically use sequnce as the known 0 count and
1417 * then add 1 everytime we see a qgroup. This is how we
1418 * get how many of the roots actually point up to the
1419 * upper level qgroups in order to determine exclusive
1420 * counts.
1421 *
1422 * For rescan we want to set old_refcnt to seq so our
1423 * exclusive calculations end up correct.
1424 */
1425 if (rescan)
1426 qg->old_refcnt = seq;
1427 else if (qg->old_refcnt < seq)
1428 qg->old_refcnt = seq + 1;
1231 else 1429 else
1232 ++qg->refcnt; 1430 qg->old_refcnt++;
1233 1431
1432 if (qg->new_refcnt < seq)
1433 qg->new_refcnt = seq + 1;
1434 else
1435 qg->new_refcnt++;
1234 list_for_each_entry(glist, &qg->groups, next_group) { 1436 list_for_each_entry(glist, &qg->groups, next_group) {
1437 ret = ulist_add(qgroups, glist->group->qgroupid,
1438 ptr_to_u64(glist->group),
1439 GFP_ATOMIC);
1440 if (ret < 0)
1441 return ret;
1235 ret = ulist_add(tmp, glist->group->qgroupid, 1442 ret = ulist_add(tmp, glist->group->qgroupid,
1236 (u64)(uintptr_t)glist->group, 1443 ptr_to_u64(glist->group),
1237 GFP_ATOMIC); 1444 GFP_ATOMIC);
1238 if (ret < 0) 1445 if (ret < 0)
1239 return ret; 1446 return ret;
1240 } 1447 }
1241 } 1448 }
1242 } 1449 }
1450 return 0;
1451}
1243 1452
1453/*
1454 * We need to walk forward in our operation tree and account for any roots that
1455 * were deleted after we made this operation.
1456 */
1457static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1458 struct btrfs_qgroup_operation *oper,
1459 struct ulist *tmp,
1460 struct ulist *qgroups, u64 seq,
1461 int *old_roots)
1462{
1463 struct ulist_node *unode;
1464 struct ulist_iterator uiter;
1465 struct btrfs_qgroup *qg;
1466 struct btrfs_qgroup_operation *tmp_oper;
1467 struct rb_node *n;
1468 int ret;
1469
1470 ulist_reinit(tmp);
1471
1472 /*
1473 * We only walk forward in the tree since we're only interested in
1474 * removals that happened _after_ our operation.
1475 */
1476 spin_lock(&fs_info->qgroup_op_lock);
1477 n = rb_next(&oper->n);
1478 spin_unlock(&fs_info->qgroup_op_lock);
1479 if (!n)
1480 return 0;
1481 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1482 while (tmp_oper->bytenr == oper->bytenr) {
1483 /*
1484 * If it's not a removal we don't care, additions work out
1485 * properly with our refcnt tracking.
1486 */
1487 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1488 tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1489 goto next;
1490 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1491 if (!qg)
1492 goto next;
1493 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1494 GFP_ATOMIC);
1495 if (ret) {
1496 if (ret < 0)
1497 return ret;
1498 /*
1499 * We only want to increase old_roots if this qgroup is
1500 * not already in the list of qgroups. If it is already
1501 * there then that means it must have been re-added or
1502 * the delete will be discarded because we had an
1503 * existing ref that we haven't looked up yet. In this
1504 * case we don't want to increase old_roots. So if ret
1505 * == 1 then we know that this is the first time we've
1506 * seen this qgroup and we can bump the old_roots.
1507 */
1508 (*old_roots)++;
1509 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1510 GFP_ATOMIC);
1511 if (ret < 0)
1512 return ret;
1513 }
1514next:
1515 spin_lock(&fs_info->qgroup_op_lock);
1516 n = rb_next(&tmp_oper->n);
1517 spin_unlock(&fs_info->qgroup_op_lock);
1518 if (!n)
1519 break;
1520 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1521 }
1522
1523 /* Ok now process the qgroups we found */
1524 ULIST_ITER_INIT(&uiter);
1525 while ((unode = ulist_next(tmp, &uiter))) {
1526 struct btrfs_qgroup_list *glist;
1527
1528 qg = u64_to_ptr(unode->aux);
1529 if (qg->old_refcnt < seq)
1530 qg->old_refcnt = seq + 1;
1531 else
1532 qg->old_refcnt++;
1533 if (qg->new_refcnt < seq)
1534 qg->new_refcnt = seq + 1;
1535 else
1536 qg->new_refcnt++;
1537 list_for_each_entry(glist, &qg->groups, next_group) {
1538 ret = ulist_add(qgroups, glist->group->qgroupid,
1539 ptr_to_u64(glist->group), GFP_ATOMIC);
1540 if (ret < 0)
1541 return ret;
1542 ret = ulist_add(tmp, glist->group->qgroupid,
1543 ptr_to_u64(glist->group), GFP_ATOMIC);
1544 if (ret < 0)
1545 return ret;
1546 }
1547 }
1244 return 0; 1548 return 0;
1245} 1549}
1246 1550
1247static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info, 1551/* Add refcnt for the newly added reference. */
1248 struct ulist *roots, struct ulist *tmp, 1552static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1249 u64 seq, int sgn, u64 num_bytes, 1553 struct btrfs_qgroup_operation *oper,
1250 struct btrfs_qgroup *qgroup) 1554 struct btrfs_qgroup *qgroup,
1555 struct ulist *tmp, struct ulist *qgroups,
1556 u64 seq)
1251{ 1557{
1252 struct ulist_node *unode; 1558 struct ulist_node *unode;
1253 struct ulist_iterator uiter; 1559 struct ulist_iterator uiter;
1254 struct btrfs_qgroup *qg; 1560 struct btrfs_qgroup *qg;
1255 struct btrfs_qgroup_list *glist;
1256 int ret; 1561 int ret;
1257 1562
1258 ulist_reinit(tmp); 1563 ulist_reinit(tmp);
1259 ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1564 ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1565 GFP_ATOMIC);
1566 if (ret < 0)
1567 return ret;
1568 ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1569 GFP_ATOMIC);
1260 if (ret < 0) 1570 if (ret < 0)
1261 return ret; 1571 return ret;
1262
1263 ULIST_ITER_INIT(&uiter); 1572 ULIST_ITER_INIT(&uiter);
1264 while ((unode = ulist_next(tmp, &uiter))) { 1573 while ((unode = ulist_next(tmp, &uiter))) {
1265 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1574 struct btrfs_qgroup_list *glist;
1266 if (qg->refcnt < seq) {
1267 /* not visited by step 1 */
1268 qg->rfer += sgn * num_bytes;
1269 qg->rfer_cmpr += sgn * num_bytes;
1270 if (roots->nnodes == 0) {
1271 qg->excl += sgn * num_bytes;
1272 qg->excl_cmpr += sgn * num_bytes;
1273 }
1274 qgroup_dirty(fs_info, qg);
1275 }
1276 WARN_ON(qg->tag >= seq);
1277 qg->tag = seq;
1278 1575
1576 qg = u64_to_ptr(unode->aux);
1577 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1578 if (qg->new_refcnt < seq)
1579 qg->new_refcnt = seq + 1;
1580 else
1581 qg->new_refcnt++;
1582 } else {
1583 if (qg->old_refcnt < seq)
1584 qg->old_refcnt = seq + 1;
1585 else
1586 qg->old_refcnt++;
1587 }
1279 list_for_each_entry(glist, &qg->groups, next_group) { 1588 list_for_each_entry(glist, &qg->groups, next_group) {
1280 ret = ulist_add(tmp, glist->group->qgroupid, 1589 ret = ulist_add(tmp, glist->group->qgroupid,
1281 (uintptr_t)glist->group, GFP_ATOMIC); 1590 ptr_to_u64(glist->group), GFP_ATOMIC);
1591 if (ret < 0)
1592 return ret;
1593 ret = ulist_add(qgroups, glist->group->qgroupid,
1594 ptr_to_u64(glist->group), GFP_ATOMIC);
1282 if (ret < 0) 1595 if (ret < 0)
1283 return ret; 1596 return ret;
1284 } 1597 }
1285 } 1598 }
1286
1287 return 0; 1599 return 0;
1288} 1600}
1289 1601
1290static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info, 1602/*
1291 struct ulist *roots, struct ulist *tmp, 1603 * This adjusts the counters for all referenced qgroups if need be.
1292 u64 seq, int sgn, u64 num_bytes) 1604 */
1605static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1606 u64 root_to_skip, u64 num_bytes,
1607 struct ulist *qgroups, u64 seq,
1608 int old_roots, int new_roots, int rescan)
1293{ 1609{
1294 struct ulist_node *unode; 1610 struct ulist_node *unode;
1295 struct ulist_iterator uiter; 1611 struct ulist_iterator uiter;
1296 struct btrfs_qgroup *qg; 1612 struct btrfs_qgroup *qg;
1297 struct ulist_node *tmp_unode; 1613 u64 cur_new_count, cur_old_count;
1298 struct ulist_iterator tmp_uiter;
1299 int ret;
1300 1614
1301 ULIST_ITER_INIT(&uiter); 1615 ULIST_ITER_INIT(&uiter);
1302 while ((unode = ulist_next(roots, &uiter))) { 1616 while ((unode = ulist_next(qgroups, &uiter))) {
1303 qg = find_qgroup_rb(fs_info, unode->val); 1617 bool dirty = false;
1304 if (!qg)
1305 continue;
1306 1618
1307 ulist_reinit(tmp); 1619 qg = u64_to_ptr(unode->aux);
1308 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); 1620 /*
1309 if (ret < 0) 1621 * Wasn't referenced before but is now, add to the reference
1310 return ret; 1622 * counters.
1623 */
1624 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
1625 qg->rfer += num_bytes;
1626 qg->rfer_cmpr += num_bytes;
1627 dirty = true;
1628 }
1311 1629
1312 ULIST_ITER_INIT(&tmp_uiter); 1630 /*
1313 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1631 * Was referenced before but isn't now, subtract from the
1314 struct btrfs_qgroup_list *glist; 1632 * reference counters.
1633 */
1634 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
1635 qg->rfer -= num_bytes;
1636 qg->rfer_cmpr -= num_bytes;
1637 dirty = true;
1638 }
1315 1639
1316 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; 1640 if (qg->old_refcnt < seq)
1317 if (qg->tag == seq) 1641 cur_old_count = 0;
1318 continue; 1642 else
1643 cur_old_count = qg->old_refcnt - seq;
1644 if (qg->new_refcnt < seq)
1645 cur_new_count = 0;
1646 else
1647 cur_new_count = qg->new_refcnt - seq;
1319 1648
1320 if (qg->refcnt - seq == roots->nnodes) { 1649 /*
1321 qg->excl -= sgn * num_bytes; 1650 * If our refcount was the same as the roots previously but our
1322 qg->excl_cmpr -= sgn * num_bytes; 1651 * new count isn't the same as the number of roots now then we
1323 qgroup_dirty(fs_info, qg); 1652 * went from having a exclusive reference on this range to not.
1324 } 1653 */
1654 if (old_roots && cur_old_count == old_roots &&
1655 (cur_new_count != new_roots || new_roots == 0)) {
1656 WARN_ON(cur_new_count != new_roots && new_roots == 0);
1657 qg->excl -= num_bytes;
1658 qg->excl_cmpr -= num_bytes;
1659 dirty = true;
1660 }
1325 1661
1326 list_for_each_entry(glist, &qg->groups, next_group) { 1662 /*
1327 ret = ulist_add(tmp, glist->group->qgroupid, 1663 * If we didn't reference all the roots before but now we do we
1328 (uintptr_t)glist->group, 1664 * have an exclusive reference to this range.
1329 GFP_ATOMIC); 1665 */
1330 if (ret < 0) 1666 if ((!old_roots || (old_roots && cur_old_count != old_roots))
1331 return ret; 1667 && cur_new_count == new_roots) {
1332 } 1668 qg->excl += num_bytes;
1669 qg->excl_cmpr += num_bytes;
1670 dirty = true;
1333 } 1671 }
1334 }
1335 1672
1673 if (dirty)
1674 qgroup_dirty(fs_info, qg);
1675 }
1336 return 0; 1676 return 0;
1337} 1677}
1338 1678
1339/* 1679/*
1340 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 1680 * If we removed a data extent and there were other references for that bytenr
1341 * from the fs. First, all roots referencing the extent are searched, and 1681 * then we need to lookup all referenced roots to make sure we still don't
1342 * then the space is accounted accordingly to the different roots. The 1682 * reference this bytenr. If we do then we can just discard this operation.
1343 * accounting algorithm works in 3 steps documented inline.
1344 */ 1683 */
1345int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, 1684static int check_existing_refs(struct btrfs_trans_handle *trans,
1346 struct btrfs_fs_info *fs_info, 1685 struct btrfs_fs_info *fs_info,
1347 struct btrfs_delayed_ref_node *node, 1686 struct btrfs_qgroup_operation *oper)
1348 struct btrfs_delayed_extent_op *extent_op)
1349{ 1687{
1350 struct btrfs_root *quota_root;
1351 u64 ref_root;
1352 struct btrfs_qgroup *qgroup;
1353 struct ulist *roots = NULL; 1688 struct ulist *roots = NULL;
1354 u64 seq; 1689 struct ulist_node *unode;
1690 struct ulist_iterator uiter;
1355 int ret = 0; 1691 int ret = 0;
1356 int sgn;
1357 1692
1358 if (!fs_info->quota_enabled) 1693 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1359 return 0; 1694 oper->elem.seq, &roots);
1360 1695 if (ret < 0)
1361 BUG_ON(!fs_info->quota_root); 1696 return ret;
1697 ret = 0;
1362 1698
1363 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1699 ULIST_ITER_INIT(&uiter);
1364 node->type == BTRFS_SHARED_BLOCK_REF_KEY) { 1700 while ((unode = ulist_next(roots, &uiter))) {
1365 struct btrfs_delayed_tree_ref *ref; 1701 if (unode->val == oper->ref_root) {
1366 ref = btrfs_delayed_node_to_tree_ref(node); 1702 ret = 1;
1367 ref_root = ref->root; 1703 break;
1368 } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1704 }
1369 node->type == BTRFS_SHARED_DATA_REF_KEY) {
1370 struct btrfs_delayed_data_ref *ref;
1371 ref = btrfs_delayed_node_to_data_ref(node);
1372 ref_root = ref->root;
1373 } else {
1374 BUG();
1375 } 1705 }
1706 ulist_free(roots);
1707 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1376 1708
1377 if (!is_fstree(ref_root)) { 1709 return ret;
1378 /* 1710}
1379 * non-fs-trees are not being accounted
1380 */
1381 return 0;
1382 }
1383 1711
1384 switch (node->action) { 1712/*
1385 case BTRFS_ADD_DELAYED_REF: 1713 * If we share a reference across multiple roots then we may need to adjust
1386 case BTRFS_ADD_DELAYED_EXTENT: 1714 * various qgroups referenced and exclusive counters. The basic premise is this
1387 sgn = 1; 1715 *
1388 seq = btrfs_tree_mod_seq_prev(node->seq); 1716 * 1) We have seq to represent a 0 count. Instead of looping through all of the
1389 break; 1717 * qgroups and resetting their refcount to 0 we just constantly bump this
1390 case BTRFS_DROP_DELAYED_REF: 1718 * sequence number to act as the base reference count. This means that if
1391 sgn = -1; 1719 * anybody is equal to or below this sequence they were never referenced. We
1392 seq = node->seq; 1720 * jack this sequence up by the number of roots we found each time in order to
1393 break; 1721 * make sure we don't have any overlap.
1394 case BTRFS_UPDATE_DELAYED_HEAD: 1722 *
1395 return 0; 1723 * 2) We first search all the roots that reference the area _except_ the root
1396 default: 1724 * we're acting on currently. This makes up the old_refcnt of all the qgroups
1397 BUG(); 1725 * before.
1398 } 1726 *
1727 * 3) We walk all of the qgroups referenced by the root we are currently acting
1728 * on, and will either adjust old_refcnt in the case of a removal or the
1729 * new_refcnt in the case of an addition.
1730 *
1731 * 4) Finally we walk all the qgroups that are referenced by this range
1732 * including the root we are acting on currently. We will adjust the counters
1733 * based on the number of roots we had and will have after this operation.
1734 *
1735 * Take this example as an illustration
1736 *
1737 * [qgroup 1/0]
1738 * / | \
1739 * [qg 0/0] [qg 0/1] [qg 0/2]
1740 * \ | /
1741 * [ extent ]
1742 *
1743 * Say we are adding a reference that is covered by qg 0/0. The first step
1744 * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
1745 * old_roots being 2. Because it is adding new_roots will be 1. We then go
1746 * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
1747 * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
1748 * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
1749 * reference and thus must add the size to the referenced bytes. Everything
1750 * else is the same so nothing else changes.
1751 */
1752static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1753 struct btrfs_fs_info *fs_info,
1754 struct btrfs_qgroup_operation *oper)
1755{
1756 struct ulist *roots = NULL;
1757 struct ulist *qgroups, *tmp;
1758 struct btrfs_qgroup *qgroup;
1759 struct seq_list elem = {};
1760 u64 seq;
1761 int old_roots = 0;
1762 int new_roots = 0;
1763 int ret = 0;
1399 1764
1400 mutex_lock(&fs_info->qgroup_rescan_lock); 1765 if (oper->elem.seq) {
1401 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1766 ret = check_existing_refs(trans, fs_info, oper);
1402 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { 1767 if (ret < 0)
1403 mutex_unlock(&fs_info->qgroup_rescan_lock); 1768 return ret;
1769 if (ret)
1404 return 0; 1770 return 0;
1405 }
1406 } 1771 }
1407 mutex_unlock(&fs_info->qgroup_rescan_lock);
1408 1772
1409 /* 1773 qgroups = ulist_alloc(GFP_NOFS);
1410 * the delayed ref sequence number we pass depends on the direction of 1774 if (!qgroups)
1411 * the operation. for add operations, we pass 1775 return -ENOMEM;
1412 * tree_mod_log_prev_seq(node->seq) to skip
1413 * the delayed ref's current sequence number, because we need the state
1414 * of the tree before the add operation. for delete operations, we pass
1415 * (node->seq) to include the delayed ref's current sequence number,
1416 * because we need the state of the tree after the delete operation.
1417 */
1418 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1419 if (ret < 0)
1420 return ret;
1421
1422 spin_lock(&fs_info->qgroup_lock);
1423 1776
1424 quota_root = fs_info->quota_root; 1777 tmp = ulist_alloc(GFP_NOFS);
1425 if (!quota_root) 1778 if (!tmp)
1426 goto unlock; 1779 return -ENOMEM;
1427 1780
1428 qgroup = find_qgroup_rb(fs_info, ref_root); 1781 btrfs_get_tree_mod_seq(fs_info, &elem);
1782 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
1783 &roots);
1784 btrfs_put_tree_mod_seq(fs_info, &elem);
1785 if (ret < 0) {
1786 ulist_free(qgroups);
1787 ulist_free(tmp);
1788 return ret;
1789 }
1790 spin_lock(&fs_info->qgroup_lock);
1791 qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1429 if (!qgroup) 1792 if (!qgroup)
1430 goto unlock; 1793 goto out;
1794 seq = fs_info->qgroup_seq;
1431 1795
1432 /* 1796 /*
1433 * step 1: for each old ref, visit all nodes once and inc refcnt 1797 * So roots is the list of all the roots currently pointing at the
1798 * bytenr, including the ref we are adding if we are adding, or not if
1799 * we are removing a ref. So we pass in the ref_root to skip that root
1800 * in our calculations. We set old_refnct and new_refcnt cause who the
1801 * hell knows what everything looked like before, and it doesn't matter
1802 * except...
1434 */ 1803 */
1435 ulist_reinit(fs_info->qgroup_ulist); 1804 ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
1436 seq = fs_info->qgroup_seq; 1805 seq, &old_roots, 0);
1437 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 1806 if (ret < 0)
1807 goto out;
1438 1808
1439 ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist, 1809 /*
1440 seq); 1810 * Now adjust the refcounts of the qgroups that care about this
1441 if (ret) 1811 * reference, either the old_count in the case of removal or new_count
1442 goto unlock; 1812 * in the case of an addition.
1813 */
1814 ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
1815 seq);
1816 if (ret < 0)
1817 goto out;
1443 1818
1444 /* 1819 /*
1445 * step 2: walk from the new root 1820 * ...in the case of removals. If we had a removal before we got around
1821 * to processing this operation then we need to find that guy and count
1822 * his references as if they really existed so we don't end up screwing
1823 * up the exclusive counts. Then whenever we go to process the delete
1824 * everything will be grand and we can account for whatever exclusive
1825 * changes need to be made there. We also have to pass in old_roots so
1826 * we have an accurate count of the roots as it pertains to this
1827 * operations view of the world.
1446 */ 1828 */
1447 ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist, 1829 ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
1448 seq, sgn, node->num_bytes, qgroup); 1830 &old_roots);
1449 if (ret) 1831 if (ret < 0)
1450 goto unlock; 1832 goto out;
1451 1833
1452 /* 1834 /*
1453 * step 3: walk again from old refs 1835 * We are adding our root, need to adjust up the number of roots,
1836 * otherwise old_roots is the number of roots we want.
1454 */ 1837 */
1455 ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist, 1838 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1456 seq, sgn, node->num_bytes); 1839 new_roots = old_roots + 1;
1457 if (ret) 1840 } else {
1458 goto unlock; 1841 new_roots = old_roots;
1842 old_roots++;
1843 }
1844 fs_info->qgroup_seq += old_roots + 1;
1459 1845
1460unlock: 1846
1847 /*
1848 * And now the magic happens, bless Arne for having a pretty elegant
1849 * solution for this.
1850 */
1851 qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
1852 qgroups, seq, old_roots, new_roots, 0);
1853out:
1461 spin_unlock(&fs_info->qgroup_lock); 1854 spin_unlock(&fs_info->qgroup_lock);
1855 ulist_free(qgroups);
1462 ulist_free(roots); 1856 ulist_free(roots);
1857 ulist_free(tmp);
1858 return ret;
1859}
1860
1861/*
1862 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1863 * from the fs. First, all roots referencing the extent are searched, and
1864 * then the space is accounted accordingly to the different roots. The
1865 * accounting algorithm works in 3 steps documented inline.
1866 */
1867static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
1868 struct btrfs_fs_info *fs_info,
1869 struct btrfs_qgroup_operation *oper)
1870{
1871 int ret = 0;
1872
1873 if (!fs_info->quota_enabled)
1874 return 0;
1875
1876 BUG_ON(!fs_info->quota_root);
1877
1878 mutex_lock(&fs_info->qgroup_rescan_lock);
1879 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1880 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
1881 mutex_unlock(&fs_info->qgroup_rescan_lock);
1882 return 0;
1883 }
1884 }
1885 mutex_unlock(&fs_info->qgroup_rescan_lock);
1886
1887 ASSERT(is_fstree(oper->ref_root));
1888
1889 switch (oper->type) {
1890 case BTRFS_QGROUP_OPER_ADD_EXCL:
1891 case BTRFS_QGROUP_OPER_SUB_EXCL:
1892 ret = qgroup_excl_accounting(fs_info, oper);
1893 break;
1894 case BTRFS_QGROUP_OPER_ADD_SHARED:
1895 case BTRFS_QGROUP_OPER_SUB_SHARED:
1896 ret = qgroup_shared_accounting(trans, fs_info, oper);
1897 break;
1898 default:
1899 ASSERT(0);
1900 }
1901 return ret;
1902}
1463 1903
1904/*
1905 * Needs to be called everytime we run delayed refs, even if there is an error
1906 * in order to cleanup outstanding operations.
1907 */
1908int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
1909 struct btrfs_fs_info *fs_info)
1910{
1911 struct btrfs_qgroup_operation *oper;
1912 int ret = 0;
1913
1914 while (!list_empty(&trans->qgroup_ref_list)) {
1915 oper = list_first_entry(&trans->qgroup_ref_list,
1916 struct btrfs_qgroup_operation, list);
1917 list_del_init(&oper->list);
1918 if (!ret || !trans->aborted)
1919 ret = btrfs_qgroup_account(trans, fs_info, oper);
1920 spin_lock(&fs_info->qgroup_op_lock);
1921 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
1922 spin_unlock(&fs_info->qgroup_op_lock);
1923 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1924 kfree(oper);
1925 }
1464 return ret; 1926 return ret;
1465} 1927}
1466 1928
@@ -1629,8 +2091,16 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1629 srcgroup = find_qgroup_rb(fs_info, srcid); 2091 srcgroup = find_qgroup_rb(fs_info, srcid);
1630 if (!srcgroup) 2092 if (!srcgroup)
1631 goto unlock; 2093 goto unlock;
1632 dstgroup->rfer = srcgroup->rfer - level_size; 2094
1633 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; 2095 /*
2096 * We call inherit after we clone the root in order to make sure
2097 * our counts don't go crazy, so at this point the only
2098 * difference between the two roots should be the root node.
2099 */
2100 dstgroup->rfer = srcgroup->rfer;
2101 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2102 dstgroup->excl = level_size;
2103 dstgroup->excl_cmpr = level_size;
1634 srcgroup->excl = level_size; 2104 srcgroup->excl = level_size;
1635 srcgroup->excl_cmpr = level_size; 2105 srcgroup->excl_cmpr = level_size;
1636 qgroup_dirty(fs_info, dstgroup); 2106 qgroup_dirty(fs_info, dstgroup);
@@ -1734,7 +2204,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1734 struct btrfs_qgroup *qg; 2204 struct btrfs_qgroup *qg;
1735 struct btrfs_qgroup_list *glist; 2205 struct btrfs_qgroup_list *glist;
1736 2206
1737 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 2207 qg = u64_to_ptr(unode->aux);
1738 2208
1739 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2209 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1740 qg->reserved + (s64)qg->rfer + num_bytes > 2210 qg->reserved + (s64)qg->rfer + num_bytes >
@@ -1766,7 +2236,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1766 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2236 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1767 struct btrfs_qgroup *qg; 2237 struct btrfs_qgroup *qg;
1768 2238
1769 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 2239 qg = u64_to_ptr(unode->aux);
1770 2240
1771 qg->reserved += num_bytes; 2241 qg->reserved += num_bytes;
1772 } 2242 }
@@ -1812,7 +2282,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1812 struct btrfs_qgroup *qg; 2282 struct btrfs_qgroup *qg;
1813 struct btrfs_qgroup_list *glist; 2283 struct btrfs_qgroup_list *glist;
1814 2284
1815 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 2285 qg = u64_to_ptr(unode->aux);
1816 2286
1817 qg->reserved -= num_bytes; 2287 qg->reserved -= num_bytes;
1818 2288
@@ -1848,15 +2318,15 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1848 */ 2318 */
1849static int 2319static int
1850qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2320qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1851 struct btrfs_trans_handle *trans, struct ulist *tmp, 2321 struct btrfs_trans_handle *trans, struct ulist *qgroups,
1852 struct extent_buffer *scratch_leaf) 2322 struct ulist *tmp, struct extent_buffer *scratch_leaf)
1853{ 2323{
1854 struct btrfs_key found; 2324 struct btrfs_key found;
1855 struct ulist *roots = NULL; 2325 struct ulist *roots = NULL;
1856 struct ulist_node *unode;
1857 struct ulist_iterator uiter;
1858 struct seq_list tree_mod_seq_elem = {}; 2326 struct seq_list tree_mod_seq_elem = {};
2327 u64 num_bytes;
1859 u64 seq; 2328 u64 seq;
2329 int new_roots;
1860 int slot; 2330 int slot;
1861 int ret; 2331 int ret;
1862 2332
@@ -1897,8 +2367,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1897 mutex_unlock(&fs_info->qgroup_rescan_lock); 2367 mutex_unlock(&fs_info->qgroup_rescan_lock);
1898 2368
1899 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2369 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1900 u64 num_bytes;
1901
1902 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2370 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1903 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2371 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
1904 found.type != BTRFS_METADATA_ITEM_KEY) 2372 found.type != BTRFS_METADATA_ITEM_KEY)
@@ -1908,76 +2376,34 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1908 else 2376 else
1909 num_bytes = found.offset; 2377 num_bytes = found.offset;
1910 2378
1911 ret = btrfs_find_all_roots(trans, fs_info, found.objectid, 2379 ulist_reinit(qgroups);
1912 tree_mod_seq_elem.seq, &roots); 2380 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2381 &roots);
1913 if (ret < 0) 2382 if (ret < 0)
1914 goto out; 2383 goto out;
1915 spin_lock(&fs_info->qgroup_lock); 2384 spin_lock(&fs_info->qgroup_lock);
1916 seq = fs_info->qgroup_seq; 2385 seq = fs_info->qgroup_seq;
1917 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 2386 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1918 2387
1919 ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); 2388 new_roots = 0;
1920 if (ret) { 2389 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2390 seq, &new_roots, 1);
2391 if (ret < 0) {
1921 spin_unlock(&fs_info->qgroup_lock); 2392 spin_unlock(&fs_info->qgroup_lock);
1922 ulist_free(roots); 2393 ulist_free(roots);
1923 goto out; 2394 goto out;
1924 } 2395 }
1925 2396
1926 /* 2397 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
1927 * step2 of btrfs_qgroup_account_ref works from a single root, 2398 seq, 0, new_roots, 1);
1928 * we're doing all at once here. 2399 if (ret < 0) {
1929 */ 2400 spin_unlock(&fs_info->qgroup_lock);
1930 ulist_reinit(tmp); 2401 ulist_free(roots);
1931 ULIST_ITER_INIT(&uiter); 2402 goto out;
1932 while ((unode = ulist_next(roots, &uiter))) {
1933 struct btrfs_qgroup *qg;
1934
1935 qg = find_qgroup_rb(fs_info, unode->val);
1936 if (!qg)
1937 continue;
1938
1939 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1940 GFP_ATOMIC);
1941 if (ret < 0) {
1942 spin_unlock(&fs_info->qgroup_lock);
1943 ulist_free(roots);
1944 goto out;
1945 }
1946 }
1947
1948 /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1949 ULIST_ITER_INIT(&uiter);
1950 while ((unode = ulist_next(tmp, &uiter))) {
1951 struct btrfs_qgroup *qg;
1952 struct btrfs_qgroup_list *glist;
1953
1954 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1955 qg->rfer += num_bytes;
1956 qg->rfer_cmpr += num_bytes;
1957 WARN_ON(qg->tag >= seq);
1958 if (qg->refcnt - seq == roots->nnodes) {
1959 qg->excl += num_bytes;
1960 qg->excl_cmpr += num_bytes;
1961 }
1962 qgroup_dirty(fs_info, qg);
1963
1964 list_for_each_entry(glist, &qg->groups, next_group) {
1965 ret = ulist_add(tmp, glist->group->qgroupid,
1966 (uintptr_t)glist->group,
1967 GFP_ATOMIC);
1968 if (ret < 0) {
1969 spin_unlock(&fs_info->qgroup_lock);
1970 ulist_free(roots);
1971 goto out;
1972 }
1973 }
1974 } 2403 }
1975
1976 spin_unlock(&fs_info->qgroup_lock); 2404 spin_unlock(&fs_info->qgroup_lock);
1977 ulist_free(roots); 2405 ulist_free(roots);
1978 ret = 0;
1979 } 2406 }
1980
1981out: 2407out:
1982 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2408 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1983 2409
@@ -1990,13 +2416,16 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
1990 qgroup_rescan_work); 2416 qgroup_rescan_work);
1991 struct btrfs_path *path; 2417 struct btrfs_path *path;
1992 struct btrfs_trans_handle *trans = NULL; 2418 struct btrfs_trans_handle *trans = NULL;
1993 struct ulist *tmp = NULL; 2419 struct ulist *tmp = NULL, *qgroups = NULL;
1994 struct extent_buffer *scratch_leaf = NULL; 2420 struct extent_buffer *scratch_leaf = NULL;
1995 int err = -ENOMEM; 2421 int err = -ENOMEM;
1996 2422
1997 path = btrfs_alloc_path(); 2423 path = btrfs_alloc_path();
1998 if (!path) 2424 if (!path)
1999 goto out; 2425 goto out;
2426 qgroups = ulist_alloc(GFP_NOFS);
2427 if (!qgroups)
2428 goto out;
2000 tmp = ulist_alloc(GFP_NOFS); 2429 tmp = ulist_alloc(GFP_NOFS);
2001 if (!tmp) 2430 if (!tmp)
2002 goto out; 2431 goto out;
@@ -2015,7 +2444,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2015 err = -EINTR; 2444 err = -EINTR;
2016 } else { 2445 } else {
2017 err = qgroup_rescan_leaf(fs_info, path, trans, 2446 err = qgroup_rescan_leaf(fs_info, path, trans,
2018 tmp, scratch_leaf); 2447 qgroups, tmp, scratch_leaf);
2019 } 2448 }
2020 if (err > 0) 2449 if (err > 0)
2021 btrfs_commit_transaction(trans, fs_info->fs_root); 2450 btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2025,7 +2454,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2025 2454
2026out: 2455out:
2027 kfree(scratch_leaf); 2456 kfree(scratch_leaf);
2028 ulist_free(tmp); 2457 ulist_free(qgroups);
2029 btrfs_free_path(path); 2458 btrfs_free_path(path);
2030 2459
2031 mutex_lock(&fs_info->qgroup_rescan_lock); 2460 mutex_lock(&fs_info->qgroup_rescan_lock);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
new file mode 100644
index 000000000000..5952ff1fbd7a
--- /dev/null
+++ b/fs/btrfs/qgroup.h
@@ -0,0 +1,107 @@
1/*
2 * Copyright (C) 2014 Facebook. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#ifndef __BTRFS_QGROUP__
20#define __BTRFS_QGROUP__
21
22/*
23 * A description of the operations, all of these operations only happen when we
24 * are adding the 1st reference for that subvolume in the case of adding space
25 * or on the last reference delete in the case of subtraction. The only
26 * exception is the last one, which is added for confusion.
27 *
28 * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
29 * one pointing at the bytes we are adding. This is called on the first
30 * allocation.
31 *
32 * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
33 * shared between subvols. This is called on the creation of a ref that already
34 * has refs from a different subvolume, so basically reflink.
35 *
36 * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
37 * one referencing the range.
38 *
39 * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares
40 * refs with other subvolumes.
41 */
/*
 * Kind of a recorded qgroup operation. When an operation is accounted, the
 * two *_EXCL values are handled by qgroup_excl_accounting() and the two
 * *_SHARED values by qgroup_shared_accounting().
 */
42enum btrfs_qgroup_operation_type {
43 BTRFS_QGROUP_OPER_ADD_EXCL,
44 BTRFS_QGROUP_OPER_ADD_SHARED,
45 BTRFS_QGROUP_OPER_SUB_EXCL,
46 BTRFS_QGROUP_OPER_SUB_SHARED,
47};
48
/*
 * One pending qgroup accounting operation. Instances are queued on a
 * transaction handle and consumed (accounted, unlinked and freed) by
 * btrfs_delayed_qgroup_accounting().
 */
49struct btrfs_qgroup_operation {
/* Root the operation is recorded against; asserted to be an fs tree when accounted. */
50 u64 ref_root;
/* bytenr the operation applies to; compared with the rescan progress objectid. */
51 u64 bytenr;
/* Byte count for the operation (presumably the extent length -- TODO confirm). */
52 u64 num_bytes;
/* Sequence number of the operation; how it is assigned is not visible in this chunk. */
53 u64 seq;
/* Selects exclusive vs. shared, add vs. subtract accounting. */
54 enum btrfs_qgroup_operation_type type;
/* Tree mod seq element; released with btrfs_put_tree_mod_seq() on teardown. */
55 struct seq_list elem;
/* Node in fs_info->qgroup_op_tree (modified under fs_info->qgroup_op_lock). */
56 struct rb_node n;
/* Entry on the owning transaction handle's qgroup_ref_list. */
57 struct list_head list;
58};
59
60int btrfs_quota_enable(struct btrfs_trans_handle *trans,
61 struct btrfs_fs_info *fs_info);
62int btrfs_quota_disable(struct btrfs_trans_handle *trans,
63 struct btrfs_fs_info *fs_info);
64int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
65void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
66int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
67int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
68 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
69int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
70 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
71int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
72 struct btrfs_fs_info *fs_info, u64 qgroupid,
73 char *name);
74int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
75 struct btrfs_fs_info *fs_info, u64 qgroupid);
76int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
77 struct btrfs_fs_info *fs_info, u64 qgroupid,
78 struct btrfs_qgroup_limit *limit);
79int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
80void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
81struct btrfs_delayed_extent_op;
82int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
83 struct btrfs_fs_info *fs_info, u64 ref_root,
84 u64 bytenr, u64 num_bytes,
85 enum btrfs_qgroup_operation_type type,
86 int mod_seq);
87int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
88 struct btrfs_fs_info *fs_info);
89void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
90 struct btrfs_fs_info *fs_info,
91 struct btrfs_qgroup_operation *oper);
92int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
93 struct btrfs_fs_info *fs_info);
94int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
95 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
96 struct btrfs_qgroup_inherit *inherit);
97int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
98void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
99
100void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
101
102#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
103int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
104 u64 rfer, u64 excl);
105#endif
106
107#endif /* __BTRFS_QGROUP__ */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7c4c049da871..3aafbde8b637 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -31,6 +31,7 @@
31#include "inode-map.h" 31#include "inode-map.h"
32#include "volumes.h" 32#include "volumes.h"
33#include "dev-replace.h" 33#include "dev-replace.h"
34#include "qgroup.h"
34 35
35#define BTRFS_ROOT_TRANS_TAG 0 36#define BTRFS_ROOT_TRANS_TAG 0
36 37
@@ -703,23 +704,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
703 return 0; 704 return 0;
704 } 705 }
705 706
706 /*
707 * do the qgroup accounting as early as possible
708 */
709 err = btrfs_delayed_refs_qgroup_accounting(trans, info);
710
711 btrfs_trans_release_metadata(trans, root); 707 btrfs_trans_release_metadata(trans, root);
712 trans->block_rsv = NULL; 708 trans->block_rsv = NULL;
713 709
714 if (trans->qgroup_reserved) {
715 /*
716 * the same root has to be passed here between start_transaction
717 * and end_transaction. Subvolume quota depends on this.
718 */
719 btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
720 trans->qgroup_reserved = 0;
721 }
722
723 if (!list_empty(&trans->new_bgs)) 710 if (!list_empty(&trans->new_bgs))
724 btrfs_create_pending_block_groups(trans, root); 711 btrfs_create_pending_block_groups(trans, root);
725 712
@@ -730,6 +717,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
730 btrfs_run_delayed_refs(trans, root, cur); 717 btrfs_run_delayed_refs(trans, root, cur);
731 } 718 }
732 719
720 if (trans->qgroup_reserved) {
721 /*
722 * the same root has to be passed here between start_transaction
723 * and end_transaction. Subvolume quota depends on this.
724 */
725 btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
726 trans->qgroup_reserved = 0;
727 }
728
733 btrfs_trans_release_metadata(trans, root); 729 btrfs_trans_release_metadata(trans, root);
734 trans->block_rsv = NULL; 730 trans->block_rsv = NULL;
735 731
@@ -1169,12 +1165,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1169 goto no_free_objectid; 1165 goto no_free_objectid;
1170 } 1166 }
1171 1167
1172 pending->error = btrfs_qgroup_inherit(trans, fs_info,
1173 root->root_key.objectid,
1174 objectid, pending->inherit);
1175 if (pending->error)
1176 goto no_free_objectid;
1177
1178 key.objectid = objectid; 1168 key.objectid = objectid;
1179 key.offset = (u64)-1; 1169 key.offset = (u64)-1;
1180 key.type = BTRFS_ROOT_ITEM_KEY; 1170 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1271,6 +1261,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1271 goto fail; 1261 goto fail;
1272 } 1262 }
1273 1263
1264 /*
1265 * We need to flush delayed refs in order to make sure all of our quota
1266 * operations have been done before we call btrfs_qgroup_inherit.
1267 */
1268 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1269 if (ret) {
1270 btrfs_abort_transaction(trans, root, ret);
1271 goto fail;
1272 }
1273
1274 pending->error = btrfs_qgroup_inherit(trans, fs_info,
1275 root->root_key.objectid,
1276 objectid, pending->inherit);
1277 if (pending->error)
1278 goto no_free_objectid;
1279
1274 /* see comments in should_cow_block() */ 1280 /* see comments in should_cow_block() */
1275 set_bit(BTRFS_ROOT_FORCE_COW, &root->state); 1281 set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
1276 smp_wmb(); 1282 smp_wmb();
@@ -1599,12 +1605,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1599 * them now so that they hinder processing of more delayed refs 1605 * them now so that they hinder processing of more delayed refs
1600 * as little as possible. 1606 * as little as possible.
1601 */ 1607 */
1602 if (ret) {
1603 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1604 return ret;
1605 }
1606
1607 ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1608 if (ret) 1608 if (ret)
1609 return ret; 1609 return ret;
1610 1610