aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/ctree.c45
-rw-r--r--fs/btrfs/ctree.h59
-rw-r--r--fs/btrfs/delayed-ref.c39
-rw-r--r--fs/btrfs/delayed-ref.h24
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/extent-tree.c206
-rw-r--r--fs/btrfs/file.c5
-rw-r--r--fs/btrfs/ioctl.c63
-rw-r--r--fs/btrfs/qgroup.c915
-rw-r--r--fs/btrfs/qgroup.h107
-rw-r--r--fs/btrfs/transaction.c52
12 files changed, 1044 insertions, 479 deletions
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index a910b27a8ad9..94e94429f3e9 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
55int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); 55int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
56 56
57int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 57int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
58 struct btrfs_fs_info *fs_info, u64 bytenr, 58 struct btrfs_fs_info *fs_info, u64 bytenr,
59 u64 time_seq, struct ulist **roots); 59 u64 time_seq, struct ulist **roots);
60char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 60char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
61 u32 name_len, unsigned long name_off, 61 u32 name_len, unsigned long name_off,
62 struct extent_buffer *eb_in, u64 parent, 62 struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2f10e12ae94c..bbbe4f1c5086 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -356,44 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
356} 356}
357 357
358/* 358/*
359 * Increment the upper half of tree_mod_seq, set lower half zero. 359 * Pull a new tree mod seq number for our operation.
360 *
361 * Must be called with fs_info->tree_mod_seq_lock held.
362 */
363static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
364{
365 u64 seq = atomic64_read(&fs_info->tree_mod_seq);
366 seq &= 0xffffffff00000000ull;
367 seq += 1ull << 32;
368 atomic64_set(&fs_info->tree_mod_seq, seq);
369 return seq;
370}
371
372/*
373 * Increment the lower half of tree_mod_seq.
374 *
375 * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
376 * are generated should not technically require a spin lock here. (Rationale:
377 * incrementing the minor while incrementing the major seq number is between its
378 * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
379 * just returns a unique sequence number as usual.) We have decided to leave
380 * that requirement in here and rethink it once we notice it really imposes a
381 * problem on some workload.
382 */ 360 */
383static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info) 361static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
384{ 362{
385 return atomic64_inc_return(&fs_info->tree_mod_seq); 363 return atomic64_inc_return(&fs_info->tree_mod_seq);
386} 364}
387 365
388/* 366/*
389 * return the last minor in the previous major tree_mod_seq number
390 */
391u64 btrfs_tree_mod_seq_prev(u64 seq)
392{
393 return (seq & 0xffffffff00000000ull) - 1ull;
394}
395
396/*
397 * This adds a new blocker to the tree mod log's blocker list if the @elem 367 * This adds a new blocker to the tree mod log's blocker list if the @elem
398 * passed does not already have a sequence number set. So when a caller expects 368 * passed does not already have a sequence number set. So when a caller expects
399 * to record tree modifications, it should ensure to set elem->seq to zero 369 * to record tree modifications, it should ensure to set elem->seq to zero
@@ -404,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
404u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, 374u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
405 struct seq_list *elem) 375 struct seq_list *elem)
406{ 376{
407 u64 seq;
408
409 tree_mod_log_write_lock(fs_info); 377 tree_mod_log_write_lock(fs_info);
410 spin_lock(&fs_info->tree_mod_seq_lock); 378 spin_lock(&fs_info->tree_mod_seq_lock);
411 if (!elem->seq) { 379 if (!elem->seq) {
412 elem->seq = btrfs_inc_tree_mod_seq_major(fs_info); 380 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
413 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); 381 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
414 } 382 }
415 seq = btrfs_inc_tree_mod_seq_minor(fs_info);
416 spin_unlock(&fs_info->tree_mod_seq_lock); 383 spin_unlock(&fs_info->tree_mod_seq_lock);
417 tree_mod_log_write_unlock(fs_info); 384 tree_mod_log_write_unlock(fs_info);
418 385
419 return seq; 386 return elem->seq;
420} 387}
421 388
422void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 389void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -489,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
489 456
490 BUG_ON(!tm); 457 BUG_ON(!tm);
491 458
492 spin_lock(&fs_info->tree_mod_seq_lock); 459 tm->seq = btrfs_inc_tree_mod_seq(fs_info);
493 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
494 spin_unlock(&fs_info->tree_mod_seq_lock);
495 460
496 tm_root = &fs_info->tree_mod_log; 461 tm_root = &fs_info->tree_mod_log;
497 new = &tm_root->rb_node; 462 new = &tm_root->rb_node;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index efd3bf61696d..06cc384933cc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1648,7 +1648,10 @@ struct btrfs_fs_info {
1648 1648
1649 /* holds configuration and tracking. Protected by qgroup_lock */ 1649 /* holds configuration and tracking. Protected by qgroup_lock */
1650 struct rb_root qgroup_tree; 1650 struct rb_root qgroup_tree;
1651 struct rb_root qgroup_op_tree;
1651 spinlock_t qgroup_lock; 1652 spinlock_t qgroup_lock;
1653 spinlock_t qgroup_op_lock;
1654 atomic_t qgroup_op_seq;
1652 1655
1653 /* 1656 /*
1654 * used to avoid frequently calling ulist_alloc()/ulist_free() 1657 * used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -3300,9 +3303,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3300 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3303 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3301 struct btrfs_key *ins, int is_data); 3304 struct btrfs_key *ins, int is_data);
3302int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3305int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3303 struct extent_buffer *buf, int full_backref, int for_cow); 3306 struct extent_buffer *buf, int full_backref, int no_quota);
3304int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3307int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3305 struct extent_buffer *buf, int full_backref, int for_cow); 3308 struct extent_buffer *buf, int full_backref, int no_quota);
3306int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3309int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3307 struct btrfs_root *root, 3310 struct btrfs_root *root,
3308 u64 bytenr, u64 num_bytes, u64 flags, 3311 u64 bytenr, u64 num_bytes, u64 flags,
@@ -3310,7 +3313,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3310int btrfs_free_extent(struct btrfs_trans_handle *trans, 3313int btrfs_free_extent(struct btrfs_trans_handle *trans,
3311 struct btrfs_root *root, 3314 struct btrfs_root *root,
3312 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3315 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
3313 u64 owner, u64 offset, int for_cow); 3316 u64 owner, u64 offset, int no_quota);
3314 3317
3315int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 3318int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
3316int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 3319int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@@ -3322,7 +3325,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3322int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 3325int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3323 struct btrfs_root *root, 3326 struct btrfs_root *root,
3324 u64 bytenr, u64 num_bytes, u64 parent, 3327 u64 bytenr, u64 num_bytes, u64 parent,
3325 u64 root_objectid, u64 owner, u64 offset, int for_cow); 3328 u64 root_objectid, u64 owner, u64 offset, int no_quota);
3326 3329
3327int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3330int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3328 struct btrfs_root *root); 3331 struct btrfs_root *root);
@@ -3410,7 +3413,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
3410int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, 3413int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
3411 struct btrfs_fs_info *fs_info); 3414 struct btrfs_fs_info *fs_info);
3412int __get_raid_index(u64 flags); 3415int __get_raid_index(u64 flags);
3413
3414int btrfs_start_nocow_write(struct btrfs_root *root); 3416int btrfs_start_nocow_write(struct btrfs_root *root);
3415void btrfs_end_nocow_write(struct btrfs_root *root); 3417void btrfs_end_nocow_write(struct btrfs_root *root);
3416/* ctree.c */ 3418/* ctree.c */
@@ -3586,7 +3588,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
3586 struct seq_list *elem); 3588 struct seq_list *elem);
3587void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 3589void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
3588 struct seq_list *elem); 3590 struct seq_list *elem);
3589u64 btrfs_tree_mod_seq_prev(u64 seq);
3590int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); 3591int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
3591 3592
3592/* root-item.c */ 3593/* root-item.c */
@@ -4094,52 +4095,6 @@ void btrfs_reada_detach(void *handle);
4094int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 4095int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
4095 u64 start, int err); 4096 u64 start, int err);
4096 4097
4097/* qgroup.c */
4098struct qgroup_update {
4099 struct list_head list;
4100 struct btrfs_delayed_ref_node *node;
4101 struct btrfs_delayed_extent_op *extent_op;
4102};
4103
4104int btrfs_quota_enable(struct btrfs_trans_handle *trans,
4105 struct btrfs_fs_info *fs_info);
4106int btrfs_quota_disable(struct btrfs_trans_handle *trans,
4107 struct btrfs_fs_info *fs_info);
4108int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
4109void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
4110int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
4111int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
4112 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
4113int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
4114 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
4115int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
4116 struct btrfs_fs_info *fs_info, u64 qgroupid,
4117 char *name);
4118int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
4119 struct btrfs_fs_info *fs_info, u64 qgroupid);
4120int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
4121 struct btrfs_fs_info *fs_info, u64 qgroupid,
4122 struct btrfs_qgroup_limit *limit);
4123int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
4124void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
4125struct btrfs_delayed_extent_op;
4126int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
4127 struct btrfs_delayed_ref_node *node,
4128 struct btrfs_delayed_extent_op *extent_op);
4129int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
4130 struct btrfs_fs_info *fs_info,
4131 struct btrfs_delayed_ref_node *node,
4132 struct btrfs_delayed_extent_op *extent_op);
4133int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
4134 struct btrfs_fs_info *fs_info);
4135int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
4136 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
4137 struct btrfs_qgroup_inherit *inherit);
4138int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
4139void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
4140
4141void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
4142
4143static inline int is_fstree(u64 rootid) 4098static inline int is_fstree(u64 rootid)
4144{ 4099{
4145 if (rootid == BTRFS_FS_TREE_OBJECTID || 4100 if (rootid == BTRFS_FS_TREE_OBJECTID ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 31299646024d..6d16bea94e1c 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
106 return -1; 106 return -1;
107 if (ref1->type > ref2->type) 107 if (ref1->type > ref2->type)
108 return 1; 108 return 1;
109 if (ref1->no_quota > ref2->no_quota)
110 return 1;
111 if (ref1->no_quota < ref2->no_quota)
112 return -1;
109 /* merging of sequenced refs is not allowed */ 113 /* merging of sequenced refs is not allowed */
110 if (compare_seq) { 114 if (compare_seq) {
111 if (ref1->seq < ref2->seq) 115 if (ref1->seq < ref2->seq)
@@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
635 struct btrfs_delayed_ref_head *head_ref, 639 struct btrfs_delayed_ref_head *head_ref,
636 struct btrfs_delayed_ref_node *ref, u64 bytenr, 640 struct btrfs_delayed_ref_node *ref, u64 bytenr,
637 u64 num_bytes, u64 parent, u64 ref_root, int level, 641 u64 num_bytes, u64 parent, u64 ref_root, int level,
638 int action, int for_cow) 642 int action, int no_quota)
639{ 643{
640 struct btrfs_delayed_ref_node *existing; 644 struct btrfs_delayed_ref_node *existing;
641 struct btrfs_delayed_tree_ref *full_ref; 645 struct btrfs_delayed_tree_ref *full_ref;
@@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
645 if (action == BTRFS_ADD_DELAYED_EXTENT) 649 if (action == BTRFS_ADD_DELAYED_EXTENT)
646 action = BTRFS_ADD_DELAYED_REF; 650 action = BTRFS_ADD_DELAYED_REF;
647 651
652 if (is_fstree(ref_root))
653 seq = atomic64_read(&fs_info->tree_mod_seq);
648 delayed_refs = &trans->transaction->delayed_refs; 654 delayed_refs = &trans->transaction->delayed_refs;
649 655
650 /* first set the basic ref node struct up */ 656 /* first set the basic ref node struct up */
@@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
655 ref->action = action; 661 ref->action = action;
656 ref->is_head = 0; 662 ref->is_head = 0;
657 ref->in_tree = 1; 663 ref->in_tree = 1;
658 664 ref->no_quota = no_quota;
659 if (need_ref_seq(for_cow, ref_root))
660 seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
661 ref->seq = seq; 665 ref->seq = seq;
662 666
663 full_ref = btrfs_delayed_node_to_tree_ref(ref); 667 full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
697 struct btrfs_delayed_ref_head *head_ref, 701 struct btrfs_delayed_ref_head *head_ref,
698 struct btrfs_delayed_ref_node *ref, u64 bytenr, 702 struct btrfs_delayed_ref_node *ref, u64 bytenr,
699 u64 num_bytes, u64 parent, u64 ref_root, u64 owner, 703 u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
700 u64 offset, int action, int for_cow) 704 u64 offset, int action, int no_quota)
701{ 705{
702 struct btrfs_delayed_ref_node *existing; 706 struct btrfs_delayed_ref_node *existing;
703 struct btrfs_delayed_data_ref *full_ref; 707 struct btrfs_delayed_data_ref *full_ref;
@@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
709 713
710 delayed_refs = &trans->transaction->delayed_refs; 714 delayed_refs = &trans->transaction->delayed_refs;
711 715
716 if (is_fstree(ref_root))
717 seq = atomic64_read(&fs_info->tree_mod_seq);
718
712 /* first set the basic ref node struct up */ 719 /* first set the basic ref node struct up */
713 atomic_set(&ref->refs, 1); 720 atomic_set(&ref->refs, 1);
714 ref->bytenr = bytenr; 721 ref->bytenr = bytenr;
@@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
717 ref->action = action; 724 ref->action = action;
718 ref->is_head = 0; 725 ref->is_head = 0;
719 ref->in_tree = 1; 726 ref->in_tree = 1;
720 727 ref->no_quota = no_quota;
721 if (need_ref_seq(for_cow, ref_root))
722 seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
723 ref->seq = seq; 728 ref->seq = seq;
724 729
725 full_ref = btrfs_delayed_node_to_data_ref(ref); 730 full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
762 u64 bytenr, u64 num_bytes, u64 parent, 767 u64 bytenr, u64 num_bytes, u64 parent,
763 u64 ref_root, int level, int action, 768 u64 ref_root, int level, int action,
764 struct btrfs_delayed_extent_op *extent_op, 769 struct btrfs_delayed_extent_op *extent_op,
765 int for_cow) 770 int no_quota)
766{ 771{
767 struct btrfs_delayed_tree_ref *ref; 772 struct btrfs_delayed_tree_ref *ref;
768 struct btrfs_delayed_ref_head *head_ref; 773 struct btrfs_delayed_ref_head *head_ref;
769 struct btrfs_delayed_ref_root *delayed_refs; 774 struct btrfs_delayed_ref_root *delayed_refs;
770 775
776 if (!is_fstree(ref_root) || !fs_info->quota_enabled)
777 no_quota = 0;
778
771 BUG_ON(extent_op && extent_op->is_data); 779 BUG_ON(extent_op && extent_op->is_data);
772 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); 780 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
773 if (!ref) 781 if (!ref)
@@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
793 801
794 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, 802 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
795 num_bytes, parent, ref_root, level, action, 803 num_bytes, parent, ref_root, level, action,
796 for_cow); 804 no_quota);
797 spin_unlock(&delayed_refs->lock); 805 spin_unlock(&delayed_refs->lock);
798 if (need_ref_seq(for_cow, ref_root))
799 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
800 806
801 return 0; 807 return 0;
802} 808}
@@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
810 u64 parent, u64 ref_root, 816 u64 parent, u64 ref_root,
811 u64 owner, u64 offset, int action, 817 u64 owner, u64 offset, int action,
812 struct btrfs_delayed_extent_op *extent_op, 818 struct btrfs_delayed_extent_op *extent_op,
813 int for_cow) 819 int no_quota)
814{ 820{
815 struct btrfs_delayed_data_ref *ref; 821 struct btrfs_delayed_data_ref *ref;
816 struct btrfs_delayed_ref_head *head_ref; 822 struct btrfs_delayed_ref_head *head_ref;
817 struct btrfs_delayed_ref_root *delayed_refs; 823 struct btrfs_delayed_ref_root *delayed_refs;
818 824
825 if (!is_fstree(ref_root) || !fs_info->quota_enabled)
826 no_quota = 0;
827
819 BUG_ON(extent_op && !extent_op->is_data); 828 BUG_ON(extent_op && !extent_op->is_data);
820 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); 829 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
821 if (!ref) 830 if (!ref)
@@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
841 850
842 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, 851 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
843 num_bytes, parent, ref_root, owner, offset, 852 num_bytes, parent, ref_root, owner, offset,
844 action, for_cow); 853 action, no_quota);
845 spin_unlock(&delayed_refs->lock); 854 spin_unlock(&delayed_refs->lock);
846 if (need_ref_seq(for_cow, ref_root))
847 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
848 855
849 return 0; 856 return 0;
850} 857}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 4ba9b93022ff..a764e2340d48 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {
52 52
53 unsigned int action:8; 53 unsigned int action:8;
54 unsigned int type:8; 54 unsigned int type:8;
55 unsigned int no_quota:1;
55 /* is this node still in the rbtree? */ 56 /* is this node still in the rbtree? */
56 unsigned int is_head:1; 57 unsigned int is_head:1;
57 unsigned int in_tree:1; 58 unsigned int in_tree:1;
@@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
196 u64 bytenr, u64 num_bytes, u64 parent, 197 u64 bytenr, u64 num_bytes, u64 parent,
197 u64 ref_root, int level, int action, 198 u64 ref_root, int level, int action,
198 struct btrfs_delayed_extent_op *extent_op, 199 struct btrfs_delayed_extent_op *extent_op,
199 int for_cow); 200 int no_quota);
200int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, 201int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
201 struct btrfs_trans_handle *trans, 202 struct btrfs_trans_handle *trans,
202 u64 bytenr, u64 num_bytes, 203 u64 bytenr, u64 num_bytes,
203 u64 parent, u64 ref_root, 204 u64 parent, u64 ref_root,
204 u64 owner, u64 offset, int action, 205 u64 owner, u64 offset, int action,
205 struct btrfs_delayed_extent_op *extent_op, 206 struct btrfs_delayed_extent_op *extent_op,
206 int for_cow); 207 int no_quota);
207int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 208int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
208 struct btrfs_trans_handle *trans, 209 struct btrfs_trans_handle *trans,
209 u64 bytenr, u64 num_bytes, 210 u64 bytenr, u64 num_bytes,
@@ -231,25 +232,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
231 u64 seq); 232 u64 seq);
232 233
233/* 234/*
234 * delayed refs with a ref_seq > 0 must be held back during backref walking.
235 * this only applies to items in one of the fs-trees. for_cow items never need
236 * to be held back, so they won't get a ref_seq number.
237 */
238static inline int need_ref_seq(int for_cow, u64 rootid)
239{
240 if (for_cow)
241 return 0;
242
243 if (rootid == BTRFS_FS_TREE_OBJECTID)
244 return 1;
245
246 if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
247 return 1;
248
249 return 0;
250}
251
252/*
253 * a node might live in a head or a regular ref, this lets you 235 * a node might live in a head or a regular ref, this lets you
254 * test for the proper type to use. 236 * test for the proper type to use.
255 */ 237 */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e54f0cd5cdf6..77f92a32e230 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -49,6 +49,7 @@
49#include "dev-replace.h" 49#include "dev-replace.h"
50#include "raid56.h" 50#include "raid56.h"
51#include "sysfs.h" 51#include "sysfs.h"
52#include "qgroup.h"
52 53
53#ifdef CONFIG_X86 54#ifdef CONFIG_X86
54#include <asm/cpufeature.h> 55#include <asm/cpufeature.h>
@@ -2219,6 +2220,7 @@ int open_ctree(struct super_block *sb,
2219 spin_lock_init(&fs_info->free_chunk_lock); 2220 spin_lock_init(&fs_info->free_chunk_lock);
2220 spin_lock_init(&fs_info->tree_mod_seq_lock); 2221 spin_lock_init(&fs_info->tree_mod_seq_lock);
2221 spin_lock_init(&fs_info->super_lock); 2222 spin_lock_init(&fs_info->super_lock);
2223 spin_lock_init(&fs_info->qgroup_op_lock);
2222 spin_lock_init(&fs_info->buffer_lock); 2224 spin_lock_init(&fs_info->buffer_lock);
2223 rwlock_init(&fs_info->tree_mod_log_lock); 2225 rwlock_init(&fs_info->tree_mod_log_lock);
2224 mutex_init(&fs_info->reloc_mutex); 2226 mutex_init(&fs_info->reloc_mutex);
@@ -2244,6 +2246,7 @@ int open_ctree(struct super_block *sb,
2244 atomic_set(&fs_info->async_submit_draining, 0); 2246 atomic_set(&fs_info->async_submit_draining, 0);
2245 atomic_set(&fs_info->nr_async_bios, 0); 2247 atomic_set(&fs_info->nr_async_bios, 0);
2246 atomic_set(&fs_info->defrag_running, 0); 2248 atomic_set(&fs_info->defrag_running, 0);
2249 atomic_set(&fs_info->qgroup_op_seq, 0);
2247 atomic64_set(&fs_info->tree_mod_seq, 0); 2250 atomic64_set(&fs_info->tree_mod_seq, 0);
2248 fs_info->sb = sb; 2251 fs_info->sb = sb;
2249 fs_info->max_inline = 8192 * 1024; 2252 fs_info->max_inline = 8192 * 1024;
@@ -2353,6 +2356,7 @@ int open_ctree(struct super_block *sb,
2353 spin_lock_init(&fs_info->qgroup_lock); 2356 spin_lock_init(&fs_info->qgroup_lock);
2354 mutex_init(&fs_info->qgroup_ioctl_lock); 2357 mutex_init(&fs_info->qgroup_ioctl_lock);
2355 fs_info->qgroup_tree = RB_ROOT; 2358 fs_info->qgroup_tree = RB_ROOT;
2359 fs_info->qgroup_op_tree = RB_ROOT;
2356 INIT_LIST_HEAD(&fs_info->dirty_qgroups); 2360 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2357 fs_info->qgroup_seq = 1; 2361 fs_info->qgroup_seq = 1;
2358 fs_info->quota_enabled = 0; 2362 fs_info->quota_enabled = 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aff579df5f47..343eb10230a1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,6 +35,7 @@
35#include "free-space-cache.h" 35#include "free-space-cache.h"
36#include "math.h" 36#include "math.h"
37#include "sysfs.h" 37#include "sysfs.h"
38#include "qgroup.h"
38 39
39#undef SCRAMBLE_DELAYED_REFS 40#undef SCRAMBLE_DELAYED_REFS
40 41
@@ -80,7 +81,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
80 u64 bytenr, u64 num_bytes, u64 parent, 81 u64 bytenr, u64 num_bytes, u64 parent,
81 u64 root_objectid, u64 owner_objectid, 82 u64 root_objectid, u64 owner_objectid,
82 u64 owner_offset, int refs_to_drop, 83 u64 owner_offset, int refs_to_drop,
83 struct btrfs_delayed_extent_op *extra_op); 84 struct btrfs_delayed_extent_op *extra_op,
85 int no_quota);
84static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 86static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
85 struct extent_buffer *leaf, 87 struct extent_buffer *leaf,
86 struct btrfs_extent_item *ei); 88 struct btrfs_extent_item *ei);
@@ -93,7 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
93 struct btrfs_root *root, 95 struct btrfs_root *root,
94 u64 parent, u64 root_objectid, 96 u64 parent, u64 root_objectid,
95 u64 flags, struct btrfs_disk_key *key, 97 u64 flags, struct btrfs_disk_key *key,
96 int level, struct btrfs_key *ins); 98 int level, struct btrfs_key *ins,
99 int no_quota);
97static int do_chunk_alloc(struct btrfs_trans_handle *trans, 100static int do_chunk_alloc(struct btrfs_trans_handle *trans,
98 struct btrfs_root *extent_root, u64 flags, 101 struct btrfs_root *extent_root, u64 flags,
99 int force); 102 int force);
@@ -1270,7 +1273,7 @@ fail:
1270static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1273static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1271 struct btrfs_root *root, 1274 struct btrfs_root *root,
1272 struct btrfs_path *path, 1275 struct btrfs_path *path,
1273 int refs_to_drop) 1276 int refs_to_drop, int *last_ref)
1274{ 1277{
1275 struct btrfs_key key; 1278 struct btrfs_key key;
1276 struct btrfs_extent_data_ref *ref1 = NULL; 1279 struct btrfs_extent_data_ref *ref1 = NULL;
@@ -1306,6 +1309,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1306 1309
1307 if (num_refs == 0) { 1310 if (num_refs == 0) {
1308 ret = btrfs_del_item(trans, root, path); 1311 ret = btrfs_del_item(trans, root, path);
1312 *last_ref = 1;
1309 } else { 1313 } else {
1310 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1314 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1311 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1315 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1763,7 +1767,8 @@ void update_inline_extent_backref(struct btrfs_root *root,
1763 struct btrfs_path *path, 1767 struct btrfs_path *path,
1764 struct btrfs_extent_inline_ref *iref, 1768 struct btrfs_extent_inline_ref *iref,
1765 int refs_to_mod, 1769 int refs_to_mod,
1766 struct btrfs_delayed_extent_op *extent_op) 1770 struct btrfs_delayed_extent_op *extent_op,
1771 int *last_ref)
1767{ 1772{
1768 struct extent_buffer *leaf; 1773 struct extent_buffer *leaf;
1769 struct btrfs_extent_item *ei; 1774 struct btrfs_extent_item *ei;
@@ -1807,6 +1812,7 @@ void update_inline_extent_backref(struct btrfs_root *root,
1807 else 1812 else
1808 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1813 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1809 } else { 1814 } else {
1815 *last_ref = 1;
1810 size = btrfs_extent_inline_ref_size(type); 1816 size = btrfs_extent_inline_ref_size(type);
1811 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1817 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1812 ptr = (unsigned long)iref; 1818 ptr = (unsigned long)iref;
@@ -1838,7 +1844,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1838 if (ret == 0) { 1844 if (ret == 0) {
1839 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1845 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1840 update_inline_extent_backref(root, path, iref, 1846 update_inline_extent_backref(root, path, iref,
1841 refs_to_add, extent_op); 1847 refs_to_add, extent_op, NULL);
1842 } else if (ret == -ENOENT) { 1848 } else if (ret == -ENOENT) {
1843 setup_inline_extent_backref(root, path, iref, parent, 1849 setup_inline_extent_backref(root, path, iref, parent,
1844 root_objectid, owner, offset, 1850 root_objectid, owner, offset,
@@ -1871,17 +1877,19 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1871 struct btrfs_root *root, 1877 struct btrfs_root *root,
1872 struct btrfs_path *path, 1878 struct btrfs_path *path,
1873 struct btrfs_extent_inline_ref *iref, 1879 struct btrfs_extent_inline_ref *iref,
1874 int refs_to_drop, int is_data) 1880 int refs_to_drop, int is_data, int *last_ref)
1875{ 1881{
1876 int ret = 0; 1882 int ret = 0;
1877 1883
1878 BUG_ON(!is_data && refs_to_drop != 1); 1884 BUG_ON(!is_data && refs_to_drop != 1);
1879 if (iref) { 1885 if (iref) {
1880 update_inline_extent_backref(root, path, iref, 1886 update_inline_extent_backref(root, path, iref,
1881 -refs_to_drop, NULL); 1887 -refs_to_drop, NULL, last_ref);
1882 } else if (is_data) { 1888 } else if (is_data) {
1883 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1889 ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
1890 last_ref);
1884 } else { 1891 } else {
1892 *last_ref = 1;
1885 ret = btrfs_del_item(trans, root, path); 1893 ret = btrfs_del_item(trans, root, path);
1886 } 1894 }
1887 return ret; 1895 return ret;
@@ -1945,7 +1953,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1945int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1953int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1946 struct btrfs_root *root, 1954 struct btrfs_root *root,
1947 u64 bytenr, u64 num_bytes, u64 parent, 1955 u64 bytenr, u64 num_bytes, u64 parent,
1948 u64 root_objectid, u64 owner, u64 offset, int for_cow) 1956 u64 root_objectid, u64 owner, u64 offset,
1957 int no_quota)
1949{ 1958{
1950 int ret; 1959 int ret;
1951 struct btrfs_fs_info *fs_info = root->fs_info; 1960 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1957,12 +1966,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1957 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 1966 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
1958 num_bytes, 1967 num_bytes,
1959 parent, root_objectid, (int)owner, 1968 parent, root_objectid, (int)owner,
1960 BTRFS_ADD_DELAYED_REF, NULL, for_cow); 1969 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
1961 } else { 1970 } else {
1962 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 1971 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
1963 num_bytes, 1972 num_bytes,
1964 parent, root_objectid, owner, offset, 1973 parent, root_objectid, owner, offset,
1965 BTRFS_ADD_DELAYED_REF, NULL, for_cow); 1974 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
1966 } 1975 }
1967 return ret; 1976 return ret;
1968} 1977}
@@ -1972,31 +1981,64 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1972 u64 bytenr, u64 num_bytes, 1981 u64 bytenr, u64 num_bytes,
1973 u64 parent, u64 root_objectid, 1982 u64 parent, u64 root_objectid,
1974 u64 owner, u64 offset, int refs_to_add, 1983 u64 owner, u64 offset, int refs_to_add,
1984 int no_quota,
1975 struct btrfs_delayed_extent_op *extent_op) 1985 struct btrfs_delayed_extent_op *extent_op)
1976{ 1986{
1987 struct btrfs_fs_info *fs_info = root->fs_info;
1977 struct btrfs_path *path; 1988 struct btrfs_path *path;
1978 struct extent_buffer *leaf; 1989 struct extent_buffer *leaf;
1979 struct btrfs_extent_item *item; 1990 struct btrfs_extent_item *item;
1991 struct btrfs_key key;
1980 u64 refs; 1992 u64 refs;
1981 int ret; 1993 int ret;
1994 enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
1982 1995
1983 path = btrfs_alloc_path(); 1996 path = btrfs_alloc_path();
1984 if (!path) 1997 if (!path)
1985 return -ENOMEM; 1998 return -ENOMEM;
1986 1999
2000 if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
2001 no_quota = 1;
2002
1987 path->reada = 1; 2003 path->reada = 1;
1988 path->leave_spinning = 1; 2004 path->leave_spinning = 1;
1989 /* this will setup the path even if it fails to insert the back ref */ 2005 /* this will setup the path even if it fails to insert the back ref */
1990 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 2006 ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
1991 path, bytenr, num_bytes, parent, 2007 bytenr, num_bytes, parent,
1992 root_objectid, owner, offset, 2008 root_objectid, owner, offset,
1993 refs_to_add, extent_op); 2009 refs_to_add, extent_op);
1994 if (ret != -EAGAIN) 2010 if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
1995 goto out; 2011 goto out;
2012 /*
2013 * Ok we were able to insert an inline extent and it appears to be a new
2014 * reference, deal with the qgroup accounting.
2015 */
2016 if (!ret && !no_quota) {
2017 ASSERT(root->fs_info->quota_enabled);
2018 leaf = path->nodes[0];
2019 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2020 item = btrfs_item_ptr(leaf, path->slots[0],
2021 struct btrfs_extent_item);
2022 if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
2023 type = BTRFS_QGROUP_OPER_ADD_SHARED;
2024 btrfs_release_path(path);
1996 2025
2026 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
2027 bytenr, num_bytes, type, 0);
2028 goto out;
2029 }
2030
2031 /*
2032 * Ok we had -EAGAIN which means we didn't have space to insert an
2033 * inline extent ref, so just update the reference count and add a
2034 * normal backref.
2035 */
1997 leaf = path->nodes[0]; 2036 leaf = path->nodes[0];
2037 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1998 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2038 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1999 refs = btrfs_extent_refs(leaf, item); 2039 refs = btrfs_extent_refs(leaf, item);
2040 if (refs)
2041 type = BTRFS_QGROUP_OPER_ADD_SHARED;
2000 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 2042 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2001 if (extent_op) 2043 if (extent_op)
2002 __run_delayed_extent_op(extent_op, leaf, item); 2044 __run_delayed_extent_op(extent_op, leaf, item);
@@ -2004,9 +2046,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2004 btrfs_mark_buffer_dirty(leaf); 2046 btrfs_mark_buffer_dirty(leaf);
2005 btrfs_release_path(path); 2047 btrfs_release_path(path);
2006 2048
2049 if (!no_quota) {
2050 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
2051 bytenr, num_bytes, type, 0);
2052 if (ret)
2053 goto out;
2054 }
2055
2007 path->reada = 1; 2056 path->reada = 1;
2008 path->leave_spinning = 1; 2057 path->leave_spinning = 1;
2009
2010 /* now insert the actual backref */ 2058 /* now insert the actual backref */
2011 ret = insert_extent_backref(trans, root->fs_info->extent_root, 2059 ret = insert_extent_backref(trans, root->fs_info->extent_root,
2012 path, bytenr, parent, root_objectid, 2060 path, bytenr, parent, root_objectid,
@@ -2040,8 +2088,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2040 2088
2041 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 2089 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2042 parent = ref->parent; 2090 parent = ref->parent;
2043 else 2091 ref_root = ref->root;
2044 ref_root = ref->root;
2045 2092
2046 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2093 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2047 if (extent_op) 2094 if (extent_op)
@@ -2055,13 +2102,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2055 node->num_bytes, parent, 2102 node->num_bytes, parent,
2056 ref_root, ref->objectid, 2103 ref_root, ref->objectid,
2057 ref->offset, node->ref_mod, 2104 ref->offset, node->ref_mod,
2058 extent_op); 2105 node->no_quota, extent_op);
2059 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2106 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2060 ret = __btrfs_free_extent(trans, root, node->bytenr, 2107 ret = __btrfs_free_extent(trans, root, node->bytenr,
2061 node->num_bytes, parent, 2108 node->num_bytes, parent,
2062 ref_root, ref->objectid, 2109 ref_root, ref->objectid,
2063 ref->offset, node->ref_mod, 2110 ref->offset, node->ref_mod,
2064 extent_op); 2111 extent_op, node->no_quota);
2065 } else { 2112 } else {
2066 BUG(); 2113 BUG();
2067 } 2114 }
@@ -2198,8 +2245,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2198 2245
2199 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2246 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2200 parent = ref->parent; 2247 parent = ref->parent;
2201 else 2248 ref_root = ref->root;
2202 ref_root = ref->root;
2203 2249
2204 ins.objectid = node->bytenr; 2250 ins.objectid = node->bytenr;
2205 if (skinny_metadata) { 2251 if (skinny_metadata) {
@@ -2217,15 +2263,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2217 parent, ref_root, 2263 parent, ref_root,
2218 extent_op->flags_to_set, 2264 extent_op->flags_to_set,
2219 &extent_op->key, 2265 &extent_op->key,
2220 ref->level, &ins); 2266 ref->level, &ins,
2267 node->no_quota);
2221 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2268 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2222 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 2269 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
2223 node->num_bytes, parent, ref_root, 2270 node->num_bytes, parent, ref_root,
2224 ref->level, 0, 1, extent_op); 2271 ref->level, 0, 1, node->no_quota,
2272 extent_op);
2225 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2273 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2226 ret = __btrfs_free_extent(trans, root, node->bytenr, 2274 ret = __btrfs_free_extent(trans, root, node->bytenr,
2227 node->num_bytes, parent, ref_root, 2275 node->num_bytes, parent, ref_root,
2228 ref->level, 0, 1, extent_op); 2276 ref->level, 0, 1, extent_op,
2277 node->no_quota);
2229 } else { 2278 } else {
2230 BUG(); 2279 BUG();
2231 } 2280 }
@@ -2573,42 +2622,6 @@ static u64 find_middle(struct rb_root *root)
2573} 2622}
2574#endif 2623#endif
2575 2624
2576int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2577 struct btrfs_fs_info *fs_info)
2578{
2579 struct qgroup_update *qgroup_update;
2580 int ret = 0;
2581
2582 if (list_empty(&trans->qgroup_ref_list) !=
2583 !trans->delayed_ref_elem.seq) {
2584 /* list without seq or seq without list */
2585 btrfs_err(fs_info,
2586 "qgroup accounting update error, list is%s empty, seq is %#x.%x",
2587 list_empty(&trans->qgroup_ref_list) ? "" : " not",
2588 (u32)(trans->delayed_ref_elem.seq >> 32),
2589 (u32)trans->delayed_ref_elem.seq);
2590 BUG();
2591 }
2592
2593 if (!trans->delayed_ref_elem.seq)
2594 return 0;
2595
2596 while (!list_empty(&trans->qgroup_ref_list)) {
2597 qgroup_update = list_first_entry(&trans->qgroup_ref_list,
2598 struct qgroup_update, list);
2599 list_del(&qgroup_update->list);
2600 if (!ret)
2601 ret = btrfs_qgroup_account_ref(
2602 trans, fs_info, qgroup_update->node,
2603 qgroup_update->extent_op);
2604 kfree(qgroup_update);
2605 }
2606
2607 btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
2608
2609 return ret;
2610}
2611
2612static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) 2625static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2613{ 2626{
2614 u64 num_bytes; 2627 u64 num_bytes;
@@ -2697,8 +2710,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2697 if (root == root->fs_info->extent_root) 2710 if (root == root->fs_info->extent_root)
2698 root = root->fs_info->tree_root; 2711 root = root->fs_info->tree_root;
2699 2712
2700 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2701
2702 delayed_refs = &trans->transaction->delayed_refs; 2713 delayed_refs = &trans->transaction->delayed_refs;
2703 if (count == 0) { 2714 if (count == 0) {
2704 count = atomic_read(&delayed_refs->num_entries) * 2; 2715 count = atomic_read(&delayed_refs->num_entries) * 2;
@@ -2757,6 +2768,9 @@ again:
2757 goto again; 2768 goto again;
2758 } 2769 }
2759out: 2770out:
2771 ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
2772 if (ret)
2773 return ret;
2760 assert_qgroups_uptodate(trans); 2774 assert_qgroups_uptodate(trans);
2761 return 0; 2775 return 0;
2762} 2776}
@@ -2963,7 +2977,7 @@ out:
2963static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2977static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2964 struct btrfs_root *root, 2978 struct btrfs_root *root,
2965 struct extent_buffer *buf, 2979 struct extent_buffer *buf,
2966 int full_backref, int inc, int for_cow) 2980 int full_backref, int inc, int no_quota)
2967{ 2981{
2968 u64 bytenr; 2982 u64 bytenr;
2969 u64 num_bytes; 2983 u64 num_bytes;
@@ -3013,7 +3027,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3013 key.offset -= btrfs_file_extent_offset(buf, fi); 3027 key.offset -= btrfs_file_extent_offset(buf, fi);
3014 ret = process_func(trans, root, bytenr, num_bytes, 3028 ret = process_func(trans, root, bytenr, num_bytes,
3015 parent, ref_root, key.objectid, 3029 parent, ref_root, key.objectid,
3016 key.offset, for_cow); 3030 key.offset, no_quota);
3017 if (ret) 3031 if (ret)
3018 goto fail; 3032 goto fail;
3019 } else { 3033 } else {
@@ -3021,7 +3035,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3021 num_bytes = btrfs_level_size(root, level - 1); 3035 num_bytes = btrfs_level_size(root, level - 1);
3022 ret = process_func(trans, root, bytenr, num_bytes, 3036 ret = process_func(trans, root, bytenr, num_bytes,
3023 parent, ref_root, level - 1, 0, 3037 parent, ref_root, level - 1, 0,
3024 for_cow); 3038 no_quota);
3025 if (ret) 3039 if (ret)
3026 goto fail; 3040 goto fail;
3027 } 3041 }
@@ -3032,15 +3046,15 @@ fail:
3032} 3046}
3033 3047
3034int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3048int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3035 struct extent_buffer *buf, int full_backref, int for_cow) 3049 struct extent_buffer *buf, int full_backref, int no_quota)
3036{ 3050{
3037 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); 3051 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
3038} 3052}
3039 3053
3040int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3054int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3041 struct extent_buffer *buf, int full_backref, int for_cow) 3055 struct extent_buffer *buf, int full_backref, int no_quota)
3042{ 3056{
3043 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); 3057 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
3044} 3058}
3045 3059
3046static int write_one_cache_group(struct btrfs_trans_handle *trans, 3060static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -5723,7 +5737,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5723 u64 bytenr, u64 num_bytes, u64 parent, 5737 u64 bytenr, u64 num_bytes, u64 parent,
5724 u64 root_objectid, u64 owner_objectid, 5738 u64 root_objectid, u64 owner_objectid,
5725 u64 owner_offset, int refs_to_drop, 5739 u64 owner_offset, int refs_to_drop,
5726 struct btrfs_delayed_extent_op *extent_op) 5740 struct btrfs_delayed_extent_op *extent_op,
5741 int no_quota)
5727{ 5742{
5728 struct btrfs_key key; 5743 struct btrfs_key key;
5729 struct btrfs_path *path; 5744 struct btrfs_path *path;
@@ -5739,9 +5754,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5739 int num_to_del = 1; 5754 int num_to_del = 1;
5740 u32 item_size; 5755 u32 item_size;
5741 u64 refs; 5756 u64 refs;
5757 int last_ref = 0;
5758 enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
5742 bool skinny_metadata = btrfs_fs_incompat(root->fs_info, 5759 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
5743 SKINNY_METADATA); 5760 SKINNY_METADATA);
5744 5761
5762 if (!info->quota_enabled || !is_fstree(root_objectid))
5763 no_quota = 1;
5764
5745 path = btrfs_alloc_path(); 5765 path = btrfs_alloc_path();
5746 if (!path) 5766 if (!path)
5747 return -ENOMEM; 5767 return -ENOMEM;
@@ -5789,7 +5809,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5789 BUG_ON(iref); 5809 BUG_ON(iref);
5790 ret = remove_extent_backref(trans, extent_root, path, 5810 ret = remove_extent_backref(trans, extent_root, path,
5791 NULL, refs_to_drop, 5811 NULL, refs_to_drop,
5792 is_data); 5812 is_data, &last_ref);
5793 if (ret) { 5813 if (ret) {
5794 btrfs_abort_transaction(trans, extent_root, ret); 5814 btrfs_abort_transaction(trans, extent_root, ret);
5795 goto out; 5815 goto out;
@@ -5916,6 +5936,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5916 refs -= refs_to_drop; 5936 refs -= refs_to_drop;
5917 5937
5918 if (refs > 0) { 5938 if (refs > 0) {
5939 type = BTRFS_QGROUP_OPER_SUB_SHARED;
5919 if (extent_op) 5940 if (extent_op)
5920 __run_delayed_extent_op(extent_op, leaf, ei); 5941 __run_delayed_extent_op(extent_op, leaf, ei);
5921 /* 5942 /*
@@ -5931,7 +5952,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5931 if (found_extent) { 5952 if (found_extent) {
5932 ret = remove_extent_backref(trans, extent_root, path, 5953 ret = remove_extent_backref(trans, extent_root, path,
5933 iref, refs_to_drop, 5954 iref, refs_to_drop,
5934 is_data); 5955 is_data, &last_ref);
5935 if (ret) { 5956 if (ret) {
5936 btrfs_abort_transaction(trans, extent_root, ret); 5957 btrfs_abort_transaction(trans, extent_root, ret);
5937 goto out; 5958 goto out;
@@ -5952,6 +5973,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5952 } 5973 }
5953 } 5974 }
5954 5975
5976 last_ref = 1;
5955 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 5977 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
5956 num_to_del); 5978 num_to_del);
5957 if (ret) { 5979 if (ret) {
@@ -5974,6 +5996,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5974 goto out; 5996 goto out;
5975 } 5997 }
5976 } 5998 }
5999 btrfs_release_path(path);
6000
6001 /* Deal with the quota accounting */
6002 if (!ret && last_ref && !no_quota) {
6003 int mod_seq = 0;
6004
6005 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
6006 type == BTRFS_QGROUP_OPER_SUB_SHARED)
6007 mod_seq = 1;
6008
6009 ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
6010 bytenr, num_bytes, type,
6011 mod_seq);
6012 }
5977out: 6013out:
5978 btrfs_free_path(path); 6014 btrfs_free_path(path);
5979 return ret; 6015 return ret;
@@ -6110,7 +6146,7 @@ out:
6110/* Can return -ENOMEM */ 6146/* Can return -ENOMEM */
6111int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, 6147int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6112 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 6148 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
6113 u64 owner, u64 offset, int for_cow) 6149 u64 owner, u64 offset, int no_quota)
6114{ 6150{
6115 int ret; 6151 int ret;
6116 struct btrfs_fs_info *fs_info = root->fs_info; 6152 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6130,13 +6166,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6130 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 6166 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
6131 num_bytes, 6167 num_bytes,
6132 parent, root_objectid, (int)owner, 6168 parent, root_objectid, (int)owner,
6133 BTRFS_DROP_DELAYED_REF, NULL, for_cow); 6169 BTRFS_DROP_DELAYED_REF, NULL, no_quota);
6134 } else { 6170 } else {
6135 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 6171 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
6136 num_bytes, 6172 num_bytes,
6137 parent, root_objectid, owner, 6173 parent, root_objectid, owner,
6138 offset, BTRFS_DROP_DELAYED_REF, 6174 offset, BTRFS_DROP_DELAYED_REF,
6139 NULL, for_cow); 6175 NULL, no_quota);
6140 } 6176 }
6141 return ret; 6177 return ret;
6142} 6178}
@@ -6842,6 +6878,13 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6842 btrfs_mark_buffer_dirty(path->nodes[0]); 6878 btrfs_mark_buffer_dirty(path->nodes[0]);
6843 btrfs_free_path(path); 6879 btrfs_free_path(path);
6844 6880
6881 /* Always set parent to 0 here since it's exclusive anyway. */
6882 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
6883 ins->objectid, ins->offset,
6884 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
6885 if (ret)
6886 return ret;
6887
6845 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6888 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6846 if (ret) { /* -ENOENT, logic error */ 6889 if (ret) { /* -ENOENT, logic error */
6847 btrfs_err(fs_info, "update block group failed for %llu %llu", 6890 btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6856,7 +6899,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6856 struct btrfs_root *root, 6899 struct btrfs_root *root,
6857 u64 parent, u64 root_objectid, 6900 u64 parent, u64 root_objectid,
6858 u64 flags, struct btrfs_disk_key *key, 6901 u64 flags, struct btrfs_disk_key *key,
6859 int level, struct btrfs_key *ins) 6902 int level, struct btrfs_key *ins,
6903 int no_quota)
6860{ 6904{
6861 int ret; 6905 int ret;
6862 struct btrfs_fs_info *fs_info = root->fs_info; 6906 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6866,6 +6910,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6866 struct btrfs_path *path; 6910 struct btrfs_path *path;
6867 struct extent_buffer *leaf; 6911 struct extent_buffer *leaf;
6868 u32 size = sizeof(*extent_item) + sizeof(*iref); 6912 u32 size = sizeof(*extent_item) + sizeof(*iref);
6913 u64 num_bytes = ins->offset;
6869 bool skinny_metadata = btrfs_fs_incompat(root->fs_info, 6914 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6870 SKINNY_METADATA); 6915 SKINNY_METADATA);
6871 6916
@@ -6899,6 +6944,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6899 6944
6900 if (skinny_metadata) { 6945 if (skinny_metadata) {
6901 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 6946 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
6947 num_bytes = root->leafsize;
6902 } else { 6948 } else {
6903 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 6949 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
6904 btrfs_set_tree_block_key(leaf, block_info, key); 6950 btrfs_set_tree_block_key(leaf, block_info, key);
@@ -6920,6 +6966,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6920 btrfs_mark_buffer_dirty(leaf); 6966 btrfs_mark_buffer_dirty(leaf);
6921 btrfs_free_path(path); 6967 btrfs_free_path(path);
6922 6968
6969 if (!no_quota) {
6970 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
6971 ins->objectid, num_bytes,
6972 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
6973 if (ret)
6974 return ret;
6975 }
6976
6923 ret = update_block_group(root, ins->objectid, root->leafsize, 1); 6977 ret = update_block_group(root, ins->objectid, root->leafsize, 1);
6924 if (ret) { /* -ENOENT, logic error */ 6978 if (ret) { /* -ENOENT, logic error */
6925 btrfs_err(fs_info, "update block group failed for %llu %llu", 6979 btrfs_err(fs_info, "update block group failed for %llu %llu",
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5c6947dbc948..8accf94ef220 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,7 @@
40#include "tree-log.h" 40#include "tree-log.h"
41#include "locking.h" 41#include "locking.h"
42#include "volumes.h" 42#include "volumes.h"
43#include "qgroup.h"
43 44
44static struct kmem_cache *btrfs_inode_defrag_cachep; 45static struct kmem_cache *btrfs_inode_defrag_cachep;
45/* 46/*
@@ -849,7 +850,7 @@ next_slot:
849 disk_bytenr, num_bytes, 0, 850 disk_bytenr, num_bytes, 0,
850 root->root_key.objectid, 851 root->root_key.objectid,
851 new_key.objectid, 852 new_key.objectid,
852 start - extent_offset, 0); 853 start - extent_offset, 1);
853 BUG_ON(ret); /* -ENOMEM */ 854 BUG_ON(ret); /* -ENOMEM */
854 } 855 }
855 key.offset = start; 856 key.offset = start;
@@ -1206,7 +1207,7 @@ again:
1206 1207
1207 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 1208 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
1208 root->root_key.objectid, 1209 root->root_key.objectid,
1209 ino, orig_offset, 0); 1210 ino, orig_offset, 1);
1210 BUG_ON(ret); /* -ENOMEM */ 1211 BUG_ON(ret); /* -ENOMEM */
1211 1212
1212 if (split == start) { 1213 if (split == start) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 242a37cd26b2..a21a4ac537b7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -58,6 +58,7 @@
58#include "dev-replace.h" 58#include "dev-replace.h"
59#include "props.h" 59#include "props.h"
60#include "sysfs.h" 60#include "sysfs.h"
61#include "qgroup.h"
61 62
62#ifdef CONFIG_64BIT 63#ifdef CONFIG_64BIT
63/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI 64/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -2941,6 +2942,41 @@ out:
2941 return ret; 2942 return ret;
2942} 2943}
2943 2944
2945/* Helper to check and see if this root currently has a ref on the given disk
2946 * bytenr. If it does then we need to update the quota for this root. This
2947 * doesn't do anything if quotas aren't enabled.
2948 */
2949static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2950 u64 disko)
2951{
2952 struct seq_list tree_mod_seq_elem = {};
2953 struct ulist *roots;
2954 struct ulist_iterator uiter;
2955 struct ulist_node *root_node = NULL;
2956 int ret;
2957
2958 if (!root->fs_info->quota_enabled)
2959 return 1;
2960
2961 btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
2962 ret = btrfs_find_all_roots(trans, root->fs_info, disko,
2963 tree_mod_seq_elem.seq, &roots);
2964 if (ret < 0)
2965 goto out;
2966 ret = 0;
2967 ULIST_ITER_INIT(&uiter);
2968 while ((root_node = ulist_next(roots, &uiter))) {
2969 if (root_node->val == root->objectid) {
2970 ret = 1;
2971 break;
2972 }
2973 }
2974 ulist_free(roots);
2975out:
2976 btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
2977 return ret;
2978}
2979
2944/** 2980/**
2945 * btrfs_clone() - clone a range from inode file to another 2981 * btrfs_clone() - clone a range from inode file to another
2946 * 2982 *
@@ -2964,7 +3000,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2964 u32 nritems; 3000 u32 nritems;
2965 int slot; 3001 int slot;
2966 int ret; 3002 int ret;
3003 int no_quota;
2967 u64 len = olen_aligned; 3004 u64 len = olen_aligned;
3005 u64 last_disko = 0;
2968 3006
2969 ret = -ENOMEM; 3007 ret = -ENOMEM;
2970 buf = vmalloc(btrfs_level_size(root, 0)); 3008 buf = vmalloc(btrfs_level_size(root, 0));
@@ -2996,6 +3034,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2996 3034
2997 nritems = btrfs_header_nritems(path->nodes[0]); 3035 nritems = btrfs_header_nritems(path->nodes[0]);
2998process_slot: 3036process_slot:
3037 no_quota = 1;
2999 if (path->slots[0] >= nritems) { 3038 if (path->slots[0] >= nritems) {
3000 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 3039 ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
3001 if (ret < 0) 3040 if (ret < 0)
@@ -3128,6 +3167,28 @@ process_slot:
3128 datao); 3167 datao);
3129 btrfs_set_file_extent_num_bytes(leaf, extent, 3168 btrfs_set_file_extent_num_bytes(leaf, extent,
3130 datal); 3169 datal);
3170
3171 /*
3172 * We need to look up the roots that point at
3173 * this bytenr and see if the new root does. If
3174 * it does not we need to make sure we update
3175 * quotas appropriately.
3176 */
3177 if (disko && root != BTRFS_I(src)->root &&
3178 disko != last_disko) {
3179 no_quota = check_ref(trans, root,
3180 disko);
3181 if (no_quota < 0) {
3182 btrfs_abort_transaction(trans,
3183 root,
3184 ret);
3185 btrfs_end_transaction(trans,
3186 root);
3187 ret = no_quota;
3188 goto out;
3189 }
3190 }
3191
3131 if (disko) { 3192 if (disko) {
3132 inode_add_bytes(inode, datal); 3193 inode_add_bytes(inode, datal);
3133 ret = btrfs_inc_extent_ref(trans, root, 3194 ret = btrfs_inc_extent_ref(trans, root,
@@ -3135,7 +3196,7 @@ process_slot:
3135 root->root_key.objectid, 3196 root->root_key.objectid,
3136 btrfs_ino(inode), 3197 btrfs_ino(inode),
3137 new_key.offset - datao, 3198 new_key.offset - datao,
3138 0); 3199 no_quota);
3139 if (ret) { 3200 if (ret) {
3140 btrfs_abort_transaction(trans, 3201 btrfs_abort_transaction(trans,
3141 root, 3202 root,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2cf905877aaf..09b8cc83965c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -32,6 +32,7 @@
32#include "ulist.h" 32#include "ulist.h"
33#include "backref.h" 33#include "backref.h"
34#include "extent_io.h" 34#include "extent_io.h"
35#include "qgroup.h"
35 36
36/* TODO XXX FIXME 37/* TODO XXX FIXME
37 * - subvol delete -> delete when ref goes to 0? delete limits also? 38 * - subvol delete -> delete when ref goes to 0? delete limits also?
@@ -84,8 +85,8 @@ struct btrfs_qgroup {
84 /* 85 /*
85 * temp variables for accounting operations 86 * temp variables for accounting operations
86 */ 87 */
87 u64 tag; 88 u64 old_refcnt;
88 u64 refcnt; 89 u64 new_refcnt;
89}; 90};
90 91
91/* 92/*
@@ -98,6 +99,9 @@ struct btrfs_qgroup_list {
98 struct btrfs_qgroup *member; 99 struct btrfs_qgroup *member;
99}; 100};
100 101
/*
 * Round-trip a struct btrfs_qgroup pointer through a u64 (for storage in a
 * ulist node's aux field).  The argument is parenthesized so that expression
 * arguments are evaluated before the cast is applied.
 */
#define ptr_to_u64(x) ((u64)(uintptr_t)(x))
#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)(x))
104
101static int 105static int
102qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 106qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103 int init_flags); 107 int init_flags);
@@ -1174,33 +1178,198 @@ out:
1174 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1178 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1175 return ret; 1179 return ret;
1176} 1180}
1181static int comp_oper(struct btrfs_qgroup_operation *oper1,
1182 struct btrfs_qgroup_operation *oper2)
1183{
1184 if (oper1->bytenr < oper2->bytenr)
1185 return -1;
1186 if (oper1->bytenr > oper2->bytenr)
1187 return 1;
1188 if (oper1->seq < oper2->seq)
1189 return -1;
1190 if (oper1->seq > oper2->seq)
1191 return -1;
1192 if (oper1->ref_root < oper2->ref_root)
1193 return -1;
1194 if (oper1->ref_root > oper2->ref_root)
1195 return 1;
1196 if (oper1->type < oper2->type)
1197 return -1;
1198 if (oper1->type > oper2->type)
1199 return 1;
1200 return 0;
1201}
1202
1203static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
1204 struct btrfs_qgroup_operation *oper)
1205{
1206 struct rb_node **p;
1207 struct rb_node *parent = NULL;
1208 struct btrfs_qgroup_operation *cur;
1209 int cmp;
1210
1211 spin_lock(&fs_info->qgroup_op_lock);
1212 p = &fs_info->qgroup_op_tree.rb_node;
1213 while (*p) {
1214 parent = *p;
1215 cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
1216 cmp = comp_oper(cur, oper);
1217 if (cmp < 0) {
1218 p = &(*p)->rb_right;
1219 } else if (cmp) {
1220 p = &(*p)->rb_left;
1221 } else {
1222 spin_unlock(&fs_info->qgroup_op_lock);
1223 return -EEXIST;
1224 }
1225 }
1226 rb_link_node(&oper->n, parent, p);
1227 rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
1228 spin_unlock(&fs_info->qgroup_op_lock);
1229 return 0;
1230}
1177 1231
1178/* 1232/*
1179 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts 1233 * Record a quota operation for processing later on.
1180 * the modification into a list that's later used by btrfs_end_transaction to 1234 * @trans: the transaction we are adding the delayed op to.
1181 * pass the recorded modifications on to btrfs_qgroup_account_ref. 1235 * @fs_info: the fs_info for this fs.
1236 * @ref_root: the root of the reference we are acting on,
1237 * @bytenr: the bytenr we are acting on.
1238 * @num_bytes: the number of bytes in the reference.
1239 * @type: the type of operation this is.
1240 * @mod_seq: do we need to get a sequence number for looking up roots.
1241 *
1242 * We just add it to our trans qgroup_ref_list and carry on and process these
1243 * operations in order at some later point. If the reference root isn't a fs
1244 * root then we don't bother with doing anything.
1245 *
1246 * MUST BE HOLDING THE REF LOCK.
1182 */ 1247 */
1183int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, 1248int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1184 struct btrfs_delayed_ref_node *node, 1249 struct btrfs_fs_info *fs_info, u64 ref_root,
1185 struct btrfs_delayed_extent_op *extent_op) 1250 u64 bytenr, u64 num_bytes,
1251 enum btrfs_qgroup_operation_type type, int mod_seq)
1186{ 1252{
1187 struct qgroup_update *u; 1253 struct btrfs_qgroup_operation *oper;
1254 int ret;
1188 1255
1189 BUG_ON(!trans->delayed_ref_elem.seq); 1256 if (!is_fstree(ref_root) || !fs_info->quota_enabled)
1190 u = kmalloc(sizeof(*u), GFP_NOFS); 1257 return 0;
1191 if (!u) 1258
1259 oper = kmalloc(sizeof(*oper), GFP_NOFS);
1260 if (!oper)
1192 return -ENOMEM; 1261 return -ENOMEM;
1193 1262
1194 u->node = node; 1263 oper->ref_root = ref_root;
1195 u->extent_op = extent_op; 1264 oper->bytenr = bytenr;
1196 list_add_tail(&u->list, &trans->qgroup_ref_list); 1265 oper->num_bytes = num_bytes;
1266 oper->type = type;
1267 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1268 INIT_LIST_HEAD(&oper->elem.list);
1269 oper->elem.seq = 0;
1270 ret = insert_qgroup_oper(fs_info, oper);
1271 if (ret) {
1272 /* Shouldn't happen so have an assert for developers */
1273 ASSERT(0);
1274 kfree(oper);
1275 return ret;
1276 }
1277 list_add_tail(&oper->list, &trans->qgroup_ref_list);
1278
1279 if (mod_seq)
1280 btrfs_get_tree_mod_seq(fs_info, &oper->elem);
1197 1281
1198 return 0; 1282 return 0;
1199} 1283}
1200 1284
1201static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, 1285/*
1202 struct ulist *roots, struct ulist *tmp, 1286 * The easy accounting, if we are adding/removing the only ref for an extent
1203 u64 seq) 1287 * then this qgroup and all of the parent qgroups get their refrence and
1288 * exclusive counts adjusted.
1289 */
1290static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1291 struct btrfs_qgroup_operation *oper)
1292{
1293 struct btrfs_qgroup *qgroup;
1294 struct ulist *tmp;
1295 struct btrfs_qgroup_list *glist;
1296 struct ulist_node *unode;
1297 struct ulist_iterator uiter;
1298 int sign = 0;
1299 int ret = 0;
1300
1301 tmp = ulist_alloc(GFP_NOFS);
1302 if (!tmp)
1303 return -ENOMEM;
1304
1305 spin_lock(&fs_info->qgroup_lock);
1306 if (!fs_info->quota_root)
1307 goto out;
1308 qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1309 if (!qgroup)
1310 goto out;
1311 switch (oper->type) {
1312 case BTRFS_QGROUP_OPER_ADD_EXCL:
1313 sign = 1;
1314 break;
1315 case BTRFS_QGROUP_OPER_SUB_EXCL:
1316 sign = -1;
1317 break;
1318 default:
1319 ASSERT(0);
1320 }
1321 qgroup->rfer += sign * oper->num_bytes;
1322 qgroup->rfer_cmpr += sign * oper->num_bytes;
1323
1324 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1325 qgroup->excl += sign * oper->num_bytes;
1326 qgroup->excl_cmpr += sign * oper->num_bytes;
1327
1328 qgroup_dirty(fs_info, qgroup);
1329
1330 /* Get all of the parent groups that contain this qgroup */
1331 list_for_each_entry(glist, &qgroup->groups, next_group) {
1332 ret = ulist_add(tmp, glist->group->qgroupid,
1333 ptr_to_u64(glist->group), GFP_ATOMIC);
1334 if (ret < 0)
1335 goto out;
1336 }
1337
1338 /* Iterate all of the parents and adjust their reference counts */
1339 ULIST_ITER_INIT(&uiter);
1340 while ((unode = ulist_next(tmp, &uiter))) {
1341 qgroup = u64_to_ptr(unode->aux);
1342 qgroup->rfer += sign * oper->num_bytes;
1343 qgroup->rfer_cmpr += sign * oper->num_bytes;
1344 qgroup->excl += sign * oper->num_bytes;
1345 if (sign < 0)
1346 WARN_ON(qgroup->excl < oper->num_bytes);
1347 qgroup->excl_cmpr += sign * oper->num_bytes;
1348 qgroup_dirty(fs_info, qgroup);
1349
1350 /* Add any parents of the parents */
1351 list_for_each_entry(glist, &qgroup->groups, next_group) {
1352 ret = ulist_add(tmp, glist->group->qgroupid,
1353 ptr_to_u64(glist->group), GFP_ATOMIC);
1354 if (ret < 0)
1355 goto out;
1356 }
1357 }
1358 ret = 0;
1359out:
1360 spin_unlock(&fs_info->qgroup_lock);
1361 ulist_free(tmp);
1362 return ret;
1363}
1364
1365/*
1366 * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
1367 * properly.
1368 */
1369static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1370 u64 root_to_skip, struct ulist *tmp,
1371 struct ulist *roots, struct ulist *qgroups,
1372 u64 seq, int *old_roots, int rescan)
1204{ 1373{
1205 struct ulist_node *unode; 1374 struct ulist_node *unode;
1206 struct ulist_iterator uiter; 1375 struct ulist_iterator uiter;
@@ -1211,256 +1380,549 @@ static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1211 1380
1212 ULIST_ITER_INIT(&uiter); 1381 ULIST_ITER_INIT(&uiter);
1213 while ((unode = ulist_next(roots, &uiter))) { 1382 while ((unode = ulist_next(roots, &uiter))) {
1383 /* We don't count our current root here */
1384 if (unode->val == root_to_skip)
1385 continue;
1214 qg = find_qgroup_rb(fs_info, unode->val); 1386 qg = find_qgroup_rb(fs_info, unode->val);
1215 if (!qg) 1387 if (!qg)
1216 continue; 1388 continue;
1389 /*
1390 * We could have a pending removal of this same ref so we may
1391 * not have actually found our ref root when doing
1392 * btrfs_find_all_roots, so we need to keep track of how many
1393 * old roots we find in case we removed ours and added a
1394 * different one at the same time. I don't think this could
1395 * happen in practice but that sort of thinking leads to pain
1396 * and suffering and to the dark side.
1397 */
1398 (*old_roots)++;
1217 1399
1218 ulist_reinit(tmp); 1400 ulist_reinit(tmp);
1219 /* XXX id not needed */ 1401 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1220 ret = ulist_add(tmp, qg->qgroupid, 1402 GFP_ATOMIC);
1221 (u64)(uintptr_t)qg, GFP_ATOMIC); 1403 if (ret < 0)
1404 return ret;
1405 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1222 if (ret < 0) 1406 if (ret < 0)
1223 return ret; 1407 return ret;
1224 ULIST_ITER_INIT(&tmp_uiter); 1408 ULIST_ITER_INIT(&tmp_uiter);
1225 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1409 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1226 struct btrfs_qgroup_list *glist; 1410 struct btrfs_qgroup_list *glist;
1227 1411
1228 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; 1412 qg = u64_to_ptr(tmp_unode->aux);
1229 if (qg->refcnt < seq) 1413 /*
1230 qg->refcnt = seq + 1; 1414 * We use this sequence number to keep from having to
1415 * run the whole list and 0 out the refcnt every time.
1416 * We basically use the sequence as the known 0 count and
1417 * then add 1 every time we see a qgroup. This is how we
1418 * get how many of the roots actually point up to the
1419 * upper level qgroups in order to determine exclusive
1420 * counts.
1421 *
1422 * For rescan we want to set old_refcnt to seq so our
1423 * exclusive calculations end up correct.
1424 */
1425 if (rescan)
1426 qg->old_refcnt = seq;
1427 else if (qg->old_refcnt < seq)
1428 qg->old_refcnt = seq + 1;
1231 else 1429 else
1232 ++qg->refcnt; 1430 qg->old_refcnt++;
1233 1431
1432 if (qg->new_refcnt < seq)
1433 qg->new_refcnt = seq + 1;
1434 else
1435 qg->new_refcnt++;
1234 list_for_each_entry(glist, &qg->groups, next_group) { 1436 list_for_each_entry(glist, &qg->groups, next_group) {
1437 ret = ulist_add(qgroups, glist->group->qgroupid,
1438 ptr_to_u64(glist->group),
1439 GFP_ATOMIC);
1440 if (ret < 0)
1441 return ret;
1235 ret = ulist_add(tmp, glist->group->qgroupid, 1442 ret = ulist_add(tmp, glist->group->qgroupid,
1236 (u64)(uintptr_t)glist->group, 1443 ptr_to_u64(glist->group),
1237 GFP_ATOMIC); 1444 GFP_ATOMIC);
1238 if (ret < 0) 1445 if (ret < 0)
1239 return ret; 1446 return ret;
1240 } 1447 }
1241 } 1448 }
1242 } 1449 }
1450 return 0;
1451}
1243 1452
1453/*
1454 * We need to walk forward in our operation tree and account for any roots that
1455 * were deleted after we made this operation.
1456 */
1457static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1458 struct btrfs_qgroup_operation *oper,
1459 struct ulist *tmp,
1460 struct ulist *qgroups, u64 seq,
1461 int *old_roots)
1462{
1463 struct ulist_node *unode;
1464 struct ulist_iterator uiter;
1465 struct btrfs_qgroup *qg;
1466 struct btrfs_qgroup_operation *tmp_oper;
1467 struct rb_node *n;
1468 int ret;
1469
1470 ulist_reinit(tmp);
1471
1472 /*
1473 * We only walk forward in the tree since we're only interested in
1474 * removals that happened _after_ our operation.
1475 */
1476 spin_lock(&fs_info->qgroup_op_lock);
1477 n = rb_next(&oper->n);
1478 spin_unlock(&fs_info->qgroup_op_lock);
1479 if (!n)
1480 return 0;
1481 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1482 while (tmp_oper->bytenr == oper->bytenr) {
1483 /*
1484 * If it's not a removal we don't care, additions work out
1485 * properly with our refcnt tracking.
1486 */
1487 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1488 tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1489 goto next;
1490 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1491 if (!qg)
1492 goto next;
1493 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1494 GFP_ATOMIC);
1495 if (ret) {
1496 if (ret < 0)
1497 return ret;
1498 /*
1499 * We only want to increase old_roots if this qgroup is
1500 * not already in the list of qgroups. If it is already
1501 * there then that means it must have been re-added or
1502 * the delete will be discarded because we had an
1503 * existing ref that we haven't looked up yet. In this
1504 * case we don't want to increase old_roots. So if ret
1505 * == 1 then we know that this is the first time we've
1506 * seen this qgroup and we can bump the old_roots.
1507 */
1508 (*old_roots)++;
1509 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1510 GFP_ATOMIC);
1511 if (ret < 0)
1512 return ret;
1513 }
1514next:
1515 spin_lock(&fs_info->qgroup_op_lock);
1516 n = rb_next(&tmp_oper->n);
1517 spin_unlock(&fs_info->qgroup_op_lock);
1518 if (!n)
1519 break;
1520 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1521 }
1522
1523 /* Ok now process the qgroups we found */
1524 ULIST_ITER_INIT(&uiter);
1525 while ((unode = ulist_next(tmp, &uiter))) {
1526 struct btrfs_qgroup_list *glist;
1527
1528 qg = u64_to_ptr(unode->aux);
1529 if (qg->old_refcnt < seq)
1530 qg->old_refcnt = seq + 1;
1531 else
1532 qg->old_refcnt++;
1533 if (qg->new_refcnt < seq)
1534 qg->new_refcnt = seq + 1;
1535 else
1536 qg->new_refcnt++;
1537 list_for_each_entry(glist, &qg->groups, next_group) {
1538 ret = ulist_add(qgroups, glist->group->qgroupid,
1539 ptr_to_u64(glist->group), GFP_ATOMIC);
1540 if (ret < 0)
1541 return ret;
1542 ret = ulist_add(tmp, glist->group->qgroupid,
1543 ptr_to_u64(glist->group), GFP_ATOMIC);
1544 if (ret < 0)
1545 return ret;
1546 }
1547 }
1244 return 0; 1548 return 0;
1245} 1549}
1246 1550
1247static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info, 1551/* Add refcnt for the newly added reference. */
1248 struct ulist *roots, struct ulist *tmp, 1552static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1249 u64 seq, int sgn, u64 num_bytes, 1553 struct btrfs_qgroup_operation *oper,
1250 struct btrfs_qgroup *qgroup) 1554 struct btrfs_qgroup *qgroup,
1555 struct ulist *tmp, struct ulist *qgroups,
1556 u64 seq)
1251{ 1557{
1252 struct ulist_node *unode; 1558 struct ulist_node *unode;
1253 struct ulist_iterator uiter; 1559 struct ulist_iterator uiter;
1254 struct btrfs_qgroup *qg; 1560 struct btrfs_qgroup *qg;
1255 struct btrfs_qgroup_list *glist;
1256 int ret; 1561 int ret;
1257 1562
1258 ulist_reinit(tmp); 1563 ulist_reinit(tmp);
1259 ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1564 ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1565 GFP_ATOMIC);
1566 if (ret < 0)
1567 return ret;
1568 ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1569 GFP_ATOMIC);
1260 if (ret < 0) 1570 if (ret < 0)
1261 return ret; 1571 return ret;
1262
1263 ULIST_ITER_INIT(&uiter); 1572 ULIST_ITER_INIT(&uiter);
1264 while ((unode = ulist_next(tmp, &uiter))) { 1573 while ((unode = ulist_next(tmp, &uiter))) {
1265 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1574 struct btrfs_qgroup_list *glist;
1266 if (qg->refcnt < seq) {
1267 /* not visited by step 1 */
1268 qg->rfer += sgn * num_bytes;
1269 qg->rfer_cmpr += sgn * num_bytes;
1270 if (roots->nnodes == 0) {
1271 qg->excl += sgn * num_bytes;
1272 qg->excl_cmpr += sgn * num_bytes;
1273 }
1274 qgroup_dirty(fs_info, qg);
1275 }
1276 WARN_ON(qg->tag >= seq);
1277 qg->tag = seq;
1278 1575
1576 qg = u64_to_ptr(unode->aux);
1577 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1578 if (qg->new_refcnt < seq)
1579 qg->new_refcnt = seq + 1;
1580 else
1581 qg->new_refcnt++;
1582 } else {
1583 if (qg->old_refcnt < seq)
1584 qg->old_refcnt = seq + 1;
1585 else
1586 qg->old_refcnt++;
1587 }
1279 list_for_each_entry(glist, &qg->groups, next_group) { 1588 list_for_each_entry(glist, &qg->groups, next_group) {
1280 ret = ulist_add(tmp, glist->group->qgroupid, 1589 ret = ulist_add(tmp, glist->group->qgroupid,
1281 (uintptr_t)glist->group, GFP_ATOMIC); 1590 ptr_to_u64(glist->group), GFP_ATOMIC);
1591 if (ret < 0)
1592 return ret;
1593 ret = ulist_add(qgroups, glist->group->qgroupid,
1594 ptr_to_u64(glist->group), GFP_ATOMIC);
1282 if (ret < 0) 1595 if (ret < 0)
1283 return ret; 1596 return ret;
1284 } 1597 }
1285 } 1598 }
1286
1287 return 0; 1599 return 0;
1288} 1600}
1289 1601
1290static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info, 1602/*
1291 struct ulist *roots, struct ulist *tmp, 1603 * This adjusts the counters for all referenced qgroups if need be.
1292 u64 seq, int sgn, u64 num_bytes) 1604 */
1605static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1606 u64 root_to_skip, u64 num_bytes,
1607 struct ulist *qgroups, u64 seq,
1608 int old_roots, int new_roots, int rescan)
1293{ 1609{
1294 struct ulist_node *unode; 1610 struct ulist_node *unode;
1295 struct ulist_iterator uiter; 1611 struct ulist_iterator uiter;
1296 struct btrfs_qgroup *qg; 1612 struct btrfs_qgroup *qg;
1297 struct ulist_node *tmp_unode; 1613 u64 cur_new_count, cur_old_count;
1298 struct ulist_iterator tmp_uiter;
1299 int ret;
1300 1614
1301 ULIST_ITER_INIT(&uiter); 1615 ULIST_ITER_INIT(&uiter);
1302 while ((unode = ulist_next(roots, &uiter))) { 1616 while ((unode = ulist_next(qgroups, &uiter))) {
1303 qg = find_qgroup_rb(fs_info, unode->val); 1617 bool dirty = false;
1304 if (!qg)
1305 continue;
1306 1618
1307 ulist_reinit(tmp); 1619 qg = u64_to_ptr(unode->aux);
1308 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); 1620 /*
1309 if (ret < 0) 1621 * Wasn't referenced before but is now, add to the reference
1310 return ret; 1622 * counters.
1623 */
1624 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
1625 qg->rfer += num_bytes;
1626 qg->rfer_cmpr += num_bytes;
1627 dirty = true;
1628 }
1311 1629
1312 ULIST_ITER_INIT(&tmp_uiter); 1630 /*
1313 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1631 * Was referenced before but isn't now, subtract from the
1314 struct btrfs_qgroup_list *glist; 1632 * reference counters.
1633 */
1634 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
1635 qg->rfer -= num_bytes;
1636 qg->rfer_cmpr -= num_bytes;
1637 dirty = true;
1638 }
1315 1639
1316 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; 1640 if (qg->old_refcnt < seq)
1317 if (qg->tag == seq) 1641 cur_old_count = 0;
1318 continue; 1642 else
1643 cur_old_count = qg->old_refcnt - seq;
1644 if (qg->new_refcnt < seq)
1645 cur_new_count = 0;
1646 else
1647 cur_new_count = qg->new_refcnt - seq;
1319 1648
1320 if (qg->refcnt - seq == roots->nnodes) { 1649 /*
1321 qg->excl -= sgn * num_bytes; 1650 * If our refcount was the same as the roots previously but our
1322 qg->excl_cmpr -= sgn * num_bytes; 1651 * new count isn't the same as the number of roots now then we
1323 qgroup_dirty(fs_info, qg); 1652 * went from having a exclusive reference on this range to not.
1324 } 1653 */
1654 if (old_roots && cur_old_count == old_roots &&
1655 (cur_new_count != new_roots || new_roots == 0)) {
1656 WARN_ON(cur_new_count != new_roots && new_roots == 0);
1657 qg->excl -= num_bytes;
1658 qg->excl_cmpr -= num_bytes;
1659 dirty = true;
1660 }
1325 1661
1326 list_for_each_entry(glist, &qg->groups, next_group) { 1662 /*
1327 ret = ulist_add(tmp, glist->group->qgroupid, 1663 * If we didn't reference all the roots before but now we do we
1328 (uintptr_t)glist->group, 1664 * have an exclusive reference to this range.
1329 GFP_ATOMIC); 1665 */
1330 if (ret < 0) 1666 if ((!old_roots || (old_roots && cur_old_count != old_roots))
1331 return ret; 1667 && cur_new_count == new_roots) {
1332 } 1668 qg->excl += num_bytes;
1669 qg->excl_cmpr += num_bytes;
1670 dirty = true;
1333 } 1671 }
1334 }
1335 1672
1673 if (dirty)
1674 qgroup_dirty(fs_info, qg);
1675 }
1336 return 0; 1676 return 0;
1337} 1677}
1338 1678
1339/* 1679/*
1340 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 1680 * If we removed a data extent and there were other references for that bytenr
1341 * from the fs. First, all roots referencing the extent are searched, and 1681 * then we need to lookup all referenced roots to make sure we still don't
1342 * then the space is accounted accordingly to the different roots. The 1682 * reference this bytenr. If we do then we can just discard this operation.
1343 * accounting algorithm works in 3 steps documented inline.
1344 */ 1683 */
1345int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, 1684static int check_existing_refs(struct btrfs_trans_handle *trans,
1346 struct btrfs_fs_info *fs_info, 1685 struct btrfs_fs_info *fs_info,
1347 struct btrfs_delayed_ref_node *node, 1686 struct btrfs_qgroup_operation *oper)
1348 struct btrfs_delayed_extent_op *extent_op)
1349{ 1687{
1350 struct btrfs_root *quota_root;
1351 u64 ref_root;
1352 struct btrfs_qgroup *qgroup;
1353 struct ulist *roots = NULL; 1688 struct ulist *roots = NULL;
1354 u64 seq; 1689 struct ulist_node *unode;
1690 struct ulist_iterator uiter;
1355 int ret = 0; 1691 int ret = 0;
1356 int sgn;
1357 1692
1358 if (!fs_info->quota_enabled) 1693 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1359 return 0; 1694 oper->elem.seq, &roots);
1360 1695 if (ret < 0)
1361 BUG_ON(!fs_info->quota_root); 1696 return ret;
1697 ret = 0;
1362 1698
1363 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1699 ULIST_ITER_INIT(&uiter);
1364 node->type == BTRFS_SHARED_BLOCK_REF_KEY) { 1700 while ((unode = ulist_next(roots, &uiter))) {
1365 struct btrfs_delayed_tree_ref *ref; 1701 if (unode->val == oper->ref_root) {
1366 ref = btrfs_delayed_node_to_tree_ref(node); 1702 ret = 1;
1367 ref_root = ref->root; 1703 break;
1368 } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1704 }
1369 node->type == BTRFS_SHARED_DATA_REF_KEY) {
1370 struct btrfs_delayed_data_ref *ref;
1371 ref = btrfs_delayed_node_to_data_ref(node);
1372 ref_root = ref->root;
1373 } else {
1374 BUG();
1375 } 1705 }
1706 ulist_free(roots);
1707 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1376 1708
1377 if (!is_fstree(ref_root)) { 1709 return ret;
1378 /* 1710}
1379 * non-fs-trees are not being accounted
1380 */
1381 return 0;
1382 }
1383 1711
1384 switch (node->action) { 1712/*
1385 case BTRFS_ADD_DELAYED_REF: 1713 * If we share a reference across multiple roots then we may need to adjust
1386 case BTRFS_ADD_DELAYED_EXTENT: 1714 * various qgroups referenced and exclusive counters. The basic premise is this
1387 sgn = 1; 1715 *
1388 seq = btrfs_tree_mod_seq_prev(node->seq); 1716 * 1) We have seq to represent a 0 count. Instead of looping through all of the
1389 break; 1717 * qgroups and resetting their refcount to 0 we just constantly bump this
1390 case BTRFS_DROP_DELAYED_REF: 1718 * sequence number to act as the base reference count. This means that if
1391 sgn = -1; 1719 * anybody is equal to or below this sequence they were never referenced. We
1392 seq = node->seq; 1720 * jack this sequence up by the number of roots we found each time in order to
1393 break; 1721 * make sure we don't have any overlap.
1394 case BTRFS_UPDATE_DELAYED_HEAD: 1722 *
1395 return 0; 1723 * 2) We first search all the roots that reference the area _except_ the root
1396 default: 1724 * we're acting on currently. This makes up the old_refcnt of all the qgroups
1397 BUG(); 1725 * before.
1398 } 1726 *
1727 * 3) We walk all of the qgroups referenced by the root we are currently acting
1728 * on, and will either adjust old_refcnt in the case of a removal or the
1729 * new_refcnt in the case of an addition.
1730 *
1731 * 4) Finally we walk all the qgroups that are referenced by this range
1732 * including the root we are acting on currently. We will adjust the counters
1733 * based on the number of roots we had and will have after this operation.
1734 *
1735 * Take this example as an illustration
1736 *
1737 * [qgroup 1/0]
1738 * / | \
1739 * [qg 0/0] [qg 0/1] [qg 0/2]
1740 * \ | /
1741 * [ extent ]
1742 *
1743 * Say we are adding a reference that is covered by qg 0/0. The first step
1744 * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
1745 * old_roots being 2. Because it is adding new_roots will be 1. We then go
1746 * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
1747 * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
1748 * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
1749 * reference and thus must add the size to the referenced bytes. Everything
1750 * else is the same so nothing else changes.
1751 */
1752static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1753 struct btrfs_fs_info *fs_info,
1754 struct btrfs_qgroup_operation *oper)
1755{
1756 struct ulist *roots = NULL;
1757 struct ulist *qgroups, *tmp;
1758 struct btrfs_qgroup *qgroup;
1759 struct seq_list elem = {};
1760 u64 seq;
1761 int old_roots = 0;
1762 int new_roots = 0;
1763 int ret = 0;
1399 1764
1400 mutex_lock(&fs_info->qgroup_rescan_lock); 1765 if (oper->elem.seq) {
1401 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1766 ret = check_existing_refs(trans, fs_info, oper);
1402 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { 1767 if (ret < 0)
1403 mutex_unlock(&fs_info->qgroup_rescan_lock); 1768 return ret;
1769 if (ret)
1404 return 0; 1770 return 0;
1405 }
1406 } 1771 }
1407 mutex_unlock(&fs_info->qgroup_rescan_lock);
1408 1772
1409 /* 1773 qgroups = ulist_alloc(GFP_NOFS);
1410 * the delayed ref sequence number we pass depends on the direction of 1774 if (!qgroups)
1411 * the operation. for add operations, we pass 1775 return -ENOMEM;
1412 * tree_mod_log_prev_seq(node->seq) to skip
1413 * the delayed ref's current sequence number, because we need the state
1414 * of the tree before the add operation. for delete operations, we pass
1415 * (node->seq) to include the delayed ref's current sequence number,
1416 * because we need the state of the tree after the delete operation.
1417 */
1418 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1419 if (ret < 0)
1420 return ret;
1421
1422 spin_lock(&fs_info->qgroup_lock);
1423 1776
1424 quota_root = fs_info->quota_root; 1777 tmp = ulist_alloc(GFP_NOFS);
1425 if (!quota_root) 1778 if (!tmp)
1426 goto unlock; 1779 return -ENOMEM;
1427 1780
1428 qgroup = find_qgroup_rb(fs_info, ref_root); 1781 btrfs_get_tree_mod_seq(fs_info, &elem);
1782 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
1783 &roots);
1784 btrfs_put_tree_mod_seq(fs_info, &elem);
1785 if (ret < 0) {
1786 ulist_free(qgroups);
1787 ulist_free(tmp);
1788 return ret;
1789 }
1790 spin_lock(&fs_info->qgroup_lock);
1791 qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1429 if (!qgroup) 1792 if (!qgroup)
1430 goto unlock; 1793 goto out;
1794 seq = fs_info->qgroup_seq;
1431 1795
1432 /* 1796 /*
1433 * step 1: for each old ref, visit all nodes once and inc refcnt 1797 * So roots is the list of all the roots currently pointing at the
1798 * bytenr, including the ref we are adding if we are adding, or not if
1799 * we are removing a ref. So we pass in the ref_root to skip that root
1800 * in our calculations. We set old_refnct and new_refcnt cause who the
1801 * hell knows what everything looked like before, and it doesn't matter
1802 * except...
1434 */ 1803 */
1435 ulist_reinit(fs_info->qgroup_ulist); 1804 ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
1436 seq = fs_info->qgroup_seq; 1805 seq, &old_roots, 0);
1437 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 1806 if (ret < 0)
1807 goto out;
1438 1808
1439 ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist, 1809 /*
1440 seq); 1810 * Now adjust the refcounts of the qgroups that care about this
1441 if (ret) 1811 * reference, either the old_count in the case of removal or new_count
1442 goto unlock; 1812 * in the case of an addition.
1813 */
1814 ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
1815 seq);
1816 if (ret < 0)
1817 goto out;
1443 1818
1444 /* 1819 /*
1445 * step 2: walk from the new root 1820 * ...in the case of removals. If we had a removal before we got around
1821 * to processing this operation then we need to find that guy and count
1822 * his references as if they really existed so we don't end up screwing
1823 * up the exclusive counts. Then whenever we go to process the delete
1824 * everything will be grand and we can account for whatever exclusive
1825 * changes need to be made there. We also have to pass in old_roots so
1826 * we have an accurate count of the roots as it pertains to this
1827 * operations view of the world.
1446 */ 1828 */
1447 ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist, 1829 ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
1448 seq, sgn, node->num_bytes, qgroup); 1830 &old_roots);
1449 if (ret) 1831 if (ret < 0)
1450 goto unlock; 1832 goto out;
1451 1833
1452 /* 1834 /*
1453 * step 3: walk again from old refs 1835 * We are adding our root, need to adjust up the number of roots,
1836 * otherwise old_roots is the number of roots we want.
1454 */ 1837 */
1455 ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist, 1838 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1456 seq, sgn, node->num_bytes); 1839 new_roots = old_roots + 1;
1457 if (ret) 1840 } else {
1458 goto unlock; 1841 new_roots = old_roots;
1842 old_roots++;
1843 }
1844 fs_info->qgroup_seq += old_roots + 1;
1459 1845
1460unlock: 1846
1847 /*
1848 * And now the magic happens, bless Arne for having a pretty elegant
1849 * solution for this.
1850 */
1851 qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
1852 qgroups, seq, old_roots, new_roots, 0);
1853out:
1461 spin_unlock(&fs_info->qgroup_lock); 1854 spin_unlock(&fs_info->qgroup_lock);
1855 ulist_free(qgroups);
1462 ulist_free(roots); 1856 ulist_free(roots);
1857 ulist_free(tmp);
1858 return ret;
1859}
1860
1861/*
1862 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1863 * from the fs. First, all roots referencing the extent are searched, and
1864 * then the space is accounted accordingly to the different roots. The
1865 * accounting algorithm works in 3 steps documented inline.
1866 */
1867static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
1868 struct btrfs_fs_info *fs_info,
1869 struct btrfs_qgroup_operation *oper)
1870{
1871 int ret = 0;
1872
1873 if (!fs_info->quota_enabled)
1874 return 0;
1875
1876 BUG_ON(!fs_info->quota_root);
1877
1878 mutex_lock(&fs_info->qgroup_rescan_lock);
1879 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1880 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
1881 mutex_unlock(&fs_info->qgroup_rescan_lock);
1882 return 0;
1883 }
1884 }
1885 mutex_unlock(&fs_info->qgroup_rescan_lock);
1886
1887 ASSERT(is_fstree(oper->ref_root));
1888
1889 switch (oper->type) {
1890 case BTRFS_QGROUP_OPER_ADD_EXCL:
1891 case BTRFS_QGROUP_OPER_SUB_EXCL:
1892 ret = qgroup_excl_accounting(fs_info, oper);
1893 break;
1894 case BTRFS_QGROUP_OPER_ADD_SHARED:
1895 case BTRFS_QGROUP_OPER_SUB_SHARED:
1896 ret = qgroup_shared_accounting(trans, fs_info, oper);
1897 break;
1898 default:
1899 ASSERT(0);
1900 }
1901 return ret;
1902}
1463 1903
1904/*
1905 * Needs to be called everytime we run delayed refs, even if there is an error
1906 * in order to cleanup outstanding operations.
1907 */
1908int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
1909 struct btrfs_fs_info *fs_info)
1910{
1911 struct btrfs_qgroup_operation *oper;
1912 int ret = 0;
1913
1914 while (!list_empty(&trans->qgroup_ref_list)) {
1915 oper = list_first_entry(&trans->qgroup_ref_list,
1916 struct btrfs_qgroup_operation, list);
1917 list_del_init(&oper->list);
1918 if (!ret || !trans->aborted)
1919 ret = btrfs_qgroup_account(trans, fs_info, oper);
1920 spin_lock(&fs_info->qgroup_op_lock);
1921 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
1922 spin_unlock(&fs_info->qgroup_op_lock);
1923 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1924 kfree(oper);
1925 }
1464 return ret; 1926 return ret;
1465} 1927}
1466 1928
@@ -1629,8 +2091,16 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1629 srcgroup = find_qgroup_rb(fs_info, srcid); 2091 srcgroup = find_qgroup_rb(fs_info, srcid);
1630 if (!srcgroup) 2092 if (!srcgroup)
1631 goto unlock; 2093 goto unlock;
1632 dstgroup->rfer = srcgroup->rfer - level_size; 2094
1633 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; 2095 /*
2096 * We call inherit after we clone the root in order to make sure
2097 * our counts don't go crazy, so at this point the only
2098 * difference between the two roots should be the root node.
2099 */
2100 dstgroup->rfer = srcgroup->rfer;
2101 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2102 dstgroup->excl = level_size;
2103 dstgroup->excl_cmpr = level_size;
1634 srcgroup->excl = level_size; 2104 srcgroup->excl = level_size;
1635 srcgroup->excl_cmpr = level_size; 2105 srcgroup->excl_cmpr = level_size;
1636 qgroup_dirty(fs_info, dstgroup); 2106 qgroup_dirty(fs_info, dstgroup);
@@ -1734,7 +2204,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1734 struct btrfs_qgroup *qg; 2204 struct btrfs_qgroup *qg;
1735 struct btrfs_qgroup_list *glist; 2205 struct btrfs_qgroup_list *glist;
1736 2206
1737 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 2207 qg = u64_to_ptr(unode->aux);
1738 2208
1739 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2209 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1740 qg->reserved + (s64)qg->rfer + num_bytes > 2210 qg->reserved + (s64)qg->rfer + num_bytes >
@@ -1766,7 +2236,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1766 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2236 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1767 struct btrfs_qgroup *qg; 2237 struct btrfs_qgroup *qg;
1768 2238
1769 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 2239 qg = u64_to_ptr(unode->aux);
1770 2240
1771 qg->reserved += num_bytes; 2241 qg->reserved += num_bytes;
1772 } 2242 }
@@ -1812,7 +2282,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1812 struct btrfs_qgroup *qg; 2282 struct btrfs_qgroup *qg;
1813 struct btrfs_qgroup_list *glist; 2283 struct btrfs_qgroup_list *glist;
1814 2284
1815 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 2285 qg = u64_to_ptr(unode->aux);
1816 2286
1817 qg->reserved -= num_bytes; 2287 qg->reserved -= num_bytes;
1818 2288
@@ -1848,15 +2318,15 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1848 */ 2318 */
1849static int 2319static int
1850qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2320qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1851 struct btrfs_trans_handle *trans, struct ulist *tmp, 2321 struct btrfs_trans_handle *trans, struct ulist *qgroups,
1852 struct extent_buffer *scratch_leaf) 2322 struct ulist *tmp, struct extent_buffer *scratch_leaf)
1853{ 2323{
1854 struct btrfs_key found; 2324 struct btrfs_key found;
1855 struct ulist *roots = NULL; 2325 struct ulist *roots = NULL;
1856 struct ulist_node *unode;
1857 struct ulist_iterator uiter;
1858 struct seq_list tree_mod_seq_elem = {}; 2326 struct seq_list tree_mod_seq_elem = {};
2327 u64 num_bytes;
1859 u64 seq; 2328 u64 seq;
2329 int new_roots;
1860 int slot; 2330 int slot;
1861 int ret; 2331 int ret;
1862 2332
@@ -1897,8 +2367,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1897 mutex_unlock(&fs_info->qgroup_rescan_lock); 2367 mutex_unlock(&fs_info->qgroup_rescan_lock);
1898 2368
1899 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2369 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1900 u64 num_bytes;
1901
1902 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2370 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1903 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2371 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
1904 found.type != BTRFS_METADATA_ITEM_KEY) 2372 found.type != BTRFS_METADATA_ITEM_KEY)
@@ -1908,76 +2376,34 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1908 else 2376 else
1909 num_bytes = found.offset; 2377 num_bytes = found.offset;
1910 2378
1911 ret = btrfs_find_all_roots(trans, fs_info, found.objectid, 2379 ulist_reinit(qgroups);
1912 tree_mod_seq_elem.seq, &roots); 2380 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2381 &roots);
1913 if (ret < 0) 2382 if (ret < 0)
1914 goto out; 2383 goto out;
1915 spin_lock(&fs_info->qgroup_lock); 2384 spin_lock(&fs_info->qgroup_lock);
1916 seq = fs_info->qgroup_seq; 2385 seq = fs_info->qgroup_seq;
1917 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 2386 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1918 2387
1919 ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); 2388 new_roots = 0;
1920 if (ret) { 2389 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2390 seq, &new_roots, 1);
2391 if (ret < 0) {
1921 spin_unlock(&fs_info->qgroup_lock); 2392 spin_unlock(&fs_info->qgroup_lock);
1922 ulist_free(roots); 2393 ulist_free(roots);
1923 goto out; 2394 goto out;
1924 } 2395 }
1925 2396
1926 /* 2397 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
1927 * step2 of btrfs_qgroup_account_ref works from a single root, 2398 seq, 0, new_roots, 1);
1928 * we're doing all at once here. 2399 if (ret < 0) {
1929 */ 2400 spin_unlock(&fs_info->qgroup_lock);
1930 ulist_reinit(tmp); 2401 ulist_free(roots);
1931 ULIST_ITER_INIT(&uiter); 2402 goto out;
1932 while ((unode = ulist_next(roots, &uiter))) {
1933 struct btrfs_qgroup *qg;
1934
1935 qg = find_qgroup_rb(fs_info, unode->val);
1936 if (!qg)
1937 continue;
1938
1939 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1940 GFP_ATOMIC);
1941 if (ret < 0) {
1942 spin_unlock(&fs_info->qgroup_lock);
1943 ulist_free(roots);
1944 goto out;
1945 }
1946 }
1947
1948 /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1949 ULIST_ITER_INIT(&uiter);
1950 while ((unode = ulist_next(tmp, &uiter))) {
1951 struct btrfs_qgroup *qg;
1952 struct btrfs_qgroup_list *glist;
1953
1954 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1955 qg->rfer += num_bytes;
1956 qg->rfer_cmpr += num_bytes;
1957 WARN_ON(qg->tag >= seq);
1958 if (qg->refcnt - seq == roots->nnodes) {
1959 qg->excl += num_bytes;
1960 qg->excl_cmpr += num_bytes;
1961 }
1962 qgroup_dirty(fs_info, qg);
1963
1964 list_for_each_entry(glist, &qg->groups, next_group) {
1965 ret = ulist_add(tmp, glist->group->qgroupid,
1966 (uintptr_t)glist->group,
1967 GFP_ATOMIC);
1968 if (ret < 0) {
1969 spin_unlock(&fs_info->qgroup_lock);
1970 ulist_free(roots);
1971 goto out;
1972 }
1973 }
1974 } 2403 }
1975
1976 spin_unlock(&fs_info->qgroup_lock); 2404 spin_unlock(&fs_info->qgroup_lock);
1977 ulist_free(roots); 2405 ulist_free(roots);
1978 ret = 0;
1979 } 2406 }
1980
1981out: 2407out:
1982 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2408 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1983 2409
@@ -1990,13 +2416,16 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
1990 qgroup_rescan_work); 2416 qgroup_rescan_work);
1991 struct btrfs_path *path; 2417 struct btrfs_path *path;
1992 struct btrfs_trans_handle *trans = NULL; 2418 struct btrfs_trans_handle *trans = NULL;
1993 struct ulist *tmp = NULL; 2419 struct ulist *tmp = NULL, *qgroups = NULL;
1994 struct extent_buffer *scratch_leaf = NULL; 2420 struct extent_buffer *scratch_leaf = NULL;
1995 int err = -ENOMEM; 2421 int err = -ENOMEM;
1996 2422
1997 path = btrfs_alloc_path(); 2423 path = btrfs_alloc_path();
1998 if (!path) 2424 if (!path)
1999 goto out; 2425 goto out;
2426 qgroups = ulist_alloc(GFP_NOFS);
2427 if (!qgroups)
2428 goto out;
2000 tmp = ulist_alloc(GFP_NOFS); 2429 tmp = ulist_alloc(GFP_NOFS);
2001 if (!tmp) 2430 if (!tmp)
2002 goto out; 2431 goto out;
@@ -2015,7 +2444,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2015 err = -EINTR; 2444 err = -EINTR;
2016 } else { 2445 } else {
2017 err = qgroup_rescan_leaf(fs_info, path, trans, 2446 err = qgroup_rescan_leaf(fs_info, path, trans,
2018 tmp, scratch_leaf); 2447 qgroups, tmp, scratch_leaf);
2019 } 2448 }
2020 if (err > 0) 2449 if (err > 0)
2021 btrfs_commit_transaction(trans, fs_info->fs_root); 2450 btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2025,7 +2454,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2025 2454
2026out: 2455out:
2027 kfree(scratch_leaf); 2456 kfree(scratch_leaf);
2028 ulist_free(tmp); 2457 ulist_free(qgroups);
2029 btrfs_free_path(path); 2458 btrfs_free_path(path);
2030 2459
2031 mutex_lock(&fs_info->qgroup_rescan_lock); 2460 mutex_lock(&fs_info->qgroup_rescan_lock);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
new file mode 100644
index 000000000000..5952ff1fbd7a
--- /dev/null
+++ b/fs/btrfs/qgroup.h
@@ -0,0 +1,107 @@
1/*
2 * Copyright (C) 2014 Facebook. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#ifndef __BTRFS_QGROUP__
20#define __BTRFS_QGROUP__
21
22/*
23 * A description of the operations, all of these operations only happen when we
24 * are adding the 1st reference for that subvolume in the case of adding space
25 * or on the last reference delete in the case of subtraction. The only
26 * exception is the last one, which is added for confusion.
27 *
28 * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
29 * one pointing at the bytes we are adding. This is called on the first
30 * allocation.
31 *
32 * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
33 * shared between subvols. This is called on the creation of a ref that already
34 * has refs from a different subvolume, so basically reflink.
35 *
36 * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
37 * one referencing the range.
38 *
39 * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares
40 * refs with other subvolumes.
41 */
42enum btrfs_qgroup_operation_type {
43 BTRFS_QGROUP_OPER_ADD_EXCL,
44 BTRFS_QGROUP_OPER_ADD_SHARED,
45 BTRFS_QGROUP_OPER_SUB_EXCL,
46 BTRFS_QGROUP_OPER_SUB_SHARED,
47};
48
49struct btrfs_qgroup_operation {
50 u64 ref_root;
51 u64 bytenr;
52 u64 num_bytes;
53 u64 seq;
54 enum btrfs_qgroup_operation_type type;
55 struct seq_list elem;
56 struct rb_node n;
57 struct list_head list;
58};
59
60int btrfs_quota_enable(struct btrfs_trans_handle *trans,
61 struct btrfs_fs_info *fs_info);
62int btrfs_quota_disable(struct btrfs_trans_handle *trans,
63 struct btrfs_fs_info *fs_info);
64int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
65void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
66int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
67int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
68 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
69int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
70 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
71int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
72 struct btrfs_fs_info *fs_info, u64 qgroupid,
73 char *name);
74int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
75 struct btrfs_fs_info *fs_info, u64 qgroupid);
76int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
77 struct btrfs_fs_info *fs_info, u64 qgroupid,
78 struct btrfs_qgroup_limit *limit);
79int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
80void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
81struct btrfs_delayed_extent_op;
82int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
83 struct btrfs_fs_info *fs_info, u64 ref_root,
84 u64 bytenr, u64 num_bytes,
85 enum btrfs_qgroup_operation_type type,
86 int mod_seq);
87int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
88 struct btrfs_fs_info *fs_info);
89void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
90 struct btrfs_fs_info *fs_info,
91 struct btrfs_qgroup_operation *oper);
92int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
93 struct btrfs_fs_info *fs_info);
94int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
95 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
96 struct btrfs_qgroup_inherit *inherit);
97int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
98void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
99
100void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
101
102#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
103int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
104 u64 rfer, u64 excl);
105#endif
106
107#endif /* __BTRFS_QGROUP__ */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7c4c049da871..3aafbde8b637 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -31,6 +31,7 @@
31#include "inode-map.h" 31#include "inode-map.h"
32#include "volumes.h" 32#include "volumes.h"
33#include "dev-replace.h" 33#include "dev-replace.h"
34#include "qgroup.h"
34 35
35#define BTRFS_ROOT_TRANS_TAG 0 36#define BTRFS_ROOT_TRANS_TAG 0
36 37
@@ -703,23 +704,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
703 return 0; 704 return 0;
704 } 705 }
705 706
706 /*
707 * do the qgroup accounting as early as possible
708 */
709 err = btrfs_delayed_refs_qgroup_accounting(trans, info);
710
711 btrfs_trans_release_metadata(trans, root); 707 btrfs_trans_release_metadata(trans, root);
712 trans->block_rsv = NULL; 708 trans->block_rsv = NULL;
713 709
714 if (trans->qgroup_reserved) {
715 /*
716 * the same root has to be passed here between start_transaction
717 * and end_transaction. Subvolume quota depends on this.
718 */
719 btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
720 trans->qgroup_reserved = 0;
721 }
722
723 if (!list_empty(&trans->new_bgs)) 710 if (!list_empty(&trans->new_bgs))
724 btrfs_create_pending_block_groups(trans, root); 711 btrfs_create_pending_block_groups(trans, root);
725 712
@@ -730,6 +717,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
730 btrfs_run_delayed_refs(trans, root, cur); 717 btrfs_run_delayed_refs(trans, root, cur);
731 } 718 }
732 719
720 if (trans->qgroup_reserved) {
721 /*
722 * the same root has to be passed here between start_transaction
723 * and end_transaction. Subvolume quota depends on this.
724 */
725 btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
726 trans->qgroup_reserved = 0;
727 }
728
733 btrfs_trans_release_metadata(trans, root); 729 btrfs_trans_release_metadata(trans, root);
734 trans->block_rsv = NULL; 730 trans->block_rsv = NULL;
735 731
@@ -1169,12 +1165,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1169 goto no_free_objectid; 1165 goto no_free_objectid;
1170 } 1166 }
1171 1167
1172 pending->error = btrfs_qgroup_inherit(trans, fs_info,
1173 root->root_key.objectid,
1174 objectid, pending->inherit);
1175 if (pending->error)
1176 goto no_free_objectid;
1177
1178 key.objectid = objectid; 1168 key.objectid = objectid;
1179 key.offset = (u64)-1; 1169 key.offset = (u64)-1;
1180 key.type = BTRFS_ROOT_ITEM_KEY; 1170 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1271,6 +1261,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1271 goto fail; 1261 goto fail;
1272 } 1262 }
1273 1263
1264 /*
1265 * We need to flush delayed refs in order to make sure all of our quota
1266 * operations have been done before we call btrfs_qgroup_inherit.
1267 */
1268 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1269 if (ret) {
1270 btrfs_abort_transaction(trans, root, ret);
1271 goto fail;
1272 }
1273
1274 pending->error = btrfs_qgroup_inherit(trans, fs_info,
1275 root->root_key.objectid,
1276 objectid, pending->inherit);
1277 if (pending->error)
1278 goto no_free_objectid;
1279
1274 /* see comments in should_cow_block() */ 1280 /* see comments in should_cow_block() */
1275 set_bit(BTRFS_ROOT_FORCE_COW, &root->state); 1281 set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
1276 smp_wmb(); 1282 smp_wmb();
@@ -1599,12 +1605,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1599 * them now so that they hinder processing of more delayed refs 1605 * them now so that they hinder processing of more delayed refs
1600 * as little as possible. 1606 * as little as possible.
1601 */ 1607 */
1602 if (ret) {
1603 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1604 return ret;
1605 }
1606
1607 ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1608 if (ret) 1608 if (ret)
1609 return ret; 1609 return ret;
1610 1610