Diffstat (limited to 'fs')
 fs/btrfs/extent-tree.c | 261
 fs/btrfs/qgroup.c      | 164
 fs/btrfs/qgroup.h      |   1
 3 files changed, 426 insertions(+), 0 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 591893fe58cb..102ed3143976 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7478,6 +7478,220 @@ reada:
         wc->reada_slot = slot;
 }
 
+static int account_leaf_items(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct extent_buffer *eb)
+{
+        int nr = btrfs_header_nritems(eb);
+        int i, extent_type, ret;
+        struct btrfs_key key;
+        struct btrfs_file_extent_item *fi;
+        u64 bytenr, num_bytes;
+
+        for (i = 0; i < nr; i++) {
+                btrfs_item_key_to_cpu(eb, &key, i);
+
+                if (key.type != BTRFS_EXTENT_DATA_KEY)
+                        continue;
+
+                fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
+                /* filter out non qgroup-accountable extents */
+                extent_type = btrfs_file_extent_type(eb, fi);
+
+                if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+                        continue;
+
+                bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+                if (!bytenr)
+                        continue;
+
+                num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+
+                ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+                                              root->objectid,
+                                              bytenr, num_bytes,
+                                              BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
+                if (ret)
+                        return ret;
+        }
+        return 0;
+}
+
+/*
+ * Walk up the tree from the bottom, freeing leaves and any interior
+ * nodes which have had all slots visited. If a node (leaf or
+ * interior) is freed, the node above it will have its slot
+ * incremented. The root node will never be freed.
+ *
+ * At the end of this function, we should have a path which has all
+ * slots incremented to the next position for a search. If we need to
+ * read a new node it will be NULL and the node above it will have the
+ * correct slot selected for a later read.
+ *
+ * If we increment the root node's slot counter past the number of
+ * elements, 1 is returned to signal completion of the search.
+ */
+static int adjust_slots_upwards(struct btrfs_root *root,
+                                struct btrfs_path *path, int root_level)
+{
+        int level = 0;
+        int nr, slot;
+        struct extent_buffer *eb;
+
+        if (root_level == 0)
+                return 1;
+
+        while (level <= root_level) {
+                eb = path->nodes[level];
+                nr = btrfs_header_nritems(eb);
+                path->slots[level]++;
+                slot = path->slots[level];
+                if (slot >= nr || level == 0) {
+                        /*
+                         * Don't free the root - we will detect this
+                         * condition after our loop and return a
+                         * positive value for caller to stop walking the tree.
+                         */
+                        if (level != root_level) {
+                                btrfs_tree_unlock_rw(eb, path->locks[level]);
+                                path->locks[level] = 0;
+
+                                free_extent_buffer(eb);
+                                path->nodes[level] = NULL;
+                                path->slots[level] = 0;
+                        }
+                } else {
+                        /*
+                         * We have a valid slot to walk back down
+                         * from. Stop here so caller can process these
+                         * new nodes.
+                         */
+                        break;
+                }
+
+                level++;
+        }
+
+        eb = path->nodes[root_level];
+        if (path->slots[root_level] >= btrfs_header_nritems(eb))
+                return 1;
+
+        return 0;
+}
+
+/*
+ * root_eb is the subtree root and is locked before this function is called.
+ */
+static int account_shared_subtree(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct extent_buffer *root_eb,
+                                  u64 root_gen,
+                                  int root_level)
+{
+        int ret = 0;
+        int level;
+        struct extent_buffer *eb = root_eb;
+        struct btrfs_path *path = NULL;
+
+        BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
+        BUG_ON(root_eb == NULL);
+
+        if (!root->fs_info->quota_enabled)
+                return 0;
+
+        if (!extent_buffer_uptodate(root_eb)) {
+                ret = btrfs_read_buffer(root_eb, root_gen);
+                if (ret)
+                        goto out;
+        }
+
+        if (root_level == 0) {
+                ret = account_leaf_items(trans, root, root_eb);
+                goto out;
+        }
+
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
+
+        /*
+         * Walk down the tree. Missing extent blocks are filled in as
+         * we go. Metadata is accounted every time we read a new
+         * extent block.
+         *
+         * When we reach a leaf, we account for file extent items in it,
+         * walk back up the tree (adjusting slot pointers as we go)
+         * and restart the search process.
+         */
+        extent_buffer_get(root_eb); /* For path */
+        path->nodes[root_level] = root_eb;
+        path->slots[root_level] = 0;
+        path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
+walk_down:
+        level = root_level;
+        while (level >= 0) {
+                if (path->nodes[level] == NULL) {
+                        int child_bsize = root->nodesize;
+                        int parent_slot;
+                        u64 child_gen;
+                        u64 child_bytenr;
+
+                        /* We need to get child blockptr/gen from
+                         * parent before we can read it. */
+                        eb = path->nodes[level + 1];
+                        parent_slot = path->slots[level + 1];
+                        child_bytenr = btrfs_node_blockptr(eb, parent_slot);
+                        child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+
+                        eb = read_tree_block(root, child_bytenr, child_bsize,
+                                             child_gen);
+                        if (!eb || !extent_buffer_uptodate(eb)) {
+                                ret = -EIO;
+                                goto out;
+                        }
+
+                        path->nodes[level] = eb;
+                        path->slots[level] = 0;
+
+                        btrfs_tree_read_lock(eb);
+                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+
+                        ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+                                                      root->objectid,
+                                                      child_bytenr,
+                                                      child_bsize,
+                                                      BTRFS_QGROUP_OPER_SUB_SUBTREE,
+                                                      0);
+                        if (ret)
+                                goto out;
+
+                }
+
+                if (level == 0) {
+                        ret = account_leaf_items(trans, root, path->nodes[level]);
+                        if (ret)
+                                goto out;
+
+                        /* Nonzero return here means we completed our search */
+                        ret = adjust_slots_upwards(root, path, root_level);
+                        if (ret)
+                                break;
+
+                        /* Restart search with new slots */
+                        goto walk_down;
+                }
+
+                level--;
+        }
+
+        ret = 0;
+out:
+        btrfs_free_path(path);
+
+        return ret;
+}
+
 /*
  * helper to process tree block while walking down the tree.
  *
@@ -7581,6 +7795,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
         int level = wc->level;
         int reada = 0;
         int ret = 0;
+        bool need_account = false;
 
         generation = btrfs_node_ptr_generation(path->nodes[level],
                                                path->slots[level]);
@@ -7626,6 +7841,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
         if (wc->stage == DROP_REFERENCE) {
                 if (wc->refs[level - 1] > 1) {
+                        need_account = true;
                         if (level == 1 &&
                             (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
                                 goto skip;
@@ -7689,6 +7905,16 @@ skip:
                 parent = 0;
         }
 
+        if (need_account) {
+                ret = account_shared_subtree(trans, root, next,
+                                             generation, level - 1);
+                if (ret) {
+                        printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+                                           "%d accounting shared subtree. Quota "
+                                           "is out of sync, rescan required.\n",
+                                           root->fs_info->sb->s_id, ret);
+                }
+        }
         ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
                                 root->root_key.objectid, level - 1, 0, 0);
         BUG_ON(ret); /* -ENOMEM */
@@ -7773,6 +7999,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                         else
                                 ret = btrfs_dec_ref(trans, root, eb, 0);
                         BUG_ON(ret); /* -ENOMEM */
+                        ret = account_leaf_items(trans, root, eb);
+                        if (ret) {
+                                printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+                                                   "%d accounting leaf items. Quota "
+                                                   "is out of sync, rescan required.\n",
+                                                   root->fs_info->sb->s_id, ret);
+                        }
                 }
                 /* make block locked assertion in clean_tree_block happy */
                 if (!path->locks[level] &&
@@ -7898,6 +8131,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
         int level;
         bool root_dropped = false;
 
+        btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
+
         path = btrfs_alloc_path();
         if (!path) {
                 err = -ENOMEM;
@@ -8023,6 +8258,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                                 goto out_end_trans;
                         }
 
+                        /*
+                         * Qgroup update accounting is run from
+                         * delayed ref handling. This usually works
+                         * out because delayed refs are normally the
+                         * only way qgroup updates are added. However,
+                         * we may have added updates during our tree
+                         * walk so run qgroups here to make sure we
+                         * don't lose any updates.
+                         */
+                        ret = btrfs_delayed_qgroup_accounting(trans,
+                                                              root->fs_info);
+                        if (ret)
+                                printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+                                                   "running qgroup updates "
+                                                   "during snapshot delete. "
+                                                   "Quota is out of sync, "
+                                                   "rescan required.\n", ret);
+
                         btrfs_end_transaction_throttle(trans, tree_root);
                         if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
                                 pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8076,6 +8329,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
         }
         root_dropped = true;
 out_end_trans:
+        ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
+        if (ret)
+                printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+                                   "running qgroup updates "
+                                   "during snapshot delete. "
+                                   "Quota is out of sync, "
+                                   "rescan required.\n", ret);
+
         btrfs_end_transaction_throttle(trans, tree_root);
 out_free:
         kfree(wc);
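
Note: the descend-to-leaf, bump-slots, re-descend pattern implemented by account_shared_subtree() and adjust_slots_upwards() above is easier to see stripped of the kernel types. The following is a small, self-contained userspace sketch of the same walk; the toy_node/toy_path structures and toy_* functions are invented stand-ins with no locking, no I/O and no qgroup calls, not the real extent_buffer/btrfs_path API.

#include <stdio.h>

#define MAX_LEVEL 8

/* Invented stand-ins for extent_buffer and btrfs_path. */
struct toy_node {
        int nritems;                    /* children at a node, items at a leaf */
        struct toy_node *child[4];
};

struct toy_path {
        struct toy_node *nodes[MAX_LEVEL];
        int slots[MAX_LEVEL];
};

/*
 * Mirror of adjust_slots_upwards(): bump slots from the bottom up, dropping
 * fully-visited nodes from the path. Returns 1 once the root is exhausted.
 */
static int toy_adjust_slots_upwards(struct toy_path *path, int root_level)
{
        int level = 0;

        if (root_level == 0)
                return 1;

        while (level <= root_level) {
                struct toy_node *node = path->nodes[level];

                path->slots[level]++;
                if (path->slots[level] >= node->nritems || level == 0) {
                        /* never drop the root itself from the path */
                        if (level != root_level) {
                                path->nodes[level] = NULL;
                                path->slots[level] = 0;
                        }
                } else {
                        /* found a valid slot to walk back down from */
                        break;
                }
                level++;
        }

        return path->slots[root_level] >= path->nodes[root_level]->nritems;
}

/* Mirror of the walk_down loop in account_shared_subtree(). */
static void toy_walk(struct toy_node *root, int root_level)
{
        struct toy_path path = { { NULL } };
        int level;

        path.nodes[root_level] = root;
walk_down:
        level = root_level;
        while (level >= 0) {
                if (path.nodes[level] == NULL) {
                        /* read the child selected by the parent's slot */
                        struct toy_node *parent = path.nodes[level + 1];

                        path.nodes[level] = parent->child[path.slots[level + 1]];
                        path.slots[level] = 0;
                }

                if (level == 0) {
                        printf("visit leaf with %d items\n", path.nodes[0]->nritems);
                        if (toy_adjust_slots_upwards(&path, root_level))
                                return;         /* whole subtree visited */
                        goto walk_down;
                }
                level--;
        }
}

int main(void)
{
        struct toy_node leaf1 = { .nritems = 3 };
        struct toy_node leaf2 = { .nritems = 5 };
        struct toy_node root = { .nritems = 2, .child = { &leaf1, &leaf2 } };

        toy_walk(&root, 1);
        return 0;
}

Compiled and run, the sketch visits leaf1 and then leaf2 and stops once the root's slot counter passes its item count, which is the same termination condition adjust_slots_upwards() reports by returning 1.
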
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2630f9..65b62c467e28 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,50 @@ out:
         mutex_unlock(&fs_info->qgroup_ioctl_lock);
         return ret;
 }
+
+static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
+                           struct btrfs_qgroup_operation *oper2)
+{
+        /*
+         * Ignore seq and type here, we're looking for any operation
+         * at all related to this extent on that root.
+         */
+        if (oper1->bytenr < oper2->bytenr)
+                return -1;
+        if (oper1->bytenr > oper2->bytenr)
+                return 1;
+        if (oper1->ref_root < oper2->ref_root)
+                return -1;
+        if (oper1->ref_root > oper2->ref_root)
+                return 1;
+        return 0;
+}
+
+static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
+                              struct btrfs_qgroup_operation *oper)
+{
+        struct rb_node *n;
+        struct btrfs_qgroup_operation *cur;
+        int cmp;
+
+        spin_lock(&fs_info->qgroup_op_lock);
+        n = fs_info->qgroup_op_tree.rb_node;
+        while (n) {
+                cur = rb_entry(n, struct btrfs_qgroup_operation, n);
+                cmp = comp_oper_exist(cur, oper);
+                if (cmp < 0) {
+                        n = n->rb_right;
+                } else if (cmp) {
+                        n = n->rb_left;
+                } else {
+                        spin_unlock(&fs_info->qgroup_op_lock);
+                        return -EEXIST;
+                }
+        }
+        spin_unlock(&fs_info->qgroup_op_lock);
+        return 0;
+}
+
 static int comp_oper(struct btrfs_qgroup_operation *oper1,
                      struct btrfs_qgroup_operation *oper2)
 {
@@ -1290,6 +1334,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
         oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
         INIT_LIST_HEAD(&oper->elem.list);
         oper->elem.seq = 0;
+
+        if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
+                /*
+                 * If any operation for this bytenr/ref_root combo
+                 * exists, then we know it's not exclusively owned and
+                 * shouldn't be queued up.
+                 *
+                 * This also catches the case where we have a cloned
+                 * extent that gets queued up multiple times during
+                 * drop snapshot.
+                 */
+                if (qgroup_oper_exists(fs_info, oper)) {
+                        kfree(oper);
+                        return 0;
+                }
+        }
+
         ret = insert_qgroup_oper(fs_info, oper);
         if (ret) {
                 /* Shouldn't happen so have an assert for developers */
@@ -1884,6 +1945,106 @@ out:
 }
 
 /*
+ * Process a reference to a shared subtree. This type of operation is
+ * queued during snapshot removal when we encounter extents which are
+ * shared between more than one root.
+ */
+static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
+                                     struct btrfs_fs_info *fs_info,
+                                     struct btrfs_qgroup_operation *oper)
+{
+        struct ulist *roots = NULL;
+        struct ulist_node *unode;
+        struct ulist_iterator uiter;
+        struct btrfs_qgroup_list *glist;
+        struct ulist *parents;
+        int ret = 0;
+        struct btrfs_qgroup *qg;
+        u64 root_obj = 0;
+        struct seq_list elem = {};
+
+        parents = ulist_alloc(GFP_NOFS);
+        if (!parents)
+                return -ENOMEM;
+
+        btrfs_get_tree_mod_seq(fs_info, &elem);
+        ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+                                   elem.seq, &roots);
+        btrfs_put_tree_mod_seq(fs_info, &elem);
+        if (ret < 0)
+                return ret;
+
+        if (roots->nnodes != 1)
+                goto out;
+
+        ULIST_ITER_INIT(&uiter);
+        unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
+        /*
+         * If we find our ref root then that means all refs
+         * this extent has to the root have not yet been
+         * deleted. In that case, we do nothing and let the
+         * last ref for this bytenr drive our update.
+         *
+         * This can happen for example if an extent is
+         * referenced multiple times in a snapshot (clone,
+         * etc). If we are in the middle of snapshot removal,
+         * queued updates for such an extent will find the
+         * root if we have not yet finished removing the
+         * snapshot.
+         */
+        if (unode->val == oper->ref_root)
+                goto out;
+
+        root_obj = unode->val;
+        BUG_ON(!root_obj);
+
+        spin_lock(&fs_info->qgroup_lock);
+        qg = find_qgroup_rb(fs_info, root_obj);
+        if (!qg)
+                goto out_unlock;
+
+        qg->excl += oper->num_bytes;
+        qg->excl_cmpr += oper->num_bytes;
+        qgroup_dirty(fs_info, qg);
+
+        /*
+         * Adjust counts for parent groups. First we find all
+         * parents, then in the 2nd loop we do the adjustment
+         * while adding parents of the parents to our ulist.
+         */
+        list_for_each_entry(glist, &qg->groups, next_group) {
+                ret = ulist_add(parents, glist->group->qgroupid,
+                                ptr_to_u64(glist->group), GFP_ATOMIC);
+                if (ret < 0)
+                        goto out_unlock;
+        }
+
+        ULIST_ITER_INIT(&uiter);
+        while ((unode = ulist_next(parents, &uiter))) {
+                qg = u64_to_ptr(unode->aux);
+                qg->excl += oper->num_bytes;
+                qg->excl_cmpr += oper->num_bytes;
+                qgroup_dirty(fs_info, qg);
+
+                /* Add any parents of the parents */
+                list_for_each_entry(glist, &qg->groups, next_group) {
+                        ret = ulist_add(parents, glist->group->qgroupid,
+                                        ptr_to_u64(glist->group), GFP_ATOMIC);
+                        if (ret < 0)
+                                goto out_unlock;
+                }
+        }
+
+out_unlock:
+        spin_unlock(&fs_info->qgroup_lock);
+
+out:
+        ulist_free(roots);
+        ulist_free(parents);
+        return ret;
+}
+
+/*
  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
  * from the fs. First, all roots referencing the extent are searched, and
  * then the space is accounted accordingly to the different roots. The
@@ -1920,6 +2081,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
         case BTRFS_QGROUP_OPER_SUB_SHARED:
                 ret = qgroup_shared_accounting(trans, fs_info, oper);
                 break;
+        case BTRFS_QGROUP_OPER_SUB_SUBTREE:
+                ret = qgroup_subtree_accounting(trans, fs_info, oper);
+                break;
         default:
                 ASSERT(0);
         }
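
Note: btrfs_qgroup_record_ref() above silently drops a SUB_SUBTREE operation when qgroup_oper_exists() finds any queued operation for the same (bytenr, ref_root) pair. The rough userspace sketch below applies the same comparison to a plain linked list; the kernel keeps these operations in the qgroup_op_tree rb-tree under qgroup_op_lock, and every toy_* name here is invented for the example.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Invented stand-in for struct btrfs_qgroup_operation. */
struct toy_oper {
        uint64_t bytenr;
        uint64_t ref_root;
        struct toy_oper *next;
};

/* Same ordering rule as comp_oper_exist(): seq and type are ignored. */
static int toy_comp_oper_exist(const struct toy_oper *a, const struct toy_oper *b)
{
        if (a->bytenr != b->bytenr)
                return a->bytenr < b->bytenr ? -1 : 1;
        if (a->ref_root != b->ref_root)
                return a->ref_root < b->ref_root ? -1 : 1;
        return 0;
}

/* List scan standing in for the rb-tree walk in qgroup_oper_exists(). */
static int toy_oper_exists(const struct toy_oper *head, const struct toy_oper *oper)
{
        for (; head; head = head->next)
                if (!toy_comp_oper_exist(head, oper))
                        return 1;
        return 0;
}

/* Queue an operation unless one for the same bytenr/ref_root is already queued. */
static void toy_record_ref(struct toy_oper **head, uint64_t bytenr, uint64_t ref_root)
{
        struct toy_oper probe = { .bytenr = bytenr, .ref_root = ref_root };
        struct toy_oper *oper;

        if (toy_oper_exists(*head, &probe)) {
                printf("skip duplicate: bytenr=%llu root=%llu\n",
                       (unsigned long long)bytenr, (unsigned long long)ref_root);
                return;
        }

        oper = malloc(sizeof(*oper));
        if (!oper)
                return;
        *oper = probe;
        oper->next = *head;
        *head = oper;
        printf("queued: bytenr=%llu root=%llu\n",
               (unsigned long long)bytenr, (unsigned long long)ref_root);
}

int main(void)
{
        struct toy_oper *queue = NULL;

        /* An extent seen twice by the same root is only queued once. */
        toy_record_ref(&queue, 12288, 257);
        toy_record_ref(&queue, 12288, 257);
        toy_record_ref(&queue, 16384, 257);

        while (queue) {
                struct toy_oper *next = queue->next;

                free(queue);
                queue = next;
        }
        return 0;
}

Running it queues the first and third operations and skips the second, mirroring how an extent encountered several times during drop snapshot (a cloned extent, for instance) is only recorded once.
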
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5952ff1fbd7a..18cc68ca3090 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -44,6 +44,7 @@ enum btrfs_qgroup_operation_type {
         BTRFS_QGROUP_OPER_ADD_SHARED,
         BTRFS_QGROUP_OPER_SUB_EXCL,
         BTRFS_QGROUP_OPER_SUB_SHARED,
+        BTRFS_QGROUP_OPER_SUB_SUBTREE,
 };
 
 struct btrfs_qgroup_operation {
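
Note: once qgroup_subtree_accounting() has found that exactly one root still references the subtree, it adds oper->num_bytes to that qgroup's excl and excl_cmpr counters and then to those of every ancestor qgroup, using a ulist so an ancestor reachable through more than one membership path is only adjusted once. A minimal userspace sketch of that upward propagation, with a linear visited check standing in for the ulist and an invented toy_qgroup type, might look like this:

#include <stdio.h>
#include <stdint.h>

#define MAX_QGROUPS 16

/* Invented stand-in for struct btrfs_qgroup; only what the sketch needs. */
struct toy_qgroup {
        uint64_t qgroupid;
        uint64_t excl;                  /* exclusively referenced bytes */
        uint64_t excl_cmpr;             /* exclusive, compressed counterpart */
        int nparents;
        struct toy_qgroup *parent[4];   /* groups this qgroup is a member of */
};

/* Linear scan standing in for the kernel's ulist membership test. */
static int toy_seen(struct toy_qgroup **seen, int nseen, struct toy_qgroup *qg)
{
        int i;

        for (i = 0; i < nseen; i++)
                if (seen[i] == qg)
                        return 1;
        return 0;
}

/*
 * Credit num_bytes to the remaining owner and to every ancestor group,
 * visiting each ancestor exactly once (breadth-first over parents).
 */
static void toy_account_subtree(struct toy_qgroup *owner, uint64_t num_bytes)
{
        struct toy_qgroup *queue[MAX_QGROUPS];
        int head = 0, tail = 0, i;

        owner->excl += num_bytes;
        owner->excl_cmpr += num_bytes;
        queue[tail++] = owner;

        while (head < tail) {
                struct toy_qgroup *cur = queue[head++];

                for (i = 0; i < cur->nparents; i++) {
                        struct toy_qgroup *p = cur->parent[i];

                        if (tail >= MAX_QGROUPS || toy_seen(queue, tail, p))
                                continue;
                        p->excl += num_bytes;
                        p->excl_cmpr += num_bytes;
                        queue[tail++] = p;
                }
        }
}

int main(void)
{
        struct toy_qgroup sub = { .qgroupid = 257 };
        struct toy_qgroup mid = { .qgroupid = (1ULL << 48) | 100 };
        struct toy_qgroup top = { .qgroupid = (2ULL << 48) | 100 };

        sub.nparents = 1;
        sub.parent[0] = &mid;
        mid.nparents = 1;
        mid.parent[0] = &top;

        toy_account_subtree(&sub, 16384);
        printf("sub excl=%llu mid excl=%llu top excl=%llu\n",
               (unsigned long long)sub.excl,
               (unsigned long long)mid.excl,
               (unsigned long long)top.excl);
        return 0;
}

With the 257 -> 1/100 -> 2/100 chain built in main(), each of the three groups is credited with the 16384 bytes exactly once.
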