-rw-r--r--  fs/btrfs/ctree.h                2
-rw-r--r--  fs/btrfs/extent-tree.c          5
-rw-r--r--  fs/btrfs/qgroup.c             864
-rw-r--r--  fs/btrfs/qgroup.h              49
-rw-r--r--  include/trace/events/btrfs.h   55
5 files changed, 3 insertions, 972 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 881549a35fca..0498f5cd8752 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1736,7 +1736,7 @@ struct btrfs_fs_info {
 	/* list of dirty qgroups to be written at next commit */
 	struct list_head dirty_qgroups;
 
-	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
+	/* used by qgroup for an efficient tree traversal */
 	u64 qgroup_seq;
 
 	/* qgroup rescan items */
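
The reworded comment above points at fs_info->qgroup_seq, which the accounting code treats as a moving "zero" for per-qgroup refcounts instead of clearing them between passes; the qgroup.c hunks below show it being advanced by old_roots + 1 per pass. A self-contained sketch of that reservation step, with simplified standalone types rather than the kernel's:

#include <stdint.h>

/* Simplified stand-in for the qgroup_seq field of btrfs_fs_info. */
struct fs_model {
	uint64_t qgroup_seq;
};

/*
 * Reserve a window of sequence numbers for one accounting pass.  Any
 * refcount written under an older seq compares below the new base and
 * therefore reads as zero, so no per-qgroup reset is needed.
 */
static uint64_t reserve_seq_window(struct fs_model *fs, uint64_t nr_roots)
{
	uint64_t seq = fs->qgroup_seq;

	/* mirrors "fs_info->qgroup_seq += old_roots + 1" in the diff below */
	fs->qgroup_seq += nr_roots + 1;
	return seq;
}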
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b76b42d95619..1acd63fcb252 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1981,7 +1981,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	u64 refs;
 	int ret;
 	int no_quota = node->no_quota;
-	enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -2009,8 +2008,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	refs = btrfs_extent_refs(leaf, item);
-	if (refs)
-		type = BTRFS_QGROUP_OPER_ADD_SHARED;
 	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
 	if (extent_op)
 		__run_delayed_extent_op(extent_op, leaf, item);
@@ -6112,7 +6109,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	u64 bytenr = node->bytenr;
 	u64 num_bytes = node->num_bytes;
 	int last_ref = 0;
-	enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
 	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
 						 SKINNY_METADATA);
 
@@ -6293,7 +6289,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	refs -= refs_to_drop;
 
 	if (refs > 0) {
-		type = BTRFS_QGROUP_OPER_SUB_SHARED;
 		if (extent_op)
 			__run_delayed_extent_op(extent_op, leaf, ei);
 		/*
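
All four extent-tree.c hunks delete the same pattern: before queuing a qgroup operation, the caller classified it as exclusive or shared by peeking at the extent's refcount. A distilled, self-contained restatement of that removed decision (the enum mirrors the first four operation types removed from qgroup.h further down; the standalone framing is ours):

#include <stdint.h>

enum qgroup_oper_type {
	OPER_ADD_EXCL,		/* first ref: the extent was exclusively ours */
	OPER_ADD_SHARED,	/* extent already had refs: it becomes shared */
	OPER_SUB_EXCL,		/* dropping the only ref */
	OPER_SUB_SHARED,	/* other refs remain after the drop */
};

/* Classify an add: if the extent item already carried refs, the new
 * reference shares it; otherwise this allocation owns it exclusively. */
static enum qgroup_oper_type classify_add(uint64_t refs_before)
{
	return refs_before ? OPER_ADD_SHARED : OPER_ADD_EXCL;
}

/* Classify a drop symmetrically, on the refcount left afterwards. */
static enum qgroup_oper_type classify_sub(uint64_t refs_after)
{
	return refs_after ? OPER_SUB_SHARED : OPER_SUB_EXCL;
}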
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 693533d9e22b..c5aa0d34940e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -34,6 +34,7 @@
 #include "extent_io.h"
 #include "qgroup.h"
 
+
 /* TODO XXX FIXME
  * - subvol delete -> delete when ref goes to 0? delete limits also?
  * - reorganize keys
@@ -1387,172 +1388,6 @@ out:
 	return ret;
 }
 
-static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
-			   struct btrfs_qgroup_operation *oper2)
-{
-	/*
-	 * Ignore seq and type here, we're looking for any operation
-	 * at all related to this extent on that root.
-	 */
-	if (oper1->bytenr < oper2->bytenr)
-		return -1;
-	if (oper1->bytenr > oper2->bytenr)
-		return 1;
-	if (oper1->ref_root < oper2->ref_root)
-		return -1;
-	if (oper1->ref_root > oper2->ref_root)
-		return 1;
-	return 0;
-}
-
-static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
-			      struct btrfs_qgroup_operation *oper)
-{
-	struct rb_node *n;
-	struct btrfs_qgroup_operation *cur;
-	int cmp;
-
-	spin_lock(&fs_info->qgroup_op_lock);
-	n = fs_info->qgroup_op_tree.rb_node;
-	while (n) {
-		cur = rb_entry(n, struct btrfs_qgroup_operation, n);
-		cmp = comp_oper_exist(cur, oper);
-		if (cmp < 0) {
-			n = n->rb_right;
-		} else if (cmp) {
-			n = n->rb_left;
-		} else {
-			spin_unlock(&fs_info->qgroup_op_lock);
-			return -EEXIST;
-		}
-	}
-	spin_unlock(&fs_info->qgroup_op_lock);
-	return 0;
-}
-
-static int comp_oper(struct btrfs_qgroup_operation *oper1,
-		     struct btrfs_qgroup_operation *oper2)
-{
-	if (oper1->bytenr < oper2->bytenr)
-		return -1;
-	if (oper1->bytenr > oper2->bytenr)
-		return 1;
-	if (oper1->ref_root < oper2->ref_root)
-		return -1;
-	if (oper1->ref_root > oper2->ref_root)
-		return 1;
-	if (oper1->seq < oper2->seq)
-		return -1;
-	if (oper1->seq > oper2->seq)
-		return 1;
-	if (oper1->type < oper2->type)
-		return -1;
-	if (oper1->type > oper2->type)
-		return 1;
-	return 0;
-}
-
-static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
-			      struct btrfs_qgroup_operation *oper)
-{
-	struct rb_node **p;
-	struct rb_node *parent = NULL;
-	struct btrfs_qgroup_operation *cur;
-	int cmp;
-
-	spin_lock(&fs_info->qgroup_op_lock);
-	p = &fs_info->qgroup_op_tree.rb_node;
-	while (*p) {
-		parent = *p;
-		cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
-		cmp = comp_oper(cur, oper);
-		if (cmp < 0) {
-			p = &(*p)->rb_right;
-		} else if (cmp) {
-			p = &(*p)->rb_left;
-		} else {
-			spin_unlock(&fs_info->qgroup_op_lock);
-			return -EEXIST;
-		}
-	}
-	rb_link_node(&oper->n, parent, p);
-	rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
-	spin_unlock(&fs_info->qgroup_op_lock);
-	return 0;
-}
-
-/*
- * Record a quota operation for processing later on.
- * @trans: the transaction we are adding the delayed op to.
- * @fs_info: the fs_info for this fs.
- * @ref_root: the root of the reference we are acting on,
- * @bytenr: the bytenr we are acting on.
- * @num_bytes: the number of bytes in the reference.
- * @type: the type of operation this is.
- * @mod_seq: do we need to get a sequence number for looking up roots.
- *
- * We just add it to our trans qgroup_ref_list and carry on and process these
- * operations in order at some later point.  If the reference root isn't a fs
- * root then we don't bother with doing anything.
- *
- * MUST BE HOLDING THE REF LOCK.
- */
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info, u64 ref_root,
-			    u64 bytenr, u64 num_bytes,
-			    enum btrfs_qgroup_operation_type type, int mod_seq)
-{
-	struct btrfs_qgroup_operation *oper;
-	int ret;
-
-	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
-		return 0;
-
-	oper = kmalloc(sizeof(*oper), GFP_NOFS);
-	if (!oper)
-		return -ENOMEM;
-
-	oper->ref_root = ref_root;
-	oper->bytenr = bytenr;
-	oper->num_bytes = num_bytes;
-	oper->type = type;
-	oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
-	INIT_LIST_HEAD(&oper->elem.list);
-	oper->elem.seq = 0;
-
-	trace_btrfs_qgroup_record_ref(oper);
-
-	if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
-		/*
-		 * If any operation for this bytenr/ref_root combo
-		 * exists, then we know it's not exclusively owned and
-		 * shouldn't be queued up.
-		 *
-		 * This also catches the case where we have a cloned
-		 * extent that gets queued up multiple times during
-		 * drop snapshot.
-		 */
-		if (qgroup_oper_exists(fs_info, oper)) {
-			kfree(oper);
-			return 0;
-		}
-	}
-
-	ret = insert_qgroup_oper(fs_info, oper);
-	if (ret) {
-		/* Shouldn't happen so have an assert for developers */
-		ASSERT(0);
-		kfree(oper);
-		return ret;
-	}
-	list_add_tail(&oper->list, &trans->qgroup_ref_list);
-
-	if (mod_seq)
-		btrfs_get_tree_mod_seq(fs_info, &oper->elem);
-
-	return 0;
-}
-
 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info)
 {
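
For reference, the comp_oper() deleted above ordered the per-filesystem operation tree by a four-field key, so every queued operation was unique while all operations on one extent stayed adjacent; comp_oper_exist() was the same comparison cut off after ref_root, which is how the SUB_SUBTREE existence probe matched any queued operation for a given (bytenr, ref_root) pair. A standalone restatement of that ordering, minus the rb-tree plumbing:

#include <stdint.h>

struct oper_key {
	uint64_t bytenr;	/* primary: extent start */
	uint64_t ref_root;	/* secondary: acting subvolume */
	uint64_t seq;		/* tertiary: global operation sequence */
	int type;		/* last: operation type */
};

/* Same ordering as the removed comp_oper(): compare field by field,
 * falling through to the next field only on equality. */
static int comp_oper_key(const struct oper_key *a, const struct oper_key *b)
{
	if (a->bytenr != b->bytenr)
		return a->bytenr < b->bytenr ? -1 : 1;
	if (a->ref_root != b->ref_root)
		return a->ref_root < b->ref_root ? -1 : 1;
	if (a->seq != b->seq)
		return a->seq < b->seq ? -1 : 1;
	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;
	return 0;
}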
@@ -1606,264 +1441,6 @@ struct btrfs_qgroup_extent_record
 	return NULL;
 }
 
-/*
- * The easy accounting, if we are adding/removing the only ref for an extent
- * then this qgroup and all of the parent qgroups get their refrence and
- * exclusive counts adjusted.
- */
-static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
-				  struct btrfs_qgroup_operation *oper)
-{
-	struct ulist *tmp;
-	int sign = 0;
-	int ret = 0;
-
-	tmp = ulist_alloc(GFP_NOFS);
-	if (!tmp)
-		return -ENOMEM;
-
-	spin_lock(&fs_info->qgroup_lock);
-	if (!fs_info->quota_root)
-		goto out;
-
-	switch (oper->type) {
-	case BTRFS_QGROUP_OPER_ADD_EXCL:
-		sign = 1;
-		break;
-	case BTRFS_QGROUP_OPER_SUB_EXCL:
-		sign = -1;
-		break;
-	default:
-		ASSERT(0);
-	}
-	ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
-				       oper->num_bytes, sign);
-out:
-	spin_unlock(&fs_info->qgroup_lock);
-	ulist_free(tmp);
-	return ret;
-}
-
-/*
- * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
- * properly.
- */
-static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
-				  u64 root_to_skip, struct ulist *tmp,
-				  struct ulist *roots, struct ulist *qgroups,
-				  u64 seq, int *old_roots, int rescan)
-{
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
-	struct ulist_node *tmp_unode;
-	struct ulist_iterator tmp_uiter;
-	struct btrfs_qgroup *qg;
-	int ret;
-
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(roots, &uiter))) {
-		/* We don't count our current root here */
-		if (unode->val == root_to_skip)
-			continue;
-		qg = find_qgroup_rb(fs_info, unode->val);
-		if (!qg)
-			continue;
-		/*
-		 * We could have a pending removal of this same ref so we may
-		 * not have actually found our ref root when doing
-		 * btrfs_find_all_roots, so we need to keep track of how many
-		 * old roots we find in case we removed ours and added a
-		 * different one at the same time.  I don't think this could
-		 * happen in practice but that sort of thinking leads to pain
-		 * and suffering and to the dark side.
-		 */
-		(*old_roots)++;
-
-		ulist_reinit(tmp);
-		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
-				GFP_ATOMIC);
-		if (ret < 0)
-			return ret;
-		ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
-		if (ret < 0)
-			return ret;
-		ULIST_ITER_INIT(&tmp_uiter);
-		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-			struct btrfs_qgroup_list *glist;
-			int mod;
-
-			qg = u64_to_ptr(tmp_unode->aux);
-			/*
-			 * We use this sequence number to keep from having to
-			 * run the whole list and 0 out the refcnt every time.
-			 * We basically use sequnce as the known 0 count and
-			 * then add 1 everytime we see a qgroup.  This is how we
-			 * get how many of the roots actually point up to the
-			 * upper level qgroups in order to determine exclusive
-			 * counts.
-			 *
-			 * For rescan none of the extent is recorded before so
-			 * we just don't add old_refcnt.
-			 */
-			if (rescan)
-				mod = 0;
-			else
-				mod = 1;
-			btrfs_qgroup_update_old_refcnt(qg, seq, mod);
-			btrfs_qgroup_update_new_refcnt(qg, seq, 1);
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ret = ulist_add(qgroups, glist->group->qgroupid,
-						ptr_to_u64(glist->group),
-						GFP_ATOMIC);
-				if (ret < 0)
-					return ret;
-				ret = ulist_add(tmp, glist->group->qgroupid,
-						ptr_to_u64(glist->group),
-						GFP_ATOMIC);
-				if (ret < 0)
-					return ret;
-			}
-		}
-	}
-	return 0;
-}
-
-/*
- * We need to walk forward in our operation tree and account for any roots that
- * were deleted after we made this operation.
- */
-static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
-				       struct btrfs_qgroup_operation *oper,
-				       struct ulist *tmp,
-				       struct ulist *qgroups, u64 seq,
-				       int *old_roots)
-{
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
-	struct btrfs_qgroup *qg;
-	struct btrfs_qgroup_operation *tmp_oper;
-	struct rb_node *n;
-	int ret;
-
-	ulist_reinit(tmp);
-
-	/*
-	 * We only walk forward in the tree since we're only interested in
-	 * removals that happened _after_ our operation.
-	 */
-	spin_lock(&fs_info->qgroup_op_lock);
-	n = rb_next(&oper->n);
-	spin_unlock(&fs_info->qgroup_op_lock);
-	if (!n)
-		return 0;
-	tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
-	while (tmp_oper->bytenr == oper->bytenr) {
-		/*
-		 * If it's not a removal we don't care, additions work out
-		 * properly with our refcnt tracking.
-		 */
-		if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
-		    tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
-			goto next;
-		qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
-		if (!qg)
-			goto next;
-		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
-				GFP_ATOMIC);
-		if (ret) {
-			if (ret < 0)
-				return ret;
-			/*
-			 * We only want to increase old_roots if this qgroup is
-			 * not already in the list of qgroups.  If it is already
-			 * there then that means it must have been re-added or
-			 * the delete will be discarded because we had an
-			 * existing ref that we haven't looked up yet.  In this
-			 * case we don't want to increase old_roots.  So if ret
-			 * == 1 then we know that this is the first time we've
-			 * seen this qgroup and we can bump the old_roots.
-			 */
-			(*old_roots)++;
-			ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
-					GFP_ATOMIC);
-			if (ret < 0)
-				return ret;
-		}
-next:
-		spin_lock(&fs_info->qgroup_op_lock);
-		n = rb_next(&tmp_oper->n);
-		spin_unlock(&fs_info->qgroup_op_lock);
-		if (!n)
-			break;
-		tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
-	}
-
-	/* Ok now process the qgroups we found */
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(tmp, &uiter))) {
-		struct btrfs_qgroup_list *glist;
-
-		qg = u64_to_ptr(unode->aux);
-		btrfs_qgroup_update_old_refcnt(qg, seq, 1);
-		btrfs_qgroup_update_new_refcnt(qg, seq, 1);
-		list_for_each_entry(glist, &qg->groups, next_group) {
-			ret = ulist_add(qgroups, glist->group->qgroupid,
-					ptr_to_u64(glist->group), GFP_ATOMIC);
-			if (ret < 0)
-				return ret;
-			ret = ulist_add(tmp, glist->group->qgroupid,
-					ptr_to_u64(glist->group), GFP_ATOMIC);
-			if (ret < 0)
-				return ret;
-		}
-	}
-	return 0;
-}
-
-/* Add refcnt for the newly added reference. */
-static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
-				  struct btrfs_qgroup_operation *oper,
-				  struct btrfs_qgroup *qgroup,
-				  struct ulist *tmp, struct ulist *qgroups,
-				  u64 seq)
-{
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
-	struct btrfs_qgroup *qg;
-	int ret;
-
-	ulist_reinit(tmp);
-	ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
-			GFP_ATOMIC);
-	if (ret < 0)
-		return ret;
-	ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
-			GFP_ATOMIC);
-	if (ret < 0)
-		return ret;
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(tmp, &uiter))) {
-		struct btrfs_qgroup_list *glist;
-
-		qg = u64_to_ptr(unode->aux);
-		if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED)
-			btrfs_qgroup_update_new_refcnt(qg, seq, 1);
-		else
-			btrfs_qgroup_update_old_refcnt(qg, seq, 1);
-		list_for_each_entry(glist, &qg->groups, next_group) {
-			ret = ulist_add(tmp, glist->group->qgroupid,
-					ptr_to_u64(glist->group), GFP_ATOMIC);
-			if (ret < 0)
-				return ret;
-			ret = ulist_add(qgroups, glist->group->qgroupid,
-					ptr_to_u64(glist->group), GFP_ATOMIC);
-			if (ret < 0)
-				return ret;
-		}
-	}
-	return 0;
-}
-
 #define UPDATE_NEW	0
 #define UPDATE_OLD	1
 /*
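
The sequence-number comment inside the deleted qgroup_calc_old_refcnt() is worth unpacking, since the helpers it describes (btrfs_qgroup_update_old_refcnt() and its get/new counterparts) survive into the new code. A self-contained model of the behaviour those calls suggest, assuming the refcount stores seq + count so any value below the current base seq reads as zero:

#include <stdint.h>

/* Simplified model: the stored value encodes "seq + count", so anything
 * below the current pass's base seq means the count is still zero. */
struct qg_model {
	uint64_t old_refcnt;
};

static void refcnt_update(struct qg_model *qg, uint64_t seq, uint64_t mod)
{
	if (qg->old_refcnt < seq)	/* stale value from an earlier pass */
		qg->old_refcnt = seq + mod;
	else
		qg->old_refcnt += mod;
}

static uint64_t refcnt_get(const struct qg_model *qg, uint64_t seq)
{
	if (qg->old_refcnt < seq)
		return 0;
	return qg->old_refcnt - seq;
}

With that encoding, bumping fs_info->qgroup_seq past the largest count a pass could have handed out is all it takes to "zero" every qgroup at once.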
@@ -1925,6 +1502,7 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
 /*
  * Update qgroup rfer/excl counters.
  * Rfer update is easy, codes can explain themselves.
+ *
  * Excl update is tricky, the update is split into 2 part.
  * Part 1: Possible exclusive <-> sharing detect:
  *	|	A	|	!A	|
@@ -2042,419 +1620,6 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-/*
- * This adjusts the counters for all referenced qgroups if need be.
- */
-static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
-				  u64 root_to_skip, u64 num_bytes,
-				  struct ulist *qgroups, u64 seq,
-				  int old_roots, int new_roots, int rescan)
-{
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
-	struct btrfs_qgroup *qg;
-	u64 cur_new_count, cur_old_count;
-
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(qgroups, &uiter))) {
-		bool dirty = false;
-
-		qg = u64_to_ptr(unode->aux);
-		cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
-		cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
-
-		/*
-		 * Wasn't referenced before but is now, add to the reference
-		 * counters.
-		 */
-		if (cur_old_count == 0 && cur_new_count > 0) {
-			qg->rfer += num_bytes;
-			qg->rfer_cmpr += num_bytes;
-			dirty = true;
-		}
-
-		/*
-		 * Was referenced before but isn't now, subtract from the
-		 * reference counters.
-		 */
-		if (cur_old_count > 0 && cur_new_count == 0) {
-			qg->rfer -= num_bytes;
-			qg->rfer_cmpr -= num_bytes;
-			dirty = true;
-		}
-
-		/*
-		 * If our refcount was the same as the roots previously but our
-		 * new count isn't the same as the number of roots now then we
-		 * went from having a exclusive reference on this range to not.
-		 */
-		if (old_roots && cur_old_count == old_roots &&
-		    (cur_new_count != new_roots || new_roots == 0)) {
-			WARN_ON(cur_new_count != new_roots && new_roots == 0);
-			qg->excl -= num_bytes;
-			qg->excl_cmpr -= num_bytes;
-			dirty = true;
-		}
-
-		/*
-		 * If we didn't reference all the roots before but now we do we
-		 * have an exclusive reference to this range.
-		 */
-		if ((!old_roots || (old_roots && cur_old_count != old_roots))
-		    && cur_new_count == new_roots) {
-			qg->excl += num_bytes;
-			qg->excl_cmpr += num_bytes;
-			dirty = true;
-		}
-
-		if (dirty)
-			qgroup_dirty(fs_info, qg);
-	}
-	return 0;
-}
-
-/*
- * If we removed a data extent and there were other references for that bytenr
- * then we need to lookup all referenced roots to make sure we still don't
- * reference this bytenr.  If we do then we can just discard this operation.
- */
-static int check_existing_refs(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info,
-			       struct btrfs_qgroup_operation *oper)
-{
-	struct ulist *roots = NULL;
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
-	int ret = 0;
-
-	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
-				   oper->elem.seq, &roots);
-	if (ret < 0)
-		return ret;
-	ret = 0;
-
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(roots, &uiter))) {
-		if (unode->val == oper->ref_root) {
-			ret = 1;
-			break;
-		}
-	}
-	ulist_free(roots);
-	btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-
-	return ret;
-}
-
-/*
- * If we share a reference across multiple roots then we may need to adjust
- * various qgroups referenced and exclusive counters.  The basic premise is this
- *
- * 1) We have seq to represent a 0 count.  Instead of looping through all of the
- * qgroups and resetting their refcount to 0 we just constantly bump this
- * sequence number to act as the base reference count.  This means that if
- * anybody is equal to or below this sequence they were never referenced.  We
- * jack this sequence up by the number of roots we found each time in order to
- * make sure we don't have any overlap.
- *
- * 2) We first search all the roots that reference the area _except_ the root
- * we're acting on currently.  This makes up the old_refcnt of all the qgroups
- * before.
- *
- * 3) We walk all of the qgroups referenced by the root we are currently acting
- * on, and will either adjust old_refcnt in the case of a removal or the
- * new_refcnt in the case of an addition.
- *
- * 4) Finally we walk all the qgroups that are referenced by this range
- * including the root we are acting on currently.  We will adjust the counters
- * based on the number of roots we had and will have after this operation.
- *
- * Take this example as an illustration
- *
- *            [qgroup 1/0]
- *           /      |      \
- *     [qg 0/0] [qg 0/1] [qg 0/2]
- *           \      |      /
- *            [   extent   ]
- *
- * Say we are adding a reference that is covered by qg 0/0.  The first step
- * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
- * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
- * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
- * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
- * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
- * reference and thus must add the size to the referenced bytes.  Everything
- * else is the same so nothing else changes.
- */
-static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
-				    struct btrfs_fs_info *fs_info,
-				    struct btrfs_qgroup_operation *oper)
-{
-	struct ulist *roots = NULL;
-	struct ulist *qgroups, *tmp;
-	struct btrfs_qgroup *qgroup;
-	struct seq_list elem = SEQ_LIST_INIT(elem);
-	u64 seq;
-	int old_roots = 0;
-	int new_roots = 0;
-	int ret = 0;
-
-	if (oper->elem.seq) {
-		ret = check_existing_refs(trans, fs_info, oper);
-		if (ret < 0)
-			return ret;
-		if (ret)
-			return 0;
-	}
-
-	qgroups = ulist_alloc(GFP_NOFS);
-	if (!qgroups)
-		return -ENOMEM;
-
-	tmp = ulist_alloc(GFP_NOFS);
-	if (!tmp) {
-		ulist_free(qgroups);
-		return -ENOMEM;
-	}
-
-	btrfs_get_tree_mod_seq(fs_info, &elem);
-	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
-				   &roots);
-	btrfs_put_tree_mod_seq(fs_info, &elem);
-	if (ret < 0) {
-		ulist_free(qgroups);
-		ulist_free(tmp);
-		return ret;
-	}
-	spin_lock(&fs_info->qgroup_lock);
-	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
-	if (!qgroup)
-		goto out;
-	seq = fs_info->qgroup_seq;
-
-	/*
-	 * So roots is the list of all the roots currently pointing at the
-	 * bytenr, including the ref we are adding if we are adding, or not if
-	 * we are removing a ref.  So we pass in the ref_root to skip that root
-	 * in our calculations.  We set old_refnct and new_refcnt cause who the
-	 * hell knows what everything looked like before, and it doesn't matter
-	 * except...
-	 */
-	ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
-				     seq, &old_roots, 0);
-	if (ret < 0)
-		goto out;
-
-	/*
-	 * Now adjust the refcounts of the qgroups that care about this
-	 * reference, either the old_count in the case of removal or new_count
-	 * in the case of an addition.
-	 */
-	ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
-				     seq);
-	if (ret < 0)
-		goto out;
-
-	/*
-	 * ...in the case of removals.  If we had a removal before we got around
-	 * to processing this operation then we need to find that guy and count
-	 * his references as if they really existed so we don't end up screwing
-	 * up the exclusive counts.  Then whenever we go to process the delete
-	 * everything will be grand and we can account for whatever exclusive
-	 * changes need to be made there.  We also have to pass in old_roots so
-	 * we have an accurate count of the roots as it pertains to this
-	 * operations view of the world.
-	 */
-	ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
-					  &old_roots);
-	if (ret < 0)
-		goto out;
-
-	/*
-	 * We are adding our root, need to adjust up the number of roots,
-	 * otherwise old_roots is the number of roots we want.
-	 */
-	if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
-		new_roots = old_roots + 1;
-	} else {
-		new_roots = old_roots;
-		old_roots++;
-	}
-
-	/*
-	 * Bump qgroup_seq to avoid seq overlap
-	 * XXX: This makes qgroup_seq mismatch with oper->seq.
-	 */
-	fs_info->qgroup_seq += old_roots + 1;
-
-
-	/*
-	 * And now the magic happens, bless Arne for having a pretty elegant
-	 * solution for this.
-	 */
-	qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
-			       qgroups, seq, old_roots, new_roots, 0);
-out:
-	spin_unlock(&fs_info->qgroup_lock);
-	ulist_free(qgroups);
-	ulist_free(roots);
-	ulist_free(tmp);
-	return ret;
-}
-
-/*
- * Process a reference to a shared subtree. This type of operation is
- * queued during snapshot removal when we encounter extents which are
- * shared between more than one root.
- */
-static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
-				     struct btrfs_fs_info *fs_info,
-				     struct btrfs_qgroup_operation *oper)
-{
-	struct ulist *roots = NULL;
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
-	struct btrfs_qgroup_list *glist;
-	struct ulist *parents;
-	int ret = 0;
-	int err;
-	struct btrfs_qgroup *qg;
-	u64 root_obj = 0;
-	struct seq_list elem = SEQ_LIST_INIT(elem);
-
-	parents = ulist_alloc(GFP_NOFS);
-	if (!parents)
-		return -ENOMEM;
-
-	btrfs_get_tree_mod_seq(fs_info, &elem);
-	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
-				   elem.seq, &roots);
-	btrfs_put_tree_mod_seq(fs_info, &elem);
-	if (ret < 0)
-		goto out;
-
-	if (roots->nnodes != 1)
-		goto out;
-
-	ULIST_ITER_INIT(&uiter);
-	unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
-	/*
-	 * If we find our ref root then that means all refs
-	 * this extent has to the root have not yet been
-	 * deleted. In that case, we do nothing and let the
-	 * last ref for this bytenr drive our update.
-	 *
-	 * This can happen for example if an extent is
-	 * referenced multiple times in a snapshot (clone,
-	 * etc). If we are in the middle of snapshot removal,
-	 * queued updates for such an extent will find the
-	 * root if we have not yet finished removing the
-	 * snapshot.
-	 */
-	if (unode->val == oper->ref_root)
-		goto out;
-
-	root_obj = unode->val;
-	BUG_ON(!root_obj);
-
-	spin_lock(&fs_info->qgroup_lock);
-	qg = find_qgroup_rb(fs_info, root_obj);
-	if (!qg)
-		goto out_unlock;
-
-	qg->excl += oper->num_bytes;
-	qg->excl_cmpr += oper->num_bytes;
-	qgroup_dirty(fs_info, qg);
-
-	/*
-	 * Adjust counts for parent groups. First we find all
-	 * parents, then in the 2nd loop we do the adjustment
-	 * while adding parents of the parents to our ulist.
-	 */
-	list_for_each_entry(glist, &qg->groups, next_group) {
-		err = ulist_add(parents, glist->group->qgroupid,
-				ptr_to_u64(glist->group), GFP_ATOMIC);
-		if (err < 0) {
-			ret = err;
-			goto out_unlock;
-		}
-	}
-
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(parents, &uiter))) {
-		qg = u64_to_ptr(unode->aux);
-		qg->excl += oper->num_bytes;
-		qg->excl_cmpr += oper->num_bytes;
-		qgroup_dirty(fs_info, qg);
-
-		/* Add any parents of the parents */
-		list_for_each_entry(glist, &qg->groups, next_group) {
-			err = ulist_add(parents, glist->group->qgroupid,
-					ptr_to_u64(glist->group), GFP_ATOMIC);
-			if (err < 0) {
-				ret = err;
-				goto out_unlock;
-			}
-		}
-	}
-
-out_unlock:
-	spin_unlock(&fs_info->qgroup_lock);
-
-out:
-	ulist_free(roots);
-	ulist_free(parents);
-	return ret;
-}
-
-/*
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
- * from the fs.  First, all roots referencing the extent are searched, and
- * then the space is accounted accordingly to the different roots.  The
- * accounting algorithm works in 3 steps documented inline.
- */
-static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
-				struct btrfs_qgroup_operation *oper)
-{
-	int ret = 0;
-
-	if (!fs_info->quota_enabled)
-		return 0;
-
-	BUG_ON(!fs_info->quota_root);
-
-	mutex_lock(&fs_info->qgroup_rescan_lock);
-	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
-		if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
-			mutex_unlock(&fs_info->qgroup_rescan_lock);
-			return 0;
-		}
-	}
-	mutex_unlock(&fs_info->qgroup_rescan_lock);
-
-	ASSERT(is_fstree(oper->ref_root));
-
-	trace_btrfs_qgroup_account(oper);
-
-	switch (oper->type) {
-	case BTRFS_QGROUP_OPER_ADD_EXCL:
-	case BTRFS_QGROUP_OPER_SUB_EXCL:
-		ret = qgroup_excl_accounting(fs_info, oper);
-		break;
-	case BTRFS_QGROUP_OPER_ADD_SHARED:
-	case BTRFS_QGROUP_OPER_SUB_SHARED:
-		ret = qgroup_shared_accounting(trans, fs_info, oper);
-		break;
-	case BTRFS_QGROUP_OPER_SUB_SUBTREE:
-		ret = qgroup_subtree_accounting(trans, fs_info, oper);
-		break;
-	default:
-		ASSERT(0);
-	}
-	return ret;
-}
-
 int
 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
 			    struct btrfs_fs_info *fs_info,
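
The heart of the deleted machinery is the four dirty-making cases in qgroup_adjust_counters(). They reduce to two comparisons per qgroup, restated here as a self-contained function over plain integers (simplified types; the _cmpr counters are folded into their uncompressed twins and the WARN_ON is dropped):

#include <stdint.h>
#include <stdbool.h>

struct qg_counts {
	int64_t rfer;	/* bytes referenced by this qgroup */
	int64_t excl;	/* bytes referenced through no other root */
};

/* Apply the removed qgroup_adjust_counters() rules for one qgroup:
 * cur_old/cur_new are this qgroup's refcounts before/after the operation,
 * old_roots/new_roots the total roots on the extent before/after. */
static bool adjust_counts(struct qg_counts *qg, int64_t num_bytes,
			  uint64_t cur_old, uint64_t cur_new,
			  uint64_t old_roots, uint64_t new_roots)
{
	bool dirty = false;

	if (cur_old == 0 && cur_new > 0) {	/* newly referenced */
		qg->rfer += num_bytes;
		dirty = true;
	}
	if (cur_old > 0 && cur_new == 0) {	/* no longer referenced */
		qg->rfer -= num_bytes;
		dirty = true;
	}
	/* held through every root before but not every root now: lost exclusivity */
	if (old_roots && cur_old == old_roots &&
	    (cur_new != new_roots || new_roots == 0)) {
		qg->excl -= num_bytes;
		dirty = true;
	}
	/* now held through every root although it wasn't before: gained it */
	if ((!old_roots || cur_old != old_roots) && cur_new == new_roots) {
		qg->excl += num_bytes;
		dirty = true;
	}
	return dirty;
}

Running the qg 1/0 example from the deleted comment through this: qg 0/0 ends with cur_old = 0 and cur_new = 1, so only its referenced bytes grow, while qg 1/0 sees cur_old equal to old_roots and cur_new equal to new_roots, so no branch fires and nothing changes.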
@@ -2572,31 +1737,6 @@ cleanup:
 }
 
 /*
- * Needs to be called everytime we run delayed refs, even if there is an error
- * in order to cleanup outstanding operations.
- */
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
-				    struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_qgroup_operation *oper;
-	int ret = 0;
-
-	while (!list_empty(&trans->qgroup_ref_list)) {
-		oper = list_first_entry(&trans->qgroup_ref_list,
-					struct btrfs_qgroup_operation, list);
-		list_del_init(&oper->list);
-		if (!ret || !trans->aborted)
-			ret = btrfs_qgroup_account(trans, fs_info, oper);
-		spin_lock(&fs_info->qgroup_op_lock);
-		rb_erase(&oper->n, &fs_info->qgroup_op_tree);
-		spin_unlock(&fs_info->qgroup_op_lock);
-		btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-		kfree(oper);
-	}
-	return ret;
-}
-
-/*
  * called from commit_transaction. Writes all changed qgroups to disk.
  */
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
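
What replaces the deleted drain loop is the extent-record pipeline whose entry points survive in the context above: extents dirtied during the transaction are recorded, then settled in one pass by diffing the set of roots before and after the change. A hedged sketch of the call shape, using only the declarations kept in qgroup.h (illustrative, not a verbatim kernel excerpt; the exact call sites live in the transaction-commit path):

/* Sketch of the replacement flow, per the declarations kept in qgroup.h. */
static int account_dirty_extents(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info)
{
	int ret;

	/* fix up the queued dirty-extent records before accounting */
	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
	if (ret)
		return ret;

	/* each record is settled by comparing its old_roots ulist against
	 * new_roots, both produced by btrfs_find_all_roots() */
	return btrfs_qgroup_account_extents(trans, fs_info);
}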
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 90998b5e1713..6387dcfa354c 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -23,45 +23,6 @@
 #include "delayed-ref.h"
 
 /*
- * A description of the operations, all of these operations only happen when we
- * are adding the 1st reference for that subvolume in the case of adding space
- * or on the last reference delete in the case of subtraction.  The only
- * exception is the last one, which is added for confusion.
- *
- * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
- * one pointing at the bytes we are adding.  This is called on the first
- * allocation.
- *
- * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
- * shared between subvols.  This is called on the creation of a ref that already
- * has refs from a different subvolume, so basically reflink.
- *
- * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
- * one referencing the range.
- *
- * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
- * refs with other subvolumes.
- */
-enum btrfs_qgroup_operation_type {
-	BTRFS_QGROUP_OPER_ADD_EXCL,
-	BTRFS_QGROUP_OPER_ADD_SHARED,
-	BTRFS_QGROUP_OPER_SUB_EXCL,
-	BTRFS_QGROUP_OPER_SUB_SHARED,
-	BTRFS_QGROUP_OPER_SUB_SUBTREE,
-};
-
-struct btrfs_qgroup_operation {
-	u64 ref_root;
-	u64 bytenr;
-	u64 num_bytes;
-	u64 seq;
-	enum btrfs_qgroup_operation_type type;
-	struct seq_list elem;
-	struct rb_node n;
-	struct list_head list;
-};
-
-/*
  * Record a dirty extent, and info qgroup to update quota on it
  * TODO: Use kmem cache to alloc it.
  */
@@ -93,11 +54,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
 struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info, u64 ref_root,
-			    u64 bytenr, u64 num_bytes,
-			    enum btrfs_qgroup_operation_type type,
-			    int mod_seq);
 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info);
 struct btrfs_qgroup_extent_record
@@ -110,11 +66,6 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
 			    struct ulist *old_roots, struct ulist *new_roots);
 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
 				 struct btrfs_fs_info *fs_info);
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
-				    struct btrfs_fs_info *fs_info);
-void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
-				   struct btrfs_qgroup_operation *oper);
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 		      struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 7f79cf459591..0b73af9be12f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1117,61 +1117,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
 	TP_ARGS(wq)
 );
 
-#define show_oper_type(type)						\
-	__print_symbolic(type,						\
-		{ BTRFS_QGROUP_OPER_ADD_EXCL,	"OPER_ADD_EXCL"   },	\
-		{ BTRFS_QGROUP_OPER_ADD_SHARED,	"OPER_ADD_SHARED" },	\
-		{ BTRFS_QGROUP_OPER_SUB_EXCL,	"OPER_SUB_EXCL"   },	\
-		{ BTRFS_QGROUP_OPER_SUB_SHARED,	"OPER_SUB_SHARED" })
-
-DECLARE_EVENT_CLASS(btrfs_qgroup_oper,
-
-	TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-	TP_ARGS(oper),
-
-	TP_STRUCT__entry(
-		__field(	u64,  ref_root		)
-		__field(	u64,  bytenr		)
-		__field(	u64,  num_bytes		)
-		__field(	u64,  seq		)
-		__field(	int,  type		)
-		__field(	u64,  elem_seq		)
-	),
-
-	TP_fast_assign(
-		__entry->ref_root	= oper->ref_root;
-		__entry->bytenr		= oper->bytenr,
-		__entry->num_bytes	= oper->num_bytes;
-		__entry->seq		= oper->seq;
-		__entry->type		= oper->type;
-		__entry->elem_seq	= oper->elem.seq;
-	),
-
-	TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, "
-		  "seq = %llu, elem.seq = %llu, type = %s",
-		  (unsigned long long)__entry->ref_root,
-		  (unsigned long long)__entry->bytenr,
-		  (unsigned long long)__entry->num_bytes,
-		  (unsigned long long)__entry->seq,
-		  (unsigned long long)__entry->elem_seq,
-		  show_oper_type(__entry->type))
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account,
-
-	TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-	TP_ARGS(oper)
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref,
-
-	TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-	TP_ARGS(oper)
-);
-
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
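
The dropped show_oper_type() rendered the operation type with __print_symbolic() at trace-output time. Outside the tracing machinery the same mapping is an ordinary switch; a minimal, hypothetical plain-C equivalent for illustration (the numeric values follow the enum order removed from qgroup.h, and note the original table carried no entry for BTRFS_QGROUP_OPER_SUB_SUBTREE):

/* Hypothetical plain-C equivalent of the removed show_oper_type() table. */
static const char *oper_type_name(int type)
{
	switch (type) {
	case 0: return "OPER_ADD_EXCL";		/* BTRFS_QGROUP_OPER_ADD_EXCL */
	case 1: return "OPER_ADD_SHARED";	/* BTRFS_QGROUP_OPER_ADD_SHARED */
	case 2: return "OPER_SUB_EXCL";		/* BTRFS_QGROUP_OPER_SUB_EXCL */
	case 3: return "OPER_SUB_SHARED";	/* BTRFS_QGROUP_OPER_SUB_SHARED */
	default: return "UNKNOWN";		/* SUB_SUBTREE had no symbolic name */
	}
}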