author		Jan Schmidt <list.btrfs@jan-o-sch.net>	2013-04-25 12:04:51 -0400
committer	Josef Bacik <jbacik@fusionio.com>	2013-05-06 15:55:19 -0400
commit		2f2320360b0c35b86938bfc561124474f0dac6e4
tree		f7b1cbec89d9c6d030f15817e77ee42e5941244a /fs/btrfs/qgroup.c
parent		46b665ceb1edd2ac149ff701313c115f52dc0348
Btrfs: rescan for qgroups
If qgroup tracking is out of sync, a rescan operation can be started. It
iterates the complete extent tree and recalculates all qgroup tracking data.
This is an expensive operation and should not be used unless required.

A filesystem under rescan can still be umounted. The rescan continues on the
next mount. Status information is provided with a separate ioctl while a
rescan operation is in progress.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
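This patch adds only the kernel-side machinery; the ioctl entry points land in
companion patches of the same series. As a hedged illustration of how userspace
drives the feature, the sketch below uses the interface mainline eventually
exposed in linux/btrfs.h (BTRFS_IOC_QUOTA_RESCAN to start a scan,
BTRFS_IOC_QUOTA_RESCAN_STATUS to poll it); anything beyond this commit is an
assumption from later mainline, not from this diff.

	/* hedged sketch: start a qgroup rescan and poll its status from userspace */
	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	int main(int argc, char **argv)
	{
		struct btrfs_ioctl_quota_rescan_args args;
		int fd;

		if (argc != 2) {
			fprintf(stderr, "usage: %s <btrfs mount point>\n", argv[0]);
			return 1;
		}
		fd = open(argv[1], O_RDONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* kick off the scan; fails with EINPROGRESS if one is already running */
		memset(&args, 0, sizeof(args));
		if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
			perror("BTRFS_IOC_QUOTA_RESCAN");

		/* the separate status ioctl mentioned in the commit message */
		memset(&args, 0, sizeof(args));
		if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) == 0)
			printf("rescan flags: %llu, progress objectid: %llu\n",
			       (unsigned long long)args.flags,
			       (unsigned long long)args.progress);

		close(fd);
		return 0;
	}

The same operations back "btrfs quota rescan [-s] <path>" in btrfs-progs.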
Diffstat (limited to 'fs/btrfs/qgroup.c')
-rw-r--r--	fs/btrfs/qgroup.c	318
1 file changed, 304 insertions(+), 14 deletions(-)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1fb7d8da3084..da8458357b57 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
 #include "locking.h"
 #include "ulist.h"
 #include "backref.h"
+#include "extent_io.h"
 
 /* TODO XXX FIXME
  * - subvol delete -> delete when ref goes to 0? delete limits also?
  * - reorganize keys
  * - compressed
  * - sync
- * - rescan
  * - copy also limits on subvol creation
  * - limit
  * - caches for ulists
@@ -98,6 +98,14 @@ struct btrfs_qgroup_list {
 	struct btrfs_qgroup *member;
 };
 
+struct qgroup_rescan {
+	struct btrfs_work	work;
+	struct btrfs_fs_info	*fs_info;
+};
+
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
+				struct qgroup_rescan *qscan);
+
 /* must be called with qgroup_ioctl_lock held */
 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
 					   u64 qgroupid)
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 		}
 		fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
 								  ptr);
-		/* FIXME read scan element */
+		fs_info->qgroup_rescan_progress.objectid =
+				btrfs_qgroup_status_rescan(l, ptr);
+		if (fs_info->qgroup_flags &
+		    BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+			struct qgroup_rescan *qscan =
+				kmalloc(sizeof(*qscan), GFP_NOFS);
+			if (!qscan) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			fs_info->qgroup_rescan_progress.type = 0;
+			fs_info->qgroup_rescan_progress.offset = 0;
+			qgroup_rescan_start(fs_info, qscan);
+		}
 		goto next1;
 	}
 
@@ -719,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
-	/* XXX scan */
+	btrfs_set_qgroup_status_rescan(l, ptr,
+			fs_info->qgroup_rescan_progress.objectid);
 
 	btrfs_mark_buffer_dirty(l);
 
@@ -830,7 +852,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
 				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
-	btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
 
 	btrfs_mark_buffer_dirty(leaf);
 
@@ -944,10 +966,11 @@ out:
 	return ret;
 }
 
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+			 struct btrfs_qgroup *qgroup)
 {
-	/* FIXME */
-	return 0;
+	if (list_empty(&qgroup->dirty))
+		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -1155,13 +1178,6 @@ out:
 	return ret;
 }
 
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
-			 struct btrfs_qgroup *qgroup)
-{
-	if (list_empty(&qgroup->dirty))
-		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
-}
-
 /*
  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
  * the modification into a list that's later used by btrfs_end_transaction to
@@ -1390,6 +1406,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 		BUG();
 	}
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			mutex_unlock(&fs_info->qgroup_rescan_lock);
+			return 0;
+		}
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
 	/*
 	 * the delayed ref sequence number we pass depends on the direction of
 	 * the operation. for add operations, we pass
@@ -1403,7 +1428,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
 	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			ret = 0;
+			goto unlock;
+		}
+	}
+
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
 		goto unlock;
@@ -1445,6 +1478,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 
 unlock:
 	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
 	ulist_free(roots);
 	ulist_free(tmp);
 
@@ -1823,3 +1857,259 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 		  (u32)trans->delayed_ref_elem.seq);
 	BUG();
 }
+
+/*
+ * returns < 0 on error, 0 when more leaves are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+		   struct btrfs_trans_handle *trans, struct ulist *tmp,
+		   struct extent_buffer *scratch_leaf)
+{
+	struct btrfs_key found;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *roots = NULL;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct seq_list tree_mod_seq_elem = {};
+	u64 seq;
+	int slot;
+	int ret;
+
+	path->leave_spinning = 1;
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	ret = btrfs_search_slot_for_read(fs_info->extent_root,
+					 &fs_info->qgroup_rescan_progress,
+					 path, 1, 0);
+
+	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
+		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+		 fs_info->qgroup_rescan_progress.type,
+		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+		 ret);
+
+	if (ret) {
+		/*
+		 * The rescan is about to end, we will not be scanning any
+		 * further blocks. We cannot unset the RESCAN flag here, because
+		 * we want to commit the transaction if everything went well.
+		 * To make the live accounting work in this phase, we set our
+		 * scan progress pointer such that every real extent objectid
+		 * will be smaller.
+		 */
+		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+		btrfs_release_path(path);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		return ret;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found,
+			      btrfs_header_nritems(path->nodes[0]) - 1);
+	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+	slot = path->slots[0];
+	btrfs_release_path(path);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+		if (found.type != BTRFS_EXTENT_ITEM_KEY)
+			continue;
+		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+					   tree_mod_seq_elem.seq, &roots);
+		if (ret < 0)
+			goto out;
+		spin_lock(&fs_info->qgroup_lock);
+		seq = fs_info->qgroup_seq;
+		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+		if (ret) {
+			spin_unlock(&fs_info->qgroup_lock);
+			ulist_free(roots);
+			goto out;
+		}
+
+		/*
+		 * step2 of btrfs_qgroup_account_ref works from a single root,
+		 * we're doing all at once here.
+		 */
+		ulist_reinit(tmp);
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(roots, &uiter))) {
+			struct btrfs_qgroup *qg;
+
+			qg = find_qgroup_rb(fs_info, unode->val);
+			if (!qg)
+				continue;
+
+			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
+					GFP_ATOMIC);
+			if (ret < 0) {
+				spin_unlock(&fs_info->qgroup_lock);
+				ulist_free(roots);
+				goto out;
+			}
+		}
+
+		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(tmp, &uiter))) {
+			struct btrfs_qgroup *qg;
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+			qg->rfer += found.offset;
+			qg->rfer_cmpr += found.offset;
+			WARN_ON(qg->tag >= seq);
+			if (qg->refcnt - seq == roots->nnodes) {
+				qg->excl += found.offset;
+				qg->excl_cmpr += found.offset;
+			}
+			qgroup_dirty(fs_info, qg);
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(tmp, glist->group->qgroupid,
+						(uintptr_t)glist->group,
+						GFP_ATOMIC);
+				if (ret < 0) {
+					spin_unlock(&fs_info->qgroup_lock);
+					ulist_free(roots);
+					goto out;
+				}
+			}
+		}
+
+		spin_unlock(&fs_info->qgroup_lock);
+		ulist_free(roots);
+		ret = 0;
+	}
+
+out:
+	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+	return ret;
+}
+
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+						   work);
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *tmp = NULL;
+	struct extent_buffer *scratch_leaf = NULL;
+	int err = -ENOMEM;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		goto out;
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		goto out;
+	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+	if (!scratch_leaf)
+		goto out;
+
+	err = 0;
+	while (!err) {
+		trans = btrfs_start_transaction(fs_info->fs_root, 0);
+		if (IS_ERR(trans)) {
+			err = PTR_ERR(trans);
+			break;
+		}
+		if (!fs_info->quota_enabled) {
+			err = -EINTR;
+		} else {
+			err = qgroup_rescan_leaf(qscan, path, trans,
+						 tmp, scratch_leaf);
+		}
+		if (err > 0)
+			btrfs_commit_transaction(trans, fs_info->fs_root);
+		else
+			btrfs_end_transaction(trans, fs_info->fs_root);
+	}
+
+out:
+	kfree(scratch_leaf);
+	ulist_free(tmp);
+	btrfs_free_path(path);
+	kfree(qscan);
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+	if (err == 2 &&
+	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	} else if (err < 0) {
+		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	if (err >= 0) {
+		pr_info("btrfs: qgroup scan completed%s\n",
+			err == 2 ? " (inconsistency flag cleared)" : "");
+	} else {
+		pr_err("btrfs: qgroup scan failed with %d\n", err);
+	}
+}
+
+static void
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
+{
+	memset(&qscan->work, 0, sizeof(qscan->work));
+	qscan->work.func = btrfs_qgroup_rescan_worker;
+	qscan->fs_info = fs_info;
+
+	pr_info("btrfs: qgroup scan started\n");
+	btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
+}
+
+int
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+{
+	int ret = 0;
+	struct rb_node *n;
+	struct btrfs_qgroup *qgroup;
+	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
+
+	if (!qscan)
+		return -ENOMEM;
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+		ret = -EINPROGRESS;
+	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+		ret = -EINVAL;
+	if (ret) {
+		spin_unlock(&fs_info->qgroup_lock);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		kfree(qscan);
+		return ret;
+	}
+
+	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+	memset(&fs_info->qgroup_rescan_progress, 0,
+	       sizeof(fs_info->qgroup_rescan_progress));
+
+	/* clear all current qgroup tracking information */
+	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
+		qgroup = rb_entry(n, struct btrfs_qgroup, node);
+		qgroup->rfer = 0;
+		qgroup->rfer_cmpr = 0;
+		qgroup->excl = 0;
+		qgroup->excl_cmpr = 0;
+	}
+	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	qgroup_rescan_start(fs_info, qscan);
+
+	return 0;
+}
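
A note on the two hunks in btrfs_qgroup_account_ref: the rescan worker and
live accounting partition the extent space at
fs_info->qgroup_rescan_progress.objectid. The reader's sketch below distills
that rule into a hypothetical helper (qgroup_rescan_will_cover is not in the
patch) under the patch's own locking; it assumes the usual kernel headers.

	/*
	 * Reader's sketch, not part of the patch: extents below the progress
	 * pointer were already re-counted by the rescan worker, so live
	 * accounting must handle them; extents at or above it will still be
	 * visited by the scan, so the live path may skip them.
	 */
	static bool qgroup_rescan_will_cover(struct btrfs_fs_info *fs_info,
					     u64 bytenr)
	{
		bool covered = false;

		mutex_lock(&fs_info->qgroup_rescan_lock);
		if ((fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) &&
		    fs_info->qgroup_rescan_progress.objectid <= bytenr)
			covered = true;	/* the scan will account this extent */
		mutex_unlock(&fs_info->qgroup_rescan_lock);

		return covered;
	}

Setting the progress pointer to (u64)-1 when qgroup_rescan_leaf runs out of
leaves makes every real bytenr compare as already scanned, handing accounting
back to the live path before the worker finally clears the RESCAN flag.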