diff options
author | Jan Schmidt <list.btrfs@jan-o-sch.net> | 2013-04-25 12:04:51 -0400 |
---|---|---|
committer | Josef Bacik <jbacik@fusionio.com> | 2013-05-06 15:55:19 -0400 |
commit | 2f2320360b0c35b86938bfc561124474f0dac6e4 (patch) | |
tree | f7b1cbec89d9c6d030f15817e77ee42e5941244a /fs/btrfs/qgroup.c | |
parent | 46b665ceb1edd2ac149ff701313c115f52dc0348 (diff) |
Btrfs: rescan for qgroups
If qgroup tracking is out of sync, a rescan operation can be started. It
iterates the complete extent tree and recalculates all qgroup tracking data.
This is an expensive operation and should not be used unless required.
A filesystem under rescan can still be unmounted. The rescan continues on the
next mount. Status information is provided with a separate ioctl while a
rescan operation is in progress.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/qgroup.c')
-rw-r--r-- | fs/btrfs/qgroup.c | 318 |
1 file changed, 304 insertions, 14 deletions
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1fb7d8da3084..da8458357b57 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -31,13 +31,13 @@ | |||
31 | #include "locking.h" | 31 | #include "locking.h" |
32 | #include "ulist.h" | 32 | #include "ulist.h" |
33 | #include "backref.h" | 33 | #include "backref.h" |
34 | #include "extent_io.h" | ||
34 | 35 | ||
35 | /* TODO XXX FIXME | 36 | /* TODO XXX FIXME |
36 | * - subvol delete -> delete when ref goes to 0? delete limits also? | 37 | * - subvol delete -> delete when ref goes to 0? delete limits also? |
37 | * - reorganize keys | 38 | * - reorganize keys |
38 | * - compressed | 39 | * - compressed |
39 | * - sync | 40 | * - sync |
40 | * - rescan | ||
41 | * - copy also limits on subvol creation | 41 | * - copy also limits on subvol creation |
42 | * - limit | 42 | * - limit |
43 | * - caches fuer ulists | 43 | * - caches fuer ulists |
@@ -98,6 +98,14 @@ struct btrfs_qgroup_list { | |||
98 | struct btrfs_qgroup *member; | 98 | struct btrfs_qgroup *member; |
99 | }; | 99 | }; |
100 | 100 | ||
101 | struct qgroup_rescan { | ||
102 | struct btrfs_work work; | ||
103 | struct btrfs_fs_info *fs_info; | ||
104 | }; | ||
105 | |||
106 | static void qgroup_rescan_start(struct btrfs_fs_info *fs_info, | ||
107 | struct qgroup_rescan *qscan); | ||
108 | |||
101 | /* must be called with qgroup_ioctl_lock held */ | 109 | /* must be called with qgroup_ioctl_lock held */ |
102 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, | 110 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, |
103 | u64 qgroupid) | 111 | u64 qgroupid) |
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) | |||
298 | } | 306 | } |
299 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, | 307 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, |
300 | ptr); | 308 | ptr); |
301 | /* FIXME read scan element */ | 309 | fs_info->qgroup_rescan_progress.objectid = |
310 | btrfs_qgroup_status_rescan(l, ptr); | ||
311 | if (fs_info->qgroup_flags & | ||
312 | BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
313 | struct qgroup_rescan *qscan = | ||
314 | kmalloc(sizeof(*qscan), GFP_NOFS); | ||
315 | if (!qscan) { | ||
316 | ret = -ENOMEM; | ||
317 | goto out; | ||
318 | } | ||
319 | fs_info->qgroup_rescan_progress.type = 0; | ||
320 | fs_info->qgroup_rescan_progress.offset = 0; | ||
321 | qgroup_rescan_start(fs_info, qscan); | ||
322 | } | ||
302 | goto next1; | 323 | goto next1; |
303 | } | 324 | } |
304 | 325 | ||
@@ -719,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans, | |||
719 | ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); | 740 | ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); |
720 | btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); | 741 | btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); |
721 | btrfs_set_qgroup_status_generation(l, ptr, trans->transid); | 742 | btrfs_set_qgroup_status_generation(l, ptr, trans->transid); |
722 | /* XXX scan */ | 743 | btrfs_set_qgroup_status_rescan(l, ptr, |
744 | fs_info->qgroup_rescan_progress.objectid); | ||
723 | 745 | ||
724 | btrfs_mark_buffer_dirty(l); | 746 | btrfs_mark_buffer_dirty(l); |
725 | 747 | ||
@@ -830,7 +852,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
830 | fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | | 852 | fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | |
831 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | 853 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
832 | btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); | 854 | btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); |
833 | btrfs_set_qgroup_status_scan(leaf, ptr, 0); | 855 | btrfs_set_qgroup_status_rescan(leaf, ptr, 0); |
834 | 856 | ||
835 | btrfs_mark_buffer_dirty(leaf); | 857 | btrfs_mark_buffer_dirty(leaf); |
836 | 858 | ||
@@ -944,10 +966,11 @@ out: | |||
944 | return ret; | 966 | return ret; |
945 | } | 967 | } |
946 | 968 | ||
947 | int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) | 969 | static void qgroup_dirty(struct btrfs_fs_info *fs_info, |
970 | struct btrfs_qgroup *qgroup) | ||
948 | { | 971 | { |
949 | /* FIXME */ | 972 | if (list_empty(&qgroup->dirty)) |
950 | return 0; | 973 | list_add(&qgroup->dirty, &fs_info->dirty_qgroups); |
951 | } | 974 | } |
952 | 975 | ||
953 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | 976 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
@@ -1155,13 +1178,6 @@ out: | |||
1155 | return ret; | 1178 | return ret; |
1156 | } | 1179 | } |
1157 | 1180 | ||
1158 | static void qgroup_dirty(struct btrfs_fs_info *fs_info, | ||
1159 | struct btrfs_qgroup *qgroup) | ||
1160 | { | ||
1161 | if (list_empty(&qgroup->dirty)) | ||
1162 | list_add(&qgroup->dirty, &fs_info->dirty_qgroups); | ||
1163 | } | ||
1164 | |||
1165 | /* | 1181 | /* |
1166 | * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts | 1182 | * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts |
1167 | * the modification into a list that's later used by btrfs_end_transaction to | 1183 | * the modification into a list that's later used by btrfs_end_transaction to |
@@ -1390,6 +1406,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1390 | BUG(); | 1406 | BUG(); |
1391 | } | 1407 | } |
1392 | 1408 | ||
1409 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
1410 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
1411 | if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { | ||
1412 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1413 | return 0; | ||
1414 | } | ||
1415 | } | ||
1416 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1417 | |||
1393 | /* | 1418 | /* |
1394 | * the delayed ref sequence number we pass depends on the direction of | 1419 | * the delayed ref sequence number we pass depends on the direction of |
1395 | * the operation. for add operations, we pass | 1420 | * the operation. for add operations, we pass |
@@ -1403,7 +1428,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1403 | if (ret < 0) | 1428 | if (ret < 0) |
1404 | return ret; | 1429 | return ret; |
1405 | 1430 | ||
1431 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
1406 | spin_lock(&fs_info->qgroup_lock); | 1432 | spin_lock(&fs_info->qgroup_lock); |
1433 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
1434 | if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { | ||
1435 | ret = 0; | ||
1436 | goto unlock; | ||
1437 | } | ||
1438 | } | ||
1439 | |||
1407 | quota_root = fs_info->quota_root; | 1440 | quota_root = fs_info->quota_root; |
1408 | if (!quota_root) | 1441 | if (!quota_root) |
1409 | goto unlock; | 1442 | goto unlock; |
@@ -1445,6 +1478,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1445 | 1478 | ||
1446 | unlock: | 1479 | unlock: |
1447 | spin_unlock(&fs_info->qgroup_lock); | 1480 | spin_unlock(&fs_info->qgroup_lock); |
1481 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1448 | ulist_free(roots); | 1482 | ulist_free(roots); |
1449 | ulist_free(tmp); | 1483 | ulist_free(tmp); |
1450 | 1484 | ||
@@ -1823,3 +1857,259 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | |||
1823 | (u32)trans->delayed_ref_elem.seq); | 1857 | (u32)trans->delayed_ref_elem.seq); |
1824 | BUG(); | 1858 | BUG(); |
1825 | } | 1859 | } |
1860 | |||
1861 | /* | ||
1862 | * returns < 0 on error, 0 when more leafs are to be scanned. | ||
1863 | * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. | ||
1864 | */ | ||
1865 | static int | ||
1866 | qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path, | ||
1867 | struct btrfs_trans_handle *trans, struct ulist *tmp, | ||
1868 | struct extent_buffer *scratch_leaf) | ||
1869 | { | ||
1870 | struct btrfs_key found; | ||
1871 | struct btrfs_fs_info *fs_info = qscan->fs_info; | ||
1872 | struct ulist *roots = NULL; | ||
1873 | struct ulist_node *unode; | ||
1874 | struct ulist_iterator uiter; | ||
1875 | struct seq_list tree_mod_seq_elem = {}; | ||
1876 | u64 seq; | ||
1877 | int slot; | ||
1878 | int ret; | ||
1879 | |||
1880 | path->leave_spinning = 1; | ||
1881 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
1882 | ret = btrfs_search_slot_for_read(fs_info->extent_root, | ||
1883 | &fs_info->qgroup_rescan_progress, | ||
1884 | path, 1, 0); | ||
1885 | |||
1886 | pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", | ||
1887 | (unsigned long long)fs_info->qgroup_rescan_progress.objectid, | ||
1888 | fs_info->qgroup_rescan_progress.type, | ||
1889 | (unsigned long long)fs_info->qgroup_rescan_progress.offset, | ||
1890 | ret); | ||
1891 | |||
1892 | if (ret) { | ||
1893 | /* | ||
1894 | * The rescan is about to end, we will not be scanning any | ||
1895 | * further blocks. We cannot unset the RESCAN flag here, because | ||
1896 | * we want to commit the transaction if everything went well. | ||
1897 | * To make the live accounting work in this phase, we set our | ||
1898 | * scan progress pointer such that every real extent objectid | ||
1899 | * will be smaller. | ||
1900 | */ | ||
1901 | fs_info->qgroup_rescan_progress.objectid = (u64)-1; | ||
1902 | btrfs_release_path(path); | ||
1903 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1904 | return ret; | ||
1905 | } | ||
1906 | |||
1907 | btrfs_item_key_to_cpu(path->nodes[0], &found, | ||
1908 | btrfs_header_nritems(path->nodes[0]) - 1); | ||
1909 | fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; | ||
1910 | |||
1911 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1912 | memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf)); | ||
1913 | slot = path->slots[0]; | ||
1914 | btrfs_release_path(path); | ||
1915 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1916 | |||
1917 | for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { | ||
1918 | btrfs_item_key_to_cpu(scratch_leaf, &found, slot); | ||
1919 | if (found.type != BTRFS_EXTENT_ITEM_KEY) | ||
1920 | continue; | ||
1921 | ret = btrfs_find_all_roots(trans, fs_info, found.objectid, | ||
1922 | tree_mod_seq_elem.seq, &roots); | ||
1923 | if (ret < 0) | ||
1924 | goto out; | ||
1925 | spin_lock(&fs_info->qgroup_lock); | ||
1926 | seq = fs_info->qgroup_seq; | ||
1927 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | ||
1928 | |||
1929 | ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); | ||
1930 | if (ret) { | ||
1931 | spin_unlock(&fs_info->qgroup_lock); | ||
1932 | ulist_free(roots); | ||
1933 | goto out; | ||
1934 | } | ||
1935 | |||
1936 | /* | ||
1937 | * step2 of btrfs_qgroup_account_ref works from a single root, | ||
1938 | * we're doing all at once here. | ||
1939 | */ | ||
1940 | ulist_reinit(tmp); | ||
1941 | ULIST_ITER_INIT(&uiter); | ||
1942 | while ((unode = ulist_next(roots, &uiter))) { | ||
1943 | struct btrfs_qgroup *qg; | ||
1944 | |||
1945 | qg = find_qgroup_rb(fs_info, unode->val); | ||
1946 | if (!qg) | ||
1947 | continue; | ||
1948 | |||
1949 | ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, | ||
1950 | GFP_ATOMIC); | ||
1951 | if (ret < 0) { | ||
1952 | spin_unlock(&fs_info->qgroup_lock); | ||
1953 | ulist_free(roots); | ||
1954 | goto out; | ||
1955 | } | ||
1956 | } | ||
1957 | |||
1958 | /* this loop is similar to step 2 of btrfs_qgroup_account_ref */ | ||
1959 | ULIST_ITER_INIT(&uiter); | ||
1960 | while ((unode = ulist_next(tmp, &uiter))) { | ||
1961 | struct btrfs_qgroup *qg; | ||
1962 | struct btrfs_qgroup_list *glist; | ||
1963 | |||
1964 | qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; | ||
1965 | qg->rfer += found.offset; | ||
1966 | qg->rfer_cmpr += found.offset; | ||
1967 | WARN_ON(qg->tag >= seq); | ||
1968 | if (qg->refcnt - seq == roots->nnodes) { | ||
1969 | qg->excl += found.offset; | ||
1970 | qg->excl_cmpr += found.offset; | ||
1971 | } | ||
1972 | qgroup_dirty(fs_info, qg); | ||
1973 | |||
1974 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1975 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1976 | (uintptr_t)glist->group, | ||
1977 | GFP_ATOMIC); | ||
1978 | if (ret < 0) { | ||
1979 | spin_unlock(&fs_info->qgroup_lock); | ||
1980 | ulist_free(roots); | ||
1981 | goto out; | ||
1982 | } | ||
1983 | } | ||
1984 | } | ||
1985 | |||
1986 | spin_unlock(&fs_info->qgroup_lock); | ||
1987 | ulist_free(roots); | ||
1988 | ret = 0; | ||
1989 | } | ||
1990 | |||
1991 | out: | ||
1992 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1993 | |||
1994 | return ret; | ||
1995 | } | ||
1996 | |||
1997 | static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | ||
1998 | { | ||
1999 | struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan, | ||
2000 | work); | ||
2001 | struct btrfs_path *path; | ||
2002 | struct btrfs_trans_handle *trans = NULL; | ||
2003 | struct btrfs_fs_info *fs_info = qscan->fs_info; | ||
2004 | struct ulist *tmp = NULL; | ||
2005 | struct extent_buffer *scratch_leaf = NULL; | ||
2006 | int err = -ENOMEM; | ||
2007 | |||
2008 | path = btrfs_alloc_path(); | ||
2009 | if (!path) | ||
2010 | goto out; | ||
2011 | tmp = ulist_alloc(GFP_NOFS); | ||
2012 | if (!tmp) | ||
2013 | goto out; | ||
2014 | scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); | ||
2015 | if (!scratch_leaf) | ||
2016 | goto out; | ||
2017 | |||
2018 | err = 0; | ||
2019 | while (!err) { | ||
2020 | trans = btrfs_start_transaction(fs_info->fs_root, 0); | ||
2021 | if (IS_ERR(trans)) { | ||
2022 | err = PTR_ERR(trans); | ||
2023 | break; | ||
2024 | } | ||
2025 | if (!fs_info->quota_enabled) { | ||
2026 | err = -EINTR; | ||
2027 | } else { | ||
2028 | err = qgroup_rescan_leaf(qscan, path, trans, | ||
2029 | tmp, scratch_leaf); | ||
2030 | } | ||
2031 | if (err > 0) | ||
2032 | btrfs_commit_transaction(trans, fs_info->fs_root); | ||
2033 | else | ||
2034 | btrfs_end_transaction(trans, fs_info->fs_root); | ||
2035 | } | ||
2036 | |||
2037 | out: | ||
2038 | kfree(scratch_leaf); | ||
2039 | ulist_free(tmp); | ||
2040 | btrfs_free_path(path); | ||
2041 | kfree(qscan); | ||
2042 | |||
2043 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
2044 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
2045 | |||
2046 | if (err == 2 && | ||
2047 | fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { | ||
2048 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
2049 | } else if (err < 0) { | ||
2050 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
2051 | } | ||
2052 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2053 | |||
2054 | if (err >= 0) { | ||
2055 | pr_info("btrfs: qgroup scan completed%s\n", | ||
2056 | err == 2 ? " (inconsistency flag cleared)" : ""); | ||
2057 | } else { | ||
2058 | pr_err("btrfs: qgroup scan failed with %d\n", err); | ||
2059 | } | ||
2060 | } | ||
2061 | |||
2062 | static void | ||
2063 | qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan) | ||
2064 | { | ||
2065 | memset(&qscan->work, 0, sizeof(qscan->work)); | ||
2066 | qscan->work.func = btrfs_qgroup_rescan_worker; | ||
2067 | qscan->fs_info = fs_info; | ||
2068 | |||
2069 | pr_info("btrfs: qgroup scan started\n"); | ||
2070 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work); | ||
2071 | } | ||
2072 | |||
2073 | int | ||
2074 | btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | ||
2075 | { | ||
2076 | int ret = 0; | ||
2077 | struct rb_node *n; | ||
2078 | struct btrfs_qgroup *qgroup; | ||
2079 | struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS); | ||
2080 | |||
2081 | if (!qscan) | ||
2082 | return -ENOMEM; | ||
2083 | |||
2084 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
2085 | spin_lock(&fs_info->qgroup_lock); | ||
2086 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | ||
2087 | ret = -EINPROGRESS; | ||
2088 | else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) | ||
2089 | ret = -EINVAL; | ||
2090 | if (ret) { | ||
2091 | spin_unlock(&fs_info->qgroup_lock); | ||
2092 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2093 | kfree(qscan); | ||
2094 | return ret; | ||
2095 | } | ||
2096 | |||
2097 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
2098 | memset(&fs_info->qgroup_rescan_progress, 0, | ||
2099 | sizeof(fs_info->qgroup_rescan_progress)); | ||
2100 | |||
2101 | /* clear all current qgroup tracking information */ | ||
2102 | for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { | ||
2103 | qgroup = rb_entry(n, struct btrfs_qgroup, node); | ||
2104 | qgroup->rfer = 0; | ||
2105 | qgroup->rfer_cmpr = 0; | ||
2106 | qgroup->excl = 0; | ||
2107 | qgroup->excl_cmpr = 0; | ||
2108 | } | ||
2109 | spin_unlock(&fs_info->qgroup_lock); | ||
2110 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2111 | |||
2112 | qgroup_rescan_start(fs_info, qscan); | ||
2113 | |||
2114 | return 0; | ||
2115 | } | ||