aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/uuid-tree.c
diff options
context:
space:
mode:
authorStefan Behrens <sbehrens@giantdisaster.de>2013-08-15 11:11:17 -0400
committerChris Mason <chris.mason@fusionio.com>2013-09-01 08:15:52 -0400
commit07b30a49dac4f60a6c4b0b3938bd6f45affb9455 (patch)
tree29361b8f0f5b067b379d2cbe11f80b1102a842fa /fs/btrfs/uuid-tree.c
parent171170c1c5625cab9687ecf6714e09e0c8a6ed3c (diff)
Btrfs: introduce a tree for items that map UUIDs to something
Mapping UUIDs to subvolume IDs is an operation with a high effort today. Today, the algorithm even has quadratic effort (based on the number of existing subvolumes), which means, that it takes minutes to send/receive a single subvolume if 10,000 subvolumes exist. But even linear effort would be too much since it is a waste. And these data structures to allow mapping UUIDs to subvolume IDs are created every time a btrfs send/receive instance is started. It is much more efficient to maintain a searchable persistent data structure in the filesystem, one that is updated whenever a subvolume/snapshot is created and deleted, and when the received subvolume UUID is set by the btrfs-receive tool. Therefore kernel code is added with this commit that is able to maintain data structures in the filesystem that allow to quickly search for a given UUID and to retrieve data that is assigned to this UUID, like which subvolume ID is related to this UUID. This commit adds a new tree to hold UUID-to-data mapping items. The key of the items is the full UUID plus the key type BTRFS_UUID_KEY. Multiple data blocks can be stored for a given UUID, a type/length/ value scheme is used. Now follows the lengthy justification, why a new tree was added instead of using the existing root tree: The first approach was to not create another tree that holds UUID items. Instead, the items should just go into the top root tree. Unfortunately this confused the algorithm to assign the objectid of subvolumes and snapshots. The reason is that btrfs_find_free_objectid() calls btrfs_find_highest_objectid() for the first created subvol or snapshot after mounting a filesystem, and this function simply searches for the largest used objectid in the root tree keys to pick the next objectid to assign. Of course, the UUID keys have always been the ones with the highest offset value, and the next assigned subvol ID was wastefully huge. To use any other existing tree did not look proper. To apply a workaround such as setting the objectid to zero in the UUID item key and to implement collision handling would either add limitations (in case of a btrfs_extend_item() approach to handle the collisions) or a lot of complexity and source code (in case a key would be looked up that is free of collisions). Adding new code that introduces limitations is not good, and adding code that is complex and lengthy for no good reason is also not good. That's the justification why a completely new tree was introduced. Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de> Signed-off-by: Josef Bacik <jbacik@fusionio.com> Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs/btrfs/uuid-tree.c')
-rw-r--r--fs/btrfs/uuid-tree.c235
1 files changed, 235 insertions, 0 deletions
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
new file mode 100644
index 000000000000..04a04fa0f7c4
--- /dev/null
+++ b/fs/btrfs/uuid-tree.c
@@ -0,0 +1,235 @@
1/*
2 * Copyright (C) STRATO AG 2013. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#include <linux/uuid.h>
19#include <asm/unaligned.h>
20#include "ctree.h"
21#include "transaction.h"
22#include "disk-io.h"
23#include "print-tree.h"
24
25
26static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
27{
28 key->type = type;
29 key->objectid = get_unaligned_le64(uuid);
30 key->offset = get_unaligned_le64(uuid + sizeof(u64));
31}
32
33/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */
34static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
35 u8 type, u64 subid)
36{
37 int ret;
38 struct btrfs_path *path = NULL;
39 struct extent_buffer *eb;
40 int slot;
41 u32 item_size;
42 unsigned long offset;
43 struct btrfs_key key;
44
45 if (WARN_ON_ONCE(!uuid_root)) {
46 ret = -ENOENT;
47 goto out;
48 }
49
50 path = btrfs_alloc_path();
51 if (!path) {
52 ret = -ENOMEM;
53 goto out;
54 }
55
56 btrfs_uuid_to_key(uuid, type, &key);
57 ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
58 if (ret < 0) {
59 goto out;
60 } else if (ret > 0) {
61 ret = -ENOENT;
62 goto out;
63 }
64
65 eb = path->nodes[0];
66 slot = path->slots[0];
67 item_size = btrfs_item_size_nr(eb, slot);
68 offset = btrfs_item_ptr_offset(eb, slot);
69 ret = -ENOENT;
70
71 if (!IS_ALIGNED(item_size, sizeof(u64))) {
72 pr_warn("btrfs: uuid item with illegal size %lu!\n",
73 (unsigned long)item_size);
74 goto out;
75 }
76 while (item_size) {
77 __le64 data;
78
79 read_extent_buffer(eb, &data, offset, sizeof(data));
80 if (le64_to_cpu(data) == subid) {
81 ret = 0;
82 break;
83 }
84 offset += sizeof(data);
85 item_size -= sizeof(data);
86 }
87
88out:
89 btrfs_free_path(path);
90 return ret;
91}
92
93int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
94 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
95 u64 subid_cpu)
96{
97 int ret;
98 struct btrfs_path *path = NULL;
99 struct btrfs_key key;
100 struct extent_buffer *eb;
101 int slot;
102 unsigned long offset;
103 __le64 subid_le;
104
105 ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu);
106 if (ret != -ENOENT)
107 return ret;
108
109 if (WARN_ON_ONCE(!uuid_root)) {
110 ret = -EINVAL;
111 goto out;
112 }
113
114 btrfs_uuid_to_key(uuid, type, &key);
115
116 path = btrfs_alloc_path();
117 if (!path) {
118 ret = -ENOMEM;
119 goto out;
120 }
121
122 ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
123 sizeof(subid_le));
124 if (ret >= 0) {
125 /* Add an item for the type for the first time */
126 eb = path->nodes[0];
127 slot = path->slots[0];
128 offset = btrfs_item_ptr_offset(eb, slot);
129 } else if (ret == -EEXIST) {
130 /*
131 * An item with that type already exists.
132 * Extend the item and store the new subid at the end.
133 */
134 btrfs_extend_item(uuid_root, path, sizeof(subid_le));
135 eb = path->nodes[0];
136 slot = path->slots[0];
137 offset = btrfs_item_ptr_offset(eb, slot);
138 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
139 } else if (ret < 0) {
140 pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n",
141 ret, (unsigned long long)key.objectid,
142 (unsigned long long)key.offset, type);
143 goto out;
144 }
145
146 ret = 0;
147 subid_le = cpu_to_le64(subid_cpu);
148 write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
149 btrfs_mark_buffer_dirty(eb);
150
151out:
152 btrfs_free_path(path);
153 return ret;
154}
155
156int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
157 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
158 u64 subid)
159{
160 int ret;
161 struct btrfs_path *path = NULL;
162 struct btrfs_key key;
163 struct extent_buffer *eb;
164 int slot;
165 unsigned long offset;
166 u32 item_size;
167 unsigned long move_dst;
168 unsigned long move_src;
169 unsigned long move_len;
170
171 if (WARN_ON_ONCE(!uuid_root)) {
172 ret = -EINVAL;
173 goto out;
174 }
175
176 btrfs_uuid_to_key(uuid, type, &key);
177
178 path = btrfs_alloc_path();
179 if (!path) {
180 ret = -ENOMEM;
181 goto out;
182 }
183
184 ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
185 if (ret < 0) {
186 pr_warn("btrfs: error %d while searching for uuid item!\n",
187 ret);
188 goto out;
189 }
190 if (ret > 0) {
191 ret = -ENOENT;
192 goto out;
193 }
194
195 eb = path->nodes[0];
196 slot = path->slots[0];
197 offset = btrfs_item_ptr_offset(eb, slot);
198 item_size = btrfs_item_size_nr(eb, slot);
199 if (!IS_ALIGNED(item_size, sizeof(u64))) {
200 pr_warn("btrfs: uuid item with illegal size %lu!\n",
201 (unsigned long)item_size);
202 ret = -ENOENT;
203 goto out;
204 }
205 while (item_size) {
206 __le64 read_subid;
207
208 read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid));
209 if (le64_to_cpu(read_subid) == subid)
210 break;
211 offset += sizeof(read_subid);
212 item_size -= sizeof(read_subid);
213 }
214
215 if (!item_size) {
216 ret = -ENOENT;
217 goto out;
218 }
219
220 item_size = btrfs_item_size_nr(eb, slot);
221 if (item_size == sizeof(subid)) {
222 ret = btrfs_del_item(trans, uuid_root, path);
223 goto out;
224 }
225
226 move_dst = offset;
227 move_src = offset + sizeof(subid);
228 move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
229 memmove_extent_buffer(eb, move_dst, move_src, move_len);
230 btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1);
231
232out:
233 btrfs_free_path(path);
234 return ret;
235}