Diffstat (limited to 'fs/btrfs/ref-verify.c')
-rw-r--r--	fs/btrfs/ref-verify.c	1031
1 file changed, 1031 insertions, 0 deletions
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
new file mode 100644
index 000000000000..34878699d363
--- /dev/null
+++ b/fs/btrfs/ref-verify.c
@@ -0,0 +1,1031 @@
/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and the number of refs each root has for a
 * given bytenr.  This only tracks the number of direct references, not shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};
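
/*
 * For example (illustrative): if roots 5 and 257 each hold one direct
 * backref on a block, that block's roots tree holds two root_entry nodes,
 * { .root_objectid = 5, .num_refs = 1 } and
 * { .root_objectid = 257, .num_refs = 1 }.
 */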

/*
 * These represent what should exist in the extent tree and can be used to
 * verify that the extent tree is consistent: they should all match what the
 * extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these exists for every block we reference; it holds the roots and
 * references to it as well as all of the ref actions that have occurred on
 * it.  We never free it until we unmount the file system, in order to make
 * sure re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};
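
/*
 * Overview of how the structures above nest, all rooted at
 * fs_info->block_tree:
 *
 *	fs_info->block_tree (rbtree)	one block_entry per extent bytenr
 *	    be->roots (rbtree)		one root_entry per referencing root
 *	    be->refs (rbtree)		one ref_entry per distinct ref
 *	    be->actions (list)		ref_action history, oldest first
 */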

static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}
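
/*
 * comp_refs() defines a total order on the tuple (root_objectid, parent,
 * owner, offset), which is what lets ref_entry nodes live in an rbtree;
 * insert_ref_entry() below depends on this ordering the same way the plain
 * bytenr/objectid comparisons drive the block and root trees above.
 */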

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	struct stack_trace stack_trace;

	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = ra->trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);
	ra->trace_len = stack_trace.nr_entries;
}
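
/*
 * skip = 2 above drops the innermost frames from the capture so the recorded
 * trace starts at the code that actually modified the ref rather than at the
 * tracing helpers themselves.
 */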

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	struct stack_trace trace;

	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	trace.nr_entries = ra->trace_len;
	trace.entries = ra->trace;
	print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}
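
/*
 * Note the locking contract: add_block_entry() returns with
 * fs_info->ref_verify_lock still held, in both the new-entry and
 * existing-entry cases.  Every caller below is responsible for dropping the
 * lock once it is done updating the returned entry.
 */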

static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type, ret = 0;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

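	/*
	 * Inline refs are packed directly after the extent item (and after
	 * the tree_block_info for non-skinny metadata), so walk them by
	 * advancing ptr by each ref's size until we run off the end of the
	 * item.
	 */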
	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, tree_block_level = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			/* fall through */
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  &tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	u64 block_bytenr, gen;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			block_bytenr = btrfs_node_blockptr(path->nodes[level],
							   path->slots[level]);
			gen = btrfs_node_ptr_generation(path->nodes[level],
							path->slots[level]);
			eb = read_tree_block(fs_info, block_bytenr, gen);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				return -EIO;
			}
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->nodes[level-1] = eb;
			path->slots[level-1] = 0;
			path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}
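
/*
 * Together, walk_down_tree() and walk_up_tree() form an iterative depth-first
 * traversal of the extent tree: btrfs_build_ref_tree() below alternates them,
 * walking down to each leaf and back up to the next unprocessed slot, until
 * walk_up_tree() returns 1 once the root node itself is exhausted.
 */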

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
		  " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
		  "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
			  " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}
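
/*
 * An add of a normal (non-shared) data ref from a delayed-ref path would
 * look something like this (illustrative only; the exact arguments depend on
 * the call site):
 *
 *	btrfs_ref_tree_mod(root, bytenr, num_bytes, 0, root->objectid,
 *			   ino, file_offset, BTRFS_ADD_DELAYED_REF);
 */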

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root: the root we are making this modification from.
 * @bytenr: the bytenr we are modifying.
 * @num_bytes: number of bytes.
 * @parent: the parent bytenr.
 * @ref_root: the original root owner of the bytenr.
 * @owner: level in the case of metadata, inode in the case of data.
 * @offset: 0 for metadata, file offset for data.
 * @action: the action that we are doing, this is the same as the delayed ref
 *	action.
 *
 * This will add an action item for the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new
 * allocation and this block entry has history, we will delete all previous
 * actions as long as our sanity checks pass, since they are no longer needed.
 */
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
		       u64 parent, u64 ref_root, u64 owner, u64 offset,
		       int action)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int ret = 0;
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
		return 0;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	if (parent) {
		ref->parent = parent;
	} else {
		ref->root_objectid = ref_root;
		ref->owner = owner;
		ref->offset = offset;
	}
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so they match any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = root->objectid;

	/*
	 * If this is an allocation, preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent root
		 * is and the new root objectid, so let's not treat the passed
		 * in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ref);
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
				  "re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to lookup below when we modify the
			 * re->num_refs.
			 */
			ref_root = root->objectid;
			re->root_objectid = root->objectid;
			re->num_refs = 0;
		}

		spin_lock(&root->fs_info->ref_verify_lock);
		be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
				  "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, (unsigned long long)bytenr,
				  (unsigned long long)num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			kfree(re);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
					  "dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ref);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
				  "attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
				  "dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we lookup the be with !parent, but just
			 * in case catch this case so we don't panic because I
			 * didn't think of some other corner case.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  root->objectid, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&root->fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}
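
/*
 * Note that every verification failure above leaves ret set to -EINVAL,
 * which clears REF_VERIFY in the mount options: after dumping the offending
 * block entry the checker turns itself off, presumably to avoid a cascade of
 * follow-on errors against state that is already known to be bad.
 */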

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group, maybe have something to check for
	 * this case to verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}
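
/*
 * btrfs_free_ref_tree_range() drops every block entry inside
 * [start, start + len) and complains about any entry straddling either edge;
 * presumably it is called when a block group is removed, where the range
 * should either be empty or fully covered by tracked entries.
 */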

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct extent_buffer *eb;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	eb = btrfs_read_lock_root_node(fs_info->extent_root);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear
		 * on a different leaf from the original extent item.
		 */
		ret = walk_down_tree(fs_info->extent_root, path, level,
				     &bytenr, &num_bytes);
		if (ret)
			break;
		ret = walk_up_tree(fs_info->extent_root, path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}