path: root/fs/btrfs/ref-verify.c
author    Josef Bacik <josef@toxicpanda.com>  2017-09-29 15:43:50 -0400
committer David Sterba <dsterba@suse.com>  2017-10-30 07:28:00 -0400
commit    fd708b81d972a0714b02a60eb4792fdbf15868c4 (patch)
tree      218197c736b2e9755f3f31796f46102a56400082 /fs/btrfs/ref-verify.c
parent    84f7d8e6242ceb377c7af10a7133c653cc7fea5f (diff)
Btrfs: add an extent ref verify tool
We were having corruption issues that were tied back to problems with the
extent tree.  In order to track them down I built this tool to try and find
the culprit, which was pretty successful.  If you compile with this tool
enabled, it will live-verify every ref update that the fs makes and make
sure it is consistent and valid.  I've run this through with xfstests and
haven't gotten any false positives.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update error messages, add fixup from Dan Carpenter to handle errors of
  read_tree_block ]
Signed-off-by: David Sterba <dsterba@suse.com>
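[ usage sketch, not part of this file: the verifier is wired up elsewhere in
  this series -- assuming that plumbing, it is compiled in via the
  CONFIG_BTRFS_FS_REF_VERIFY Kconfig switch and enabled per mount with the
  option that sets the REF_VERIFY bit tested throughout the code below,
  e.g. "mount -o ref_verify <dev> <mnt>".  Entries are kept until unmount,
  so expect extra memory use proportional to the number of tracked extents. ]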
Diffstat (limited to 'fs/btrfs/ref-verify.c')
-rw-r--r--  fs/btrfs/ref-verify.c | 1031
1 file changed, 1031 insertions, 0 deletions
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
new file mode 100644
index 000000000000..34878699d363
--- /dev/null
+++ b/fs/btrfs/ref-verify.c
@@ -0,0 +1,1031 @@
/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and number of refs each root has for a given
 * bytenr.  This just tracks the number of direct references, not shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These are meant to represent what should exist in the extent tree; they can
 * be used to verify that the extent tree is consistent, as these should all
 * match what the extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

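/* Maximum number of stack frames recorded per ref action. */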
#define MAX_TRACE 16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these for every block we reference; it holds the roots and references
 * to it as well as all of the ref actions that have occurred to it.  We never
 * free it until we unmount the file system in order to make sure
 * re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};

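/*
 * Standard rbtree insertion, keyed by bytenr: walk down to the insertion
 * point and link the new node.  If an entry with the same bytenr already
 * exists it is returned instead (the caller decides whether to merge into it
 * and free its own copy); NULL means the new entry was inserted.
 */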
static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

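/*
 * Total order on ref_entry: compare the tuple (root_objectid, parent, owner,
 * offset) field by field; num_refs does not participate in the key.
 */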
static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

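/*
 * With CONFIG_STACKTRACE we capture up to MAX_TRACE frames for each ref
 * action; skip = 2 is intended to drop the tracing frames themselves so the
 * recorded trace starts at the ref-verify caller.
 */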
#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	struct stack_trace stack_trace;

	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = ra->trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);
	ra->trace_len = stack_trace.nr_entries;
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	struct stack_trace trace;

	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	trace.nr_entries = ra->trace_len;
	trace.entries = ra->trace;
	print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

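/*
 * Locking contract: on success add_block_entry() returns with
 * fs_info->ref_verify_lock held (on both the new-entry and existing-entry
 * paths), and the caller must drop it when done.  The -ENOMEM path returns
 * before the lock is ever taken.
 */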
static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

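/*
 * An EXTENT_ITEM/METADATA_ITEM is followed by its inline refs packed back to
 * back: each starts with a btrfs_extent_inline_ref whose payload depends on
 * the ref type, so we step through the item by
 * btrfs_extent_inline_ref_size() until we run off the end of the item.
 */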
static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type, ret = 0;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, tree_block_level = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
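			/*
			 * Fall through: metadata items take the same
			 * process_extent_item() path, they just don't carry
			 * the extent size in key.offset.
			 */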
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  &tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	u64 block_bytenr, gen;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			block_bytenr = btrfs_node_blockptr(path->nodes[level],
							   path->slots[level]);
			gen = btrfs_node_ptr_generation(path->nodes[level],
							path->slots[level]);
			eb = read_tree_block(fs_info, block_bytenr, gen);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				return -EIO;
			}
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->nodes[level-1] = eb;
			path->slots[level-1] = 0;
			path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
" Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
" ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root: the root we are making this modification from.
 * @bytenr: the bytenr we are modifying.
 * @num_bytes: number of bytes.
 * @parent: the parent bytenr.
 * @ref_root: the original root owner of the bytenr.
 * @owner: level in the case of metadata, inode in the case of data.
 * @offset: 0 for metadata, file offset for data.
 * @action: the action that we are doing, this is the same as the delayed ref
 *	action.
 *
 * This will add an action item for the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new allocation
 * and this block entry has history, we will delete all previous actions as
 * long as our sanity checks pass, since they are no longer needed.
 */
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
		       u64 parent, u64 ref_root, u64 owner, u64 offset,
		       int action)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int ret = 0;
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
		return 0;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	if (parent) {
		ref->parent = parent;
	} else {
		ref->root_objectid = ref_root;
		ref->owner = owner;
		ref->offset = offset;
	}
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = root->objectid;

	/*
	 * This is an allocation, preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent
		 * root is and the new root objectid, so let's not treat the
		 * passed in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ref);
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
			"re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to look up below when we modify
			 * re->num_refs.
			 */
			ref_root = root->objectid;
			re->root_objectid = root->objectid;
			re->num_refs = 0;
		}

		spin_lock(&root->fs_info->ref_verify_lock);
		be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, (unsigned long long)bytenr,
				  (unsigned long long)num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
"dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ref);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
"attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
"dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we look up the be with !parent, but catch
			 * this case just so we don't panic because of some
			 * other corner case I didn't think of.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  root->objectid, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&root->fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

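/*
 * Drop every block entry that falls inside [start, start + len), e.g. when
 * that range goes away with its block group, and complain loudly about any
 * entry that straddles a boundary of the range.
 */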
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group; maybe we should have something here
	 * to check for this case and verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				"block entry overlaps a block group [%llu,%llu]!",
				start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				"block entry overlaps a block group [%llu,%llu]!",
				start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct extent_buffer *eb;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	eb = btrfs_read_lock_root_node(fs_info->extent_root);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear on
		 * a different leaf from the original extent item.
		 */
		ret = walk_down_tree(fs_info->extent_root, path, level,
				     &bytenr, &num_bytes);
		if (ret)
			break;
		ret = walk_up_tree(fs_info->extent_root, path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}