aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2011-03-16 13:42:43 -0400
committerJosef Bacik <josef@redhat.com>2011-03-17 14:21:37 -0400
commita826d6dcb32d811b4c81df57a5ef1367516586b0 (patch)
tree4853d5acf6cde4dfb0eeb19f5c74dc3d7934359b /fs/btrfs
parent850265335f792f5d39ab24e5fb7160bac28d77e5 (diff)
Btrfs: check items for correctness as we search
Currently, if we have corrupted items, things will blow up in spectacular ways. So as we read in blocks and they are leaves, check the entire leaf to make sure all of the items are correct and point to valid parts in the leaf for the item data they are responsible for. If the item is corrupt we will kick back EIO and not read any of the copies, since they are likely to not be correct either. This will catch generic corruptions; it will be up to the individual callers of btrfs_search_slot to make sure their items are right. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.c123
-rw-r--r--fs/btrfs/disk-io.c90
-rw-r--r--fs/btrfs/extent-tree.c5
-rw-r--r--fs/btrfs/extent_io.h1
4 files changed, 95 insertions, 124 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b5baff0dccfe..73e53009e126 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -732,122 +732,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
732 return btrfs_item_offset_nr(leaf, nr - 1); 732 return btrfs_item_offset_nr(leaf, nr - 1);
733} 733}
734 734
735/*
736 * extra debugging checks to make sure all the items in a key are
737 * well formed and in the proper order
738 */
739static int check_node(struct btrfs_root *root, struct btrfs_path *path,
740 int level)
741{
742 struct extent_buffer *parent = NULL;
743 struct extent_buffer *node = path->nodes[level];
744 struct btrfs_disk_key parent_key;
745 struct btrfs_disk_key node_key;
746 int parent_slot;
747 int slot;
748 struct btrfs_key cpukey;
749 u32 nritems = btrfs_header_nritems(node);
750
751 if (path->nodes[level + 1])
752 parent = path->nodes[level + 1];
753
754 slot = path->slots[level];
755 BUG_ON(nritems == 0);
756 if (parent) {
757 parent_slot = path->slots[level + 1];
758 btrfs_node_key(parent, &parent_key, parent_slot);
759 btrfs_node_key(node, &node_key, 0);
760 BUG_ON(memcmp(&parent_key, &node_key,
761 sizeof(struct btrfs_disk_key)));
762 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
763 btrfs_header_bytenr(node));
764 }
765 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
766 if (slot != 0) {
767 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
768 btrfs_node_key(node, &node_key, slot);
769 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
770 }
771 if (slot < nritems - 1) {
772 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
773 btrfs_node_key(node, &node_key, slot);
774 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
775 }
776 return 0;
777}
778
779/*
780 * extra checking to make sure all the items in a leaf are
781 * well formed and in the proper order
782 */
783static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
784 int level)
785{
786 struct extent_buffer *leaf = path->nodes[level];
787 struct extent_buffer *parent = NULL;
788 int parent_slot;
789 struct btrfs_key cpukey;
790 struct btrfs_disk_key parent_key;
791 struct btrfs_disk_key leaf_key;
792 int slot = path->slots[0];
793
794 u32 nritems = btrfs_header_nritems(leaf);
795
796 if (path->nodes[level + 1])
797 parent = path->nodes[level + 1];
798
799 if (nritems == 0)
800 return 0;
801
802 if (parent) {
803 parent_slot = path->slots[level + 1];
804 btrfs_node_key(parent, &parent_key, parent_slot);
805 btrfs_item_key(leaf, &leaf_key, 0);
806
807 BUG_ON(memcmp(&parent_key, &leaf_key,
808 sizeof(struct btrfs_disk_key)));
809 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
810 btrfs_header_bytenr(leaf));
811 }
812 if (slot != 0 && slot < nritems - 1) {
813 btrfs_item_key(leaf, &leaf_key, slot);
814 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
815 if (comp_keys(&leaf_key, &cpukey) <= 0) {
816 btrfs_print_leaf(root, leaf);
817 printk(KERN_CRIT "slot %d offset bad key\n", slot);
818 BUG_ON(1);
819 }
820 if (btrfs_item_offset_nr(leaf, slot - 1) !=
821 btrfs_item_end_nr(leaf, slot)) {
822 btrfs_print_leaf(root, leaf);
823 printk(KERN_CRIT "slot %d offset bad\n", slot);
824 BUG_ON(1);
825 }
826 }
827 if (slot < nritems - 1) {
828 btrfs_item_key(leaf, &leaf_key, slot);
829 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
830 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
831 if (btrfs_item_offset_nr(leaf, slot) !=
832 btrfs_item_end_nr(leaf, slot + 1)) {
833 btrfs_print_leaf(root, leaf);
834 printk(KERN_CRIT "slot %d offset bad\n", slot);
835 BUG_ON(1);
836 }
837 }
838 BUG_ON(btrfs_item_offset_nr(leaf, 0) +
839 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
840 return 0;
841}
842
843static noinline int check_block(struct btrfs_root *root,
844 struct btrfs_path *path, int level)
845{
846 return 0;
847 if (level == 0)
848 return check_leaf(root, path, level);
849 return check_node(root, path, level);
850}
851 735
852/* 736/*
853 * search for key in the extent_buffer. The items start at offset p, 737 * search for key in the extent_buffer. The items start at offset p,
@@ -1188,7 +1072,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1188 } 1072 }
1189 } 1073 }
1190 /* double check we haven't messed things up */ 1074 /* double check we haven't messed things up */
1191 check_block(root, path, level);
1192 if (orig_ptr != 1075 if (orig_ptr !=
1193 btrfs_node_blockptr(path->nodes[level], path->slots[level])) 1076 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1194 BUG(); 1077 BUG();
@@ -1798,12 +1681,6 @@ cow_done:
1798 if (!cow) 1681 if (!cow)
1799 btrfs_unlock_up_safe(p, level + 1); 1682 btrfs_unlock_up_safe(p, level + 1);
1800 1683
1801 ret = check_block(root, p, level);
1802 if (ret) {
1803 ret = -1;
1804 goto done;
1805 }
1806
1807 ret = bin_search(b, key, level, &slot); 1684 ret = bin_search(b, key, level, &slot);
1808 1685
1809 if (level != 0) { 1686 if (level != 0) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 495b1ac45f8c..9f31e110b481 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -323,6 +323,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
323 int num_copies = 0; 323 int num_copies = 0;
324 int mirror_num = 0; 324 int mirror_num = 0;
325 325
326 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
326 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 327 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
327 while (1) { 328 while (1) {
328 ret = read_extent_buffer_pages(io_tree, eb, start, 1, 329 ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +332,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
331 !verify_parent_transid(io_tree, eb, parent_transid)) 332 !verify_parent_transid(io_tree, eb, parent_transid))
332 return ret; 333 return ret;
333 334
335 /*
336 * This buffer's crc is fine, but its contents are corrupted, so
337 * there is no reason to read the other copies, they won't be
338 * any less wrong.
339 */
340 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
341 return ret;
342
334 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 343 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
335 eb->start, eb->len); 344 eb->start, eb->len);
336 if (num_copies == 1) 345 if (num_copies == 1)
@@ -419,6 +428,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
419 return ret; 428 return ret;
420} 429}
421 430
431#define CORRUPT(reason, eb, root, slot) \
432 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
433 "root=%llu, slot=%d\n", reason, \
434 (unsigned long long)btrfs_header_bytenr(eb), \
435 (unsigned long long)root->objectid, slot)
436
437static noinline int check_leaf(struct btrfs_root *root,
438 struct extent_buffer *leaf)
439{
440 struct btrfs_key key;
441 struct btrfs_key leaf_key;
442 u32 nritems = btrfs_header_nritems(leaf);
443 int slot;
444
445 if (nritems == 0)
446 return 0;
447
448 /* Check the 0 item */
449 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
450 BTRFS_LEAF_DATA_SIZE(root)) {
451 CORRUPT("invalid item offset size pair", leaf, root, 0);
452 return -EIO;
453 }
454
455 /*
456 * Check to make sure each items keys are in the correct order and their
457 * offsets make sense. We only have to loop through nritems-1 because
458 * we check the current slot against the next slot, which verifies the
459 * next slot's offset+size makes sense and that the current's slot
460 * offset is correct.
461 */
462 for (slot = 0; slot < nritems - 1; slot++) {
463 btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
464 btrfs_item_key_to_cpu(leaf, &key, slot + 1);
465
466 /* Make sure the keys are in the right order */
467 if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
468 CORRUPT("bad key order", leaf, root, slot);
469 return -EIO;
470 }
471
472 /*
473 * Make sure the offset and ends are right, remember that the
474 * item data starts at the end of the leaf and grows towards the
475 * front.
476 */
477 if (btrfs_item_offset_nr(leaf, slot) !=
478 btrfs_item_end_nr(leaf, slot + 1)) {
479 CORRUPT("slot offset bad", leaf, root, slot);
480 return -EIO;
481 }
482
483 /*
484 * Check to make sure that we don't point outside of the leaf,
485 * just incase all the items are consistent to eachother, but
486 * all point outside of the leaf.
487 */
488 if (btrfs_item_end_nr(leaf, slot) >
489 BTRFS_LEAF_DATA_SIZE(root)) {
490 CORRUPT("slot end outside of leaf", leaf, root, slot);
491 return -EIO;
492 }
493 }
494
495 return 0;
496}
497
422#ifdef CONFIG_DEBUG_LOCK_ALLOC 498#ifdef CONFIG_DEBUG_LOCK_ALLOC
423void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) 499void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
424{ 500{
@@ -485,8 +561,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
485 btrfs_set_buffer_lockdep_class(eb, found_level); 561 btrfs_set_buffer_lockdep_class(eb, found_level);
486 562
487 ret = csum_tree_block(root, eb, 1); 563 ret = csum_tree_block(root, eb, 1);
488 if (ret) 564 if (ret) {
489 ret = -EIO; 565 ret = -EIO;
566 goto err;
567 }
568
569 /*
570 * If this is a leaf block and it is corrupt, set the corrupt bit so
571 * that we don't try and read the other copies of this block, just
572 * return -EIO.
573 */
574 if (found_level == 0 && check_leaf(root, eb)) {
575 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
576 ret = -EIO;
577 }
490 578
491 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 579 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
492 end = eb->start + end - 1; 580 end = eb->start + end - 1;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a8f4e8d2ba60..cd794c35a636 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4754,6 +4754,11 @@ pin:
4754 } 4754 }
4755 } 4755 }
4756out: 4756out:
4757 /*
4758 * Deleting the buffer, clear the corrupt flag since it doesn't matter
4759 * anymore.
4760 */
4761 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
4757 btrfs_put_block_group(cache); 4762 btrfs_put_block_group(cache);
4758} 4763}
4759 4764
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 9318dfefd59c..f62c5442835d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -31,6 +31,7 @@
31#define EXTENT_BUFFER_UPTODATE 0 31#define EXTENT_BUFFER_UPTODATE 0
32#define EXTENT_BUFFER_BLOCKING 1 32#define EXTENT_BUFFER_BLOCKING 1
33#define EXTENT_BUFFER_DIRTY 2 33#define EXTENT_BUFFER_DIRTY 2
34#define EXTENT_BUFFER_CORRUPT 3
34 35
35/* these are flags for extent_clear_unlock_delalloc */ 36/* these are flags for extent_clear_unlock_delalloc */
36#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 37#define EXTENT_CLEAR_UNLOCK_PAGE 0x1