ext4: Add a comprehensive block validity check to ext4_get_blocks()

To catch filesystem bugs or corruption which could lead to the filesystem getting severely damaged, this patch adds a facility for tracking all of the filesystem metadata blocks by contiguous regions in a red-black tree. This allows quick searching of the tree to locate extents which might overlap with filesystem metadata blocks. This facility is also used by the multi-block allocator to ensure that it is not allocating blocks out of the system zone, as well as by the routines used when reading indirect blocks and extents information from disk to make sure their contents are valid. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
author: Theodore Ts'o <tytso@mit.edu> 2009-05-17 15:38:01 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2009-05-17 15:38:01 -0400
commit: 6fd058f7791087648c683eb8572edf3be3c4c23c (patch)
tree: 0d80791532d2d022c91f20013003716eaf0afb40 /fs/ext4/block_validity.c
parent: 2ac3b6e00acb46406c993d57921f86a594aafe08 (diff)
1 files changed, 244 insertions, 0 deletions
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 000000000000..50784ef07563
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
+/*
+ *  linux/fs/ext4/block_validity.c
+ *
+ * Copyright (C) 2009
+ * Theodore Ts'o (tytso@mit.edu)
+ *
+ * Track which blocks in the filesystem are metadata blocks that
+ * should never be used as data blocks by files or directories.
+ */
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include "ext4.h"
+struct ext4_system_zone {               /* one contiguous run of system-zone (metadata) blocks */
+        struct rb_node  node;           /* linkage in the sbi->system_blks rb-tree */
+        ext4_fsblk_t    start_blk;      /* first block of the run; the key the tree is ordered by */
+        unsigned int    count;          /* number of blocks in the run */
+};
+static struct kmem_cache *ext4_system_zone_cachep;
+/*
+ * Create the slab cache from which every struct ext4_system_zone is
+ * allocated.
+ *
+ * Returns 0 on success, or -ENOMEM if the cache could not be created.
+ */
+int __init init_ext4_system_zone(void)
+{
+        ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
+                                             SLAB_RECLAIM_ACCOUNT);
+        return ext4_system_zone_cachep ? 0 : -ENOMEM;
+}
+void exit_ext4_system_zone(void)        /* counterpart of init_ext4_system_zone() */
+{
+        kmem_cache_destroy(ext4_system_zone_cachep);    /* drop the system-zone slab cache */
+}
+/*
+ * Two zones can be coalesced when entry2 begins at the block
+ * immediately following the last block of entry1.
+ *
+ * Returns 1 if the zones abut in that order, 0 otherwise.
+ */
+static inline int can_merge(struct ext4_system_zone *entry1,
+                     struct ext4_system_zone *entry2)
+{
+        ext4_fsblk_t end_of_first = entry1->start_blk + entry1->count;
+
+        return end_of_first == entry2->start_blk;
+}
+/*
+ * Mark a range of blocks as belonging to the "system zone" --- that
+ * is, filesystem metadata blocks which should never be used by
+ * inodes.
+ *
+ * The range [start_blk, start_blk + count) is recorded in the rb-tree
+ * rooted at sbi->system_blks, which is ordered by start_blk.  If
+ * start_blk falls inside an existing zone, that zone is grown in
+ * place; otherwise a new zone is allocated and linked in.  The
+ * resulting zone is then coalesced with its left neighbour when the
+ * two abut, and with every right neighbour that it abuts or overlaps,
+ * so that zones added through this function never overlap one another.
+ *
+ * Returns 0 on success, or -ENOMEM if a new zone could not be
+ * allocated.
+ */
+static int add_system_zone(struct ext4_sb_info *sbi,
+                           ext4_fsblk_t start_blk,
+                           unsigned int count)
+{
+        struct ext4_system_zone *new_entry = NULL, *entry;
+        struct rb_node **n = &sbi->system_blks.rb_node, *node;
+        struct rb_node *parent = NULL, *new_node = NULL;
+        ext4_fsblk_t new_end, entry_end;
+
+        /* Descend by start_blk, looking for a zone that contains it. */
+        while (*n) {
+                parent = *n;
+                entry = rb_entry(parent, struct ext4_system_zone, node);
+                if (start_blk < entry->start_blk)
+                        n = &(*n)->rb_left;
+                else if (start_blk >= (entry->start_blk + entry->count))
+                        n = &(*n)->rb_right;
+                else {
+                        /*
+                         * start_blk lies inside this zone: reuse it,
+                         * growing it if the new range runs past its end.
+                         */
+                        if (start_blk + count > (entry->start_blk +
+                                                 entry->count))
+                                entry->count = (start_blk + count -
+                                                entry->start_blk);
+                        new_node = *n;
+                        new_entry = rb_entry(new_node, struct ext4_system_zone,
+                                             node);
+                        break;
+                }
+        }
+        if (!new_entry) {
+                new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+                                             GFP_KERNEL);
+                if (!new_entry)
+                        return -ENOMEM;
+                new_entry->start_blk = start_blk;
+                new_entry->count = count;
+                new_node = &new_entry->node;
+                /* parent and n still describe the empty slot the search ended on */
+                rb_link_node(new_node, parent, n);
+                rb_insert_color(new_node, &sbi->system_blks);
+        }
+        /* Can we merge to the left? */
+        node = rb_prev(new_node);
+        if (node) {
+                entry = rb_entry(node, struct ext4_system_zone, node);
+                if (can_merge(entry, new_entry)) {
+                        new_entry->start_blk = entry->start_blk;
+                        new_entry->count += entry->count;
+                        rb_erase(node, &sbi->system_blks);
+                        kmem_cache_free(ext4_system_zone_cachep, entry);
+                }
+        }
+        /*
+         * Merge to the right.  The zone may now reach into (not merely
+         * up to) one or more following zones, either because it was
+         * grown in place above or because it was inserted in front of a
+         * zone it overlaps.  Absorb each such neighbour, keeping the
+         * furthest end, so that a lookup keyed on start_blk can never
+         * step past a zone that actually covers the block.
+         */
+        while ((node = rb_next(new_node)) != NULL) {
+                entry = rb_entry(node, struct ext4_system_zone, node);
+                new_end = new_entry->start_blk + new_entry->count;
+                if (entry->start_blk > new_end)
+                        break;
+                entry_end = entry->start_blk + entry->count;
+                if (entry_end > new_end)
+                        new_entry->count = entry_end - new_entry->start_blk;
+                rb_erase(node, &sbi->system_blks);
+                kmem_cache_free(ext4_system_zone_cachep, entry);
+        }
+        return 0;
+}
+/*
+ * Dump every system zone, in ascending block order, to the kernel log
+ * as one "System zones: a-b, c-d, ..." line, where each pair is the
+ * first and last block of a zone.
+ */
+static void debug_print_tree(struct ext4_sb_info *sbi)
+{
+        struct rb_node *node;
+        struct ext4_system_zone *entry;
+        int first = 1;
+
+        printk(KERN_INFO "System zones: ");
+        for (node = rb_first(&sbi->system_blks); node; node = rb_next(node)) {
+                entry = rb_entry(node, struct ext4_system_zone, node);
+                /* KERN_CONT: this fragment continues the line started above */
+                printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
+                       entry->start_blk, entry->start_blk + entry->count - 1);
+                first = 0;
+        }
+        printk(KERN_CONT "\n");
+}
+/*
+ * Build the system-zone tree for a filesystem.
+ *
+ * If the BLOCK_VALIDITY mount option is clear, any existing tree is
+ * released and nothing is built.  If the option is set and a tree
+ * already exists, it is left untouched.  Otherwise each block group's
+ * block bitmap, inode bitmap and inode table are added, together with
+ * the leading blocks of selected groups that carry a superblock copy.
+ *
+ * Returns 0 on success or the error from add_system_zone().  On
+ * failure the partially built tree is released, so that a later call
+ * does not mistake it for a complete one.
+ */
+int ext4_setup_system_zone(struct super_block *sb)
+{
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        struct ext4_group_desc *gdp;
+        ext4_group_t i;
+        int flex_size = ext4_flex_bg_size(sbi);
+        int ret;
+
+        if (!test_opt(sb, BLOCK_VALIDITY)) {
+                if (sbi->system_blks.rb_node)
+                        ext4_release_system_zone(sb);
+                return 0;
+        }
+        /* A non-empty tree means setup has already been done. */
+        if (sbi->system_blks.rb_node)
+                return 0;
+        for (i = 0; i < ngroups; i++) {
+                /*
+                 * s_gdb_count + 1 blocks from the start of the group:
+                 * presumably the superblock copy plus the group
+                 * descriptor blocks --- confirm against the on-disk
+                 * layout.
+                 */
+                if (ext4_bg_has_super(sb, i) &&
+                    ((i < 5) || ((i % flex_size) == 0))) {
+                        ret = add_system_zone(sbi,
+                                        ext4_group_first_block_no(sb, i),
+                                        sbi->s_gdb_count + 1);
+                        if (ret)
+                                goto err;
+                }
+                gdp = ext4_get_group_desc(sb, i, NULL);
+                ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+                if (ret)
+                        goto err;
+                ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+                if (ret)
+                        goto err;
+                ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
+                                sbi->s_itb_per_group);
+                if (ret)
+                        goto err;
+        }
+        if (test_opt(sb, DEBUG))
+                debug_print_tree(sbi);
+        return 0;
+err:
+        /* Do not leave a half-populated tree behind. */
+        ext4_release_system_zone(sb);
+        return ret;
+}
+/*
+ * Called when the filesystem is unmounted.
+ *
+ * Frees every zone in the system-zone tree and leaves the tree empty.
+ * The tree is dismantled leaf by leaf without any rebalancing: walk
+ * down to a childless node, free it, clear the parent's link to it,
+ * and resume from the parent.
+ */
+void ext4_release_system_zone(struct super_block *sb)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        struct rb_node *cur = sbi->system_blks.rb_node;
+        struct rb_node *up;
+
+        while (cur) {
+                /* Descend, preferring the left child, until we hit a leaf */
+                while (cur->rb_left || cur->rb_right)
+                        cur = cur->rb_left ? cur->rb_left : cur->rb_right;
+
+                /* Remember the parent before the leaf is freed */
+                up = rb_parent(cur);
+                kmem_cache_free(ext4_system_zone_cachep,
+                                rb_entry(cur, struct ext4_system_zone, node));
+
+                /* Detach the freed leaf so the parent becomes a candidate */
+                if (!up)
+                        sbi->system_blks.rb_node = NULL;
+                else if (up->rb_left == cur)
+                        up->rb_left = NULL;
+                else if (up->rb_right == cur)
+                        up->rb_right = NULL;
+                cur = up;
+        }
+        sbi->system_blks.rb_node = NULL;
+}
+/*
+ * Check whether the block region [start_blk, start_blk + count) may be
+ * used for data.
+ *
+ * Returns 1 if the region starts after s_first_data_block, ends within
+ * the filesystem's block count, and overlaps no zone in the
+ * system-zone tree; returns 0 otherwise.  When the tree is empty only
+ * the bounds checks apply.
+ */
+int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
+                          unsigned int count)
+{
+        struct ext4_system_zone *entry;
+        struct rb_node *n = sbi->system_blks.rb_node;
+
+        /*
+         * The middle test rejects a region whose end wraps around the
+         * block-number type; without it such a region would appear to
+         * end below the filesystem size and pass the last test.
+         */
+        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+            (start_blk + count < start_blk) ||
+            (start_blk + count > ext4_blocks_count(sbi->s_es)))
+                return 0;
+        while (n) {
+                entry = rb_entry(n, struct ext4_system_zone, node);
+                if (start_blk + count - 1 < entry->start_blk)
+                        n = n->rb_left;         /* region ends before this zone */
+                else if (start_blk >= (entry->start_blk + entry->count))
+                        n = n->rb_right;        /* region starts after this zone */
+                else
+                        return 0;               /* region overlaps this zone */
+        }
+        return 1;
+}
author	Theodore Ts'o <tytso@mit.edu>	2009-05-17 15:38:01 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2009-05-17 15:38:01 -0400
commit	6fd058f7791087648c683eb8572edf3be3c4c23c (patch)
tree	0d80791532d2d022c91f20013003716eaf0afb40 /fs/ext4/block_validity.c
parent	2ac3b6e00acb46406c993d57921f86a594aafe08 (diff)

diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c new file mode 100644 index 000000000000..50784ef07563 --- /dev/null +++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
	1	/*
	2	* linux/fs/ext4/block_validity.c
	3	*
	4	* Copyright (C) 2009
	5	* Theodore Ts'o (tytso@mit.edu)
	6	*
	7	* Track which blocks in the filesystem are metadata blocks that
	8	* should never be used as data blocks by files or directories.
	9	*/
	10
	11	#include <linux/time.h>
	12	#include <linux/fs.h>
	13	#include <linux/namei.h>
	14	#include <linux/quotaops.h>
	15	#include <linux/buffer_head.h>
	16	#include <linux/module.h>
	17	#include <linux/swap.h>
	18	#include <linux/pagemap.h>
	19	#include <linux/version.h>
	20	#include <linux/blkdev.h>
	21	#include <linux/mutex.h>
	22	#include "ext4.h"
	23
	24	struct ext4_system_zone {
	25	struct rb_node node;
	26	ext4_fsblk_t start_blk;
	27	unsigned int count;
	28	};
	29
	30	static struct kmem_cache *ext4_system_zone_cachep;
	31
	32	int __init init_ext4_system_zone(void)
	33	{
	34	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
	35	SLAB_RECLAIM_ACCOUNT);
	36	if (ext4_system_zone_cachep == NULL)
	37	return -ENOMEM;
	38	return 0;
	39	}
	40
	41	void exit_ext4_system_zone(void)
	42	{
	43	kmem_cache_destroy(ext4_system_zone_cachep);
	44	}
	45
	46	static inline int can_merge(struct ext4_system_zone *entry1,
	47	struct ext4_system_zone *entry2)
	48	{
	49	if ((entry1->start_blk + entry1->count) == entry2->start_blk)
	50	return 1;
	51	return 0;
	52	}
	53
	54	/*
	55	* Mark a range of blocks as belonging to the "system zone" --- that
	56	* is, filesystem metadata blocks which should never be used by
	57	* inodes.
	58	*/
	59	static int add_system_zone(struct ext4_sb_info *sbi,
	60	ext4_fsblk_t start_blk,
	61	unsigned int count)
	62	{
	63	struct ext4_system_zone *new_entry = NULL, *entry;
	64	struct rb_node **n = &sbi->system_blks.rb_node, *node;
	65	struct rb_node *parent = NULL, *new_node = NULL;
	66
	67	while (*n) {
	68	parent = *n;
	69	entry = rb_entry(parent, struct ext4_system_zone, node);
	70	if (start_blk < entry->start_blk)
	71	n = &(*n)->rb_left;
	72	else if (start_blk >= (entry->start_blk + entry->count))
	73	n = &(*n)->rb_right;
	74	else {
	75	if (start_blk + count > (entry->start_blk +
	76	entry->count))
	77	entry->count = (start_blk + count -
	78	entry->start_blk);
	79	new_node = *n;
	80	new_entry = rb_entry(new_node, struct ext4_system_zone,
	81	node);
	82	break;
	83	}
	84	}
	85
	86	if (!new_entry) {
	87	new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
	88	GFP_KERNEL);
	89	if (!new_entry)
	90	return -ENOMEM;
	91	new_entry->start_blk = start_blk;
	92	new_entry->count = count;
	93	new_node = &new_entry->node;
	94
	95	rb_link_node(new_node, parent, n);
	96	rb_insert_color(new_node, &sbi->system_blks);
	97	}
	98
	99	/* Can we merge to the left? */
	100	node = rb_prev(new_node);
	101	if (node) {
	102	entry = rb_entry(node, struct ext4_system_zone, node);
	103	if (can_merge(entry, new_entry)) {
	104	new_entry->start_blk = entry->start_blk;
	105	new_entry->count += entry->count;
	106	rb_erase(node, &sbi->system_blks);
	107	kmem_cache_free(ext4_system_zone_cachep, entry);
	108	}
	109	}
	110
	111	/* Can we merge to the right? */
	112	node = rb_next(new_node);
	113	if (node) {
	114	entry = rb_entry(node, struct ext4_system_zone, node);
	115	if (can_merge(new_entry, entry)) {
	116	new_entry->count += entry->count;
	117	rb_erase(node, &sbi->system_blks);
	118	kmem_cache_free(ext4_system_zone_cachep, entry);
	119	}
	120	}
	121	return 0;
	122	}
	123
	124	static void debug_print_tree(struct ext4_sb_info *sbi)
	125	{
	126	struct rb_node *node;
	127	struct ext4_system_zone *entry;
	128	int first = 1;
	129
	130	printk(KERN_INFO "System zones: ");
	131	node = rb_first(&sbi->system_blks);
	132	while (node) {
	133	entry = rb_entry(node, struct ext4_system_zone, node);
	134	printk("%s%llu-%llu", first ? "" : ", ",
	135	entry->start_blk, entry->start_blk + entry->count - 1);
	136	first = 0;
	137	node = rb_next(node);
	138	}
	139	printk("\n");
	140	}
	141
	142	int ext4_setup_system_zone(struct super_block *sb)
	143	{
	144	ext4_group_t ngroups = ext4_get_groups_count(sb);
	145	struct ext4_sb_info *sbi = EXT4_SB(sb);
	146	struct ext4_group_desc *gdp;
	147	ext4_group_t i;
	148	int flex_size = ext4_flex_bg_size(sbi);
	149	int ret;
	150
	151	if (!test_opt(sb, BLOCK_VALIDITY)) {
	152	if (EXT4_SB(sb)->system_blks.rb_node)
	153	ext4_release_system_zone(sb);
	154	return 0;
	155	}
	156	if (EXT4_SB(sb)->system_blks.rb_node)
	157	return 0;
	158
	159	for (i=0; i < ngroups; i++) {
	160	if (ext4_bg_has_super(sb, i) &&
	161	((i < 5) || ((i % flex_size) == 0)))
	162	add_system_zone(sbi, ext4_group_first_block_no(sb, i),
	163	sbi->s_gdb_count + 1);
	164	gdp = ext4_get_group_desc(sb, i, NULL);
	165	ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
	166	if (ret)
	167	return ret;
	168	ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
	169	if (ret)
	170	return ret;
	171	ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
	172	sbi->s_itb_per_group);
	173	if (ret)
	174	return ret;
	175	}
	176
	177	if (test_opt(sb, DEBUG))
	178	debug_print_tree(EXT4_SB(sb));
	179	return 0;
	180	}
	181
	182	/* Called when the filesystem is unmounted */
	183	void ext4_release_system_zone(struct super_block *sb)
	184	{
	185	struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
	186	struct rb_node *parent;
	187	struct ext4_system_zone *entry;
	188
	189	while (n) {
	190	/* Do the node's children first */
	191	if (n->rb_left) {
	192	n = n->rb_left;
	193	continue;
	194	}
	195	if (n->rb_right) {
	196	n = n->rb_right;
	197	continue;
	198	}
	199	/*
	200	* The node has no children; free it, and then zero
	201	* out parent's link to it. Finally go to the
	202	* beginning of the loop and try to free the parent
	203	* node.
	204	*/
	205	parent = rb_parent(n);
	206	entry = rb_entry(n, struct ext4_system_zone, node);
	207	kmem_cache_free(ext4_system_zone_cachep, entry);
	208	if (!parent)
	209	EXT4_SB(sb)->system_blks.rb_node = NULL;
	210	else if (parent->rb_left == n)
	211	parent->rb_left = NULL;
	212	else if (parent->rb_right == n)
	213	parent->rb_right = NULL;
	214	n = parent;
	215	}
	216	EXT4_SB(sb)->system_blks.rb_node = NULL;
	217	}
	218
	219	/*
	220	* Returns 1 if the passed-in block region (start_blk,
	221	* start_blk+count) is valid; 0 if some part of the block region
	222	* overlaps with filesystem metadata blocks.
	223	*/
	224	int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
	225	unsigned int count)
	226	{
	227	struct ext4_system_zone *entry;
	228	struct rb_node *n = sbi->system_blks.rb_node;
	229
	230	if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
	231	(start_blk + count > ext4_blocks_count(sbi->s_es)))
	232	return 0;
	233	while (n) {
	234	entry = rb_entry(n, struct ext4_system_zone, node);
	235	if (start_blk + count - 1 < entry->start_blk)
	236	n = n->rb_left;
	237	else if (start_blk >= (entry->start_blk + entry->count))
	238	n = n->rb_right;
	239	else
	240	return 0;
	241	}
	242	return 1;
	243	}
	244