From 78f1ddbb498283c2445c11b0dfa666424c301803 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Jul 2009 23:09:47 -0400 Subject: ext4: Avoid null pointer dereference when decoding EROFS w/o a journal We need to check to make sure a journal is present before checking the journal flags in ext4_decode_error(). Signed-off-by: Eric Sesterhenn Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f4f079e6b9a..fe3f376b7df2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -344,7 +344,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, errstr = "Out of memory"; break; case -EROFS: - if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) + if (!sb || (EXT4_SB(sb)->s_journal && + EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) errstr = "Journal has aborted"; else errstr = "Readonly filesystem"; -- cgit v1.2.2 From bf43d84b185e2ff54598f8c58a5a8e63148b6e90 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 17 Aug 2009 23:48:51 -0400 Subject: ext4: reject too-large filesystems on 32-bit kernels ext4 will happily mount a > 16T filesystem on a 32-bit box, but this is not safe; writes to the block device will wrap past 16T and the page cache can't index past 16T (232 index * 4k pages). Adding another test to the existing "too many sectors" test should do the trick. Add a comment, a relevant return value, and fix the reference to the CONFIG_LBD(AF) option as well. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fe3f376b7df2..de67c3657b83 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2550,12 +2550,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + /* + * Test whether we have more sectors than will fit in sector_t, + * and whether the max offset is addressable by the page cache. + */ + if ((ext4_blocks_count(es) > + (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || + (ext4_blocks_count(es) > + (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { ext4_msg(sb, KERN_ERR, "filesystem" - " too large to mount safely"); + " too large to mount safely on this system"); if (sizeof(sector_t) < 8) ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); + ret = -EFBIG; goto failed_mount; } -- cgit v1.2.2 From a13fb1a4533f26c1e2b0204d5283b696689645af Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 18 Aug 2009 00:20:23 -0400 Subject: ext4: Add feature set check helper for mount & remount paths A user reported that although his root ext4 filesystem was mounting fine, other filesystems would not mount, with the: "Filesystem with huge files cannot be mounted RDWR without CONFIG_LBDAF" error on his 32-bit box built without CONFIG_LBDAF. This is because the test at mount time for this situation was not being re-checked on remount, and the normal boot process makes an ro->rw transition, so this was being missed. Refactor to make a common helper function to test the filesystem features against the type of mount request (RO vs. RW) so that we stay consistent. Addresses Red-Hat-Bugzilla: #517650 Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 91 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 42 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index de67c3657b83..4037fe0b5a5c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2254,6 +2254,49 @@ static struct kobj_type ext4_ktype = { .release = ext4_sb_release, }; +/* + * Check whether this filesystem can be mounted based on + * the features present and the RDONLY/RDWR mount requested. + * Returns 1 if this filesystem can be mounted as requested, + * 0 if it cannot be. + */ +static int ext4_feature_set_ok(struct super_block *sb, int readonly) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { + ext4_msg(sb, KERN_ERR, + "Couldn't mount because of " + "unsupported optional features (%x)", + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & + ~EXT4_FEATURE_INCOMPAT_SUPP)); + return 0; + } + + if (readonly) + return 1; + + /* Check that feature set is OK for a read-write mount */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { + ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " + "unsupported optional features (%x)", + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & + ~EXT4_FEATURE_RO_COMPAT_SUPP)); + return 0; + } + /* + * Large file size enabled file system can only be mounted + * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF + */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (sizeof(blkcnt_t) < sizeof(u64)) { + ext4_msg(sb, KERN_ERR, "Filesystem with huge files " + "cannot be mounted RDWR without " + "CONFIG_LBDAF"); + return 0; + } + } + return 1; +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) @@ -2275,7 +2318,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned int db_count; unsigned int i; int needs_recovery, has_huge_files; - int features; __u64 blocks_count; int err; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; @@ -2402,39 +2444,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * previously didn't change the revision level when setting the flags, * so there is a chance incompat flags are set on a rev 0 filesystem. */ - features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); - if (features) { - ext4_msg(sb, KERN_ERR, - "Couldn't mount because of " - "unsupported optional features (%x)", - (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & - ~EXT4_FEATURE_INCOMPAT_SUPP)); - goto failed_mount; - } - features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); - if (!(sb->s_flags & MS_RDONLY) && features) { - ext4_msg(sb, KERN_ERR, - "Couldn't mount RDWR because of " - "unsupported optional features (%x)", - (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & - ~EXT4_FEATURE_RO_COMPAT_SUPP)); + if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) goto failed_mount; - } - has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); - if (has_huge_files) { - /* - * Large file size enabled file system can only be - * mount if kernel is build with CONFIG_LBDAF - */ - if (sizeof(root->i_blocks) < sizeof(u64) && - !(sb->s_flags & MS_RDONLY)) { - ext4_msg(sb, KERN_ERR, "Filesystem with huge " - "files cannot be mounted read-write " - "without CONFIG_LBDAF"); - goto failed_mount; - } - } + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); if (blocksize < EXT4_MIN_BLOCK_SIZE || @@ -2470,6 +2482,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } + has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_HUGE_FILE); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); @@ -3485,18 +3499,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); } else { - int ret; - if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, - ~EXT4_FEATURE_RO_COMPAT_SUPP))) { - ext4_msg(sb, KERN_WARNING, "couldn't " - "remount RDWR because of unsupported " - "optional features (%x)", - (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & - ~EXT4_FEATURE_RO_COMPAT_SUPP)); + /* Make sure we can mount this feature set readwrite */ + if (!ext4_feature_set_ok(sb, 0)) { err = -EROFS; goto restore_opts; } - /* * Make sure the group descriptor checksums * are sane. If they aren't, refuse to remount r/w. -- cgit v1.2.2 From d0646f7b636d067d715fab52a2ba9c6f0f46b0d7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 5 Sep 2009 12:50:43 -0400 Subject: ext4: Remove journal_checksum mount option and enable it by default There's no real cost for the journal checksum feature, and we should make sure it is enabled all the time. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4037fe0b5a5c..f1815d3bcfd5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1280,11 +1280,9 @@ static int parse_options(char *options, struct super_block *sb, *journal_devnum = option; break; case Opt_journal_checksum: - set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); - break; + break; /* Kept for backwards compatibility */ case Opt_journal_async_commit: set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); - set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); break; case Opt_noload: set_opt(sbi->s_mount_opt, NOLOAD); @@ -2751,20 +2749,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { - jbd2_journal_set_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, + jbd2_journal_set_features(sbi->s_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) + jbd2_journal_set_features(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } else if (test_opt(sb, JOURNAL_CHECKSUM)) { - jbd2_journal_set_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); + else jbd2_journal_clear_features(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } else { - jbd2_journal_clear_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } /* We have now updated the journal if required, so we can * validate the data journaling mode. */ -- cgit v1.2.2 From 71290b368ad5e1e0b0b300c9d5638490a9fd1a2d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 10 Sep 2009 17:31:04 -0400 Subject: ext4: Don't update superblock write time when filesystem is read-only This avoids updating the superblock write time when we are mounting the root file system read/only but we need to replay the journal; at that point, for people who are east of GMT and who make their clock tick in localtime for Windows bug-for-bug compatibility, and this will cause e2fsck to complain and force a full file system check. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f1815d3bcfd5..9f6fa3f74629 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3222,7 +3222,18 @@ static int ext4_commit_super(struct super_block *sb, int sync) clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } - es->s_wtime = cpu_to_le32(get_seconds()); + /* + * If the file system is mounted read-only, don't update the + * superblock write time. This avoids updating the superblock + * write time when we are mounting the root file system + * read/only but we need to replay the journal; at that point, + * for people who are east of GMT and who make their clock + * tick in localtime for Windows bug-for-bug compatibility, + * the clock is set in the future, and this will cause e2fsck + * to complain and force a full file system check. + */ + if (!(sb->s_flags & MS_RDONLY)) + es->s_wtime = cpu_to_le32(get_seconds()); es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - -- cgit v1.2.2 From 7ad9bb651fc2036ea94bed94da76a4b08959a911 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 11 Sep 2009 16:51:28 -0400 Subject: ext4: Fix initalization of s_flex_groups The s_flex_groups array should have been initialized using atomic_add to sum up the free counts from the block groups that make up a flex_bg. By using atomic_set, the value of the s_flex_groups array was set to the values of the last block group in the flex_bg. The impact of this bug is that the block and inode allocation algorithms might not pick the best flex_bg for new allocation. Thanks to Damien Guibouret for pointing out this problem! Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9f6fa3f74629..04c693357336 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1694,12 +1694,12 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, - ext4_free_inodes_count(sb, gdp)); - atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, - ext4_free_blks_count(sb, gdp)); - atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, - ext4_used_dirs_count(sb, gdp)); + atomic_add(ext4_free_inodes_count(sb, gdp), + &sbi->s_flex_groups[flex_group].free_inodes); + atomic_add(ext4_free_blks_count(sb, gdp), + &sbi->s_flex_groups[flex_group].free_blocks); + atomic_add(ext4_used_dirs_count(sb, gdp), + &sbi->s_flex_groups[flex_group].used_dirs); } return 1; -- cgit v1.2.2 From 3661d28615ea580c1db02a972fd4d3898df1cb01 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Sep 2009 22:59:50 -0400 Subject: ext4: Fix include/trace/events/ext4.h to work with Systemtap Using relative pathnames in #include statements interacts badly with SystemTap, since the fs/ext4/*.h header files are not packaged up as part of a distribution kernel's header files. Since systemtap doesn't use TP_fast_assign(), we can use a blind structure definition and then make sure the needed header files are defined before the ext4 source files #include the trace/events/ext4.h header file. https://bugzilla.redhat.com/show_bug.cgi?id=512478 Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 04c693357336..af95dd8ba54b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -45,6 +45,7 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include "mballoc.h" #define CREATE_TRACE_POINTS #include -- cgit v1.2.2 From fb0a387dcdcd21aab1b09ee7fd80b7c979bdbbfd Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 16 Sep 2009 14:45:10 -0400 Subject: ext4: limit block allocations for indirect-block files to < 2^32 Today, the ext4 allocator will happily allocate blocks past 2^32 for indirect-block files, which results in the block numbers getting truncated, and corruption ensues. This patch limits such allocations to < 2^32, and adds BUG_ONs if we do get blocks larger than that. This should address RH Bug 519471, ext4 bitmap allocator must limit blocks to < 2^32 * ext4_find_goal() is modified to choose a goal < UINT_MAX, so that our starting point is in an acceptable range. * ext4_xattr_block_set() is modified such that the goal block is < UINT_MAX, as above. * ext4_mb_regular_allocator() is modified so that the group search does not continue into groups which are too high * ext4_mb_use_preallocated() has a check that we don't use preallocated space which is too far out * ext4_alloc_blocks() and ext4_xattr_block_set() add some BUG_ONs No attempt has been made to limit inode locations to < 2^32, so we may wind up with blocks far from their inodes. Doing this much already will lead to some odd ENOSPC issues when the "lower 32" gets full, and further restricting inodes could make that even weirder. For high inodes, choosing a goal of the original, % UINT_MAX, may be a bit odd, but then we're in an odd situation anyway, and I don't know of a better heuristic. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index af95dd8ba54b..a6b1ab734728 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2616,6 +2616,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } sbi->s_groups_count = blocks_count; + sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, + (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), -- cgit v1.2.2