From a1c6c5698d53db4c47a25c3a8d11731a4d7b8370 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 27 Oct 2010 21:30:04 -0400 Subject: ext4: fix NULL pointer dereference in print_daily_error_info() Fix NULL pointer dereference in print_daily_error_info, when called on unmounted fs (EXT4_SB(sb) returns NULL), by removing error reporting timer in ext4_put_super. Google-Bug-Id: 3017663 Signed-off-by: Sergey Senozhatsky Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..751997d2cefe 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -719,6 +719,7 @@ static void ext4_put_super(struct super_block *sb) ext4_abort(sb, "Couldn't clean up the journal"); } + del_timer(&sbi->s_err_report); ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); -- cgit v1.2.2 From bfff68738f1cb5c93dab1114634cea02aae9e7ba Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:05 -0400 Subject: ext4: add support for lazy inode table initialization When the lazy_itable_init extended option is passed to mke2fs, it considerably speeds up filesystem creation because inode tables are not zeroed out. The fact that parts of the inode table are uninitialized is not a problem so long as the block group descriptors, which contain information regarding how much of the inode table has been initialized, have not been corrupted. However, if the block group checksums are not valid, e2fsck must scan the entire inode table, and the old, uninitialized data could potentially cause e2fsck to report false problems. Hence, it is important for the inode tables to be initialized as soon as possible. This commit adds this feature so that mke2fs can safely use the lazy inode table initialization feature to speed up formatting file systems. 
This is done via a new kernel thread called ext4lazyinit, which is created on demand and destroyed when it is no longer needed. There is only one thread for all ext4 filesystems in the system. When the first filesystem with the init_itable mount option is mounted, the ext4lazyinit thread is created, then the filesystem can register its request in the request list. This thread then walks through the list of requests picking up scheduled requests and invoking ext4_init_inode_table(). Next schedule time for the request is computed by multiplying the time it took to zero out the last inode table with the wait multiplier, which can be set with the (init_itable=n) mount option (default is 10). We are doing this so we do not take the whole I/O bandwidth. When the thread is no longer necessary (request list is empty) it frees the appropriate structures and exits (and can be created later by another filesystem). We do not disturb regular inode allocations in any way; the allocator simply does not care whether the inode table is zeroed or not. But when zeroing, we have to skip used inodes, obviously. Also we should prevent new inode allocations from the group while zeroing is under way. For that we take the write alloc_sem lock in ext4_init_inode_table() and the read alloc_sem in ext4_claim_inode, so when we are unlucky and the allocator hits the group which is currently being zeroed, it just has to wait. This can be suppressed using the mount option noinit_itable. 
Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 440 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 437 insertions(+), 3 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 751997d2cefe..5066537e5a38 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -41,6 +41,9 @@ #include #include +#include +#include + #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" @@ -52,6 +55,8 @@ struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; +struct ext4_lazy_init *ext4_li_info; +struct mutex ext4_li_mtx; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -70,6 +75,8 @@ static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); static int ext4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); +static void ext4_destroy_lazyinit_thread(void); +static void ext4_unregister_li_request(struct super_block *sb); #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { @@ -720,6 +727,7 @@ static void ext4_put_super(struct super_block *sb) } del_timer(&sbi->s_err_report); + ext4_unregister_li_request(sb); ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); @@ -1046,6 +1054,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) seq_puts(seq, ",block_validity"); + if (!test_opt(sb, INIT_INODE_TABLE)) + seq_puts(seq, ",noinit_inode_table"); + else if (sbi->s_li_wait_mult) + seq_printf(seq, ",init_inode_table=%u", + (unsigned) sbi->s_li_wait_mult); + ext4_show_quota_options(seq, sb); return 0; @@ -1220,6 +1234,7 @@ enum { Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, 
Opt_nodiscard, + Opt_init_inode_table, Opt_noinit_inode_table, }; static const match_table_t tokens = { @@ -1290,6 +1305,9 @@ static const match_table_t tokens = { {Opt_dioread_lock, "dioread_lock"}, {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, + {Opt_init_inode_table, "init_itable=%u"}, + {Opt_init_inode_table, "init_itable"}, + {Opt_noinit_inode_table, "noinit_itable"}, {Opt_err, NULL}, }; @@ -1760,6 +1778,20 @@ set_qf_format: case Opt_dioread_lock: clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); break; + case Opt_init_inode_table: + set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = EXT4_DEF_LI_WAIT_MULT; + if (option < 0) + return 0; + sbi->s_li_wait_mult = option; + break; + case Opt_noinit_inode_table: + clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE); + break; default: ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " @@ -1943,7 +1975,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, } /* Called at mount-time, super-block is locked */ -static int ext4_check_descriptors(struct super_block *sb) +static int ext4_check_descriptors(struct super_block *sb, + ext4_group_t *first_not_zeroed) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); @@ -1952,7 +1985,7 @@ static int ext4_check_descriptors(struct super_block *sb) ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; int flexbg_flag = 0; - ext4_group_t i; + ext4_group_t i, grp = sbi->s_groups_count; if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) flexbg_flag = 1; @@ -1968,6 +2001,10 @@ static int ext4_check_descriptors(struct super_block *sb) last_block = first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + if ((grp == sbi->s_groups_count) && + !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) + grp = i; + block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > 
last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2005,6 +2042,8 @@ static int ext4_check_descriptors(struct super_block *sb) if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); } + if (NULL != first_not_zeroed) + *first_not_zeroed = grp; ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); @@ -2543,6 +2582,378 @@ static void print_daily_error_info(unsigned long arg) mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ } +static void ext4_lazyinode_timeout(unsigned long data) +{ + struct task_struct *p = (struct task_struct *)data; + wake_up_process(p); +} + +/* Find next suitable group and run ext4_init_inode_table */ +static int ext4_run_li_request(struct ext4_li_request *elr) +{ + struct ext4_group_desc *gdp = NULL; + ext4_group_t group, ngroups; + struct super_block *sb; + unsigned long timeout = 0; + int ret = 0; + + sb = elr->lr_super; + ngroups = EXT4_SB(sb)->s_groups_count; + + for (group = elr->lr_next_group; group < ngroups; group++) { + gdp = ext4_get_group_desc(sb, group, NULL); + if (!gdp) { + ret = 1; + break; + } + + if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) + break; + } + + if (group == ngroups) + ret = 1; + + if (!ret) { + timeout = jiffies; + ret = ext4_init_inode_table(sb, group, + elr->lr_timeout ? 0 : 1); + if (elr->lr_timeout == 0) { + timeout = jiffies - timeout; + if (elr->lr_sbi->s_li_wait_mult) + timeout *= elr->lr_sbi->s_li_wait_mult; + else + timeout *= 20; + elr->lr_timeout = timeout; + } + elr->lr_next_sched = jiffies + elr->lr_timeout; + elr->lr_next_group = group + 1; + } + + return ret; +} + +/* + * Remove lr_request from the list_request and free the + * request tructure. 
Should be called with li_list_mtx held + */ +static void ext4_remove_li_request(struct ext4_li_request *elr) +{ + struct ext4_sb_info *sbi; + + if (!elr) + return; + + sbi = elr->lr_sbi; + + list_del(&elr->lr_request); + sbi->s_li_request = NULL; + kfree(elr); +} + +static void ext4_unregister_li_request(struct super_block *sb) +{ + struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; + + if (!ext4_li_info) + return; + + mutex_lock(&ext4_li_info->li_list_mtx); + ext4_remove_li_request(elr); + mutex_unlock(&ext4_li_info->li_list_mtx); +} + +/* + * This is the function where ext4lazyinit thread lives. It walks + * through the request list searching for next scheduled filesystem. + * When such a fs is found, run the lazy initialization request + * (ext4_rn_li_request) and keep track of the time spend in this + * function. Based on that time we compute next schedule time of + * the request. When walking through the list is complete, compute + * next waking time and put itself into sleep. 
+ */ +static int ext4_lazyinit_thread(void *arg) +{ + struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; + struct list_head *pos, *n; + struct ext4_li_request *elr; + unsigned long next_wakeup; + DEFINE_WAIT(wait); + int ret; + + BUG_ON(NULL == eli); + + eli->li_timer.data = (unsigned long)current; + eli->li_timer.function = ext4_lazyinode_timeout; + + eli->li_task = current; + wake_up(&eli->li_wait_task); + +cont_thread: + while (true) { + next_wakeup = MAX_JIFFY_OFFSET; + + mutex_lock(&eli->li_list_mtx); + if (list_empty(&eli->li_request_list)) { + mutex_unlock(&eli->li_list_mtx); + goto exit_thread; + } + + list_for_each_safe(pos, n, &eli->li_request_list) { + elr = list_entry(pos, struct ext4_li_request, + lr_request); + + if (time_after_eq(jiffies, elr->lr_next_sched)) + ret = ext4_run_li_request(elr); + + if (ret) { + ret = 0; + ext4_remove_li_request(elr); + continue; + } + + if (time_before(elr->lr_next_sched, next_wakeup)) + next_wakeup = elr->lr_next_sched; + } + mutex_unlock(&eli->li_list_mtx); + + if (freezing(current)) + refrigerator(); + + if (time_after_eq(jiffies, next_wakeup)) { + cond_resched(); + continue; + } + + eli->li_timer.expires = next_wakeup; + add_timer(&eli->li_timer); + prepare_to_wait(&eli->li_wait_daemon, &wait, + TASK_INTERRUPTIBLE); + if (time_before(jiffies, next_wakeup)) + schedule(); + finish_wait(&eli->li_wait_daemon, &wait); + } + +exit_thread: + /* + * It looks like the request list is empty, but we need + * to check it under the li_list_mtx lock, to prevent any + * additions into it, and of course we should lock ext4_li_mtx + * to atomically free the list and ext4_li_info, because at + * this point another ext4 filesystem could be registering + * new one. 
+ */ + mutex_lock(&ext4_li_mtx); + mutex_lock(&eli->li_list_mtx); + if (!list_empty(&eli->li_request_list)) { + mutex_unlock(&eli->li_list_mtx); + mutex_unlock(&ext4_li_mtx); + goto cont_thread; + } + mutex_unlock(&eli->li_list_mtx); + del_timer_sync(&ext4_li_info->li_timer); + eli->li_task = NULL; + wake_up(&eli->li_wait_task); + + kfree(ext4_li_info); + ext4_li_info = NULL; + mutex_unlock(&ext4_li_mtx); + + return 0; +} + +static void ext4_clear_request_list(void) +{ + struct list_head *pos, *n; + struct ext4_li_request *elr; + + mutex_lock(&ext4_li_info->li_list_mtx); + if (list_empty(&ext4_li_info->li_request_list)) + return; + + list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { + elr = list_entry(pos, struct ext4_li_request, + lr_request); + ext4_remove_li_request(elr); + } + mutex_unlock(&ext4_li_info->li_list_mtx); +} + +static int ext4_run_lazyinit_thread(void) +{ + struct task_struct *t; + + t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); + if (IS_ERR(t)) { + int err = PTR_ERR(t); + ext4_clear_request_list(); + del_timer_sync(&ext4_li_info->li_timer); + kfree(ext4_li_info); + ext4_li_info = NULL; + printk(KERN_CRIT "EXT4: error %d creating inode table " + "initialization thread\n", + err); + return err; + } + ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; + + wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL); + return 0; +} + +/* + * Check whether it make sense to run itable init. thread or not. + * If there is at least one uninitialized inode table, return + * corresponding group number, else the loop goes through all + * groups and return total number of groups. 
+ */ +static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) +{ + ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; + struct ext4_group_desc *gdp = NULL; + + for (group = 0; group < ngroups; group++) { + gdp = ext4_get_group_desc(sb, group, NULL); + if (!gdp) + continue; + + if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) + break; + } + + return group; +} + +static int ext4_li_info_new(void) +{ + struct ext4_lazy_init *eli = NULL; + + eli = kzalloc(sizeof(*eli), GFP_KERNEL); + if (!eli) + return -ENOMEM; + + eli->li_task = NULL; + INIT_LIST_HEAD(&eli->li_request_list); + mutex_init(&eli->li_list_mtx); + + init_waitqueue_head(&eli->li_wait_daemon); + init_waitqueue_head(&eli->li_wait_task); + init_timer(&eli->li_timer); + eli->li_state |= EXT4_LAZYINIT_QUIT; + + ext4_li_info = eli; + + return 0; +} + +static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, + ext4_group_t start) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_li_request *elr; + unsigned long rnd; + + elr = kzalloc(sizeof(*elr), GFP_KERNEL); + if (!elr) + return NULL; + + elr->lr_super = sb; + elr->lr_sbi = sbi; + elr->lr_next_group = start; + + /* + * Randomize first schedule time of the request to + * spread the inode table initialization requests + * better. 
+ */ + get_random_bytes(&rnd, sizeof(rnd)); + elr->lr_next_sched = jiffies + (unsigned long)rnd % + (EXT4_DEF_LI_MAX_START_DELAY * HZ); + + return elr; +} + +static int ext4_register_li_request(struct super_block *sb, + ext4_group_t first_not_zeroed) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_li_request *elr; + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + int ret = 0; + + if (sbi->s_li_request != NULL) + goto out; + + if (first_not_zeroed == ngroups || + (sb->s_flags & MS_RDONLY) || + !test_opt(sb, INIT_INODE_TABLE)) { + sbi->s_li_request = NULL; + goto out; + } + + if (first_not_zeroed == ngroups) { + sbi->s_li_request = NULL; + goto out; + } + + elr = ext4_li_request_new(sb, first_not_zeroed); + if (!elr) { + ret = -ENOMEM; + goto out; + } + + mutex_lock(&ext4_li_mtx); + + if (NULL == ext4_li_info) { + ret = ext4_li_info_new(); + if (ret) + goto out; + } + + mutex_lock(&ext4_li_info->li_list_mtx); + list_add(&elr->lr_request, &ext4_li_info->li_request_list); + mutex_unlock(&ext4_li_info->li_list_mtx); + + sbi->s_li_request = elr; + + if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { + ret = ext4_run_lazyinit_thread(); + if (ret) + goto out; + } + + mutex_unlock(&ext4_li_mtx); + +out: + if (ret) { + mutex_unlock(&ext4_li_mtx); + kfree(elr); + } + return ret; +} + +/* + * We do not need to lock anything since this is called on + * module unload. + */ +static void ext4_destroy_lazyinit_thread(void) +{ + /* + * If thread exited earlier + * there's nothing to be done. 
+ */ + if (!ext4_li_info) + return; + + ext4_clear_request_list(); + + while (ext4_li_info->li_task) { + wake_up(&ext4_li_info->li_wait_daemon); + wait_event(ext4_li_info->li_wait_task, + ext4_li_info->li_task == NULL); + } +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) @@ -2568,6 +2979,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) __u64 blocks_count; int err; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + ext4_group_t first_not_zeroed; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -2630,6 +3042,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Set defaults before we parse the mount options */ def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); if (def_mount_opts & EXT4_DEFM_DEBUG) set_opt(sbi->s_mount_opt, DEBUG); if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { @@ -2909,7 +3322,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount2; } } - if (!ext4_check_descriptors(sb)) { + if (!ext4_check_descriptors(sb, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); goto failed_mount2; } @@ -3130,6 +3543,10 @@ no_journal: goto failed_mount4; } + err = ext4_register_li_request(sb, first_not_zeroed); + if (err) + goto failed_mount4; + sbi->s_kobj.kset = ext4_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, @@ -3847,6 +4264,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) enable_quota = 1; } } + + /* + * Reinitialize lazy itable initialization thread based on + * current settings + */ + if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) + ext4_unregister_li_request(sb); + else { + ext4_group_t first_not_zeroed; + first_not_zeroed = ext4_has_uninit_itable(sb); + ext4_register_li_request(sb, 
first_not_zeroed); + } + ext4_setup_system_zone(sb); if (sbi->s_journal == NULL) ext4_commit_super(sb, 1); @@ -4317,6 +4747,9 @@ static int __init init_ext4_fs(void) err = register_filesystem(&ext4_fs_type); if (err) goto out; + + ext4_li_info = NULL; + mutex_init(&ext4_li_mtx); return 0; out: unregister_as_ext2(); @@ -4336,6 +4769,7 @@ out4: static void __exit exit_ext4_fs(void) { + ext4_destroy_lazyinit_thread(); unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); -- cgit v1.2.2 From 857ac889cce8a486d47874db4d2f9620e7e9e5de Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:05 -0400 Subject: ext4: add interface to advertise ext4 features in sysfs User-space should have the opportunity to check which features ext4 supports in each particular copy. This adds an easy interface by creating a new "features" directory in /sys/fs/ext4/. In that directory files advertising feature names can be created. Add lazy_itable_init to the feature list. 
Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5066537e5a38..c5b890140d01 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -57,6 +57,7 @@ struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; struct ext4_lazy_init *ext4_li_info; struct mutex ext4_li_mtx; +struct ext4_features *ext4_feat; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -709,6 +710,7 @@ static void ext4_put_super(struct super_block *sb) struct ext4_super_block *es = sbi->s_es; int i, err; + ext4_unregister_li_request(sb); dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); flush_workqueue(sbi->dio_unwritten_wq); @@ -727,7 +729,6 @@ static void ext4_put_super(struct super_block *sb) } del_timer(&sbi->s_err_report); - ext4_unregister_li_request(sb); ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); @@ -2416,6 +2417,7 @@ static struct ext4_attr ext4_attr_##_name = { \ #define EXT4_ATTR(name, mode, show, store) \ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) +#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) #define EXT4_RW_ATTR_SBI_UI(name, elname) \ @@ -2452,6 +2454,14 @@ static struct attribute *ext4_attrs[] = { NULL, }; +/* Features this copy of ext4 supports */ +EXT4_INFO_ATTR(lazy_itable_init); + +static struct attribute *ext4_feat_attrs[] = { + ATTR_LIST(lazy_itable_init), + NULL, +}; + static ssize_t ext4_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -2480,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj) 
complete(&sbi->s_kobj_unregister); } - static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, @@ -2492,6 +2501,17 @@ static struct kobj_type ext4_ktype = { .release = ext4_sb_release, }; +static void ext4_feat_release(struct kobject *kobj) +{ + complete(&ext4_feat->f_kobj_unregister); +} + +static struct kobj_type ext4_feat_ktype = { + .default_attrs = ext4_feat_attrs, + .sysfs_ops = &ext4_attr_ops, + .release = ext4_feat_release, +}; + /* * Check whether this filesystem can be mounted based on * the features present and the RDONLY/RDWR mount requested. @@ -4720,6 +4740,30 @@ static struct file_system_type ext4_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +int __init ext4_init_feat_adverts(void) +{ + struct ext4_features *ef; + int ret = -ENOMEM; + + ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); + if (!ef) + goto out; + + ef->f_kobj.kset = ext4_kset; + init_completion(&ef->f_kobj_unregister); + ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, + "features"); + if (ret) { + kfree(ef); + goto out; + } + + ext4_feat = ef; + ret = 0; +out: + return ret; +} + static int __init init_ext4_fs(void) { int err; @@ -4732,6 +4776,9 @@ static int __init init_ext4_fs(void) if (!ext4_kset) goto out4; ext4_proc_root = proc_mkdir("fs/ext4", NULL); + + err = ext4_init_feat_adverts(); + err = init_ext4_mballoc(); if (err) goto out3; @@ -4760,6 +4807,7 @@ out1: out2: exit_ext4_mballoc(); out3: + kfree(ext4_feat); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); out4: -- cgit v1.2.2 From c41303ced67c4ebf51bf2e7d0f139155e09e0939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Wed, 27 Oct 2010 21:30:06 -0400 Subject: ext4: don't update sb journal_devnum when RO dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An ext4 filesystem on a read-only device, with an external journal which is at a different device number then recorded in the 
superblock will fail to honor the read-only setting of the device and trigger a superblock update (write). For example: - ext4 on a software raid which is in read-only mode - external journal on a read-write device which has changed device num - attempt to mount with -o journal_dev= - hits BUG_ON(mddev->ro = 1) in md.c Cc: Theodore Ts'o Signed-off-by: Maciej Żenczykowski Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c5b890140d01..8a24e9be7cb0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3908,7 +3908,7 @@ static int ext4_load_journal(struct super_block *sb, EXT4_SB(sb)->s_journal = journal; ext4_clear_journal_err(sb, es); - if (journal_devnum && + if (!really_read_only && journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { es->s_journal_dev = cpu_to_le32(journal_devnum); -- cgit v1.2.2 From bd2d0210cf22f2bd0cef72eb97cf94fc7d31d8cc Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 21:30:10 -0400 Subject: ext4: use bio layer instead of buffer layer in mpage_da_submit_io Call the block I/O layer directly instead of going through the buffer layer. This should give us much better performance and scalability, as well as lowering our CPU utilization when doing buffered writeback. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8a24e9be7cb0..e13b3c3534d7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4769,9 +4769,12 @@ static int __init init_ext4_fs(void) int err; ext4_check_flag_values(); - err = init_ext4_system_zone(); + err = init_ext4_pageio(); if (err) return err; + err = init_ext4_system_zone(); + if (err) + goto out5; ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); if (!ext4_kset) goto out4; @@ -4812,6 +4815,8 @@ out3: kset_unregister(ext4_kset); out4: exit_ext4_system_zone(); +out5: + exit_ext4_pageio(); return err; } @@ -4827,6 +4832,7 @@ static void __exit exit_ext4_fs(void) remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); exit_ext4_system_zone(); + exit_ext4_pageio(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -- cgit v1.2.2 From 7360d1731e5dc78aec867e65e55f9fb58782b5fe Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:12 -0400 Subject: ext4: Add batched discard support for ext4 Walk through allocation groups and trim all free extents. It can be invoked through FITRIM ioctl on the file system. The main idea is to provide a way to trim the whole file system if needed, since some SSD's may suffer from performance loss after the whole device was filled (it does not mean that fs is full!). It search for free extents in allocation groups specified by Byte range start -> start+len. When the free extent is within this range, blocks are marked as used and then trimmed. Afterwards these blocks are marked as free in per-group bitmap. Since fstrim is a long operation it is good to have an ability to interrupt it by a signal. This was added by Dmitry Monakhov. Thanks Dimitry. 
Signed-off-by: Lukas Czerner Signed-off-by: Dmitry Monakhov Reviewed-by: Jan Kara Reviewed-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e13b3c3534d7..01e60aa6c478 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1189,6 +1189,7 @@ static const struct super_operations ext4_sops = { .quota_write = ext4_quota_write, #endif .bdev_try_to_free_page = bdev_try_to_free_page, + .trim_fs = ext4_trim_fs }; static const struct super_operations ext4_nojournal_sops = { -- cgit v1.2.2 From 27ee40df2b17c84aa7855907df12befe6869b7a7 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:12 -0400 Subject: ext4: add batched_discard into ext4 feature list Should be applied on the top of "lazy inode table initialization" and "batched discard support" patch-sets. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 01e60aa6c478..9ce3b67b7269 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2457,9 +2457,11 @@ static struct attribute *ext4_attrs[] = { /* Features this copy of ext4 supports */ EXT4_INFO_ATTR(lazy_itable_init); +EXT4_INFO_ATTR(batched_discard); static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), + ATTR_LIST(batched_discard), NULL, }; -- cgit v1.2.2 From 7f93cff90fa9be6ed45f6189e136153d1d8631b0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: fix kernel oops if the journal superblock has a non-zero j_errno Commit 84061e0 fixed an accounting bug only to introduce the possibility of a kernel OOPS if the journal has a non-zero j_errno field indicating that the file system had detected a fs inconsistency. 
After the journal replay, if the journal superblock indicates that the file system has an error, this indication is transfered to the file system and then ext4_commit_super() is called to write this to the disk. But since the percpu counters are now initialized after the journal replay, the call to ext4_commit_super() will cause a kernel oops since it needs to use the percpu counters the ext4 superblock structure. The fix is to skip setting the ext4 free block and free inode fields if the percpu counter has not been set. Thanks to Ken Sumrall for reporting and analyzing the root causes of this bug. Addresses-Google-Bug: #3054080 Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ce3b67b7269..c9e06c647ce8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3964,9 +3964,12 @@ static int ext4_commit_super(struct super_block *sb, int sync) else es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); - ext4_free_blocks_count_set(es, percpu_counter_sum_positive( + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) + ext4_free_blocks_count_set(es, percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeblocks_counter)); - es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) + es->s_free_inodes_count = + cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); -- cgit v1.2.2 From 5dabfc78dcedbe46cb2e4872dde448de3cec2979 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 21:30:14 -0400 Subject: ext4: rename {exit,init}_ext4_*() to ext4_{exit,init}_*() This is a cleanup to avoid namespace leaks out of fs/ext4 Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 
deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c9e06c647ce8..94e60038e05d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4770,15 +4770,15 @@ out: return ret; } -static int __init init_ext4_fs(void) +static int __init ext4_init_fs(void) { int err; ext4_check_flag_values(); - err = init_ext4_pageio(); + err = ext4_init_pageio(); if (err) return err; - err = init_ext4_system_zone(); + err = ext4_init_system_zone(); if (err) goto out5; ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); @@ -4788,11 +4788,11 @@ static int __init init_ext4_fs(void) err = ext4_init_feat_adverts(); - err = init_ext4_mballoc(); + err = ext4_init_mballoc(); if (err) goto out3; - err = init_ext4_xattr(); + err = ext4_init_xattr(); if (err) goto out2; err = init_inodecache(); @@ -4812,37 +4812,37 @@ out: unregister_as_ext3(); destroy_inodecache(); out1: - exit_ext4_xattr(); + ext4_exit_xattr(); out2: - exit_ext4_mballoc(); + ext4_exit_mballoc(); out3: kfree(ext4_feat); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); out4: - exit_ext4_system_zone(); + ext4_exit_system_zone(); out5: - exit_ext4_pageio(); + ext4_exit_pageio(); return err; } -static void __exit exit_ext4_fs(void) +static void __exit ext4_exit_fs(void) { ext4_destroy_lazyinit_thread(); unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); destroy_inodecache(); - exit_ext4_xattr(); - exit_ext4_mballoc(); + ext4_exit_xattr(); + ext4_exit_mballoc(); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); - exit_ext4_system_zone(); - exit_ext4_pageio(); + ext4_exit_system_zone(); + ext4_exit_pageio(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); -module_init(init_ext4_fs) -module_exit(exit_ext4_fs) +module_init(ext4_init_fs) +module_exit(ext4_exit_fs) -- cgit v1.2.2 From 
1f109d5a17b438c4a54cbf6fd87a249e3d72fb21 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 21:30:14 -0400 Subject: ext4: make various ext4 functions be static These functions have no need to be exported beyond file context. No functions needed to be moved for this commit; just some function declarations changed to be static and removed from header files. (A similar patch was submitted by Eric Sandeen, but I wanted to handle code movement in separate patches to make sure code changes didn't accidentally get dropped.) Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94e60038e05d..158d1bca8769 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -53,7 +53,7 @@ #define CREATE_TRACE_POINTS #include -struct proc_dir_entry *ext4_proc_root; +static struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; struct ext4_lazy_init *ext4_li_info; struct mutex ext4_li_mtx; -- cgit v1.2.2 From beed5ecbaa377fa8bb6a54a6608e8725a21efdbc Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Wed, 27 Oct 2010 22:08:42 -0400 Subject: ext4: fix unbalanced mutex unlock in error path of ext4_li_request_new Signed-off-by: Nicolas Kaiser Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 158d1bca8769..3b4984d37a68 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2902,28 +2902,26 @@ static int ext4_register_li_request(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; - int ret = 0; + int ret; if (sbi->s_li_request != NULL) - goto out; + return 0; if (first_not_zeroed == ngroups || (sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) { 
sbi->s_li_request = NULL; - goto out; + return 0; } if (first_not_zeroed == ngroups) { sbi->s_li_request = NULL; - goto out; + return 0; } elr = ext4_li_request_new(sb, first_not_zeroed); - if (!elr) { - ret = -ENOMEM; - goto out; - } + if (!elr) + return -ENOMEM; mutex_lock(&ext4_li_mtx); @@ -2944,14 +2942,10 @@ static int ext4_register_li_request(struct super_block *sb, if (ret) goto out; } - - mutex_unlock(&ext4_li_mtx); - out: - if (ret) { - mutex_unlock(&ext4_li_mtx); + mutex_unlock(&ext4_li_mtx); + if (ret) kfree(elr); - } return ret; } -- cgit v1.2.2