about summary refs log tree commit diff stats
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r-- fs/ext4/super.c 126
1 files changed, 87 insertions, 39 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 203f9e4a70be..8553dfb310af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -54,9 +54,9 @@
54 54
55static struct proc_dir_entry *ext4_proc_root; 55static struct proc_dir_entry *ext4_proc_root;
56static struct kset *ext4_kset; 56static struct kset *ext4_kset;
57struct ext4_lazy_init *ext4_li_info; 57static struct ext4_lazy_init *ext4_li_info;
58struct mutex ext4_li_mtx; 58static struct mutex ext4_li_mtx;
59struct ext4_features *ext4_feat; 59static struct ext4_features *ext4_feat;
60 60
61static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 61static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
62 unsigned long journal_devnum); 62 unsigned long journal_devnum);
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb);
75static int ext4_freeze(struct super_block *sb); 75static int ext4_freeze(struct super_block *sb);
76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 77 const char *dev_name, void *data);
78static int ext4_feature_set_ok(struct super_block *sb, int readonly);
78static void ext4_destroy_lazyinit_thread(void); 79static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 80static void ext4_unregister_li_request(struct super_block *sb);
80static void ext4_clear_request_list(void); 81static void ext4_clear_request_list(void);
@@ -241,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
241 * journal_end calls result in the superblock being marked dirty, so 242 * journal_end calls result in the superblock being marked dirty, so
242 * that sync() will call the filesystem's write_super callback if 243 * that sync() will call the filesystem's write_super callback if
243 * appropriate. 244 * appropriate.
245 *
246 * To avoid j_barrier being held in userspace when a user calls freeze(),
247 * ext4 uses s_frozen, which lives in an upper layer, to prevent new
248 * handles from being started.
244 */ 249 */
245handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 250handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
246{ 251{
247 journal_t *journal; 252 journal_t *journal;
253 handle_t *handle;
248 254
249 if (sb->s_flags & MS_RDONLY) 255 if (sb->s_flags & MS_RDONLY)
250 return ERR_PTR(-EROFS); 256 return ERR_PTR(-EROFS);
251 257
252 vfs_check_frozen(sb, SB_FREEZE_TRANS);
253 /* Special case here: if the journal has aborted behind our
254 * backs (eg. EIO in the commit thread), then we still need to
255 * take the FS itself readonly cleanly. */
256 journal = EXT4_SB(sb)->s_journal; 258 journal = EXT4_SB(sb)->s_journal;
257 if (journal) { 259 handle = ext4_journal_current_handle();
258 if (is_journal_aborted(journal)) { 260
259 ext4_abort(sb, "Detected aborted journal"); 261 /*
260 return ERR_PTR(-EROFS); 262 * If a handle has been started, it should be allowed to
261 } 263 * finish, otherwise deadlock could happen between freeze
262 return jbd2_journal_start(journal, nblocks); 264 * and others(e.g. truncate) due to the restart of the
265 * journal handle if the filesystem is frozen and active
266 * handles are not stopped.
267 */
268 if (!handle)
269 vfs_check_frozen(sb, SB_FREEZE_TRANS);
270
271 if (!journal)
272 return ext4_get_nojournal();
273 /*
274 * Special case here: if the journal has aborted behind our
275 * backs (eg. EIO in the commit thread), then we still need to
276 * take the FS itself readonly cleanly.
277 */
278 if (is_journal_aborted(journal)) {
279 ext4_abort(sb, "Detected aborted journal");
280 return ERR_PTR(-EROFS);
263 } 281 }
264 return ext4_get_nojournal(); 282 return jbd2_journal_start(journal, nblocks);
265} 283}
266 284
267/* 285/*
@@ -594,7 +612,7 @@ __acquires(bitlock)
594 612
595 vaf.fmt = fmt; 613 vaf.fmt = fmt;
596 vaf.va = &args; 614 vaf.va = &args;
597 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", 615 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
598 sb->s_id, function, line, grp); 616 sb->s_id, function, line, grp);
599 if (ino) 617 if (ino)
600 printk(KERN_CONT "inode %lu: ", ino); 618 printk(KERN_CONT "inode %lu: ", ino);
@@ -616,7 +634,7 @@ __acquires(bitlock)
616 * filesystem will have already been marked read/only and the 634 * filesystem will have already been marked read/only and the
617 * journal has been aborted. We return 1 as a hint to callers 635 * journal has been aborted. We return 1 as a hint to callers
618 * who might want to use the return value from 636 * who might want to use the return value from
619 * ext4_grp_locked_error() to distinguish beween the 637 * ext4_grp_locked_error() to distinguish between the
620 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 638 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
621 * aggressively from the ext4 function in question, with a 639 * aggressively from the ext4 function in question, with a
622 * more appropriate error code. 640 * more appropriate error code.
@@ -997,13 +1015,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
997 if (test_opt(sb, OLDALLOC)) 1015 if (test_opt(sb, OLDALLOC))
998 seq_puts(seq, ",oldalloc"); 1016 seq_puts(seq, ",oldalloc");
999#ifdef CONFIG_EXT4_FS_XATTR 1017#ifdef CONFIG_EXT4_FS_XATTR
1000 if (test_opt(sb, XATTR_USER) && 1018 if (test_opt(sb, XATTR_USER))
1001 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
1002 seq_puts(seq, ",user_xattr"); 1019 seq_puts(seq, ",user_xattr");
1003 if (!test_opt(sb, XATTR_USER) && 1020 if (!test_opt(sb, XATTR_USER))
1004 (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
1005 seq_puts(seq, ",nouser_xattr"); 1021 seq_puts(seq, ",nouser_xattr");
1006 }
1007#endif 1022#endif
1008#ifdef CONFIG_EXT4_FS_POSIX_ACL 1023#ifdef CONFIG_EXT4_FS_POSIX_ACL
1009 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 1024 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
@@ -1041,8 +1056,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1041 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1056 !(def_mount_opts & EXT4_DEFM_NODELALLOC))
1042 seq_puts(seq, ",nodelalloc"); 1057 seq_puts(seq, ",nodelalloc");
1043 1058
1044 if (test_opt(sb, MBLK_IO_SUBMIT)) 1059 if (!test_opt(sb, MBLK_IO_SUBMIT))
1045 seq_puts(seq, ",mblk_io_submit"); 1060 seq_puts(seq, ",nomblk_io_submit");
1046 if (sbi->s_stripe) 1061 if (sbi->s_stripe)
1047 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1062 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
1048 /* 1063 /*
@@ -1451,7 +1466,7 @@ static int parse_options(char *options, struct super_block *sb,
1451 * Initialize args struct so we know whether arg was 1466 * Initialize args struct so we know whether arg was
1452 * found; some options take optional arguments. 1467 * found; some options take optional arguments.
1453 */ 1468 */
1454 args[0].to = args[0].from = 0; 1469 args[0].to = args[0].from = NULL;
1455 token = match_token(p, tokens, args); 1470 token = match_token(p, tokens, args);
1456 switch (token) { 1471 switch (token) {
1457 case Opt_bsd_df: 1472 case Opt_bsd_df:
@@ -1771,7 +1786,7 @@ set_qf_format:
1771 return 0; 1786 return 0;
1772 if (option < 0 || option > (1 << 30)) 1787 if (option < 0 || option > (1 << 30))
1773 return 0; 1788 return 0;
1774 if (!is_power_of_2(option)) { 1789 if (option && !is_power_of_2(option)) {
1775 ext4_msg(sb, KERN_ERR, 1790 ext4_msg(sb, KERN_ERR,
1776 "EXT4-fs: inode_readahead_blks" 1791 "EXT4-fs: inode_readahead_blks"
1777 " must be a power of 2"); 1792 " must be a power of 2");
@@ -2120,6 +2135,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2120 return; 2135 return;
2121 } 2136 }
2122 2137
2138 /* Check if feature set would not allow a r/w mount */
2139 if (!ext4_feature_set_ok(sb, 0)) {
2140 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2141 "unknown ROCOMPAT features");
2142 return;
2143 }
2144
2123 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2145 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2124 if (es->s_last_orphan) 2146 if (es->s_last_orphan)
2125 jbd_debug(1, "Errors on filesystem, " 2147 jbd_debug(1, "Errors on filesystem, "
@@ -2412,7 +2434,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2412 if (parse_strtoul(buf, 0x40000000, &t)) 2434 if (parse_strtoul(buf, 0x40000000, &t))
2413 return -EINVAL; 2435 return -EINVAL;
2414 2436
2415 if (!is_power_of_2(t)) 2437 if (t && !is_power_of_2(t))
2416 return -EINVAL; 2438 return -EINVAL;
2417 2439
2418 sbi->s_inode_readahead_blks = t; 2440 sbi->s_inode_readahead_blks = t;
@@ -2970,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
2970 mutex_unlock(&ext4_li_info->li_list_mtx); 2992 mutex_unlock(&ext4_li_info->li_list_mtx);
2971 2993
2972 sbi->s_li_request = elr; 2994 sbi->s_li_request = elr;
2995 /*
2996 * set elr to NULL here since it has been inserted to
2997 * the request_list and the removal and free of it is
2998 * handled by ext4_clear_request_list from now on.
2999 */
3000 elr = NULL;
2973 3001
2974 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 3002 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
2975 ret = ext4_run_lazyinit_thread(); 3003 ret = ext4_run_lazyinit_thread();
@@ -3095,14 +3123,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3095 } 3123 }
3096 if (def_mount_opts & EXT4_DEFM_UID16) 3124 if (def_mount_opts & EXT4_DEFM_UID16)
3097 set_opt(sb, NO_UID32); 3125 set_opt(sb, NO_UID32);
3126 /* xattr user namespace & acls are now defaulted on */
3098#ifdef CONFIG_EXT4_FS_XATTR 3127#ifdef CONFIG_EXT4_FS_XATTR
3099 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 3128 set_opt(sb, XATTR_USER);
3100 set_opt(sb, XATTR_USER);
3101#endif 3129#endif
3102#ifdef CONFIG_EXT4_FS_POSIX_ACL 3130#ifdef CONFIG_EXT4_FS_POSIX_ACL
3103 if (def_mount_opts & EXT4_DEFM_ACL) 3131 set_opt(sb, POSIX_ACL);
3104 set_opt(sb, POSIX_ACL);
3105#endif 3132#endif
3133 set_opt(sb, MBLK_IO_SUBMIT);
3106 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3134 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3107 set_opt(sb, JOURNAL_DATA); 3135 set_opt(sb, JOURNAL_DATA);
3108 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3136 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3380,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3380 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3408 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3381 spin_lock_init(&sbi->s_next_gen_lock); 3409 spin_lock_init(&sbi->s_next_gen_lock);
3382 3410
3411 init_timer(&sbi->s_err_report);
3412 sbi->s_err_report.function = print_daily_error_info;
3413 sbi->s_err_report.data = (unsigned long) sb;
3414
3383 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3415 err = percpu_counter_init(&sbi->s_freeblocks_counter,
3384 ext4_count_free_blocks(sb)); 3416 ext4_count_free_blocks(sb));
3385 if (!err) { 3417 if (!err) {
@@ -3516,7 +3548,7 @@ no_journal:
3516 * concurrency isn't really necessary. Limit it to 1. 3548 * concurrency isn't really necessary. Limit it to 1.
3517 */ 3549 */
3518 EXT4_SB(sb)->dio_unwritten_wq = 3550 EXT4_SB(sb)->dio_unwritten_wq =
3519 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1); 3551 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
3520 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3552 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3521 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3553 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3522 goto failed_mount_wq; 3554 goto failed_mount_wq;
@@ -3531,17 +3563,16 @@ no_journal:
3531 if (IS_ERR(root)) { 3563 if (IS_ERR(root)) {
3532 ext4_msg(sb, KERN_ERR, "get root inode failed"); 3564 ext4_msg(sb, KERN_ERR, "get root inode failed");
3533 ret = PTR_ERR(root); 3565 ret = PTR_ERR(root);
3566 root = NULL;
3534 goto failed_mount4; 3567 goto failed_mount4;
3535 } 3568 }
3536 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 3569 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
3537 iput(root);
3538 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 3570 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
3539 goto failed_mount4; 3571 goto failed_mount4;
3540 } 3572 }
3541 sb->s_root = d_alloc_root(root); 3573 sb->s_root = d_alloc_root(root);
3542 if (!sb->s_root) { 3574 if (!sb->s_root) {
3543 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 3575 ext4_msg(sb, KERN_ERR, "get root dentry failed");
3544 iput(root);
3545 ret = -ENOMEM; 3576 ret = -ENOMEM;
3546 goto failed_mount4; 3577 goto failed_mount4;
3547 } 3578 }
@@ -3642,9 +3673,6 @@ no_journal:
3642 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 3673 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
3643 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 3674 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
3644 3675
3645 init_timer(&sbi->s_err_report);
3646 sbi->s_err_report.function = print_daily_error_info;
3647 sbi->s_err_report.data = (unsigned long) sb;
3648 if (es->s_error_count) 3676 if (es->s_error_count)
3649 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 3677 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
3650 3678
@@ -3657,6 +3685,8 @@ cantfind_ext4:
3657 goto failed_mount; 3685 goto failed_mount;
3658 3686
3659failed_mount4: 3687failed_mount4:
3688 iput(root);
3689 sb->s_root = NULL;
3660 ext4_msg(sb, KERN_ERR, "mount failed"); 3690 ext4_msg(sb, KERN_ERR, "mount failed");
3661 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3691 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
3662failed_mount_wq: 3692failed_mount_wq:
@@ -3666,6 +3696,7 @@ failed_mount_wq:
3666 sbi->s_journal = NULL; 3696 sbi->s_journal = NULL;
3667 } 3697 }
3668failed_mount3: 3698failed_mount3:
3699 del_timer(&sbi->s_err_report);
3669 if (sbi->s_flex_groups) { 3700 if (sbi->s_flex_groups) {
3670 if (is_vmalloc_addr(sbi->s_flex_groups)) 3701 if (is_vmalloc_addr(sbi->s_flex_groups))
3671 vfree(sbi->s_flex_groups); 3702 vfree(sbi->s_flex_groups);
@@ -4132,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4132/* 4163/*
4133 * LVM calls this function before a (read-only) snapshot is created. This 4164 * LVM calls this function before a (read-only) snapshot is created. This
4134 * gives us a chance to flush the journal completely and mark the fs clean. 4165 * gives us a chance to flush the journal completely and mark the fs clean.
4166 *
4167 * Note that this function alone cannot bring a filesystem into a clean
4168 * state, because ext4 relies on @sb->s_frozen, which lives in an upper
4169 * layer, to prevent new handles from being started. It thus needs help
4170 * from the upper layer.
4135 */ 4171 */
4136static int ext4_freeze(struct super_block *sb) 4172static int ext4_freeze(struct super_block *sb)
4137{ 4173{
@@ -4608,17 +4644,30 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4608 4644
4609static int ext4_quota_off(struct super_block *sb, int type) 4645static int ext4_quota_off(struct super_block *sb, int type)
4610{ 4646{
4647 struct inode *inode = sb_dqopt(sb)->files[type];
4648 handle_t *handle;
4649
4611 /* Force all delayed allocation blocks to be allocated. 4650 /* Force all delayed allocation blocks to be allocated.
4612 * Caller already holds s_umount sem */ 4651 * Caller already holds s_umount sem */
4613 if (test_opt(sb, DELALLOC)) 4652 if (test_opt(sb, DELALLOC))
4614 sync_filesystem(sb); 4653 sync_filesystem(sb);
4615 4654
4655 /* Update modification times of quota files when userspace can
4656 * start looking at them */
4657 handle = ext4_journal_start(inode, 1);
4658 if (IS_ERR(handle))
4659 goto out;
4660 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4661 ext4_mark_inode_dirty(handle, inode);
4662 ext4_journal_stop(handle);
4663
4664out:
4616 return dquot_quota_off(sb, type); 4665 return dquot_quota_off(sb, type);
4617} 4666}
4618 4667
4619/* Read data from quotafile - avoid pagecache and such because we cannot afford 4668/* Read data from quotafile - avoid pagecache and such because we cannot afford
4620 * acquiring the locks... As quota files are never truncated and quota code 4669 * acquiring the locks... As quota files are never truncated and quota code
4621 * itself serializes the operations (and noone else should touch the files) 4670 * itself serializes the operations (and no one else should touch the files)
4622 * we don't have to be afraid of races */ 4671 * we don't have to be afraid of races */
4623static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 4672static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
4624 size_t len, loff_t off) 4673 size_t len, loff_t off)
@@ -4708,9 +4757,8 @@ out:
4708 if (inode->i_size < off + len) { 4757 if (inode->i_size < off + len) {
4709 i_size_write(inode, off + len); 4758 i_size_write(inode, off + len);
4710 EXT4_I(inode)->i_disksize = inode->i_size; 4759 EXT4_I(inode)->i_disksize = inode->i_size;
4760 ext4_mark_inode_dirty(handle, inode);
4711 } 4761 }
4712 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4713 ext4_mark_inode_dirty(handle, inode);
4714 mutex_unlock(&inode->i_mutex); 4762 mutex_unlock(&inode->i_mutex);
4715 return len; 4763 return len;
4716} 4764}