aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c379
1 files changed, 324 insertions, 55 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1ca0f546c466..055a0cd0168e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -373,6 +373,66 @@ void ext4_update_dynamic_rev(struct super_block *sb)
373 */ 373 */
374} 374}
375 375
376int ext4_update_compat_feature(handle_t *handle,
377 struct super_block *sb, __u32 compat)
378{
379 int err = 0;
380 if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
381 err = ext4_journal_get_write_access(handle,
382 EXT4_SB(sb)->s_sbh);
383 if (err)
384 return err;
385 EXT4_SET_COMPAT_FEATURE(sb, compat);
386 sb->s_dirt = 1;
387 handle->h_sync = 1;
388 BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
389 "call ext4_journal_dirty_met adata");
390 err = ext4_journal_dirty_metadata(handle,
391 EXT4_SB(sb)->s_sbh);
392 }
393 return err;
394}
395
396int ext4_update_rocompat_feature(handle_t *handle,
397 struct super_block *sb, __u32 rocompat)
398{
399 int err = 0;
400 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
401 err = ext4_journal_get_write_access(handle,
402 EXT4_SB(sb)->s_sbh);
403 if (err)
404 return err;
405 EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
406 sb->s_dirt = 1;
407 handle->h_sync = 1;
408 BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
409 "call ext4_journal_dirty_met adata");
410 err = ext4_journal_dirty_metadata(handle,
411 EXT4_SB(sb)->s_sbh);
412 }
413 return err;
414}
415
416int ext4_update_incompat_feature(handle_t *handle,
417 struct super_block *sb, __u32 incompat)
418{
419 int err = 0;
420 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
421 err = ext4_journal_get_write_access(handle,
422 EXT4_SB(sb)->s_sbh);
423 if (err)
424 return err;
425 EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
426 sb->s_dirt = 1;
427 handle->h_sync = 1;
428 BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
429 "call ext4_journal_dirty_met adata");
430 err = ext4_journal_dirty_metadata(handle,
431 EXT4_SB(sb)->s_sbh);
432 }
433 return err;
434}
435
376/* 436/*
377 * Open the external journal device 437 * Open the external journal device
378 */ 438 */
@@ -443,6 +503,7 @@ static void ext4_put_super (struct super_block * sb)
443 struct ext4_super_block *es = sbi->s_es; 503 struct ext4_super_block *es = sbi->s_es;
444 int i; 504 int i;
445 505
506 ext4_mb_release(sb);
446 ext4_ext_release(sb); 507 ext4_ext_release(sb);
447 ext4_xattr_put_super(sb); 508 ext4_xattr_put_super(sb);
448 jbd2_journal_destroy(sbi->s_journal); 509 jbd2_journal_destroy(sbi->s_journal);
@@ -509,6 +570,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
509 ei->i_block_alloc_info = NULL; 570 ei->i_block_alloc_info = NULL;
510 ei->vfs_inode.i_version = 1; 571 ei->vfs_inode.i_version = 1;
511 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 572 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
573 INIT_LIST_HEAD(&ei->i_prealloc_list);
574 spin_lock_init(&ei->i_prealloc_lock);
512 return &ei->vfs_inode; 575 return &ei->vfs_inode;
513} 576}
514 577
@@ -533,7 +596,7 @@ static void init_once(struct kmem_cache *cachep, void *foo)
533#ifdef CONFIG_EXT4DEV_FS_XATTR 596#ifdef CONFIG_EXT4DEV_FS_XATTR
534 init_rwsem(&ei->xattr_sem); 597 init_rwsem(&ei->xattr_sem);
535#endif 598#endif
536 mutex_init(&ei->truncate_mutex); 599 init_rwsem(&ei->i_data_sem);
537 inode_init_once(&ei->vfs_inode); 600 inode_init_once(&ei->vfs_inode);
538} 601}
539 602
@@ -605,18 +668,20 @@ static inline void ext4_show_quota_options(struct seq_file *seq, struct super_bl
605 */ 668 */
606static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 669static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
607{ 670{
671 int def_errors;
672 unsigned long def_mount_opts;
608 struct super_block *sb = vfs->mnt_sb; 673 struct super_block *sb = vfs->mnt_sb;
609 struct ext4_sb_info *sbi = EXT4_SB(sb); 674 struct ext4_sb_info *sbi = EXT4_SB(sb);
610 struct ext4_super_block *es = sbi->s_es; 675 struct ext4_super_block *es = sbi->s_es;
611 unsigned long def_mount_opts;
612 676
613 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 677 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
678 def_errors = le16_to_cpu(es->s_errors);
614 679
615 if (sbi->s_sb_block != 1) 680 if (sbi->s_sb_block != 1)
616 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 681 seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
617 if (test_opt(sb, MINIX_DF)) 682 if (test_opt(sb, MINIX_DF))
618 seq_puts(seq, ",minixdf"); 683 seq_puts(seq, ",minixdf");
619 if (test_opt(sb, GRPID)) 684 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
620 seq_puts(seq, ",grpid"); 685 seq_puts(seq, ",grpid");
621 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 686 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
622 seq_puts(seq, ",nogrpid"); 687 seq_puts(seq, ",nogrpid");
@@ -628,34 +693,33 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
628 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 693 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
629 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 694 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
630 } 695 }
631 if (test_opt(sb, ERRORS_CONT)) { 696 if (test_opt(sb, ERRORS_RO)) {
632 int def_errors = le16_to_cpu(es->s_errors);
633
634 if (def_errors == EXT4_ERRORS_PANIC || 697 if (def_errors == EXT4_ERRORS_PANIC ||
635 def_errors == EXT4_ERRORS_RO) { 698 def_errors == EXT4_ERRORS_CONTINUE) {
636 seq_puts(seq, ",errors=continue"); 699 seq_puts(seq, ",errors=remount-ro");
637 } 700 }
638 } 701 }
639 if (test_opt(sb, ERRORS_RO)) 702 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
640 seq_puts(seq, ",errors=remount-ro"); 703 seq_puts(seq, ",errors=continue");
641 if (test_opt(sb, ERRORS_PANIC)) 704 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
642 seq_puts(seq, ",errors=panic"); 705 seq_puts(seq, ",errors=panic");
643 if (test_opt(sb, NO_UID32)) 706 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
644 seq_puts(seq, ",nouid32"); 707 seq_puts(seq, ",nouid32");
645 if (test_opt(sb, DEBUG)) 708 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
646 seq_puts(seq, ",debug"); 709 seq_puts(seq, ",debug");
647 if (test_opt(sb, OLDALLOC)) 710 if (test_opt(sb, OLDALLOC))
648 seq_puts(seq, ",oldalloc"); 711 seq_puts(seq, ",oldalloc");
649#ifdef CONFIG_EXT4_FS_XATTR 712#ifdef CONFIG_EXT4DEV_FS_XATTR
650 if (test_opt(sb, XATTR_USER)) 713 if (test_opt(sb, XATTR_USER) &&
714 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
651 seq_puts(seq, ",user_xattr"); 715 seq_puts(seq, ",user_xattr");
652 if (!test_opt(sb, XATTR_USER) && 716 if (!test_opt(sb, XATTR_USER) &&
653 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 717 (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
654 seq_puts(seq, ",nouser_xattr"); 718 seq_puts(seq, ",nouser_xattr");
655 } 719 }
656#endif 720#endif
657#ifdef CONFIG_EXT4_FS_POSIX_ACL 721#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
658 if (test_opt(sb, POSIX_ACL)) 722 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
659 seq_puts(seq, ",acl"); 723 seq_puts(seq, ",acl");
660 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 724 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
661 seq_puts(seq, ",noacl"); 725 seq_puts(seq, ",noacl");
@@ -672,7 +736,17 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
672 seq_puts(seq, ",nobh"); 736 seq_puts(seq, ",nobh");
673 if (!test_opt(sb, EXTENTS)) 737 if (!test_opt(sb, EXTENTS))
674 seq_puts(seq, ",noextents"); 738 seq_puts(seq, ",noextents");
739 if (!test_opt(sb, MBALLOC))
740 seq_puts(seq, ",nomballoc");
741 if (test_opt(sb, I_VERSION))
742 seq_puts(seq, ",i_version");
675 743
744 if (sbi->s_stripe)
745 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
746 /*
747 * journal mode get enabled in different ways
748 * So just print the value even if we didn't specify it
749 */
676 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 750 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
677 seq_puts(seq, ",data=journal"); 751 seq_puts(seq, ",data=journal");
678 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 752 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@@ -681,7 +755,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
681 seq_puts(seq, ",data=writeback"); 755 seq_puts(seq, ",data=writeback");
682 756
683 ext4_show_quota_options(seq, sb); 757 ext4_show_quota_options(seq, sb);
684
685 return 0; 758 return 0;
686} 759}
687 760
@@ -809,11 +882,13 @@ enum {
809 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 882 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
810 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 883 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
811 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 884 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
885 Opt_journal_checksum, Opt_journal_async_commit,
812 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 886 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
813 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 887 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
814 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 888 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
815 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 889 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
816 Opt_grpquota, Opt_extents, Opt_noextents, 890 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
891 Opt_mballoc, Opt_nomballoc, Opt_stripe,
817}; 892};
818 893
819static match_table_t tokens = { 894static match_table_t tokens = {
@@ -848,6 +923,8 @@ static match_table_t tokens = {
848 {Opt_journal_update, "journal=update"}, 923 {Opt_journal_update, "journal=update"},
849 {Opt_journal_inum, "journal=%u"}, 924 {Opt_journal_inum, "journal=%u"},
850 {Opt_journal_dev, "journal_dev=%u"}, 925 {Opt_journal_dev, "journal_dev=%u"},
926 {Opt_journal_checksum, "journal_checksum"},
927 {Opt_journal_async_commit, "journal_async_commit"},
851 {Opt_abort, "abort"}, 928 {Opt_abort, "abort"},
852 {Opt_data_journal, "data=journal"}, 929 {Opt_data_journal, "data=journal"},
853 {Opt_data_ordered, "data=ordered"}, 930 {Opt_data_ordered, "data=ordered"},
@@ -865,6 +942,10 @@ static match_table_t tokens = {
865 {Opt_barrier, "barrier=%u"}, 942 {Opt_barrier, "barrier=%u"},
866 {Opt_extents, "extents"}, 943 {Opt_extents, "extents"},
867 {Opt_noextents, "noextents"}, 944 {Opt_noextents, "noextents"},
945 {Opt_i_version, "i_version"},
946 {Opt_mballoc, "mballoc"},
947 {Opt_nomballoc, "nomballoc"},
948 {Opt_stripe, "stripe=%u"},
868 {Opt_err, NULL}, 949 {Opt_err, NULL},
869 {Opt_resize, "resize"}, 950 {Opt_resize, "resize"},
870}; 951};
@@ -1035,6 +1116,13 @@ static int parse_options (char *options, struct super_block *sb,
1035 return 0; 1116 return 0;
1036 *journal_devnum = option; 1117 *journal_devnum = option;
1037 break; 1118 break;
1119 case Opt_journal_checksum:
1120 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1121 break;
1122 case Opt_journal_async_commit:
1123 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1124 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1125 break;
1038 case Opt_noload: 1126 case Opt_noload:
1039 set_opt (sbi->s_mount_opt, NOLOAD); 1127 set_opt (sbi->s_mount_opt, NOLOAD);
1040 break; 1128 break;
@@ -1203,6 +1291,23 @@ clear_qf_name:
1203 case Opt_noextents: 1291 case Opt_noextents:
1204 clear_opt (sbi->s_mount_opt, EXTENTS); 1292 clear_opt (sbi->s_mount_opt, EXTENTS);
1205 break; 1293 break;
1294 case Opt_i_version:
1295 set_opt(sbi->s_mount_opt, I_VERSION);
1296 sb->s_flags |= MS_I_VERSION;
1297 break;
1298 case Opt_mballoc:
1299 set_opt(sbi->s_mount_opt, MBALLOC);
1300 break;
1301 case Opt_nomballoc:
1302 clear_opt(sbi->s_mount_opt, MBALLOC);
1303 break;
1304 case Opt_stripe:
1305 if (match_int(&args[0], &option))
1306 return 0;
1307 if (option < 0)
1308 return 0;
1309 sbi->s_stripe = option;
1310 break;
1206 default: 1311 default:
1207 printk (KERN_ERR 1312 printk (KERN_ERR
1208 "EXT4-fs: Unrecognized mount option \"%s\" " 1313 "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1364,7 +1469,7 @@ static int ext4_check_descriptors (struct super_block * sb)
1364 struct ext4_group_desc * gdp = NULL; 1469 struct ext4_group_desc * gdp = NULL;
1365 int desc_block = 0; 1470 int desc_block = 0;
1366 int flexbg_flag = 0; 1471 int flexbg_flag = 0;
1367 int i; 1472 ext4_group_t i;
1368 1473
1369 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1474 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1370 flexbg_flag = 1; 1475 flexbg_flag = 1;
@@ -1386,7 +1491,7 @@ static int ext4_check_descriptors (struct super_block * sb)
1386 if (block_bitmap < first_block || block_bitmap > last_block) 1491 if (block_bitmap < first_block || block_bitmap > last_block)
1387 { 1492 {
1388 ext4_error (sb, "ext4_check_descriptors", 1493 ext4_error (sb, "ext4_check_descriptors",
1389 "Block bitmap for group %d" 1494 "Block bitmap for group %lu"
1390 " not in group (block %llu)!", 1495 " not in group (block %llu)!",
1391 i, block_bitmap); 1496 i, block_bitmap);
1392 return 0; 1497 return 0;
@@ -1395,7 +1500,7 @@ static int ext4_check_descriptors (struct super_block * sb)
1395 if (inode_bitmap < first_block || inode_bitmap > last_block) 1500 if (inode_bitmap < first_block || inode_bitmap > last_block)
1396 { 1501 {
1397 ext4_error (sb, "ext4_check_descriptors", 1502 ext4_error (sb, "ext4_check_descriptors",
1398 "Inode bitmap for group %d" 1503 "Inode bitmap for group %lu"
1399 " not in group (block %llu)!", 1504 " not in group (block %llu)!",
1400 i, inode_bitmap); 1505 i, inode_bitmap);
1401 return 0; 1506 return 0;
@@ -1405,17 +1510,16 @@ static int ext4_check_descriptors (struct super_block * sb)
1405 inode_table + sbi->s_itb_per_group - 1 > last_block) 1510 inode_table + sbi->s_itb_per_group - 1 > last_block)
1406 { 1511 {
1407 ext4_error (sb, "ext4_check_descriptors", 1512 ext4_error (sb, "ext4_check_descriptors",
1408 "Inode table for group %d" 1513 "Inode table for group %lu"
1409 " not in group (block %llu)!", 1514 " not in group (block %llu)!",
1410 i, inode_table); 1515 i, inode_table);
1411 return 0; 1516 return 0;
1412 } 1517 }
1413 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1518 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1414 ext4_error(sb, __FUNCTION__, 1519 ext4_error(sb, __FUNCTION__,
1415 "Checksum for group %d failed (%u!=%u)\n", i, 1520 "Checksum for group %lu failed (%u!=%u)\n",
1416 le16_to_cpu(ext4_group_desc_csum(sbi, i, 1521 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1417 gdp)), 1522 gdp)), le16_to_cpu(gdp->bg_checksum));
1418 le16_to_cpu(gdp->bg_checksum));
1419 return 0; 1523 return 0;
1420 } 1524 }
1421 if (!flexbg_flag) 1525 if (!flexbg_flag)
@@ -1429,7 +1533,6 @@ static int ext4_check_descriptors (struct super_block * sb)
1429 return 1; 1533 return 1;
1430} 1534}
1431 1535
1432
1433/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1536/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1434 * the superblock) which were deleted from all directories, but held open by 1537 * the superblock) which were deleted from all directories, but held open by
1435 * a process at the time of a crash. We walk the list and try to delete these 1538 * a process at the time of a crash. We walk the list and try to delete these
@@ -1542,20 +1645,95 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1542#endif 1645#endif
1543 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1646 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1544} 1647}
1648/*
1649 * Maximal extent format file size.
1650 * Resulting logical blkno at s_maxbytes must fit in our on-disk
1651 * extent format containers, within a sector_t, and within i_blocks
1652 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
1653 * so that won't be a limiting factor.
1654 *
1655 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1656 */
1657static loff_t ext4_max_size(int blkbits)
1658{
1659 loff_t res;
1660 loff_t upper_limit = MAX_LFS_FILESIZE;
1661
1662 /* small i_blocks in vfs inode? */
1663 if (sizeof(blkcnt_t) < sizeof(u64)) {
1664 /*
1665 * CONFIG_LSF is not enabled implies the inode
1666 * i_block represent total blocks in 512 bytes
1667 * 32 == size of vfs inode i_blocks * 8
1668 */
1669 upper_limit = (1LL << 32) - 1;
1670
1671 /* total blocks in file system block size */
1672 upper_limit >>= (blkbits - 9);
1673 upper_limit <<= blkbits;
1674 }
1675
1676 /* 32-bit extent-start container, ee_block */
1677 res = 1LL << 32;
1678 res <<= blkbits;
1679 res -= 1;
1680
1681 /* Sanity check against vm- & vfs- imposed limits */
1682 if (res > upper_limit)
1683 res = upper_limit;
1684
1685 return res;
1686}
1545 1687
1546/* 1688/*
1547 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1689 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
1548 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1690 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1549 * We need to be 1 filesystem block less than the 2^32 sector limit. 1691 * We need to be 1 filesystem block less than the 2^48 sector limit.
1550 */ 1692 */
1551static loff_t ext4_max_size(int bits) 1693static loff_t ext4_max_bitmap_size(int bits)
1552{ 1694{
1553 loff_t res = EXT4_NDIR_BLOCKS; 1695 loff_t res = EXT4_NDIR_BLOCKS;
1554 /* This constant is calculated to be the largest file size for a 1696 int meta_blocks;
1555 * dense, 4k-blocksize file such that the total number of 1697 loff_t upper_limit;
1698 /* This is calculated to be the largest file size for a
1699 * dense, bitmapped file such that the total number of
1556 * sectors in the file, including data and all indirect blocks, 1700 * sectors in the file, including data and all indirect blocks,
1557 * does not exceed 2^32. */ 1701 * does not exceed 2^48 -1
1558 const loff_t upper_limit = 0x1ff7fffd000LL; 1702 * __u32 i_blocks_lo and _u16 i_blocks_high representing the
1703 * total number of 512 bytes blocks of the file
1704 */
1705
1706 if (sizeof(blkcnt_t) < sizeof(u64)) {
1707 /*
1708 * CONFIG_LSF is not enabled implies the inode
1709 * i_block represent total blocks in 512 bytes
1710 * 32 == size of vfs inode i_blocks * 8
1711 */
1712 upper_limit = (1LL << 32) - 1;
1713
1714 /* total blocks in file system block size */
1715 upper_limit >>= (bits - 9);
1716
1717 } else {
1718 /*
1719 * We use 48 bit ext4_inode i_blocks
1720 * With EXT4_HUGE_FILE_FL set the i_blocks
1721 * represent total number of blocks in
1722 * file system block size
1723 */
1724 upper_limit = (1LL << 48) - 1;
1725
1726 }
1727
1728 /* indirect blocks */
1729 meta_blocks = 1;
1730 /* double indirect blocks */
1731 meta_blocks += 1 + (1LL << (bits-2));
1732 /* tripple indirect blocks */
1733 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1734
1735 upper_limit -= meta_blocks;
1736 upper_limit <<= bits;
1559 1737
1560 res += 1LL << (bits-2); 1738 res += 1LL << (bits-2);
1561 res += 1LL << (2*(bits-2)); 1739 res += 1LL << (2*(bits-2));
@@ -1563,6 +1741,10 @@ static loff_t ext4_max_size(int bits)
1563 res <<= bits; 1741 res <<= bits;
1564 if (res > upper_limit) 1742 if (res > upper_limit)
1565 res = upper_limit; 1743 res = upper_limit;
1744
1745 if (res > MAX_LFS_FILESIZE)
1746 res = MAX_LFS_FILESIZE;
1747
1566 return res; 1748 return res;
1567} 1749}
1568 1750
@@ -1570,7 +1752,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1570 ext4_fsblk_t logical_sb_block, int nr) 1752 ext4_fsblk_t logical_sb_block, int nr)
1571{ 1753{
1572 struct ext4_sb_info *sbi = EXT4_SB(sb); 1754 struct ext4_sb_info *sbi = EXT4_SB(sb);
1573 unsigned long bg, first_meta_bg; 1755 ext4_group_t bg, first_meta_bg;
1574 int has_super = 0; 1756 int has_super = 0;
1575 1757
1576 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1758 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
@@ -1584,8 +1766,39 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1584 return (has_super + ext4_group_first_block_no(sb, bg)); 1766 return (has_super + ext4_group_first_block_no(sb, bg));
1585} 1767}
1586 1768
1769/**
1770 * ext4_get_stripe_size: Get the stripe size.
1771 * @sbi: In memory super block info
1772 *
1773 * If we have specified it via mount option, then
1774 * use the mount option value. If the value specified at mount time is
1775 * greater than the blocks per group use the super block value.
1776 * If the super block value is greater than blocks per group return 0.
1777 * Allocator needs it be less than blocks per group.
1778 *
1779 */
1780static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1781{
1782 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
1783 unsigned long stripe_width =
1784 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
1785
1786 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
1787 return sbi->s_stripe;
1788
1789 if (stripe_width <= sbi->s_blocks_per_group)
1790 return stripe_width;
1791
1792 if (stride <= sbi->s_blocks_per_group)
1793 return stride;
1794
1795 return 0;
1796}
1587 1797
1588static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1798static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1799 __releases(kernel_sem)
1800 __acquires(kernel_sem)
1801
1589{ 1802{
1590 struct buffer_head * bh; 1803 struct buffer_head * bh;
1591 struct ext4_super_block *es = NULL; 1804 struct ext4_super_block *es = NULL;
@@ -1599,7 +1812,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1599 unsigned long def_mount_opts; 1812 unsigned long def_mount_opts;
1600 struct inode *root; 1813 struct inode *root;
1601 int blocksize; 1814 int blocksize;
1602 int hblock;
1603 int db_count; 1815 int db_count;
1604 int i; 1816 int i;
1605 int needs_recovery; 1817 int needs_recovery;
@@ -1624,6 +1836,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1624 goto out_fail; 1836 goto out_fail;
1625 } 1837 }
1626 1838
1839 if (!sb_set_blocksize(sb, blocksize)) {
1840 printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
1841 goto out_fail;
1842 }
1843
1627 /* 1844 /*
1628 * The ext4 superblock will not be buffer aligned for other than 1kB 1845 * The ext4 superblock will not be buffer aligned for other than 1kB
1629 * block sizes. We need to calculate the offset from buffer start. 1846 * block sizes. We need to calculate the offset from buffer start.
@@ -1674,10 +1891,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1674 1891
1675 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 1892 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
1676 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1893 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1677 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) 1894 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
1678 set_opt(sbi->s_mount_opt, ERRORS_RO);
1679 else
1680 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1895 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1896 else
1897 set_opt(sbi->s_mount_opt, ERRORS_RO);
1681 1898
1682 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1899 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1683 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1900 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -1689,6 +1906,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1689 * User -o noextents to turn it off 1906 * User -o noextents to turn it off
1690 */ 1907 */
1691 set_opt(sbi->s_mount_opt, EXTENTS); 1908 set_opt(sbi->s_mount_opt, EXTENTS);
1909 /*
1910 * turn on mballoc feature by default in ext4 filesystem
1911 * User -o nomballoc to turn it off
1912 */
1913 set_opt(sbi->s_mount_opt, MBALLOC);
1692 1914
1693 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1915 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1694 NULL, 0)) 1916 NULL, 0))
@@ -1723,6 +1945,19 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1723 sb->s_id, le32_to_cpu(features)); 1945 sb->s_id, le32_to_cpu(features));
1724 goto failed_mount; 1946 goto failed_mount;
1725 } 1947 }
1948 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
1949 /*
1950 * Large file size enabled file system can only be
1951 * mount if kernel is build with CONFIG_LSF
1952 */
1953 if (sizeof(root->i_blocks) < sizeof(u64) &&
1954 !(sb->s_flags & MS_RDONLY)) {
1955 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
1956 "files cannot be mounted read-write "
1957 "without CONFIG_LSF.\n", sb->s_id);
1958 goto failed_mount;
1959 }
1960 }
1726 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1961 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1727 1962
1728 if (blocksize < EXT4_MIN_BLOCK_SIZE || 1963 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
@@ -1733,20 +1968,16 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1733 goto failed_mount; 1968 goto failed_mount;
1734 } 1969 }
1735 1970
1736 hblock = bdev_hardsect_size(sb->s_bdev);
1737 if (sb->s_blocksize != blocksize) { 1971 if (sb->s_blocksize != blocksize) {
1738 /* 1972
1739 * Make sure the blocksize for the filesystem is larger 1973 /* Validate the filesystem blocksize */
1740 * than the hardware sectorsize for the machine. 1974 if (!sb_set_blocksize(sb, blocksize)) {
1741 */ 1975 printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
1742 if (blocksize < hblock) { 1976 blocksize);
1743 printk(KERN_ERR "EXT4-fs: blocksize %d too small for "
1744 "device blocksize %d.\n", blocksize, hblock);
1745 goto failed_mount; 1977 goto failed_mount;
1746 } 1978 }
1747 1979
1748 brelse (bh); 1980 brelse (bh);
1749 sb_set_blocksize(sb, blocksize);
1750 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 1981 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
1751 offset = do_div(logical_sb_block, blocksize); 1982 offset = do_div(logical_sb_block, blocksize);
1752 bh = sb_bread(sb, logical_sb_block); 1983 bh = sb_bread(sb, logical_sb_block);
@@ -1764,6 +1995,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1764 } 1995 }
1765 } 1996 }
1766 1997
1998 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
1767 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); 1999 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
1768 2000
1769 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2001 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
@@ -1838,6 +2070,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1838 2070
1839 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2071 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
1840 goto cantfind_ext4; 2072 goto cantfind_ext4;
2073
2074 /* ensure blocks_count calculation below doesn't sign-extend */
2075 if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) <
2076 le32_to_cpu(es->s_first_data_block) + 1) {
2077 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, "
2078 "first data block %u, blocks per group %lu\n",
2079 ext4_blocks_count(es),
2080 le32_to_cpu(es->s_first_data_block),
2081 EXT4_BLOCKS_PER_GROUP(sb));
2082 goto failed_mount;
2083 }
1841 blocks_count = (ext4_blocks_count(es) - 2084 blocks_count = (ext4_blocks_count(es) -
1842 le32_to_cpu(es->s_first_data_block) + 2085 le32_to_cpu(es->s_first_data_block) +
1843 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2086 EXT4_BLOCKS_PER_GROUP(sb) - 1);
@@ -1900,6 +2143,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1900 sbi->s_rsv_window_head.rsv_goal_size = 0; 2143 sbi->s_rsv_window_head.rsv_goal_size = 0;
1901 ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); 2144 ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
1902 2145
2146 sbi->s_stripe = ext4_get_stripe_size(sbi);
2147
1903 /* 2148 /*
1904 * set up enough so that it can read an inode 2149 * set up enough so that it can read an inode
1905 */ 2150 */
@@ -1944,6 +2189,21 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1944 goto failed_mount4; 2189 goto failed_mount4;
1945 } 2190 }
1946 2191
2192 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2193 jbd2_journal_set_features(sbi->s_journal,
2194 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2195 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2196 } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2197 jbd2_journal_set_features(sbi->s_journal,
2198 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2199 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2200 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2201 } else {
2202 jbd2_journal_clear_features(sbi->s_journal,
2203 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2204 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2205 }
2206
1947 /* We have now updated the journal if required, so we can 2207 /* We have now updated the journal if required, so we can
1948 * validate the data journaling mode. */ 2208 * validate the data journaling mode. */
1949 switch (test_opt(sb, DATA_FLAGS)) { 2209 switch (test_opt(sb, DATA_FLAGS)) {
@@ -2044,6 +2304,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2044 "writeback"); 2304 "writeback");
2045 2305
2046 ext4_ext_init(sb); 2306 ext4_ext_init(sb);
2307 ext4_mb_init(sb, needs_recovery);
2047 2308
2048 lock_kernel(); 2309 lock_kernel();
2049 return 0; 2310 return 0;
@@ -2673,7 +2934,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
2673 if (test_opt(sb, MINIX_DF)) { 2934 if (test_opt(sb, MINIX_DF)) {
2674 sbi->s_overhead_last = 0; 2935 sbi->s_overhead_last = 0;
2675 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 2936 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
2676 unsigned long ngroups = sbi->s_groups_count, i; 2937 ext4_group_t ngroups = sbi->s_groups_count, i;
2677 ext4_fsblk_t overhead = 0; 2938 ext4_fsblk_t overhead = 0;
2678 smp_rmb(); 2939 smp_rmb();
2679 2940
@@ -2909,7 +3170,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
2909 size_t len, loff_t off) 3170 size_t len, loff_t off)
2910{ 3171{
2911 struct inode *inode = sb_dqopt(sb)->files[type]; 3172 struct inode *inode = sb_dqopt(sb)->files[type];
2912 sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3173 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
2913 int err = 0; 3174 int err = 0;
2914 int offset = off & (sb->s_blocksize - 1); 3175 int offset = off & (sb->s_blocksize - 1);
2915 int tocopy; 3176 int tocopy;
@@ -2947,7 +3208,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
2947 const char *data, size_t len, loff_t off) 3208 const char *data, size_t len, loff_t off)
2948{ 3209{
2949 struct inode *inode = sb_dqopt(sb)->files[type]; 3210 struct inode *inode = sb_dqopt(sb)->files[type];
2950 sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3211 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
2951 int err = 0; 3212 int err = 0;
2952 int offset = off & (sb->s_blocksize - 1); 3213 int offset = off & (sb->s_blocksize - 1);
2953 int tocopy; 3214 int tocopy;
@@ -3002,7 +3263,6 @@ out:
3002 i_size_write(inode, off+len-towrite); 3263 i_size_write(inode, off+len-towrite);
3003 EXT4_I(inode)->i_disksize = inode->i_size; 3264 EXT4_I(inode)->i_disksize = inode->i_size;
3004 } 3265 }
3005 inode->i_version++;
3006 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3266 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3007 ext4_mark_inode_dirty(handle, inode); 3267 ext4_mark_inode_dirty(handle, inode);
3008 mutex_unlock(&inode->i_mutex); 3268 mutex_unlock(&inode->i_mutex);
@@ -3027,9 +3287,15 @@ static struct file_system_type ext4dev_fs_type = {
3027 3287
3028static int __init init_ext4_fs(void) 3288static int __init init_ext4_fs(void)
3029{ 3289{
3030 int err = init_ext4_xattr(); 3290 int err;
3291
3292 err = init_ext4_mballoc();
3031 if (err) 3293 if (err)
3032 return err; 3294 return err;
3295
3296 err = init_ext4_xattr();
3297 if (err)
3298 goto out2;
3033 err = init_inodecache(); 3299 err = init_inodecache();
3034 if (err) 3300 if (err)
3035 goto out1; 3301 goto out1;
@@ -3041,6 +3307,8 @@ out:
3041 destroy_inodecache(); 3307 destroy_inodecache();
3042out1: 3308out1:
3043 exit_ext4_xattr(); 3309 exit_ext4_xattr();
3310out2:
3311 exit_ext4_mballoc();
3044 return err; 3312 return err;
3045} 3313}
3046 3314
@@ -3049,6 +3317,7 @@ static void __exit exit_ext4_fs(void)
3049 unregister_filesystem(&ext4dev_fs_type); 3317 unregister_filesystem(&ext4dev_fs_type);
3050 destroy_inodecache(); 3318 destroy_inodecache();
3051 exit_ext4_xattr(); 3319 exit_ext4_xattr();
3320 exit_ext4_mballoc();
3052} 3321}
3053 3322
3054MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3323MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");