aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
authorAndreas Dilger <adilger@clusterfs.com>2007-10-16 18:38:25 -0400
committerTheodore Ts'o <tytso@mit.edu>2007-10-17 18:50:00 -0400
commit717d50e4971b81b96c0199c91cdf0039a8cb181a (patch)
treea8d68edbc1f064c76cbfee206e093d2c86c80ba0 /fs/ext4/super.c
parent4074fe3736b1a43431dff870bf9055ac5dcf3f03 (diff)
Ext4: Uninitialized Block Groups
In pass1 of e2fsck, every inode table in the fileystem is scanned and checked, regardless of whether it is in use. This is this the most time consuming part of the filesystem check. The unintialized block group feature can greatly reduce e2fsck time by eliminating checking of uninitialized inodes. With this feature, there is a a high water mark of used inodes for each block group. Block and inode bitmaps can be uninitialized on disk via a flag in the group descriptor to avoid reading or scanning them at e2fsck time. A checksum of each group descriptor is used to ensure that corruption in the group descriptor's bit flags does not cause incorrect operation. The feature is enabled through a mkfs option mke2fs /dev/ -O uninit_groups A patch adding support for uninitialized block groups to e2fsprogs tools has been posted to the linux-ext4 mailing list. The patches have been stress tested with fsstress and fsx. In performance tests testing e2fsck time, we have seen that e2fsck time on ext3 grows linearly with the total number of inodes in the filesytem. In ext4 with the uninitialized block groups feature, the e2fsck time is constant, based solely on the number of used inodes rather than the total inode count. Since typical ext4 filesystems only use 1-10% of their inodes, this feature can greatly reduce e2fsck time for users. With performance improvement of 2-20 times, depending on how full the filesystem is. The attached graph shows the major improvements in e2fsck times in filesystems with a large total inode count, but few inodes in use. In each group descriptor if we have EXT4_BG_INODE_UNINIT set in bg_flags: Inode table is not initialized/used in this group. So we can skip the consistency check during fsck. EXT4_BG_BLOCK_UNINIT set in bg_flags: No block in the group is used. So we can skip the block bitmap verification for this group. We also add two new fields to group descriptor as a part of uninitialized group patch. __le16 bg_itable_unused; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ bg_itable_unused: If we have EXT4_BG_INODE_UNINIT not set in bg_flags then bg_itable_unused will give the offset within the inode table till the inodes are used. This can be used by fsck to skip list of inodes that are marked unused. bg_checksum: Now that we depend on bg_flags and bg_itable_unused to determine the block and inode usage, we need to make sure group descriptor is not corrupt. We add checksum to group descriptor to detect corruption. If the descriptor is found to be corrupt, we mark all the blocks and inodes in the group used. Signed-off-by: Avantika Mathur <mathur@us.ibm.com> Signed-off-by: Andreas Dilger <adilger@clusterfs.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e2bdf93693a6..dd4ff9c87358 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,12 +37,14 @@
37#include <linux/quotaops.h> 37#include <linux/quotaops.h>
38#include <linux/seq_file.h> 38#include <linux/seq_file.h>
39#include <linux/log2.h> 39#include <linux/log2.h>
40#include <linux/crc16.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42 43
43#include "xattr.h" 44#include "xattr.h"
44#include "acl.h" 45#include "acl.h"
45#include "namei.h" 46#include "namei.h"
47#include "group.h"
46 48
47static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 49static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
48 unsigned long journal_devnum); 50 unsigned long journal_devnum);
@@ -1308,6 +1310,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1308 return res; 1310 return res;
1309} 1311}
1310 1312
1313__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1314 struct ext4_group_desc *gdp)
1315{
1316 __u16 crc = 0;
1317
1318 if (sbi->s_es->s_feature_ro_compat &
1319 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1320 int offset = offsetof(struct ext4_group_desc, bg_checksum);
1321 __le32 le_group = cpu_to_le32(block_group);
1322
1323 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1324 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1325 crc = crc16(crc, (__u8 *)gdp, offset);
1326 offset += sizeof(gdp->bg_checksum); /* skip checksum */
1327 /* for checksum of struct ext4_group_desc do the rest...*/
1328 if ((sbi->s_es->s_feature_incompat &
1329 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1330 offset < le16_to_cpu(sbi->s_es->s_desc_size))
1331 crc = crc16(crc, (__u8 *)gdp + offset,
1332 le16_to_cpu(sbi->s_es->s_desc_size) -
1333 offset);
1334 }
1335
1336 return cpu_to_le16(crc);
1337}
1338
1339int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1340 struct ext4_group_desc *gdp)
1341{
1342 if ((sbi->s_es->s_feature_ro_compat &
1343 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1344 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1345 return 0;
1346
1347 return 1;
1348}
1349
1311/* Called at mount-time, super-block is locked */ 1350/* Called at mount-time, super-block is locked */
1312static int ext4_check_descriptors (struct super_block * sb) 1351static int ext4_check_descriptors (struct super_block * sb)
1313{ 1352{
@@ -1362,6 +1401,14 @@ static int ext4_check_descriptors (struct super_block * sb)
1362 i, inode_table); 1401 i, inode_table);
1363 return 0; 1402 return 0;
1364 } 1403 }
1404 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1405 ext4_error(sb, __FUNCTION__,
1406 "Checksum for group %d failed (%u!=%u)\n", i,
1407 le16_to_cpu(ext4_group_desc_csum(sbi, i,
1408 gdp)),
1409 le16_to_cpu(gdp->bg_checksum));
1410 return 0;
1411 }
1365 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1412 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1366 gdp = (struct ext4_group_desc *) 1413 gdp = (struct ext4_group_desc *)
1367 ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); 1414 ((__u8 *)gdp + EXT4_DESC_SIZE(sb));