aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2009-05-01 12:58:36 -0400
committerTheodore Ts'o <tytso@mit.edu>2009-05-01 12:58:36 -0400
commit75507efb1372b6acf1aa6bf00ebd49ce196fd994 (patch)
treeed2455cbd29b444e8cfc1651b51c6d7a34b6b01f
parent32ed5058ce90024efcd811254b4b1de0468099df (diff)
ext4: Don't avoid using BLOCK_UNINIT block groups in mballoc
By avoiding the use of not-yet-used block groups (i.e., block groups with the BLOCK_UNINIT flag), mballoc had a tendency to create large files with large non-contiguous gaps. In addition, avoiding the use of new block groups had a tendency to push regular file data into the first block group in a flex_bg group, which slows down e2fsck pass 2, since it has a tendency to seek much more. For example:

               Before Patch                       After Patch
              Time in seconds                   Time in seconds
          Real  / User / Sys     MB/s       Real  / User / Sys     MB/s
Pass 1    8.52  / 2.21 / 0.46   20.43       8.84  / 4.97 / 1.11   19.68
Pass 2   21.16  / 1.02 / 1.86   11.30       6.54  / 1.77 / 1.78   36.39
Pass 3    0.01  / 0.00 / 0.00  139.00       0.01  / 0.01 / 0.00  128.90
Pass 4    0.16  / 0.15 / 0.00    0.00       0.17  / 0.17 / 0.00    0.00
Pass 5    2.52  / 1.99 / 0.09    0.79       2.31  / 1.78 / 0.06    0.86
Total    32.40  / 5.11 / 2.49   12.81      17.99  / 8.75 / 2.98   23.01

This was on a sample 80 gig root filesystem which was approximately 50% full. Note the improved e2fsck pass 2 performance, by over a factor of 3, due to a decreased number of seeks. (The total amount of I/O in pass 2 was unchanged; the layout of the directory blocks was simply much better from e2fsck's perspective.)

Other changes as a result of this patch on this sample filesystem:

                               Before Patch    After Patch
# of non-contig files               762            779
# of non-contig directories         571            570
# of BLOCK_UNINIT bg's              307            293
# of INODE_UNINIT bg's              503            503

Out of 640 block groups, of which 333 were in use, this patch caused an extra 14 block groups to be utilized. The number of non-contiguous files did go up slightly, but when measured against the 99.9% of the files (603,154) which were contiguously allocated, this is pretty insignificant.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andreas Dilger <adilger@sun.com>
-rw-r--r--fs/ext4/mballoc.c9
1 files changed, 1 insertions, 8 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c3af9e6b6668..dbd47eac13ec 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1728,7 +1728,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1728 unsigned free, fragments; 1728 unsigned free, fragments;
1729 unsigned i, bits; 1729 unsigned i, bits;
1730 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); 1730 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1731 struct ext4_group_desc *desc;
1732 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 1731 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1733 1732
1734 BUG_ON(cr < 0 || cr >= 4); 1733 BUG_ON(cr < 0 || cr >= 4);
@@ -1744,10 +1743,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1744 switch (cr) { 1743 switch (cr) {
1745 case 0: 1744 case 0:
1746 BUG_ON(ac->ac_2order == 0); 1745 BUG_ON(ac->ac_2order == 0);
1747 /* If this group is uninitialized, skip it initially */
1748 desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
1749 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1750 return 0;
1751 1746
1752 /* Avoid using the first bg of a flexgroup for data files */ 1747 /* Avoid using the first bg of a flexgroup for data files */
1753 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1748 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -2067,9 +2062,7 @@ repeat:
2067 2062
2068 ac->ac_groups_scanned++; 2063 ac->ac_groups_scanned++;
2069 desc = ext4_get_group_desc(sb, group, NULL); 2064 desc = ext4_get_group_desc(sb, group, NULL);
2070 if (cr == 0 || (desc->bg_flags & 2065 if (cr == 0)
2071 cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
2072 ac->ac_2order != 0))
2073 ext4_mb_simple_scan_group(ac, &e4b); 2066 ext4_mb_simple_scan_group(ac, &e4b);
2074 else if (cr == 1 && 2067 else if (cr == 1 &&
2075 ac->ac_g_ex.fe_len == sbi->s_stripe) 2068 ac->ac_g_ex.fe_len == sbi->s_stripe)