diff options
author | Theodore Ts'o <tytso@mit.edu> | 2009-05-01 12:58:36 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-05-01 12:58:36 -0400 |
commit | 75507efb1372b6acf1aa6bf00ebd49ce196fd994 (patch) | |
tree | ed2455cbd29b444e8cfc1651b51c6d7a34b6b01f | |
parent | 32ed5058ce90024efcd811254b4b1de0468099df (diff) |
ext4: Don't avoid using BLOCK_UNINIT block groups in mballoc
By avoiding the use of not-yet-used block groups (i.e., block groups
with the BLOCK_UNINIT flag), mballoc had a tendency to create large
files with large non-contiguous gaps. In addition, avoiding the use of
new block groups had a tendency to push regular file data into the
first block group in a flex_bg group, which slows down
e2fsck pass 2, since it has a tendency to seek much more. For
example:
               Before Patch                      After Patch
              Time in seconds                  Time in seconds
         Real / User / Sys     MB/s       Real / User / Sys     MB/s
Pass 1   8.52 / 2.21 / 0.46   20.43       8.84 / 4.97 / 1.11   19.68
Pass 2  21.16 / 1.02 / 1.86   11.30       6.54 / 1.77 / 1.78   36.39
Pass 3   0.01 / 0.00 / 0.00  139.00       0.01 / 0.01 / 0.00  128.90
Pass 4   0.16 / 0.15 / 0.00    0.00       0.17 / 0.17 / 0.00    0.00
Pass 5   2.52 / 1.99 / 0.09    0.79       2.31 / 1.78 / 0.06    0.86
Total   32.40 / 5.11 / 2.49   12.81      17.99 / 8.75 / 2.98   23.01
This was on a sample 80 gig root filesystem which was approximately
50% full. Note the improved e2fsck pass 2 performance, by over a
factor of 3, due to a decreased number of seeks. (The total amount of
I/O in pass 2 was unchanged; the layout of the directory blocks was
simply much better from e2fsck's perspective.)
Other changes as a result of this patch on this sample filesystem:
                              Before Patch   After Patch
# of non-contig files              762           779
# of non-contig directories        571           570
# of BLOCK_UNINIT bg's             307           293
# of INODE_UNINIT bg's             503           503
Out of 640 block groups, of which 333 were in use, this patch caused
an extra 14 block groups to be utilized. The number of non-contiguous
files did go up slightly, but when measured against the 99.9% of the
files (603,154) which were contiguously allocated, this is pretty
insignificant.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andreas Dilger <adilger@sun.com>
-rw-r--r-- | fs/ext4/mballoc.c | 9 |
1 files changed, 1 insertions, 8 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c3af9e6b6668..dbd47eac13ec 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -1728,7 +1728,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1728 | unsigned free, fragments; | 1728 | unsigned free, fragments; |
1729 | unsigned i, bits; | 1729 | unsigned i, bits; |
1730 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); | 1730 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); |
1731 | struct ext4_group_desc *desc; | ||
1732 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | 1731 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
1733 | 1732 | ||
1734 | BUG_ON(cr < 0 || cr >= 4); | 1733 | BUG_ON(cr < 0 || cr >= 4); |
@@ -1744,10 +1743,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1744 | switch (cr) { | 1743 | switch (cr) { |
1745 | case 0: | 1744 | case 0: |
1746 | BUG_ON(ac->ac_2order == 0); | 1745 | BUG_ON(ac->ac_2order == 0); |
1747 | /* If this group is uninitialized, skip it initially */ | ||
1748 | desc = ext4_get_group_desc(ac->ac_sb, group, NULL); | ||
1749 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1750 | return 0; | ||
1751 | 1746 | ||
1752 | /* Avoid using the first bg of a flexgroup for data files */ | 1747 | /* Avoid using the first bg of a flexgroup for data files */ |
1753 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && | 1748 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && |
@@ -2067,9 +2062,7 @@ repeat: | |||
2067 | 2062 | ||
2068 | ac->ac_groups_scanned++; | 2063 | ac->ac_groups_scanned++; |
2069 | desc = ext4_get_group_desc(sb, group, NULL); | 2064 | desc = ext4_get_group_desc(sb, group, NULL); |
2070 | if (cr == 0 || (desc->bg_flags & | 2065 | if (cr == 0) |
2071 | cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && | ||
2072 | ac->ac_2order != 0)) | ||
2073 | ext4_mb_simple_scan_group(ac, &e4b); | 2066 | ext4_mb_simple_scan_group(ac, &e4b); |
2074 | else if (cr == 1 && | 2067 | else if (cr == 1 && |
2075 | ac->ac_g_ex.fe_len == sbi->s_stripe) | 2068 | ac->ac_g_ex.fe_len == sbi->s_stripe) |