author     Theodore Ts'o <tytso@mit.edu>    2013-02-04 15:08:40 -0500
committer  Theodore Ts'o <tytso@mit.edu>    2013-02-04 15:08:40 -0500
commit     40ae3487628235e5f1eb27542cca0cdb6e5dbe16
tree       205ec6dad9b2d3f41d5a78f4ac2fa071bae14c25 /fs/ext4/mballoc.c
parent     8dc0aa8cf0f7b51e6c7c342e6f1e61520fb94222
ext4: optimize mballoc for large allocations
The ext4 block allocator only maintains buddy bitmaps for chunks which are less than or equal to one quarter of a block group. That is, for a file system with a 1k blocksize, where the number of blocks in a block group is 8192, the largest chunk size tracked by buddy bitmaps is 2048 blocks. For a file system with a 4k blocksize, where the number of blocks in a block group is 32768, the largest chunk size tracked by buddy bitmaps is 8192 blocks.

To work around this limitation, mballoc.c before this commit would truncate allocation requests to the number of blocks in a block group minus 10. Why 10? Aside from being a completely arbitrary number, it keeps the allocation request from being a power of two larger than 25% of the block group.

Explicitly fallocating 50% of the block group size demonstrates the problem: since the request is a natural power of two, the block allocation code scans all of the block groups in the file system with cr==0, but then fails for every block group, because the buddy bitmaps do not track chunk sizes of 50% of a block group.

To fix this, in these cases we use ext4_mb_complex_scan_group() instead of ext4_mb_simple_scan_group().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Andreas Dilger <adilger@dilger.ca>
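The arithmetic in the message can be checked with a small userspace sketch (illustrative only, not kernel code; it assumes the usual ext4 default of 8 * blocksize blocks per block group, i.e. one block bitmap block):

/*
 * Illustrative userspace sketch, not kernel code: shows why the buddy
 * bitmaps cap out at one quarter of a block group for the two
 * geometries quoted in the commit message.
 */
#include <stdio.h>

int main(void)
{
	/* blocks per group defaults to 8 * blocksize: one block bitmap
	 * block holds 8 * blocksize bits */
	unsigned long blocksizes[] = { 1024, 4096 };

	for (int i = 0; i < 2; i++) {
		unsigned long bs = blocksizes[i];
		unsigned long blocks_per_group = 8 * bs;
		/* largest chunk tracked by the buddy bitmaps: 1/4 group */
		unsigned long max_buddy_chunk = blocks_per_group / 4;
		/* a 50% request is twice what the buddy bitmaps track */
		unsigned long half_group = blocks_per_group / 2;

		printf("blocksize %5lu: %6lu blocks/group, buddy limit %5lu, "
		       "50%% request %6lu blocks\n",
		       bs, blocks_per_group, max_buddy_chunk, half_group);
	}
	return 0;
}

For a 1k blocksize this prints 8192 blocks per group with a 2048-block buddy limit; for 4k, 32768 and 8192, matching the figures in the message.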
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--  fs/ext4/mballoc.c  16
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 061727acd990..e350885aec30 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1884,15 +1884,19 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	case 0:
 		BUG_ON(ac->ac_2order == 0);
 
-		if (grp->bb_largest_free_order < ac->ac_2order)
-			return 0;
-
 		/* Avoid using the first bg of a flexgroup for data files */
 		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
 		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
 		    ((group % flex_size) == 0))
 			return 0;
 
+		if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
+		    (free / fragments) >= ac->ac_g_ex.fe_len)
+			return 1;
+
+		if (grp->bb_largest_free_order < ac->ac_2order)
+			return 0;
+
 		return 1;
 	case 1:
 		if ((free / fragments) >= ac->ac_g_ex.fe_len)
@@ -2007,7 +2011,7 @@ repeat:
 			}
 
 			ac->ac_groups_scanned++;
-			if (cr == 0)
+			if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
 				ext4_mb_simple_scan_group(ac, &e4b);
 			else if (cr == 1 && sbi->s_stripe &&
 					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
@@ -4005,8 +4009,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	len = ar->len;
 
 	/* just a dirty hack to filter too big requests */
-	if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
-		len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
+	if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
+		len = EXT4_CLUSTERS_PER_GROUP(sb);
 
 	/* start searching from the goal */
 	goal = ar->goal;
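For reference, a minimal userspace sketch of the reproduction scenario the commit message describes (the path /mnt/ext4/testfile and the 4k-blocksize, 32768-blocks-per-group geometry are assumptions for illustration):

/*
 * Minimal sketch of the reproduction scenario from the commit message:
 * fallocate 50% of a block group on a 4k-blocksize file system with
 * 32768 blocks per group.  The file path is an assumption for
 * illustration only.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* 50% of a 32768-block group at 4k per block = 64 MiB */
	off_t len = (off_t)16384 * 4096;
	int fd = open("/mnt/ext4/testfile", O_CREAT | O_WRONLY, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Before this commit, the cr==0 pass scanned every block group
	 * looking in the buddy bitmaps for a 16384-block chunk that the
	 * bitmaps never track, and failed everywhere. */
	if (fallocate(fd, 0, 0, len) < 0)
		perror("fallocate");
	close(fd);
	return 0;
}

With this commit, a request of this size falls through to ext4_mb_complex_scan_group() instead, as described in the commit message.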