aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Ehrenberg <dehrenberg@google.com>2011-07-17 21:11:30 -0400
committerTheodore Ts'o <tytso@mit.edu>2011-07-17 21:11:30 -0400
commitd7a1fee135771e6e5185642bdc17df19bbdbcc48 (patch)
treec037586b0db8324bac4ef11bcd8b3b1a631bbdf2
parent265c6a0f9290c8f470b839257dc6af3c46b24da1 (diff)
ext4: make the preallocation size be a multiple of stripe size
Previously, if a stripe width was provided, then it would be used as the preallocation granularity, with no sanity checking and no way to override this. Now, mb_prealloc_size defaults to the smallest multiple of stripe size that is greater than or equal to the old default mb_prealloc_size, and this can be overridden with the sysfs interface. Signed-off-by: Dan Ehrenberg <dehrenberg@google.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--fs/ext4/mballoc.c29
1 files changed, 20 insertions, 9 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b97a2d2f0fdf..037f680b76f9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -128,12 +128,13 @@
128 * we are doing a group prealloc we try to normalize the request to 128 * we are doing a group prealloc we try to normalize the request to
129 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is 129 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
130 * 512 blocks. This can be tuned via 130 * 512 blocks. This can be tuned via
131 * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in 131 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
132 * terms of number of blocks. If we have mounted the file system with -O 132 * terms of number of blocks. If we have mounted the file system with -O
133 * stripe=<value> option the group prealloc request is normalized to the 133 * stripe=<value> option the group prealloc request is normalized to the
134 * stripe value (sbi->s_stripe) 134 * smallest multiple of the stripe value (sbi->s_stripe) which is
135 * greater than or equal to the default mb_group_prealloc.
135 * 136 *
136 * The regular allocator(using the buddy cache) supports few tunables. 137 * The regular allocator (using the buddy cache) supports a few tunables.
137 * 138 *
138 * /sys/fs/ext4/<partition>/mb_min_to_scan 139 * /sys/fs/ext4/<partition>/mb_min_to_scan
139 * /sys/fs/ext4/<partition>/mb_max_to_scan 140 * /sys/fs/ext4/<partition>/mb_max_to_scan
@@ -2474,6 +2475,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2474 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; 2475 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2475 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; 2476 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2476 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2477 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2478 /*
2479 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
2480 * to the lowest multiple of s_stripe which is greater than or equal to
2481 * the s_mb_group_prealloc as determined above. We want
2482 * the preallocation size to be an exact multiple of the
2483 * RAID stripe size so that preallocations don't fragment
2484 * the stripes.
2485 */
2486 if (sbi->s_stripe > 1) {
2487 sbi->s_mb_group_prealloc = roundup(
2488 sbi->s_mb_group_prealloc, sbi->s_stripe);
2489 }
2477 2490
2478 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2491 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2479 if (sbi->s_locality_groups == NULL) { 2492 if (sbi->s_locality_groups == NULL) {
@@ -2841,8 +2854,9 @@ out_err:
2841 2854
2842/* 2855/*
2843 * here we normalize request for locality group 2856 * here we normalize request for locality group
2844 * Group requests are normalized to s_stripe size if we set the same via mount 2857 * Group requests are normalized to s_mb_group_prealloc, which goes to
2845 * option. If not we set it to s_mb_group_prealloc which can be configured via 2858 * s_stripe if we set the same via mount option.
2859 * s_mb_group_prealloc can be configured via
2846 * /sys/fs/ext4/<partition>/mb_group_prealloc 2860 * /sys/fs/ext4/<partition>/mb_group_prealloc
2847 * 2861 *
2848 * XXX: should we try to preallocate more than the group has now? 2862 * XXX: should we try to preallocate more than the group has now?
@@ -2853,10 +2867,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2853 struct ext4_locality_group *lg = ac->ac_lg; 2867 struct ext4_locality_group *lg = ac->ac_lg;
2854 2868
2855 BUG_ON(lg == NULL); 2869 BUG_ON(lg == NULL);
2856 if (EXT4_SB(sb)->s_stripe) 2870 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2857 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
2858 else
2859 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2860 mb_debug(1, "#%u: goal %u blocks for locality group\n", 2871 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2861 current->pid, ac->ac_g_ex.fe_len); 2872 current->pid, ac->ac_g_ex.fe_len);
2862} 2873}