diff options
author | Dan Ehrenberg <dehrenberg@google.com> | 2011-07-17 21:11:30 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-07-17 21:11:30 -0400 |
commit | d7a1fee135771e6e5185642bdc17df19bbdbcc48 (patch) | |
tree | c037586b0db8324bac4ef11bcd8b3b1a631bbdf2 | |
parent | 265c6a0f9290c8f470b839257dc6af3c46b24da1 (diff) |
ext4: make the preallocation size be a multiple of stripe size
Previously, if a stripe width was provided, then it would be used
as the preallocation granularity, with no sanity checking and no
way to override this. Now, mb_group_prealloc defaults to the smallest
multiple of stripe size that is greater than or equal to the old
default mb_group_prealloc, and this can be overridden with the sysfs
interface.
Signed-off-by: Dan Ehrenberg <dehrenberg@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/mballoc.c | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b97a2d2f0fdf..037f680b76f9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -128,12 +128,13 @@ | |||
128 | * we are doing a group prealloc we try to normalize the request to | 128 | * we are doing a group prealloc we try to normalize the request to |
129 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is | 129 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is |
130 | * 512 blocks. This can be tuned via | 130 | * 512 blocks. This can be tuned via |
131 | * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in | 131 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in |
132 | * terms of number of blocks. If we have mounted the file system with -O | 132 | * terms of number of blocks. If we have mounted the file system with -O |
133 | * stripe=<value> option the group prealloc request is normalized to the | 133 | * stripe=<value> option the group prealloc request is normalized to the |
134 | * stripe value (sbi->s_stripe) | 134 | * smallest multiple of the stripe value (sbi->s_stripe) which is |
135 | * greater than or equal to the default mb_group_prealloc. | ||
135 | * | 136 | * |
136 | * The regular allocator(using the buddy cache) supports few tunables. | 137 | * The regular allocator (using the buddy cache) supports a few tunables. |
137 | * | 138 | * |
138 | * /sys/fs/ext4/<partition>/mb_min_to_scan | 139 | * /sys/fs/ext4/<partition>/mb_min_to_scan |
139 | * /sys/fs/ext4/<partition>/mb_max_to_scan | 140 | * /sys/fs/ext4/<partition>/mb_max_to_scan |
@@ -2474,6 +2475,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2474 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; | 2475 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; |
2475 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; | 2476 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; |
2476 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2477 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2478 | /* | ||
2479 | * If there is a s_stripe > 1, then we set the s_mb_group_prealloc | ||
2480 | * to the lowest multiple of s_stripe which is no smaller than | ||
2481 | * the s_mb_group_prealloc as determined above. We want | ||
2482 | * the preallocation size to be an exact multiple of the | ||
2483 | * RAID stripe size so that preallocations don't fragment | ||
2484 | * the stripes. | ||
2485 | */ | ||
2486 | if (sbi->s_stripe > 1) { | ||
2487 | sbi->s_mb_group_prealloc = roundup( | ||
2488 | sbi->s_mb_group_prealloc, sbi->s_stripe); | ||
2489 | } | ||
2477 | 2490 | ||
2478 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2491 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2479 | if (sbi->s_locality_groups == NULL) { | 2492 | if (sbi->s_locality_groups == NULL) { |
@@ -2841,8 +2854,9 @@ out_err: | |||
2841 | 2854 | ||
2842 | /* | 2855 | /* |
2843 | * here we normalize request for locality group | 2856 | * here we normalize request for locality group |
2844 | * Group request are normalized to s_strip size if we set the same via mount | 2857 | * Group request are normalized to s_mb_group_prealloc, which goes to |
2845 | * option. If not we set it to s_mb_group_prealloc which can be configured via | 2858 | * s_strip if we set the same via mount option. |
2859 | * s_mb_group_prealloc can be configured via | ||
2846 | * /sys/fs/ext4/<partition>/mb_group_prealloc | 2860 | * /sys/fs/ext4/<partition>/mb_group_prealloc |
2847 | * | 2861 | * |
2848 | * XXX: should we try to preallocate more than the group has now? | 2862 | * XXX: should we try to preallocate more than the group has now? |
@@ -2853,10 +2867,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) | |||
2853 | struct ext4_locality_group *lg = ac->ac_lg; | 2867 | struct ext4_locality_group *lg = ac->ac_lg; |
2854 | 2868 | ||
2855 | BUG_ON(lg == NULL); | 2869 | BUG_ON(lg == NULL); |
2856 | if (EXT4_SB(sb)->s_stripe) | 2870 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; |
2857 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; | ||
2858 | else | ||
2859 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; | ||
2860 | mb_debug(1, "#%u: goal %u blocks for locality group\n", | 2871 | mb_debug(1, "#%u: goal %u blocks for locality group\n", |
2861 | current->pid, ac->ac_g_ex.fe_len); | 2872 | current->pid, ac->ac_g_ex.fe_len); |
2862 | } | 2873 | } |