diff options
author | Zheng Liu <wenqing.lz@taobao.com> | 2012-08-17 09:54:17 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-08-17 09:54:17 -0400 |
commit | 67a5da564f97f31c4054d358e00b34d7ee570da5 (patch) | |
tree | 525f256d46cfac4be0b0acd90cc2bad5fcdb1b77 /fs | |
parent | 81370291722ac1e0ec95234a0ea91a5bc76b6185 (diff) |
ext4: make the zero-out chunk size tunable
Currently in ext4 the length of zero-out chunk is set to 7 file system
blocks. But if an inode has uninitialized extents from using
fallocate to preallocate space, and the workload issues many random
writes, this can fragment the extent tree and cause it to grow
unnecessarily.
So create a new sysfs tunable, extent_max_zeroout_kb, which controls
the maximum size where blocks will be zeroed out instead of creating a
new uninitialized extent. The default of this has been set to 32kb.
CC: Zach Brown <zab@zabbo.net>
CC: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h | 3 | ||||
-rw-r--r-- | fs/ext4/extents.c | 25 | ||||
-rw-r--r-- | fs/ext4/super.c | 3 |
3 files changed, 19 insertions, 12 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 7c0841ecde6c..0df5ee102b61 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1271,6 +1271,9 @@ struct ext4_sb_info { | |||
1271 | unsigned long s_sectors_written_start; | 1271 | unsigned long s_sectors_written_start; |
1272 | u64 s_kbytes_written; | 1272 | u64 s_kbytes_written; |
1273 | 1273 | ||
1274 | /* the size of zero-out chunk */ | ||
1275 | unsigned int s_extent_max_zeroout_kb; | ||
1276 | |||
1274 | unsigned int s_log_groups_per_flex; | 1277 | unsigned int s_log_groups_per_flex; |
1275 | struct flex_groups *s_flex_groups; | 1278 | struct flex_groups *s_flex_groups; |
1276 | 1279 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e8755c21f4b9..2f082abf4992 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3085,7 +3085,6 @@ out: | |||
3085 | return err ? err : map->m_len; | 3085 | return err ? err : map->m_len; |
3086 | } | 3086 | } |
3087 | 3087 | ||
3088 | #define EXT4_EXT_ZERO_LEN 7 | ||
3089 | /* | 3088 | /* |
3090 | * This function is called by ext4_ext_map_blocks() if someone tries to write | 3089 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
3091 | * to an uninitialized extent. It may result in splitting the uninitialized | 3090 | * to an uninitialized extent. It may result in splitting the uninitialized |
@@ -3111,13 +3110,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3111 | struct ext4_map_blocks *map, | 3110 | struct ext4_map_blocks *map, |
3112 | struct ext4_ext_path *path) | 3111 | struct ext4_ext_path *path) |
3113 | { | 3112 | { |
3113 | struct ext4_sb_info *sbi; | ||
3114 | struct ext4_extent_header *eh; | 3114 | struct ext4_extent_header *eh; |
3115 | struct ext4_map_blocks split_map; | 3115 | struct ext4_map_blocks split_map; |
3116 | struct ext4_extent zero_ex; | 3116 | struct ext4_extent zero_ex; |
3117 | struct ext4_extent *ex; | 3117 | struct ext4_extent *ex; |
3118 | ext4_lblk_t ee_block, eof_block; | 3118 | ext4_lblk_t ee_block, eof_block; |
3119 | unsigned int ee_len, depth; | 3119 | unsigned int ee_len, depth; |
3120 | int allocated; | 3120 | int allocated, max_zeroout = 0; |
3121 | int err = 0; | 3121 | int err = 0; |
3122 | int split_flag = 0; | 3122 | int split_flag = 0; |
3123 | 3123 | ||
@@ -3125,6 +3125,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3125 | "block %llu, max_blocks %u\n", inode->i_ino, | 3125 | "block %llu, max_blocks %u\n", inode->i_ino, |
3126 | (unsigned long long)map->m_lblk, map->m_len); | 3126 | (unsigned long long)map->m_lblk, map->m_len); |
3127 | 3127 | ||
3128 | sbi = EXT4_SB(inode->i_sb); | ||
3128 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3129 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3129 | inode->i_sb->s_blocksize_bits; | 3130 | inode->i_sb->s_blocksize_bits; |
3130 | if (eof_block < map->m_lblk + map->m_len) | 3131 | if (eof_block < map->m_lblk + map->m_len) |
@@ -3224,9 +3225,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3224 | */ | 3225 | */ |
3225 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3226 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
3226 | 3227 | ||
3227 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ | 3228 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) |
3228 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && | 3229 | max_zeroout = sbi->s_extent_max_zeroout_kb >> |
3229 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 3230 | inode->i_sb->s_blocksize_bits; |
3231 | |||
3232 | /* If extent is less than s_max_zeroout_kb, zeroout directly */ | ||
3233 | if (max_zeroout && (ee_len <= max_zeroout)) { | ||
3230 | err = ext4_ext_zeroout(inode, ex); | 3234 | err = ext4_ext_zeroout(inode, ex); |
3231 | if (err) | 3235 | if (err) |
3232 | goto out; | 3236 | goto out; |
@@ -3250,9 +3254,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3250 | split_map.m_lblk = map->m_lblk; | 3254 | split_map.m_lblk = map->m_lblk; |
3251 | split_map.m_len = map->m_len; | 3255 | split_map.m_len = map->m_len; |
3252 | 3256 | ||
3253 | if (allocated > map->m_len) { | 3257 | if (max_zeroout && (allocated > map->m_len)) { |
3254 | if (allocated <= EXT4_EXT_ZERO_LEN && | 3258 | if (allocated <= max_zeroout) { |
3255 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3256 | /* case 3 */ | 3259 | /* case 3 */ |
3257 | zero_ex.ee_block = | 3260 | zero_ex.ee_block = |
3258 | cpu_to_le32(map->m_lblk); | 3261 | cpu_to_le32(map->m_lblk); |
@@ -3264,9 +3267,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3264 | goto out; | 3267 | goto out; |
3265 | split_map.m_lblk = map->m_lblk; | 3268 | split_map.m_lblk = map->m_lblk; |
3266 | split_map.m_len = allocated; | 3269 | split_map.m_len = allocated; |
3267 | } else if ((map->m_lblk - ee_block + map->m_len < | 3270 | } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { |
3268 | EXT4_EXT_ZERO_LEN) && | ||
3269 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3270 | /* case 2 */ | 3271 | /* case 2 */ |
3271 | if (map->m_lblk != ee_block) { | 3272 | if (map->m_lblk != ee_block) { |
3272 | zero_ex.ee_block = ex->ee_block; | 3273 | zero_ex.ee_block = ex->ee_block; |
@@ -3286,7 +3287,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3286 | } | 3287 | } |
3287 | 3288 | ||
3288 | allocated = ext4_split_extent(handle, inode, path, | 3289 | allocated = ext4_split_extent(handle, inode, path, |
3289 | &split_map, split_flag, 0); | 3290 | &split_map, split_flag, 0); |
3290 | if (allocated < 0) | 3291 | if (allocated < 0) |
3291 | err = allocated; | 3292 | err = allocated; |
3292 | 3293 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5a97e590692d..0423e2e7f615 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2541,6 +2541,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | |||
2541 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2541 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2542 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2542 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2543 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2543 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); |
2544 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | ||
2544 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2545 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
2545 | 2546 | ||
2546 | static struct attribute *ext4_attrs[] = { | 2547 | static struct attribute *ext4_attrs[] = { |
@@ -2556,6 +2557,7 @@ static struct attribute *ext4_attrs[] = { | |||
2556 | ATTR_LIST(mb_stream_req), | 2557 | ATTR_LIST(mb_stream_req), |
2557 | ATTR_LIST(mb_group_prealloc), | 2558 | ATTR_LIST(mb_group_prealloc), |
2558 | ATTR_LIST(max_writeback_mb_bump), | 2559 | ATTR_LIST(max_writeback_mb_bump), |
2560 | ATTR_LIST(extent_max_zeroout_kb), | ||
2559 | ATTR_LIST(trigger_fs_error), | 2561 | ATTR_LIST(trigger_fs_error), |
2560 | NULL, | 2562 | NULL, |
2561 | }; | 2563 | }; |
@@ -3756,6 +3758,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3756 | 3758 | ||
3757 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3759 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3758 | sbi->s_max_writeback_mb_bump = 128; | 3760 | sbi->s_max_writeback_mb_bump = 128; |
3761 | sbi->s_extent_max_zeroout_kb = 32; | ||
3759 | 3762 | ||
3760 | /* | 3763 | /* |
3761 | * set up enough so that it can read an inode | 3764 | * set up enough so that it can read an inode |