diff options
author | Zheng Liu <wenqing.lz@taobao.com> | 2012-08-17 09:54:17 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-08-17 09:54:17 -0400 |
commit | 67a5da564f97f31c4054d358e00b34d7ee570da5 (patch) | |
tree | 525f256d46cfac4be0b0acd90cc2bad5fcdb1b77 /fs/ext4/extents.c | |
parent | 81370291722ac1e0ec95234a0ea91a5bc76b6185 (diff) |
ext4: make the zero-out chunk size tunable
Currently in ext4 the length of zero-out chunk is set to 7 file system
blocks. But if an inode has uninitialized extents from using
fallocate to preallocate space, and the workload issues many random
writes, this can fragment the extent tree and cause it to grow
unnecessarily.
So create a new sysfs tunable, extent_max_zeroout_kb, which controls
the maximum size where blocks will be zeroed out instead of creating a
new uninitialized extent. The default of this has been set to 32kb.
CC: Zach Brown <zab@zabbo.net>
CC: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 25 |
1 files changed, 13 insertions, 12 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e8755c21f4b9..2f082abf4992 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3085,7 +3085,6 @@ out: | |||
3085 | return err ? err : map->m_len; | 3085 | return err ? err : map->m_len; |
3086 | } | 3086 | } |
3087 | 3087 | ||
3088 | #define EXT4_EXT_ZERO_LEN 7 | ||
3089 | /* | 3088 | /* |
3090 | * This function is called by ext4_ext_map_blocks() if someone tries to write | 3089 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
3091 | * to an uninitialized extent. It may result in splitting the uninitialized | 3090 | * to an uninitialized extent. It may result in splitting the uninitialized |
@@ -3111,13 +3110,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3111 | struct ext4_map_blocks *map, | 3110 | struct ext4_map_blocks *map, |
3112 | struct ext4_ext_path *path) | 3111 | struct ext4_ext_path *path) |
3113 | { | 3112 | { |
3113 | struct ext4_sb_info *sbi; | ||
3114 | struct ext4_extent_header *eh; | 3114 | struct ext4_extent_header *eh; |
3115 | struct ext4_map_blocks split_map; | 3115 | struct ext4_map_blocks split_map; |
3116 | struct ext4_extent zero_ex; | 3116 | struct ext4_extent zero_ex; |
3117 | struct ext4_extent *ex; | 3117 | struct ext4_extent *ex; |
3118 | ext4_lblk_t ee_block, eof_block; | 3118 | ext4_lblk_t ee_block, eof_block; |
3119 | unsigned int ee_len, depth; | 3119 | unsigned int ee_len, depth; |
3120 | int allocated; | 3120 | int allocated, max_zeroout = 0; |
3121 | int err = 0; | 3121 | int err = 0; |
3122 | int split_flag = 0; | 3122 | int split_flag = 0; |
3123 | 3123 | ||
@@ -3125,6 +3125,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3125 | "block %llu, max_blocks %u\n", inode->i_ino, | 3125 | "block %llu, max_blocks %u\n", inode->i_ino, |
3126 | (unsigned long long)map->m_lblk, map->m_len); | 3126 | (unsigned long long)map->m_lblk, map->m_len); |
3127 | 3127 | ||
3128 | sbi = EXT4_SB(inode->i_sb); | ||
3128 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3129 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3129 | inode->i_sb->s_blocksize_bits; | 3130 | inode->i_sb->s_blocksize_bits; |
3130 | if (eof_block < map->m_lblk + map->m_len) | 3131 | if (eof_block < map->m_lblk + map->m_len) |
@@ -3224,9 +3225,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3224 | */ | 3225 | */ |
3225 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3226 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
3226 | 3227 | ||
3227 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ | 3228 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) |
3228 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && | 3229 | max_zeroout = sbi->s_extent_max_zeroout_kb >> |
3229 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 3230 | inode->i_sb->s_blocksize_bits; |
3231 | |||
3232 | /* If extent is less than s_max_zeroout_kb, zeroout directly */ | ||
3233 | if (max_zeroout && (ee_len <= max_zeroout)) { | ||
3230 | err = ext4_ext_zeroout(inode, ex); | 3234 | err = ext4_ext_zeroout(inode, ex); |
3231 | if (err) | 3235 | if (err) |
3232 | goto out; | 3236 | goto out; |
@@ -3250,9 +3254,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3250 | split_map.m_lblk = map->m_lblk; | 3254 | split_map.m_lblk = map->m_lblk; |
3251 | split_map.m_len = map->m_len; | 3255 | split_map.m_len = map->m_len; |
3252 | 3256 | ||
3253 | if (allocated > map->m_len) { | 3257 | if (max_zeroout && (allocated > map->m_len)) { |
3254 | if (allocated <= EXT4_EXT_ZERO_LEN && | 3258 | if (allocated <= max_zeroout) { |
3255 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3256 | /* case 3 */ | 3259 | /* case 3 */ |
3257 | zero_ex.ee_block = | 3260 | zero_ex.ee_block = |
3258 | cpu_to_le32(map->m_lblk); | 3261 | cpu_to_le32(map->m_lblk); |
@@ -3264,9 +3267,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3264 | goto out; | 3267 | goto out; |
3265 | split_map.m_lblk = map->m_lblk; | 3268 | split_map.m_lblk = map->m_lblk; |
3266 | split_map.m_len = allocated; | 3269 | split_map.m_len = allocated; |
3267 | } else if ((map->m_lblk - ee_block + map->m_len < | 3270 | } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { |
3268 | EXT4_EXT_ZERO_LEN) && | ||
3269 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3270 | /* case 2 */ | 3271 | /* case 2 */ |
3271 | if (map->m_lblk != ee_block) { | 3272 | if (map->m_lblk != ee_block) { |
3272 | zero_ex.ee_block = ex->ee_block; | 3273 | zero_ex.ee_block = ex->ee_block; |
@@ -3286,7 +3287,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3286 | } | 3287 | } |
3287 | 3288 | ||
3288 | allocated = ext4_split_extent(handle, inode, path, | 3289 | allocated = ext4_split_extent(handle, inode, path, |
3289 | &split_map, split_flag, 0); | 3290 | &split_map, split_flag, 0); |
3290 | if (allocated < 0) | 3291 | if (allocated < 0) |
3291 | err = allocated; | 3292 | err = allocated; |
3292 | 3293 | ||