aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorZheng Liu <wenqing.lz@taobao.com>2012-08-17 09:54:17 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-08-17 09:54:17 -0400
commit67a5da564f97f31c4054d358e00b34d7ee570da5 (patch)
tree525f256d46cfac4be0b0acd90cc2bad5fcdb1b77 /fs
parent81370291722ac1e0ec95234a0ea91a5bc76b6185 (diff)
ext4: make the zero-out chunk size tunable
Currently in ext4 the length of the zero-out chunk is set to 7 file system blocks. But if an inode has uninitialized extents from using fallocate to preallocate space, and the workload issues many random writes, this can cause a fragmented extent tree that will unnecessarily grow the extent tree. So create a new sysfs tunable, extent_max_zeroout_kb, which controls the maximum size where blocks will be zeroed out instead of creating a new uninitialized extent. The default of this has been set to 32kb. CC: Zach Brown <zab@zabbo.net> CC: Andreas Dilger <adilger@dilger.ca> Signed-off-by: Zheng Liu <wenqing.lz@taobao.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/extents.c25
-rw-r--r--fs/ext4/super.c3
3 files changed, 19 insertions, 12 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7c0841ecde6c..0df5ee102b61 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1271,6 +1271,9 @@ struct ext4_sb_info {
1271 unsigned long s_sectors_written_start; 1271 unsigned long s_sectors_written_start;
1272 u64 s_kbytes_written; 1272 u64 s_kbytes_written;
1273 1273
1274 /* the size of zero-out chunk */
1275 unsigned int s_extent_max_zeroout_kb;
1276
1274 unsigned int s_log_groups_per_flex; 1277 unsigned int s_log_groups_per_flex;
1275 struct flex_groups *s_flex_groups; 1278 struct flex_groups *s_flex_groups;
1276 1279
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e8755c21f4b9..2f082abf4992 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3085,7 +3085,6 @@ out:
3085 return err ? err : map->m_len; 3085 return err ? err : map->m_len;
3086} 3086}
3087 3087
3088#define EXT4_EXT_ZERO_LEN 7
3089/* 3088/*
3090 * This function is called by ext4_ext_map_blocks() if someone tries to write 3089 * This function is called by ext4_ext_map_blocks() if someone tries to write
3091 * to an uninitialized extent. It may result in splitting the uninitialized 3090 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3111,13 +3110,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3111 struct ext4_map_blocks *map, 3110 struct ext4_map_blocks *map,
3112 struct ext4_ext_path *path) 3111 struct ext4_ext_path *path)
3113{ 3112{
3113 struct ext4_sb_info *sbi;
3114 struct ext4_extent_header *eh; 3114 struct ext4_extent_header *eh;
3115 struct ext4_map_blocks split_map; 3115 struct ext4_map_blocks split_map;
3116 struct ext4_extent zero_ex; 3116 struct ext4_extent zero_ex;
3117 struct ext4_extent *ex; 3117 struct ext4_extent *ex;
3118 ext4_lblk_t ee_block, eof_block; 3118 ext4_lblk_t ee_block, eof_block;
3119 unsigned int ee_len, depth; 3119 unsigned int ee_len, depth;
3120 int allocated; 3120 int allocated, max_zeroout = 0;
3121 int err = 0; 3121 int err = 0;
3122 int split_flag = 0; 3122 int split_flag = 0;
3123 3123
@@ -3125,6 +3125,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3125 "block %llu, max_blocks %u\n", inode->i_ino, 3125 "block %llu, max_blocks %u\n", inode->i_ino,
3126 (unsigned long long)map->m_lblk, map->m_len); 3126 (unsigned long long)map->m_lblk, map->m_len);
3127 3127
3128 sbi = EXT4_SB(inode->i_sb);
3128 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3129 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3129 inode->i_sb->s_blocksize_bits; 3130 inode->i_sb->s_blocksize_bits;
3130 if (eof_block < map->m_lblk + map->m_len) 3131 if (eof_block < map->m_lblk + map->m_len)
@@ -3224,9 +3225,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3224 */ 3225 */
3225 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3226 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3226 3227
3227 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ 3228 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3228 if (ee_len <= 2*EXT4_EXT_ZERO_LEN && 3229 max_zeroout = sbi->s_extent_max_zeroout_kb >>
3229 (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 3230 inode->i_sb->s_blocksize_bits;
3231
3232 /* If extent is less than s_max_zeroout_kb, zeroout directly */
3233 if (max_zeroout && (ee_len <= max_zeroout)) {
3230 err = ext4_ext_zeroout(inode, ex); 3234 err = ext4_ext_zeroout(inode, ex);
3231 if (err) 3235 if (err)
3232 goto out; 3236 goto out;
@@ -3250,9 +3254,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3250 split_map.m_lblk = map->m_lblk; 3254 split_map.m_lblk = map->m_lblk;
3251 split_map.m_len = map->m_len; 3255 split_map.m_len = map->m_len;
3252 3256
3253 if (allocated > map->m_len) { 3257 if (max_zeroout && (allocated > map->m_len)) {
3254 if (allocated <= EXT4_EXT_ZERO_LEN && 3258 if (allocated <= max_zeroout) {
3255 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3256 /* case 3 */ 3259 /* case 3 */
3257 zero_ex.ee_block = 3260 zero_ex.ee_block =
3258 cpu_to_le32(map->m_lblk); 3261 cpu_to_le32(map->m_lblk);
@@ -3264,9 +3267,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3264 goto out; 3267 goto out;
3265 split_map.m_lblk = map->m_lblk; 3268 split_map.m_lblk = map->m_lblk;
3266 split_map.m_len = allocated; 3269 split_map.m_len = allocated;
3267 } else if ((map->m_lblk - ee_block + map->m_len < 3270 } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
3268 EXT4_EXT_ZERO_LEN) &&
3269 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3270 /* case 2 */ 3271 /* case 2 */
3271 if (map->m_lblk != ee_block) { 3272 if (map->m_lblk != ee_block) {
3272 zero_ex.ee_block = ex->ee_block; 3273 zero_ex.ee_block = ex->ee_block;
@@ -3286,7 +3287,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3286 } 3287 }
3287 3288
3288 allocated = ext4_split_extent(handle, inode, path, 3289 allocated = ext4_split_extent(handle, inode, path,
3289 &split_map, split_flag, 0); 3290 &split_map, split_flag, 0);
3290 if (allocated < 0) 3291 if (allocated < 0)
3291 err = allocated; 3292 err = allocated;
3292 3293
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5a97e590692d..0423e2e7f615 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2541,6 +2541,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2541EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2541EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2542EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2542EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2543EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2543EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
2544EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
2544EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); 2545EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
2545 2546
2546static struct attribute *ext4_attrs[] = { 2547static struct attribute *ext4_attrs[] = {
@@ -2556,6 +2557,7 @@ static struct attribute *ext4_attrs[] = {
2556 ATTR_LIST(mb_stream_req), 2557 ATTR_LIST(mb_stream_req),
2557 ATTR_LIST(mb_group_prealloc), 2558 ATTR_LIST(mb_group_prealloc),
2558 ATTR_LIST(max_writeback_mb_bump), 2559 ATTR_LIST(max_writeback_mb_bump),
2560 ATTR_LIST(extent_max_zeroout_kb),
2559 ATTR_LIST(trigger_fs_error), 2561 ATTR_LIST(trigger_fs_error),
2560 NULL, 2562 NULL,
2561}; 2563};
@@ -3756,6 +3758,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3756 3758
3757 sbi->s_stripe = ext4_get_stripe_size(sbi); 3759 sbi->s_stripe = ext4_get_stripe_size(sbi);
3758 sbi->s_max_writeback_mb_bump = 128; 3760 sbi->s_max_writeback_mb_bump = 128;
3761 sbi->s_extent_max_zeroout_kb = 32;
3759 3762
3760 /* 3763 /*
3761 * set up enough so that it can read an inode 3764 * set up enough so that it can read an inode