aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2013-04-03 23:33:27 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-04-03 23:33:27 -0400
commitbc2d9db48c95ec6c9c5ecc97ddc61343d751f219 (patch)
treeba5e07ee4abef21cc6b2618e01515c4350e4d854 /fs/ext4/extents.c
parentbd86298e60b84b5e6d2da3e75c4ce2f6b70bdeed (diff)
ext4: Transfer initialized block to right neighbor if possible
Currently, when converting an extent to initialized, we attempt to transfer the initialized block to the left neighbor if possible when certain criteria are met. However, we do not attempt to do the same for the right neighbor. This commit adds the possibility to transfer the initialized block to the right neighbor if: 1. We're not converting the whole extent 2. Both extents are stored in the same extent tree node 3. The right neighbor is initialized 4. The right neighbor is logically abutting the current one 5. The right neighbor is physically abutting the current one 6. The right neighbor would not overflow the length limit This is basically the same logic as with transferring to the left. This will gain us some performance benefit since it is faster than inserting an extent and then merging it. It would also prevent some situations in the delalloc path where we might run out of metadata reservation. This is due to the fact that we would attempt to split the extent first (possibly allocating a new metadata block) even though we did not account for that, because it can (and will) be merged again. This commit fixes that scenario, because we no longer need to split the extent in such a case. Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c135
1 files changed, 89 insertions, 46 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 235246719074..892662334be9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3153,29 +3153,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3153 struct ext4_extent_header *eh; 3153 struct ext4_extent_header *eh;
3154 struct ext4_map_blocks split_map; 3154 struct ext4_map_blocks split_map;
3155 struct ext4_extent zero_ex; 3155 struct ext4_extent zero_ex;
3156 struct ext4_extent *ex; 3156 struct ext4_extent *ex, *abut_ex;
3157 ext4_lblk_t ee_block, eof_block; 3157 ext4_lblk_t ee_block, eof_block;
3158 unsigned int ee_len, depth; 3158 unsigned int ee_len, depth, map_len = map->m_len;
3159 int allocated, max_zeroout = 0; 3159 int allocated = 0, max_zeroout = 0;
3160 int err = 0; 3160 int err = 0;
3161 int split_flag = 0; 3161 int split_flag = 0;
3162 3162
3163 ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" 3163 ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
3164 "block %llu, max_blocks %u\n", inode->i_ino, 3164 "block %llu, max_blocks %u\n", inode->i_ino,
3165 (unsigned long long)map->m_lblk, map->m_len); 3165 (unsigned long long)map->m_lblk, map_len);
3166 3166
3167 sbi = EXT4_SB(inode->i_sb); 3167 sbi = EXT4_SB(inode->i_sb);
3168 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3168 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3169 inode->i_sb->s_blocksize_bits; 3169 inode->i_sb->s_blocksize_bits;
3170 if (eof_block < map->m_lblk + map->m_len) 3170 if (eof_block < map->m_lblk + map_len)
3171 eof_block = map->m_lblk + map->m_len; 3171 eof_block = map->m_lblk + map_len;
3172 3172
3173 depth = ext_depth(inode); 3173 depth = ext_depth(inode);
3174 eh = path[depth].p_hdr; 3174 eh = path[depth].p_hdr;
3175 ex = path[depth].p_ext; 3175 ex = path[depth].p_ext;
3176 ee_block = le32_to_cpu(ex->ee_block); 3176 ee_block = le32_to_cpu(ex->ee_block);
3177 ee_len = ext4_ext_get_actual_len(ex); 3177 ee_len = ext4_ext_get_actual_len(ex);
3178 allocated = ee_len - (map->m_lblk - ee_block);
3179 zero_ex.ee_len = 0; 3178 zero_ex.ee_len = 0;
3180 3179
3181 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); 3180 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3186,77 +3185,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3186 3185
3187 /* 3186 /*
3188 * Attempt to transfer newly initialized blocks from the currently 3187 * Attempt to transfer newly initialized blocks from the currently
3189 * uninitialized extent to its left neighbor. This is much cheaper 3188 * uninitialized extent to its neighbor. This is much cheaper
3190 * than an insertion followed by a merge as those involve costly 3189 * than an insertion followed by a merge as those involve costly
3191 * memmove() calls. This is the common case in steady state for 3190 * memmove() calls. Transferring to the left is the common case in
3192 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append 3191 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
3193 * writes. 3192 * followed by append writes.
3194 * 3193 *
3195 * Limitations of the current logic: 3194 * Limitations of the current logic:
3196 * - L1: we only deal with writes at the start of the extent. 3195 * - L1: we do not deal with writes covering the whole extent.
3197 * The approach could be extended to writes at the end
3198 * of the extent but this scenario was deemed less common.
3199 * - L2: we do not deal with writes covering the whole extent.
3200 * This would require removing the extent if the transfer 3196 * This would require removing the extent if the transfer
3201 * is possible. 3197 * is possible.
3202 * - L3: we only attempt to merge with an extent stored in the 3198 * - L2: we only attempt to merge with an extent stored in the
3203 * same extent tree node. 3199 * same extent tree node.
3204 */ 3200 */
3205 if ((map->m_lblk == ee_block) && /*L1*/ 3201 if ((map->m_lblk == ee_block) &&
3206 (map->m_len < ee_len) && /*L2*/ 3202 /* See if we can merge left */
3207 (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/ 3203 (map_len < ee_len) && /*L1*/
3208 struct ext4_extent *prev_ex; 3204 (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/
3209 ext4_lblk_t prev_lblk; 3205 ext4_lblk_t prev_lblk;
3210 ext4_fsblk_t prev_pblk, ee_pblk; 3206 ext4_fsblk_t prev_pblk, ee_pblk;
3211 unsigned int prev_len, write_len; 3207 unsigned int prev_len;
3212 3208
3213 prev_ex = ex - 1; 3209 abut_ex = ex - 1;
3214 prev_lblk = le32_to_cpu(prev_ex->ee_block); 3210 prev_lblk = le32_to_cpu(abut_ex->ee_block);
3215 prev_len = ext4_ext_get_actual_len(prev_ex); 3211 prev_len = ext4_ext_get_actual_len(abut_ex);
3216 prev_pblk = ext4_ext_pblock(prev_ex); 3212 prev_pblk = ext4_ext_pblock(abut_ex);
3217 ee_pblk = ext4_ext_pblock(ex); 3213 ee_pblk = ext4_ext_pblock(ex);
3218 write_len = map->m_len;
3219 3214
3220 /* 3215 /*
3221 * A transfer of blocks from 'ex' to 'prev_ex' is allowed 3216 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3222 * upon those conditions: 3217 * upon those conditions:
3223 * - C1: prev_ex is initialized, 3218 * - C1: abut_ex is initialized,
3224 * - C2: prev_ex is logically abutting ex, 3219 * - C2: abut_ex is logically abutting ex,
3225 * - C3: prev_ex is physically abutting ex, 3220 * - C3: abut_ex is physically abutting ex,
3226 * - C4: prev_ex can receive the additional blocks without 3221 * - C4: abut_ex can receive the additional blocks without
3227 * overflowing the (initialized) length limit. 3222 * overflowing the (initialized) length limit.
3228 */ 3223 */
3229 if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/ 3224 if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/
3230 ((prev_lblk + prev_len) == ee_block) && /*C2*/ 3225 ((prev_lblk + prev_len) == ee_block) && /*C2*/
3231 ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ 3226 ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3232 (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/ 3227 (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3233 err = ext4_ext_get_access(handle, inode, path + depth); 3228 err = ext4_ext_get_access(handle, inode, path + depth);
3234 if (err) 3229 if (err)
3235 goto out; 3230 goto out;
3236 3231
3237 trace_ext4_ext_convert_to_initialized_fastpath(inode, 3232 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3238 map, ex, prev_ex); 3233 map, ex, abut_ex);
3239 3234
3240 /* Shift the start of ex by 'write_len' blocks */ 3235 /* Shift the start of ex by 'map_len' blocks */
3241 ex->ee_block = cpu_to_le32(ee_block + write_len); 3236 ex->ee_block = cpu_to_le32(ee_block + map_len);
3242 ext4_ext_store_pblock(ex, ee_pblk + write_len); 3237 ext4_ext_store_pblock(ex, ee_pblk + map_len);
3243 ex->ee_len = cpu_to_le16(ee_len - write_len); 3238 ex->ee_len = cpu_to_le16(ee_len - map_len);
3244 ext4_ext_mark_uninitialized(ex); /* Restore the flag */ 3239 ext4_ext_mark_uninitialized(ex); /* Restore the flag */
3245 3240
3246 /* Extend prev_ex by 'write_len' blocks */ 3241 /* Extend abut_ex by 'map_len' blocks */
3247 prev_ex->ee_len = cpu_to_le16(prev_len + write_len); 3242 abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
3248 3243
3249 /* Mark the block containing both extents as dirty */ 3244 /* Result: number of initialized blocks past m_lblk */
3250 ext4_ext_dirty(handle, inode, path + depth); 3245 allocated = map_len;
3246 }
3247 } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
3248 (map_len < ee_len) && /*L1*/
3249 ex < EXT_LAST_EXTENT(eh)) { /*L2*/
3250 /* See if we can merge right */
3251 ext4_lblk_t next_lblk;
3252 ext4_fsblk_t next_pblk, ee_pblk;
3253 unsigned int next_len;
3254
3255 abut_ex = ex + 1;
3256 next_lblk = le32_to_cpu(abut_ex->ee_block);
3257 next_len = ext4_ext_get_actual_len(abut_ex);
3258 next_pblk = ext4_ext_pblock(abut_ex);
3259 ee_pblk = ext4_ext_pblock(ex);
3251 3260
3252 /* Update path to point to the right extent */ 3261 /*
3253 path[depth].p_ext = prev_ex; 3262 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3263 * upon those conditions:
3264 * - C1: abut_ex is initialized,
3265 * - C2: abut_ex is logically abutting ex,
3266 * - C3: abut_ex is physically abutting ex,
3267 * - C4: abut_ex can receive the additional blocks without
3268 * overflowing the (initialized) length limit.
3269 */
3270 if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/
3271 ((map->m_lblk + map_len) == next_lblk) && /*C2*/
3272 ((ee_pblk + ee_len) == next_pblk) && /*C3*/
3273 (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3274 err = ext4_ext_get_access(handle, inode, path + depth);
3275 if (err)
3276 goto out;
3277
3278 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3279 map, ex, abut_ex);
3280
3281 /* Shift the start of abut_ex by 'map_len' blocks */
3282 abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3283 ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3284 ex->ee_len = cpu_to_le16(ee_len - map_len);
3285 ext4_ext_mark_uninitialized(ex); /* Restore the flag */
3286
3287 /* Extend abut_ex by 'map_len' blocks */
3288 abut_ex->ee_len = cpu_to_le16(next_len + map_len);
3254 3289
3255 /* Result: number of initialized blocks past m_lblk */ 3290 /* Result: number of initialized blocks past m_lblk */
3256 allocated = write_len; 3291 allocated = map_len;
3257 goto out;
3258 } 3292 }
3259 } 3293 }
3294 if (allocated) {
3295 /* Mark the block containing both extents as dirty */
3296 ext4_ext_dirty(handle, inode, path + depth);
3297
3298 /* Update path to point to the right extent */
3299 path[depth].p_ext = abut_ex;
3300 goto out;
3301 } else
3302 allocated = ee_len - (map->m_lblk - ee_block);
3260 3303
3261 WARN_ON(map->m_lblk < ee_block); 3304 WARN_ON(map->m_lblk < ee_block);
3262 /* 3305 /*