Diffstat (limited to 'fs/ext4')
-rw-r--r--	fs/ext4/extents.c	135
1 files changed, 89 insertions, 46 deletions
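
For context, the patch below generalizes the existing "merge into the left neighbor" fast path so that newly written blocks can also be handed to an initialized extent on the right. The following standalone sketch only illustrates the left-merge admission checks (the C1-C4 conditions in the diff); the types, the INIT_MAX_LEN constant, and the helper names are simplified stand-ins for illustration, not the kernel's ext4 structures or this commit's code.

	/*
	 * Illustration only: simplified stand-ins for the kernel's extent
	 * structures and for the EXT_INIT_MAX_LEN limit.
	 */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define INIT_MAX_LEN 32768		/* stand-in for EXT_INIT_MAX_LEN */

	struct sketch_extent {
		uint32_t lblk;			/* first logical block */
		uint64_t pblk;			/* first physical block */
		uint32_t len;			/* number of blocks */
		bool	 uninit;		/* preallocated but not yet written */
	};

	/*
	 * Can 'map_len' blocks being written at the start of the uninitialized
	 * extent 'ex' simply be transferred to the initialized extent 'prev'
	 * immediately to its left?  Mirrors the C1-C4 checks in the diff.
	 */
	static bool can_merge_left(const struct sketch_extent *prev,
				   const struct sketch_extent *ex,
				   uint32_t write_lblk, uint32_t map_len)
	{
		return write_lblk == ex->lblk &&		/* write starts at ex */
		       map_len < ex->len &&			/* and does not cover all of it */
		       !prev->uninit &&				/* C1: prev is initialized */
		       prev->lblk + prev->len == ex->lblk &&	/* C2: logically abutting */
		       prev->pblk + prev->len == ex->pblk &&	/* C3: physically abutting */
		       prev->len < INIT_MAX_LEN - map_len;	/* C4: length limit holds */
	}

	int main(void)
	{
		struct sketch_extent prev = { .lblk = 100, .pblk = 5000, .len = 8,  .uninit = false };
		struct sketch_extent ex   = { .lblk = 108, .pblk = 5008, .len = 16, .uninit = true  };

		/* A 4-block write at logical block 108 qualifies for the fast path. */
		printf("merge left: %s\n", can_merge_left(&prev, &ex, 108, 4) ? "yes" : "no");
		return 0;
	}
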
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 235246719074..892662334be9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3153,29 +3153,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
-	struct ext4_extent *ex;
+	struct ext4_extent *ex, *abut_ex;
 	ext4_lblk_t ee_block, eof_block;
-	unsigned int ee_len, depth;
-	int allocated, max_zeroout = 0;
+	unsigned int ee_len, depth, map_len = map->m_len;
+	int allocated = 0, max_zeroout = 0;
 	int err = 0;
 	int split_flag = 0;
 
 	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
-		(unsigned long long)map->m_lblk, map->m_len);
+		(unsigned long long)map->m_lblk, map_len);
 
 	sbi = EXT4_SB(inode->i_sb);
 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
-	if (eof_block < map->m_lblk + map->m_len)
-		eof_block = map->m_lblk + map->m_len;
+	if (eof_block < map->m_lblk + map_len)
+		eof_block = map->m_lblk + map_len;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	allocated = ee_len - (map->m_lblk - ee_block);
 	zero_ex.ee_len = 0;
 
 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3186,77 +3185,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 
 	/*
 	 * Attempt to transfer newly initialized blocks from the currently
-	 * uninitialized extent to its left neighbor. This is much cheaper
+	 * uninitialized extent to its neighbor. This is much cheaper
 	 * than an insertion followed by a merge as those involve costly
-	 * memmove() calls. This is the common case in steady state for
-	 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
-	 * writes.
+	 * memmove() calls. Transferring to the left is the common case in
+	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
+	 * followed by append writes.
 	 *
 	 * Limitations of the current logic:
-	 * - L1: we only deal with writes at the start of the extent.
-	 *   The approach could be extended to writes at the end
-	 *   of the extent but this scenario was deemed less common.
-	 * - L2: we do not deal with writes covering the whole extent.
+	 * - L1: we do not deal with writes covering the whole extent.
 	 *   This would require removing the extent if the transfer
 	 *   is possible.
-	 * - L3: we only attempt to merge with an extent stored in the
+	 * - L2: we only attempt to merge with an extent stored in the
 	 *   same extent tree node.
 	 */
-	if ((map->m_lblk == ee_block) &&	/*L1*/
-		(map->m_len < ee_len) &&	/*L2*/
-		(ex > EXT_FIRST_EXTENT(eh))) {	/*L3*/
-		struct ext4_extent *prev_ex;
+	if ((map->m_lblk == ee_block) &&
+		/* See if we can merge left */
+		(map_len < ee_len) &&		/*L1*/
+		(ex > EXT_FIRST_EXTENT(eh))) {	/*L2*/
 		ext4_lblk_t prev_lblk;
 		ext4_fsblk_t prev_pblk, ee_pblk;
-		unsigned int prev_len, write_len;
+		unsigned int prev_len;
 
-		prev_ex = ex - 1;
-		prev_lblk = le32_to_cpu(prev_ex->ee_block);
-		prev_len = ext4_ext_get_actual_len(prev_ex);
-		prev_pblk = ext4_ext_pblock(prev_ex);
+		abut_ex = ex - 1;
+		prev_lblk = le32_to_cpu(abut_ex->ee_block);
+		prev_len = ext4_ext_get_actual_len(abut_ex);
+		prev_pblk = ext4_ext_pblock(abut_ex);
 		ee_pblk = ext4_ext_pblock(ex);
-		write_len = map->m_len;
 
 		/*
-		 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
 		 * upon those conditions:
-		 * - C1: prev_ex is initialized,
-		 * - C2: prev_ex is logically abutting ex,
-		 * - C3: prev_ex is physically abutting ex,
-		 * - C4: prev_ex can receive the additional blocks without
+		 * - C1: abut_ex is initialized,
+		 * - C2: abut_ex is logically abutting ex,
+		 * - C3: abut_ex is physically abutting ex,
+		 * - C4: abut_ex can receive the additional blocks without
 		 *   overflowing the (initialized) length limit.
 		 */
-		if ((!ext4_ext_is_uninitialized(prev_ex)) &&		/*C1*/
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
 			((prev_lblk + prev_len) == ee_block) &&		/*C2*/
 			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
-			(prev_len < (EXT_INIT_MAX_LEN - write_len))) {	/*C4*/
+			(prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
 			err = ext4_ext_get_access(handle, inode, path + depth);
 			if (err)
 				goto out;
 
 			trace_ext4_ext_convert_to_initialized_fastpath(inode,
-				map, ex, prev_ex);
+				map, ex, abut_ex);
 
-			/* Shift the start of ex by 'write_len' blocks */
-			ex->ee_block = cpu_to_le32(ee_block + write_len);
-			ext4_ext_store_pblock(ex, ee_pblk + write_len);
-			ex->ee_len = cpu_to_le16(ee_len - write_len);
+			/* Shift the start of ex by 'map_len' blocks */
+			ex->ee_block = cpu_to_le32(ee_block + map_len);
+			ext4_ext_store_pblock(ex, ee_pblk + map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
 			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
 
-			/* Extend prev_ex by 'write_len' blocks */
-			prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
 
-			/* Mark the block containing both extents as dirty */
-			ext4_ext_dirty(handle, inode, path + depth);
+			/* Result: number of initialized blocks past m_lblk */
+			allocated = map_len;
+		}
+	} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
+		   (map_len < ee_len) &&	/*L1*/
+		   ex < EXT_LAST_EXTENT(eh)) {	/*L2*/
+		/* See if we can merge right */
+		ext4_lblk_t next_lblk;
+		ext4_fsblk_t next_pblk, ee_pblk;
+		unsigned int next_len;
+
+		abut_ex = ex + 1;
+		next_lblk = le32_to_cpu(abut_ex->ee_block);
+		next_len = ext4_ext_get_actual_len(abut_ex);
+		next_pblk = ext4_ext_pblock(abut_ex);
+		ee_pblk = ext4_ext_pblock(ex);
 
-			/* Update path to point to the right extent */
-			path[depth].p_ext = prev_ex;
+		/*
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
+		 * upon those conditions:
+		 * - C1: abut_ex is initialized,
+		 * - C2: abut_ex is logically abutting ex,
+		 * - C3: abut_ex is physically abutting ex,
+		 * - C4: abut_ex can receive the additional blocks without
+		 *   overflowing the (initialized) length limit.
+		 */
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
+		    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/
+		    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/
+		    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
+			err = ext4_ext_get_access(handle, inode, path + depth);
+			if (err)
+				goto out;
+
+			trace_ext4_ext_convert_to_initialized_fastpath(inode,
+				map, ex, abut_ex);
+
+			/* Shift the start of abut_ex by 'map_len' blocks */
+			abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
+			ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
+			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(next_len + map_len);
 
 			/* Result: number of initialized blocks past m_lblk */
-			allocated = write_len;
-			goto out;
+			allocated = map_len;
 		}
 	}
+	if (allocated) {
+		/* Mark the block containing both extents as dirty */
+		ext4_ext_dirty(handle, inode, path + depth);
+
+		/* Update path to point to the right extent */
+		path[depth].p_ext = abut_ex;
+		goto out;
+	} else
+		allocated = ee_len - (map->m_lblk - ee_block);
 
 	WARN_ON(map->m_lblk < ee_block);
 	/*