diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/extents.c | 135 |
1 files changed, 89 insertions, 46 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 235246719074..892662334be9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3153,29 +3153,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3153 | struct ext4_extent_header *eh; | 3153 | struct ext4_extent_header *eh; |
3154 | struct ext4_map_blocks split_map; | 3154 | struct ext4_map_blocks split_map; |
3155 | struct ext4_extent zero_ex; | 3155 | struct ext4_extent zero_ex; |
3156 | struct ext4_extent *ex; | 3156 | struct ext4_extent *ex, *abut_ex; |
3157 | ext4_lblk_t ee_block, eof_block; | 3157 | ext4_lblk_t ee_block, eof_block; |
3158 | unsigned int ee_len, depth; | 3158 | unsigned int ee_len, depth, map_len = map->m_len; |
3159 | int allocated, max_zeroout = 0; | 3159 | int allocated = 0, max_zeroout = 0; |
3160 | int err = 0; | 3160 | int err = 0; |
3161 | int split_flag = 0; | 3161 | int split_flag = 0; |
3162 | 3162 | ||
3163 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" | 3163 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" |
3164 | "block %llu, max_blocks %u\n", inode->i_ino, | 3164 | "block %llu, max_blocks %u\n", inode->i_ino, |
3165 | (unsigned long long)map->m_lblk, map->m_len); | 3165 | (unsigned long long)map->m_lblk, map_len); |
3166 | 3166 | ||
3167 | sbi = EXT4_SB(inode->i_sb); | 3167 | sbi = EXT4_SB(inode->i_sb); |
3168 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3168 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3169 | inode->i_sb->s_blocksize_bits; | 3169 | inode->i_sb->s_blocksize_bits; |
3170 | if (eof_block < map->m_lblk + map->m_len) | 3170 | if (eof_block < map->m_lblk + map_len) |
3171 | eof_block = map->m_lblk + map->m_len; | 3171 | eof_block = map->m_lblk + map_len; |
3172 | 3172 | ||
3173 | depth = ext_depth(inode); | 3173 | depth = ext_depth(inode); |
3174 | eh = path[depth].p_hdr; | 3174 | eh = path[depth].p_hdr; |
3175 | ex = path[depth].p_ext; | 3175 | ex = path[depth].p_ext; |
3176 | ee_block = le32_to_cpu(ex->ee_block); | 3176 | ee_block = le32_to_cpu(ex->ee_block); |
3177 | ee_len = ext4_ext_get_actual_len(ex); | 3177 | ee_len = ext4_ext_get_actual_len(ex); |
3178 | allocated = ee_len - (map->m_lblk - ee_block); | ||
3179 | zero_ex.ee_len = 0; | 3178 | zero_ex.ee_len = 0; |
3180 | 3179 | ||
3181 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); | 3180 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); |
@@ -3186,77 +3185,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3186 | 3185 | ||
3187 | /* | 3186 | /* |
3188 | * Attempt to transfer newly initialized blocks from the currently | 3187 | * Attempt to transfer newly initialized blocks from the currently |
3189 | * uninitialized extent to its left neighbor. This is much cheaper | 3188 | * uninitialized extent to its neighbor. This is much cheaper |
3190 | * than an insertion followed by a merge as those involve costly | 3189 | * than an insertion followed by a merge as those involve costly |
3191 | * memmove() calls. This is the common case in steady state for | 3190 | * memmove() calls. Transferring to the left is the common case in |
3192 | * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append | 3191 | * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE) |
3193 | * writes. | 3192 | * followed by append writes. |
3194 | * | 3193 | * |
3195 | * Limitations of the current logic: | 3194 | * Limitations of the current logic: |
3196 | * - L1: we only deal with writes at the start of the extent. | 3195 | * - L1: we do not deal with writes covering the whole extent. |
3197 | * The approach could be extended to writes at the end | ||
3198 | * of the extent but this scenario was deemed less common. | ||
3199 | * - L2: we do not deal with writes covering the whole extent. | ||
3200 | * This would require removing the extent if the transfer | 3196 | * This would require removing the extent if the transfer |
3201 | * is possible. | 3197 | * is possible. |
3202 | * - L3: we only attempt to merge with an extent stored in the | 3198 | * - L2: we only attempt to merge with an extent stored in the |
3203 | * same extent tree node. | 3199 | * same extent tree node. |
3204 | */ | 3200 | */ |
3205 | if ((map->m_lblk == ee_block) && /*L1*/ | 3201 | if ((map->m_lblk == ee_block) && |
3206 | (map->m_len < ee_len) && /*L2*/ | 3202 | /* See if we can merge left */ |
3207 | (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/ | 3203 | (map_len < ee_len) && /*L1*/ |
3208 | struct ext4_extent *prev_ex; | 3204 | (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/ |
3209 | ext4_lblk_t prev_lblk; | 3205 | ext4_lblk_t prev_lblk; |
3210 | ext4_fsblk_t prev_pblk, ee_pblk; | 3206 | ext4_fsblk_t prev_pblk, ee_pblk; |
3211 | unsigned int prev_len, write_len; | 3207 | unsigned int prev_len; |
3212 | 3208 | ||
3213 | prev_ex = ex - 1; | 3209 | abut_ex = ex - 1; |
3214 | prev_lblk = le32_to_cpu(prev_ex->ee_block); | 3210 | prev_lblk = le32_to_cpu(abut_ex->ee_block); |
3215 | prev_len = ext4_ext_get_actual_len(prev_ex); | 3211 | prev_len = ext4_ext_get_actual_len(abut_ex); |
3216 | prev_pblk = ext4_ext_pblock(prev_ex); | 3212 | prev_pblk = ext4_ext_pblock(abut_ex); |
3217 | ee_pblk = ext4_ext_pblock(ex); | 3213 | ee_pblk = ext4_ext_pblock(ex); |
3218 | write_len = map->m_len; | ||
3219 | 3214 | ||
3220 | /* | 3215 | /* |
3221 | * A transfer of blocks from 'ex' to 'prev_ex' is allowed | 3216 | * A transfer of blocks from 'ex' to 'abut_ex' is allowed |
3222 | * upon those conditions: | 3217 | * upon those conditions: |
3223 | * - C1: prev_ex is initialized, | 3218 | * - C1: abut_ex is initialized, |
3224 | * - C2: prev_ex is logically abutting ex, | 3219 | * - C2: abut_ex is logically abutting ex, |
3225 | * - C3: prev_ex is physically abutting ex, | 3220 | * - C3: abut_ex is physically abutting ex, |
3226 | * - C4: prev_ex can receive the additional blocks without | 3221 | * - C4: abut_ex can receive the additional blocks without |
3227 | * overflowing the (initialized) length limit. | 3222 | * overflowing the (initialized) length limit. |
3228 | */ | 3223 | */ |
3229 | if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/ | 3224 | if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ |
3230 | ((prev_lblk + prev_len) == ee_block) && /*C2*/ | 3225 | ((prev_lblk + prev_len) == ee_block) && /*C2*/ |
3231 | ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ | 3226 | ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ |
3232 | (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/ | 3227 | (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ |
3233 | err = ext4_ext_get_access(handle, inode, path + depth); | 3228 | err = ext4_ext_get_access(handle, inode, path + depth); |
3234 | if (err) | 3229 | if (err) |
3235 | goto out; | 3230 | goto out; |
3236 | 3231 | ||
3237 | trace_ext4_ext_convert_to_initialized_fastpath(inode, | 3232 | trace_ext4_ext_convert_to_initialized_fastpath(inode, |
3238 | map, ex, prev_ex); | 3233 | map, ex, abut_ex); |
3239 | 3234 | ||
3240 | /* Shift the start of ex by 'write_len' blocks */ | 3235 | /* Shift the start of ex by 'map_len' blocks */ |
3241 | ex->ee_block = cpu_to_le32(ee_block + write_len); | 3236 | ex->ee_block = cpu_to_le32(ee_block + map_len); |
3242 | ext4_ext_store_pblock(ex, ee_pblk + write_len); | 3237 | ext4_ext_store_pblock(ex, ee_pblk + map_len); |
3243 | ex->ee_len = cpu_to_le16(ee_len - write_len); | 3238 | ex->ee_len = cpu_to_le16(ee_len - map_len); |
3244 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ | 3239 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ |
3245 | 3240 | ||
3246 | /* Extend prev_ex by 'write_len' blocks */ | 3241 | /* Extend abut_ex by 'map_len' blocks */ |
3247 | prev_ex->ee_len = cpu_to_le16(prev_len + write_len); | 3242 | abut_ex->ee_len = cpu_to_le16(prev_len + map_len); |
3248 | 3243 | ||
3249 | /* Mark the block containing both extents as dirty */ | 3244 | /* Result: number of initialized blocks past m_lblk */ |
3250 | ext4_ext_dirty(handle, inode, path + depth); | 3245 | allocated = map_len; |
3246 | } | ||
3247 | } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) && | ||
3248 | (map_len < ee_len) && /*L1*/ | ||
3249 | ex < EXT_LAST_EXTENT(eh)) { /*L2*/ | ||
3250 | /* See if we can merge right */ | ||
3251 | ext4_lblk_t next_lblk; | ||
3252 | ext4_fsblk_t next_pblk, ee_pblk; | ||
3253 | unsigned int next_len; | ||
3254 | |||
3255 | abut_ex = ex + 1; | ||
3256 | next_lblk = le32_to_cpu(abut_ex->ee_block); | ||
3257 | next_len = ext4_ext_get_actual_len(abut_ex); | ||
3258 | next_pblk = ext4_ext_pblock(abut_ex); | ||
3259 | ee_pblk = ext4_ext_pblock(ex); | ||
3251 | 3260 | ||
3252 | /* Update path to point to the right extent */ | 3261 | /* |
3253 | path[depth].p_ext = prev_ex; | 3262 | * A transfer of blocks from 'ex' to 'abut_ex' is allowed |
3263 | * upon those conditions: | ||
3264 | * - C1: abut_ex is initialized, | ||
3265 | * - C2: abut_ex is logically abutting ex, | ||
3266 | * - C3: abut_ex is physically abutting ex, | ||
3267 | * - C4: abut_ex can receive the additional blocks without | ||
3268 | * overflowing the (initialized) length limit. | ||
3269 | */ | ||
3270 | if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ | ||
3271 | ((map->m_lblk + map_len) == next_lblk) && /*C2*/ | ||
3272 | ((ee_pblk + ee_len) == next_pblk) && /*C3*/ | ||
3273 | (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ | ||
3274 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3275 | if (err) | ||
3276 | goto out; | ||
3277 | |||
3278 | trace_ext4_ext_convert_to_initialized_fastpath(inode, | ||
3279 | map, ex, abut_ex); | ||
3280 | |||
3281 | /* Shift the start of abut_ex by 'map_len' blocks */ | ||
3282 | abut_ex->ee_block = cpu_to_le32(next_lblk - map_len); | ||
3283 | ext4_ext_store_pblock(abut_ex, next_pblk - map_len); | ||
3284 | ex->ee_len = cpu_to_le16(ee_len - map_len); | ||
3285 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ | ||
3286 | |||
3287 | /* Extend abut_ex by 'map_len' blocks */ | ||
3288 | abut_ex->ee_len = cpu_to_le16(next_len + map_len); | ||
3254 | 3289 | ||
3255 | /* Result: number of initialized blocks past m_lblk */ | 3290 | /* Result: number of initialized blocks past m_lblk */ |
3256 | allocated = write_len; | 3291 | allocated = map_len; |
3257 | goto out; | ||
3258 | } | 3292 | } |
3259 | } | 3293 | } |
3294 | if (allocated) { | ||
3295 | /* Mark the block containing both extents as dirty */ | ||
3296 | ext4_ext_dirty(handle, inode, path + depth); | ||
3297 | |||
3298 | /* Update path to point to the right extent */ | ||
3299 | path[depth].p_ext = abut_ex; | ||
3300 | goto out; | ||
3301 | } else | ||
3302 | allocated = ee_len - (map->m_lblk - ee_block); | ||
3260 | 3303 | ||
3261 | WARN_ON(map->m_lblk < ee_block); | 3304 | WARN_ON(map->m_lblk < ee_block); |
3262 | /* | 3305 | /* |