diff options
author | Dmitry Monakhov <dmonakhov@openvz.org> | 2012-10-10 01:04:58 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-10-10 01:04:58 -0400 |
commit | dee1f973ca341c266229faa5a1a5bb268bed3531 (patch) | |
tree | 1be44adf3df6f32f8d13df54a0e68f7fd8b9ac26 /fs | |
parent | 60d4616f3dc63371b3dc367e5e88fd4b4f037f65 (diff) |
ext4: race-condition protection for ext4_convert_unwritten_extents_endio
We assumed that at the time we call ext4_convert_unwritten_extents_endio()
the extent in question is fully inside [map->m_lblk, map->m_len] because
it was already split during submission. But this may not be true due to
a race between writeback and fallocate.
If the extent in question is larger than requested, we split it again.
Special precautions must be taken if zeroout is required, because
[map->m_lblk, map->m_len] already contains valid data.
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/extents.c | 57 |
1 files changed, 46 insertions, 11 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c2789271e7b4..7011ac967208 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -52,6 +52,9 @@ | |||
52 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | 52 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ |
53 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | 53 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ |
54 | 54 | ||
55 | #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ | ||
56 | #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ | ||
57 | |||
55 | static __le32 ext4_extent_block_csum(struct inode *inode, | 58 | static __le32 ext4_extent_block_csum(struct inode *inode, |
56 | struct ext4_extent_header *eh) | 59 | struct ext4_extent_header *eh) |
57 | { | 60 | { |
@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2914 | unsigned int ee_len, depth; | 2917 | unsigned int ee_len, depth; |
2915 | int err = 0; | 2918 | int err = 0; |
2916 | 2919 | ||
2920 | BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == | ||
2921 | (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); | ||
2922 | |||
2917 | ext_debug("ext4_split_extents_at: inode %lu, logical" | 2923 | ext_debug("ext4_split_extents_at: inode %lu, logical" |
2918 | "block %llu\n", inode->i_ino, (unsigned long long)split); | 2924 | "block %llu\n", inode->i_ino, (unsigned long long)split); |
2919 | 2925 | ||
@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2972 | 2978 | ||
2973 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 2979 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
2974 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 2980 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { |
2975 | err = ext4_ext_zeroout(inode, &orig_ex); | 2981 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { |
2982 | if (split_flag & EXT4_EXT_DATA_VALID1) | ||
2983 | err = ext4_ext_zeroout(inode, ex2); | ||
2984 | else | ||
2985 | err = ext4_ext_zeroout(inode, ex); | ||
2986 | } else | ||
2987 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2988 | |||
2976 | if (err) | 2989 | if (err) |
2977 | goto fix_extent_len; | 2990 | goto fix_extent_len; |
2978 | /* update the extent length and mark as initialized */ | 2991 | /* update the extent length and mark as initialized */ |
@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle, | |||
3025 | uninitialized = ext4_ext_is_uninitialized(ex); | 3038 | uninitialized = ext4_ext_is_uninitialized(ex); |
3026 | 3039 | ||
3027 | if (map->m_lblk + map->m_len < ee_block + ee_len) { | 3040 | if (map->m_lblk + map->m_len < ee_block + ee_len) { |
3028 | split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? | 3041 | split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; |
3029 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
3030 | flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; | 3042 | flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; |
3031 | if (uninitialized) | 3043 | if (uninitialized) |
3032 | split_flag1 |= EXT4_EXT_MARK_UNINIT1 | | 3044 | split_flag1 |= EXT4_EXT_MARK_UNINIT1 | |
3033 | EXT4_EXT_MARK_UNINIT2; | 3045 | EXT4_EXT_MARK_UNINIT2; |
3046 | if (split_flag & EXT4_EXT_DATA_VALID2) | ||
3047 | split_flag1 |= EXT4_EXT_DATA_VALID1; | ||
3034 | err = ext4_split_extent_at(handle, inode, path, | 3048 | err = ext4_split_extent_at(handle, inode, path, |
3035 | map->m_lblk + map->m_len, split_flag1, flags1); | 3049 | map->m_lblk + map->m_len, split_flag1, flags1); |
3036 | if (err) | 3050 | if (err) |
@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle, | |||
3043 | return PTR_ERR(path); | 3057 | return PTR_ERR(path); |
3044 | 3058 | ||
3045 | if (map->m_lblk >= ee_block) { | 3059 | if (map->m_lblk >= ee_block) { |
3046 | split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? | 3060 | split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | |
3047 | EXT4_EXT_MAY_ZEROOUT : 0; | 3061 | EXT4_EXT_DATA_VALID2); |
3048 | if (uninitialized) | 3062 | if (uninitialized) |
3049 | split_flag1 |= EXT4_EXT_MARK_UNINIT1; | 3063 | split_flag1 |= EXT4_EXT_MARK_UNINIT1; |
3050 | if (split_flag & EXT4_EXT_MARK_UNINIT2) | 3064 | if (split_flag & EXT4_EXT_MARK_UNINIT2) |
@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3323 | 3337 | ||
3324 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3338 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
3325 | split_flag |= EXT4_EXT_MARK_UNINIT2; | 3339 | split_flag |= EXT4_EXT_MARK_UNINIT2; |
3326 | 3340 | if (flags & EXT4_GET_BLOCKS_CONVERT) | |
3341 | split_flag |= EXT4_EXT_DATA_VALID2; | ||
3327 | flags |= EXT4_GET_BLOCKS_PRE_IO; | 3342 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
3328 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); | 3343 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); |
3329 | } | 3344 | } |
3330 | 3345 | ||
3331 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3346 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
3332 | struct inode *inode, | 3347 | struct inode *inode, |
3333 | struct ext4_ext_path *path) | 3348 | struct ext4_map_blocks *map, |
3349 | struct ext4_ext_path *path) | ||
3334 | { | 3350 | { |
3335 | struct ext4_extent *ex; | 3351 | struct ext4_extent *ex; |
3352 | ext4_lblk_t ee_block; | ||
3353 | unsigned int ee_len; | ||
3336 | int depth; | 3354 | int depth; |
3337 | int err = 0; | 3355 | int err = 0; |
3338 | 3356 | ||
3339 | depth = ext_depth(inode); | 3357 | depth = ext_depth(inode); |
3340 | ex = path[depth].p_ext; | 3358 | ex = path[depth].p_ext; |
3359 | ee_block = le32_to_cpu(ex->ee_block); | ||
3360 | ee_len = ext4_ext_get_actual_len(ex); | ||
3341 | 3361 | ||
3342 | ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" | 3362 | ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" |
3343 | "block %llu, max_blocks %u\n", inode->i_ino, | 3363 | "block %llu, max_blocks %u\n", inode->i_ino, |
3344 | (unsigned long long)le32_to_cpu(ex->ee_block), | 3364 | (unsigned long long)ee_block, ee_len); |
3345 | ext4_ext_get_actual_len(ex)); | 3365 | |
3366 | /* If extent is larger than requested then split is required */ | ||
3367 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
3368 | err = ext4_split_unwritten_extents(handle, inode, map, path, | ||
3369 | EXT4_GET_BLOCKS_CONVERT); | ||
3370 | if (err < 0) | ||
3371 | goto out; | ||
3372 | ext4_ext_drop_refs(path); | ||
3373 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | ||
3374 | if (IS_ERR(path)) { | ||
3375 | err = PTR_ERR(path); | ||
3376 | goto out; | ||
3377 | } | ||
3378 | depth = ext_depth(inode); | ||
3379 | ex = path[depth].p_ext; | ||
3380 | } | ||
3346 | 3381 | ||
3347 | err = ext4_ext_get_access(handle, inode, path + depth); | 3382 | err = ext4_ext_get_access(handle, inode, path + depth); |
3348 | if (err) | 3383 | if (err) |
@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3652 | } | 3687 | } |
3653 | /* IO end_io complete, convert the filled extent to written */ | 3688 | /* IO end_io complete, convert the filled extent to written */ |
3654 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { | 3689 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { |
3655 | ret = ext4_convert_unwritten_extents_endio(handle, inode, | 3690 | ret = ext4_convert_unwritten_extents_endio(handle, inode, map, |
3656 | path); | 3691 | path); |
3657 | if (ret >= 0) { | 3692 | if (ret >= 0) { |
3658 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3693 | ext4_update_inode_fsync_trans(handle, inode, 1); |