aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Monakhov <dmonakhov@openvz.org>2012-10-10 01:04:58 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-10-10 01:04:58 -0400
commitdee1f973ca341c266229faa5a1a5bb268bed3531 (patch)
tree1be44adf3df6f32f8d13df54a0e68f7fd8b9ac26
parent60d4616f3dc63371b3dc367e5e88fd4b4f037f65 (diff)
ext4: race-condition protection for ext4_convert_unwritten_extents_endio
We assumed that at the time we call ext4_convert_unwritten_extents_endio() extent in question is fully inside [map.m_lblk, map->m_len] because it was already split during submission. But this may not be true due to a race between writeback vs fallocate. If extent in question is larger than requested we will split it again. Special precautions should being done if zeroout required because [map.m_lblk, map->m_len] already contains valid data. Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
-rw-r--r--fs/ext4/extents.c57
1 files changed, 46 insertions, 11 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c2789271e7b4..7011ac967208 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -52,6 +52,9 @@
52#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ 52#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
53#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ 53#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
54 54
55#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
56#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
57
55static __le32 ext4_extent_block_csum(struct inode *inode, 58static __le32 ext4_extent_block_csum(struct inode *inode,
56 struct ext4_extent_header *eh) 59 struct ext4_extent_header *eh)
57{ 60{
@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle,
2914 unsigned int ee_len, depth; 2917 unsigned int ee_len, depth;
2915 int err = 0; 2918 int err = 0;
2916 2919
2920 BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
2921 (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
2922
2917 ext_debug("ext4_split_extents_at: inode %lu, logical" 2923 ext_debug("ext4_split_extents_at: inode %lu, logical"
2918 "block %llu\n", inode->i_ino, (unsigned long long)split); 2924 "block %llu\n", inode->i_ino, (unsigned long long)split);
2919 2925
@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle,
2972 2978
2973 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 2979 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
2974 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 2980 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2975 err = ext4_ext_zeroout(inode, &orig_ex); 2981 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
2982 if (split_flag & EXT4_EXT_DATA_VALID1)
2983 err = ext4_ext_zeroout(inode, ex2);
2984 else
2985 err = ext4_ext_zeroout(inode, ex);
2986 } else
2987 err = ext4_ext_zeroout(inode, &orig_ex);
2988
2976 if (err) 2989 if (err)
2977 goto fix_extent_len; 2990 goto fix_extent_len;
2978 /* update the extent length and mark as initialized */ 2991 /* update the extent length and mark as initialized */
@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle,
3025 uninitialized = ext4_ext_is_uninitialized(ex); 3038 uninitialized = ext4_ext_is_uninitialized(ex);
3026 3039
3027 if (map->m_lblk + map->m_len < ee_block + ee_len) { 3040 if (map->m_lblk + map->m_len < ee_block + ee_len) {
3028 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? 3041 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
3029 EXT4_EXT_MAY_ZEROOUT : 0;
3030 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; 3042 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
3031 if (uninitialized) 3043 if (uninitialized)
3032 split_flag1 |= EXT4_EXT_MARK_UNINIT1 | 3044 split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
3033 EXT4_EXT_MARK_UNINIT2; 3045 EXT4_EXT_MARK_UNINIT2;
3046 if (split_flag & EXT4_EXT_DATA_VALID2)
3047 split_flag1 |= EXT4_EXT_DATA_VALID1;
3034 err = ext4_split_extent_at(handle, inode, path, 3048 err = ext4_split_extent_at(handle, inode, path,
3035 map->m_lblk + map->m_len, split_flag1, flags1); 3049 map->m_lblk + map->m_len, split_flag1, flags1);
3036 if (err) 3050 if (err)
@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle,
3043 return PTR_ERR(path); 3057 return PTR_ERR(path);
3044 3058
3045 if (map->m_lblk >= ee_block) { 3059 if (map->m_lblk >= ee_block) {
3046 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? 3060 split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
3047 EXT4_EXT_MAY_ZEROOUT : 0; 3061 EXT4_EXT_DATA_VALID2);
3048 if (uninitialized) 3062 if (uninitialized)
3049 split_flag1 |= EXT4_EXT_MARK_UNINIT1; 3063 split_flag1 |= EXT4_EXT_MARK_UNINIT1;
3050 if (split_flag & EXT4_EXT_MARK_UNINIT2) 3064 if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3323 3337
3324 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3338 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3325 split_flag |= EXT4_EXT_MARK_UNINIT2; 3339 split_flag |= EXT4_EXT_MARK_UNINIT2;
3326 3340 if (flags & EXT4_GET_BLOCKS_CONVERT)
3341 split_flag |= EXT4_EXT_DATA_VALID2;
3327 flags |= EXT4_GET_BLOCKS_PRE_IO; 3342 flags |= EXT4_GET_BLOCKS_PRE_IO;
3328 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3343 return ext4_split_extent(handle, inode, path, map, split_flag, flags);
3329} 3344}
3330 3345
3331static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3346static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3332 struct inode *inode, 3347 struct inode *inode,
3333 struct ext4_ext_path *path) 3348 struct ext4_map_blocks *map,
3349 struct ext4_ext_path *path)
3334{ 3350{
3335 struct ext4_extent *ex; 3351 struct ext4_extent *ex;
3352 ext4_lblk_t ee_block;
3353 unsigned int ee_len;
3336 int depth; 3354 int depth;
3337 int err = 0; 3355 int err = 0;
3338 3356
3339 depth = ext_depth(inode); 3357 depth = ext_depth(inode);
3340 ex = path[depth].p_ext; 3358 ex = path[depth].p_ext;
3359 ee_block = le32_to_cpu(ex->ee_block);
3360 ee_len = ext4_ext_get_actual_len(ex);
3341 3361
3342 ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" 3362 ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
3343 "block %llu, max_blocks %u\n", inode->i_ino, 3363 "block %llu, max_blocks %u\n", inode->i_ino,
3344 (unsigned long long)le32_to_cpu(ex->ee_block), 3364 (unsigned long long)ee_block, ee_len);
3345 ext4_ext_get_actual_len(ex)); 3365
3366 /* If extent is larger than requested then split is required */
3367 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3368 err = ext4_split_unwritten_extents(handle, inode, map, path,
3369 EXT4_GET_BLOCKS_CONVERT);
3370 if (err < 0)
3371 goto out;
3372 ext4_ext_drop_refs(path);
3373 path = ext4_ext_find_extent(inode, map->m_lblk, path);
3374 if (IS_ERR(path)) {
3375 err = PTR_ERR(path);
3376 goto out;
3377 }
3378 depth = ext_depth(inode);
3379 ex = path[depth].p_ext;
3380 }
3346 3381
3347 err = ext4_ext_get_access(handle, inode, path + depth); 3382 err = ext4_ext_get_access(handle, inode, path + depth);
3348 if (err) 3383 if (err)
@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3652 } 3687 }
3653 /* IO end_io complete, convert the filled extent to written */ 3688 /* IO end_io complete, convert the filled extent to written */
3654 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3689 if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
3655 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3690 ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
3656 path); 3691 path);
3657 if (ret >= 0) { 3692 if (ret >= 0) {
3658 ext4_update_inode_fsync_trans(handle, inode, 1); 3693 ext4_update_inode_fsync_trans(handle, inode, 1);