diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/extents.c | 93 | ||||
-rw-r--r-- | fs/ext4/super.c | 1 |
2 files changed, 94 insertions, 0 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c2ac06cb2d46..8b6a17b60970 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -2919,12 +2919,23 @@ out: | |||
2919 | * a> There is no split required: Entire extent should be initialized | 2919 | * a> There is no split required: Entire extent should be initialized |
2920 | * b> Splits in two extents: Write is happening at either end of the extent | 2920 | * b> Splits in two extents: Write is happening at either end of the extent |
2921 | * c> Splits in three extents: Somone is writing in middle of the extent | 2921 | * c> Splits in three extents: Somone is writing in middle of the extent |
2922 | * | ||
2923 | * Pre-conditions: | ||
2924 | * - The extent pointed to by 'path' is uninitialized. | ||
2925 | * - The extent pointed to by 'path' is a superset | |
2926 | * of the logical span [map->m_lblk, map->m_lblk + map->m_len). | ||
2927 | * | ||
2928 | * Post-conditions on success: | ||
2929 | * - the returned value is the number of blocks beyond map->m_lblk | |
2930 | * that are allocated and initialized. | ||
2931 | * It is guaranteed to be >= map->m_len. | ||
2922 | */ | 2932 | */ |
2923 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 2933 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
2924 | struct inode *inode, | 2934 | struct inode *inode, |
2925 | struct ext4_map_blocks *map, | 2935 | struct ext4_map_blocks *map, |
2926 | struct ext4_ext_path *path) | 2936 | struct ext4_ext_path *path) |
2927 | { | 2937 | { |
2938 | struct ext4_extent_header *eh; | ||
2928 | struct ext4_map_blocks split_map; | 2939 | struct ext4_map_blocks split_map; |
2929 | struct ext4_extent zero_ex; | 2940 | struct ext4_extent zero_ex; |
2930 | struct ext4_extent *ex; | 2941 | struct ext4_extent *ex; |
@@ -2944,11 +2955,93 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2944 | eof_block = map->m_lblk + map->m_len; | 2955 | eof_block = map->m_lblk + map->m_len; |
2945 | 2956 | ||
2946 | depth = ext_depth(inode); | 2957 | depth = ext_depth(inode); |
2958 | eh = path[depth].p_hdr; | ||
2947 | ex = path[depth].p_ext; | 2959 | ex = path[depth].p_ext; |
2948 | ee_block = le32_to_cpu(ex->ee_block); | 2960 | ee_block = le32_to_cpu(ex->ee_block); |
2949 | ee_len = ext4_ext_get_actual_len(ex); | 2961 | ee_len = ext4_ext_get_actual_len(ex); |
2950 | allocated = ee_len - (map->m_lblk - ee_block); | 2962 | allocated = ee_len - (map->m_lblk - ee_block); |
2951 | 2963 | ||
2964 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); | ||
2965 | |||
2966 | /* Pre-conditions */ | ||
2967 | BUG_ON(!ext4_ext_is_uninitialized(ex)); | ||
2968 | BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); | ||
2969 | BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len); | ||
2970 | |||
2971 | /* | ||
2972 | * Attempt to transfer newly initialized blocks from the currently | ||
2973 | * uninitialized extent to its left neighbor. This is much cheaper | ||
2974 | * than an insertion followed by a merge as those involve costly | ||
2975 | * memmove() calls. This is the common case in steady state for | ||
2976 | * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append | ||
2977 | * writes. | ||
2978 | * | ||
2979 | * Limitations of the current logic: | ||
2980 | * - L1: we only deal with writes at the start of the extent. | ||
2981 | * The approach could be extended to writes at the end | ||
2982 | * of the extent but this scenario was deemed less common. | ||
2983 | * - L2: we do not deal with writes covering the whole extent. | ||
2984 | * This would require removing the extent if the transfer | ||
2985 | * is possible. | ||
2986 | * - L3: we only attempt to merge with an extent stored in the | ||
2987 | * same extent tree node. | ||
2988 | */ | ||
2989 | if ((map->m_lblk == ee_block) && /*L1*/ | ||
2990 | (map->m_len < ee_len) && /*L2*/ | ||
2991 | (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/ | ||
2992 | struct ext4_extent *prev_ex; | ||
2993 | ext4_lblk_t prev_lblk; | ||
2994 | ext4_fsblk_t prev_pblk, ee_pblk; | ||
2995 | unsigned int prev_len, write_len; | ||
2996 | |||
2997 | prev_ex = ex - 1; | ||
2998 | prev_lblk = le32_to_cpu(prev_ex->ee_block); | ||
2999 | prev_len = ext4_ext_get_actual_len(prev_ex); | ||
3000 | prev_pblk = ext4_ext_pblock(prev_ex); | ||
3001 | ee_pblk = ext4_ext_pblock(ex); | ||
3002 | write_len = map->m_len; | ||
3003 | |||
3004 | /* | ||
3005 | * A transfer of blocks from 'ex' to 'prev_ex' is allowed | ||
3006 | * under the following conditions: | |
3007 | * - C1: prev_ex is initialized, | ||
3008 | * - C2: prev_ex is logically abutting ex, | ||
3009 | * - C3: prev_ex is physically abutting ex, | ||
3010 | * - C4: prev_ex can receive the additional blocks without | ||
3011 | * overflowing the (initialized) length limit. | ||
3012 | */ | ||
3013 | if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/ | ||
3014 | ((prev_lblk + prev_len) == ee_block) && /*C2*/ | ||
3015 | ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ | ||
3016 | (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/ | ||
3017 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3018 | if (err) | ||
3019 | goto out; | ||
3020 | |||
3021 | trace_ext4_ext_convert_to_initialized_fastpath(inode, | ||
3022 | map, ex, prev_ex); | ||
3023 | |||
3024 | /* Shift the start of ex by 'write_len' blocks */ | ||
3025 | ex->ee_block = cpu_to_le32(ee_block + write_len); | ||
3026 | ext4_ext_store_pblock(ex, ee_pblk + write_len); | ||
3027 | ex->ee_len = cpu_to_le16(ee_len - write_len); | ||
3028 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ | ||
3029 | |||
3030 | /* Extend prev_ex by 'write_len' blocks */ | ||
3031 | prev_ex->ee_len = cpu_to_le16(prev_len + write_len); | ||
3032 | |||
3033 | /* Mark the block containing both extents as dirty */ | ||
3034 | ext4_ext_dirty(handle, inode, path + depth); | ||
3035 | |||
3036 | /* Update path to point to the right extent */ | ||
3037 | path[depth].p_ext = prev_ex; | ||
3038 | |||
3039 | /* Result: number of initialized blocks past m_lblk */ | ||
3040 | allocated = write_len; | ||
3041 | goto out; | ||
3042 | } | ||
3043 | } | ||
3044 | |||
2952 | WARN_ON(map->m_lblk < ee_block); | 3045 | WARN_ON(map->m_lblk < ee_block); |
2953 | /* | 3046 | /* |
2954 | * It is safe to convert extent to initialized via explicit | 3047 | * It is safe to convert extent to initialized via explicit |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dcc460537bc7..9953d80145ad 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/freezer.h> | 45 | #include <linux/freezer.h> |
46 | 46 | ||
47 | #include "ext4.h" | 47 | #include "ext4.h" |
48 | #include "ext4_extents.h" | ||
48 | #include "ext4_jbd2.h" | 49 | #include "ext4_jbd2.h" |
49 | #include "xattr.h" | 50 | #include "xattr.h" |
50 | #include "acl.h" | 51 | #include "acl.h" |