diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 259 |
1 files changed, 152 insertions, 107 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cd0c7ed06772..1c94cca35ed1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1177 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1177 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1178 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); | 1178 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
1179 | 1179 | ||
1180 | neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1); | 1180 | le16_add_cpu(&neh->eh_depth, 1); |
1181 | ext4_mark_inode_dirty(handle, inode); | 1181 | ext4_mark_inode_dirty(handle, inode); |
1182 | out: | 1182 | out: |
1183 | brelse(bh); | 1183 | brelse(bh); |
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
1656 | } | 1656 | } |
1657 | 1657 | ||
1658 | /* | 1658 | /* |
1659 | * This function does a very simple check to see if we can collapse | ||
1660 | * an extent tree with a single extent tree leaf block into the inode. | ||
1661 | */ | ||
1662 | static void ext4_ext_try_to_merge_up(handle_t *handle, | ||
1663 | struct inode *inode, | ||
1664 | struct ext4_ext_path *path) | ||
1665 | { | ||
1666 | size_t s; | ||
1667 | unsigned max_root = ext4_ext_space_root(inode, 0); | ||
1668 | ext4_fsblk_t blk; | ||
1669 | |||
1670 | if ((path[0].p_depth != 1) || | ||
1671 | (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || | ||
1672 | (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) | ||
1673 | return; | ||
1674 | |||
1675 | /* | ||
1676 | * We need to modify the block allocation bitmap and the block | ||
1677 | * group descriptor to release the extent tree block. If we | ||
1678 | * can't get the journal credits, give up. | ||
1679 | */ | ||
1680 | if (ext4_journal_extend(handle, 2)) | ||
1681 | return; | ||
1682 | |||
1683 | /* | ||
1684 | * Copy the extent data up to the inode | ||
1685 | */ | ||
1686 | blk = ext4_idx_pblock(path[0].p_idx); | ||
1687 | s = le16_to_cpu(path[1].p_hdr->eh_entries) * | ||
1688 | sizeof(struct ext4_extent_idx); | ||
1689 | s += sizeof(struct ext4_extent_header); | ||
1690 | |||
1691 | memcpy(path[0].p_hdr, path[1].p_hdr, s); | ||
1692 | path[0].p_depth = 0; | ||
1693 | path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + | ||
1694 | (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); | ||
1695 | path[0].p_hdr->eh_max = cpu_to_le16(max_root); | ||
1696 | |||
1697 | brelse(path[1].p_bh); | ||
1698 | ext4_free_blocks(handle, inode, NULL, blk, 1, | ||
1699 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | ||
1700 | } | ||
1701 | |||
1702 | /* | ||
1659 | * This function tries to merge the @ex extent to neighbours in the tree. | 1703 | * This function tries to merge the @ex extent to neighbours in the tree. |
1660 | * return 1 if merge left else 0. | 1704 | * return 1 if merge left else 0. |
1661 | */ | 1705 | */ |
1662 | static int ext4_ext_try_to_merge(struct inode *inode, | 1706 | static void ext4_ext_try_to_merge(handle_t *handle, |
1707 | struct inode *inode, | ||
1663 | struct ext4_ext_path *path, | 1708 | struct ext4_ext_path *path, |
1664 | struct ext4_extent *ex) { | 1709 | struct ext4_extent *ex) { |
1665 | struct ext4_extent_header *eh; | 1710 | struct ext4_extent_header *eh; |
1666 | unsigned int depth; | 1711 | unsigned int depth; |
1667 | int merge_done = 0; | 1712 | int merge_done = 0; |
1668 | int ret = 0; | ||
1669 | 1713 | ||
1670 | depth = ext_depth(inode); | 1714 | depth = ext_depth(inode); |
1671 | BUG_ON(path[depth].p_hdr == NULL); | 1715 | BUG_ON(path[depth].p_hdr == NULL); |
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode, | |||
1675 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); | 1719 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); |
1676 | 1720 | ||
1677 | if (!merge_done) | 1721 | if (!merge_done) |
1678 | ret = ext4_ext_try_to_merge_right(inode, path, ex); | 1722 | (void) ext4_ext_try_to_merge_right(inode, path, ex); |
1679 | 1723 | ||
1680 | return ret; | 1724 | ext4_ext_try_to_merge_up(handle, inode, path); |
1681 | } | 1725 | } |
1682 | 1726 | ||
1683 | /* | 1727 | /* |
@@ -1893,7 +1937,7 @@ has_space: | |||
1893 | merge: | 1937 | merge: |
1894 | /* try to merge extents */ | 1938 | /* try to merge extents */ |
1895 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) | 1939 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) |
1896 | ext4_ext_try_to_merge(inode, path, nearex); | 1940 | ext4_ext_try_to_merge(handle, inode, path, nearex); |
1897 | 1941 | ||
1898 | 1942 | ||
1899 | /* time to correct all indexes above */ | 1943 | /* time to correct all indexes above */ |
@@ -1901,7 +1945,7 @@ merge: | |||
1901 | if (err) | 1945 | if (err) |
1902 | goto cleanup; | 1946 | goto cleanup; |
1903 | 1947 | ||
1904 | err = ext4_ext_dirty(handle, inode, path + depth); | 1948 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
1905 | 1949 | ||
1906 | cleanup: | 1950 | cleanup: |
1907 | if (npath) { | 1951 | if (npath) { |
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2092 | } | 2136 | } |
2093 | 2137 | ||
2094 | /* | 2138 | /* |
2095 | * ext4_ext_check_cache() | 2139 | * ext4_ext_in_cache() |
2096 | * Checks to see if the given block is in the cache. | 2140 | * Checks to see if the given block is in the cache. |
2097 | * If it is, the cached extent is stored in the given | 2141 | * If it is, the cached extent is stored in the given |
2098 | * cache extent pointer. If the cached extent is a hole, | 2142 | * cache extent pointer. |
2099 | * this routine should be used instead of | ||
2100 | * ext4_ext_in_cache if the calling function needs to | ||
2101 | * know the size of the hole. | ||
2102 | * | 2143 | * |
2103 | * @inode: The files inode | 2144 | * @inode: The files inode |
2104 | * @block: The block to look for in the cache | 2145 | * @block: The block to look for in the cache |
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2107 | * | 2148 | * |
2108 | * Return 0 if cache is invalid; 1 if the cache is valid | 2149 | * Return 0 if cache is invalid; 1 if the cache is valid |
2109 | */ | 2150 | */ |
2110 | static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, | 2151 | static int |
2111 | struct ext4_ext_cache *ex){ | 2152 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, |
2153 | struct ext4_extent *ex) | ||
2154 | { | ||
2112 | struct ext4_ext_cache *cex; | 2155 | struct ext4_ext_cache *cex; |
2113 | struct ext4_sb_info *sbi; | 2156 | struct ext4_sb_info *sbi; |
2114 | int ret = 0; | 2157 | int ret = 0; |
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, | |||
2125 | goto errout; | 2168 | goto errout; |
2126 | 2169 | ||
2127 | if (in_range(block, cex->ec_block, cex->ec_len)) { | 2170 | if (in_range(block, cex->ec_block, cex->ec_len)) { |
2128 | memcpy(ex, cex, sizeof(struct ext4_ext_cache)); | 2171 | ex->ee_block = cpu_to_le32(cex->ec_block); |
2172 | ext4_ext_store_pblock(ex, cex->ec_start); | ||
2173 | ex->ee_len = cpu_to_le16(cex->ec_len); | ||
2129 | ext_debug("%u cached by %u:%u:%llu\n", | 2174 | ext_debug("%u cached by %u:%u:%llu\n", |
2130 | block, | 2175 | block, |
2131 | cex->ec_block, cex->ec_len, cex->ec_start); | 2176 | cex->ec_block, cex->ec_len, cex->ec_start); |
@@ -2138,37 +2183,6 @@ errout: | |||
2138 | } | 2183 | } |
2139 | 2184 | ||
2140 | /* | 2185 | /* |
2141 | * ext4_ext_in_cache() | ||
2142 | * Checks to see if the given block is in the cache. | ||
2143 | * If it is, the cached extent is stored in the given | ||
2144 | * extent pointer. | ||
2145 | * | ||
2146 | * @inode: The files inode | ||
2147 | * @block: The block to look for in the cache | ||
2148 | * @ex: Pointer where the cached extent will be stored | ||
2149 | * if it contains block | ||
2150 | * | ||
2151 | * Return 0 if cache is invalid; 1 if the cache is valid | ||
2152 | */ | ||
2153 | static int | ||
2154 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | ||
2155 | struct ext4_extent *ex) | ||
2156 | { | ||
2157 | struct ext4_ext_cache cex; | ||
2158 | int ret = 0; | ||
2159 | |||
2160 | if (ext4_ext_check_cache(inode, block, &cex)) { | ||
2161 | ex->ee_block = cpu_to_le32(cex.ec_block); | ||
2162 | ext4_ext_store_pblock(ex, cex.ec_start); | ||
2163 | ex->ee_len = cpu_to_le16(cex.ec_len); | ||
2164 | ret = 1; | ||
2165 | } | ||
2166 | |||
2167 | return ret; | ||
2168 | } | ||
2169 | |||
2170 | |||
2171 | /* | ||
2172 | * ext4_ext_rm_idx: | 2186 | * ext4_ext_rm_idx: |
2173 | * removes index from the index block. | 2187 | * removes index from the index block. |
2174 | */ | 2188 | */ |
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2274 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2288 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2275 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2289 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2276 | ext4_fsblk_t pblk; | 2290 | ext4_fsblk_t pblk; |
2277 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2291 | int flags = 0; |
2278 | 2292 | ||
2279 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 2293 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
2280 | flags |= EXT4_FREE_BLOCKS_METADATA; | 2294 | flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; |
2295 | else if (ext4_should_journal_data(inode)) | ||
2296 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
2297 | |||
2281 | /* | 2298 | /* |
2282 | * For bigalloc file systems, we never free a partial cluster | 2299 | * For bigalloc file systems, we never free a partial cluster |
2283 | * at the beginning of the extent. Instead, we make a note | 2300 | * at the beginning of the extent. Instead, we make a note |
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
2572 | struct ext4_ext_path *path = NULL; | 2589 | struct ext4_ext_path *path = NULL; |
2573 | ext4_fsblk_t partial_cluster = 0; | 2590 | ext4_fsblk_t partial_cluster = 0; |
2574 | handle_t *handle; | 2591 | handle_t *handle; |
2575 | int i = 0, err; | 2592 | int i = 0, err = 0; |
2576 | 2593 | ||
2577 | ext_debug("truncate since %u to %u\n", start, end); | 2594 | ext_debug("truncate since %u to %u\n", start, end); |
2578 | 2595 | ||
@@ -2604,12 +2621,16 @@ again: | |||
2604 | return PTR_ERR(path); | 2621 | return PTR_ERR(path); |
2605 | } | 2622 | } |
2606 | depth = ext_depth(inode); | 2623 | depth = ext_depth(inode); |
2624 | /* Leaf node may not exist only if inode has no blocks at all */ | ||
2607 | ex = path[depth].p_ext; | 2625 | ex = path[depth].p_ext; |
2608 | if (!ex) { | 2626 | if (!ex) { |
2609 | ext4_ext_drop_refs(path); | 2627 | if (depth) { |
2610 | kfree(path); | 2628 | EXT4_ERROR_INODE(inode, |
2611 | path = NULL; | 2629 | "path[%d].p_hdr == NULL", |
2612 | goto cont; | 2630 | depth); |
2631 | err = -EIO; | ||
2632 | } | ||
2633 | goto out; | ||
2613 | } | 2634 | } |
2614 | 2635 | ||
2615 | ee_block = le32_to_cpu(ex->ee_block); | 2636 | ee_block = le32_to_cpu(ex->ee_block); |
@@ -2641,8 +2662,6 @@ again: | |||
2641 | goto out; | 2662 | goto out; |
2642 | } | 2663 | } |
2643 | } | 2664 | } |
2644 | cont: | ||
2645 | |||
2646 | /* | 2665 | /* |
2647 | * We start scanning from right side, freeing all the blocks | 2666 | * We start scanning from right side, freeing all the blocks |
2648 | * after i_size and walking into the tree depth-wise. | 2667 | * after i_size and walking into the tree depth-wise. |
@@ -2662,6 +2681,7 @@ cont: | |||
2662 | } | 2681 | } |
2663 | path[0].p_depth = depth; | 2682 | path[0].p_depth = depth; |
2664 | path[0].p_hdr = ext_inode_hdr(inode); | 2683 | path[0].p_hdr = ext_inode_hdr(inode); |
2684 | i = 0; | ||
2665 | 2685 | ||
2666 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | 2686 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { |
2667 | err = -EIO; | 2687 | err = -EIO; |
@@ -2923,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2923 | ext4_ext_mark_initialized(ex); | 2943 | ext4_ext_mark_initialized(ex); |
2924 | 2944 | ||
2925 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) | 2945 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) |
2926 | ext4_ext_try_to_merge(inode, path, ex); | 2946 | ext4_ext_try_to_merge(handle, inode, path, ex); |
2927 | 2947 | ||
2928 | err = ext4_ext_dirty(handle, inode, path + depth); | 2948 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
2929 | goto out; | 2949 | goto out; |
2930 | } | 2950 | } |
2931 | 2951 | ||
@@ -2957,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2957 | goto fix_extent_len; | 2977 | goto fix_extent_len; |
2958 | /* update the extent length and mark as initialized */ | 2978 | /* update the extent length and mark as initialized */ |
2959 | ex->ee_len = cpu_to_le16(ee_len); | 2979 | ex->ee_len = cpu_to_le16(ee_len); |
2960 | ext4_ext_try_to_merge(inode, path, ex); | 2980 | ext4_ext_try_to_merge(handle, inode, path, ex); |
2961 | err = ext4_ext_dirty(handle, inode, path + depth); | 2981 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
2962 | goto out; | 2982 | goto out; |
2963 | } else if (err) | 2983 | } else if (err) |
2964 | goto fix_extent_len; | 2984 | goto fix_extent_len; |
@@ -3040,7 +3060,6 @@ out: | |||
3040 | return err ? err : map->m_len; | 3060 | return err ? err : map->m_len; |
3041 | } | 3061 | } |
3042 | 3062 | ||
3043 | #define EXT4_EXT_ZERO_LEN 7 | ||
3044 | /* | 3063 | /* |
3045 | * This function is called by ext4_ext_map_blocks() if someone tries to write | 3064 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
3046 | * to an uninitialized extent. It may result in splitting the uninitialized | 3065 | * to an uninitialized extent. It may result in splitting the uninitialized |
@@ -3066,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3066 | struct ext4_map_blocks *map, | 3085 | struct ext4_map_blocks *map, |
3067 | struct ext4_ext_path *path) | 3086 | struct ext4_ext_path *path) |
3068 | { | 3087 | { |
3088 | struct ext4_sb_info *sbi; | ||
3069 | struct ext4_extent_header *eh; | 3089 | struct ext4_extent_header *eh; |
3070 | struct ext4_map_blocks split_map; | 3090 | struct ext4_map_blocks split_map; |
3071 | struct ext4_extent zero_ex; | 3091 | struct ext4_extent zero_ex; |
3072 | struct ext4_extent *ex; | 3092 | struct ext4_extent *ex; |
3073 | ext4_lblk_t ee_block, eof_block; | 3093 | ext4_lblk_t ee_block, eof_block; |
3074 | unsigned int ee_len, depth; | 3094 | unsigned int ee_len, depth; |
3075 | int allocated; | 3095 | int allocated, max_zeroout = 0; |
3076 | int err = 0; | 3096 | int err = 0; |
3077 | int split_flag = 0; | 3097 | int split_flag = 0; |
3078 | 3098 | ||
@@ -3080,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3080 | "block %llu, max_blocks %u\n", inode->i_ino, | 3100 | "block %llu, max_blocks %u\n", inode->i_ino, |
3081 | (unsigned long long)map->m_lblk, map->m_len); | 3101 | (unsigned long long)map->m_lblk, map->m_len); |
3082 | 3102 | ||
3103 | sbi = EXT4_SB(inode->i_sb); | ||
3083 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3104 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3084 | inode->i_sb->s_blocksize_bits; | 3105 | inode->i_sb->s_blocksize_bits; |
3085 | if (eof_block < map->m_lblk + map->m_len) | 3106 | if (eof_block < map->m_lblk + map->m_len) |
@@ -3179,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3179 | */ | 3200 | */ |
3180 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3201 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
3181 | 3202 | ||
3182 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ | 3203 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) |
3183 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && | 3204 | max_zeroout = sbi->s_extent_max_zeroout_kb >> |
3184 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 3205 | inode->i_sb->s_blocksize_bits; |
3206 | |||
3207 | /* If extent is no larger than s_extent_max_zeroout_kb, zeroout directly */ | ||
3208 | if (max_zeroout && (ee_len <= max_zeroout)) { | ||
3185 | err = ext4_ext_zeroout(inode, ex); | 3209 | err = ext4_ext_zeroout(inode, ex); |
3186 | if (err) | 3210 | if (err) |
3187 | goto out; | 3211 | goto out; |
@@ -3190,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3190 | if (err) | 3214 | if (err) |
3191 | goto out; | 3215 | goto out; |
3192 | ext4_ext_mark_initialized(ex); | 3216 | ext4_ext_mark_initialized(ex); |
3193 | ext4_ext_try_to_merge(inode, path, ex); | 3217 | ext4_ext_try_to_merge(handle, inode, path, ex); |
3194 | err = ext4_ext_dirty(handle, inode, path + depth); | 3218 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
3195 | goto out; | 3219 | goto out; |
3196 | } | 3220 | } |
3197 | 3221 | ||
@@ -3205,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3205 | split_map.m_lblk = map->m_lblk; | 3229 | split_map.m_lblk = map->m_lblk; |
3206 | split_map.m_len = map->m_len; | 3230 | split_map.m_len = map->m_len; |
3207 | 3231 | ||
3208 | if (allocated > map->m_len) { | 3232 | if (max_zeroout && (allocated > map->m_len)) { |
3209 | if (allocated <= EXT4_EXT_ZERO_LEN && | 3233 | if (allocated <= max_zeroout) { |
3210 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3211 | /* case 3 */ | 3234 | /* case 3 */ |
3212 | zero_ex.ee_block = | 3235 | zero_ex.ee_block = |
3213 | cpu_to_le32(map->m_lblk); | 3236 | cpu_to_le32(map->m_lblk); |
@@ -3219,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3219 | goto out; | 3242 | goto out; |
3220 | split_map.m_lblk = map->m_lblk; | 3243 | split_map.m_lblk = map->m_lblk; |
3221 | split_map.m_len = allocated; | 3244 | split_map.m_len = allocated; |
3222 | } else if ((map->m_lblk - ee_block + map->m_len < | 3245 | } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { |
3223 | EXT4_EXT_ZERO_LEN) && | ||
3224 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3225 | /* case 2 */ | 3246 | /* case 2 */ |
3226 | if (map->m_lblk != ee_block) { | 3247 | if (map->m_lblk != ee_block) { |
3227 | zero_ex.ee_block = ex->ee_block; | 3248 | zero_ex.ee_block = ex->ee_block; |
@@ -3241,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3241 | } | 3262 | } |
3242 | 3263 | ||
3243 | allocated = ext4_split_extent(handle, inode, path, | 3264 | allocated = ext4_split_extent(handle, inode, path, |
3244 | &split_map, split_flag, 0); | 3265 | &split_map, split_flag, 0); |
3245 | if (allocated < 0) | 3266 | if (allocated < 0) |
3246 | err = allocated; | 3267 | err = allocated; |
3247 | 3268 | ||
@@ -3255,7 +3276,7 @@ out: | |||
3255 | * to an uninitialized extent. | 3276 | * to an uninitialized extent. |
3256 | * | 3277 | * |
3257 | * Writing to an uninitialized extent may result in splitting the uninitialized | 3278 | * Writing to an uninitialized extent may result in splitting the uninitialized |
3258 | * extent into multiple /initialized uninitialized extents (up to three) | 3279 | * extent into multiple initialized/uninitialized extents (up to three) |
3259 | * There are three possibilities: | 3280 | * There are three possibilities: |
3260 | * a> There is no split required: Entire extent should be uninitialized | 3281 | * a> There is no split required: Entire extent should be uninitialized |
3261 | * b> Splits in two extents: Write is happening at either end of the extent | 3282 | * b> Splits in two extents: Write is happening at either end of the extent |
@@ -3332,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3332 | /* note: ext4_ext_correct_indexes() isn't needed here because | 3353 | /* note: ext4_ext_correct_indexes() isn't needed here because |
3333 | * borders are not changed | 3354 | * borders are not changed |
3334 | */ | 3355 | */ |
3335 | ext4_ext_try_to_merge(inode, path, ex); | 3356 | ext4_ext_try_to_merge(handle, inode, path, ex); |
3336 | 3357 | ||
3337 | /* Mark modified extent as dirty */ | 3358 | /* Mark modified extent as dirty */ |
3338 | err = ext4_ext_dirty(handle, inode, path + depth); | 3359 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
3339 | out: | 3360 | out: |
3340 | ext4_ext_show_leaf(inode, path); | 3361 | ext4_ext_show_leaf(inode, path); |
3341 | return err; | 3362 | return err; |
@@ -3599,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3599 | { | 3620 | { |
3600 | int ret = 0; | 3621 | int ret = 0; |
3601 | int err = 0; | 3622 | int err = 0; |
3602 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3623 | ext4_io_end_t *io = ext4_inode_aio(inode); |
3603 | 3624 | ||
3604 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " | 3625 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " |
3605 | "block %llu, max_blocks %u, flags %x, allocated %u\n", | 3626 | "block %llu, max_blocks %u, flags %x, allocated %u\n", |
@@ -3614,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3614 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3635 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3615 | ret = ext4_split_unwritten_extents(handle, inode, map, | 3636 | ret = ext4_split_unwritten_extents(handle, inode, map, |
3616 | path, flags); | 3637 | path, flags); |
3638 | if (ret <= 0) | ||
3639 | goto out; | ||
3617 | /* | 3640 | /* |
3618 | * Flag the inode (non aio case) or end_io struct (aio case) | 3641 | * Flag the inode (non aio case) or end_io struct (aio case) |
3619 | * that this IO needs conversion to written when IO is | 3642 | * that this IO needs conversion to written when IO is |
@@ -3857,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3857 | unsigned int allocated = 0, offset = 0; | 3880 | unsigned int allocated = 0, offset = 0; |
3858 | unsigned int allocated_clusters = 0; | 3881 | unsigned int allocated_clusters = 0; |
3859 | struct ext4_allocation_request ar; | 3882 | struct ext4_allocation_request ar; |
3860 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3883 | ext4_io_end_t *io = ext4_inode_aio(inode); |
3861 | ext4_lblk_t cluster_offset; | 3884 | ext4_lblk_t cluster_offset; |
3885 | int set_unwritten = 0; | ||
3862 | 3886 | ||
3863 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3887 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3864 | map->m_lblk, map->m_len, inode->i_ino); | 3888 | map->m_lblk, map->m_len, inode->i_ino); |
@@ -4081,13 +4105,8 @@ got_allocated_blocks: | |||
4081 | * For non async direct IO case, flag the inode state | 4105 | * For non async direct IO case, flag the inode state |
4082 | * that we need to perform conversion when IO is done. | 4106 | * that we need to perform conversion when IO is done. |
4083 | */ | 4107 | */ |
4084 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4108 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) |
4085 | if (io) | 4109 | set_unwritten = 1; |
4086 | ext4_set_io_unwritten_flag(inode, io); | ||
4087 | else | ||
4088 | ext4_set_inode_state(inode, | ||
4089 | EXT4_STATE_DIO_UNWRITTEN); | ||
4090 | } | ||
4091 | if (ext4_should_dioread_nolock(inode)) | 4110 | if (ext4_should_dioread_nolock(inode)) |
4092 | map->m_flags |= EXT4_MAP_UNINIT; | 4111 | map->m_flags |= EXT4_MAP_UNINIT; |
4093 | } | 4112 | } |
@@ -4099,6 +4118,15 @@ got_allocated_blocks: | |||
4099 | if (!err) | 4118 | if (!err) |
4100 | err = ext4_ext_insert_extent(handle, inode, path, | 4119 | err = ext4_ext_insert_extent(handle, inode, path, |
4101 | &newex, flags); | 4120 | &newex, flags); |
4121 | |||
4122 | if (!err && set_unwritten) { | ||
4123 | if (io) | ||
4124 | ext4_set_io_unwritten_flag(inode, io); | ||
4125 | else | ||
4126 | ext4_set_inode_state(inode, | ||
4127 | EXT4_STATE_DIO_UNWRITTEN); | ||
4128 | } | ||
4129 | |||
4102 | if (err && free_on_err) { | 4130 | if (err && free_on_err) { |
4103 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? | 4131 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? |
4104 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; | 4132 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; |
@@ -4240,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
4240 | * finish any pending end_io work so we won't run the risk of | 4268 | * finish any pending end_io work so we won't run the risk of |
4241 | * converting any truncated blocks to initialized later | 4269 | * converting any truncated blocks to initialized later |
4242 | */ | 4270 | */ |
4243 | ext4_flush_completed_IO(inode); | 4271 | ext4_flush_unwritten_io(inode); |
4244 | 4272 | ||
4245 | /* | 4273 | /* |
4246 | * probably first extent we're gonna free will be last in block | 4274 | * probably first extent we're gonna free will be last in block |
@@ -4768,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4768 | loff_t first_page_offset, last_page_offset; | 4796 | loff_t first_page_offset, last_page_offset; |
4769 | int credits, err = 0; | 4797 | int credits, err = 0; |
4770 | 4798 | ||
4799 | /* | ||
4800 | * Write out all dirty pages to avoid race conditions | ||
4801 | * Then release them. | ||
4802 | */ | ||
4803 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4804 | err = filemap_write_and_wait_range(mapping, | ||
4805 | offset, offset + length - 1); | ||
4806 | |||
4807 | if (err) | ||
4808 | return err; | ||
4809 | } | ||
4810 | |||
4811 | mutex_lock(&inode->i_mutex); | ||
4812 | /* It's not possible to punch a hole in an append-only or immutable file */ | ||
4813 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
4814 | err = -EPERM; | ||
4815 | goto out_mutex; | ||
4816 | } | ||
4817 | if (IS_SWAPFILE(inode)) { | ||
4818 | err = -ETXTBSY; | ||
4819 | goto out_mutex; | ||
4820 | } | ||
4821 | |||
4771 | /* No need to punch hole beyond i_size */ | 4822 | /* No need to punch hole beyond i_size */ |
4772 | if (offset >= inode->i_size) | 4823 | if (offset >= inode->i_size) |
4773 | return 0; | 4824 | goto out_mutex; |
4774 | 4825 | ||
4775 | /* | 4826 | /* |
4776 | * If the hole extends beyond i_size, set the hole | 4827 | * If the hole extends beyond i_size, set the hole |
@@ -4788,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4788 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 4839 | first_page_offset = first_page << PAGE_CACHE_SHIFT; |
4789 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 4840 | last_page_offset = last_page << PAGE_CACHE_SHIFT; |
4790 | 4841 | ||
4791 | /* | ||
4792 | * Write out all dirty pages to avoid race conditions | ||
4793 | * Then release them. | ||
4794 | */ | ||
4795 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4796 | err = filemap_write_and_wait_range(mapping, | ||
4797 | offset, offset + length - 1); | ||
4798 | |||
4799 | if (err) | ||
4800 | return err; | ||
4801 | } | ||
4802 | |||
4803 | /* Now release the pages */ | 4842 | /* Now release the pages */ |
4804 | if (last_page_offset > first_page_offset) { | 4843 | if (last_page_offset > first_page_offset) { |
4805 | truncate_pagecache_range(inode, first_page_offset, | 4844 | truncate_pagecache_range(inode, first_page_offset, |
4806 | last_page_offset - 1); | 4845 | last_page_offset - 1); |
4807 | } | 4846 | } |
4808 | 4847 | ||
4809 | /* finish any pending end_io work */ | 4848 | /* Wait for all existing dio workers; newcomers will block on i_mutex */ |
4810 | ext4_flush_completed_IO(inode); | 4849 | ext4_inode_block_unlocked_dio(inode); |
4850 | err = ext4_flush_unwritten_io(inode); | ||
4851 | if (err) | ||
4852 | goto out_dio; | ||
4853 | inode_dio_wait(inode); | ||
4811 | 4854 | ||
4812 | credits = ext4_writepage_trans_blocks(inode); | 4855 | credits = ext4_writepage_trans_blocks(inode); |
4813 | handle = ext4_journal_start(inode, credits); | 4856 | handle = ext4_journal_start(inode, credits); |
4814 | if (IS_ERR(handle)) | 4857 | if (IS_ERR(handle)) { |
4815 | return PTR_ERR(handle); | 4858 | err = PTR_ERR(handle); |
4859 | goto out_dio; | ||
4860 | } | ||
4816 | 4861 | ||
4817 | err = ext4_orphan_add(handle, inode); | ||
4818 | if (err) | ||
4819 | goto out; | ||
4820 | 4862 | ||
4821 | /* | 4863 | /* |
4822 | * Now we need to zero out the non-page-aligned data in the | 4864 | * Now we need to zero out the non-page-aligned data in the |
@@ -4902,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4902 | up_write(&EXT4_I(inode)->i_data_sem); | 4944 | up_write(&EXT4_I(inode)->i_data_sem); |
4903 | 4945 | ||
4904 | out: | 4946 | out: |
4905 | ext4_orphan_del(handle, inode); | ||
4906 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4947 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4907 | ext4_mark_inode_dirty(handle, inode); | 4948 | ext4_mark_inode_dirty(handle, inode); |
4908 | ext4_journal_stop(handle); | 4949 | ext4_journal_stop(handle); |
4950 | out_dio: | ||
4951 | ext4_inode_resume_unlocked_dio(inode); | ||
4952 | out_mutex: | ||
4953 | mutex_unlock(&inode->i_mutex); | ||
4909 | return err; | 4954 | return err; |
4910 | } | 4955 | } |
4911 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4956 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |