 fs/ext4/extents.c | 259 ++++++++++++++++++++++++------------------
 1 file changed, 152 insertions(+), 107 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd0c7ed06772..1c94cca35ed1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
                   le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
                   ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
-        neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
+        le16_add_cpu(&neh->eh_depth, 1);
         ext4_mark_inode_dirty(handle, inode);
 out:
         brelse(bh);
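
For reference, the le16_add_cpu() helper used above comes from include/linux/byteorder/generic.h and folds the open-coded read/convert/add/store sequence into a single call; its definition is essentially:

    static inline void le16_add_cpu(__le16 *var, u16 val)
    {
            *var = cpu_to_le16(le16_to_cpu(*var) + val);
    }
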
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 }
 
 /*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+                                     struct inode *inode,
+                                     struct ext4_ext_path *path)
+{
+        size_t s;
+        unsigned max_root = ext4_ext_space_root(inode, 0);
+        ext4_fsblk_t blk;
+
+        if ((path[0].p_depth != 1) ||
+            (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+            (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+                return;
+
+        /*
+         * We need to modify the block allocation bitmap and the block
+         * group descriptor to release the extent tree block.  If we
+         * can't get the journal credits, give up.
+         */
+        if (ext4_journal_extend(handle, 2))
+                return;
+
+        /*
+         * Copy the extent data up to the inode
+         */
+        blk = ext4_idx_pblock(path[0].p_idx);
+        s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+                sizeof(struct ext4_extent_idx);
+        s += sizeof(struct ext4_extent_header);
+
+        memcpy(path[0].p_hdr, path[1].p_hdr, s);
+        path[0].p_depth = 0;
+        path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+                (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+        path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+        brelse(path[1].p_bh);
+        ext4_free_blocks(handle, inode, NULL, blk, 1,
+                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+}
+
+/*
  * This function tries to merge the @ex extent to neighbours in the tree.
  * return 1 if merge left else 0.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static void ext4_ext_try_to_merge(handle_t *handle,
+                                  struct inode *inode,
                                   struct ext4_ext_path *path,
                                   struct ext4_extent *ex) {
         struct ext4_extent_header *eh;
         unsigned int depth;
         int merge_done = 0;
-        int ret = 0;
 
         depth = ext_depth(inode);
         BUG_ON(path[depth].p_hdr == NULL);
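
For context on the max_root bound in ext4_ext_try_to_merge_up(): ext4_ext_space_root() reports how many extent entries fit in the inode body after the header. A back-of-the-envelope check using the standard on-disk sizes (this snippet is illustrative, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
            int i_data = 60;   /* EXT4_N_BLOCKS (15) * sizeof(__le32) */
            int hdr    = 12;   /* sizeof(struct ext4_extent_header) */
            int entry  = 12;   /* sizeof(struct ext4_extent) */

            /* mirrors ext4_ext_space_root(): (60 - 12) / 12 = 4 */
            printf("max_root = %d\n", (i_data - hdr) / entry);
            return 0;
    }

So the collapse only fires when the lone leaf block holds at most four extents, i.e. exactly what fits back into i_data.
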
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
                 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
 
         if (!merge_done)
-                ret = ext4_ext_try_to_merge_right(inode, path, ex);
+                (void) ext4_ext_try_to_merge_right(inode, path, ex);
 
-        return ret;
+        ext4_ext_try_to_merge_up(handle, inode, path);
 }
 
 /*
@@ -1893,7 +1937,7 @@ has_space:
 merge:
         /* try to merge extents */
         if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
-                ext4_ext_try_to_merge(inode, path, nearex);
+                ext4_ext_try_to_merge(handle, inode, path, nearex);
 
 
         /* time to correct all indexes above */
@@ -1901,7 +1945,7 @@ merge:
         if (err)
                 goto cleanup;
 
-        err = ext4_ext_dirty(handle, inode, path + depth);
+        err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
         if (npath) {
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /*
- * ext4_ext_check_cache()
+ * ext4_ext_in_cache()
  * Checks to see if the given block is in the cache.
  * If it is, the cached extent is stored in the given
- * cache extent pointer.  If the cached extent is a hole,
- * this routine should be used instead of
- * ext4_ext_in_cache if the calling function needs to
- * know the size of the hole.
+ * cache extent pointer.
  *
  * @inode: The files inode
  * @block: The block to look for in the cache
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
  *
  * Return 0 if cache is invalid; 1 if the cache is valid
  */
-static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
-                        struct ext4_ext_cache *ex){
+static int
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
+                  struct ext4_extent *ex)
+{
         struct ext4_ext_cache *cex;
         struct ext4_sb_info *sbi;
         int ret = 0;
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
                 goto errout;
 
         if (in_range(block, cex->ec_block, cex->ec_len)) {
-                memcpy(ex, cex, sizeof(struct ext4_ext_cache));
+                ex->ee_block = cpu_to_le32(cex->ec_block);
+                ext4_ext_store_pblock(ex, cex->ec_start);
+                ex->ee_len = cpu_to_le16(cex->ec_len);
                 ext_debug("%u cached by %u:%u:%llu\n",
                                 block,
                                 cex->ec_block, cex->ec_len, cex->ec_start);
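
The three assignments above replace a raw memcpy() of the cache record with a field-by-field conversion into the on-disk extent format. ext4_ext_store_pblock() (fs/ext4/ext4_extents.h) splits the 48-bit physical block number across ee_start_lo/ee_start_hi, essentially:

    static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
                                             ext4_fsblk_t pb)
    {
            ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
            ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
                                          0xffff);
    }
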
@@ -2138,37 +2183,6 @@ errout:
 }
 
 /*
- * ext4_ext_in_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * extent pointer.
- *
- * @inode: The files inode
- * @block: The block to look for in the cache
- * @ex: Pointer where the cached extent will be stored
- *      if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
-                  struct ext4_extent *ex)
-{
-        struct ext4_ext_cache cex;
-        int ret = 0;
-
-        if (ext4_ext_check_cache(inode, block, &cex)) {
-                ex->ee_block = cpu_to_le32(cex.ec_block);
-                ext4_ext_store_pblock(ex, cex.ec_start);
-                ex->ee_len = cpu_to_le16(cex.ec_len);
-                ret = 1;
-        }
-
-        return ret;
-}
-
-
-/*
  * ext4_ext_rm_idx:
  * removes index from the index block.
  */
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
         unsigned short ee_len = ext4_ext_get_actual_len(ex);
         ext4_fsblk_t pblk;
-        int flags = EXT4_FREE_BLOCKS_FORGET;
+        int flags = 0;
 
         if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-                flags |= EXT4_FREE_BLOCKS_METADATA;
+                flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+        else if (ext4_should_journal_data(inode))
+                flags |= EXT4_FREE_BLOCKS_FORGET;
+
         /*
          * For bigalloc file systems, we never free a partial cluster
          * at the beginning of the extent.  Instead, we make a note
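
The flag change above narrows when freed blocks are "forgotten" in the journal: forgetting is only needed when a block's contents may live in the journal, i.e. metadata, or file data on a data=journal inode. Schematically:

    /*
     * EXT4_FREE_BLOCKS_FORGET makes ext4_free_blocks() call ext4_forget()
     * on each freed block, so the journal will not replay stale contents
     * over the reallocated block.  Directory/symlink blocks are metadata,
     * hence METADATA | FORGET; regular file data needs FORGET only when
     * it is journalled too (ext4_should_journal_data()).
     */
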
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
         struct ext4_ext_path *path = NULL;
         ext4_fsblk_t partial_cluster = 0;
         handle_t *handle;
-        int i = 0, err;
+        int i = 0, err = 0;
 
         ext_debug("truncate since %u to %u\n", start, end);
 
@@ -2604,12 +2621,16 @@ again:
                 return PTR_ERR(path);
         }
         depth = ext_depth(inode);
+        /* Leaf not may not exist only if inode has no blocks at all */
         ex = path[depth].p_ext;
         if (!ex) {
-                ext4_ext_drop_refs(path);
-                kfree(path);
-                path = NULL;
-                goto cont;
+                if (depth) {
+                        EXT4_ERROR_INODE(inode,
+                                         "path[%d].p_hdr == NULL",
+                                         depth);
+                        err = -EIO;
+                }
+                goto out;
         }
 
         ee_block = le32_to_cpu(ex->ee_block);
@@ -2641,8 +2662,6 @@ again:
                         goto out;
                 }
         }
-cont:
-
         /*
          * We start scanning from right side, freeing all the blocks
          * after i_size and walking into the tree depth-wise.
@@ -2662,6 +2681,7 @@ cont:
         }
         path[0].p_depth = depth;
         path[0].p_hdr = ext_inode_hdr(inode);
+        i = 0;
 
         if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
                 err = -EIO;
@@ -2923,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle,
                 ext4_ext_mark_initialized(ex);
 
                 if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
-                        ext4_ext_try_to_merge(inode, path, ex);
+                        ext4_ext_try_to_merge(handle, inode, path, ex);
 
-                err = ext4_ext_dirty(handle, inode, path + depth);
+                err = ext4_ext_dirty(handle, inode, path + path->p_depth);
                 goto out;
         }
 
@@ -2957,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle,
                         goto fix_extent_len;
                 /* update the extent length and mark as initialized */
                 ex->ee_len = cpu_to_le16(ee_len);
-                ext4_ext_try_to_merge(inode, path, ex);
-                err = ext4_ext_dirty(handle, inode, path + depth);
+                ext4_ext_try_to_merge(handle, inode, path, ex);
+                err = ext4_ext_dirty(handle, inode, path + path->p_depth);
                 goto out;
         } else if (err)
                 goto fix_extent_len;
@@ -3040,7 +3060,6 @@ out:
         return err ? err : map->m_len;
 }
 
-#define EXT4_EXT_ZERO_LEN 7
 /*
  * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3066,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                            struct ext4_map_blocks *map,
                                            struct ext4_ext_path *path)
 {
+        struct ext4_sb_info *sbi;
         struct ext4_extent_header *eh;
         struct ext4_map_blocks split_map;
         struct ext4_extent zero_ex;
         struct ext4_extent *ex;
         ext4_lblk_t ee_block, eof_block;
         unsigned int ee_len, depth;
-        int allocated;
+        int allocated, max_zeroout = 0;
         int err = 0;
         int split_flag = 0;
 
@@ -3080,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3080 "block %llu, max_blocks %u\n", inode->i_ino, 3100 "block %llu, max_blocks %u\n", inode->i_ino,
3081 (unsigned long long)map->m_lblk, map->m_len); 3101 (unsigned long long)map->m_lblk, map->m_len);
3082 3102
3103 sbi = EXT4_SB(inode->i_sb);
3083 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3104 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3084 inode->i_sb->s_blocksize_bits; 3105 inode->i_sb->s_blocksize_bits;
3085 if (eof_block < map->m_lblk + map->m_len) 3106 if (eof_block < map->m_lblk + map->m_len)
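
The eof_block context line is the usual round-up-to-block-boundary division. A quick worked example with invented values (4 KiB blocks, so s_blocksize_bits = 12):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long i_size = 10000;  /* bytes; illustrative */
            unsigned bits = 12;                 /* 4 KiB block size */
            unsigned long long bs = 1ULL << bits;

            /* (10000 + 4095) >> 12 == 3: the file spans blocks 0..2 */
            printf("eof_block = %llu\n", (i_size + bs - 1) >> bits);
            return 0;
    }
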
@@ -3179,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
          */
         split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 
-        /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-        if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
-            (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+        if (EXT4_EXT_MAY_ZEROOUT & split_flag)
+                max_zeroout = sbi->s_extent_max_zeroout_kb >>
+                        inode->i_sb->s_blocksize_bits;
+
+        /* If extent is less than s_max_zeroout_kb, zeroout directly */
+        if (max_zeroout && (ee_len <= max_zeroout)) {
                 err = ext4_ext_zeroout(inode, ex);
                 if (err)
                         goto out;
@@ -3190,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                 if (err)
                         goto out;
                 ext4_ext_mark_initialized(ex);
-                ext4_ext_try_to_merge(inode, path, ex);
-                err = ext4_ext_dirty(handle, inode, path + depth);
+                ext4_ext_try_to_merge(handle, inode, path, ex);
+                err = ext4_ext_dirty(handle, inode, path + path->p_depth);
                 goto out;
         }
 
@@ -3205,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
         split_map.m_lblk = map->m_lblk;
         split_map.m_len = map->m_len;
 
-        if (allocated > map->m_len) {
-                if (allocated <= EXT4_EXT_ZERO_LEN &&
-                    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+        if (max_zeroout && (allocated > map->m_len)) {
+                if (allocated <= max_zeroout) {
                         /* case 3 */
                         zero_ex.ee_block =
                                          cpu_to_le32(map->m_lblk);
@@ -3219,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                 goto out;
                         split_map.m_lblk = map->m_lblk;
                         split_map.m_len = allocated;
-                } else if ((map->m_lblk - ee_block + map->m_len <
-                           EXT4_EXT_ZERO_LEN) &&
-                           (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+                } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
                         /* case 2 */
                         if (map->m_lblk != ee_block) {
                                 zero_ex.ee_block = ex->ee_block;
@@ -3241,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
         }
 
         allocated = ext4_split_extent(handle, inode, path,
-                                      &split_map, split_flag, 0);
+                                       &split_map, split_flag, 0);
         if (allocated < 0)
                 err = allocated;
 
@@ -3255,7 +3276,7 @@ out:
  * to an uninitialized extent.
  *
  * Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple /initialized uninitialized extents (up to three)
+ * extent into multiple initialized/uninitialized extents (up to three)
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be uninitialized
  *   b> Splits in two extents: Write is happening at either end of the extent
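
To make the comment's three cases concrete, here is an illustrative sketch (block numbers invented, not from the patch) for an uninitialized extent covering logical blocks 100..109:

    /*
     * U = uninitialized; on this PRE_IO path the written piece also
     * stays uninitialized and is converted at I/O completion:
     *
     * a> write covers 100..109: no split needed, one extent remains
     * b> write covers 100..103 (or 106..109): split in two at the
     *    written/unwritten boundary
     * c> write covers 104..105: split in three,
     *    [100..103 U][104..105 U*][106..109 U], where U* is the piece
     *    converted to written when the I/O completes
     */
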
@@ -3332,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
         /* note: ext4_ext_correct_indexes() isn't needed here because
          * borders are not changed
          */
-        ext4_ext_try_to_merge(inode, path, ex);
+        ext4_ext_try_to_merge(handle, inode, path, ex);
 
         /* Mark modified extent as dirty */
-        err = ext4_ext_dirty(handle, inode, path + depth);
+        err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
         ext4_ext_show_leaf(inode, path);
         return err;
@@ -3599,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 {
         int ret = 0;
         int err = 0;
-        ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+        ext4_io_end_t *io = ext4_inode_aio(inode);
 
         ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
                   "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3614,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
         if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
                 ret = ext4_split_unwritten_extents(handle, inode, map,
                                                    path, flags);
+                if (ret <= 0)
+                        goto out;
                 /*
                  * Flag the inode(non aio case) or end_io struct (aio case)
                  * that this IO needs to conversion to written when IO is
@@ -3857,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
         unsigned int allocated = 0, offset = 0;
         unsigned int allocated_clusters = 0;
         struct ext4_allocation_request ar;
-        ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+        ext4_io_end_t *io = ext4_inode_aio(inode);
         ext4_lblk_t cluster_offset;
+        int set_unwritten = 0;
 
         ext_debug("blocks %u/%u requested for inode %lu\n",
                   map->m_lblk, map->m_len, inode->i_ino);
@@ -4081,13 +4105,8 @@ got_allocated_blocks:
                  * For non asycn direct IO case, flag the inode state
                  * that we need to perform conversion when IO is done.
                  */
-                if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-                        if (io)
-                                ext4_set_io_unwritten_flag(inode, io);
-                        else
-                                ext4_set_inode_state(inode,
-                                                     EXT4_STATE_DIO_UNWRITTEN);
-                }
+                if ((flags & EXT4_GET_BLOCKS_PRE_IO))
+                        set_unwritten = 1;
                 if (ext4_should_dioread_nolock(inode))
                         map->m_flags |= EXT4_MAP_UNINIT;
         }
@@ -4099,6 +4118,15 @@ got_allocated_blocks:
         if (!err)
                 err = ext4_ext_insert_extent(handle, inode, path,
                                              &newex, flags);
+
+        if (!err && set_unwritten) {
+                if (io)
+                        ext4_set_io_unwritten_flag(inode, io);
+                else
+                        ext4_set_inode_state(inode,
+                                             EXT4_STATE_DIO_UNWRITTEN);
+        }
+
         if (err && free_on_err) {
                 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
                         EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4240,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
          * finish any pending end_io work so we won't run the risk of
          * converting any truncated blocks to initialized later
          */
-        ext4_flush_completed_IO(inode);
+        ext4_flush_unwritten_io(inode);
 
         /*
          * probably first extent we're gonna free will be last in block
@@ -4768,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
         loff_t first_page_offset, last_page_offset;
         int credits, err = 0;
 
+        /*
+         * Write out all dirty pages to avoid race conditions
+         * Then release them.
+         */
+        if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+                err = filemap_write_and_wait_range(mapping,
+                        offset, offset + length - 1);
+
+                if (err)
+                        return err;
+        }
+
+        mutex_lock(&inode->i_mutex);
+        /* It's not possible punch hole on append only file */
+        if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+                err = -EPERM;
+                goto out_mutex;
+        }
+        if (IS_SWAPFILE(inode)) {
+                err = -ETXTBSY;
+                goto out_mutex;
+        }
+
         /* No need to punch hole beyond i_size */
         if (offset >= inode->i_size)
-                return 0;
+                goto out_mutex;
 
         /*
          * If the hole extends beyond i_size, set the hole
@@ -4788,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
         first_page_offset = first_page << PAGE_CACHE_SHIFT;
         last_page_offset = last_page << PAGE_CACHE_SHIFT;
 
-        /*
-         * Write out all dirty pages to avoid race conditions
-         * Then release them.
-         */
-        if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-                err = filemap_write_and_wait_range(mapping,
-                        offset, offset + length - 1);
-
-                if (err)
-                        return err;
-        }
-
         /* Now release the pages */
         if (last_page_offset > first_page_offset) {
                 truncate_pagecache_range(inode, first_page_offset,
                                          last_page_offset - 1);
         }
 
-        /* finish any pending end_io work */
-        ext4_flush_completed_IO(inode);
+        /* Wait all existing dio workers, newcomers will block on i_mutex */
+        ext4_inode_block_unlocked_dio(inode);
+        err = ext4_flush_unwritten_io(inode);
+        if (err)
+                goto out_dio;
+        inode_dio_wait(inode);
 
         credits = ext4_writepage_trans_blocks(inode);
         handle = ext4_journal_start(inode, credits);
-        if (IS_ERR(handle))
-                return PTR_ERR(handle);
+        if (IS_ERR(handle)) {
+                err = PTR_ERR(handle);
+                goto out_dio;
+        }
 
-        err = ext4_orphan_add(handle, inode);
-        if (err)
-                goto out;
 
         /*
          * Now we need to zero out the non-page-aligned data in the
@@ -4902,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
         up_write(&EXT4_I(inode)->i_data_sem);
 
 out:
-        ext4_orphan_del(handle, inode);
         inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
         ext4_mark_inode_dirty(handle, inode);
         ext4_journal_stop(handle);
+out_dio:
+        ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+        mutex_unlock(&inode->i_mutex);
         return err;
 }
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
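
Taken together, the punch-hole hunks give ext4_ext_punch_hole() a conventional goto unwind: each resource is released in reverse order of acquisition. A condensed sketch of the resulting shape (details such as the mtime update elided):

    mutex_lock(&inode->i_mutex);                  /* taken first */
    ext4_inode_block_unlocked_dio(inode);         /* then DIO blocked */
    handle = ext4_journal_start(inode, credits);  /* then the journal */
    ...
    out:                                /* journal held: stop it */
            ext4_journal_stop(handle);
    out_dio:                            /* DIO blocked: resume it */
            ext4_inode_resume_unlocked_dio(inode);
    out_mutex:                          /* mutex held: unlock it */
            mutex_unlock(&inode->i_mutex);
            return err;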