about summary refs log tree commit diff stats
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- fs/ext4/extents.c 258
1 files changed, 151 insertions, 107 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index aabbb3f53683..1c94cca35ed1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1177 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1177 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1178 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1178 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1179 1179
1180 neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1); 1180 le16_add_cpu(&neh->eh_depth, 1);
1181 ext4_mark_inode_dirty(handle, inode); 1181 ext4_mark_inode_dirty(handle, inode);
1182out: 1182out:
1183 brelse(bh); 1183 brelse(bh);
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
1656} 1656}
1657 1657
1658/* 1658/*
1659 * This function does a very simple check to see if we can collapse
1660 * an extent tree with a single extent tree leaf block into the inode.
1661 */
1662static void ext4_ext_try_to_merge_up(handle_t *handle,
1663 struct inode *inode,
1664 struct ext4_ext_path *path)
1665{
1666 size_t s;
1667 unsigned max_root = ext4_ext_space_root(inode, 0);
1668 ext4_fsblk_t blk;
1669
1670 if ((path[0].p_depth != 1) ||
1671 (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
1672 (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
1673 return;
1674
1675 /*
1676 * We need to modify the block allocation bitmap and the block
1677 * group descriptor to release the extent tree block. If we
1678 * can't get the journal credits, give up.
1679 */
1680 if (ext4_journal_extend(handle, 2))
1681 return;
1682
1683 /*
1684 * Copy the extent data up to the inode
1685 */
1686 blk = ext4_idx_pblock(path[0].p_idx);
1687 s = le16_to_cpu(path[1].p_hdr->eh_entries) *
1688 sizeof(struct ext4_extent_idx);
1689 s += sizeof(struct ext4_extent_header);
1690
1691 memcpy(path[0].p_hdr, path[1].p_hdr, s);
1692 path[0].p_depth = 0;
1693 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
1694 (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
1695 path[0].p_hdr->eh_max = cpu_to_le16(max_root);
1696
1697 brelse(path[1].p_bh);
1698 ext4_free_blocks(handle, inode, NULL, blk, 1,
1699 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1700}
1701
1702/*
1659 * This function tries to merge the @ex extent to neighbours in the tree. 1703 * This function tries to merge the @ex extent to neighbours in the tree.
1660 * return 1 if merge left else 0. 1704 * return 1 if merge left else 0.
1661 */ 1705 */
1662static int ext4_ext_try_to_merge(struct inode *inode, 1706static void ext4_ext_try_to_merge(handle_t *handle,
1707 struct inode *inode,
1663 struct ext4_ext_path *path, 1708 struct ext4_ext_path *path,
1664 struct ext4_extent *ex) { 1709 struct ext4_extent *ex) {
1665 struct ext4_extent_header *eh; 1710 struct ext4_extent_header *eh;
1666 unsigned int depth; 1711 unsigned int depth;
1667 int merge_done = 0; 1712 int merge_done = 0;
1668 int ret = 0;
1669 1713
1670 depth = ext_depth(inode); 1714 depth = ext_depth(inode);
1671 BUG_ON(path[depth].p_hdr == NULL); 1715 BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
1675 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); 1719 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1676 1720
1677 if (!merge_done) 1721 if (!merge_done)
1678 ret = ext4_ext_try_to_merge_right(inode, path, ex); 1722 (void) ext4_ext_try_to_merge_right(inode, path, ex);
1679 1723
1680 return ret; 1724 ext4_ext_try_to_merge_up(handle, inode, path);
1681} 1725}
1682 1726
1683/* 1727/*
@@ -1893,7 +1937,7 @@ has_space:
1893merge: 1937merge:
1894 /* try to merge extents */ 1938 /* try to merge extents */
1895 if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) 1939 if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
1896 ext4_ext_try_to_merge(inode, path, nearex); 1940 ext4_ext_try_to_merge(handle, inode, path, nearex);
1897 1941
1898 1942
1899 /* time to correct all indexes above */ 1943 /* time to correct all indexes above */
@@ -1901,7 +1945,7 @@ merge:
1901 if (err) 1945 if (err)
1902 goto cleanup; 1946 goto cleanup;
1903 1947
1904 err = ext4_ext_dirty(handle, inode, path + depth); 1948 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
1905 1949
1906cleanup: 1950cleanup:
1907 if (npath) { 1951 if (npath) {
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2092} 2136}
2093 2137
2094/* 2138/*
2095 * ext4_ext_check_cache() 2139 * ext4_ext_in_cache()
2096 * Checks to see if the given block is in the cache. 2140 * Checks to see if the given block is in the cache.
2097 * If it is, the cached extent is stored in the given 2141 * If it is, the cached extent is stored in the given
2098 * cache extent pointer. If the cached extent is a hole, 2142 * cache extent pointer.
2099 * this routine should be used instead of
2100 * ext4_ext_in_cache if the calling function needs to
2101 * know the size of the hole.
2102 * 2143 *
2103 * @inode: The file's inode 2144 * @inode: The file's inode
2104 * @block: The block to look for in the cache 2145 * @block: The block to look for in the cache
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2107 * 2148 *
2108 * Return 0 if cache is invalid; 1 if the cache is valid 2149 * Return 0 if cache is invalid; 1 if the cache is valid
2109 */ 2150 */
2110static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, 2151static int
2111 struct ext4_ext_cache *ex){ 2152ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2153 struct ext4_extent *ex)
2154{
2112 struct ext4_ext_cache *cex; 2155 struct ext4_ext_cache *cex;
2113 struct ext4_sb_info *sbi; 2156 struct ext4_sb_info *sbi;
2114 int ret = 0; 2157 int ret = 0;
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
2125 goto errout; 2168 goto errout;
2126 2169
2127 if (in_range(block, cex->ec_block, cex->ec_len)) { 2170 if (in_range(block, cex->ec_block, cex->ec_len)) {
2128 memcpy(ex, cex, sizeof(struct ext4_ext_cache)); 2171 ex->ee_block = cpu_to_le32(cex->ec_block);
2172 ext4_ext_store_pblock(ex, cex->ec_start);
2173 ex->ee_len = cpu_to_le16(cex->ec_len);
2129 ext_debug("%u cached by %u:%u:%llu\n", 2174 ext_debug("%u cached by %u:%u:%llu\n",
2130 block, 2175 block,
2131 cex->ec_block, cex->ec_len, cex->ec_start); 2176 cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2138,37 +2183,6 @@ errout:
2138} 2183}
2139 2184
2140/* 2185/*
2141 * ext4_ext_in_cache()
2142 * Checks to see if the given block is in the cache.
2143 * If it is, the cached extent is stored in the given
2144 * extent pointer.
2145 *
2146 * @inode: The files inode
2147 * @block: The block to look for in the cache
2148 * @ex: Pointer where the cached extent will be stored
2149 * if it contains block
2150 *
2151 * Return 0 if cache is invalid; 1 if the cache is valid
2152 */
2153static int
2154ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2155 struct ext4_extent *ex)
2156{
2157 struct ext4_ext_cache cex;
2158 int ret = 0;
2159
2160 if (ext4_ext_check_cache(inode, block, &cex)) {
2161 ex->ee_block = cpu_to_le32(cex.ec_block);
2162 ext4_ext_store_pblock(ex, cex.ec_start);
2163 ex->ee_len = cpu_to_le16(cex.ec_len);
2164 ret = 1;
2165 }
2166
2167 return ret;
2168}
2169
2170
2171/*
2172 * ext4_ext_rm_idx: 2186 * ext4_ext_rm_idx:
2173 * removes index from the index block. 2187 * removes index from the index block.
2174 */ 2188 */
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2274 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2288 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2275 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2289 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2276 ext4_fsblk_t pblk; 2290 ext4_fsblk_t pblk;
2277 int flags = EXT4_FREE_BLOCKS_FORGET; 2291 int flags = 0;
2278 2292
2279 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2293 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2280 flags |= EXT4_FREE_BLOCKS_METADATA; 2294 flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
2295 else if (ext4_should_journal_data(inode))
2296 flags |= EXT4_FREE_BLOCKS_FORGET;
2297
2281 /* 2298 /*
2282 * For bigalloc file systems, we never free a partial cluster 2299 * For bigalloc file systems, we never free a partial cluster
2283 * at the beginning of the extent. Instead, we make a note 2300 * at the beginning of the extent. Instead, we make a note
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2572 struct ext4_ext_path *path = NULL; 2589 struct ext4_ext_path *path = NULL;
2573 ext4_fsblk_t partial_cluster = 0; 2590 ext4_fsblk_t partial_cluster = 0;
2574 handle_t *handle; 2591 handle_t *handle;
2575 int i = 0, err; 2592 int i = 0, err = 0;
2576 2593
2577 ext_debug("truncate since %u to %u\n", start, end); 2594 ext_debug("truncate since %u to %u\n", start, end);
2578 2595
@@ -2604,12 +2621,16 @@ again:
2604 return PTR_ERR(path); 2621 return PTR_ERR(path);
2605 } 2622 }
2606 depth = ext_depth(inode); 2623 depth = ext_depth(inode);
2624 /* A leaf node may be missing only if the inode has no blocks at all */
2607 ex = path[depth].p_ext; 2625 ex = path[depth].p_ext;
2608 if (!ex) { 2626 if (!ex) {
2609 ext4_ext_drop_refs(path); 2627 if (depth) {
2610 kfree(path); 2628 EXT4_ERROR_INODE(inode,
2611 path = NULL; 2629 "path[%d].p_hdr == NULL",
2612 goto cont; 2630 depth);
2631 err = -EIO;
2632 }
2633 goto out;
2613 } 2634 }
2614 2635
2615 ee_block = le32_to_cpu(ex->ee_block); 2636 ee_block = le32_to_cpu(ex->ee_block);
@@ -2641,8 +2662,6 @@ again:
2641 goto out; 2662 goto out;
2642 } 2663 }
2643 } 2664 }
2644cont:
2645
2646 /* 2665 /*
2647 * We start scanning from right side, freeing all the blocks 2666 * We start scanning from right side, freeing all the blocks
2648 * after i_size and walking into the tree depth-wise. 2667 * after i_size and walking into the tree depth-wise.
@@ -2924,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle,
2924 ext4_ext_mark_initialized(ex); 2943 ext4_ext_mark_initialized(ex);
2925 2944
2926 if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) 2945 if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
2927 ext4_ext_try_to_merge(inode, path, ex); 2946 ext4_ext_try_to_merge(handle, inode, path, ex);
2928 2947
2929 err = ext4_ext_dirty(handle, inode, path + depth); 2948 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2930 goto out; 2949 goto out;
2931 } 2950 }
2932 2951
@@ -2958,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle,
2958 goto fix_extent_len; 2977 goto fix_extent_len;
2959 /* update the extent length and mark as initialized */ 2978 /* update the extent length and mark as initialized */
2960 ex->ee_len = cpu_to_le16(ee_len); 2979 ex->ee_len = cpu_to_le16(ee_len);
2961 ext4_ext_try_to_merge(inode, path, ex); 2980 ext4_ext_try_to_merge(handle, inode, path, ex);
2962 err = ext4_ext_dirty(handle, inode, path + depth); 2981 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2963 goto out; 2982 goto out;
2964 } else if (err) 2983 } else if (err)
2965 goto fix_extent_len; 2984 goto fix_extent_len;
@@ -3041,7 +3060,6 @@ out:
3041 return err ? err : map->m_len; 3060 return err ? err : map->m_len;
3042} 3061}
3043 3062
3044#define EXT4_EXT_ZERO_LEN 7
3045/* 3063/*
3046 * This function is called by ext4_ext_map_blocks() if someone tries to write 3064 * This function is called by ext4_ext_map_blocks() if someone tries to write
3047 * to an uninitialized extent. It may result in splitting the uninitialized 3065 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3067,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3067 struct ext4_map_blocks *map, 3085 struct ext4_map_blocks *map,
3068 struct ext4_ext_path *path) 3086 struct ext4_ext_path *path)
3069{ 3087{
3088 struct ext4_sb_info *sbi;
3070 struct ext4_extent_header *eh; 3089 struct ext4_extent_header *eh;
3071 struct ext4_map_blocks split_map; 3090 struct ext4_map_blocks split_map;
3072 struct ext4_extent zero_ex; 3091 struct ext4_extent zero_ex;
3073 struct ext4_extent *ex; 3092 struct ext4_extent *ex;
3074 ext4_lblk_t ee_block, eof_block; 3093 ext4_lblk_t ee_block, eof_block;
3075 unsigned int ee_len, depth; 3094 unsigned int ee_len, depth;
3076 int allocated; 3095 int allocated, max_zeroout = 0;
3077 int err = 0; 3096 int err = 0;
3078 int split_flag = 0; 3097 int split_flag = 0;
3079 3098
@@ -3081,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3081 "block %llu, max_blocks %u\n", inode->i_ino, 3100 "block %llu, max_blocks %u\n", inode->i_ino,
3082 (unsigned long long)map->m_lblk, map->m_len); 3101 (unsigned long long)map->m_lblk, map->m_len);
3083 3102
3103 sbi = EXT4_SB(inode->i_sb);
3084 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3104 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3085 inode->i_sb->s_blocksize_bits; 3105 inode->i_sb->s_blocksize_bits;
3086 if (eof_block < map->m_lblk + map->m_len) 3106 if (eof_block < map->m_lblk + map->m_len)
@@ -3180,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3180 */ 3200 */
3181 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3201 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3182 3202
3183 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ 3203 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3184 if (ee_len <= 2*EXT4_EXT_ZERO_LEN && 3204 max_zeroout = sbi->s_extent_max_zeroout_kb >>
3185 (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 3205 inode->i_sb->s_blocksize_bits;
3206
3207 /* If the extent is no longer than the s_extent_max_zeroout_kb limit, zero it out directly */
3208 if (max_zeroout && (ee_len <= max_zeroout)) {
3186 err = ext4_ext_zeroout(inode, ex); 3209 err = ext4_ext_zeroout(inode, ex);
3187 if (err) 3210 if (err)
3188 goto out; 3211 goto out;
@@ -3191,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3191 if (err) 3214 if (err)
3192 goto out; 3215 goto out;
3193 ext4_ext_mark_initialized(ex); 3216 ext4_ext_mark_initialized(ex);
3194 ext4_ext_try_to_merge(inode, path, ex); 3217 ext4_ext_try_to_merge(handle, inode, path, ex);
3195 err = ext4_ext_dirty(handle, inode, path + depth); 3218 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3196 goto out; 3219 goto out;
3197 } 3220 }
3198 3221
@@ -3206,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3206 split_map.m_lblk = map->m_lblk; 3229 split_map.m_lblk = map->m_lblk;
3207 split_map.m_len = map->m_len; 3230 split_map.m_len = map->m_len;
3208 3231
3209 if (allocated > map->m_len) { 3232 if (max_zeroout && (allocated > map->m_len)) {
3210 if (allocated <= EXT4_EXT_ZERO_LEN && 3233 if (allocated <= max_zeroout) {
3211 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3212 /* case 3 */ 3234 /* case 3 */
3213 zero_ex.ee_block = 3235 zero_ex.ee_block =
3214 cpu_to_le32(map->m_lblk); 3236 cpu_to_le32(map->m_lblk);
@@ -3220,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3220 goto out; 3242 goto out;
3221 split_map.m_lblk = map->m_lblk; 3243 split_map.m_lblk = map->m_lblk;
3222 split_map.m_len = allocated; 3244 split_map.m_len = allocated;
3223 } else if ((map->m_lblk - ee_block + map->m_len < 3245 } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
3224 EXT4_EXT_ZERO_LEN) &&
3225 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3226 /* case 2 */ 3246 /* case 2 */
3227 if (map->m_lblk != ee_block) { 3247 if (map->m_lblk != ee_block) {
3228 zero_ex.ee_block = ex->ee_block; 3248 zero_ex.ee_block = ex->ee_block;
@@ -3242,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3242 } 3262 }
3243 3263
3244 allocated = ext4_split_extent(handle, inode, path, 3264 allocated = ext4_split_extent(handle, inode, path,
3245 &split_map, split_flag, 0); 3265 &split_map, split_flag, 0);
3246 if (allocated < 0) 3266 if (allocated < 0)
3247 err = allocated; 3267 err = allocated;
3248 3268
@@ -3256,7 +3276,7 @@ out:
3256 * to an uninitialized extent. 3276 * to an uninitialized extent.
3257 * 3277 *
3258 * Writing to an uninitialized extent may result in splitting the uninitialized 3278 * Writing to an uninitialized extent may result in splitting the uninitialized
3259 * extent into multiple /initialized uninitialized extents (up to three) 3279 * extent into multiple initialized/uninitialized extents (up to three)
3260 * There are three possibilities: 3280 * There are three possibilities:
3261 * a> There is no split required: Entire extent should be uninitialized 3281 * a> There is no split required: Entire extent should be uninitialized
3262 * b> Splits in two extents: Write is happening at either end of the extent 3282 * b> Splits in two extents: Write is happening at either end of the extent
@@ -3333,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3333 /* note: ext4_ext_correct_indexes() isn't needed here because 3353 /* note: ext4_ext_correct_indexes() isn't needed here because
3334 * borders are not changed 3354 * borders are not changed
3335 */ 3355 */
3336 ext4_ext_try_to_merge(inode, path, ex); 3356 ext4_ext_try_to_merge(handle, inode, path, ex);
3337 3357
3338 /* Mark modified extent as dirty */ 3358 /* Mark modified extent as dirty */
3339 err = ext4_ext_dirty(handle, inode, path + depth); 3359 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3340out: 3360out:
3341 ext4_ext_show_leaf(inode, path); 3361 ext4_ext_show_leaf(inode, path);
3342 return err; 3362 return err;
@@ -3600,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3600{ 3620{
3601 int ret = 0; 3621 int ret = 0;
3602 int err = 0; 3622 int err = 0;
3603 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3623 ext4_io_end_t *io = ext4_inode_aio(inode);
3604 3624
3605 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " 3625 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
3606 "block %llu, max_blocks %u, flags %x, allocated %u\n", 3626 "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3615,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3615 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3635 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3616 ret = ext4_split_unwritten_extents(handle, inode, map, 3636 ret = ext4_split_unwritten_extents(handle, inode, map,
3617 path, flags); 3637 path, flags);
3638 if (ret <= 0)
3639 goto out;
3618 /* 3640 /*
3619 * Flag the inode (non aio case) or end_io struct (aio case) 3641 * Flag the inode (non aio case) or end_io struct (aio case)
3620 * that this IO needs conversion to written when IO is 3642 * that this IO needs conversion to written when IO is
@@ -3858,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3858 unsigned int allocated = 0, offset = 0; 3880 unsigned int allocated = 0, offset = 0;
3859 unsigned int allocated_clusters = 0; 3881 unsigned int allocated_clusters = 0;
3860 struct ext4_allocation_request ar; 3882 struct ext4_allocation_request ar;
3861 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3883 ext4_io_end_t *io = ext4_inode_aio(inode);
3862 ext4_lblk_t cluster_offset; 3884 ext4_lblk_t cluster_offset;
3885 int set_unwritten = 0;
3863 3886
3864 ext_debug("blocks %u/%u requested for inode %lu\n", 3887 ext_debug("blocks %u/%u requested for inode %lu\n",
3865 map->m_lblk, map->m_len, inode->i_ino); 3888 map->m_lblk, map->m_len, inode->i_ino);
@@ -4082,13 +4105,8 @@ got_allocated_blocks:
4082 * For non async direct IO case, flag the inode state 4105 * For non async direct IO case, flag the inode state
4083 * that we need to perform conversion when IO is done. 4106 * that we need to perform conversion when IO is done.
4084 */ 4107 */
4085 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4108 if ((flags & EXT4_GET_BLOCKS_PRE_IO))
4086 if (io) 4109 set_unwritten = 1;
4087 ext4_set_io_unwritten_flag(inode, io);
4088 else
4089 ext4_set_inode_state(inode,
4090 EXT4_STATE_DIO_UNWRITTEN);
4091 }
4092 if (ext4_should_dioread_nolock(inode)) 4110 if (ext4_should_dioread_nolock(inode))
4093 map->m_flags |= EXT4_MAP_UNINIT; 4111 map->m_flags |= EXT4_MAP_UNINIT;
4094 } 4112 }
@@ -4100,6 +4118,15 @@ got_allocated_blocks:
4100 if (!err) 4118 if (!err)
4101 err = ext4_ext_insert_extent(handle, inode, path, 4119 err = ext4_ext_insert_extent(handle, inode, path,
4102 &newex, flags); 4120 &newex, flags);
4121
4122 if (!err && set_unwritten) {
4123 if (io)
4124 ext4_set_io_unwritten_flag(inode, io);
4125 else
4126 ext4_set_inode_state(inode,
4127 EXT4_STATE_DIO_UNWRITTEN);
4128 }
4129
4103 if (err && free_on_err) { 4130 if (err && free_on_err) {
4104 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? 4131 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
4105 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; 4132 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4241,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
4241 * finish any pending end_io work so we won't run the risk of 4268 * finish any pending end_io work so we won't run the risk of
4242 * converting any truncated blocks to initialized later 4269 * converting any truncated blocks to initialized later
4243 */ 4270 */
4244 ext4_flush_completed_IO(inode); 4271 ext4_flush_unwritten_io(inode);
4245 4272
4246 /* 4273 /*
4247 * probably first extent we're gonna free will be last in block 4274 * probably first extent we're gonna free will be last in block
@@ -4769,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4769 loff_t first_page_offset, last_page_offset; 4796 loff_t first_page_offset, last_page_offset;
4770 int credits, err = 0; 4797 int credits, err = 0;
4771 4798
4799 /*
4800 * Write out all dirty pages to avoid race conditions
4801 * Then release them.
4802 */
4803 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4804 err = filemap_write_and_wait_range(mapping,
4805 offset, offset + length - 1);
4806
4807 if (err)
4808 return err;
4809 }
4810
4811 mutex_lock(&inode->i_mutex);
4812 /* It's not possible to punch a hole in an append-only or immutable file */
4813 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
4814 err = -EPERM;
4815 goto out_mutex;
4816 }
4817 if (IS_SWAPFILE(inode)) {
4818 err = -ETXTBSY;
4819 goto out_mutex;
4820 }
4821
4772 /* No need to punch hole beyond i_size */ 4822 /* No need to punch hole beyond i_size */
4773 if (offset >= inode->i_size) 4823 if (offset >= inode->i_size)
4774 return 0; 4824 goto out_mutex;
4775 4825
4776 /* 4826 /*
4777 * If the hole extends beyond i_size, set the hole 4827 * If the hole extends beyond i_size, set the hole
@@ -4789,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4789 first_page_offset = first_page << PAGE_CACHE_SHIFT; 4839 first_page_offset = first_page << PAGE_CACHE_SHIFT;
4790 last_page_offset = last_page << PAGE_CACHE_SHIFT; 4840 last_page_offset = last_page << PAGE_CACHE_SHIFT;
4791 4841
4792 /*
4793 * Write out all dirty pages to avoid race conditions
4794 * Then release them.
4795 */
4796 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4797 err = filemap_write_and_wait_range(mapping,
4798 offset, offset + length - 1);
4799
4800 if (err)
4801 return err;
4802 }
4803
4804 /* Now release the pages */ 4842 /* Now release the pages */
4805 if (last_page_offset > first_page_offset) { 4843 if (last_page_offset > first_page_offset) {
4806 truncate_pagecache_range(inode, first_page_offset, 4844 truncate_pagecache_range(inode, first_page_offset,
4807 last_page_offset - 1); 4845 last_page_offset - 1);
4808 } 4846 }
4809 4847
4810 /* finish any pending end_io work */ 4848 /* Wait for all existing dio workers; newcomers will block on i_mutex */
4811 ext4_flush_completed_IO(inode); 4849 ext4_inode_block_unlocked_dio(inode);
4850 err = ext4_flush_unwritten_io(inode);
4851 if (err)
4852 goto out_dio;
4853 inode_dio_wait(inode);
4812 4854
4813 credits = ext4_writepage_trans_blocks(inode); 4855 credits = ext4_writepage_trans_blocks(inode);
4814 handle = ext4_journal_start(inode, credits); 4856 handle = ext4_journal_start(inode, credits);
4815 if (IS_ERR(handle)) 4857 if (IS_ERR(handle)) {
4816 return PTR_ERR(handle); 4858 err = PTR_ERR(handle);
4859 goto out_dio;
4860 }
4817 4861
4818 err = ext4_orphan_add(handle, inode);
4819 if (err)
4820 goto out;
4821 4862
4822 /* 4863 /*
4823 * Now we need to zero out the non-page-aligned data in the 4864 * Now we need to zero out the non-page-aligned data in the
@@ -4903,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4903 up_write(&EXT4_I(inode)->i_data_sem); 4944 up_write(&EXT4_I(inode)->i_data_sem);
4904 4945
4905out: 4946out:
4906 ext4_orphan_del(handle, inode);
4907 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4947 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4908 ext4_mark_inode_dirty(handle, inode); 4948 ext4_mark_inode_dirty(handle, inode);
4909 ext4_journal_stop(handle); 4949 ext4_journal_stop(handle);
4950out_dio:
4951 ext4_inode_resume_unlocked_dio(inode);
4952out_mutex:
4953 mutex_unlock(&inode->i_mutex);
4910 return err; 4954 return err;
4911} 4955}
4912int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4956int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,