author     J. Bruce Fields <bfields@redhat.com>   2012-10-09 18:35:22 -0400
committer  J. Bruce Fields <bfields@redhat.com>   2012-10-09 18:35:22 -0400
commit     f474af7051212b4efc8267583fad9c4ebf33ccff
tree       1aa46ebc8065a341f247c2a2d9af2f624ad1d4f8 /fs/ext4/extents.c
parent     0d22f68f02c10d5d10ec5712917e5828b001a822
parent     e3dd9a52cb5552c46c2a4ca7ccdfb4dab5c72457
nfs: disintegrate UAPI for nfs
This is to complete part of the Userspace API (UAPI) disintegration for
which the preparatory patches were pulled recently. After these patches,
userspace headers will be segregated into:

	include/uapi/linux/.../foo.h

for the userspace interface stuff, and:

	include/linux/.../foo.h

for the strictly kernel internal stuff.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
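For context, a disintegrated header pair normally ends up with the kernel-internal header including its exported counterpart. Below is a minimal sketch of that layout; the nfs_example.h names and contents are hypothetical illustrations, not files touched by this merge.

/* include/uapi/linux/nfs_example.h -- hypothetical userspace-visible half */
#ifndef _UAPI_LINUX_NFS_EXAMPLE_H
#define _UAPI_LINUX_NFS_EXAMPLE_H

/* ABI constants and structures shared with userspace live here. */
#define NFS_EXAMPLE_PORT	2049

#endif /* _UAPI_LINUX_NFS_EXAMPLE_H */

/* include/linux/nfs_example.h -- hypothetical kernel-internal half */
#ifndef _LINUX_NFS_EXAMPLE_H
#define _LINUX_NFS_EXAMPLE_H

#include <uapi/linux/nfs_example.h>	/* pull in the exported definitions */

/* Declarations used only inside the kernel stay here. */
struct nfs_example_state;

#endif /* _LINUX_NFS_EXAMPLE_H */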
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--	fs/ext4/extents.c	259
1 file changed, 152 insertions, 107 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd0c7ed06772..1c94cca35ed1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
 		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
-	neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
+	le16_add_cpu(&neh->eh_depth, 1);
 	ext4_mark_inode_dirty(handle, inode);
 out:
 	brelse(bh);
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 }
 
 /*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+				     struct inode *inode,
+				     struct ext4_ext_path *path)
+{
+	size_t s;
+	unsigned max_root = ext4_ext_space_root(inode, 0);
+	ext4_fsblk_t blk;
+
+	if ((path[0].p_depth != 1) ||
+	    (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+	    (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+		return;
+
+	/*
+	 * We need to modify the block allocation bitmap and the block
+	 * group descriptor to release the extent tree block. If we
+	 * can't get the journal credits, give up.
+	 */
+	if (ext4_journal_extend(handle, 2))
+		return;
+
+	/*
+	 * Copy the extent data up to the inode
+	 */
+	blk = ext4_idx_pblock(path[0].p_idx);
+	s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+		sizeof(struct ext4_extent_idx);
+	s += sizeof(struct ext4_extent_header);
+
+	memcpy(path[0].p_hdr, path[1].p_hdr, s);
+	path[0].p_depth = 0;
+	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+		(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+	path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+	brelse(path[1].p_bh);
+	ext4_free_blocks(handle, inode, NULL, blk, 1,
+			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+}
+
+/*
  * This function tries to merge the @ex extent to neighbours in the tree.
  * return 1 if merge left else 0.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static void ext4_ext_try_to_merge(handle_t *handle,
+				  struct inode *inode,
 				  struct ext4_ext_path *path,
 				  struct ext4_extent *ex) {
 	struct ext4_extent_header *eh;
 	unsigned int depth;
 	int merge_done = 0;
-	int ret = 0;
 
 	depth = ext_depth(inode);
 	BUG_ON(path[depth].p_hdr == NULL);
@@ -1893,7 +1937,7 @@ has_space:
 merge:
 	/* try to merge extents */
 	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
-		ext4_ext_try_to_merge(inode, path, nearex);
+		ext4_ext_try_to_merge(handle, inode, path, nearex);
 
 
 	/* time to correct all indexes above */
@@ -1901,7 +1945,7 @@ merge:
 	if (err)
 		goto cleanup;
 
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
 	if (npath) {
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /*
- * ext4_ext_check_cache()
+ * ext4_ext_in_cache()
  * Checks to see if the given block is in the cache.
  * If it is, the cached extent is stored in the given
- * cache extent pointer. If the cached extent is a hole,
- * this routine should be used instead of
- * ext4_ext_in_cache if the calling function needs to
- * know the size of the hole.
+ * cache extent pointer.
  *
  * @inode: The files inode
 * @block: The block to look for in the cache
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 *
 * Return 0 if cache is invalid; 1 if the cache is valid
 */
-static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
-			struct ext4_ext_cache *ex){
+static int
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
+		  struct ext4_extent *ex)
+{
 	struct ext4_ext_cache *cex;
 	struct ext4_sb_info *sbi;
 	int ret = 0;
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
 		goto errout;
 
 	if (in_range(block, cex->ec_block, cex->ec_len)) {
-		memcpy(ex, cex, sizeof(struct ext4_ext_cache));
+		ex->ee_block = cpu_to_le32(cex->ec_block);
+		ext4_ext_store_pblock(ex, cex->ec_start);
+		ex->ee_len = cpu_to_le16(cex->ec_len);
 		ext_debug("%u cached by %u:%u:%llu\n",
 				block,
 				cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2138,37 +2183,6 @@ errout:
 }
 
 /*
- * ext4_ext_in_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * extent pointer.
- *
- * @inode: The files inode
- * @block: The block to look for in the cache
- * @ex:    Pointer where the cached extent will be stored
- *         if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
-			struct ext4_extent *ex)
-{
-	struct ext4_ext_cache cex;
-	int ret = 0;
-
-	if (ext4_ext_check_cache(inode, block, &cex)) {
-		ex->ee_block = cpu_to_le32(cex.ec_block);
-		ext4_ext_store_pblock(ex, cex.ec_start);
-		ex->ee_len = cpu_to_le16(cex.ec_len);
-		ret = 1;
-	}
-
-	return ret;
-}
-
-
-/*
  * ext4_ext_rm_idx:
 * removes index from the index block.
 */
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len = ext4_ext_get_actual_len(ex);
 	ext4_fsblk_t pblk;
-	int flags = EXT4_FREE_BLOCKS_FORGET;
+	int flags = 0;
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		flags |= EXT4_FREE_BLOCKS_METADATA;
+		flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+	else if (ext4_should_journal_data(inode))
+		flags |= EXT4_FREE_BLOCKS_FORGET;
+
 	/*
 	 * For bigalloc file systems, we never free a partial cluster
 	 * at the beginning of the extent.  Instead, we make a note
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 	struct ext4_ext_path *path = NULL;
 	ext4_fsblk_t partial_cluster = 0;
 	handle_t *handle;
-	int i = 0, err;
+	int i = 0, err = 0;
 
 	ext_debug("truncate since %u to %u\n", start, end);
 
@@ -2604,12 +2621,16 @@ again:
 		return PTR_ERR(path);
 	}
 	depth = ext_depth(inode);
+	/* Leaf not may not exist only if inode has no blocks at all */
 	ex = path[depth].p_ext;
 	if (!ex) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-		path = NULL;
-		goto cont;
+		if (depth) {
+			EXT4_ERROR_INODE(inode,
+					 "path[%d].p_hdr == NULL",
+					 depth);
+			err = -EIO;
+		}
+		goto out;
 	}
 
 	ee_block = le32_to_cpu(ex->ee_block);
@@ -2641,8 +2662,6 @@ again:
 			goto out;
 		}
 	}
-cont:
-
 	/*
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
@@ -2662,6 +2681,7 @@ cont:
 	}
 	path[0].p_depth = depth;
 	path[0].p_hdr = ext_inode_hdr(inode);
+	i = 0;
 
 	if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
 		err = -EIO;
@@ -2923,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle,
 			ext4_ext_mark_initialized(ex);
 
 		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
-			ext4_ext_try_to_merge(inode, path, ex);
+			ext4_ext_try_to_merge(handle, inode, path, ex);
 
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	}
 
@@ -2957,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle,
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
 		ex->ee_len = cpu_to_le16(ee_len);
-		ext4_ext_try_to_merge(inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_try_to_merge(handle, inode, path, ex);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	} else if (err)
 		goto fix_extent_len;
@@ -3040,7 +3060,6 @@ out:
 	return err ? err : map->m_len;
 }
 
-#define EXT4_EXT_ZERO_LEN 7
 /*
 * This function is called by ext4_ext_map_blocks() if someone tries to write
 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3066,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 					   struct ext4_map_blocks *map,
 					   struct ext4_ext_path *path)
 {
+	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
 	struct ext4_extent *ex;
 	ext4_lblk_t ee_block, eof_block;
 	unsigned int ee_len, depth;
-	int allocated;
+	int allocated, max_zeroout = 0;
 	int err = 0;
 	int split_flag = 0;
 
@@ -3080,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		"block %llu, max_blocks %u\n", inode->i_ino,
 		(unsigned long long)map->m_lblk, map->m_len);
 
+	sbi = EXT4_SB(inode->i_sb);
 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
 	if (eof_block < map->m_lblk + map->m_len)
@@ -3179,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 */
 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 
-	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
-	    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+	if (EXT4_EXT_MAY_ZEROOUT & split_flag)
+		max_zeroout = sbi->s_extent_max_zeroout_kb >>
+			inode->i_sb->s_blocksize_bits;
+
+	/* If extent is less than s_max_zeroout_kb, zeroout directly */
+	if (max_zeroout && (ee_len <= max_zeroout)) {
 		err = ext4_ext_zeroout(inode, ex);
 		if (err)
 			goto out;
@@ -3190,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		if (err)
 			goto out;
 		ext4_ext_mark_initialized(ex);
-		ext4_ext_try_to_merge(inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_try_to_merge(handle, inode, path, ex);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	}
 
@@ -3205,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	split_map.m_lblk = map->m_lblk;
 	split_map.m_len = map->m_len;
 
-	if (allocated > map->m_len) {
-		if (allocated <= EXT4_EXT_ZERO_LEN &&
-		    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+	if (max_zeroout && (allocated > map->m_len)) {
+		if (allocated <= max_zeroout) {
 			/* case 3 */
 			zero_ex.ee_block =
 					 cpu_to_le32(map->m_lblk);
@@ -3219,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				goto out;
 			split_map.m_lblk = map->m_lblk;
 			split_map.m_len = allocated;
-		} else if ((map->m_lblk - ee_block + map->m_len <
-			   EXT4_EXT_ZERO_LEN) &&
-			 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+		} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
 			/* case 2 */
 			if (map->m_lblk != ee_block) {
 				zero_ex.ee_block = ex->ee_block;
@@ -3241,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	}
 
 	allocated = ext4_split_extent(handle, inode, path,
-				       &split_map, split_flag, 0);
+				      &split_map, split_flag, 0);
 	if (allocated < 0)
 		err = allocated;
 
@@ -3255,7 +3276,7 @@ out:
 * to an uninitialized extent.
 *
 * Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple /initialized uninitialized extents (up to three)
+ * extent into multiple initialized/uninitialized extents (up to three)
 * There are three possibilities:
 *   a> There is no split required: Entire extent should be uninitialized
 *   b> Splits in two extents: Write is happening at either end of the extent
@@ -3332,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 	/* note: ext4_ext_correct_indexes() isn't needed here because
 	 * borders are not changed
 	 */
-	ext4_ext_try_to_merge(inode, path, ex);
+	ext4_ext_try_to_merge(handle, inode, path, ex);
 
 	/* Mark modified extent as dirty */
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
 	ext4_ext_show_leaf(inode, path);
 	return err;
@@ -3599,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 {
 	int ret = 0;
 	int err = 0;
-	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+	ext4_io_end_t *io = ext4_inode_aio(inode);
 
 	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
 		  "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3614,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 		ret = ext4_split_unwritten_extents(handle, inode, map,
 						   path, flags);
+		if (ret <= 0)
+			goto out;
 		/*
 		 * Flag the inode(non aio case) or end_io struct (aio case)
 		 * that this IO needs to conversion to written when IO is
@@ -3857,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
 	struct ext4_allocation_request ar;
-	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+	ext4_io_end_t *io = ext4_inode_aio(inode);
 	ext4_lblk_t cluster_offset;
+	int set_unwritten = 0;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
@@ -4081,13 +4105,8 @@ got_allocated_blocks:
 		 * For non asycn direct IO case, flag the inode state
 		 * that we need to perform conversion when IO is done.
 		 */
-		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-			if (io)
-				ext4_set_io_unwritten_flag(inode, io);
-			else
-				ext4_set_inode_state(inode,
-						     EXT4_STATE_DIO_UNWRITTEN);
-		}
+		if ((flags & EXT4_GET_BLOCKS_PRE_IO))
+			set_unwritten = 1;
 		if (ext4_should_dioread_nolock(inode))
 			map->m_flags |= EXT4_MAP_UNINIT;
 	}
@@ -4099,6 +4118,15 @@ got_allocated_blocks:
 	if (!err)
 		err = ext4_ext_insert_extent(handle, inode, path,
 					     &newex, flags);
+
+	if (!err && set_unwritten) {
+		if (io)
+			ext4_set_io_unwritten_flag(inode, io);
+		else
+			ext4_set_inode_state(inode,
+					     EXT4_STATE_DIO_UNWRITTEN);
+	}
+
 	if (err && free_on_err) {
 		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
 			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4240,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
 	 * finish any pending end_io work so we won't run the risk of
 	 * converting any truncated blocks to initialized later
 	 */
-	ext4_flush_completed_IO(inode);
+	ext4_flush_unwritten_io(inode);
 
 	/*
 	 * probably first extent we're gonna free will be last in block
@@ -4768,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	loff_t first_page_offset, last_page_offset;
 	int credits, err = 0;
 
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		err = filemap_write_and_wait_range(mapping,
+			offset, offset + length - 1);
+
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	/* It's not possible punch hole on append only file */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		err = -EPERM;
+		goto out_mutex;
+	}
+	if (IS_SWAPFILE(inode)) {
+		err = -ETXTBSY;
+		goto out_mutex;
+	}
+
 	/* No need to punch hole beyond i_size */
 	if (offset >= inode->i_size)
-		return 0;
+		goto out_mutex;
 
 	/*
 	 * If the hole extends beyond i_size, set the hole
@@ -4788,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	first_page_offset = first_page << PAGE_CACHE_SHIFT;
 	last_page_offset = last_page << PAGE_CACHE_SHIFT;
 
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		err = filemap_write_and_wait_range(mapping,
-			offset, offset + length - 1);
-
-		if (err)
-			return err;
-	}
-
 	/* Now release the pages */
 	if (last_page_offset > first_page_offset) {
 		truncate_pagecache_range(inode, first_page_offset,
 					 last_page_offset - 1);
 	}
 
-	/* finish any pending end_io work */
-	ext4_flush_completed_IO(inode);
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	err = ext4_flush_unwritten_io(inode);
+	if (err)
+		goto out_dio;
+	inode_dio_wait(inode);
 
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, credits);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out_dio;
+	}
 
-	err = ext4_orphan_add(handle, inode);
-	if (err)
-		goto out;
 
 	/*
 	 * Now we need to zero out the non-page-aligned data in the
@@ -4902,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	up_write(&EXT4_I(inode)->i_data_sem);
 
 out:
-	ext4_orphan_del(handle, inode);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
 	return err;
 }
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,