Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--  fs/ext4/extents.c  522
1 file changed, 197 insertions(+), 325 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9c6d06dcef8b..107936db244e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -157,11 +157,8 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
  * - ENOMEM
  * - EIO
  */
-#define ext4_ext_dirty(handle, inode, path) \
-		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
-static int __ext4_ext_dirty(const char *where, unsigned int line,
-			    handle_t *handle, struct inode *inode,
-			    struct ext4_ext_path *path)
+int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
+		     struct inode *inode, struct ext4_ext_path *path)
 {
 	int err;
 	if (path->p_bh) {
@@ -1813,39 +1810,101 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	}
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
+	eh = path[depth].p_hdr;
 	if (unlikely(path[depth].p_hdr == NULL)) {
 		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
 		return -EIO;
 	}
 
 	/* try to insert block into found extent and return */
-	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
-		&& ext4_can_extents_be_merged(inode, ex, newext)) {
-		ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
-			  ext4_ext_is_uninitialized(newext),
-			  ext4_ext_get_actual_len(newext),
-			  le32_to_cpu(ex->ee_block),
-			  ext4_ext_is_uninitialized(ex),
-			  ext4_ext_get_actual_len(ex),
-			  ext4_ext_pblock(ex));
-		err = ext4_ext_get_access(handle, inode, path + depth);
-		if (err)
-			return err;
+	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) {
 
 		/*
-		 * ext4_can_extents_be_merged should have checked that either
-		 * both extents are uninitialized, or both aren't. Thus we
-		 * need to check only one of them here.
+		 * Try to see whether we should rather test the extent on
+		 * right from ex, or from the left of ex. This is because
+		 * ext4_ext_find_extent() can return either extent on the
+		 * left, or on the right from the searched position. This
+		 * will make merging more effective.
 		 */
-		if (ext4_ext_is_uninitialized(ex))
-			uninitialized = 1;
-		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
-				+ ext4_ext_get_actual_len(newext));
-		if (uninitialized)
-			ext4_ext_mark_uninitialized(ex);
-		eh = path[depth].p_hdr;
-		nearex = ex;
-		goto merge;
+		if (ex < EXT_LAST_EXTENT(eh) &&
+		    (le32_to_cpu(ex->ee_block) +
+		    ext4_ext_get_actual_len(ex) <
+		    le32_to_cpu(newext->ee_block))) {
+			ex += 1;
+			goto prepend;
+		} else if ((ex > EXT_FIRST_EXTENT(eh)) &&
+			   (le32_to_cpu(newext->ee_block) +
+			   ext4_ext_get_actual_len(newext) <
+			   le32_to_cpu(ex->ee_block)))
+			ex -= 1;
+
+		/* Try to append newex to the ex */
+		if (ext4_can_extents_be_merged(inode, ex, newext)) {
+			ext_debug("append [%d]%d block to %u:[%d]%d"
+				  "(from %llu)\n",
+				  ext4_ext_is_uninitialized(newext),
+				  ext4_ext_get_actual_len(newext),
+				  le32_to_cpu(ex->ee_block),
+				  ext4_ext_is_uninitialized(ex),
+				  ext4_ext_get_actual_len(ex),
+				  ext4_ext_pblock(ex));
+			err = ext4_ext_get_access(handle, inode,
+						  path + depth);
+			if (err)
+				return err;
+
+			/*
+			 * ext4_can_extents_be_merged should have checked
+			 * that either both extents are uninitialized, or
+			 * both aren't. Thus we need to check only one of
+			 * them here.
+			 */
+			if (ext4_ext_is_uninitialized(ex))
+				uninitialized = 1;
+			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+					+ ext4_ext_get_actual_len(newext));
+			if (uninitialized)
+				ext4_ext_mark_uninitialized(ex);
+			eh = path[depth].p_hdr;
+			nearex = ex;
+			goto merge;
+		}
+
+prepend:
+		/* Try to prepend newex to the ex */
+		if (ext4_can_extents_be_merged(inode, newext, ex)) {
+			ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
+				  "(from %llu)\n",
+				  le32_to_cpu(newext->ee_block),
+				  ext4_ext_is_uninitialized(newext),
+				  ext4_ext_get_actual_len(newext),
+				  le32_to_cpu(ex->ee_block),
+				  ext4_ext_is_uninitialized(ex),
+				  ext4_ext_get_actual_len(ex),
+				  ext4_ext_pblock(ex));
+			err = ext4_ext_get_access(handle, inode,
+						  path + depth);
+			if (err)
+				return err;
+
+			/*
+			 * ext4_can_extents_be_merged should have checked
+			 * that either both extents are uninitialized, or
+			 * both aren't. Thus we need to check only one of
+			 * them here.
+			 */
+			if (ext4_ext_is_uninitialized(ex))
+				uninitialized = 1;
+			ex->ee_block = newext->ee_block;
+			ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
+			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+					+ ext4_ext_get_actual_len(newext));
+			if (uninitialized)
+				ext4_ext_mark_uninitialized(ex);
+			eh = path[depth].p_hdr;
+			nearex = ex;
+			goto merge;
+		}
 	}
 
 	depth = ext_depth(inode);
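The rewritten block above no longer requires the looked-up extent itself to be mergeable: since ext4_ext_find_extent() may return the extent on either side of the insertion point, the code first steps to whichever neighbor could actually touch newext, then tries an append and, failing that, a prepend (which additionally rewrites the start block). A minimal user-space sketch of that neighbor selection and merge test, with simplified structs standing in for the kernel's on-disk types:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct ext4_extent: [lblk, lblk + len) -> pblk */
struct extent {
	uint32_t lblk;	/* first logical block covered */
	uint32_t pblk;	/* first physical block */
	uint16_t len;	/* number of blocks */
};

/* Two extents merge iff they are logically and physically contiguous. */
static int can_merge(const struct extent *a, const struct extent *b)
{
	return a->lblk + a->len == b->lblk && a->pblk + a->len == b->pblk;
}

/*
 * Mirror of the kernel's choice: the lookup may return the extent on
 * either side of the insertion point, so step to whichever neighbor
 * can actually touch the new extent before testing for a merge.
 */
static struct extent *pick_neighbor(struct extent *ex, struct extent *first,
				    struct extent *last,
				    const struct extent *newex)
{
	if (ex < last && ex->lblk + ex->len < newex->lblk)
		return ex + 1;	/* newex lies right of ex: try the next one */
	if (ex > first && newex->lblk + newex->len < ex->lblk)
		return ex - 1;	/* newex lies left of ex: try the previous */
	return ex;
}

int main(void)
{
	struct extent leaf[] = { {0, 100, 8}, {16, 200, 8} };
	struct extent newex = {8, 108, 4};	/* abuts leaf[0] on the right */
	struct extent *ex = pick_neighbor(&leaf[1], &leaf[0], &leaf[1], &newex);

	if (can_merge(ex, &newex))
		printf("append to extent at lblk %u\n", ex->lblk);  /* lblk 0 */
	else if (can_merge(&newex, ex))
		printf("prepend to extent at lblk %u\n", ex->lblk);
	return 0;
}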
@@ -1880,8 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	 * There is no free space in the found leaf.
 	 * We're gonna add a new leaf in the tree.
 	 */
-	if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
-		flags = EXT4_MB_USE_ROOT_BLOCKS;
+	if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+		flags = EXT4_MB_USE_RESERVED;
 	err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
 	if (err)
 		goto cleanup;
@@ -2599,8 +2658,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
 	return 1;
 }
 
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
-				 ext4_lblk_t end)
+int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+			  ext4_lblk_t end)
 {
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
@@ -2667,12 +2726,14 @@ again:
 
 		/*
 		 * Split the extent in two so that 'end' is the last
-		 * block in the first new extent
+		 * block in the first new extent. Also we should not
+		 * fail removing space due to ENOSPC so try to use
+		 * reserved block if that happens.
 		 */
 		err = ext4_split_extent_at(handle, inode, path,
 				end + 1, split_flag,
 				EXT4_GET_BLOCKS_PRE_IO |
-				EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
+				EXT4_GET_BLOCKS_METADATA_NOFAIL);
 
 		if (err < 0)
 			goto out;
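This hunk and the ext4_ext_create_new_leaf() hunk further up replace the punch-hole-specific flag with EXT4_GET_BLOCKS_METADATA_NOFAIL, which is mapped to EXT4_MB_USE_RESERVED at allocation time: a split performed while removing space may itself need a fresh extent block, and that metadata allocation must not fail with ENOSPC halfway through. A toy user-space model of an allocator with a reserved pool that only flagged requests may consume (illustrative only; none of these names are from the kernel):

#include <stdbool.h>
#include <stdio.h>

#define USE_RESERVED 0x1	/* hypothetical stand-in for EXT4_MB_USE_RESERVED */

struct allocator {
	long free;	/* blocks anyone may take */
	long reserved;	/* kept back for must-not-fail metadata requests */
};

static bool alloc_block(struct allocator *a, int flags)
{
	if (a->free > 0) {
		a->free--;
		return true;
	}
	if ((flags & USE_RESERVED) && a->reserved > 0) {
		a->reserved--;	/* dip into the reserve: ENOSPC avoided */
		return true;
	}
	return false;		/* would be -ENOSPC */
}

int main(void)
{
	struct allocator a = { .free = 0, .reserved = 2 };

	printf("data alloc: %d\n", alloc_block(&a, 0));		   /* 0: fails */
	printf("meta alloc: %d\n", alloc_block(&a, USE_RESERVED)); /* 1: ok */
	return 0;
}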
@@ -3147,35 +3208,35 @@ out:
 static int ext4_ext_convert_to_initialized(handle_t *handle,
 					   struct inode *inode,
 					   struct ext4_map_blocks *map,
-					   struct ext4_ext_path *path)
+					   struct ext4_ext_path *path,
+					   int flags)
 {
 	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
-	struct ext4_extent *ex;
+	struct ext4_extent *ex, *abut_ex;
 	ext4_lblk_t ee_block, eof_block;
-	unsigned int ee_len, depth;
-	int allocated, max_zeroout = 0;
+	unsigned int ee_len, depth, map_len = map->m_len;
+	int allocated = 0, max_zeroout = 0;
 	int err = 0;
 	int split_flag = 0;
 
 	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
-		(unsigned long long)map->m_lblk, map->m_len);
+		(unsigned long long)map->m_lblk, map_len);
 
 	sbi = EXT4_SB(inode->i_sb);
 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
-	if (eof_block < map->m_lblk + map->m_len)
-		eof_block = map->m_lblk + map->m_len;
+	if (eof_block < map->m_lblk + map_len)
+		eof_block = map->m_lblk + map_len;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	allocated = ee_len - (map->m_lblk - ee_block);
 	zero_ex.ee_len = 0;
 
 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
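Two details in this hunk are worth spelling out: map_len caches the request length before map->m_len can be rewritten later in the function, and eof_block rounds i_size up to a whole number of blocks with the usual add-then-shift idiom. The rounding, checked in isolation:

#include <assert.h>
#include <stdint.h>

/* Round a byte size up to a count of filesystem blocks, as in
 * eof_block = (i_size + blocksize - 1) >> blocksize_bits. */
static uint64_t bytes_to_blocks(uint64_t size, unsigned int blkbits)
{
	return (size + (1ULL << blkbits) - 1) >> blkbits;
}

int main(void)
{
	/* 4 KiB blocks (blkbits == 12) */
	assert(bytes_to_blocks(0, 12) == 0);
	assert(bytes_to_blocks(1, 12) == 1);	/* a partial block counts */
	assert(bytes_to_blocks(4096, 12) == 1);
	assert(bytes_to_blocks(4097, 12) == 2);
	return 0;
}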
@@ -3186,77 +3247,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 
 	/*
 	 * Attempt to transfer newly initialized blocks from the currently
-	 * uninitialized extent to its left neighbor. This is much cheaper
+	 * uninitialized extent to its neighbor. This is much cheaper
 	 * than an insertion followed by a merge as those involve costly
-	 * memmove() calls. This is the common case in steady state for
-	 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
-	 * writes.
+	 * memmove() calls. Transferring to the left is the common case in
+	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
+	 * followed by append writes.
 	 *
 	 * Limitations of the current logic:
-	 * - L1: we only deal with writes at the start of the extent.
-	 *   The approach could be extended to writes at the end
-	 *   of the extent but this scenario was deemed less common.
-	 * - L2: we do not deal with writes covering the whole extent.
+	 * - L1: we do not deal with writes covering the whole extent.
 	 *   This would require removing the extent if the transfer
 	 *   is possible.
-	 * - L3: we only attempt to merge with an extent stored in the
+	 * - L2: we only attempt to merge with an extent stored in the
 	 *   same extent tree node.
 	 */
-	if ((map->m_lblk == ee_block) &&	/*L1*/
-		(map->m_len < ee_len) &&	/*L2*/
-		(ex > EXT_FIRST_EXTENT(eh))) {	/*L3*/
-		struct ext4_extent *prev_ex;
+	if ((map->m_lblk == ee_block) &&
+		/* See if we can merge left */
+		(map_len < ee_len) &&		/*L1*/
+		(ex > EXT_FIRST_EXTENT(eh))) {	/*L2*/
 		ext4_lblk_t prev_lblk;
 		ext4_fsblk_t prev_pblk, ee_pblk;
-		unsigned int prev_len, write_len;
+		unsigned int prev_len;
 
-		prev_ex = ex - 1;
-		prev_lblk = le32_to_cpu(prev_ex->ee_block);
-		prev_len = ext4_ext_get_actual_len(prev_ex);
-		prev_pblk = ext4_ext_pblock(prev_ex);
+		abut_ex = ex - 1;
+		prev_lblk = le32_to_cpu(abut_ex->ee_block);
+		prev_len = ext4_ext_get_actual_len(abut_ex);
+		prev_pblk = ext4_ext_pblock(abut_ex);
 		ee_pblk = ext4_ext_pblock(ex);
-		write_len = map->m_len;
 
 		/*
-		 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
 		 * upon those conditions:
-		 * - C1: prev_ex is initialized,
-		 * - C2: prev_ex is logically abutting ex,
-		 * - C3: prev_ex is physically abutting ex,
-		 * - C4: prev_ex can receive the additional blocks without
+		 * - C1: abut_ex is initialized,
+		 * - C2: abut_ex is logically abutting ex,
+		 * - C3: abut_ex is physically abutting ex,
+		 * - C4: abut_ex can receive the additional blocks without
 		 *   overflowing the (initialized) length limit.
 		 */
-		if ((!ext4_ext_is_uninitialized(prev_ex)) &&		/*C1*/
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
 			((prev_lblk + prev_len) == ee_block) &&		/*C2*/
 			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
-			(prev_len < (EXT_INIT_MAX_LEN - write_len))) {	/*C4*/
+			(prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
 			err = ext4_ext_get_access(handle, inode, path + depth);
 			if (err)
 				goto out;
 
 			trace_ext4_ext_convert_to_initialized_fastpath(inode,
-				map, ex, prev_ex);
+				map, ex, abut_ex);
 
-			/* Shift the start of ex by 'write_len' blocks */
-			ex->ee_block = cpu_to_le32(ee_block + write_len);
-			ext4_ext_store_pblock(ex, ee_pblk + write_len);
-			ex->ee_len = cpu_to_le16(ee_len - write_len);
+			/* Shift the start of ex by 'map_len' blocks */
+			ex->ee_block = cpu_to_le32(ee_block + map_len);
+			ext4_ext_store_pblock(ex, ee_pblk + map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
 			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
 
-			/* Extend prev_ex by 'write_len' blocks */
-			prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
 
-			/* Mark the block containing both extents as dirty */
-			ext4_ext_dirty(handle, inode, path + depth);
+			/* Result: number of initialized blocks past m_lblk */
+			allocated = map_len;
+		}
+	} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
+		   (map_len < ee_len) &&	/*L1*/
+		   ex < EXT_LAST_EXTENT(eh)) {	/*L2*/
+		/* See if we can merge right */
+		ext4_lblk_t next_lblk;
+		ext4_fsblk_t next_pblk, ee_pblk;
+		unsigned int next_len;
+
+		abut_ex = ex + 1;
+		next_lblk = le32_to_cpu(abut_ex->ee_block);
+		next_len = ext4_ext_get_actual_len(abut_ex);
+		next_pblk = ext4_ext_pblock(abut_ex);
+		ee_pblk = ext4_ext_pblock(ex);
 
-			/* Update path to point to the right extent */
-			path[depth].p_ext = prev_ex;
+		/*
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
+		 * upon those conditions:
+		 * - C1: abut_ex is initialized,
+		 * - C2: abut_ex is logically abutting ex,
+		 * - C3: abut_ex is physically abutting ex,
+		 * - C4: abut_ex can receive the additional blocks without
+		 *   overflowing the (initialized) length limit.
+		 */
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
+		    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/
+		    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/
+		    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
+			err = ext4_ext_get_access(handle, inode, path + depth);
+			if (err)
+				goto out;
+
+			trace_ext4_ext_convert_to_initialized_fastpath(inode,
+				map, ex, abut_ex);
+
+			/* Shift the start of abut_ex by 'map_len' blocks */
+			abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
+			ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
+			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(next_len + map_len);
 
 			/* Result: number of initialized blocks past m_lblk */
-			allocated = write_len;
-			goto out;
+			allocated = map_len;
 		}
 	}
+	if (allocated) {
+		/* Mark the block containing both extents as dirty */
+		ext4_ext_dirty(handle, inode, path + depth);
+
+		/* Update path to point to the right extent */
+		path[depth].p_ext = abut_ex;
+		goto out;
+	} else
+		allocated = ee_len - (map->m_lblk - ee_block);
 
 	WARN_ON(map->m_lblk < ee_block);
 	/*
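The new else-if branch is the mirror image of the existing merge-left fast path: instead of shifting the start of ex forward and growing the extent before it, it shifts the start of abut_ex backward and trims ex from the tail, again avoiding a split followed by a memmove(). A user-space sketch of the merge-right arithmetic, with plain integers standing in for the little-endian on-disk fields; the asserts correspond to conditions C2/C3 from the comment block, while C1 and C4 are checked by the caller in the patch:

#include <assert.h>
#include <stdint.h>

struct extent {
	uint32_t lblk;	/* first logical block */
	uint64_t pblk;	/* first physical block */
	uint16_t len;	/* length in blocks */
};

/*
 * Move the last map_len blocks of the uninitialized extent 'ex' into
 * the initialized extent 'abut' that immediately follows it, mirroring
 * the merge-right branch of the patch.
 */
static void merge_right(struct extent *ex, struct extent *abut,
			uint16_t map_len)
{
	/* C2/C3: logical and physical contiguity */
	assert(ex->lblk + ex->len == abut->lblk);
	assert(ex->pblk + ex->len == abut->pblk);

	abut->lblk -= map_len;	/* abut now starts map_len blocks earlier */
	abut->pblk -= map_len;
	abut->len += map_len;	/* ...and covers the transferred blocks */
	ex->len -= map_len;	/* ex keeps only its uninitialized head */
}

int main(void)
{
	struct extent ex = { .lblk = 0, .pblk = 1000, .len = 10 };
	struct extent abut = { .lblk = 10, .pblk = 1010, .len = 5 };

	merge_right(&ex, &abut, 4);	/* blocks 6..9 were just written */
	assert(ex.len == 6);
	assert(abut.lblk == 6 && abut.pblk == 1006 && abut.len == 9);
	return 0;
}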
@@ -3330,7 +3435,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	}
 
 	allocated = ext4_split_extent(handle, inode, path,
-				      &split_map, split_flag, 0);
+				      &split_map, split_flag, flags);
 	if (allocated < 0)
 		err = allocated;
 
@@ -3650,6 +3755,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 						  flags, allocated);
 		ext4_ext_show_leaf(inode, path);
 
+	/*
+	 * When writing into uninitialized space, we should not fail to
+	 * allocate metadata blocks for the new extent block if needed.
+	 */
+	flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
+
 	trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
 						    allocated, newblock);
 
@@ -3713,7 +3824,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	}
 
 	/* buffered write, writepage time, convert*/
-	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
+	ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
 	if (ret >= 0)
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
@@ -4257,48 +4368,13 @@ out3:
 	return err ? err : allocated;
 }
 
-void ext4_ext_truncate(struct inode *inode)
+void ext4_ext_truncate(handle_t *handle, struct inode *inode)
 {
-	struct address_space *mapping = inode->i_mapping;
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t last_block;
-	handle_t *handle;
-	loff_t page_len;
 	int err = 0;
 
 	/*
-	 * finish any pending end_io work so we won't run the risk of
-	 * converting any truncated blocks to initialized later
-	 */
-	ext4_flush_unwritten_io(inode);
-
-	/*
-	 * probably first extent we're gonna free will be last in block
-	 */
-	err = ext4_writepage_trans_blocks(inode);
-	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, err);
-	if (IS_ERR(handle))
-		return;
-
-	if (inode->i_size % PAGE_CACHE_SIZE != 0) {
-		page_len = PAGE_CACHE_SIZE -
-			(inode->i_size & (PAGE_CACHE_SIZE - 1));
-
-		err = ext4_discard_partial_page_buffers(handle,
-			mapping, inode->i_size, page_len, 0);
-
-		if (err)
-			goto out_stop;
-	}
-
-	if (ext4_orphan_add(handle, inode))
-		goto out_stop;
-
-	down_write(&EXT4_I(inode)->i_data_sem);
-
-	ext4_discard_preallocations(inode);
-
-	/*
 	 * TODO: optimization is possible here.
 	 * Probably we need not scan at all,
 	 * because page truncation is enough.
@@ -4313,29 +4389,6 @@ void ext4_ext_truncate(struct inode *inode)
 	err = ext4_es_remove_extent(inode, last_block,
 				    EXT_MAX_BLOCKS - last_block);
 	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
-
-	/* In a multi-transaction truncate, we only make the final
-	 * transaction synchronous.
-	 */
-	if (IS_SYNC(inode))
-		ext4_handle_sync(handle);
-
-	up_write(&EXT4_I(inode)->i_data_sem);
-
-out_stop:
-	/*
-	 * If this was a simple ftruncate() and the file will remain alive,
-	 * then we need to clear up the orphan record which we created above.
-	 * However, if this was a real unlink then we were called by
-	 * ext4_delete_inode(), and we allow that function to clean up the
-	 * orphan info for us.
-	 */
-	if (inode->i_nlink)
-		ext4_orphan_del(handle, inode);
-
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-	ext4_journal_stop(handle);
 }
 
 static void ext4_falloc_update_inode(struct inode *inode,
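After the two truncate hunks above, ext4_ext_truncate() only prunes the extent tree; starting the transaction, orphan-list handling, and the final bookkeeping all move to the caller, which now passes in a running handle. Roughly, the caller side would look like the following (a hedged sketch, not verbatim kernel code; error paths and orphan handling trimmed):

/* Sketch of the caller after the refactor; details elided. */
static void truncate_extents(struct inode *inode)
{
	handle_t *handle;
	int credits = ext4_writepage_trans_blocks(inode);

	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle))
		return;			/* the caller owns error handling */

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);
	ext4_ext_truncate(handle, inode);	/* only prunes the tree now */
	up_write(&EXT4_I(inode)->i_data_sem);

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);	/* sync the final transaction */
	ext4_journal_stop(handle);
}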
@@ -4623,187 +4676,6 @@ static int ext4_xattr_fiemap(struct inode *inode,
 	return (error < 0 ? error : 0);
 }
 
-/*
- * ext4_ext_punch_hole
- *
- * Punches a hole of "length" bytes in a file starting
- * at byte "offset"
- *
- * @inode:  The inode of the file to punch a hole in
- * @offset: The starting byte offset of the hole
- * @length: The length of the hole
- *
- * Returns the number of blocks removed or negative on err
- */
-int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
-	struct inode *inode = file_inode(file);
-	struct super_block *sb = inode->i_sb;
-	ext4_lblk_t first_block, stop_block;
-	struct address_space *mapping = inode->i_mapping;
-	handle_t *handle;
-	loff_t first_page, last_page, page_len;
-	loff_t first_page_offset, last_page_offset;
-	int credits, err = 0;
-
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		err = filemap_write_and_wait_range(mapping,
-			offset, offset + length - 1);
-
-		if (err)
-			return err;
-	}
-
-	mutex_lock(&inode->i_mutex);
-	/* It's not possible punch hole on append only file */
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-		err = -EPERM;
-		goto out_mutex;
-	}
-	if (IS_SWAPFILE(inode)) {
-		err = -ETXTBSY;
-		goto out_mutex;
-	}
-
-	/* No need to punch hole beyond i_size */
-	if (offset >= inode->i_size)
-		goto out_mutex;
-
-	/*
-	 * If the hole extends beyond i_size, set the hole
-	 * to end after the page that contains i_size
-	 */
-	if (offset + length > inode->i_size) {
-		length = inode->i_size +
-		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
-		   offset;
-	}
-
-	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
-
-	first_page_offset = first_page << PAGE_CACHE_SHIFT;
-	last_page_offset = last_page << PAGE_CACHE_SHIFT;
-
-	/* Now release the pages */
-	if (last_page_offset > first_page_offset) {
-		truncate_pagecache_range(inode, first_page_offset,
-					 last_page_offset - 1);
-	}
-
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
-	err = ext4_flush_unwritten_io(inode);
-	if (err)
-		goto out_dio;
-	inode_dio_wait(inode);
-
-	credits = ext4_writepage_trans_blocks(inode);
-	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-	if (IS_ERR(handle)) {
-		err = PTR_ERR(handle);
-		goto out_dio;
-	}
-
-
-	/*
-	 * Now we need to zero out the non-page-aligned data in the
-	 * pages at the start and tail of the hole, and unmap the buffer
-	 * heads for the block aligned regions of the page that were
-	 * completely zeroed.
-	 */
-	if (first_page > last_page) {
-		/*
-		 * If the file space being truncated is contained within a page
-		 * just zero out and unmap the middle of that page
-		 */
-		err = ext4_discard_partial_page_buffers(handle,
-			mapping, offset, length, 0);
-
-		if (err)
-			goto out;
-	} else {
-		/*
-		 * zero out and unmap the partial page that contains
-		 * the start of the hole
-		 */
-		page_len = first_page_offset - offset;
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle, mapping,
-						offset, page_len, 0);
-			if (err)
-				goto out;
-		}
-
-		/*
-		 * zero out and unmap the partial page that contains
-		 * the end of the hole
-		 */
-		page_len = offset + length - last_page_offset;
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle, mapping,
-					last_page_offset, page_len, 0);
-			if (err)
-				goto out;
-		}
-	}
-
-	/*
-	 * If i_size is contained in the last page, we need to
-	 * unmap and zero the partial page after i_size
-	 */
-	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
-	   inode->i_size % PAGE_CACHE_SIZE != 0) {
-
-		page_len = PAGE_CACHE_SIZE -
-			(inode->i_size & (PAGE_CACHE_SIZE - 1));
-
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle,
-					mapping, inode->i_size, page_len, 0);
-
-			if (err)
-				goto out;
-		}
-	}
-
-	first_block = (offset + sb->s_blocksize - 1) >>
-		EXT4_BLOCK_SIZE_BITS(sb);
-	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
-	/* If there are no blocks to remove, return now */
-	if (first_block >= stop_block)
-		goto out;
-
-	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode);
-
-	err = ext4_es_remove_extent(inode, first_block,
-				    stop_block - first_block);
-	err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
-
-	ext4_discard_preallocations(inode);
-
-	if (IS_SYNC(inode))
-		ext4_handle_sync(handle);
-
-	up_write(&EXT4_I(inode)->i_data_sem);
-
-out:
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-	ext4_journal_stop(handle);
-out_dio:
-	ext4_inode_resume_unlocked_dio(inode);
-out_mutex:
-	mutex_unlock(&inode->i_mutex);
-	return err;
-}
-
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {
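The removed ext4_ext_punch_hole() (this view is limited to extents.c, so its replacement in common code is not shown) derived the block range to delete by rounding the hole's start up and its end down to block boundaries, so only blocks lying entirely inside [offset, offset + length) were removed; partial blocks at the edges were merely zeroed. That rounding as a standalone example:

#include <assert.h>
#include <stdint.h>

/*
 * Block range that a hole [offset, offset + length) fully covers:
 * round the start up and the end down to block boundaries, as the
 * removed ext4_ext_punch_hole() did with EXT4_BLOCK_SIZE_BITS(sb).
 */
static void hole_to_blocks(uint64_t offset, uint64_t length,
			   unsigned int blkbits,
			   uint64_t *first, uint64_t *stop)
{
	uint64_t blocksize = 1ULL << blkbits;

	*first = (offset + blocksize - 1) >> blkbits;	/* round start up */
	*stop = (offset + length) >> blkbits;		/* round end down */
}

int main(void)
{
	uint64_t first, stop;

	/* A 10000-byte hole at offset 100 with 4 KiB blocks: only block 1
	 * (bytes [4096, 8192)) lies entirely inside [100, 10100). */
	hole_to_blocks(100, 10000, 12, &first, &stop);
	assert(first == 1 && stop == 2);

	/* Nothing to remove when the hole stays within one block. */
	hole_to_blocks(100, 1000, 12, &first, &stop);
	assert(first >= stop);
	return 0;
}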