diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 522 |
1 files changed, 197 insertions, 325 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9c6d06dcef8b..107936db244e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -157,11 +157,8 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, | |||
157 | * - ENOMEM | 157 | * - ENOMEM |
158 | * - EIO | 158 | * - EIO |
159 | */ | 159 | */ |
160 | #define ext4_ext_dirty(handle, inode, path) \ | 160 | int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, |
161 | __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path)) | 161 | struct inode *inode, struct ext4_ext_path *path) |
162 | static int __ext4_ext_dirty(const char *where, unsigned int line, | ||
163 | handle_t *handle, struct inode *inode, | ||
164 | struct ext4_ext_path *path) | ||
165 | { | 162 | { |
166 | int err; | 163 | int err; |
167 | if (path->p_bh) { | 164 | if (path->p_bh) { |
@@ -1813,39 +1810,101 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1813 | } | 1810 | } |
1814 | depth = ext_depth(inode); | 1811 | depth = ext_depth(inode); |
1815 | ex = path[depth].p_ext; | 1812 | ex = path[depth].p_ext; |
1813 | eh = path[depth].p_hdr; | ||
1816 | if (unlikely(path[depth].p_hdr == NULL)) { | 1814 | if (unlikely(path[depth].p_hdr == NULL)) { |
1817 | EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); | 1815 | EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); |
1818 | return -EIO; | 1816 | return -EIO; |
1819 | } | 1817 | } |
1820 | 1818 | ||
1821 | /* try to insert block into found extent and return */ | 1819 | /* try to insert block into found extent and return */ |
1822 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) | 1820 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) { |
1823 | && ext4_can_extents_be_merged(inode, ex, newext)) { | ||
1824 | ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n", | ||
1825 | ext4_ext_is_uninitialized(newext), | ||
1826 | ext4_ext_get_actual_len(newext), | ||
1827 | le32_to_cpu(ex->ee_block), | ||
1828 | ext4_ext_is_uninitialized(ex), | ||
1829 | ext4_ext_get_actual_len(ex), | ||
1830 | ext4_ext_pblock(ex)); | ||
1831 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
1832 | if (err) | ||
1833 | return err; | ||
1834 | 1821 | ||
1835 | /* | 1822 | /* |
1836 | * ext4_can_extents_be_merged should have checked that either | 1823 | * Try to see whether we should rather test the extent on |
1837 | * both extents are uninitialized, or both aren't. Thus we | 1824 | * right from ex, or from the left of ex. This is because |
1838 | * need to check only one of them here. | 1825 | * ext4_ext_find_extent() can return either extent on the |
1826 | * left, or on the right from the searched position. This | ||
1827 | * will make merging more effective. | ||
1839 | */ | 1828 | */ |
1840 | if (ext4_ext_is_uninitialized(ex)) | 1829 | if (ex < EXT_LAST_EXTENT(eh) && |
1841 | uninitialized = 1; | 1830 | (le32_to_cpu(ex->ee_block) + |
1842 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1831 | ext4_ext_get_actual_len(ex) < |
1832 | le32_to_cpu(newext->ee_block))) { | ||
1833 | ex += 1; | ||
1834 | goto prepend; | ||
1835 | } else if ((ex > EXT_FIRST_EXTENT(eh)) && | ||
1836 | (le32_to_cpu(newext->ee_block) + | ||
1837 | ext4_ext_get_actual_len(newext) < | ||
1838 | le32_to_cpu(ex->ee_block))) | ||
1839 | ex -= 1; | ||
1840 | |||
1841 | /* Try to append newex to the ex */ | ||
1842 | if (ext4_can_extents_be_merged(inode, ex, newext)) { | ||
1843 | ext_debug("append [%d]%d block to %u:[%d]%d" | ||
1844 | "(from %llu)\n", | ||
1845 | ext4_ext_is_uninitialized(newext), | ||
1846 | ext4_ext_get_actual_len(newext), | ||
1847 | le32_to_cpu(ex->ee_block), | ||
1848 | ext4_ext_is_uninitialized(ex), | ||
1849 | ext4_ext_get_actual_len(ex), | ||
1850 | ext4_ext_pblock(ex)); | ||
1851 | err = ext4_ext_get_access(handle, inode, | ||
1852 | path + depth); | ||
1853 | if (err) | ||
1854 | return err; | ||
1855 | |||
1856 | /* | ||
1857 | * ext4_can_extents_be_merged should have checked | ||
1858 | * that either both extents are uninitialized, or | ||
1859 | * both aren't. Thus we need to check only one of | ||
1860 | * them here. | ||
1861 | */ | ||
1862 | if (ext4_ext_is_uninitialized(ex)) | ||
1863 | uninitialized = 1; | ||
1864 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | ||
1843 | + ext4_ext_get_actual_len(newext)); | 1865 | + ext4_ext_get_actual_len(newext)); |
1844 | if (uninitialized) | 1866 | if (uninitialized) |
1845 | ext4_ext_mark_uninitialized(ex); | 1867 | ext4_ext_mark_uninitialized(ex); |
1846 | eh = path[depth].p_hdr; | 1868 | eh = path[depth].p_hdr; |
1847 | nearex = ex; | 1869 | nearex = ex; |
1848 | goto merge; | 1870 | goto merge; |
1871 | } | ||
1872 | |||
1873 | prepend: | ||
1874 | /* Try to prepend newex to the ex */ | ||
1875 | if (ext4_can_extents_be_merged(inode, newext, ex)) { | ||
1876 | ext_debug("prepend %u[%d]%d block to %u:[%d]%d" | ||
1877 | "(from %llu)\n", | ||
1878 | le32_to_cpu(newext->ee_block), | ||
1879 | ext4_ext_is_uninitialized(newext), | ||
1880 | ext4_ext_get_actual_len(newext), | ||
1881 | le32_to_cpu(ex->ee_block), | ||
1882 | ext4_ext_is_uninitialized(ex), | ||
1883 | ext4_ext_get_actual_len(ex), | ||
1884 | ext4_ext_pblock(ex)); | ||
1885 | err = ext4_ext_get_access(handle, inode, | ||
1886 | path + depth); | ||
1887 | if (err) | ||
1888 | return err; | ||
1889 | |||
1890 | /* | ||
1891 | * ext4_can_extents_be_merged should have checked | ||
1892 | * that either both extents are uninitialized, or | ||
1893 | * both aren't. Thus we need to check only one of | ||
1894 | * them here. | ||
1895 | */ | ||
1896 | if (ext4_ext_is_uninitialized(ex)) | ||
1897 | uninitialized = 1; | ||
1898 | ex->ee_block = newext->ee_block; | ||
1899 | ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); | ||
1900 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | ||
1901 | + ext4_ext_get_actual_len(newext)); | ||
1902 | if (uninitialized) | ||
1903 | ext4_ext_mark_uninitialized(ex); | ||
1904 | eh = path[depth].p_hdr; | ||
1905 | nearex = ex; | ||
1906 | goto merge; | ||
1907 | } | ||
1849 | } | 1908 | } |
1850 | 1909 | ||
1851 | depth = ext_depth(inode); | 1910 | depth = ext_depth(inode); |
@@ -1880,8 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1880 | * There is no free space in the found leaf. | 1939 | * There is no free space in the found leaf. |
1881 | * We're gonna add a new leaf in the tree. | 1940 | * We're gonna add a new leaf in the tree. |
1882 | */ | 1941 | */ |
1883 | if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) | 1942 | if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL) |
1884 | flags = EXT4_MB_USE_ROOT_BLOCKS; | 1943 | flags = EXT4_MB_USE_RESERVED; |
1885 | err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); | 1944 | err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); |
1886 | if (err) | 1945 | if (err) |
1887 | goto cleanup; | 1946 | goto cleanup; |
@@ -2599,8 +2658,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2599 | return 1; | 2658 | return 1; |
2600 | } | 2659 | } |
2601 | 2660 | ||
2602 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | 2661 | int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2603 | ext4_lblk_t end) | 2662 | ext4_lblk_t end) |
2604 | { | 2663 | { |
2605 | struct super_block *sb = inode->i_sb; | 2664 | struct super_block *sb = inode->i_sb; |
2606 | int depth = ext_depth(inode); | 2665 | int depth = ext_depth(inode); |
@@ -2667,12 +2726,14 @@ again: | |||
2667 | 2726 | ||
2668 | /* | 2727 | /* |
2669 | * Split the extent in two so that 'end' is the last | 2728 | * Split the extent in two so that 'end' is the last |
2670 | * block in the first new extent | 2729 | * block in the first new extent. Also we should not |
2730 | * fail removing space due to ENOSPC so try to use | ||
2731 | * reserved block if that happens. | ||
2671 | */ | 2732 | */ |
2672 | err = ext4_split_extent_at(handle, inode, path, | 2733 | err = ext4_split_extent_at(handle, inode, path, |
2673 | end + 1, split_flag, | 2734 | end + 1, split_flag, |
2674 | EXT4_GET_BLOCKS_PRE_IO | | 2735 | EXT4_GET_BLOCKS_PRE_IO | |
2675 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT); | 2736 | EXT4_GET_BLOCKS_METADATA_NOFAIL); |
2676 | 2737 | ||
2677 | if (err < 0) | 2738 | if (err < 0) |
2678 | goto out; | 2739 | goto out; |
@@ -3147,35 +3208,35 @@ out: | |||
3147 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 3208 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
3148 | struct inode *inode, | 3209 | struct inode *inode, |
3149 | struct ext4_map_blocks *map, | 3210 | struct ext4_map_blocks *map, |
3150 | struct ext4_ext_path *path) | 3211 | struct ext4_ext_path *path, |
3212 | int flags) | ||
3151 | { | 3213 | { |
3152 | struct ext4_sb_info *sbi; | 3214 | struct ext4_sb_info *sbi; |
3153 | struct ext4_extent_header *eh; | 3215 | struct ext4_extent_header *eh; |
3154 | struct ext4_map_blocks split_map; | 3216 | struct ext4_map_blocks split_map; |
3155 | struct ext4_extent zero_ex; | 3217 | struct ext4_extent zero_ex; |
3156 | struct ext4_extent *ex; | 3218 | struct ext4_extent *ex, *abut_ex; |
3157 | ext4_lblk_t ee_block, eof_block; | 3219 | ext4_lblk_t ee_block, eof_block; |
3158 | unsigned int ee_len, depth; | 3220 | unsigned int ee_len, depth, map_len = map->m_len; |
3159 | int allocated, max_zeroout = 0; | 3221 | int allocated = 0, max_zeroout = 0; |
3160 | int err = 0; | 3222 | int err = 0; |
3161 | int split_flag = 0; | 3223 | int split_flag = 0; |
3162 | 3224 | ||
3163 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" | 3225 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" |
3164 | "block %llu, max_blocks %u\n", inode->i_ino, | 3226 | "block %llu, max_blocks %u\n", inode->i_ino, |
3165 | (unsigned long long)map->m_lblk, map->m_len); | 3227 | (unsigned long long)map->m_lblk, map_len); |
3166 | 3228 | ||
3167 | sbi = EXT4_SB(inode->i_sb); | 3229 | sbi = EXT4_SB(inode->i_sb); |
3168 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3230 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3169 | inode->i_sb->s_blocksize_bits; | 3231 | inode->i_sb->s_blocksize_bits; |
3170 | if (eof_block < map->m_lblk + map->m_len) | 3232 | if (eof_block < map->m_lblk + map_len) |
3171 | eof_block = map->m_lblk + map->m_len; | 3233 | eof_block = map->m_lblk + map_len; |
3172 | 3234 | ||
3173 | depth = ext_depth(inode); | 3235 | depth = ext_depth(inode); |
3174 | eh = path[depth].p_hdr; | 3236 | eh = path[depth].p_hdr; |
3175 | ex = path[depth].p_ext; | 3237 | ex = path[depth].p_ext; |
3176 | ee_block = le32_to_cpu(ex->ee_block); | 3238 | ee_block = le32_to_cpu(ex->ee_block); |
3177 | ee_len = ext4_ext_get_actual_len(ex); | 3239 | ee_len = ext4_ext_get_actual_len(ex); |
3178 | allocated = ee_len - (map->m_lblk - ee_block); | ||
3179 | zero_ex.ee_len = 0; | 3240 | zero_ex.ee_len = 0; |
3180 | 3241 | ||
3181 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); | 3242 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); |
@@ -3186,77 +3247,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3186 | 3247 | ||
3187 | /* | 3248 | /* |
3188 | * Attempt to transfer newly initialized blocks from the currently | 3249 | * Attempt to transfer newly initialized blocks from the currently |
3189 | * uninitialized extent to its left neighbor. This is much cheaper | 3250 | * uninitialized extent to its neighbor. This is much cheaper |
3190 | * than an insertion followed by a merge as those involve costly | 3251 | * than an insertion followed by a merge as those involve costly |
3191 | * memmove() calls. This is the common case in steady state for | 3252 | * memmove() calls. Transferring to the left is the common case in |
3192 | * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append | 3253 | * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE) |
3193 | * writes. | 3254 | * followed by append writes. |
3194 | * | 3255 | * |
3195 | * Limitations of the current logic: | 3256 | * Limitations of the current logic: |
3196 | * - L1: we only deal with writes at the start of the extent. | 3257 | * - L1: we do not deal with writes covering the whole extent. |
3197 | * The approach could be extended to writes at the end | ||
3198 | * of the extent but this scenario was deemed less common. | ||
3199 | * - L2: we do not deal with writes covering the whole extent. | ||
3200 | * This would require removing the extent if the transfer | 3258 | * This would require removing the extent if the transfer |
3201 | * is possible. | 3259 | * is possible. |
3202 | * - L3: we only attempt to merge with an extent stored in the | 3260 | * - L2: we only attempt to merge with an extent stored in the |
3203 | * same extent tree node. | 3261 | * same extent tree node. |
3204 | */ | 3262 | */ |
3205 | if ((map->m_lblk == ee_block) && /*L1*/ | 3263 | if ((map->m_lblk == ee_block) && |
3206 | (map->m_len < ee_len) && /*L2*/ | 3264 | /* See if we can merge left */ |
3207 | (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/ | 3265 | (map_len < ee_len) && /*L1*/ |
3208 | struct ext4_extent *prev_ex; | 3266 | (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/ |
3209 | ext4_lblk_t prev_lblk; | 3267 | ext4_lblk_t prev_lblk; |
3210 | ext4_fsblk_t prev_pblk, ee_pblk; | 3268 | ext4_fsblk_t prev_pblk, ee_pblk; |
3211 | unsigned int prev_len, write_len; | 3269 | unsigned int prev_len; |
3212 | 3270 | ||
3213 | prev_ex = ex - 1; | 3271 | abut_ex = ex - 1; |
3214 | prev_lblk = le32_to_cpu(prev_ex->ee_block); | 3272 | prev_lblk = le32_to_cpu(abut_ex->ee_block); |
3215 | prev_len = ext4_ext_get_actual_len(prev_ex); | 3273 | prev_len = ext4_ext_get_actual_len(abut_ex); |
3216 | prev_pblk = ext4_ext_pblock(prev_ex); | 3274 | prev_pblk = ext4_ext_pblock(abut_ex); |
3217 | ee_pblk = ext4_ext_pblock(ex); | 3275 | ee_pblk = ext4_ext_pblock(ex); |
3218 | write_len = map->m_len; | ||
3219 | 3276 | ||
3220 | /* | 3277 | /* |
3221 | * A transfer of blocks from 'ex' to 'prev_ex' is allowed | 3278 | * A transfer of blocks from 'ex' to 'abut_ex' is allowed |
3222 | * upon those conditions: | 3279 | * upon those conditions: |
3223 | * - C1: prev_ex is initialized, | 3280 | * - C1: abut_ex is initialized, |
3224 | * - C2: prev_ex is logically abutting ex, | 3281 | * - C2: abut_ex is logically abutting ex, |
3225 | * - C3: prev_ex is physically abutting ex, | 3282 | * - C3: abut_ex is physically abutting ex, |
3226 | * - C4: prev_ex can receive the additional blocks without | 3283 | * - C4: abut_ex can receive the additional blocks without |
3227 | * overflowing the (initialized) length limit. | 3284 | * overflowing the (initialized) length limit. |
3228 | */ | 3285 | */ |
3229 | if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/ | 3286 | if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ |
3230 | ((prev_lblk + prev_len) == ee_block) && /*C2*/ | 3287 | ((prev_lblk + prev_len) == ee_block) && /*C2*/ |
3231 | ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ | 3288 | ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ |
3232 | (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/ | 3289 | (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ |
3233 | err = ext4_ext_get_access(handle, inode, path + depth); | 3290 | err = ext4_ext_get_access(handle, inode, path + depth); |
3234 | if (err) | 3291 | if (err) |
3235 | goto out; | 3292 | goto out; |
3236 | 3293 | ||
3237 | trace_ext4_ext_convert_to_initialized_fastpath(inode, | 3294 | trace_ext4_ext_convert_to_initialized_fastpath(inode, |
3238 | map, ex, prev_ex); | 3295 | map, ex, abut_ex); |
3239 | 3296 | ||
3240 | /* Shift the start of ex by 'write_len' blocks */ | 3297 | /* Shift the start of ex by 'map_len' blocks */ |
3241 | ex->ee_block = cpu_to_le32(ee_block + write_len); | 3298 | ex->ee_block = cpu_to_le32(ee_block + map_len); |
3242 | ext4_ext_store_pblock(ex, ee_pblk + write_len); | 3299 | ext4_ext_store_pblock(ex, ee_pblk + map_len); |
3243 | ex->ee_len = cpu_to_le16(ee_len - write_len); | 3300 | ex->ee_len = cpu_to_le16(ee_len - map_len); |
3244 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ | 3301 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ |
3245 | 3302 | ||
3246 | /* Extend prev_ex by 'write_len' blocks */ | 3303 | /* Extend abut_ex by 'map_len' blocks */ |
3247 | prev_ex->ee_len = cpu_to_le16(prev_len + write_len); | 3304 | abut_ex->ee_len = cpu_to_le16(prev_len + map_len); |
3248 | 3305 | ||
3249 | /* Mark the block containing both extents as dirty */ | 3306 | /* Result: number of initialized blocks past m_lblk */ |
3250 | ext4_ext_dirty(handle, inode, path + depth); | 3307 | allocated = map_len; |
3308 | } | ||
3309 | } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) && | ||
3310 | (map_len < ee_len) && /*L1*/ | ||
3311 | ex < EXT_LAST_EXTENT(eh)) { /*L2*/ | ||
3312 | /* See if we can merge right */ | ||
3313 | ext4_lblk_t next_lblk; | ||
3314 | ext4_fsblk_t next_pblk, ee_pblk; | ||
3315 | unsigned int next_len; | ||
3316 | |||
3317 | abut_ex = ex + 1; | ||
3318 | next_lblk = le32_to_cpu(abut_ex->ee_block); | ||
3319 | next_len = ext4_ext_get_actual_len(abut_ex); | ||
3320 | next_pblk = ext4_ext_pblock(abut_ex); | ||
3321 | ee_pblk = ext4_ext_pblock(ex); | ||
3251 | 3322 | ||
3252 | /* Update path to point to the right extent */ | 3323 | /* |
3253 | path[depth].p_ext = prev_ex; | 3324 | * A transfer of blocks from 'ex' to 'abut_ex' is allowed |
3325 | * upon those conditions: | ||
3326 | * - C1: abut_ex is initialized, | ||
3327 | * - C2: abut_ex is logically abutting ex, | ||
3328 | * - C3: abut_ex is physically abutting ex, | ||
3329 | * - C4: abut_ex can receive the additional blocks without | ||
3330 | * overflowing the (initialized) length limit. | ||
3331 | */ | ||
3332 | if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ | ||
3333 | ((map->m_lblk + map_len) == next_lblk) && /*C2*/ | ||
3334 | ((ee_pblk + ee_len) == next_pblk) && /*C3*/ | ||
3335 | (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ | ||
3336 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3337 | if (err) | ||
3338 | goto out; | ||
3339 | |||
3340 | trace_ext4_ext_convert_to_initialized_fastpath(inode, | ||
3341 | map, ex, abut_ex); | ||
3342 | |||
3343 | /* Shift the start of abut_ex by 'map_len' blocks */ | ||
3344 | abut_ex->ee_block = cpu_to_le32(next_lblk - map_len); | ||
3345 | ext4_ext_store_pblock(abut_ex, next_pblk - map_len); | ||
3346 | ex->ee_len = cpu_to_le16(ee_len - map_len); | ||
3347 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ | ||
3348 | |||
3349 | /* Extend abut_ex by 'map_len' blocks */ | ||
3350 | abut_ex->ee_len = cpu_to_le16(next_len + map_len); | ||
3254 | 3351 | ||
3255 | /* Result: number of initialized blocks past m_lblk */ | 3352 | /* Result: number of initialized blocks past m_lblk */ |
3256 | allocated = write_len; | 3353 | allocated = map_len; |
3257 | goto out; | ||
3258 | } | 3354 | } |
3259 | } | 3355 | } |
3356 | if (allocated) { | ||
3357 | /* Mark the block containing both extents as dirty */ | ||
3358 | ext4_ext_dirty(handle, inode, path + depth); | ||
3359 | |||
3360 | /* Update path to point to the right extent */ | ||
3361 | path[depth].p_ext = abut_ex; | ||
3362 | goto out; | ||
3363 | } else | ||
3364 | allocated = ee_len - (map->m_lblk - ee_block); | ||
3260 | 3365 | ||
3261 | WARN_ON(map->m_lblk < ee_block); | 3366 | WARN_ON(map->m_lblk < ee_block); |
3262 | /* | 3367 | /* |
@@ -3330,7 +3435,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3330 | } | 3435 | } |
3331 | 3436 | ||
3332 | allocated = ext4_split_extent(handle, inode, path, | 3437 | allocated = ext4_split_extent(handle, inode, path, |
3333 | &split_map, split_flag, 0); | 3438 | &split_map, split_flag, flags); |
3334 | if (allocated < 0) | 3439 | if (allocated < 0) |
3335 | err = allocated; | 3440 | err = allocated; |
3336 | 3441 | ||
@@ -3650,6 +3755,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3650 | flags, allocated); | 3755 | flags, allocated); |
3651 | ext4_ext_show_leaf(inode, path); | 3756 | ext4_ext_show_leaf(inode, path); |
3652 | 3757 | ||
3758 | /* | ||
3759 | * When writing into uninitialized space, we should not fail to | ||
3760 | * allocate metadata blocks for the new extent block if needed. | ||
3761 | */ | ||
3762 | flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; | ||
3763 | |||
3653 | trace_ext4_ext_handle_uninitialized_extents(inode, map, flags, | 3764 | trace_ext4_ext_handle_uninitialized_extents(inode, map, flags, |
3654 | allocated, newblock); | 3765 | allocated, newblock); |
3655 | 3766 | ||
@@ -3713,7 +3824,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3713 | } | 3824 | } |
3714 | 3825 | ||
3715 | /* buffered write, writepage time, convert*/ | 3826 | /* buffered write, writepage time, convert*/ |
3716 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); | 3827 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags); |
3717 | if (ret >= 0) | 3828 | if (ret >= 0) |
3718 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3829 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3719 | out: | 3830 | out: |
@@ -4257,48 +4368,13 @@ out3: | |||
4257 | return err ? err : allocated; | 4368 | return err ? err : allocated; |
4258 | } | 4369 | } |
4259 | 4370 | ||
4260 | void ext4_ext_truncate(struct inode *inode) | 4371 | void ext4_ext_truncate(handle_t *handle, struct inode *inode) |
4261 | { | 4372 | { |
4262 | struct address_space *mapping = inode->i_mapping; | ||
4263 | struct super_block *sb = inode->i_sb; | 4373 | struct super_block *sb = inode->i_sb; |
4264 | ext4_lblk_t last_block; | 4374 | ext4_lblk_t last_block; |
4265 | handle_t *handle; | ||
4266 | loff_t page_len; | ||
4267 | int err = 0; | 4375 | int err = 0; |
4268 | 4376 | ||
4269 | /* | 4377 | /* |
4270 | * finish any pending end_io work so we won't run the risk of | ||
4271 | * converting any truncated blocks to initialized later | ||
4272 | */ | ||
4273 | ext4_flush_unwritten_io(inode); | ||
4274 | |||
4275 | /* | ||
4276 | * probably first extent we're gonna free will be last in block | ||
4277 | */ | ||
4278 | err = ext4_writepage_trans_blocks(inode); | ||
4279 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, err); | ||
4280 | if (IS_ERR(handle)) | ||
4281 | return; | ||
4282 | |||
4283 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
4284 | page_len = PAGE_CACHE_SIZE - | ||
4285 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
4286 | |||
4287 | err = ext4_discard_partial_page_buffers(handle, | ||
4288 | mapping, inode->i_size, page_len, 0); | ||
4289 | |||
4290 | if (err) | ||
4291 | goto out_stop; | ||
4292 | } | ||
4293 | |||
4294 | if (ext4_orphan_add(handle, inode)) | ||
4295 | goto out_stop; | ||
4296 | |||
4297 | down_write(&EXT4_I(inode)->i_data_sem); | ||
4298 | |||
4299 | ext4_discard_preallocations(inode); | ||
4300 | |||
4301 | /* | ||
4302 | * TODO: optimization is possible here. | 4378 | * TODO: optimization is possible here. |
4303 | * Probably we need not scan at all, | 4379 | * Probably we need not scan at all, |
4304 | * because page truncation is enough. | 4380 | * because page truncation is enough. |
@@ -4313,29 +4389,6 @@ void ext4_ext_truncate(struct inode *inode) | |||
4313 | err = ext4_es_remove_extent(inode, last_block, | 4389 | err = ext4_es_remove_extent(inode, last_block, |
4314 | EXT_MAX_BLOCKS - last_block); | 4390 | EXT_MAX_BLOCKS - last_block); |
4315 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); | 4391 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
4316 | |||
4317 | /* In a multi-transaction truncate, we only make the final | ||
4318 | * transaction synchronous. | ||
4319 | */ | ||
4320 | if (IS_SYNC(inode)) | ||
4321 | ext4_handle_sync(handle); | ||
4322 | |||
4323 | up_write(&EXT4_I(inode)->i_data_sem); | ||
4324 | |||
4325 | out_stop: | ||
4326 | /* | ||
4327 | * If this was a simple ftruncate() and the file will remain alive, | ||
4328 | * then we need to clear up the orphan record which we created above. | ||
4329 | * However, if this was a real unlink then we were called by | ||
4330 | * ext4_delete_inode(), and we allow that function to clean up the | ||
4331 | * orphan info for us. | ||
4332 | */ | ||
4333 | if (inode->i_nlink) | ||
4334 | ext4_orphan_del(handle, inode); | ||
4335 | |||
4336 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4337 | ext4_mark_inode_dirty(handle, inode); | ||
4338 | ext4_journal_stop(handle); | ||
4339 | } | 4392 | } |
4340 | 4393 | ||
4341 | static void ext4_falloc_update_inode(struct inode *inode, | 4394 | static void ext4_falloc_update_inode(struct inode *inode, |
@@ -4623,187 +4676,6 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
4623 | return (error < 0 ? error : 0); | 4676 | return (error < 0 ? error : 0); |
4624 | } | 4677 | } |
4625 | 4678 | ||
4626 | /* | ||
4627 | * ext4_ext_punch_hole | ||
4628 | * | ||
4629 | * Punches a hole of "length" bytes in a file starting | ||
4630 | * at byte "offset" | ||
4631 | * | ||
4632 | * @inode: The inode of the file to punch a hole in | ||
4633 | * @offset: The starting byte offset of the hole | ||
4634 | * @length: The length of the hole | ||
4635 | * | ||
4636 | * Returns the number of blocks removed or negative on err | ||
4637 | */ | ||
4638 | int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | ||
4639 | { | ||
4640 | struct inode *inode = file_inode(file); | ||
4641 | struct super_block *sb = inode->i_sb; | ||
4642 | ext4_lblk_t first_block, stop_block; | ||
4643 | struct address_space *mapping = inode->i_mapping; | ||
4644 | handle_t *handle; | ||
4645 | loff_t first_page, last_page, page_len; | ||
4646 | loff_t first_page_offset, last_page_offset; | ||
4647 | int credits, err = 0; | ||
4648 | |||
4649 | /* | ||
4650 | * Write out all dirty pages to avoid race conditions | ||
4651 | * Then release them. | ||
4652 | */ | ||
4653 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4654 | err = filemap_write_and_wait_range(mapping, | ||
4655 | offset, offset + length - 1); | ||
4656 | |||
4657 | if (err) | ||
4658 | return err; | ||
4659 | } | ||
4660 | |||
4661 | mutex_lock(&inode->i_mutex); | ||
4662 | /* It's not possible punch hole on append only file */ | ||
4663 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
4664 | err = -EPERM; | ||
4665 | goto out_mutex; | ||
4666 | } | ||
4667 | if (IS_SWAPFILE(inode)) { | ||
4668 | err = -ETXTBSY; | ||
4669 | goto out_mutex; | ||
4670 | } | ||
4671 | |||
4672 | /* No need to punch hole beyond i_size */ | ||
4673 | if (offset >= inode->i_size) | ||
4674 | goto out_mutex; | ||
4675 | |||
4676 | /* | ||
4677 | * If the hole extends beyond i_size, set the hole | ||
4678 | * to end after the page that contains i_size | ||
4679 | */ | ||
4680 | if (offset + length > inode->i_size) { | ||
4681 | length = inode->i_size + | ||
4682 | PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - | ||
4683 | offset; | ||
4684 | } | ||
4685 | |||
4686 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
4687 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | ||
4688 | |||
4689 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | ||
4690 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | ||
4691 | |||
4692 | /* Now release the pages */ | ||
4693 | if (last_page_offset > first_page_offset) { | ||
4694 | truncate_pagecache_range(inode, first_page_offset, | ||
4695 | last_page_offset - 1); | ||
4696 | } | ||
4697 | |||
4698 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
4699 | ext4_inode_block_unlocked_dio(inode); | ||
4700 | err = ext4_flush_unwritten_io(inode); | ||
4701 | if (err) | ||
4702 | goto out_dio; | ||
4703 | inode_dio_wait(inode); | ||
4704 | |||
4705 | credits = ext4_writepage_trans_blocks(inode); | ||
4706 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | ||
4707 | if (IS_ERR(handle)) { | ||
4708 | err = PTR_ERR(handle); | ||
4709 | goto out_dio; | ||
4710 | } | ||
4711 | |||
4712 | |||
4713 | /* | ||
4714 | * Now we need to zero out the non-page-aligned data in the | ||
4715 | * pages at the start and tail of the hole, and unmap the buffer | ||
4716 | * heads for the block aligned regions of the page that were | ||
4717 | * completely zeroed. | ||
4718 | */ | ||
4719 | if (first_page > last_page) { | ||
4720 | /* | ||
4721 | * If the file space being truncated is contained within a page | ||
4722 | * just zero out and unmap the middle of that page | ||
4723 | */ | ||
4724 | err = ext4_discard_partial_page_buffers(handle, | ||
4725 | mapping, offset, length, 0); | ||
4726 | |||
4727 | if (err) | ||
4728 | goto out; | ||
4729 | } else { | ||
4730 | /* | ||
4731 | * zero out and unmap the partial page that contains | ||
4732 | * the start of the hole | ||
4733 | */ | ||
4734 | page_len = first_page_offset - offset; | ||
4735 | if (page_len > 0) { | ||
4736 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
4737 | offset, page_len, 0); | ||
4738 | if (err) | ||
4739 | goto out; | ||
4740 | } | ||
4741 | |||
4742 | /* | ||
4743 | * zero out and unmap the partial page that contains | ||
4744 | * the end of the hole | ||
4745 | */ | ||
4746 | page_len = offset + length - last_page_offset; | ||
4747 | if (page_len > 0) { | ||
4748 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
4749 | last_page_offset, page_len, 0); | ||
4750 | if (err) | ||
4751 | goto out; | ||
4752 | } | ||
4753 | } | ||
4754 | |||
4755 | /* | ||
4756 | * If i_size is contained in the last page, we need to | ||
4757 | * unmap and zero the partial page after i_size | ||
4758 | */ | ||
4759 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
4760 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
4761 | |||
4762 | page_len = PAGE_CACHE_SIZE - | ||
4763 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
4764 | |||
4765 | if (page_len > 0) { | ||
4766 | err = ext4_discard_partial_page_buffers(handle, | ||
4767 | mapping, inode->i_size, page_len, 0); | ||
4768 | |||
4769 | if (err) | ||
4770 | goto out; | ||
4771 | } | ||
4772 | } | ||
4773 | |||
4774 | first_block = (offset + sb->s_blocksize - 1) >> | ||
4775 | EXT4_BLOCK_SIZE_BITS(sb); | ||
4776 | stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
4777 | |||
4778 | /* If there are no blocks to remove, return now */ | ||
4779 | if (first_block >= stop_block) | ||
4780 | goto out; | ||
4781 | |||
4782 | down_write(&EXT4_I(inode)->i_data_sem); | ||
4783 | ext4_discard_preallocations(inode); | ||
4784 | |||
4785 | err = ext4_es_remove_extent(inode, first_block, | ||
4786 | stop_block - first_block); | ||
4787 | err = ext4_ext_remove_space(inode, first_block, stop_block - 1); | ||
4788 | |||
4789 | ext4_discard_preallocations(inode); | ||
4790 | |||
4791 | if (IS_SYNC(inode)) | ||
4792 | ext4_handle_sync(handle); | ||
4793 | |||
4794 | up_write(&EXT4_I(inode)->i_data_sem); | ||
4795 | |||
4796 | out: | ||
4797 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4798 | ext4_mark_inode_dirty(handle, inode); | ||
4799 | ext4_journal_stop(handle); | ||
4800 | out_dio: | ||
4801 | ext4_inode_resume_unlocked_dio(inode); | ||
4802 | out_mutex: | ||
4803 | mutex_unlock(&inode->i_mutex); | ||
4804 | return err; | ||
4805 | } | ||
4806 | |||
4807 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4679 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
4808 | __u64 start, __u64 len) | 4680 | __u64 start, __u64 len) |
4809 | { | 4681 | { |