Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--  fs/ext4/extents.c  105
1 file changed, 64 insertions, 41 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0554c48cb1fd..63a75810b7c3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -117,11 +117,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 	struct ext4_extent *ex;
 	depth = path->p_depth;
 
-	/* try to predict block placement */
+	/*
+	 * Try to predict block placement assuming that we are
+	 * filling in a file which will eventually be
+	 * non-sparse --- i.e., in the case of libbfd writing
+	 * an ELF object's sections out of order but in a way
+	 * that eventually results in a contiguous object or
+	 * executable file, or some database extending a table
+	 * space file.  However, this is actually somewhat
+	 * non-ideal if we are writing a sparse file such as
+	 * qemu or KVM writing a raw image file that is going
+	 * to stay fairly sparse, since it will end up
+	 * fragmenting the file system's free space.  Maybe we
+	 * should have some heuristics or some way to allow
+	 * userspace to pass a hint to the file system,
+	 * especially if the latter case turns out to be
+	 * common.
+	 */
 	ex = path[depth].p_ext;
-	if (ex)
-		return (ext4_ext_pblock(ex) +
-			(block - le32_to_cpu(ex->ee_block)));
+	if (ex) {
+		ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
+		ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+
+		if (block > ext_block)
+			return ext_pblk + (block - ext_block);
+		else
+			return ext_pblk - (ext_block - block);
+	}
 
 	/* it looks like index is empty;
 	 * try to find starting block from index itself */
@@ -244,7 +266,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  * to allocate @blocks
  * Worse case is one block per extent
  */
-int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
+int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	int idxs, num = 0;
@@ -1872,12 +1894,10 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			cbex.ec_block = start;
 			cbex.ec_len = end - start;
 			cbex.ec_start = 0;
-			cbex.ec_type = EXT4_EXT_CACHE_GAP;
 		} else {
 			cbex.ec_block = le32_to_cpu(ex->ee_block);
 			cbex.ec_len = ext4_ext_get_actual_len(ex);
 			cbex.ec_start = ext4_ext_pblock(ex);
-			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
 		}
 
 		if (unlikely(cbex.ec_len == 0)) {
@@ -1917,13 +1937,12 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 
 static void
 ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
-			__u32 len, ext4_fsblk_t start, int type)
+			__u32 len, ext4_fsblk_t start)
 {
 	struct ext4_ext_cache *cex;
 	BUG_ON(len == 0);
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	cex = &EXT4_I(inode)->i_cached_extent;
-	cex->ec_type = type;
 	cex->ec_block = block;
 	cex->ec_len = len;
 	cex->ec_start = start;
@@ -1976,15 +1995,18 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 	}
 
 	ext_debug(" -> %u:%lu\n", lblock, len);
-	ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
+	ext4_ext_put_in_cache(inode, lblock, len, 0);
 }
 
+/*
+ * Return 0 if the cache is invalid, 1 if the cache is valid
+ */
 static int
 ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 			struct ext4_extent *ex)
 {
 	struct ext4_ext_cache *cex;
-	int ret = EXT4_EXT_CACHE_NO;
+	int ret = 0;
 
 	/*
 	 * We borrow i_block_reservation_lock to protect i_cached_extent
@@ -1993,11 +2015,9 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 	cex = &EXT4_I(inode)->i_cached_extent;
 
 	/* has cache valid data? */
-	if (cex->ec_type == EXT4_EXT_CACHE_NO)
+	if (cex->ec_len == 0)
 		goto errout;
 
-	BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
-			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
 	if (in_range(block, cex->ec_block, cex->ec_len)) {
 		ex->ee_block = cpu_to_le32(cex->ec_block);
 		ext4_ext_store_pblock(ex, cex->ec_start);
@@ -2005,7 +2025,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 		ext_debug("%u cached by %u:%u:%llu\n",
 				block,
 				cex->ec_block, cex->ec_len, cex->ec_start);
-		ret = cex->ec_type;
+		ret = 1;
 	}
 errout:
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2825,14 +2845,14 @@ fix_extent_len:
  * to an uninitialized extent.
  *
  * Writing to an uninitized extent may result in splitting the uninitialized
- * extent into multiple /intialized unintialized extents (up to three)
+ * extent into multiple initialized/uninitialized extents (up to three)
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be uninitialized
  *   b> Splits in two extents: Write is happening at either end of the extent
  *   c> Splits in three extents: Somone is writing in middle of the extent
  *
  * One of more index blocks maybe needed if the extent tree grow after
- * the unintialized extent split. To prevent ENOSPC occur at the IO
+ * the uninitialized extent split. To prevent ENOSPC occur at the IO
  * complete, we need to split the uninitialized extent before DIO submit
  * the IO. The uninitialized extent called at this time will be split
  * into three uninitialized extent(at most). After IO complete, the part
@@ -3082,7 +3102,7 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
  * Handle EOFBLOCKS_FL flag, clearing it if necessary
  */
 static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
-			      struct ext4_map_blocks *map,
+			      ext4_lblk_t lblk,
 			      struct ext4_ext_path *path,
 			      unsigned int len)
 {
@@ -3112,7 +3132,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	 * this turns out to be false, we can bail out from this
 	 * function immediately.
 	 */
-	if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+	if (lblk + len < le32_to_cpu(last_ex->ee_block) +
 	    ext4_ext_get_actual_len(last_ex))
 		return 0;
 	/*
@@ -3168,8 +3188,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 							path);
 		if (ret >= 0) {
 			ext4_update_inode_fsync_trans(handle, inode, 1);
-			err = check_eofblocks_fl(handle, inode, map, path,
-						 map->m_len);
+			err = check_eofblocks_fl(handle, inode, map->m_lblk,
+						 path, map->m_len);
 		} else
 			err = ret;
 		goto out2;
@@ -3199,7 +3219,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
 	if (ret >= 0) {
 		ext4_update_inode_fsync_trans(handle, inode, 1);
-		err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+		err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+					 map->m_len);
 		if (err < 0)
 			goto out2;
 	}
@@ -3276,7 +3297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	struct ext4_extent_header *eh;
 	struct ext4_extent newex, *ex;
 	ext4_fsblk_t newblock;
-	int err = 0, depth, ret, cache_type;
+	int err = 0, depth, ret;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3285,9 +3306,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		  map->m_lblk, map->m_len, inode->i_ino);
 
 	/* check in cache */
-	cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
-	if (cache_type) {
-		if (cache_type == EXT4_EXT_CACHE_GAP) {
+	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+		if (!newex.ee_start_lo && !newex.ee_start_hi) {
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
@@ -3296,7 +3316,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 				goto out2;
 			}
 			/* we should allocate requested block */
-		} else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
+		} else {
 			/* block is already allocated */
 			newblock = map->m_lblk
 				   - le32_to_cpu(newex.ee_block)
@@ -3305,8 +3325,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			allocated = ext4_ext_get_actual_len(&newex) -
 				(map->m_lblk - le32_to_cpu(newex.ee_block));
 			goto out;
-		} else {
-			BUG();
 		}
 	}
 
@@ -3357,8 +3375,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			/* Do not put uninitialized extent in the cache */
 			if (!ext4_ext_is_uninitialized(ex)) {
 				ext4_ext_put_in_cache(inode, ee_block,
-							ee_len, ee_start,
-							EXT4_EXT_CACHE_EXTENT);
+							ee_len, ee_start);
 				goto out;
 			}
 			ret = ext4_ext_handle_uninitialized_extents(handle,
@@ -3456,7 +3473,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		map->m_flags |= EXT4_MAP_UNINIT;
 	}
 
-	err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
 	if (err)
 		goto out2;
 
@@ -3490,8 +3507,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	 * when it is _not_ an uninitialized extent.
 	 */
 	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
-		ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
-						EXT4_EXT_CACHE_EXTENT);
+		ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 	} else
 		ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -3519,6 +3535,12 @@ void ext4_ext_truncate(struct inode *inode)
 	int err = 0;
 
 	/*
+	 * finish any pending end_io work so we won't run the risk of
+	 * converting any truncated blocks to initialized later
+	 */
+	ext4_flush_completed_IO(inode);
+
+	/*
 	 * probably first extent we're gonna free will be last in block
 	 */
 	err = ext4_writepage_trans_blocks(inode);
@@ -3605,14 +3627,15 @@ static void ext4_falloc_update_inode(struct inode *inode,
 }
 
 /*
- * preallocate space for a file. This implements ext4's fallocate inode
+ * preallocate space for a file. This implements ext4's fallocate file
  * operation, which gets called from sys_fallocate system call.
  * For block-mapped files, posix_fallocate should fall back to the method
  * of writing zeroes to the required new blocks (the same behavior which is
  * expected for file systems which do not support fallocate() system call).
  */
-long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
+long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
 	handle_t *handle;
 	loff_t new_size;
 	unsigned int max_blocks;
@@ -3622,6 +3645,10 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	struct ext4_map_blocks map;
 	unsigned int credits, blkbits = inode->i_blkbits;
 
+	/* We only support the FALLOC_FL_KEEP_SIZE mode */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
 	/*
 	 * currently supporting (pre)allocate mode for extent-based
 	 * files _only_
@@ -3629,10 +3656,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
-	/* preallocation to directories is currently not supported */
-	if (S_ISDIR(inode->i_mode))
-		return -ENODEV;
-
 	map.m_lblk = offset >> blkbits;
 	/*
 	 * We can't just convert len to max_blocks because
@@ -3767,7 +3790,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 
 	logical = (__u64)newex->ec_block << blksize_bits;
 
-	if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
+	if (newex->ec_start == 0) {
 		pgoff_t offset;
 		struct page *page;
 		struct buffer_head *bh = NULL;