diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 818 |
1 files changed, 706 insertions, 112 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74bc2d549c58..82df3ce9874a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include <linux/quotaops.h> | 37 | #include <linux/quotaops.h> |
38 | #include <linux/string.h> | 38 | #include <linux/string.h> |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/falloc.h> | ||
41 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
42 | #include <linux/fiemap.h> | 41 | #include <linux/fiemap.h> |
43 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
@@ -1691,7 +1690,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1691 | * the extent that was written properly split out and conversion to | 1690 | * the extent that was written properly split out and conversion to |
1692 | * initialized is trivial. | 1691 | * initialized is trivial. |
1693 | */ | 1692 | */ |
1694 | if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) | 1693 | if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) |
1695 | return 0; | 1694 | return 0; |
1696 | 1695 | ||
1697 | ext1_ee_len = ext4_ext_get_actual_len(ex1); | 1696 | ext1_ee_len = ext4_ext_get_actual_len(ex1); |
@@ -1708,6 +1707,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1708 | */ | 1707 | */ |
1709 | if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) | 1708 | if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) |
1710 | return 0; | 1709 | return 0; |
1710 | if (ext4_ext_is_uninitialized(ex1) && | ||
1711 | (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || | ||
1712 | atomic_read(&EXT4_I(inode)->i_unwritten) || | ||
1713 | (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) | ||
1714 | return 0; | ||
1711 | #ifdef AGGRESSIVE_TEST | 1715 | #ifdef AGGRESSIVE_TEST |
1712 | if (ext1_ee_len >= 4) | 1716 | if (ext1_ee_len >= 4) |
1713 | return 0; | 1717 | return 0; |
@@ -1731,7 +1735,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
1731 | { | 1735 | { |
1732 | struct ext4_extent_header *eh; | 1736 | struct ext4_extent_header *eh; |
1733 | unsigned int depth, len; | 1737 | unsigned int depth, len; |
1734 | int merge_done = 0; | 1738 | int merge_done = 0, uninit; |
1735 | 1739 | ||
1736 | depth = ext_depth(inode); | 1740 | depth = ext_depth(inode); |
1737 | BUG_ON(path[depth].p_hdr == NULL); | 1741 | BUG_ON(path[depth].p_hdr == NULL); |
@@ -1741,8 +1745,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
1741 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) | 1745 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) |
1742 | break; | 1746 | break; |
1743 | /* merge with next extent! */ | 1747 | /* merge with next extent! */ |
1748 | uninit = ext4_ext_is_uninitialized(ex); | ||
1744 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1749 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
1745 | + ext4_ext_get_actual_len(ex + 1)); | 1750 | + ext4_ext_get_actual_len(ex + 1)); |
1751 | if (uninit) | ||
1752 | ext4_ext_mark_uninitialized(ex); | ||
1746 | 1753 | ||
1747 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { | 1754 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { |
1748 | len = (EXT_LAST_EXTENT(eh) - ex - 1) | 1755 | len = (EXT_LAST_EXTENT(eh) - ex - 1) |
@@ -1896,7 +1903,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1896 | struct ext4_ext_path *npath = NULL; | 1903 | struct ext4_ext_path *npath = NULL; |
1897 | int depth, len, err; | 1904 | int depth, len, err; |
1898 | ext4_lblk_t next; | 1905 | ext4_lblk_t next; |
1899 | int mb_flags = 0; | 1906 | int mb_flags = 0, uninit; |
1900 | 1907 | ||
1901 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1908 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
1902 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1909 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
@@ -1946,9 +1953,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1946 | path + depth); | 1953 | path + depth); |
1947 | if (err) | 1954 | if (err) |
1948 | return err; | 1955 | return err; |
1949 | 1956 | uninit = ext4_ext_is_uninitialized(ex); | |
1950 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1957 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
1951 | + ext4_ext_get_actual_len(newext)); | 1958 | + ext4_ext_get_actual_len(newext)); |
1959 | if (uninit) | ||
1960 | ext4_ext_mark_uninitialized(ex); | ||
1952 | eh = path[depth].p_hdr; | 1961 | eh = path[depth].p_hdr; |
1953 | nearex = ex; | 1962 | nearex = ex; |
1954 | goto merge; | 1963 | goto merge; |
@@ -1971,10 +1980,13 @@ prepend: | |||
1971 | if (err) | 1980 | if (err) |
1972 | return err; | 1981 | return err; |
1973 | 1982 | ||
1983 | uninit = ext4_ext_is_uninitialized(ex); | ||
1974 | ex->ee_block = newext->ee_block; | 1984 | ex->ee_block = newext->ee_block; |
1975 | ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); | 1985 | ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); |
1976 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1986 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
1977 | + ext4_ext_get_actual_len(newext)); | 1987 | + ext4_ext_get_actual_len(newext)); |
1988 | if (uninit) | ||
1989 | ext4_ext_mark_uninitialized(ex); | ||
1978 | eh = path[depth].p_hdr; | 1990 | eh = path[depth].p_hdr; |
1979 | nearex = ex; | 1991 | nearex = ex; |
1980 | goto merge; | 1992 | goto merge; |
@@ -2585,6 +2597,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2585 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2597 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2586 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2598 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2587 | 2599 | ||
2600 | /* | ||
2601 | * If we're starting with an extent other than the last one in the | ||
2602 | * node, we need to see if it shares a cluster with the extent to | ||
2603 | * the right (towards the end of the file). If its leftmost cluster | ||
2604 | * is this extent's rightmost cluster and it is not cluster aligned, | ||
2605 | * we'll mark it as a partial that is not to be deallocated. | ||
2606 | */ | ||
2607 | |||
2608 | if (ex != EXT_LAST_EXTENT(eh)) { | ||
2609 | ext4_fsblk_t current_pblk, right_pblk; | ||
2610 | long long current_cluster, right_cluster; | ||
2611 | |||
2612 | current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; | ||
2613 | current_cluster = (long long)EXT4_B2C(sbi, current_pblk); | ||
2614 | right_pblk = ext4_ext_pblock(ex + 1); | ||
2615 | right_cluster = (long long)EXT4_B2C(sbi, right_pblk); | ||
2616 | if (current_cluster == right_cluster && | ||
2617 | EXT4_PBLK_COFF(sbi, right_pblk)) | ||
2618 | *partial_cluster = -right_cluster; | ||
2619 | } | ||
2620 | |||
2588 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); | 2621 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); |
2589 | 2622 | ||
2590 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2623 | while (ex >= EXT_FIRST_EXTENT(eh) && |
@@ -2710,10 +2743,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2710 | err = ext4_ext_correct_indexes(handle, inode, path); | 2743 | err = ext4_ext_correct_indexes(handle, inode, path); |
2711 | 2744 | ||
2712 | /* | 2745 | /* |
2713 | * Free the partial cluster only if the current extent does not | 2746 | * If there's a partial cluster and at least one extent remains in |
2714 | * reference it. Otherwise we might free used cluster. | 2747 | * the leaf, free the partial cluster if it isn't shared with the |
2748 | * current extent. If there's a partial cluster and no extents | ||
2749 | * remain in the leaf, it can't be freed here. It can only be | ||
2750 | * freed when it's possible to determine if it's not shared with | ||
2751 | * any other extent - when the next leaf is processed or when space | ||
2752 | * removal is complete. | ||
2715 | */ | 2753 | */ |
2716 | if (*partial_cluster > 0 && | 2754 | if (*partial_cluster > 0 && eh->eh_entries && |
2717 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2755 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != |
2718 | *partial_cluster)) { | 2756 | *partial_cluster)) { |
2719 | int flags = get_default_free_blocks_flags(inode); | 2757 | int flags = get_default_free_blocks_flags(inode); |
@@ -3569,6 +3607,8 @@ out: | |||
3569 | * b> Splits in two extents: Write is happening at either end of the extent | 3607 | * b> Splits in two extents: Write is happening at either end of the extent |
3570 | * c> Splits in three extents: Someone is writing in middle of the extent | 3608 | * c> Splits in three extents: Someone is writing in middle of the extent |
3571 | * | 3609 | * |
3610 | * This works the same way in the case of initialized -> unwritten conversion. | ||
3611 | * | ||
3572 | * One or more index blocks may be needed if the extent tree grows after | 3612 | * One or more index blocks may be needed if the extent tree grows after |
3573 | * the uninitialized extent split. To prevent ENOSPC occur at the IO | 3613 | * the uninitialized extent split. To prevent ENOSPC occur at the IO |
3574 | * complete, we need to split the uninitialized extent before DIO submit | 3614 | * complete, we need to split the uninitialized extent before DIO submit |
@@ -3579,7 +3619,7 @@ out: | |||
3579 | * | 3619 | * |
3580 | * Returns the size of uninitialized extent to be written on success. | 3620 | * Returns the size of uninitialized extent to be written on success. |
3581 | */ | 3621 | */ |
3582 | static int ext4_split_unwritten_extents(handle_t *handle, | 3622 | static int ext4_split_convert_extents(handle_t *handle, |
3583 | struct inode *inode, | 3623 | struct inode *inode, |
3584 | struct ext4_map_blocks *map, | 3624 | struct ext4_map_blocks *map, |
3585 | struct ext4_ext_path *path, | 3625 | struct ext4_ext_path *path, |
@@ -3591,9 +3631,9 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3591 | unsigned int ee_len; | 3631 | unsigned int ee_len; |
3592 | int split_flag = 0, depth; | 3632 | int split_flag = 0, depth; |
3593 | 3633 | ||
3594 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" | 3634 | ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n", |
3595 | "block %llu, max_blocks %u\n", inode->i_ino, | 3635 | __func__, inode->i_ino, |
3596 | (unsigned long long)map->m_lblk, map->m_len); | 3636 | (unsigned long long)map->m_lblk, map->m_len); |
3597 | 3637 | ||
3598 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3638 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3599 | inode->i_sb->s_blocksize_bits; | 3639 | inode->i_sb->s_blocksize_bits; |
@@ -3608,14 +3648,73 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3608 | ee_block = le32_to_cpu(ex->ee_block); | 3648 | ee_block = le32_to_cpu(ex->ee_block); |
3609 | ee_len = ext4_ext_get_actual_len(ex); | 3649 | ee_len = ext4_ext_get_actual_len(ex); |
3610 | 3650 | ||
3611 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3651 | /* Convert to unwritten */ |
3612 | split_flag |= EXT4_EXT_MARK_UNINIT2; | 3652 | if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { |
3613 | if (flags & EXT4_GET_BLOCKS_CONVERT) | 3653 | split_flag |= EXT4_EXT_DATA_VALID1; |
3614 | split_flag |= EXT4_EXT_DATA_VALID2; | 3654 | /* Convert to initialized */ |
3655 | } else if (flags & EXT4_GET_BLOCKS_CONVERT) { | ||
3656 | split_flag |= ee_block + ee_len <= eof_block ? | ||
3657 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
3658 | split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); | ||
3659 | } | ||
3615 | flags |= EXT4_GET_BLOCKS_PRE_IO; | 3660 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
3616 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); | 3661 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); |
3617 | } | 3662 | } |
3618 | 3663 | ||
3664 | static int ext4_convert_initialized_extents(handle_t *handle, | ||
3665 | struct inode *inode, | ||
3666 | struct ext4_map_blocks *map, | ||
3667 | struct ext4_ext_path *path) | ||
3668 | { | ||
3669 | struct ext4_extent *ex; | ||
3670 | ext4_lblk_t ee_block; | ||
3671 | unsigned int ee_len; | ||
3672 | int depth; | ||
3673 | int err = 0; | ||
3674 | |||
3675 | depth = ext_depth(inode); | ||
3676 | ex = path[depth].p_ext; | ||
3677 | ee_block = le32_to_cpu(ex->ee_block); | ||
3678 | ee_len = ext4_ext_get_actual_len(ex); | ||
3679 | |||
3680 | ext_debug("%s: inode %lu, logical" | ||
3681 | "block %llu, max_blocks %u\n", __func__, inode->i_ino, | ||
3682 | (unsigned long long)ee_block, ee_len); | ||
3683 | |||
3684 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
3685 | err = ext4_split_convert_extents(handle, inode, map, path, | ||
3686 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); | ||
3687 | if (err < 0) | ||
3688 | goto out; | ||
3689 | ext4_ext_drop_refs(path); | ||
3690 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | ||
3691 | if (IS_ERR(path)) { | ||
3692 | err = PTR_ERR(path); | ||
3693 | goto out; | ||
3694 | } | ||
3695 | depth = ext_depth(inode); | ||
3696 | ex = path[depth].p_ext; | ||
3697 | } | ||
3698 | |||
3699 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3700 | if (err) | ||
3701 | goto out; | ||
3702 | /* first mark the extent as uninitialized */ | ||
3703 | ext4_ext_mark_uninitialized(ex); | ||
3704 | |||
3705 | /* note: ext4_ext_correct_indexes() isn't needed here because | ||
3706 | * borders are not changed | ||
3707 | */ | ||
3708 | ext4_ext_try_to_merge(handle, inode, path, ex); | ||
3709 | |||
3710 | /* Mark modified extent as dirty */ | ||
3711 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | ||
3712 | out: | ||
3713 | ext4_ext_show_leaf(inode, path); | ||
3714 | return err; | ||
3715 | } | ||
3716 | |||
3717 | |||
3619 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3718 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
3620 | struct inode *inode, | 3719 | struct inode *inode, |
3621 | struct ext4_map_blocks *map, | 3720 | struct ext4_map_blocks *map, |
@@ -3649,8 +3748,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3649 | inode->i_ino, (unsigned long long)ee_block, ee_len, | 3748 | inode->i_ino, (unsigned long long)ee_block, ee_len, |
3650 | (unsigned long long)map->m_lblk, map->m_len); | 3749 | (unsigned long long)map->m_lblk, map->m_len); |
3651 | #endif | 3750 | #endif |
3652 | err = ext4_split_unwritten_extents(handle, inode, map, path, | 3751 | err = ext4_split_convert_extents(handle, inode, map, path, |
3653 | EXT4_GET_BLOCKS_CONVERT); | 3752 | EXT4_GET_BLOCKS_CONVERT); |
3654 | if (err < 0) | 3753 | if (err < 0) |
3655 | goto out; | 3754 | goto out; |
3656 | ext4_ext_drop_refs(path); | 3755 | ext4_ext_drop_refs(path); |
@@ -3851,6 +3950,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
3851 | } | 3950 | } |
3852 | 3951 | ||
3853 | static int | 3952 | static int |
3953 | ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, | ||
3954 | struct ext4_map_blocks *map, | ||
3955 | struct ext4_ext_path *path, int flags, | ||
3956 | unsigned int allocated, ext4_fsblk_t newblock) | ||
3957 | { | ||
3958 | int ret = 0; | ||
3959 | int err = 0; | ||
3960 | |||
3961 | /* | ||
3962 | * Make sure that the extent is no bigger than we support with | ||
3963 | * uninitialized extent | ||
3964 | */ | ||
3965 | if (map->m_len > EXT_UNINIT_MAX_LEN) | ||
3966 | map->m_len = EXT_UNINIT_MAX_LEN / 2; | ||
3967 | |||
3968 | ret = ext4_convert_initialized_extents(handle, inode, map, | ||
3969 | path); | ||
3970 | if (ret >= 0) { | ||
3971 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
3972 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | ||
3973 | path, map->m_len); | ||
3974 | } else | ||
3975 | err = ret; | ||
3976 | map->m_flags |= EXT4_MAP_UNWRITTEN; | ||
3977 | if (allocated > map->m_len) | ||
3978 | allocated = map->m_len; | ||
3979 | map->m_len = allocated; | ||
3980 | |||
3981 | return err ? err : allocated; | ||
3982 | } | ||
3983 | |||
3984 | static int | ||
3854 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3985 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3855 | struct ext4_map_blocks *map, | 3986 | struct ext4_map_blocks *map, |
3856 | struct ext4_ext_path *path, int flags, | 3987 | struct ext4_ext_path *path, int flags, |
@@ -3877,8 +4008,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3877 | 4008 | ||
3878 | /* get_block() before submit the IO, split the extent */ | 4009 | /* get_block() before submit the IO, split the extent */ |
3879 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4010 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3880 | ret = ext4_split_unwritten_extents(handle, inode, map, | 4011 | ret = ext4_split_convert_extents(handle, inode, map, |
3881 | path, flags); | 4012 | path, flags | EXT4_GET_BLOCKS_CONVERT); |
3882 | if (ret <= 0) | 4013 | if (ret <= 0) |
3883 | goto out; | 4014 | goto out; |
3884 | /* | 4015 | /* |
@@ -3993,10 +4124,6 @@ out1: | |||
3993 | map->m_pblk = newblock; | 4124 | map->m_pblk = newblock; |
3994 | map->m_len = allocated; | 4125 | map->m_len = allocated; |
3995 | out2: | 4126 | out2: |
3996 | if (path) { | ||
3997 | ext4_ext_drop_refs(path); | ||
3998 | kfree(path); | ||
3999 | } | ||
4000 | return err ? err : allocated; | 4127 | return err ? err : allocated; |
4001 | } | 4128 | } |
4002 | 4129 | ||
@@ -4128,7 +4255,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4128 | struct ext4_extent newex, *ex, *ex2; | 4255 | struct ext4_extent newex, *ex, *ex2; |
4129 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4256 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
4130 | ext4_fsblk_t newblock = 0; | 4257 | ext4_fsblk_t newblock = 0; |
4131 | int free_on_err = 0, err = 0, depth; | 4258 | int free_on_err = 0, err = 0, depth, ret; |
4132 | unsigned int allocated = 0, offset = 0; | 4259 | unsigned int allocated = 0, offset = 0; |
4133 | unsigned int allocated_clusters = 0; | 4260 | unsigned int allocated_clusters = 0; |
4134 | struct ext4_allocation_request ar; | 4261 | struct ext4_allocation_request ar; |
@@ -4170,6 +4297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4170 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); | 4297 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
4171 | unsigned short ee_len; | 4298 | unsigned short ee_len; |
4172 | 4299 | ||
4300 | |||
4173 | /* | 4301 | /* |
4174 | * Uninitialized extents are treated as holes, except that | 4302 | * Uninitialized extents are treated as holes, except that |
4175 | * we split out initialized portions during a write. | 4303 | * we split out initialized portions during a write. |
@@ -4186,13 +4314,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4186 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, | 4314 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
4187 | ee_block, ee_len, newblock); | 4315 | ee_block, ee_len, newblock); |
4188 | 4316 | ||
4189 | if (!ext4_ext_is_uninitialized(ex)) | 4317 | /* |
4318 | * If the extent is initialized check whether the | ||
4319 | * caller wants to convert it to unwritten. | ||
4320 | */ | ||
4321 | if ((!ext4_ext_is_uninitialized(ex)) && | ||
4322 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { | ||
4323 | allocated = ext4_ext_convert_initialized_extent( | ||
4324 | handle, inode, map, path, flags, | ||
4325 | allocated, newblock); | ||
4326 | goto out2; | ||
4327 | } else if (!ext4_ext_is_uninitialized(ex)) | ||
4190 | goto out; | 4328 | goto out; |
4191 | 4329 | ||
4192 | allocated = ext4_ext_handle_uninitialized_extents( | 4330 | ret = ext4_ext_handle_uninitialized_extents( |
4193 | handle, inode, map, path, flags, | 4331 | handle, inode, map, path, flags, |
4194 | allocated, newblock); | 4332 | allocated, newblock); |
4195 | goto out3; | 4333 | if (ret < 0) |
4334 | err = ret; | ||
4335 | else | ||
4336 | allocated = ret; | ||
4337 | goto out2; | ||
4196 | } | 4338 | } |
4197 | } | 4339 | } |
4198 | 4340 | ||
@@ -4473,7 +4615,6 @@ out2: | |||
4473 | kfree(path); | 4615 | kfree(path); |
4474 | } | 4616 | } |
4475 | 4617 | ||
4476 | out3: | ||
4477 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4618 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4478 | err ? err : allocated); | 4619 | err ? err : allocated); |
4479 | ext4_es_lru_add(inode); | 4620 | ext4_es_lru_add(inode); |
@@ -4514,34 +4655,200 @@ retry: | |||
4514 | ext4_std_error(inode->i_sb, err); | 4655 | ext4_std_error(inode->i_sb, err); |
4515 | } | 4656 | } |
4516 | 4657 | ||
4517 | static void ext4_falloc_update_inode(struct inode *inode, | 4658 | static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, |
4518 | int mode, loff_t new_size, int update_ctime) | 4659 | ext4_lblk_t len, int flags, int mode) |
4519 | { | 4660 | { |
4520 | struct timespec now; | 4661 | struct inode *inode = file_inode(file); |
4662 | handle_t *handle; | ||
4663 | int ret = 0; | ||
4664 | int ret2 = 0; | ||
4665 | int retries = 0; | ||
4666 | struct ext4_map_blocks map; | ||
4667 | unsigned int credits; | ||
4521 | 4668 | ||
4522 | if (update_ctime) { | 4669 | map.m_lblk = offset; |
4523 | now = current_fs_time(inode->i_sb); | 4670 | /* |
4524 | if (!timespec_equal(&inode->i_ctime, &now)) | 4671 | * Don't normalize the request if it can fit in one extent so |
4525 | inode->i_ctime = now; | 4672 | * that it doesn't get unnecessarily split into multiple |
4673 | * extents. | ||
4674 | */ | ||
4675 | if (len <= EXT_UNINIT_MAX_LEN) | ||
4676 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
4677 | |||
4678 | /* | ||
4679 | * credits to insert 1 extent into extent tree | ||
4680 | */ | ||
4681 | credits = ext4_chunk_trans_blocks(inode, len); | ||
4682 | |||
4683 | retry: | ||
4684 | while (ret >= 0 && ret < len) { | ||
4685 | map.m_lblk = map.m_lblk + ret; | ||
4686 | map.m_len = len = len - ret; | ||
4687 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | ||
4688 | credits); | ||
4689 | if (IS_ERR(handle)) { | ||
4690 | ret = PTR_ERR(handle); | ||
4691 | break; | ||
4692 | } | ||
4693 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
4694 | if (ret <= 0) { | ||
4695 | ext4_debug("inode #%lu: block %u: len %u: " | ||
4696 | "ext4_ext_map_blocks returned %d", | ||
4697 | inode->i_ino, map.m_lblk, | ||
4698 | map.m_len, ret); | ||
4699 | ext4_mark_inode_dirty(handle, inode); | ||
4700 | ret2 = ext4_journal_stop(handle); | ||
4701 | break; | ||
4702 | } | ||
4703 | ret2 = ext4_journal_stop(handle); | ||
4704 | if (ret2) | ||
4705 | break; | ||
4706 | } | ||
4707 | if (ret == -ENOSPC && | ||
4708 | ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
4709 | ret = 0; | ||
4710 | goto retry; | ||
4526 | } | 4711 | } |
4712 | |||
4713 | return ret > 0 ? ret2 : ret; | ||
4714 | } | ||
4715 | |||
4716 | static long ext4_zero_range(struct file *file, loff_t offset, | ||
4717 | loff_t len, int mode) | ||
4718 | { | ||
4719 | struct inode *inode = file_inode(file); | ||
4720 | handle_t *handle = NULL; | ||
4721 | unsigned int max_blocks; | ||
4722 | loff_t new_size = 0; | ||
4723 | int ret = 0; | ||
4724 | int flags; | ||
4725 | int partial; | ||
4726 | loff_t start, end; | ||
4727 | ext4_lblk_t lblk; | ||
4728 | struct address_space *mapping = inode->i_mapping; | ||
4729 | unsigned int blkbits = inode->i_blkbits; | ||
4730 | |||
4731 | trace_ext4_zero_range(inode, offset, len, mode); | ||
4732 | |||
4733 | /* | ||
4734 | * Write out all dirty pages to avoid race conditions | ||
4735 | * Then release them. | ||
4736 | */ | ||
4737 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4738 | ret = filemap_write_and_wait_range(mapping, offset, | ||
4739 | offset + len - 1); | ||
4740 | if (ret) | ||
4741 | return ret; | ||
4742 | } | ||
4743 | |||
4527 | /* | 4744 | /* |
4528 | * Update only when preallocation was requested beyond | 4745 | * Round up offset. This is not fallocate, we need to zero out |
4529 | * the file size. | 4746 | * blocks, so convert interior block aligned part of the range to |
4747 | * unwritten and possibly manually zero out unaligned parts of the | ||
4748 | * range. | ||
4530 | */ | 4749 | */ |
4531 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { | 4750 | start = round_up(offset, 1 << blkbits); |
4751 | end = round_down((offset + len), 1 << blkbits); | ||
4752 | |||
4753 | if (start < offset || end > offset + len) | ||
4754 | return -EINVAL; | ||
4755 | partial = (offset + len) & ((1 << blkbits) - 1); | ||
4756 | |||
4757 | lblk = start >> blkbits; | ||
4758 | max_blocks = (end >> blkbits); | ||
4759 | if (max_blocks < lblk) | ||
4760 | max_blocks = 0; | ||
4761 | else | ||
4762 | max_blocks -= lblk; | ||
4763 | |||
4764 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | | ||
4765 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; | ||
4766 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
4767 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | ||
4768 | |||
4769 | mutex_lock(&inode->i_mutex); | ||
4770 | |||
4771 | /* | ||
4772 | * Indirect files do not support unwritten extents | ||
4773 | */ | ||
4774 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | ||
4775 | ret = -EOPNOTSUPP; | ||
4776 | goto out_mutex; | ||
4777 | } | ||
4778 | |||
4779 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
4780 | offset + len > i_size_read(inode)) { | ||
4781 | new_size = offset + len; | ||
4782 | ret = inode_newsize_ok(inode, new_size); | ||
4783 | if (ret) | ||
4784 | goto out_mutex; | ||
4785 | /* | ||
4786 | * If we have a partial block after EOF we have to allocate | ||
4787 | * the entire block. | ||
4788 | */ | ||
4789 | if (partial) | ||
4790 | max_blocks += 1; | ||
4791 | } | ||
4792 | |||
4793 | if (max_blocks > 0) { | ||
4794 | |||
4795 | /* Now release the pages and zero block aligned part of pages*/ | ||
4796 | truncate_pagecache_range(inode, start, end - 1); | ||
4797 | |||
4798 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
4799 | ext4_inode_block_unlocked_dio(inode); | ||
4800 | inode_dio_wait(inode); | ||
4801 | |||
4802 | /* | ||
4803 | * Remove entire range from the extent status tree. | ||
4804 | */ | ||
4805 | ret = ext4_es_remove_extent(inode, lblk, max_blocks); | ||
4806 | if (ret) | ||
4807 | goto out_dio; | ||
4808 | |||
4809 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, | ||
4810 | mode); | ||
4811 | if (ret) | ||
4812 | goto out_dio; | ||
4813 | } | ||
4814 | |||
4815 | handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); | ||
4816 | if (IS_ERR(handle)) { | ||
4817 | ret = PTR_ERR(handle); | ||
4818 | ext4_std_error(inode->i_sb, ret); | ||
4819 | goto out_dio; | ||
4820 | } | ||
4821 | |||
4822 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4823 | |||
4824 | if (new_size) { | ||
4532 | if (new_size > i_size_read(inode)) | 4825 | if (new_size > i_size_read(inode)) |
4533 | i_size_write(inode, new_size); | 4826 | i_size_write(inode, new_size); |
4534 | if (new_size > EXT4_I(inode)->i_disksize) | 4827 | if (new_size > EXT4_I(inode)->i_disksize) |
4535 | ext4_update_i_disksize(inode, new_size); | 4828 | ext4_update_i_disksize(inode, new_size); |
4536 | } else { | 4829 | } else { |
4537 | /* | 4830 | /* |
4538 | * Mark that we allocate beyond EOF so the subsequent truncate | 4831 | * Mark that we allocate beyond EOF so the subsequent truncate |
4539 | * can proceed even if the new size is the same as i_size. | 4832 | * can proceed even if the new size is the same as i_size. |
4540 | */ | 4833 | */ |
4541 | if (new_size > i_size_read(inode)) | 4834 | if ((offset + len) > i_size_read(inode)) |
4542 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 4835 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4543 | } | 4836 | } |
4544 | 4837 | ||
4838 | ext4_mark_inode_dirty(handle, inode); | ||
4839 | |||
4840 | /* Zero out partial block at the edges of the range */ | ||
4841 | ret = ext4_zero_partial_blocks(handle, inode, offset, len); | ||
4842 | |||
4843 | if (file->f_flags & O_SYNC) | ||
4844 | ext4_handle_sync(handle); | ||
4845 | |||
4846 | ext4_journal_stop(handle); | ||
4847 | out_dio: | ||
4848 | ext4_inode_resume_unlocked_dio(inode); | ||
4849 | out_mutex: | ||
4850 | mutex_unlock(&inode->i_mutex); | ||
4851 | return ret; | ||
4545 | } | 4852 | } |
4546 | 4853 | ||
4547 | /* | 4854 | /* |
@@ -4555,22 +4862,25 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4555 | { | 4862 | { |
4556 | struct inode *inode = file_inode(file); | 4863 | struct inode *inode = file_inode(file); |
4557 | handle_t *handle; | 4864 | handle_t *handle; |
4558 | loff_t new_size; | 4865 | loff_t new_size = 0; |
4559 | unsigned int max_blocks; | 4866 | unsigned int max_blocks; |
4560 | int ret = 0; | 4867 | int ret = 0; |
4561 | int ret2 = 0; | ||
4562 | int retries = 0; | ||
4563 | int flags; | 4868 | int flags; |
4564 | struct ext4_map_blocks map; | 4869 | ext4_lblk_t lblk; |
4565 | unsigned int credits, blkbits = inode->i_blkbits; | 4870 | struct timespec tv; |
4871 | unsigned int blkbits = inode->i_blkbits; | ||
4566 | 4872 | ||
4567 | /* Return error if mode is not supported */ | 4873 | /* Return error if mode is not supported */ |
4568 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 4874 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
4875 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
4569 | return -EOPNOTSUPP; | 4876 | return -EOPNOTSUPP; |
4570 | 4877 | ||
4571 | if (mode & FALLOC_FL_PUNCH_HOLE) | 4878 | if (mode & FALLOC_FL_PUNCH_HOLE) |
4572 | return ext4_punch_hole(inode, offset, len); | 4879 | return ext4_punch_hole(inode, offset, len); |
4573 | 4880 | ||
4881 | if (mode & FALLOC_FL_COLLAPSE_RANGE) | ||
4882 | return ext4_collapse_range(inode, offset, len); | ||
4883 | |||
4574 | ret = ext4_convert_inline_data(inode); | 4884 | ret = ext4_convert_inline_data(inode); |
4575 | if (ret) | 4885 | if (ret) |
4576 | return ret; | 4886 | return ret; |
@@ -4582,83 +4892,66 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4582 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4892 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
4583 | return -EOPNOTSUPP; | 4893 | return -EOPNOTSUPP; |
4584 | 4894 | ||
4895 | if (mode & FALLOC_FL_ZERO_RANGE) | ||
4896 | return ext4_zero_range(file, offset, len, mode); | ||
4897 | |||
4585 | trace_ext4_fallocate_enter(inode, offset, len, mode); | 4898 | trace_ext4_fallocate_enter(inode, offset, len, mode); |
4586 | map.m_lblk = offset >> blkbits; | 4899 | lblk = offset >> blkbits; |
4587 | /* | 4900 | /* |
4588 | * We can't just convert len to max_blocks because | 4901 | * We can't just convert len to max_blocks because |
4589 | * If blocksize = 4096 offset = 3072 and len = 2048 | 4902 | * If blocksize = 4096 offset = 3072 and len = 2048 |
4590 | */ | 4903 | */ |
4591 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 4904 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
4592 | - map.m_lblk; | 4905 | - lblk; |
4593 | /* | 4906 | |
4594 | * credits to insert 1 extent into extent tree | ||
4595 | */ | ||
4596 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
4597 | mutex_lock(&inode->i_mutex); | ||
4598 | ret = inode_newsize_ok(inode, (len + offset)); | ||
4599 | if (ret) { | ||
4600 | mutex_unlock(&inode->i_mutex); | ||
4601 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | ||
4602 | return ret; | ||
4603 | } | ||
4604 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; | 4907 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; |
4605 | if (mode & FALLOC_FL_KEEP_SIZE) | 4908 | if (mode & FALLOC_FL_KEEP_SIZE) |
4606 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | 4909 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; |
4607 | /* | ||
4608 | * Don't normalize the request if it can fit in one extent so | ||
4609 | * that it doesn't get unnecessarily split into multiple | ||
4610 | * extents. | ||
4611 | */ | ||
4612 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) | ||
4613 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
4614 | 4910 | ||
4615 | retry: | 4911 | mutex_lock(&inode->i_mutex); |
4616 | while (ret >= 0 && ret < max_blocks) { | ||
4617 | map.m_lblk = map.m_lblk + ret; | ||
4618 | map.m_len = max_blocks = max_blocks - ret; | ||
4619 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | ||
4620 | credits); | ||
4621 | if (IS_ERR(handle)) { | ||
4622 | ret = PTR_ERR(handle); | ||
4623 | break; | ||
4624 | } | ||
4625 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
4626 | if (ret <= 0) { | ||
4627 | #ifdef EXT4FS_DEBUG | ||
4628 | ext4_warning(inode->i_sb, | ||
4629 | "inode #%lu: block %u: len %u: " | ||
4630 | "ext4_ext_map_blocks returned %d", | ||
4631 | inode->i_ino, map.m_lblk, | ||
4632 | map.m_len, ret); | ||
4633 | #endif | ||
4634 | ext4_mark_inode_dirty(handle, inode); | ||
4635 | ret2 = ext4_journal_stop(handle); | ||
4636 | break; | ||
4637 | } | ||
4638 | if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, | ||
4639 | blkbits) >> blkbits)) | ||
4640 | new_size = offset + len; | ||
4641 | else | ||
4642 | new_size = ((loff_t) map.m_lblk + ret) << blkbits; | ||
4643 | 4912 | ||
4644 | ext4_falloc_update_inode(inode, mode, new_size, | 4913 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
4645 | (map.m_flags & EXT4_MAP_NEW)); | 4914 | offset + len > i_size_read(inode)) { |
4646 | ext4_mark_inode_dirty(handle, inode); | 4915 | new_size = offset + len; |
4647 | if ((file->f_flags & O_SYNC) && ret >= max_blocks) | 4916 | ret = inode_newsize_ok(inode, new_size); |
4648 | ext4_handle_sync(handle); | 4917 | if (ret) |
4649 | ret2 = ext4_journal_stop(handle); | 4918 | goto out; |
4650 | if (ret2) | ||
4651 | break; | ||
4652 | } | 4919 | } |
4653 | if (ret == -ENOSPC && | 4920 | |
4654 | ext4_should_retry_alloc(inode->i_sb, &retries)) { | 4921 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); |
4655 | ret = 0; | 4922 | if (ret) |
4656 | goto retry; | 4923 | goto out; |
4924 | |||
4925 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | ||
4926 | if (IS_ERR(handle)) | ||
4927 | goto out; | ||
4928 | |||
4929 | tv = inode->i_ctime = ext4_current_time(inode); | ||
4930 | |||
4931 | if (new_size) { | ||
4932 | if (new_size > i_size_read(inode)) { | ||
4933 | i_size_write(inode, new_size); | ||
4934 | inode->i_mtime = tv; | ||
4935 | } | ||
4936 | if (new_size > EXT4_I(inode)->i_disksize) | ||
4937 | ext4_update_i_disksize(inode, new_size); | ||
4938 | } else { | ||
4939 | /* | ||
4940 | * Mark that we allocate beyond EOF so the subsequent truncate | ||
4941 | * can proceed even if the new size is the same as i_size. | ||
4942 | */ | ||
4943 | if ((offset + len) > i_size_read(inode)) | ||
4944 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
4657 | } | 4945 | } |
4946 | ext4_mark_inode_dirty(handle, inode); | ||
4947 | if (file->f_flags & O_SYNC) | ||
4948 | ext4_handle_sync(handle); | ||
4949 | |||
4950 | ext4_journal_stop(handle); | ||
4951 | out: | ||
4658 | mutex_unlock(&inode->i_mutex); | 4952 | mutex_unlock(&inode->i_mutex); |
4659 | trace_ext4_fallocate_exit(inode, offset, max_blocks, | 4953 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |
4660 | ret > 0 ? ret2 : ret); | 4954 | return ret; |
4661 | return ret > 0 ? ret2 : ret; | ||
4662 | } | 4955 | } |
4663 | 4956 | ||
4664 | /* | 4957 | /* |
@@ -4869,3 +5162,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4869 | ext4_es_lru_add(inode); | 5162 | ext4_es_lru_add(inode); |
4870 | return error; | 5163 | return error; |
4871 | } | 5164 | } |
5165 | |||
5166 | /* | ||
5167 | * ext4_access_path: | ||
5168 | * Function to access the path buffer for marking it dirty. | ||
5169 | * It also checks if there are sufficient credits left in the journal handle | ||
5170 | * to update path. | ||
5171 | */ | ||
5172 | static int | ||
5173 | ext4_access_path(handle_t *handle, struct inode *inode, | ||
5174 | struct ext4_ext_path *path) | ||
5175 | { | ||
5176 | int credits, err; | ||
5177 | |||
5178 | if (!ext4_handle_valid(handle)) | ||
5179 | return 0; | ||
5180 | |||
5181 | /* | ||
5182 | * Check if need to extend journal credits | ||
5183 | * 3 for leaf, sb, and inode plus 2 (bmap and group | ||
5184 | * descriptor) for each block group; assume two block | ||
5185 | * groups | ||
5186 | */ | ||
5187 | if (handle->h_buffer_credits < 7) { | ||
5188 | credits = ext4_writepage_trans_blocks(inode); | ||
5189 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); | ||
5190 | /* EAGAIN is success */ | ||
5191 | if (err && err != -EAGAIN) | ||
5192 | return err; | ||
5193 | } | ||
5194 | |||
5195 | err = ext4_ext_get_access(handle, inode, path); | ||
5196 | return err; | ||
5197 | } | ||
5198 | |||
5199 | /* | ||
5200 | * ext4_ext_shift_path_extents: | ||
5201 | * Shift the extents of a path structure lying between path[depth].p_ext | ||
5202 | * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift | ||
5203 | * from starting block for each extent. | ||
5204 | */ | ||
5205 | static int | ||
5206 | ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, | ||
5207 | struct inode *inode, handle_t *handle, | ||
5208 | ext4_lblk_t *start) | ||
5209 | { | ||
5210 | int depth, err = 0; | ||
5211 | struct ext4_extent *ex_start, *ex_last; | ||
5212 | bool update = 0; | ||
5213 | depth = path->p_depth; | ||
5214 | |||
5215 | while (depth >= 0) { | ||
5216 | if (depth == path->p_depth) { | ||
5217 | ex_start = path[depth].p_ext; | ||
5218 | if (!ex_start) | ||
5219 | return -EIO; | ||
5220 | |||
5221 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); | ||
5222 | if (!ex_last) | ||
5223 | return -EIO; | ||
5224 | |||
5225 | err = ext4_access_path(handle, inode, path + depth); | ||
5226 | if (err) | ||
5227 | goto out; | ||
5228 | |||
5229 | if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) | ||
5230 | update = 1; | ||
5231 | |||
5232 | *start = ex_last->ee_block + | ||
5233 | ext4_ext_get_actual_len(ex_last); | ||
5234 | |||
5235 | while (ex_start <= ex_last) { | ||
5236 | ex_start->ee_block -= shift; | ||
5237 | if (ex_start > | ||
5238 | EXT_FIRST_EXTENT(path[depth].p_hdr)) { | ||
5239 | if (ext4_ext_try_to_merge_right(inode, | ||
5240 | path, ex_start - 1)) | ||
5241 | ex_last--; | ||
5242 | } | ||
5243 | ex_start++; | ||
5244 | } | ||
5245 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
5246 | if (err) | ||
5247 | goto out; | ||
5248 | |||
5249 | if (--depth < 0 || !update) | ||
5250 | break; | ||
5251 | } | ||
5252 | |||
5253 | /* Update index too */ | ||
5254 | err = ext4_access_path(handle, inode, path + depth); | ||
5255 | if (err) | ||
5256 | goto out; | ||
5257 | |||
5258 | path[depth].p_idx->ei_block -= shift; | ||
5259 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
5260 | if (err) | ||
5261 | goto out; | ||
5262 | |||
5263 | /* we are done if current index is not a starting index */ | ||
5264 | if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) | ||
5265 | break; | ||
5266 | |||
5267 | depth--; | ||
5268 | } | ||
5269 | |||
5270 | out: | ||
5271 | return err; | ||
5272 | } | ||
5273 | |||
5274 | /* | ||
5275 | * ext4_ext_shift_extents: | ||
5276 | * All the extents which lies in the range from start to the last allocated | ||
5277 | * block for the file are shifted downwards by shift blocks. | ||
5278 | * On success, 0 is returned, error otherwise. | ||
5279 | */ | ||
5280 | static int | ||
5281 | ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | ||
5282 | ext4_lblk_t start, ext4_lblk_t shift) | ||
5283 | { | ||
5284 | struct ext4_ext_path *path; | ||
5285 | int ret = 0, depth; | ||
5286 | struct ext4_extent *extent; | ||
5287 | ext4_lblk_t stop_block, current_block; | ||
5288 | ext4_lblk_t ex_start, ex_end; | ||
5289 | |||
5290 | /* Let path point to the last extent */ | ||
5291 | path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); | ||
5292 | if (IS_ERR(path)) | ||
5293 | return PTR_ERR(path); | ||
5294 | |||
5295 | depth = path->p_depth; | ||
5296 | extent = path[depth].p_ext; | ||
5297 | if (!extent) { | ||
5298 | ext4_ext_drop_refs(path); | ||
5299 | kfree(path); | ||
5300 | return ret; | ||
5301 | } | ||
5302 | |||
5303 | stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
5304 | ext4_ext_drop_refs(path); | ||
5305 | kfree(path); | ||
5306 | |||
5307 | /* Nothing to shift, if hole is at the end of file */ | ||
5308 | if (start >= stop_block) | ||
5309 | return ret; | ||
5310 | |||
5311 | /* | ||
5312 | * Don't start shifting extents until we make sure the hole is big | ||
5313 | * enough to accomodate the shift. | ||
5314 | */ | ||
5315 | path = ext4_ext_find_extent(inode, start - 1, NULL, 0); | ||
5316 | depth = path->p_depth; | ||
5317 | extent = path[depth].p_ext; | ||
5318 | ex_start = extent->ee_block; | ||
5319 | ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
5320 | ext4_ext_drop_refs(path); | ||
5321 | kfree(path); | ||
5322 | |||
5323 | if ((start == ex_start && shift > ex_start) || | ||
5324 | (shift > start - ex_end)) | ||
5325 | return -EINVAL; | ||
5326 | |||
5327 | /* Its safe to start updating extents */ | ||
5328 | while (start < stop_block) { | ||
5329 | path = ext4_ext_find_extent(inode, start, NULL, 0); | ||
5330 | if (IS_ERR(path)) | ||
5331 | return PTR_ERR(path); | ||
5332 | depth = path->p_depth; | ||
5333 | extent = path[depth].p_ext; | ||
5334 | current_block = extent->ee_block; | ||
5335 | if (start > current_block) { | ||
5336 | /* Hole, move to the next extent */ | ||
5337 | ret = mext_next_extent(inode, path, &extent); | ||
5338 | if (ret != 0) { | ||
5339 | ext4_ext_drop_refs(path); | ||
5340 | kfree(path); | ||
5341 | if (ret == 1) | ||
5342 | ret = 0; | ||
5343 | break; | ||
5344 | } | ||
5345 | } | ||
5346 | ret = ext4_ext_shift_path_extents(path, shift, inode, | ||
5347 | handle, &start); | ||
5348 | ext4_ext_drop_refs(path); | ||
5349 | kfree(path); | ||
5350 | if (ret) | ||
5351 | break; | ||
5352 | } | ||
5353 | |||
5354 | return ret; | ||
5355 | } | ||
5356 | |||
5357 | /* | ||
5358 | * ext4_collapse_range: | ||
5359 | * This implements the fallocate's collapse range functionality for ext4 | ||
5360 | * Returns: 0 and non-zero on error. | ||
5361 | */ | ||
5362 | int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | ||
5363 | { | ||
5364 | struct super_block *sb = inode->i_sb; | ||
5365 | ext4_lblk_t punch_start, punch_stop; | ||
5366 | handle_t *handle; | ||
5367 | unsigned int credits; | ||
5368 | loff_t new_size; | ||
5369 | int ret; | ||
5370 | |||
5371 | BUG_ON(offset + len > i_size_read(inode)); | ||
5372 | |||
5373 | /* Collapse range works only on fs block size aligned offsets. */ | ||
5374 | if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || | ||
5375 | len & (EXT4_BLOCK_SIZE(sb) - 1)) | ||
5376 | return -EINVAL; | ||
5377 | |||
5378 | if (!S_ISREG(inode->i_mode)) | ||
5379 | return -EOPNOTSUPP; | ||
5380 | |||
5381 | trace_ext4_collapse_range(inode, offset, len); | ||
5382 | |||
5383 | punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); | ||
5384 | punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
5385 | |||
5386 | /* Write out all dirty pages */ | ||
5387 | ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); | ||
5388 | if (ret) | ||
5389 | return ret; | ||
5390 | |||
5391 | /* Take mutex lock */ | ||
5392 | mutex_lock(&inode->i_mutex); | ||
5393 | |||
5394 | /* It's not possible punch hole on append only file */ | ||
5395 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
5396 | ret = -EPERM; | ||
5397 | goto out_mutex; | ||
5398 | } | ||
5399 | |||
5400 | if (IS_SWAPFILE(inode)) { | ||
5401 | ret = -ETXTBSY; | ||
5402 | goto out_mutex; | ||
5403 | } | ||
5404 | |||
5405 | /* Currently just for extent based files */ | ||
5406 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
5407 | ret = -EOPNOTSUPP; | ||
5408 | goto out_mutex; | ||
5409 | } | ||
5410 | |||
5411 | truncate_pagecache_range(inode, offset, -1); | ||
5412 | |||
5413 | /* Wait for existing dio to complete */ | ||
5414 | ext4_inode_block_unlocked_dio(inode); | ||
5415 | inode_dio_wait(inode); | ||
5416 | |||
5417 | credits = ext4_writepage_trans_blocks(inode); | ||
5418 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | ||
5419 | if (IS_ERR(handle)) { | ||
5420 | ret = PTR_ERR(handle); | ||
5421 | goto out_dio; | ||
5422 | } | ||
5423 | |||
5424 | down_write(&EXT4_I(inode)->i_data_sem); | ||
5425 | ext4_discard_preallocations(inode); | ||
5426 | |||
5427 | ret = ext4_es_remove_extent(inode, punch_start, | ||
5428 | EXT_MAX_BLOCKS - punch_start - 1); | ||
5429 | if (ret) { | ||
5430 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5431 | goto out_stop; | ||
5432 | } | ||
5433 | |||
5434 | ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); | ||
5435 | if (ret) { | ||
5436 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5437 | goto out_stop; | ||
5438 | } | ||
5439 | |||
5440 | ret = ext4_ext_shift_extents(inode, handle, punch_stop, | ||
5441 | punch_stop - punch_start); | ||
5442 | if (ret) { | ||
5443 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5444 | goto out_stop; | ||
5445 | } | ||
5446 | |||
5447 | new_size = i_size_read(inode) - len; | ||
5448 | truncate_setsize(inode, new_size); | ||
5449 | EXT4_I(inode)->i_disksize = new_size; | ||
5450 | |||
5451 | ext4_discard_preallocations(inode); | ||
5452 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5453 | if (IS_SYNC(inode)) | ||
5454 | ext4_handle_sync(handle); | ||
5455 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
5456 | ext4_mark_inode_dirty(handle, inode); | ||
5457 | |||
5458 | out_stop: | ||
5459 | ext4_journal_stop(handle); | ||
5460 | out_dio: | ||
5461 | ext4_inode_resume_unlocked_dio(inode); | ||
5462 | out_mutex: | ||
5463 | mutex_unlock(&inode->i_mutex); | ||
5464 | return ret; | ||
5465 | } | ||