aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorNamjae Jeon <namjae.jeon@samsung.com>2014-02-23 15:18:59 -0500
committerTheodore Ts'o <tytso@mit.edu>2014-02-23 15:18:59 -0500
commit9eb79482a97152930b113b51dff530aba9e28c8e (patch)
treef6a7c5c7c3de4cae54c9453d3457a0504e9aa070 /fs/ext4
parenta633f5a319cf4116d977e25fea2830dce23a8e74 (diff)
ext4: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate
This patch implements fallocate's FALLOC_FL_COLLAPSE_RANGE for Ext4. The semantics of this flag are as follows: 1) It collapses the range lying between offset and length by removing any data blocks which are present in this range and then updates all the logical offsets of extents beyond "offset + len" to nullify the hole created by removing blocks. In short, it does not leave a hole. 2) It should be used exclusively. No other fallocate flag in combination. 3) Offset and length supplied to fallocate should be fs block size aligned in case of xfs and ext4. 4) Collapse range does not work beyond i_size. Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com> Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com> Tested-by: Dongsu Park <dongsu.park@profitbricks.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/extents.c307
-rw-r--r--fs/ext4/move_extent.c2
3 files changed, 310 insertions, 2 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b7207db3107c..beec42750a8c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2758,6 +2758,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2758extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2758extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2759 __u64 start, __u64 len); 2759 __u64 start, __u64 len);
2760extern int ext4_ext_precache(struct inode *inode); 2760extern int ext4_ext_precache(struct inode *inode);
2761extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
2761 2762
2762/* move_extent.c */ 2763/* move_extent.c */
2763extern void ext4_double_down_write_data_sem(struct inode *first, 2764extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2767,6 +2768,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
2767extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2768extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2768 __u64 start_orig, __u64 start_donor, 2769 __u64 start_orig, __u64 start_donor,
2769 __u64 len, __u64 *moved_len); 2770 __u64 len, __u64 *moved_len);
2771extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
2772 struct ext4_extent **extent);
2770 2773
2771/* page-io.c */ 2774/* page-io.c */
2772extern int __init ext4_init_pageio(void); 2775extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2e0608e3be6e..bbba1ef5417d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4581,12 +4581,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4581 unsigned int credits, blkbits = inode->i_blkbits; 4581 unsigned int credits, blkbits = inode->i_blkbits;
4582 4582
4583 /* Return error if mode is not supported */ 4583 /* Return error if mode is not supported */
4584 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 4584 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4585 FALLOC_FL_COLLAPSE_RANGE))
4585 return -EOPNOTSUPP; 4586 return -EOPNOTSUPP;
4586 4587
4587 if (mode & FALLOC_FL_PUNCH_HOLE) 4588 if (mode & FALLOC_FL_PUNCH_HOLE)
4588 return ext4_punch_hole(inode, offset, len); 4589 return ext4_punch_hole(inode, offset, len);
4589 4590
4591 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4592 return ext4_collapse_range(inode, offset, len);
4593
4590 ret = ext4_convert_inline_data(inode); 4594 ret = ext4_convert_inline_data(inode);
4591 if (ret) 4595 if (ret)
4592 return ret; 4596 return ret;
@@ -4885,3 +4889,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4885 ext4_es_lru_add(inode); 4889 ext4_es_lru_add(inode);
4886 return error; 4890 return error;
4887} 4891}
4892
4893/*
4894 * ext4_access_path:
4895 * Function to access the path buffer for marking it dirty.
4896 * It also checks if there are sufficient credits left in the journal handle
4897 * to update path.
4898 */
4899static int
4900ext4_access_path(handle_t *handle, struct inode *inode,
4901 struct ext4_ext_path *path)
4902{
4903 int credits, err;
4904
4905 if (!ext4_handle_valid(handle))
4906 return 0;
4907
4908 /*
4909 * Check if need to extend journal credits
4910 * 3 for leaf, sb, and inode plus 2 (bmap and group
4911 * descriptor) for each block group; assume two block
4912 * groups
4913 */
4914 if (handle->h_buffer_credits < 7) {
4915 credits = ext4_writepage_trans_blocks(inode);
4916 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
4917 /* EAGAIN is success */
4918 if (err && err != -EAGAIN)
4919 return err;
4920 }
4921
4922 err = ext4_ext_get_access(handle, inode, path);
4923 return err;
4924}
4925
4926/*
4927 * ext4_ext_shift_path_extents:
4928 * Shift the extents of a path structure lying between path[depth].p_ext
4929 * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
4930 * from starting block for each extent.
4931 */
4932static int
4933ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
4934 struct inode *inode, handle_t *handle,
4935 ext4_lblk_t *start)
4936{
4937 int depth, err = 0;
4938 struct ext4_extent *ex_start, *ex_last;
4939 bool update = 0;
4940 depth = path->p_depth;
4941
4942 while (depth >= 0) {
4943 if (depth == path->p_depth) {
4944 ex_start = path[depth].p_ext;
4945 if (!ex_start)
4946 return -EIO;
4947
4948 ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
4949 if (!ex_last)
4950 return -EIO;
4951
4952 err = ext4_access_path(handle, inode, path + depth);
4953 if (err)
4954 goto out;
4955
4956 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
4957 update = 1;
4958
4959 *start = ex_last->ee_block +
4960 ext4_ext_get_actual_len(ex_last);
4961
4962 while (ex_start <= ex_last) {
4963 ex_start->ee_block -= shift;
4964 if (ex_start >
4965 EXT_FIRST_EXTENT(path[depth].p_hdr)) {
4966 if (ext4_ext_try_to_merge_right(inode,
4967 path, ex_start - 1))
4968 ex_last--;
4969 }
4970 ex_start++;
4971 }
4972 err = ext4_ext_dirty(handle, inode, path + depth);
4973 if (err)
4974 goto out;
4975
4976 if (--depth < 0 || !update)
4977 break;
4978 }
4979
4980 /* Update index too */
4981 err = ext4_access_path(handle, inode, path + depth);
4982 if (err)
4983 goto out;
4984
4985 path[depth].p_idx->ei_block -= shift;
4986 err = ext4_ext_dirty(handle, inode, path + depth);
4987 if (err)
4988 goto out;
4989
4990 /* we are done if current index is not a starting index */
4991 if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
4992 break;
4993
4994 depth--;
4995 }
4996
4997out:
4998 return err;
4999}
5000
5001/*
5002 * ext4_ext_shift_extents:
5003 * All the extents which lies in the range from start to the last allocated
5004 * block for the file are shifted downwards by shift blocks.
5005 * On success, 0 is returned, error otherwise.
5006 */
5007static int
5008ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5009 ext4_lblk_t start, ext4_lblk_t shift)
5010{
5011 struct ext4_ext_path *path;
5012 int ret = 0, depth;
5013 struct ext4_extent *extent;
5014 ext4_lblk_t stop_block, current_block;
5015 ext4_lblk_t ex_start, ex_end;
5016
5017 /* Let path point to the last extent */
5018 path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
5019 if (IS_ERR(path))
5020 return PTR_ERR(path);
5021
5022 depth = path->p_depth;
5023 extent = path[depth].p_ext;
5024 if (!extent) {
5025 ext4_ext_drop_refs(path);
5026 kfree(path);
5027 return ret;
5028 }
5029
5030 stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
5031 ext4_ext_drop_refs(path);
5032 kfree(path);
5033
5034 /* Nothing to shift, if hole is at the end of file */
5035 if (start >= stop_block)
5036 return ret;
5037
5038 /*
5039 * Don't start shifting extents until we make sure the hole is big
5040 * enough to accomodate the shift.
5041 */
5042 path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
5043 depth = path->p_depth;
5044 extent = path[depth].p_ext;
5045 ex_start = extent->ee_block;
5046 ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
5047 ext4_ext_drop_refs(path);
5048 kfree(path);
5049
5050 if ((start == ex_start && shift > ex_start) ||
5051 (shift > start - ex_end))
5052 return -EINVAL;
5053
5054 /* Its safe to start updating extents */
5055 while (start < stop_block) {
5056 path = ext4_ext_find_extent(inode, start, NULL, 0);
5057 if (IS_ERR(path))
5058 return PTR_ERR(path);
5059 depth = path->p_depth;
5060 extent = path[depth].p_ext;
5061 current_block = extent->ee_block;
5062 if (start > current_block) {
5063 /* Hole, move to the next extent */
5064 ret = mext_next_extent(inode, path, &extent);
5065 if (ret != 0) {
5066 ext4_ext_drop_refs(path);
5067 kfree(path);
5068 if (ret == 1)
5069 ret = 0;
5070 break;
5071 }
5072 }
5073 ret = ext4_ext_shift_path_extents(path, shift, inode,
5074 handle, &start);
5075 ext4_ext_drop_refs(path);
5076 kfree(path);
5077 if (ret)
5078 break;
5079 }
5080
5081 return ret;
5082}
5083
5084/*
5085 * ext4_collapse_range:
5086 * This implements the fallocate's collapse range functionality for ext4
5087 * Returns: 0 and non-zero on error.
5088 */
5089int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5090{
5091 struct super_block *sb = inode->i_sb;
5092 ext4_lblk_t punch_start, punch_stop;
5093 handle_t *handle;
5094 unsigned int credits;
5095 loff_t new_size;
5096 int ret;
5097
5098 BUG_ON(offset + len > i_size_read(inode));
5099
5100 /* Collapse range works only on fs block size aligned offsets. */
5101 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
5102 len & (EXT4_BLOCK_SIZE(sb) - 1))
5103 return -EINVAL;
5104
5105 if (!S_ISREG(inode->i_mode))
5106 return -EOPNOTSUPP;
5107
5108 trace_ext4_collapse_range(inode, offset, len);
5109
5110 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5111 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5112
5113 /* Write out all dirty pages */
5114 ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
5115 if (ret)
5116 return ret;
5117
5118 /* Take mutex lock */
5119 mutex_lock(&inode->i_mutex);
5120
5121 /* It's not possible punch hole on append only file */
5122 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
5123 ret = -EPERM;
5124 goto out_mutex;
5125 }
5126
5127 if (IS_SWAPFILE(inode)) {
5128 ret = -ETXTBSY;
5129 goto out_mutex;
5130 }
5131
5132 /* Currently just for extent based files */
5133 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5134 ret = -EOPNOTSUPP;
5135 goto out_mutex;
5136 }
5137
5138 truncate_pagecache_range(inode, offset, -1);
5139
5140 /* Wait for existing dio to complete */
5141 ext4_inode_block_unlocked_dio(inode);
5142 inode_dio_wait(inode);
5143
5144 credits = ext4_writepage_trans_blocks(inode);
5145 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5146 if (IS_ERR(handle)) {
5147 ret = PTR_ERR(handle);
5148 goto out_dio;
5149 }
5150
5151 down_write(&EXT4_I(inode)->i_data_sem);
5152 ext4_discard_preallocations(inode);
5153
5154 ret = ext4_es_remove_extent(inode, punch_start,
5155 EXT_MAX_BLOCKS - punch_start - 1);
5156 if (ret) {
5157 up_write(&EXT4_I(inode)->i_data_sem);
5158 goto out_stop;
5159 }
5160
5161 ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5162 if (ret) {
5163 up_write(&EXT4_I(inode)->i_data_sem);
5164 goto out_stop;
5165 }
5166
5167 ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5168 punch_stop - punch_start);
5169 if (ret) {
5170 up_write(&EXT4_I(inode)->i_data_sem);
5171 goto out_stop;
5172 }
5173
5174 new_size = i_size_read(inode) - len;
5175 truncate_setsize(inode, new_size);
5176 EXT4_I(inode)->i_disksize = new_size;
5177
5178 ext4_discard_preallocations(inode);
5179 up_write(&EXT4_I(inode)->i_data_sem);
5180 if (IS_SYNC(inode))
5181 ext4_handle_sync(handle);
5182 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
5183 ext4_mark_inode_dirty(handle, inode);
5184
5185out_stop:
5186 ext4_journal_stop(handle);
5187out_dio:
5188 ext4_inode_resume_unlocked_dio(inode);
5189out_mutex:
5190 mutex_unlock(&inode->i_mutex);
5191 return ret;
5192}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index f39a88abe32c..58ee7dc87669 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
76 * ext4_ext_path structure refers to the last extent, or a negative error 76 * ext4_ext_path structure refers to the last extent, or a negative error
77 * value on failure. 77 * value on failure.
78 */ 78 */
79static int 79int
80mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 80mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
81 struct ext4_extent **extent) 81 struct ext4_extent **extent)
82{ 82{