aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/extents.c307
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--include/trace/events/ext4.h33
4 files changed, 342 insertions, 3 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b7207db3107c..beec42750a8c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2758,6 +2758,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2758extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2758extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2759 __u64 start, __u64 len); 2759 __u64 start, __u64 len);
2760extern int ext4_ext_precache(struct inode *inode); 2760extern int ext4_ext_precache(struct inode *inode);
2761extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
2761 2762
2762/* move_extent.c */ 2763/* move_extent.c */
2763extern void ext4_double_down_write_data_sem(struct inode *first, 2764extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2767,6 +2768,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
2767extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2768extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2768 __u64 start_orig, __u64 start_donor, 2769 __u64 start_orig, __u64 start_donor,
2769 __u64 len, __u64 *moved_len); 2770 __u64 len, __u64 *moved_len);
2771extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
2772 struct ext4_extent **extent);
2770 2773
2771/* page-io.c */ 2774/* page-io.c */
2772extern int __init ext4_init_pageio(void); 2775extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2e0608e3be6e..bbba1ef5417d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4581,12 +4581,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4581 unsigned int credits, blkbits = inode->i_blkbits; 4581 unsigned int credits, blkbits = inode->i_blkbits;
4582 4582
4583 /* Return error if mode is not supported */ 4583 /* Return error if mode is not supported */
4584 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 4584 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4585 FALLOC_FL_COLLAPSE_RANGE))
4585 return -EOPNOTSUPP; 4586 return -EOPNOTSUPP;
4586 4587
4587 if (mode & FALLOC_FL_PUNCH_HOLE) 4588 if (mode & FALLOC_FL_PUNCH_HOLE)
4588 return ext4_punch_hole(inode, offset, len); 4589 return ext4_punch_hole(inode, offset, len);
4589 4590
4591 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4592 return ext4_collapse_range(inode, offset, len);
4593
4590 ret = ext4_convert_inline_data(inode); 4594 ret = ext4_convert_inline_data(inode);
4591 if (ret) 4595 if (ret)
4592 return ret; 4596 return ret;
@@ -4885,3 +4889,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4885 ext4_es_lru_add(inode); 4889 ext4_es_lru_add(inode);
4886 return error; 4890 return error;
4887} 4891}
4892
4893/*
4894 * ext4_access_path:
4895 * Function to access the path buffer for marking it dirty.
4896 * It also checks if there are sufficient credits left in the journal handle
4897 * to update path.
4898 */
4899static int
4900ext4_access_path(handle_t *handle, struct inode *inode,
4901 struct ext4_ext_path *path)
4902{
4903 int credits, err;
4904
4905 if (!ext4_handle_valid(handle))
4906 return 0;
4907
4908 /*
4909 * Check if need to extend journal credits
4910 * 3 for leaf, sb, and inode plus 2 (bmap and group
4911 * descriptor) for each block group; assume two block
4912 * groups
4913 */
4914 if (handle->h_buffer_credits < 7) {
4915 credits = ext4_writepage_trans_blocks(inode);
4916 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
4917 /* EAGAIN is success */
4918 if (err && err != -EAGAIN)
4919 return err;
4920 }
4921
4922 err = ext4_ext_get_access(handle, inode, path);
4923 return err;
4924}
4925
4926/*
4927 * ext4_ext_shift_path_extents:
4928 * Shift the extents of a path structure lying between path[depth].p_ext
4929 * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
4930 * from starting block for each extent.
4931 */
4932static int
4933ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
4934 struct inode *inode, handle_t *handle,
4935 ext4_lblk_t *start)
4936{
4937 int depth, err = 0;
4938 struct ext4_extent *ex_start, *ex_last;
4939 bool update = 0;
4940 depth = path->p_depth;
4941
4942 while (depth >= 0) {
4943 if (depth == path->p_depth) {
4944 ex_start = path[depth].p_ext;
4945 if (!ex_start)
4946 return -EIO;
4947
4948 ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
4949 if (!ex_last)
4950 return -EIO;
4951
4952 err = ext4_access_path(handle, inode, path + depth);
4953 if (err)
4954 goto out;
4955
4956 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
4957 update = 1;
4958
4959 *start = ex_last->ee_block +
4960 ext4_ext_get_actual_len(ex_last);
4961
4962 while (ex_start <= ex_last) {
4963 ex_start->ee_block -= shift;
4964 if (ex_start >
4965 EXT_FIRST_EXTENT(path[depth].p_hdr)) {
4966 if (ext4_ext_try_to_merge_right(inode,
4967 path, ex_start - 1))
4968 ex_last--;
4969 }
4970 ex_start++;
4971 }
4972 err = ext4_ext_dirty(handle, inode, path + depth);
4973 if (err)
4974 goto out;
4975
4976 if (--depth < 0 || !update)
4977 break;
4978 }
4979
4980 /* Update index too */
4981 err = ext4_access_path(handle, inode, path + depth);
4982 if (err)
4983 goto out;
4984
4985 path[depth].p_idx->ei_block -= shift;
4986 err = ext4_ext_dirty(handle, inode, path + depth);
4987 if (err)
4988 goto out;
4989
4990 /* we are done if current index is not a starting index */
4991 if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
4992 break;
4993
4994 depth--;
4995 }
4996
4997out:
4998 return err;
4999}
5000
5001/*
5002 * ext4_ext_shift_extents:
5003 * All the extents which lies in the range from start to the last allocated
5004 * block for the file are shifted downwards by shift blocks.
5005 * On success, 0 is returned, error otherwise.
5006 */
5007static int
5008ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5009 ext4_lblk_t start, ext4_lblk_t shift)
5010{
5011 struct ext4_ext_path *path;
5012 int ret = 0, depth;
5013 struct ext4_extent *extent;
5014 ext4_lblk_t stop_block, current_block;
5015 ext4_lblk_t ex_start, ex_end;
5016
5017 /* Let path point to the last extent */
5018 path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
5019 if (IS_ERR(path))
5020 return PTR_ERR(path);
5021
5022 depth = path->p_depth;
5023 extent = path[depth].p_ext;
5024 if (!extent) {
5025 ext4_ext_drop_refs(path);
5026 kfree(path);
5027 return ret;
5028 }
5029
5030 stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
5031 ext4_ext_drop_refs(path);
5032 kfree(path);
5033
5034 /* Nothing to shift, if hole is at the end of file */
5035 if (start >= stop_block)
5036 return ret;
5037
5038 /*
5039 * Don't start shifting extents until we make sure the hole is big
5040 * enough to accomodate the shift.
5041 */
5042 path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
5043 depth = path->p_depth;
5044 extent = path[depth].p_ext;
5045 ex_start = extent->ee_block;
5046 ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
5047 ext4_ext_drop_refs(path);
5048 kfree(path);
5049
5050 if ((start == ex_start && shift > ex_start) ||
5051 (shift > start - ex_end))
5052 return -EINVAL;
5053
5054 /* Its safe to start updating extents */
5055 while (start < stop_block) {
5056 path = ext4_ext_find_extent(inode, start, NULL, 0);
5057 if (IS_ERR(path))
5058 return PTR_ERR(path);
5059 depth = path->p_depth;
5060 extent = path[depth].p_ext;
5061 current_block = extent->ee_block;
5062 if (start > current_block) {
5063 /* Hole, move to the next extent */
5064 ret = mext_next_extent(inode, path, &extent);
5065 if (ret != 0) {
5066 ext4_ext_drop_refs(path);
5067 kfree(path);
5068 if (ret == 1)
5069 ret = 0;
5070 break;
5071 }
5072 }
5073 ret = ext4_ext_shift_path_extents(path, shift, inode,
5074 handle, &start);
5075 ext4_ext_drop_refs(path);
5076 kfree(path);
5077 if (ret)
5078 break;
5079 }
5080
5081 return ret;
5082}
5083
5084/*
5085 * ext4_collapse_range:
5086 * This implements the fallocate's collapse range functionality for ext4
5087 * Returns: 0 and non-zero on error.
5088 */
5089int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5090{
5091 struct super_block *sb = inode->i_sb;
5092 ext4_lblk_t punch_start, punch_stop;
5093 handle_t *handle;
5094 unsigned int credits;
5095 loff_t new_size;
5096 int ret;
5097
5098 BUG_ON(offset + len > i_size_read(inode));
5099
5100 /* Collapse range works only on fs block size aligned offsets. */
5101 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
5102 len & (EXT4_BLOCK_SIZE(sb) - 1))
5103 return -EINVAL;
5104
5105 if (!S_ISREG(inode->i_mode))
5106 return -EOPNOTSUPP;
5107
5108 trace_ext4_collapse_range(inode, offset, len);
5109
5110 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5111 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5112
5113 /* Write out all dirty pages */
5114 ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
5115 if (ret)
5116 return ret;
5117
5118 /* Take mutex lock */
5119 mutex_lock(&inode->i_mutex);
5120
5121 /* It's not possible punch hole on append only file */
5122 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
5123 ret = -EPERM;
5124 goto out_mutex;
5125 }
5126
5127 if (IS_SWAPFILE(inode)) {
5128 ret = -ETXTBSY;
5129 goto out_mutex;
5130 }
5131
5132 /* Currently just for extent based files */
5133 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5134 ret = -EOPNOTSUPP;
5135 goto out_mutex;
5136 }
5137
5138 truncate_pagecache_range(inode, offset, -1);
5139
5140 /* Wait for existing dio to complete */
5141 ext4_inode_block_unlocked_dio(inode);
5142 inode_dio_wait(inode);
5143
5144 credits = ext4_writepage_trans_blocks(inode);
5145 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5146 if (IS_ERR(handle)) {
5147 ret = PTR_ERR(handle);
5148 goto out_dio;
5149 }
5150
5151 down_write(&EXT4_I(inode)->i_data_sem);
5152 ext4_discard_preallocations(inode);
5153
5154 ret = ext4_es_remove_extent(inode, punch_start,
5155 EXT_MAX_BLOCKS - punch_start - 1);
5156 if (ret) {
5157 up_write(&EXT4_I(inode)->i_data_sem);
5158 goto out_stop;
5159 }
5160
5161 ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5162 if (ret) {
5163 up_write(&EXT4_I(inode)->i_data_sem);
5164 goto out_stop;
5165 }
5166
5167 ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5168 punch_stop - punch_start);
5169 if (ret) {
5170 up_write(&EXT4_I(inode)->i_data_sem);
5171 goto out_stop;
5172 }
5173
5174 new_size = i_size_read(inode) - len;
5175 truncate_setsize(inode, new_size);
5176 EXT4_I(inode)->i_disksize = new_size;
5177
5178 ext4_discard_preallocations(inode);
5179 up_write(&EXT4_I(inode)->i_data_sem);
5180 if (IS_SYNC(inode))
5181 ext4_handle_sync(handle);
5182 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
5183 ext4_mark_inode_dirty(handle, inode);
5184
5185out_stop:
5186 ext4_journal_stop(handle);
5187out_dio:
5188 ext4_inode_resume_unlocked_dio(inode);
5189out_mutex:
5190 mutex_unlock(&inode->i_mutex);
5191 return ret;
5192}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index f39a88abe32c..58ee7dc87669 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
76 * ext4_ext_path structure refers to the last extent, or a negative error 76 * ext4_ext_path structure refers to the last extent, or a negative error
77 * value on failure. 77 * value on failure.
78 */ 78 */
79static int 79int
80mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 80mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
81 struct ext4_extent **extent) 81 struct ext4_extent **extent)
82{ 82{
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 451e0202aa69..e9d7ee77d3a1 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -16,6 +16,11 @@ struct mpage_da_data;
16struct ext4_map_blocks; 16struct ext4_map_blocks;
17struct extent_status; 17struct extent_status;
18 18
19/* Shim: supply FALLOC_FL_COLLAPSE_RANGE only if no earlier header defined it; to be dropped once the xfs_collapse_range branch is merged */
20#ifndef FALLOC_FL_COLLAPSE_RANGE
21#define FALLOC_FL_COLLAPSE_RANGE 0x08
22#endif
23
19#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) 24#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
20 25
21#define show_mballoc_flags(flags) __print_flags(flags, "|", \ 26#define show_mballoc_flags(flags) __print_flags(flags, "|", \
@@ -71,7 +76,8 @@ struct extent_status;
71#define show_falloc_mode(mode) __print_flags(mode, "|", \ 76#define show_falloc_mode(mode) __print_flags(mode, "|", \
72 { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ 77 { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
73 { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ 78 { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
74 { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}) 79 { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \
80 { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"})
75 81
76 82
77TRACE_EVENT(ext4_free_inode, 83TRACE_EVENT(ext4_free_inode,
@@ -2415,6 +2421,31 @@ TRACE_EVENT(ext4_es_shrink_exit,
2415 __entry->shrunk_nr, __entry->cache_cnt) 2421 __entry->shrunk_nr, __entry->cache_cnt)
2416); 2422);
2417 2423
/*
 * Tracepoint ext4_collapse_range: takes the inode together with the
 * offset and len passed along with it, records the device number of the
 * inode's superblock, the inode number, offset and len, and prints them
 * as "dev MAJOR,MINOR ino N offset O len L".
 */
2424TRACE_EVENT(ext4_collapse_range,
2425 TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
2426
2427 TP_ARGS(inode, offset, len),
2428
2429 TP_STRUCT__entry(
2430 __field(dev_t, dev)
2431 __field(ino_t, ino)
2432 __field(loff_t, offset)
2433 __field(loff_t, len)
2434 ),
2435
2436 TP_fast_assign(
2437 __entry->dev = inode->i_sb->s_dev;
2438 __entry->ino = inode->i_ino;
2439 __entry->offset = offset;
2440 __entry->len = len;
2441 ),
2442
2443 TP_printk("dev %d,%d ino %lu offset %lld len %lld",
2444 MAJOR(__entry->dev), MINOR(__entry->dev),
2445 (unsigned long) __entry->ino,
2446 __entry->offset, __entry->len)
2447);
2448
2418#endif /* _TRACE_EXT4_H */ 2449#endif /* _TRACE_EXT4_H */
2419 2450
2420/* This part must be outside protection */ 2451/* This part must be outside protection */