diff options
-rw-r--r-- | fs/ext4/ext4.h | 3 | ||||
-rw-r--r-- | fs/ext4/extents.c | 307 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 2 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 33 |
4 files changed, 342 insertions, 3 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b7207db3107c..beec42750a8c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -2758,6 +2758,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); | |||
2758 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2758 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2759 | __u64 start, __u64 len); | 2759 | __u64 start, __u64 len); |
2760 | extern int ext4_ext_precache(struct inode *inode); | 2760 | extern int ext4_ext_precache(struct inode *inode); |
2761 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); | ||
2761 | 2762 | ||
2762 | /* move_extent.c */ | 2763 | /* move_extent.c */ |
2763 | extern void ext4_double_down_write_data_sem(struct inode *first, | 2764 | extern void ext4_double_down_write_data_sem(struct inode *first, |
@@ -2767,6 +2768,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode, | |||
2767 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | 2768 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, |
2768 | __u64 start_orig, __u64 start_donor, | 2769 | __u64 start_orig, __u64 start_donor, |
2769 | __u64 len, __u64 *moved_len); | 2770 | __u64 len, __u64 *moved_len); |
2771 | extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
2772 | struct ext4_extent **extent); | ||
2770 | 2773 | ||
2771 | /* page-io.c */ | 2774 | /* page-io.c */ |
2772 | extern int __init ext4_init_pageio(void); | 2775 | extern int __init ext4_init_pageio(void); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2e0608e3be6e..bbba1ef5417d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -4581,12 +4581,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4581 | unsigned int credits, blkbits = inode->i_blkbits; | 4581 | unsigned int credits, blkbits = inode->i_blkbits; |
4582 | 4582 | ||
4583 | /* Return error if mode is not supported */ | 4583 | /* Return error if mode is not supported */ |
4584 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 4584 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
4585 | FALLOC_FL_COLLAPSE_RANGE)) | ||
4585 | return -EOPNOTSUPP; | 4586 | return -EOPNOTSUPP; |
4586 | 4587 | ||
4587 | if (mode & FALLOC_FL_PUNCH_HOLE) | 4588 | if (mode & FALLOC_FL_PUNCH_HOLE) |
4588 | return ext4_punch_hole(inode, offset, len); | 4589 | return ext4_punch_hole(inode, offset, len); |
4589 | 4590 | ||
4591 | if (mode & FALLOC_FL_COLLAPSE_RANGE) | ||
4592 | return ext4_collapse_range(inode, offset, len); | ||
4593 | |||
4590 | ret = ext4_convert_inline_data(inode); | 4594 | ret = ext4_convert_inline_data(inode); |
4591 | if (ret) | 4595 | if (ret) |
4592 | return ret; | 4596 | return ret; |
@@ -4885,3 +4889,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4885 | ext4_es_lru_add(inode); | 4889 | ext4_es_lru_add(inode); |
4886 | return error; | 4890 | return error; |
4887 | } | 4891 | } |
4892 | |||
4893 | /* | ||
4894 | * ext4_access_path: | ||
4895 | * Function to access the path buffer for marking it dirty. | ||
4896 | * It also checks if there are sufficient credits left in the journal handle | ||
4897 | * to update path. | ||
4898 | */ | ||
4899 | static int | ||
4900 | ext4_access_path(handle_t *handle, struct inode *inode, | ||
4901 | struct ext4_ext_path *path) | ||
4902 | { | ||
4903 | int credits, err; | ||
4904 | |||
4905 | if (!ext4_handle_valid(handle)) | ||
4906 | return 0; | ||
4907 | |||
4908 | /* | ||
4909 | * Check if need to extend journal credits | ||
4910 | * 3 for leaf, sb, and inode plus 2 (bmap and group | ||
4911 | * descriptor) for each block group; assume two block | ||
4912 | * groups | ||
4913 | */ | ||
4914 | if (handle->h_buffer_credits < 7) { | ||
4915 | credits = ext4_writepage_trans_blocks(inode); | ||
4916 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); | ||
4917 | /* EAGAIN is success */ | ||
4918 | if (err && err != -EAGAIN) | ||
4919 | return err; | ||
4920 | } | ||
4921 | |||
4922 | err = ext4_ext_get_access(handle, inode, path); | ||
4923 | return err; | ||
4924 | } | ||
4925 | |||
4926 | /* | ||
4927 | * ext4_ext_shift_path_extents: | ||
4928 | * Shift the extents of a path structure lying between path[depth].p_ext | ||
4929 | * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift | ||
4930 | * from starting block for each extent. | ||
4931 | */ | ||
4932 | static int | ||
4933 | ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, | ||
4934 | struct inode *inode, handle_t *handle, | ||
4935 | ext4_lblk_t *start) | ||
4936 | { | ||
4937 | int depth, err = 0; | ||
4938 | struct ext4_extent *ex_start, *ex_last; | ||
4939 | bool update = 0; | ||
4940 | depth = path->p_depth; | ||
4941 | |||
4942 | while (depth >= 0) { | ||
4943 | if (depth == path->p_depth) { | ||
4944 | ex_start = path[depth].p_ext; | ||
4945 | if (!ex_start) | ||
4946 | return -EIO; | ||
4947 | |||
4948 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); | ||
4949 | if (!ex_last) | ||
4950 | return -EIO; | ||
4951 | |||
4952 | err = ext4_access_path(handle, inode, path + depth); | ||
4953 | if (err) | ||
4954 | goto out; | ||
4955 | |||
4956 | if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) | ||
4957 | update = 1; | ||
4958 | |||
4959 | *start = ex_last->ee_block + | ||
4960 | ext4_ext_get_actual_len(ex_last); | ||
4961 | |||
4962 | while (ex_start <= ex_last) { | ||
4963 | ex_start->ee_block -= shift; | ||
4964 | if (ex_start > | ||
4965 | EXT_FIRST_EXTENT(path[depth].p_hdr)) { | ||
4966 | if (ext4_ext_try_to_merge_right(inode, | ||
4967 | path, ex_start - 1)) | ||
4968 | ex_last--; | ||
4969 | } | ||
4970 | ex_start++; | ||
4971 | } | ||
4972 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
4973 | if (err) | ||
4974 | goto out; | ||
4975 | |||
4976 | if (--depth < 0 || !update) | ||
4977 | break; | ||
4978 | } | ||
4979 | |||
4980 | /* Update index too */ | ||
4981 | err = ext4_access_path(handle, inode, path + depth); | ||
4982 | if (err) | ||
4983 | goto out; | ||
4984 | |||
4985 | path[depth].p_idx->ei_block -= shift; | ||
4986 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
4987 | if (err) | ||
4988 | goto out; | ||
4989 | |||
4990 | /* we are done if current index is not a starting index */ | ||
4991 | if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) | ||
4992 | break; | ||
4993 | |||
4994 | depth--; | ||
4995 | } | ||
4996 | |||
4997 | out: | ||
4998 | return err; | ||
4999 | } | ||
5000 | |||
5001 | /* | ||
5002 | * ext4_ext_shift_extents: | ||
5003 | * All the extents which lies in the range from start to the last allocated | ||
5004 | * block for the file are shifted downwards by shift blocks. | ||
5005 | * On success, 0 is returned, error otherwise. | ||
5006 | */ | ||
5007 | static int | ||
5008 | ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | ||
5009 | ext4_lblk_t start, ext4_lblk_t shift) | ||
5010 | { | ||
5011 | struct ext4_ext_path *path; | ||
5012 | int ret = 0, depth; | ||
5013 | struct ext4_extent *extent; | ||
5014 | ext4_lblk_t stop_block, current_block; | ||
5015 | ext4_lblk_t ex_start, ex_end; | ||
5016 | |||
5017 | /* Let path point to the last extent */ | ||
5018 | path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); | ||
5019 | if (IS_ERR(path)) | ||
5020 | return PTR_ERR(path); | ||
5021 | |||
5022 | depth = path->p_depth; | ||
5023 | extent = path[depth].p_ext; | ||
5024 | if (!extent) { | ||
5025 | ext4_ext_drop_refs(path); | ||
5026 | kfree(path); | ||
5027 | return ret; | ||
5028 | } | ||
5029 | |||
5030 | stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
5031 | ext4_ext_drop_refs(path); | ||
5032 | kfree(path); | ||
5033 | |||
5034 | /* Nothing to shift, if hole is at the end of file */ | ||
5035 | if (start >= stop_block) | ||
5036 | return ret; | ||
5037 | |||
5038 | /* | ||
5039 | * Don't start shifting extents until we make sure the hole is big | ||
5040 | * enough to accomodate the shift. | ||
5041 | */ | ||
5042 | path = ext4_ext_find_extent(inode, start - 1, NULL, 0); | ||
5043 | depth = path->p_depth; | ||
5044 | extent = path[depth].p_ext; | ||
5045 | ex_start = extent->ee_block; | ||
5046 | ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
5047 | ext4_ext_drop_refs(path); | ||
5048 | kfree(path); | ||
5049 | |||
5050 | if ((start == ex_start && shift > ex_start) || | ||
5051 | (shift > start - ex_end)) | ||
5052 | return -EINVAL; | ||
5053 | |||
5054 | /* Its safe to start updating extents */ | ||
5055 | while (start < stop_block) { | ||
5056 | path = ext4_ext_find_extent(inode, start, NULL, 0); | ||
5057 | if (IS_ERR(path)) | ||
5058 | return PTR_ERR(path); | ||
5059 | depth = path->p_depth; | ||
5060 | extent = path[depth].p_ext; | ||
5061 | current_block = extent->ee_block; | ||
5062 | if (start > current_block) { | ||
5063 | /* Hole, move to the next extent */ | ||
5064 | ret = mext_next_extent(inode, path, &extent); | ||
5065 | if (ret != 0) { | ||
5066 | ext4_ext_drop_refs(path); | ||
5067 | kfree(path); | ||
5068 | if (ret == 1) | ||
5069 | ret = 0; | ||
5070 | break; | ||
5071 | } | ||
5072 | } | ||
5073 | ret = ext4_ext_shift_path_extents(path, shift, inode, | ||
5074 | handle, &start); | ||
5075 | ext4_ext_drop_refs(path); | ||
5076 | kfree(path); | ||
5077 | if (ret) | ||
5078 | break; | ||
5079 | } | ||
5080 | |||
5081 | return ret; | ||
5082 | } | ||
5083 | |||
5084 | /* | ||
5085 | * ext4_collapse_range: | ||
5086 | * This implements the fallocate's collapse range functionality for ext4 | ||
5087 | * Returns: 0 and non-zero on error. | ||
5088 | */ | ||
5089 | int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | ||
5090 | { | ||
5091 | struct super_block *sb = inode->i_sb; | ||
5092 | ext4_lblk_t punch_start, punch_stop; | ||
5093 | handle_t *handle; | ||
5094 | unsigned int credits; | ||
5095 | loff_t new_size; | ||
5096 | int ret; | ||
5097 | |||
5098 | BUG_ON(offset + len > i_size_read(inode)); | ||
5099 | |||
5100 | /* Collapse range works only on fs block size aligned offsets. */ | ||
5101 | if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || | ||
5102 | len & (EXT4_BLOCK_SIZE(sb) - 1)) | ||
5103 | return -EINVAL; | ||
5104 | |||
5105 | if (!S_ISREG(inode->i_mode)) | ||
5106 | return -EOPNOTSUPP; | ||
5107 | |||
5108 | trace_ext4_collapse_range(inode, offset, len); | ||
5109 | |||
5110 | punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); | ||
5111 | punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
5112 | |||
5113 | /* Write out all dirty pages */ | ||
5114 | ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); | ||
5115 | if (ret) | ||
5116 | return ret; | ||
5117 | |||
5118 | /* Take mutex lock */ | ||
5119 | mutex_lock(&inode->i_mutex); | ||
5120 | |||
5121 | /* It's not possible punch hole on append only file */ | ||
5122 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
5123 | ret = -EPERM; | ||
5124 | goto out_mutex; | ||
5125 | } | ||
5126 | |||
5127 | if (IS_SWAPFILE(inode)) { | ||
5128 | ret = -ETXTBSY; | ||
5129 | goto out_mutex; | ||
5130 | } | ||
5131 | |||
5132 | /* Currently just for extent based files */ | ||
5133 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
5134 | ret = -EOPNOTSUPP; | ||
5135 | goto out_mutex; | ||
5136 | } | ||
5137 | |||
5138 | truncate_pagecache_range(inode, offset, -1); | ||
5139 | |||
5140 | /* Wait for existing dio to complete */ | ||
5141 | ext4_inode_block_unlocked_dio(inode); | ||
5142 | inode_dio_wait(inode); | ||
5143 | |||
5144 | credits = ext4_writepage_trans_blocks(inode); | ||
5145 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | ||
5146 | if (IS_ERR(handle)) { | ||
5147 | ret = PTR_ERR(handle); | ||
5148 | goto out_dio; | ||
5149 | } | ||
5150 | |||
5151 | down_write(&EXT4_I(inode)->i_data_sem); | ||
5152 | ext4_discard_preallocations(inode); | ||
5153 | |||
5154 | ret = ext4_es_remove_extent(inode, punch_start, | ||
5155 | EXT_MAX_BLOCKS - punch_start - 1); | ||
5156 | if (ret) { | ||
5157 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5158 | goto out_stop; | ||
5159 | } | ||
5160 | |||
5161 | ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); | ||
5162 | if (ret) { | ||
5163 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5164 | goto out_stop; | ||
5165 | } | ||
5166 | |||
5167 | ret = ext4_ext_shift_extents(inode, handle, punch_stop, | ||
5168 | punch_stop - punch_start); | ||
5169 | if (ret) { | ||
5170 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5171 | goto out_stop; | ||
5172 | } | ||
5173 | |||
5174 | new_size = i_size_read(inode) - len; | ||
5175 | truncate_setsize(inode, new_size); | ||
5176 | EXT4_I(inode)->i_disksize = new_size; | ||
5177 | |||
5178 | ext4_discard_preallocations(inode); | ||
5179 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5180 | if (IS_SYNC(inode)) | ||
5181 | ext4_handle_sync(handle); | ||
5182 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
5183 | ext4_mark_inode_dirty(handle, inode); | ||
5184 | |||
5185 | out_stop: | ||
5186 | ext4_journal_stop(handle); | ||
5187 | out_dio: | ||
5188 | ext4_inode_resume_unlocked_dio(inode); | ||
5189 | out_mutex: | ||
5190 | mutex_unlock(&inode->i_mutex); | ||
5191 | return ret; | ||
5192 | } | ||
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index f39a88abe32c..58ee7dc87669 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) | |||
76 | * ext4_ext_path structure refers to the last extent, or a negative error | 76 | * ext4_ext_path structure refers to the last extent, or a negative error |
77 | * value on failure. | 77 | * value on failure. |
78 | */ | 78 | */ |
79 | static int | 79 | int |
80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | 80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, |
81 | struct ext4_extent **extent) | 81 | struct ext4_extent **extent) |
82 | { | 82 | { |
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 451e0202aa69..e9d7ee77d3a1 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
@@ -16,6 +16,11 @@ struct mpage_da_data; | |||
16 | struct ext4_map_blocks; | 16 | struct ext4_map_blocks; |
17 | struct extent_status; | 17 | struct extent_status; |
18 | 18 | ||
19 | /* shim until we merge in the xfs_collapse_range branch */ | ||
20 | #ifndef FALLOC_FL_COLLAPSE_RANGE | ||
21 | #define FALLOC_FL_COLLAPSE_RANGE 0x08 | ||
22 | #endif | ||
23 | |||
19 | #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) | 24 | #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) |
20 | 25 | ||
21 | #define show_mballoc_flags(flags) __print_flags(flags, "|", \ | 26 | #define show_mballoc_flags(flags) __print_flags(flags, "|", \ |
@@ -71,7 +76,8 @@ struct extent_status; | |||
71 | #define show_falloc_mode(mode) __print_flags(mode, "|", \ | 76 | #define show_falloc_mode(mode) __print_flags(mode, "|", \ |
72 | { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ | 77 | { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ |
73 | { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ | 78 | { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ |
74 | { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}) | 79 | { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ |
80 | { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}) | ||
75 | 81 | ||
76 | 82 | ||
77 | TRACE_EVENT(ext4_free_inode, | 83 | TRACE_EVENT(ext4_free_inode, |
@@ -2415,6 +2421,31 @@ TRACE_EVENT(ext4_es_shrink_exit, | |||
2415 | __entry->shrunk_nr, __entry->cache_cnt) | 2421 | __entry->shrunk_nr, __entry->cache_cnt) |
2416 | ); | 2422 | ); |
2417 | 2423 | ||
2424 | TRACE_EVENT(ext4_collapse_range, | ||
2425 | TP_PROTO(struct inode *inode, loff_t offset, loff_t len), | ||
2426 | |||
2427 | TP_ARGS(inode, offset, len), | ||
2428 | |||
2429 | TP_STRUCT__entry( | ||
2430 | __field(dev_t, dev) | ||
2431 | __field(ino_t, ino) | ||
2432 | __field(loff_t, offset) | ||
2433 | __field(loff_t, len) | ||
2434 | ), | ||
2435 | |||
2436 | TP_fast_assign( | ||
2437 | __entry->dev = inode->i_sb->s_dev; | ||
2438 | __entry->ino = inode->i_ino; | ||
2439 | __entry->offset = offset; | ||
2440 | __entry->len = len; | ||
2441 | ), | ||
2442 | |||
2443 | TP_printk("dev %d,%d ino %lu offset %lld len %lld", | ||
2444 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
2445 | (unsigned long) __entry->ino, | ||
2446 | __entry->offset, __entry->len) | ||
2447 | ); | ||
2448 | |||
2418 | #endif /* _TRACE_EXT4_H */ | 2449 | #endif /* _TRACE_EXT4_H */ |
2419 | 2450 | ||
2420 | /* This part must be outside protection */ | 2451 | /* This part must be outside protection */ |