author    Namjae Jeon <namjae.jeon@samsung.com>    2015-06-09 01:55:03 -0400
committer Theodore Ts'o <tytso@mit.edu>            2015-06-09 01:55:03 -0400
commit    331573febb6a224bc50322e3670da326cb7f4cfc
tree      a302585d04479bea70f2c60371ff40d57db0f699 /fs/ext4/extents.c
parent    de92c8caf16ca84926fa31b7a5590c0fb9c0d5ca
ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate
This patch implements fallocate's FALLOC_FL_INSERT_RANGE for Ext4.

1) Make sure that both offset and len are block size aligned.
2) Update the i_size of inode by len bytes.
3) Compute the file's logical block number against offset. If the computed
   block number is not the starting block of the extent, split the extent
   such that the block number is the starting block of the extent.
4) Shift all the extents which are lying between [offset, last allocated
   extent] towards right by len bytes. This step will make a hole of len
   bytes at offset.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
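As a quick orientation for the steps above, here is a minimal userspace sketch of how the new flag is driven once this patch is applied. It is illustrative only and not part of the patch: the file name "testfile" and the 4 KiB offset / 8 KiB length are assumptions chosen for a filesystem with 4 KiB blocks.

/*
 * Hypothetical usage sketch, not part of the patch: insert an 8 KiB hole at
 * offset 4 KiB in "testfile". Offset and length are assumed to be multiples
 * of the filesystem block size, as step 1 above requires.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        off_t offset = 4096;    /* block size aligned */
        off_t len = 8192;       /* block size aligned */
        int fd = open("testfile", O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Shift everything from offset to EOF right by len; i_size grows by len */
        if (fallocate(fd, FALLOC_FL_INSERT_RANGE, offset, len) < 0)
                perror("fallocate(FALLOC_FL_INSERT_RANGE)");

        close(fd);
        return 0;
}

Per the checks added in ext4_insert_range() below, misaligned offsets and offsets at or beyond i_size fail with EINVAL, and non-extent files fail with EOPNOTSUPP.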
Diffstat (limited to 'fs/ext4/extents.c')
 -rw-r--r--  fs/ext4/extents.c  313
 1 file changed, 261 insertions(+), 52 deletions(-)
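The hunks below pass a new enum SHIFT_DIRECTION value (SHIFT_LEFT or SHIFT_RIGHT) into the extent-shifting helpers. Its definition lives outside extents.c, so it does not appear in this path-limited diff; the following is only a sketch of what the companion fs/ext4/ext4.h declaration presumably looks like, an assumption rather than a quote from the patch.

/* Assumed companion declaration in fs/ext4/ext4.h; not shown in this view. */
enum SHIFT_DIRECTION {
        SHIFT_LEFT = 0,
        SHIFT_RIGHT,
};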
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f38a6d6c4a93..08f5afc95c88 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4912,12 +4912,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
          * bug we should fix....
          */
         if (ext4_encrypted_inode(inode) &&
-            (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
+            (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
+                     FALLOC_FL_ZERO_RANGE)))
                 return -EOPNOTSUPP;
 
         /* Return error if mode is not supported */
         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+                     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
+                     FALLOC_FL_INSERT_RANGE))
                 return -EOPNOTSUPP;
 
         if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -4930,6 +4932,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
         if (mode & FALLOC_FL_COLLAPSE_RANGE)
                 return ext4_collapse_range(inode, offset, len);
 
+        if (mode & FALLOC_FL_INSERT_RANGE)
+                return ext4_insert_range(inode, offset, len);
+
         if (mode & FALLOC_FL_ZERO_RANGE)
                 return ext4_zero_range(file, offset, len, mode);
 
@@ -5224,13 +5229,13 @@ ext4_access_path(handle_t *handle, struct inode *inode,
 /*
  * ext4_ext_shift_path_extents:
  * Shift the extents of a path structure lying between path[depth].p_ext
- * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
- * from starting block for each extent.
+ * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
+ * if it is right shift or left shift operation.
  */
 static int
 ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                             struct inode *inode, handle_t *handle,
-                            ext4_lblk_t *start)
+                            enum SHIFT_DIRECTION SHIFT)
 {
         int depth, err = 0;
         struct ext4_extent *ex_start, *ex_last;
@@ -5252,19 +5257,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                         if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
                                 update = 1;
 
-                        *start = le32_to_cpu(ex_last->ee_block) +
-                                ext4_ext_get_actual_len(ex_last);
-
                         while (ex_start <= ex_last) {
-                                le32_add_cpu(&ex_start->ee_block, -shift);
-                                /* Try to merge to the left. */
-                                if ((ex_start >
-                                     EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
-                                    ext4_ext_try_to_merge_right(inode,
-                                                        path, ex_start - 1))
+                                if (SHIFT == SHIFT_LEFT) {
+                                        le32_add_cpu(&ex_start->ee_block,
+                                                        -shift);
+                                        /* Try to merge to the left. */
+                                        if ((ex_start >
+                                            EXT_FIRST_EXTENT(path[depth].p_hdr))
+                                            &&
+                                            ext4_ext_try_to_merge_right(inode,
+                                            path, ex_start - 1))
+                                                ex_last--;
+                                        else
+                                                ex_start++;
+                                } else {
+                                        le32_add_cpu(&ex_last->ee_block, shift);
+                                        ext4_ext_try_to_merge_right(inode, path,
+                                                        ex_last);
                                         ex_last--;
-                                else
-                                        ex_start++;
+                                }
                         }
                         err = ext4_ext_dirty(handle, inode, path + depth);
                         if (err)
@@ -5279,7 +5290,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                 if (err)
                         goto out;
 
-                le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
+                if (SHIFT == SHIFT_LEFT)
+                        le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
+                else
+                        le32_add_cpu(&path[depth].p_idx->ei_block, shift);
                 err = ext4_ext_dirty(handle, inode, path + depth);
                 if (err)
                         goto out;
@@ -5297,19 +5311,20 @@ out:
 
 /*
  * ext4_ext_shift_extents:
- * All the extents which lies in the range from start to the last allocated
- * block for the file are shifted downwards by shift blocks.
+ * All the extents which lies in the range from @start to the last allocated
+ * block for the @inode are shifted either towards left or right (depending
+ * upon @SHIFT) by @shift blocks.
  * On success, 0 is returned, error otherwise.
  */
 static int
 ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
-                        ext4_lblk_t start, ext4_lblk_t shift)
+                        ext4_lblk_t start, ext4_lblk_t shift,
+                        enum SHIFT_DIRECTION SHIFT)
 {
         struct ext4_ext_path *path;
         int ret = 0, depth;
         struct ext4_extent *extent;
-        ext4_lblk_t stop_block;
-        ext4_lblk_t ex_start, ex_end;
+        ext4_lblk_t stop, *iterator, ex_start, ex_end;
 
         /* Let path point to the last extent */
         path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
@@ -5321,58 +5336,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
         if (!extent)
                 goto out;
 
-        stop_block = le32_to_cpu(extent->ee_block) +
+        stop = le32_to_cpu(extent->ee_block) +
                         ext4_ext_get_actual_len(extent);
 
-        /* Nothing to shift, if hole is at the end of file */
-        if (start >= stop_block)
-                goto out;
+        /*
+         * In case of left shift, Don't start shifting extents until we make
+         * sure the hole is big enough to accommodate the shift.
+         */
+        if (SHIFT == SHIFT_LEFT) {
+                path = ext4_find_extent(inode, start - 1, &path, 0);
+                if (IS_ERR(path))
+                        return PTR_ERR(path);
+                depth = path->p_depth;
+                extent = path[depth].p_ext;
+                if (extent) {
+                        ex_start = le32_to_cpu(extent->ee_block);
+                        ex_end = le32_to_cpu(extent->ee_block) +
+                                ext4_ext_get_actual_len(extent);
+                } else {
+                        ex_start = 0;
+                        ex_end = 0;
+                }
 
-        /*
-         * Don't start shifting extents until we make sure the hole is big
-         * enough to accomodate the shift.
-         */
-        path = ext4_find_extent(inode, start - 1, &path, 0);
-        if (IS_ERR(path))
-                return PTR_ERR(path);
-        depth = path->p_depth;
-        extent = path[depth].p_ext;
-        if (extent) {
-                ex_start = le32_to_cpu(extent->ee_block);
-                ex_end = le32_to_cpu(extent->ee_block) +
-                        ext4_ext_get_actual_len(extent);
-        } else {
-                ex_start = 0;
-                ex_end = 0;
+                if ((start == ex_start && shift > ex_start) ||
+                    (shift > start - ex_end)) {
+                        ext4_ext_drop_refs(path);
+                        kfree(path);
+                        return -EINVAL;
+                }
         }
 
-        if ((start == ex_start && shift > ex_start) ||
-            (shift > start - ex_end))
-                return -EINVAL;
+        /*
+         * In case of left shift, iterator points to start and it is increased
+         * till we reach stop. In case of right shift, iterator points to stop
+         * and it is decreased till we reach start.
+         */
+        if (SHIFT == SHIFT_LEFT)
+                iterator = &start;
+        else
+                iterator = &stop;
 
         /* Its safe to start updating extents */
-        while (start < stop_block) {
-                path = ext4_find_extent(inode, start, &path, 0);
+        while (start < stop) {
+                path = ext4_find_extent(inode, *iterator, &path, 0);
                 if (IS_ERR(path))
                         return PTR_ERR(path);
                 depth = path->p_depth;
                 extent = path[depth].p_ext;
                 if (!extent) {
                         EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
-                                         (unsigned long) start);
+                                         (unsigned long) *iterator);
                         return -EIO;
                 }
-                if (start > le32_to_cpu(extent->ee_block)) {
+                if (SHIFT == SHIFT_LEFT && *iterator >
+                    le32_to_cpu(extent->ee_block)) {
                         /* Hole, move to the next extent */
                         if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
                                 path[depth].p_ext++;
                         } else {
-                                start = ext4_ext_next_allocated_block(path);
+                                *iterator = ext4_ext_next_allocated_block(path);
                                 continue;
                         }
                 }
+
+                if (SHIFT == SHIFT_LEFT) {
+                        extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+                        *iterator = le32_to_cpu(extent->ee_block) +
+                                        ext4_ext_get_actual_len(extent);
+                } else {
+                        extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
+                        *iterator = le32_to_cpu(extent->ee_block) > 0 ?
+                                le32_to_cpu(extent->ee_block) - 1 : 0;
+                        /* Update path extent in case we need to stop */
+                        while (le32_to_cpu(extent->ee_block) < start)
+                                extent++;
+                        path[depth].p_ext = extent;
+                }
                 ret = ext4_ext_shift_path_extents(path, shift, inode,
-                                                  handle, &start);
+                                                  handle, SHIFT);
                 if (ret)
                         break;
         }
@@ -5485,7 +5526,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
         ext4_discard_preallocations(inode);
 
         ret = ext4_ext_shift_extents(inode, handle, punch_stop,
-                                     punch_stop - punch_start);
+                                     punch_stop - punch_start, SHIFT_LEFT);
         if (ret) {
                 up_write(&EXT4_I(inode)->i_data_sem);
                 goto out_stop;
@@ -5510,6 +5551,174 @@ out_mutex:
         return ret;
 }
 
+/*
+ * ext4_insert_range:
+ * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
+ * The data blocks starting from @offset to the EOF are shifted by @len
+ * towards right to create a hole in the @inode. Inode size is increased
+ * by len bytes.
+ * Returns 0 on success, error otherwise.
+ */
+int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+{
+        struct super_block *sb = inode->i_sb;
+        handle_t *handle;
+        struct ext4_ext_path *path;
+        struct ext4_extent *extent;
+        ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
+        unsigned int credits, ee_len;
+        int ret = 0, depth, split_flag = 0;
+        loff_t ioffset;
+
+        /*
+         * We need to test this early because xfstests assumes that an
+         * insert range of (0, 1) will return EOPNOTSUPP if the file
+         * system does not support insert range.
+         */
+        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+                return -EOPNOTSUPP;
+
+        /* Insert range works only on fs block size aligned offsets. */
+        if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
+                        len & (EXT4_CLUSTER_SIZE(sb) - 1))
+                return -EINVAL;
+
+        if (!S_ISREG(inode->i_mode))
+                return -EOPNOTSUPP;
+
+        trace_ext4_insert_range(inode, offset, len);
+
+        offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+        len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
+
+        /* Call ext4_force_commit to flush all data in case of data=journal */
+        if (ext4_should_journal_data(inode)) {
+                ret = ext4_force_commit(inode->i_sb);
+                if (ret)
+                        return ret;
+        }
+
+        /*
+         * Need to round down to align start offset to page size boundary
+         * for page size > block size.
+         */
+        ioffset = round_down(offset, PAGE_SIZE);
+
+        /* Write out all dirty pages */
+        ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                        LLONG_MAX);
+        if (ret)
+                return ret;
+
+        /* Take mutex lock */
+        mutex_lock(&inode->i_mutex);
+
+        /* Currently just for extent based files */
+        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+                ret = -EOPNOTSUPP;
+                goto out_mutex;
+        }
+
+        /* Check for wrap through zero */
+        if (inode->i_size + len > inode->i_sb->s_maxbytes) {
+                ret = -EFBIG;
+                goto out_mutex;
+        }
+
+        /* Offset should be less than i_size */
+        if (offset >= i_size_read(inode)) {
+                ret = -EINVAL;
+                goto out_mutex;
+        }
+
+        truncate_pagecache(inode, ioffset);
+
+        /* Wait for existing dio to complete */
+        ext4_inode_block_unlocked_dio(inode);
+        inode_dio_wait(inode);
+
+        credits = ext4_writepage_trans_blocks(inode);
+        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+        if (IS_ERR(handle)) {
+                ret = PTR_ERR(handle);
+                goto out_dio;
+        }
+
+        /* Expand file to avoid data loss if there is error while shifting */
+        inode->i_size += len;
+        EXT4_I(inode)->i_disksize += len;
+        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+        ret = ext4_mark_inode_dirty(handle, inode);
+        if (ret)
+                goto out_stop;
+
+        down_write(&EXT4_I(inode)->i_data_sem);
+        ext4_discard_preallocations(inode);
+
+        path = ext4_find_extent(inode, offset_lblk, NULL, 0);
+        if (IS_ERR(path)) {
+                up_write(&EXT4_I(inode)->i_data_sem);
+                goto out_stop;
+        }
+
+        depth = ext_depth(inode);
+        extent = path[depth].p_ext;
+        if (extent) {
+                ee_start_lblk = le32_to_cpu(extent->ee_block);
+                ee_len = ext4_ext_get_actual_len(extent);
+
+                /*
+                 * If offset_lblk is not the starting block of extent, split
+                 * the extent @offset_lblk
+                 */
+                if ((offset_lblk > ee_start_lblk) &&
+                                (offset_lblk < (ee_start_lblk + ee_len))) {
+                        if (ext4_ext_is_unwritten(extent))
+                                split_flag = EXT4_EXT_MARK_UNWRIT1 |
+                                        EXT4_EXT_MARK_UNWRIT2;
+                        ret = ext4_split_extent_at(handle, inode, &path,
+                                        offset_lblk, split_flag,
+                                        EXT4_EX_NOCACHE |
+                                        EXT4_GET_BLOCKS_PRE_IO |
+                                        EXT4_GET_BLOCKS_METADATA_NOFAIL);
+                }
+
+                ext4_ext_drop_refs(path);
+                kfree(path);
+                if (ret < 0) {
+                        up_write(&EXT4_I(inode)->i_data_sem);
+                        goto out_stop;
+                }
+        }
+
+        ret = ext4_es_remove_extent(inode, offset_lblk,
+                        EXT_MAX_BLOCKS - offset_lblk);
+        if (ret) {
+                up_write(&EXT4_I(inode)->i_data_sem);
+                goto out_stop;
+        }
+
+        /*
+         * if offset_lblk lies in a hole which is at start of file, use
+         * ee_start_lblk to shift extents
+         */
+        ret = ext4_ext_shift_extents(inode, handle,
+                ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
+                len_lblk, SHIFT_RIGHT);
+
+        up_write(&EXT4_I(inode)->i_data_sem);
+        if (IS_SYNC(inode))
+                ext4_handle_sync(handle);
+
+out_stop:
+        ext4_journal_stop(handle);
+out_dio:
+        ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+        mutex_unlock(&inode->i_mutex);
+        return ret;
+}
+
 /**
  * ext4_swap_extents - Swap extents between two inodes
  *