aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/extents.c273
-rw-r--r--fs/ext4/inode.c17
3 files changed, 272 insertions, 20 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index beec42750a8c..1b3cbf8cacf9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -568,6 +568,8 @@ enum {
568#define EXT4_GET_BLOCKS_NO_LOCK 0x0100 568#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
569 /* Do not put hole in extent cache */ 569 /* Do not put hole in extent cache */
570#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 570#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
571 /* Convert written extents to unwritten */
572#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400
571 573
572/* 574/*
573 * The bit position of these flags must not overlap with any of the 575 * The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2db2d77769a2..464e95da716e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3602,6 +3602,8 @@ out:
3602 * b> Splits in two extents: Write is happening at either end of the extent 3602 * b> Splits in two extents: Write is happening at either end of the extent
3603 * c> Splits in three extents: Somone is writing in middle of the extent 3603 * c> Splits in three extents: Somone is writing in middle of the extent
3604 * 3604 *
3605 * This works the same way in the case of initialized -> unwritten conversion.
3606 *
3605 * One of more index blocks maybe needed if the extent tree grow after 3607 * One of more index blocks maybe needed if the extent tree grow after
3606 * the uninitialized extent split. To prevent ENOSPC occur at the IO 3608 * the uninitialized extent split. To prevent ENOSPC occur at the IO
3607 * complete, we need to split the uninitialized extent before DIO submit 3609 * complete, we need to split the uninitialized extent before DIO submit
@@ -3612,7 +3614,7 @@ out:
3612 * 3614 *
3613 * Returns the size of uninitialized extent to be written on success. 3615 * Returns the size of uninitialized extent to be written on success.
3614 */ 3616 */
3615static int ext4_split_unwritten_extents(handle_t *handle, 3617static int ext4_split_convert_extents(handle_t *handle,
3616 struct inode *inode, 3618 struct inode *inode,
3617 struct ext4_map_blocks *map, 3619 struct ext4_map_blocks *map,
3618 struct ext4_ext_path *path, 3620 struct ext4_ext_path *path,
@@ -3624,9 +3626,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3624 unsigned int ee_len; 3626 unsigned int ee_len;
3625 int split_flag = 0, depth; 3627 int split_flag = 0, depth;
3626 3628
3627 ext_debug("ext4_split_unwritten_extents: inode %lu, logical" 3629 ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n",
3628 "block %llu, max_blocks %u\n", inode->i_ino, 3630 __func__, inode->i_ino,
3629 (unsigned long long)map->m_lblk, map->m_len); 3631 (unsigned long long)map->m_lblk, map->m_len);
3630 3632
3631 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3633 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3632 inode->i_sb->s_blocksize_bits; 3634 inode->i_sb->s_blocksize_bits;
@@ -3641,14 +3643,73 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3641 ee_block = le32_to_cpu(ex->ee_block); 3643 ee_block = le32_to_cpu(ex->ee_block);
3642 ee_len = ext4_ext_get_actual_len(ex); 3644 ee_len = ext4_ext_get_actual_len(ex);
3643 3645
3644 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3646 /* Convert to unwritten */
3645 split_flag |= EXT4_EXT_MARK_UNINIT2; 3647 if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3646 if (flags & EXT4_GET_BLOCKS_CONVERT) 3648 split_flag |= EXT4_EXT_DATA_VALID1;
3647 split_flag |= EXT4_EXT_DATA_VALID2; 3649 /* Convert to initialized */
3650 } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3651 split_flag |= ee_block + ee_len <= eof_block ?
3652 EXT4_EXT_MAY_ZEROOUT : 0;
3653 split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2);
3654 }
3648 flags |= EXT4_GET_BLOCKS_PRE_IO; 3655 flags |= EXT4_GET_BLOCKS_PRE_IO;
3649 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3656 return ext4_split_extent(handle, inode, path, map, split_flag, flags);
3650} 3657}
3651 3658
3659static int ext4_convert_initialized_extents(handle_t *handle,
3660 struct inode *inode,
3661 struct ext4_map_blocks *map,
3662 struct ext4_ext_path *path)
3663{
3664 struct ext4_extent *ex;
3665 ext4_lblk_t ee_block;
3666 unsigned int ee_len;
3667 int depth;
3668 int err = 0;
3669
3670 depth = ext_depth(inode);
3671 ex = path[depth].p_ext;
3672 ee_block = le32_to_cpu(ex->ee_block);
3673 ee_len = ext4_ext_get_actual_len(ex);
3674
3675 ext_debug("%s: inode %lu, logical"
3676 "block %llu, max_blocks %u\n", __func__, inode->i_ino,
3677 (unsigned long long)ee_block, ee_len);
3678
3679 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3680 err = ext4_split_convert_extents(handle, inode, map, path,
3681 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3682 if (err < 0)
3683 goto out;
3684 ext4_ext_drop_refs(path);
3685 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3686 if (IS_ERR(path)) {
3687 err = PTR_ERR(path);
3688 goto out;
3689 }
3690 depth = ext_depth(inode);
3691 ex = path[depth].p_ext;
3692 }
3693
3694 err = ext4_ext_get_access(handle, inode, path + depth);
3695 if (err)
3696 goto out;
3697 /* first mark the extent as uninitialized */
3698 ext4_ext_mark_uninitialized(ex);
3699
3700 /* note: ext4_ext_correct_indexes() isn't needed here because
3701 * borders are not changed
3702 */
3703 ext4_ext_try_to_merge(handle, inode, path, ex);
3704
3705 /* Mark modified extent as dirty */
3706 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3707out:
3708 ext4_ext_show_leaf(inode, path);
3709 return err;
3710}
3711
3712
3652static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3713static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3653 struct inode *inode, 3714 struct inode *inode,
3654 struct ext4_map_blocks *map, 3715 struct ext4_map_blocks *map,
@@ -3682,8 +3743,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3682 inode->i_ino, (unsigned long long)ee_block, ee_len, 3743 inode->i_ino, (unsigned long long)ee_block, ee_len,
3683 (unsigned long long)map->m_lblk, map->m_len); 3744 (unsigned long long)map->m_lblk, map->m_len);
3684#endif 3745#endif
3685 err = ext4_split_unwritten_extents(handle, inode, map, path, 3746 err = ext4_split_convert_extents(handle, inode, map, path,
3686 EXT4_GET_BLOCKS_CONVERT); 3747 EXT4_GET_BLOCKS_CONVERT);
3687 if (err < 0) 3748 if (err < 0)
3688 goto out; 3749 goto out;
3689 ext4_ext_drop_refs(path); 3750 ext4_ext_drop_refs(path);
@@ -3884,6 +3945,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3884} 3945}
3885 3946
3886static int 3947static int
3948ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
3949 struct ext4_map_blocks *map,
3950 struct ext4_ext_path *path, int flags,
3951 unsigned int allocated, ext4_fsblk_t newblock)
3952{
3953 int ret = 0;
3954 int err = 0;
3955
3956 /*
3957 * Make sure that the extent is no bigger than we support with
3958 * uninitialized extent
3959 */
3960 if (map->m_len > EXT_UNINIT_MAX_LEN)
3961 map->m_len = EXT_UNINIT_MAX_LEN / 2;
3962
3963 ret = ext4_convert_initialized_extents(handle, inode, map,
3964 path);
3965 if (ret >= 0) {
3966 ext4_update_inode_fsync_trans(handle, inode, 1);
3967 err = check_eofblocks_fl(handle, inode, map->m_lblk,
3968 path, map->m_len);
3969 } else
3970 err = ret;
3971 map->m_flags |= EXT4_MAP_UNWRITTEN;
3972 if (allocated > map->m_len)
3973 allocated = map->m_len;
3974 map->m_len = allocated;
3975
3976 return err ? err : allocated;
3977}
3978
3979static int
3887ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3980ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3888 struct ext4_map_blocks *map, 3981 struct ext4_map_blocks *map,
3889 struct ext4_ext_path *path, int flags, 3982 struct ext4_ext_path *path, int flags,
@@ -3910,8 +4003,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3910 4003
3911 /* get_block() before submit the IO, split the extent */ 4004 /* get_block() before submit the IO, split the extent */
3912 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4005 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3913 ret = ext4_split_unwritten_extents(handle, inode, map, 4006 ret = ext4_split_convert_extents(handle, inode, map,
3914 path, flags); 4007 path, flags | EXT4_GET_BLOCKS_CONVERT);
3915 if (ret <= 0) 4008 if (ret <= 0)
3916 goto out; 4009 goto out;
3917 /* 4010 /*
@@ -4199,6 +4292,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4199 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 4292 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4200 unsigned short ee_len; 4293 unsigned short ee_len;
4201 4294
4295
4202 /* 4296 /*
4203 * Uninitialized extents are treated as holes, except that 4297 * Uninitialized extents are treated as holes, except that
4204 * we split out initialized portions during a write. 4298 * we split out initialized portions during a write.
@@ -4215,7 +4309,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4215 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 4309 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
4216 ee_block, ee_len, newblock); 4310 ee_block, ee_len, newblock);
4217 4311
4218 if (!ext4_ext_is_uninitialized(ex)) 4312 /*
4313 * If the extent is initialized check whether the
4314 * caller wants to convert it to unwritten.
4315 */
4316 if ((!ext4_ext_is_uninitialized(ex)) &&
4317 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4318 allocated = ext4_ext_convert_initialized_extent(
4319 handle, inode, map, path, flags,
4320 allocated, newblock);
4321 goto out2;
4322 } else if (!ext4_ext_is_uninitialized(ex))
4219 goto out; 4323 goto out;
4220 4324
4221 ret = ext4_ext_handle_uninitialized_extents( 4325 ret = ext4_ext_handle_uninitialized_extents(
@@ -4604,6 +4708,144 @@ retry:
4604 return ret > 0 ? ret2 : ret; 4708 return ret > 0 ? ret2 : ret;
4605} 4709}
4606 4710
4711static long ext4_zero_range(struct file *file, loff_t offset,
4712 loff_t len, int mode)
4713{
4714 struct inode *inode = file_inode(file);
4715 handle_t *handle = NULL;
4716 unsigned int max_blocks;
4717 loff_t new_size = 0;
4718 int ret = 0;
4719 int flags;
4720 int partial;
4721 loff_t start, end;
4722 ext4_lblk_t lblk;
4723 struct address_space *mapping = inode->i_mapping;
4724 unsigned int blkbits = inode->i_blkbits;
4725
4726 trace_ext4_zero_range(inode, offset, len, mode);
4727
4728 /*
4729 * Write out all dirty pages to avoid race conditions
4730 * Then release them.
4731 */
4732 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4733 ret = filemap_write_and_wait_range(mapping, offset,
4734 offset + len - 1);
4735 if (ret)
4736 return ret;
4737 }
4738
4739 /*
4740 * Round up offset. This is not fallocate, we neet to zero out
4741 * blocks, so convert interior block aligned part of the range to
4742 * unwritten and possibly manually zero out unaligned parts of the
4743 * range.
4744 */
4745 start = round_up(offset, 1 << blkbits);
4746 end = round_down((offset + len), 1 << blkbits);
4747
4748 if (start < offset || end > offset + len)
4749 return -EINVAL;
4750 partial = (offset + len) & ((1 << blkbits) - 1);
4751
4752 lblk = start >> blkbits;
4753 max_blocks = (end >> blkbits);
4754 if (max_blocks < lblk)
4755 max_blocks = 0;
4756 else
4757 max_blocks -= lblk;
4758
4759 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
4760 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
4761 if (mode & FALLOC_FL_KEEP_SIZE)
4762 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4763
4764 mutex_lock(&inode->i_mutex);
4765
4766 /*
4767 * Indirect files do not support unwritten extnets
4768 */
4769 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4770 ret = -EOPNOTSUPP;
4771 goto out_mutex;
4772 }
4773
4774 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4775 offset + len > i_size_read(inode)) {
4776 new_size = offset + len;
4777 ret = inode_newsize_ok(inode, new_size);
4778 if (ret)
4779 goto out_mutex;
4780 /*
4781 * If we have a partial block after EOF we have to allocate
4782 * the entire block.
4783 */
4784 if (partial)
4785 max_blocks += 1;
4786 }
4787
4788 if (max_blocks > 0) {
4789
4790 /* Now release the pages and zero block aligned part of pages*/
4791 truncate_pagecache_range(inode, start, end - 1);
4792
4793 /* Wait all existing dio workers, newcomers will block on i_mutex */
4794 ext4_inode_block_unlocked_dio(inode);
4795 inode_dio_wait(inode);
4796
4797 /*
4798 * Remove entire range from the extent status tree.
4799 */
4800 ret = ext4_es_remove_extent(inode, lblk, max_blocks);
4801 if (ret)
4802 goto out_dio;
4803
4804 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
4805 mode);
4806 if (ret)
4807 goto out_dio;
4808 }
4809
4810 handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
4811 if (IS_ERR(handle)) {
4812 ret = PTR_ERR(handle);
4813 ext4_std_error(inode->i_sb, ret);
4814 goto out_dio;
4815 }
4816
4817 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4818
4819 if (!ret && new_size) {
4820 if (new_size > i_size_read(inode))
4821 i_size_write(inode, new_size);
4822 if (new_size > EXT4_I(inode)->i_disksize)
4823 ext4_update_i_disksize(inode, new_size);
4824 } else if (!ret && !new_size) {
4825 /*
4826 * Mark that we allocate beyond EOF so the subsequent truncate
4827 * can proceed even if the new size is the same as i_size.
4828 */
4829 if ((offset + len) > i_size_read(inode))
4830 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4831 }
4832
4833 ext4_mark_inode_dirty(handle, inode);
4834
4835 /* Zero out partial block at the edges of the range */
4836 ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4837
4838 if (file->f_flags & O_SYNC)
4839 ext4_handle_sync(handle);
4840
4841 ext4_journal_stop(handle);
4842out_dio:
4843 ext4_inode_resume_unlocked_dio(inode);
4844out_mutex:
4845 mutex_unlock(&inode->i_mutex);
4846 return ret;
4847}
4848
4607/* 4849/*
4608 * preallocate space for a file. This implements ext4's fallocate file 4850 * preallocate space for a file. This implements ext4's fallocate file
4609 * operation, which gets called from sys_fallocate system call. 4851 * operation, which gets called from sys_fallocate system call.
@@ -4625,7 +4867,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4625 4867
4626 /* Return error if mode is not supported */ 4868 /* Return error if mode is not supported */
4627 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 4869 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4628 FALLOC_FL_COLLAPSE_RANGE)) 4870 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
4629 return -EOPNOTSUPP; 4871 return -EOPNOTSUPP;
4630 4872
4631 if (mode & FALLOC_FL_PUNCH_HOLE) 4873 if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -4645,6 +4887,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4645 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4887 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4646 return -EOPNOTSUPP; 4888 return -EOPNOTSUPP;
4647 4889
4890 if (mode & FALLOC_FL_ZERO_RANGE)
4891 return ext4_zero_range(file, offset, len, mode);
4892
4648 trace_ext4_fallocate_enter(inode, offset, len, mode); 4893 trace_ext4_fallocate_enter(inode, offset, len, mode);
4649 lblk = offset >> blkbits; 4894 lblk = offset >> blkbits;
4650 /* 4895 /*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab3e8357929d..7cc24555eca8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -503,6 +503,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
503{ 503{
504 struct extent_status es; 504 struct extent_status es;
505 int retval; 505 int retval;
506 int ret = 0;
506#ifdef ES_AGGRESSIVE_TEST 507#ifdef ES_AGGRESSIVE_TEST
507 struct ext4_map_blocks orig_map; 508 struct ext4_map_blocks orig_map;
508 509
@@ -558,7 +559,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
558 EXT4_GET_BLOCKS_KEEP_SIZE); 559 EXT4_GET_BLOCKS_KEEP_SIZE);
559 } 560 }
560 if (retval > 0) { 561 if (retval > 0) {
561 int ret;
562 unsigned int status; 562 unsigned int status;
563 563
564 if (unlikely(retval != map->m_len)) { 564 if (unlikely(retval != map->m_len)) {
@@ -585,7 +585,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
585 585
586found: 586found:
587 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 587 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
588 int ret = check_block_validity(inode, map); 588 ret = check_block_validity(inode, map);
589 if (ret != 0) 589 if (ret != 0)
590 return ret; 590 return ret;
591 } 591 }
@@ -602,7 +602,13 @@ found:
602 * with buffer head unmapped. 602 * with buffer head unmapped.
603 */ 603 */
604 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 604 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
605 return retval; 605 /*
606 * If we need to convert extent to unwritten
607 * we continue and do the actual work in
608 * ext4_ext_map_blocks()
609 */
610 if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN))
611 return retval;
606 612
607 /* 613 /*
608 * Here we clear m_flags because after allocating an new extent, 614 * Here we clear m_flags because after allocating an new extent,
@@ -658,7 +664,6 @@ found:
658 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 664 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
659 665
660 if (retval > 0) { 666 if (retval > 0) {
661 int ret;
662 unsigned int status; 667 unsigned int status;
663 668
664 if (unlikely(retval != map->m_len)) { 669 if (unlikely(retval != map->m_len)) {
@@ -693,7 +698,7 @@ found:
693has_zeroout: 698has_zeroout:
694 up_write((&EXT4_I(inode)->i_data_sem)); 699 up_write((&EXT4_I(inode)->i_data_sem));
695 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 700 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
696 int ret = check_block_validity(inode, map); 701 ret = check_block_validity(inode, map);
697 if (ret != 0) 702 if (ret != 0)
698 return ret; 703 return ret;
699 } 704 }
@@ -3507,7 +3512,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3507 if (!S_ISREG(inode->i_mode)) 3512 if (!S_ISREG(inode->i_mode))
3508 return -EOPNOTSUPP; 3513 return -EOPNOTSUPP;
3509 3514
3510 trace_ext4_punch_hole(inode, offset, length); 3515 trace_ext4_punch_hole(inode, offset, length, 0);
3511 3516
3512 /* 3517 /*
3513 * Write out all dirty pages to avoid race conditions 3518 * Write out all dirty pages to avoid race conditions