aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2013-04-03 12:45:17 -0400
committer Theodore Ts'o <tytso@mit.edu> 2013-04-03 12:45:17 -0400
commit 26a4c0c6ccecf6814cf44f951c97222bd795bc1a (patch)
tree 9b3a2fa02c61464ead1456499cad46e41644878b /fs/ext4/inode.c
parent 781f143ea0fd7981ebe2e8cd96114997c8cf6c07 (diff)
ext4: refactor punch hole code
Move common code in ext4_ind_punch_hole() and ext4_ext_punch_hole() into ext4_punch_hole(). This saves over 150 lines of code. This also fixes a potential bug when the punch_hole() code is racing against indirect-to-extents or extents-to-indirect migration. We are currently using i_mutex to protect against changes to the inode flag; specifically, the append-only, immutable, and extents inode flags. So we need to take i_mutex before deciding whether to use the extents-specific or indirect-specific punch_hole code. Also, there was a missing call to ext4_inode_block_unlocked_dio() in the indirect punch codepath. This was added in commit 02d262dffcf4c to block DIO readers racing against the punch operation in the codepath for extent-mapped inodes, but it was missing for indirect-block mapped inodes. One of the advantages of refactoring the code is that it makes such oversights much less likely. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 180
1 files changed, 175 insertions, 5 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4ffb470fbf3..9bda50aa34e2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3566,20 +3566,190 @@ int ext4_can_truncate(struct inode *inode)
3566int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 3566int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3567{ 3567{
3568 struct inode *inode = file_inode(file); 3568 struct inode *inode = file_inode(file);
3569 struct super_block *sb = inode->i_sb;
3570 ext4_lblk_t first_block, stop_block;
3571 struct address_space *mapping = inode->i_mapping;
3572 loff_t first_page, last_page, page_len;
3573 loff_t first_page_offset, last_page_offset;
3574 handle_t *handle;
3575 unsigned int credits;
3576 int ret = 0;
3577
3569 if (!S_ISREG(inode->i_mode)) 3578 if (!S_ISREG(inode->i_mode))
3570 return -EOPNOTSUPP; 3579 return -EOPNOTSUPP;
3571 3580
3572 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3581 if (EXT4_SB(sb)->s_cluster_ratio > 1) {
3573 return ext4_ind_punch_hole(file, offset, length);
3574
3575 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3576 /* TODO: Add support for bigalloc file systems */ 3582 /* TODO: Add support for bigalloc file systems */
3577 return -EOPNOTSUPP; 3583 return -EOPNOTSUPP;
3578 } 3584 }
3579 3585
3580 trace_ext4_punch_hole(inode, offset, length); 3586 trace_ext4_punch_hole(inode, offset, length);
3581 3587
3582 return ext4_ext_punch_hole(file, offset, length); 3588 /*
3589 * Write out all dirty pages to avoid race conditions
3590 * Then release them.
3591 */
3592 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
3593 ret = filemap_write_and_wait_range(mapping, offset,
3594 offset + length - 1);
3595 if (ret)
3596 return ret;
3597 }
3598
3599 mutex_lock(&inode->i_mutex);
3600 /* It's not possible to punch a hole in an append-only or immutable file */
3601 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
3602 ret = -EPERM;
3603 goto out_mutex;
3604 }
3605 if (IS_SWAPFILE(inode)) {
3606 ret = -ETXTBSY;
3607 goto out_mutex;
3608 }
3609
3610 /* No need to punch hole beyond i_size */
3611 if (offset >= inode->i_size)
3612 goto out_mutex;
3613
3614 /*
3615 * If the hole extends beyond i_size, set the hole
3616 * to end after the page that contains i_size
3617 */
3618 if (offset + length > inode->i_size) {
3619 length = inode->i_size +
3620 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
3621 offset;
3622 }
3623
3624 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
3625 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
3626
3627 first_page_offset = first_page << PAGE_CACHE_SHIFT;
3628 last_page_offset = last_page << PAGE_CACHE_SHIFT;
3629
3630 /* Now release the pages */
3631 if (last_page_offset > first_page_offset) {
3632 truncate_pagecache_range(inode, first_page_offset,
3633 last_page_offset - 1);
3634 }
3635
3636 /* Wait for all existing dio workers; newcomers will block on i_mutex */
3637 ext4_inode_block_unlocked_dio(inode);
3638 ret = ext4_flush_unwritten_io(inode);
3639 if (ret)
3640 goto out_dio;
3641 inode_dio_wait(inode);
3642
3643 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3644 credits = ext4_writepage_trans_blocks(inode);
3645 else
3646 credits = ext4_blocks_for_truncate(inode);
3647 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
3648 if (IS_ERR(handle)) {
3649 ret = PTR_ERR(handle);
3650 ext4_std_error(sb, ret);
3651 goto out_dio;
3652 }
3653
3654 /*
3655 * Now we need to zero out the non-page-aligned data in the
3656 * pages at the start and tail of the hole, and unmap the
3657 * buffer heads for the block aligned regions of the page that
3658 * were completely zeroed.
3659 */
3660 if (first_page > last_page) {
3661 /*
3662 * If the file space being truncated is contained
3663 * within a page just zero out and unmap the middle of
3664 * that page
3665 */
3666 ret = ext4_discard_partial_page_buffers(handle,
3667 mapping, offset, length, 0);
3668
3669 if (ret)
3670 goto out_stop;
3671 } else {
3672 /*
3673 * zero out and unmap the partial page that contains
3674 * the start of the hole
3675 */
3676 page_len = first_page_offset - offset;
3677 if (page_len > 0) {
3678 ret = ext4_discard_partial_page_buffers(handle, mapping,
3679 offset, page_len, 0);
3680 if (ret)
3681 goto out_stop;
3682 }
3683
3684 /*
3685 * zero out and unmap the partial page that contains
3686 * the end of the hole
3687 */
3688 page_len = offset + length - last_page_offset;
3689 if (page_len > 0) {
3690 ret = ext4_discard_partial_page_buffers(handle, mapping,
3691 last_page_offset, page_len, 0);
3692 if (ret)
3693 goto out_stop;
3694 }
3695 }
3696
3697 /*
3698 * If i_size is contained in the last page, we need to
3699 * unmap and zero the partial page after i_size
3700 */
3701 if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
3702 inode->i_size % PAGE_CACHE_SIZE != 0) {
3703 page_len = PAGE_CACHE_SIZE -
3704 (inode->i_size & (PAGE_CACHE_SIZE - 1));
3705
3706 if (page_len > 0) {
3707 ret = ext4_discard_partial_page_buffers(handle,
3708 mapping, inode->i_size, page_len, 0);
3709
3710 if (ret)
3711 goto out_stop;
3712 }
3713 }
3714
3715 first_block = (offset + sb->s_blocksize - 1) >>
3716 EXT4_BLOCK_SIZE_BITS(sb);
3717 stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
3718
3719 /* If there are no blocks to remove, return now */
3720 if (first_block >= stop_block)
3721 goto out_stop;
3722
3723 down_write(&EXT4_I(inode)->i_data_sem);
3724 ext4_discard_preallocations(inode);
3725
3726 ret = ext4_es_remove_extent(inode, first_block,
3727 stop_block - first_block);
3728 if (ret) {
3729 up_write(&EXT4_I(inode)->i_data_sem);
3730 goto out_stop;
3731 }
3732
3733 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3734 ret = ext4_ext_remove_space(inode, first_block,
3735 stop_block - 1);
3736 else
3737 ret = ext4_free_hole_blocks(handle, inode, first_block,
3738 stop_block);
3739
3740 ext4_discard_preallocations(inode);
3741 if (IS_SYNC(inode))
3742 ext4_handle_sync(handle);
3743 up_write(&EXT4_I(inode)->i_data_sem);
3744 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3745 ext4_mark_inode_dirty(handle, inode);
3746out_stop:
3747 ext4_journal_stop(handle);
3748out_dio:
3749 ext4_inode_resume_unlocked_dio(inode);
3750out_mutex:
3751 mutex_unlock(&inode->i_mutex);
3752 return ret;
3583} 3753}
3584 3754
3585/* 3755/*