diff options
author | Theodore Ts'o <tytso@mit.edu> | 2013-04-03 12:45:17 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-04-03 12:45:17 -0400 |
commit | 26a4c0c6ccecf6814cf44f951c97222bd795bc1a (patch) | |
tree | 9b3a2fa02c61464ead1456499cad46e41644878b /fs/ext4/inode.c | |
parent | 781f143ea0fd7981ebe2e8cd96114997c8cf6c07 (diff) |
ext4: refactor punch hole code
Move common code in ext4_ind_punch_hole() and ext4_ext_punch_hole()
into ext4_punch_hole(). This saves over 150 lines of code.
This also fixes a potential bug when the punch_hole() code is racing
against indirect-to-extents or extents-to-indirect migration. We are
currently using i_mutex to protect against changes to the inode flags;
specifically, the append-only, immutable, and extents inode flags. So
we need to take i_mutex before deciding whether to use the
extents-specific or indirect-specific punch_hole code.
Also, there was a missing call to ext4_inode_block_unlocked_dio() in
the indirect punch codepath. This was added in commit 02d262dffcf4c
to block DIO readers racing against the punch operation in the
codepath for extent-mapped inodes, but it was missing for
indirect-block mapped inodes. One of the advantages of refactoring
the code is that it makes such oversights much less likely.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 180 |
1 files changed, 175 insertions, 5 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a4ffb470fbf3..9bda50aa34e2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3566,20 +3566,190 @@ int ext4_can_truncate(struct inode *inode) | |||
3566 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | 3566 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) |
3567 | { | 3567 | { |
3568 | struct inode *inode = file_inode(file); | 3568 | struct inode *inode = file_inode(file); |
3569 | struct super_block *sb = inode->i_sb; | ||
3570 | ext4_lblk_t first_block, stop_block; | ||
3571 | struct address_space *mapping = inode->i_mapping; | ||
3572 | loff_t first_page, last_page, page_len; | ||
3573 | loff_t first_page_offset, last_page_offset; | ||
3574 | handle_t *handle; | ||
3575 | unsigned int credits; | ||
3576 | int ret = 0; | ||
3577 | |||
3569 | if (!S_ISREG(inode->i_mode)) | 3578 | if (!S_ISREG(inode->i_mode)) |
3570 | return -EOPNOTSUPP; | 3579 | return -EOPNOTSUPP; |
3571 | 3580 | ||
3572 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3581 | if (EXT4_SB(sb)->s_cluster_ratio > 1) { |
3573 | return ext4_ind_punch_hole(file, offset, length); | ||
3574 | |||
3575 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { | ||
3576 | /* TODO: Add support for bigalloc file systems */ | 3582 | /* TODO: Add support for bigalloc file systems */ |
3577 | return -EOPNOTSUPP; | 3583 | return -EOPNOTSUPP; |
3578 | } | 3584 | } |
3579 | 3585 | ||
3580 | trace_ext4_punch_hole(inode, offset, length); | 3586 | trace_ext4_punch_hole(inode, offset, length); |
3581 | 3587 | ||
3582 | return ext4_ext_punch_hole(file, offset, length); | 3588 | /* |
3589 | * Write out all dirty pages to avoid race conditions | ||
3590 | * Then release them. | ||
3591 | */ | ||
3592 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
3593 | ret = filemap_write_and_wait_range(mapping, offset, | ||
3594 | offset + length - 1); | ||
3595 | if (ret) | ||
3596 | return ret; | ||
3597 | } | ||
3598 | |||
3599 | mutex_lock(&inode->i_mutex); | ||
3600 | /* It's not possible punch hole on append only file */ | ||
3601 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
3602 | ret = -EPERM; | ||
3603 | goto out_mutex; | ||
3604 | } | ||
3605 | if (IS_SWAPFILE(inode)) { | ||
3606 | ret = -ETXTBSY; | ||
3607 | goto out_mutex; | ||
3608 | } | ||
3609 | |||
3610 | /* No need to punch hole beyond i_size */ | ||
3611 | if (offset >= inode->i_size) | ||
3612 | goto out_mutex; | ||
3613 | |||
3614 | /* | ||
3615 | * If the hole extends beyond i_size, set the hole | ||
3616 | * to end after the page that contains i_size | ||
3617 | */ | ||
3618 | if (offset + length > inode->i_size) { | ||
3619 | length = inode->i_size + | ||
3620 | PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - | ||
3621 | offset; | ||
3622 | } | ||
3623 | |||
3624 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
3625 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | ||
3626 | |||
3627 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | ||
3628 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | ||
3629 | |||
3630 | /* Now release the pages */ | ||
3631 | if (last_page_offset > first_page_offset) { | ||
3632 | truncate_pagecache_range(inode, first_page_offset, | ||
3633 | last_page_offset - 1); | ||
3634 | } | ||
3635 | |||
3636 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
3637 | ext4_inode_block_unlocked_dio(inode); | ||
3638 | ret = ext4_flush_unwritten_io(inode); | ||
3639 | if (ret) | ||
3640 | goto out_dio; | ||
3641 | inode_dio_wait(inode); | ||
3642 | |||
3643 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
3644 | credits = ext4_writepage_trans_blocks(inode); | ||
3645 | else | ||
3646 | credits = ext4_blocks_for_truncate(inode); | ||
3647 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | ||
3648 | if (IS_ERR(handle)) { | ||
3649 | ret = PTR_ERR(handle); | ||
3650 | ext4_std_error(sb, ret); | ||
3651 | goto out_dio; | ||
3652 | } | ||
3653 | |||
3654 | /* | ||
3655 | * Now we need to zero out the non-page-aligned data in the | ||
3656 | * pages at the start and tail of the hole, and unmap the | ||
3657 | * buffer heads for the block aligned regions of the page that | ||
3658 | * were completely zeroed. | ||
3659 | */ | ||
3660 | if (first_page > last_page) { | ||
3661 | /* | ||
3662 | * If the file space being truncated is contained | ||
3663 | * within a page just zero out and unmap the middle of | ||
3664 | * that page | ||
3665 | */ | ||
3666 | ret = ext4_discard_partial_page_buffers(handle, | ||
3667 | mapping, offset, length, 0); | ||
3668 | |||
3669 | if (ret) | ||
3670 | goto out_stop; | ||
3671 | } else { | ||
3672 | /* | ||
3673 | * zero out and unmap the partial page that contains | ||
3674 | * the start of the hole | ||
3675 | */ | ||
3676 | page_len = first_page_offset - offset; | ||
3677 | if (page_len > 0) { | ||
3678 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3679 | offset, page_len, 0); | ||
3680 | if (ret) | ||
3681 | goto out_stop; | ||
3682 | } | ||
3683 | |||
3684 | /* | ||
3685 | * zero out and unmap the partial page that contains | ||
3686 | * the end of the hole | ||
3687 | */ | ||
3688 | page_len = offset + length - last_page_offset; | ||
3689 | if (page_len > 0) { | ||
3690 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3691 | last_page_offset, page_len, 0); | ||
3692 | if (ret) | ||
3693 | goto out_stop; | ||
3694 | } | ||
3695 | } | ||
3696 | |||
3697 | /* | ||
3698 | * If i_size is contained in the last page, we need to | ||
3699 | * unmap and zero the partial page after i_size | ||
3700 | */ | ||
3701 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
3702 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
3703 | page_len = PAGE_CACHE_SIZE - | ||
3704 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
3705 | |||
3706 | if (page_len > 0) { | ||
3707 | ret = ext4_discard_partial_page_buffers(handle, | ||
3708 | mapping, inode->i_size, page_len, 0); | ||
3709 | |||
3710 | if (ret) | ||
3711 | goto out_stop; | ||
3712 | } | ||
3713 | } | ||
3714 | |||
3715 | first_block = (offset + sb->s_blocksize - 1) >> | ||
3716 | EXT4_BLOCK_SIZE_BITS(sb); | ||
3717 | stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
3718 | |||
3719 | /* If there are no blocks to remove, return now */ | ||
3720 | if (first_block >= stop_block) | ||
3721 | goto out_stop; | ||
3722 | |||
3723 | down_write(&EXT4_I(inode)->i_data_sem); | ||
3724 | ext4_discard_preallocations(inode); | ||
3725 | |||
3726 | ret = ext4_es_remove_extent(inode, first_block, | ||
3727 | stop_block - first_block); | ||
3728 | if (ret) { | ||
3729 | up_write(&EXT4_I(inode)->i_data_sem); | ||
3730 | goto out_stop; | ||
3731 | } | ||
3732 | |||
3733 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
3734 | ret = ext4_ext_remove_space(inode, first_block, | ||
3735 | stop_block - 1); | ||
3736 | else | ||
3737 | ret = ext4_free_hole_blocks(handle, inode, first_block, | ||
3738 | stop_block); | ||
3739 | |||
3740 | ext4_discard_preallocations(inode); | ||
3741 | if (IS_SYNC(inode)) | ||
3742 | ext4_handle_sync(handle); | ||
3743 | up_write(&EXT4_I(inode)->i_data_sem); | ||
3744 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
3745 | ext4_mark_inode_dirty(handle, inode); | ||
3746 | out_stop: | ||
3747 | ext4_journal_stop(handle); | ||
3748 | out_dio: | ||
3749 | ext4_inode_resume_unlocked_dio(inode); | ||
3750 | out_mutex: | ||
3751 | mutex_unlock(&inode->i_mutex); | ||
3752 | return ret; | ||
3583 | } | 3753 | } |
3584 | 3754 | ||
3585 | /* | 3755 | /* |