diff options
author | Dmitry Monakhov <dmonakhov@openvz.org> | 2012-09-30 23:03:42 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-09-30 23:03:42 -0400 |
commit | 02d262dffcf4c74e5c4612ee736bdb94f18ed5b9 (patch) | |
tree | 1afa479ec99369e739f789ff020df4fba9a4aab8 /fs/ext4/extents.c | |
parent | 1f555cfa29e8f787d675e8390f88ce517a37271a (diff) |
ext4: punch_hole should wait for DIO writers
punch_hole is the place where we have to wait for all existing writers
(writeback, aio, dio), but currently we simply flush pending end_io requests,
which is not sufficient. Another issue is that punch_hole is performed without
i_mutex held, which obviously results in dangerous data corruption due to
write-after-free.
This patch performs the following changes:
- Guard punch_hole with i_mutex
- Recheck inode flags under i_mutex
- Block all new dio readers in order to prevent an information leak caused by
a read-after-free pattern.
- punch_hole now waits for all writers in flight
NOTE: XXX write-after-free race is still possible because new dirty pages
may appear due to mmap(), and currently there is no easy way to stop
writeback while punch_hole is in progress.
[ Fixed error return from ext4_ext_punch_hole() to make sure that we
release i_mutex before returning EPERM or ETXTBSY -- Ted ]
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 53 |
1 files changed, 36 insertions, 17 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 232077439aa8..5920e75fc05f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -4794,9 +4794,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4794 | loff_t first_page_offset, last_page_offset; | 4794 | loff_t first_page_offset, last_page_offset; |
4795 | int credits, err = 0; | 4795 | int credits, err = 0; |
4796 | 4796 | ||
4797 | /* | ||
4798 | * Write out all dirty pages to avoid race conditions | ||
4799 | * Then release them. | ||
4800 | */ | ||
4801 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4802 | err = filemap_write_and_wait_range(mapping, | ||
4803 | offset, offset + length - 1); | ||
4804 | |||
4805 | if (err) | ||
4806 | return err; | ||
4807 | } | ||
4808 | |||
4809 | mutex_lock(&inode->i_mutex); | ||
4810 | /* It's not possible punch hole on append only file */ | ||
4811 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
4812 | err = -EPERM; | ||
4813 | goto out_mutex; | ||
4814 | } | ||
4815 | if (IS_SWAPFILE(inode)) { | ||
4816 | err = -ETXTBSY; | ||
4817 | goto out_mutex; | ||
4818 | } | ||
4819 | |||
4797 | /* No need to punch hole beyond i_size */ | 4820 | /* No need to punch hole beyond i_size */ |
4798 | if (offset >= inode->i_size) | 4821 | if (offset >= inode->i_size) |
4799 | return 0; | 4822 | goto out_mutex; |
4800 | 4823 | ||
4801 | /* | 4824 | /* |
4802 | * If the hole extends beyond i_size, set the hole | 4825 | * If the hole extends beyond i_size, set the hole |
@@ -4814,33 +4837,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4814 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 4837 | first_page_offset = first_page << PAGE_CACHE_SHIFT; |
4815 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 4838 | last_page_offset = last_page << PAGE_CACHE_SHIFT; |
4816 | 4839 | ||
4817 | /* | ||
4818 | * Write out all dirty pages to avoid race conditions | ||
4819 | * Then release them. | ||
4820 | */ | ||
4821 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4822 | err = filemap_write_and_wait_range(mapping, | ||
4823 | offset, offset + length - 1); | ||
4824 | |||
4825 | if (err) | ||
4826 | return err; | ||
4827 | } | ||
4828 | |||
4829 | /* Now release the pages */ | 4840 | /* Now release the pages */ |
4830 | if (last_page_offset > first_page_offset) { | 4841 | if (last_page_offset > first_page_offset) { |
4831 | truncate_pagecache_range(inode, first_page_offset, | 4842 | truncate_pagecache_range(inode, first_page_offset, |
4832 | last_page_offset - 1); | 4843 | last_page_offset - 1); |
4833 | } | 4844 | } |
4834 | 4845 | ||
4835 | /* finish any pending end_io work */ | 4846 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
4847 | ext4_inode_block_unlocked_dio(inode); | ||
4848 | inode_dio_wait(inode); | ||
4836 | err = ext4_flush_completed_IO(inode); | 4849 | err = ext4_flush_completed_IO(inode); |
4837 | if (err) | 4850 | if (err) |
4838 | return err; | 4851 | goto out_dio; |
4839 | 4852 | ||
4840 | credits = ext4_writepage_trans_blocks(inode); | 4853 | credits = ext4_writepage_trans_blocks(inode); |
4841 | handle = ext4_journal_start(inode, credits); | 4854 | handle = ext4_journal_start(inode, credits); |
4842 | if (IS_ERR(handle)) | 4855 | if (IS_ERR(handle)) { |
4843 | return PTR_ERR(handle); | 4856 | err = PTR_ERR(handle); |
4857 | goto out_dio; | ||
4858 | } | ||
4844 | 4859 | ||
4845 | 4860 | ||
4846 | /* | 4861 | /* |
@@ -4930,6 +4945,10 @@ out: | |||
4930 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4945 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4931 | ext4_mark_inode_dirty(handle, inode); | 4946 | ext4_mark_inode_dirty(handle, inode); |
4932 | ext4_journal_stop(handle); | 4947 | ext4_journal_stop(handle); |
4948 | out_dio: | ||
4949 | ext4_inode_resume_unlocked_dio(inode); | ||
4950 | out_mutex: | ||
4951 | mutex_unlock(&inode->i_mutex); | ||
4933 | return err; | 4952 | return err; |
4934 | } | 4953 | } |
4935 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4954 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |