aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
authorDmitry Monakhov <dmonakhov@openvz.org>2012-09-30 23:03:42 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-09-30 23:03:42 -0400
commit02d262dffcf4c74e5c4612ee736bdb94f18ed5b9 (patch)
tree1afa479ec99369e739f789ff020df4fba9a4aab8 /fs/ext4/extents.c
parent1f555cfa29e8f787d675e8390f88ce517a37271a (diff)
ext4: punch_hole should wait for DIO writers
punch_hole is the place where we have to wait for all existing writers (writeback, aio, dio), but currently we simply flush pending end_io requests, which is not sufficient. Another issue is that punch_hole is performed without i_mutex held, which obviously results in dangerous data corruption due to write-after-free. This patch makes the following changes: - Guard punch_hole with i_mutex - Recheck inode flags under i_mutex - Block all new dio readers in order to prevent an information leak caused by the read-after-free pattern. - punch_hole now waits for all writers in flight NOTE: XXX a write-after-free race is still possible because new dirty pages may appear due to mmap(), and currently there is no easy way to stop writeback while punch_hole is in progress. [ Fixed error return from ext4_ext_punch_hole() to make sure that we release i_mutex before returning EPERM or ETXTBSY -- Ted ] Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c53
1 files changed, 36 insertions, 17 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 232077439aa8..5920e75fc05f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4794,9 +4794,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4794 loff_t first_page_offset, last_page_offset; 4794 loff_t first_page_offset, last_page_offset;
4795 int credits, err = 0; 4795 int credits, err = 0;
4796 4796
4797 /*
4798 * Write out all dirty pages to avoid race conditions
4799 * Then release them.
4800 */
4801 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4802 err = filemap_write_and_wait_range(mapping,
4803 offset, offset + length - 1);
4804
4805 if (err)
4806 return err;
4807 }
4808
4809 mutex_lock(&inode->i_mutex);
4810 /* It's not possible to punch a hole in an append-only or immutable file */
4811 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
4812 err = -EPERM;
4813 goto out_mutex;
4814 }
4815 if (IS_SWAPFILE(inode)) {
4816 err = -ETXTBSY;
4817 goto out_mutex;
4818 }
4819
4797 /* No need to punch hole beyond i_size */ 4820 /* No need to punch hole beyond i_size */
4798 if (offset >= inode->i_size) 4821 if (offset >= inode->i_size)
4799 return 0; 4822 goto out_mutex;
4800 4823
4801 /* 4824 /*
4802 * If the hole extends beyond i_size, set the hole 4825 * If the hole extends beyond i_size, set the hole
@@ -4814,33 +4837,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4814 first_page_offset = first_page << PAGE_CACHE_SHIFT; 4837 first_page_offset = first_page << PAGE_CACHE_SHIFT;
4815 last_page_offset = last_page << PAGE_CACHE_SHIFT; 4838 last_page_offset = last_page << PAGE_CACHE_SHIFT;
4816 4839
4817 /*
4818 * Write out all dirty pages to avoid race conditions
4819 * Then release them.
4820 */
4821 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4822 err = filemap_write_and_wait_range(mapping,
4823 offset, offset + length - 1);
4824
4825 if (err)
4826 return err;
4827 }
4828
4829 /* Now release the pages */ 4840 /* Now release the pages */
4830 if (last_page_offset > first_page_offset) { 4841 if (last_page_offset > first_page_offset) {
4831 truncate_pagecache_range(inode, first_page_offset, 4842 truncate_pagecache_range(inode, first_page_offset,
4832 last_page_offset - 1); 4843 last_page_offset - 1);
4833 } 4844 }
4834 4845
4835 /* finish any pending end_io work */ 4846 /* Wait for all existing dio workers; newcomers will block on i_mutex */
4847 ext4_inode_block_unlocked_dio(inode);
4848 inode_dio_wait(inode);
4836 err = ext4_flush_completed_IO(inode); 4849 err = ext4_flush_completed_IO(inode);
4837 if (err) 4850 if (err)
4838 return err; 4851 goto out_dio;
4839 4852
4840 credits = ext4_writepage_trans_blocks(inode); 4853 credits = ext4_writepage_trans_blocks(inode);
4841 handle = ext4_journal_start(inode, credits); 4854 handle = ext4_journal_start(inode, credits);
4842 if (IS_ERR(handle)) 4855 if (IS_ERR(handle)) {
4843 return PTR_ERR(handle); 4856 err = PTR_ERR(handle);
4857 goto out_dio;
4858 }
4844 4859
4845 4860
4846 /* 4861 /*
@@ -4930,6 +4945,10 @@ out:
4930 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4945 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4931 ext4_mark_inode_dirty(handle, inode); 4946 ext4_mark_inode_dirty(handle, inode);
4932 ext4_journal_stop(handle); 4947 ext4_journal_stop(handle);
4948out_dio:
4949 ext4_inode_resume_unlocked_dio(inode);
4950out_mutex:
4951 mutex_unlock(&inode->i_mutex);
4933 return err; 4952 return err;
4934} 4953}
4935int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4954int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,