diff options
author | Jan Kara <jack@suse.com> | 2015-12-07 14:28:03 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2015-12-07 14:28:03 -0500 |
commit | ea3d7209ca01da209cda6f0dea8be9cc4b7a933b (patch) | |
tree | 809b37322befdf8dda2d12b991d1c832241bc8bc | |
parent | f41683a204ea61568f0fd0804d47c19561f2ee39 (diff) |
ext4: fix races between page faults and hole punching
Currently, page faults and hole punching are completely unsynchronized.
This can result in a page fault faulting in a page into a range that we
are punching after truncate_pagecache_range() has been called and thus
we can end up with a page mapped to disk blocks that will be shortly
freed. Filesystem corruption will shortly follow. Note that the same
race is avoided for truncate by checking page fault offset against
i_size, but there isn't a similar mechanism available for punching holes.
Fix the problem by creating new rw semaphore i_mmap_sem in inode and
grab it for writing over truncate, hole punching, and other functions
removing blocks from extent tree and for read over page faults. We
cannot easily use i_data_sem for this since that ranks below transaction
start and we need something ranking above it so that it can be held over
the whole truncate / hole punching operation. Also remove various
workarounds we had in the code to reduce race window when page fault
could have created pages with stale mapping information.
Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r-- | fs/ext4/ext4.h | 10 | ||||
-rw-r--r-- | fs/ext4/extents.c | 54 | ||||
-rw-r--r-- | fs/ext4/file.c | 66 | ||||
-rw-r--r-- | fs/ext4/inode.c | 36 | ||||
-rw-r--r-- | fs/ext4/super.c | 1 | ||||
-rw-r--r-- | fs/ext4/truncate.h | 2 |
6 files changed, 127 insertions, 42 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cc7ca4e87144..348a5ff4a0e2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -910,6 +910,15 @@ struct ext4_inode_info { | |||
910 | * by other means, so we have i_data_sem. | 910 | * by other means, so we have i_data_sem. |
911 | */ | 911 | */ |
912 | struct rw_semaphore i_data_sem; | 912 | struct rw_semaphore i_data_sem; |
913 | /* | ||
914 | * i_mmap_sem is for serializing page faults with truncate / punch hole | ||
915 | * operations. We have to make sure that new page cannot be faulted in | ||
916 | * a section of the inode that is being punched. We cannot easily use | ||
917 | * i_data_sem for this since we need protection for the whole punch | ||
918 | * operation and i_data_sem ranks below transaction start so we have | ||
919 | * to occasionally drop it. | ||
920 | */ | ||
921 | struct rw_semaphore i_mmap_sem; | ||
913 | struct inode vfs_inode; | 922 | struct inode vfs_inode; |
914 | struct jbd2_inode *jinode; | 923 | struct jbd2_inode *jinode; |
915 | 924 | ||
@@ -2484,6 +2493,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | |||
2484 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 2493 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
2485 | loff_t lstart, loff_t lend); | 2494 | loff_t lstart, loff_t lend); |
2486 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2495 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2496 | extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); | ||
2487 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 2497 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
2488 | extern void ext4_da_update_reserve_space(struct inode *inode, | 2498 | extern void ext4_da_update_reserve_space(struct inode *inode, |
2489 | int used, int quota_claim); | 2499 | int used, int quota_claim); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 551353b1b17a..5be9ca5a8a7a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -4770,7 +4770,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4770 | int partial_begin, partial_end; | 4770 | int partial_begin, partial_end; |
4771 | loff_t start, end; | 4771 | loff_t start, end; |
4772 | ext4_lblk_t lblk; | 4772 | ext4_lblk_t lblk; |
4773 | struct address_space *mapping = inode->i_mapping; | ||
4774 | unsigned int blkbits = inode->i_blkbits; | 4773 | unsigned int blkbits = inode->i_blkbits; |
4775 | 4774 | ||
4776 | trace_ext4_zero_range(inode, offset, len, mode); | 4775 | trace_ext4_zero_range(inode, offset, len, mode); |
@@ -4786,17 +4785,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4786 | } | 4785 | } |
4787 | 4786 | ||
4788 | /* | 4787 | /* |
4789 | * Write out all dirty pages to avoid race conditions | ||
4790 | * Then release them. | ||
4791 | */ | ||
4792 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4793 | ret = filemap_write_and_wait_range(mapping, offset, | ||
4794 | offset + len - 1); | ||
4795 | if (ret) | ||
4796 | return ret; | ||
4797 | } | ||
4798 | |||
4799 | /* | ||
4800 | * Round up offset. This is not fallocate, we neet to zero out | 4788 | * Round up offset. This is not fallocate, we neet to zero out |
4801 | * blocks, so convert interior block aligned part of the range to | 4789 | * blocks, so convert interior block aligned part of the range to |
4802 | * unwritten and possibly manually zero out unaligned parts of the | 4790 | * unwritten and possibly manually zero out unaligned parts of the |
@@ -4856,16 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4856 | flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | | 4844 | flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | |
4857 | EXT4_EX_NOCACHE); | 4845 | EXT4_EX_NOCACHE); |
4858 | 4846 | ||
4859 | /* Now release the pages and zero block aligned part of pages*/ | ||
4860 | truncate_pagecache_range(inode, start, end - 1); | ||
4861 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4862 | |||
4863 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | 4847 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
4864 | ext4_inode_block_unlocked_dio(inode); | 4848 | ext4_inode_block_unlocked_dio(inode); |
4865 | inode_dio_wait(inode); | 4849 | inode_dio_wait(inode); |
4866 | 4850 | ||
4851 | /* | ||
4852 | * Prevent page faults from reinstantiating pages we have | ||
4853 | * released from page cache. | ||
4854 | */ | ||
4855 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
4856 | /* Now release the pages and zero block aligned part of pages */ | ||
4857 | truncate_pagecache_range(inode, start, end - 1); | ||
4858 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4859 | |||
4867 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | 4860 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, |
4868 | flags, mode); | 4861 | flags, mode); |
4862 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
4869 | if (ret) | 4863 | if (ret) |
4870 | goto out_dio; | 4864 | goto out_dio; |
4871 | } | 4865 | } |
@@ -5524,17 +5518,22 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
5524 | goto out_mutex; | 5518 | goto out_mutex; |
5525 | } | 5519 | } |
5526 | 5520 | ||
5527 | truncate_pagecache(inode, ioffset); | ||
5528 | |||
5529 | /* Wait for existing dio to complete */ | 5521 | /* Wait for existing dio to complete */ |
5530 | ext4_inode_block_unlocked_dio(inode); | 5522 | ext4_inode_block_unlocked_dio(inode); |
5531 | inode_dio_wait(inode); | 5523 | inode_dio_wait(inode); |
5532 | 5524 | ||
5525 | /* | ||
5526 | * Prevent page faults from reinstantiating pages we have released from | ||
5527 | * page cache. | ||
5528 | */ | ||
5529 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
5530 | truncate_pagecache(inode, ioffset); | ||
5531 | |||
5533 | credits = ext4_writepage_trans_blocks(inode); | 5532 | credits = ext4_writepage_trans_blocks(inode); |
5534 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | 5533 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); |
5535 | if (IS_ERR(handle)) { | 5534 | if (IS_ERR(handle)) { |
5536 | ret = PTR_ERR(handle); | 5535 | ret = PTR_ERR(handle); |
5537 | goto out_dio; | 5536 | goto out_mmap; |
5538 | } | 5537 | } |
5539 | 5538 | ||
5540 | down_write(&EXT4_I(inode)->i_data_sem); | 5539 | down_write(&EXT4_I(inode)->i_data_sem); |
@@ -5573,7 +5572,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
5573 | 5572 | ||
5574 | out_stop: | 5573 | out_stop: |
5575 | ext4_journal_stop(handle); | 5574 | ext4_journal_stop(handle); |
5576 | out_dio: | 5575 | out_mmap: |
5576 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
5577 | ext4_inode_resume_unlocked_dio(inode); | 5577 | ext4_inode_resume_unlocked_dio(inode); |
5578 | out_mutex: | 5578 | out_mutex: |
5579 | mutex_unlock(&inode->i_mutex); | 5579 | mutex_unlock(&inode->i_mutex); |
@@ -5660,17 +5660,22 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
5660 | goto out_mutex; | 5660 | goto out_mutex; |
5661 | } | 5661 | } |
5662 | 5662 | ||
5663 | truncate_pagecache(inode, ioffset); | ||
5664 | |||
5665 | /* Wait for existing dio to complete */ | 5663 | /* Wait for existing dio to complete */ |
5666 | ext4_inode_block_unlocked_dio(inode); | 5664 | ext4_inode_block_unlocked_dio(inode); |
5667 | inode_dio_wait(inode); | 5665 | inode_dio_wait(inode); |
5668 | 5666 | ||
5667 | /* | ||
5668 | * Prevent page faults from reinstantiating pages we have released from | ||
5669 | * page cache. | ||
5670 | */ | ||
5671 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
5672 | truncate_pagecache(inode, ioffset); | ||
5673 | |||
5669 | credits = ext4_writepage_trans_blocks(inode); | 5674 | credits = ext4_writepage_trans_blocks(inode); |
5670 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | 5675 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); |
5671 | if (IS_ERR(handle)) { | 5676 | if (IS_ERR(handle)) { |
5672 | ret = PTR_ERR(handle); | 5677 | ret = PTR_ERR(handle); |
5673 | goto out_dio; | 5678 | goto out_mmap; |
5674 | } | 5679 | } |
5675 | 5680 | ||
5676 | /* Expand file to avoid data loss if there is error while shifting */ | 5681 | /* Expand file to avoid data loss if there is error while shifting */ |
@@ -5741,7 +5746,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
5741 | 5746 | ||
5742 | out_stop: | 5747 | out_stop: |
5743 | ext4_journal_stop(handle); | 5748 | ext4_journal_stop(handle); |
5744 | out_dio: | 5749 | out_mmap: |
5750 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
5745 | ext4_inode_resume_unlocked_dio(inode); | 5751 | ext4_inode_resume_unlocked_dio(inode); |
5746 | out_mutex: | 5752 | out_mutex: |
5747 | mutex_unlock(&inode->i_mutex); | 5753 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 113837e7ba98..0d24ebcd7c9e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
209 | { | 209 | { |
210 | int result; | 210 | int result; |
211 | handle_t *handle = NULL; | 211 | handle_t *handle = NULL; |
212 | struct super_block *sb = file_inode(vma->vm_file)->i_sb; | 212 | struct inode *inode = file_inode(vma->vm_file); |
213 | struct super_block *sb = inode->i_sb; | ||
213 | bool write = vmf->flags & FAULT_FLAG_WRITE; | 214 | bool write = vmf->flags & FAULT_FLAG_WRITE; |
214 | 215 | ||
215 | if (write) { | 216 | if (write) { |
216 | sb_start_pagefault(sb); | 217 | sb_start_pagefault(sb); |
217 | file_update_time(vma->vm_file); | 218 | file_update_time(vma->vm_file); |
219 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
218 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, | 220 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, |
219 | EXT4_DATA_TRANS_BLOCKS(sb)); | 221 | EXT4_DATA_TRANS_BLOCKS(sb)); |
220 | } | 222 | } else |
223 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
221 | 224 | ||
222 | if (IS_ERR(handle)) | 225 | if (IS_ERR(handle)) |
223 | result = VM_FAULT_SIGBUS; | 226 | result = VM_FAULT_SIGBUS; |
@@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
228 | if (write) { | 231 | if (write) { |
229 | if (!IS_ERR(handle)) | 232 | if (!IS_ERR(handle)) |
230 | ext4_journal_stop(handle); | 233 | ext4_journal_stop(handle); |
234 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
231 | sb_end_pagefault(sb); | 235 | sb_end_pagefault(sb); |
232 | } | 236 | } else |
237 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
233 | 238 | ||
234 | return result; | 239 | return result; |
235 | } | 240 | } |
@@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
246 | if (write) { | 251 | if (write) { |
247 | sb_start_pagefault(sb); | 252 | sb_start_pagefault(sb); |
248 | file_update_time(vma->vm_file); | 253 | file_update_time(vma->vm_file); |
254 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
249 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, | 255 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, |
250 | ext4_chunk_trans_blocks(inode, | 256 | ext4_chunk_trans_blocks(inode, |
251 | PMD_SIZE / PAGE_SIZE)); | 257 | PMD_SIZE / PAGE_SIZE)); |
252 | } | 258 | } else |
259 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
253 | 260 | ||
254 | if (IS_ERR(handle)) | 261 | if (IS_ERR(handle)) |
255 | result = VM_FAULT_SIGBUS; | 262 | result = VM_FAULT_SIGBUS; |
@@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
260 | if (write) { | 267 | if (write) { |
261 | if (!IS_ERR(handle)) | 268 | if (!IS_ERR(handle)) |
262 | ext4_journal_stop(handle); | 269 | ext4_journal_stop(handle); |
270 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
263 | sb_end_pagefault(sb); | 271 | sb_end_pagefault(sb); |
264 | } | 272 | } else |
273 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
265 | 274 | ||
266 | return result; | 275 | return result; |
267 | } | 276 | } |
268 | 277 | ||
269 | static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 278 | static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
270 | { | 279 | { |
271 | return dax_mkwrite(vma, vmf, ext4_get_block_dax, | 280 | int err; |
272 | ext4_end_io_unwritten); | 281 | struct inode *inode = file_inode(vma->vm_file); |
282 | |||
283 | sb_start_pagefault(inode->i_sb); | ||
284 | file_update_time(vma->vm_file); | ||
285 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
286 | err = __dax_mkwrite(vma, vmf, ext4_get_block_dax, | ||
287 | ext4_end_io_unwritten); | ||
288 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
289 | sb_end_pagefault(inode->i_sb); | ||
290 | |||
291 | return err; | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite() | ||
296 | * handler we check for races agaist truncate. Note that since we cycle through | ||
297 | * i_mmap_sem, we are sure that also any hole punching that began before we | ||
298 | * were called is finished by now and so if it included part of the file we | ||
299 | * are working on, our pte will get unmapped and the check for pte_same() in | ||
300 | * wp_pfn_shared() fails. Thus fault gets retried and things work out as | ||
301 | * desired. | ||
302 | */ | ||
303 | static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, | ||
304 | struct vm_fault *vmf) | ||
305 | { | ||
306 | struct inode *inode = file_inode(vma->vm_file); | ||
307 | struct super_block *sb = inode->i_sb; | ||
308 | int ret = VM_FAULT_NOPAGE; | ||
309 | loff_t size; | ||
310 | |||
311 | sb_start_pagefault(sb); | ||
312 | file_update_time(vma->vm_file); | ||
313 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
314 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
315 | if (vmf->pgoff >= size) | ||
316 | ret = VM_FAULT_SIGBUS; | ||
317 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
318 | sb_end_pagefault(sb); | ||
319 | |||
320 | return ret; | ||
273 | } | 321 | } |
274 | 322 | ||
275 | static const struct vm_operations_struct ext4_dax_vm_ops = { | 323 | static const struct vm_operations_struct ext4_dax_vm_ops = { |
276 | .fault = ext4_dax_fault, | 324 | .fault = ext4_dax_fault, |
277 | .pmd_fault = ext4_dax_pmd_fault, | 325 | .pmd_fault = ext4_dax_pmd_fault, |
278 | .page_mkwrite = ext4_dax_mkwrite, | 326 | .page_mkwrite = ext4_dax_mkwrite, |
279 | .pfn_mkwrite = dax_pfn_mkwrite, | 327 | .pfn_mkwrite = ext4_dax_pfn_mkwrite, |
280 | }; | 328 | }; |
281 | #else | 329 | #else |
282 | #define ext4_dax_vm_ops ext4_file_vm_ops | 330 | #define ext4_dax_vm_ops ext4_file_vm_ops |
283 | #endif | 331 | #endif |
284 | 332 | ||
285 | static const struct vm_operations_struct ext4_file_vm_ops = { | 333 | static const struct vm_operations_struct ext4_file_vm_ops = { |
286 | .fault = filemap_fault, | 334 | .fault = ext4_filemap_fault, |
287 | .map_pages = filemap_map_pages, | 335 | .map_pages = filemap_map_pages, |
288 | .page_mkwrite = ext4_page_mkwrite, | 336 | .page_mkwrite = ext4_page_mkwrite, |
289 | }; | 337 | }; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ea433a7f4bca..d1207d03c961 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3623,6 +3623,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3623 | 3623 | ||
3624 | } | 3624 | } |
3625 | 3625 | ||
3626 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
3627 | ext4_inode_block_unlocked_dio(inode); | ||
3628 | inode_dio_wait(inode); | ||
3629 | |||
3630 | /* | ||
3631 | * Prevent page faults from reinstantiating pages we have released from | ||
3632 | * page cache. | ||
3633 | */ | ||
3634 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
3626 | first_block_offset = round_up(offset, sb->s_blocksize); | 3635 | first_block_offset = round_up(offset, sb->s_blocksize); |
3627 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; | 3636 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; |
3628 | 3637 | ||
@@ -3631,10 +3640,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3631 | truncate_pagecache_range(inode, first_block_offset, | 3640 | truncate_pagecache_range(inode, first_block_offset, |
3632 | last_block_offset); | 3641 | last_block_offset); |
3633 | 3642 | ||
3634 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
3635 | ext4_inode_block_unlocked_dio(inode); | ||
3636 | inode_dio_wait(inode); | ||
3637 | |||
3638 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3643 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3639 | credits = ext4_writepage_trans_blocks(inode); | 3644 | credits = ext4_writepage_trans_blocks(inode); |
3640 | else | 3645 | else |
@@ -3680,16 +3685,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3680 | if (IS_SYNC(inode)) | 3685 | if (IS_SYNC(inode)) |
3681 | ext4_handle_sync(handle); | 3686 | ext4_handle_sync(handle); |
3682 | 3687 | ||
3683 | /* Now release the pages again to reduce race window */ | ||
3684 | if (last_block_offset > first_block_offset) | ||
3685 | truncate_pagecache_range(inode, first_block_offset, | ||
3686 | last_block_offset); | ||
3687 | |||
3688 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3688 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3689 | ext4_mark_inode_dirty(handle, inode); | 3689 | ext4_mark_inode_dirty(handle, inode); |
3690 | out_stop: | 3690 | out_stop: |
3691 | ext4_journal_stop(handle); | 3691 | ext4_journal_stop(handle); |
3692 | out_dio: | 3692 | out_dio: |
3693 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
3693 | ext4_inode_resume_unlocked_dio(inode); | 3694 | ext4_inode_resume_unlocked_dio(inode); |
3694 | out_mutex: | 3695 | out_mutex: |
3695 | mutex_unlock(&inode->i_mutex); | 3696 | mutex_unlock(&inode->i_mutex); |
@@ -4823,6 +4824,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4823 | } else | 4824 | } else |
4824 | ext4_wait_for_tail_page_commit(inode); | 4825 | ext4_wait_for_tail_page_commit(inode); |
4825 | } | 4826 | } |
4827 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
4826 | /* | 4828 | /* |
4827 | * Truncate pagecache after we've waited for commit | 4829 | * Truncate pagecache after we've waited for commit |
4828 | * in data=journal mode to make pages freeable. | 4830 | * in data=journal mode to make pages freeable. |
@@ -4830,6 +4832,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4830 | truncate_pagecache(inode, inode->i_size); | 4832 | truncate_pagecache(inode, inode->i_size); |
4831 | if (shrink) | 4833 | if (shrink) |
4832 | ext4_truncate(inode); | 4834 | ext4_truncate(inode); |
4835 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
4833 | } | 4836 | } |
4834 | 4837 | ||
4835 | if (!rc) { | 4838 | if (!rc) { |
@@ -5278,6 +5281,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5278 | 5281 | ||
5279 | sb_start_pagefault(inode->i_sb); | 5282 | sb_start_pagefault(inode->i_sb); |
5280 | file_update_time(vma->vm_file); | 5283 | file_update_time(vma->vm_file); |
5284 | |||
5285 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
5281 | /* Delalloc case is easy... */ | 5286 | /* Delalloc case is easy... */ |
5282 | if (test_opt(inode->i_sb, DELALLOC) && | 5287 | if (test_opt(inode->i_sb, DELALLOC) && |
5283 | !ext4_should_journal_data(inode) && | 5288 | !ext4_should_journal_data(inode) && |
@@ -5347,6 +5352,19 @@ retry_alloc: | |||
5347 | out_ret: | 5352 | out_ret: |
5348 | ret = block_page_mkwrite_return(ret); | 5353 | ret = block_page_mkwrite_return(ret); |
5349 | out: | 5354 | out: |
5355 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
5350 | sb_end_pagefault(inode->i_sb); | 5356 | sb_end_pagefault(inode->i_sb); |
5351 | return ret; | 5357 | return ret; |
5352 | } | 5358 | } |
5359 | |||
5360 | int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
5361 | { | ||
5362 | struct inode *inode = file_inode(vma->vm_file); | ||
5363 | int err; | ||
5364 | |||
5365 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
5366 | err = filemap_fault(vma, vmf); | ||
5367 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
5368 | |||
5369 | return err; | ||
5370 | } | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c9ab67da6e5a..493370e6590e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -958,6 +958,7 @@ static void init_once(void *foo) | |||
958 | INIT_LIST_HEAD(&ei->i_orphan); | 958 | INIT_LIST_HEAD(&ei->i_orphan); |
959 | init_rwsem(&ei->xattr_sem); | 959 | init_rwsem(&ei->xattr_sem); |
960 | init_rwsem(&ei->i_data_sem); | 960 | init_rwsem(&ei->i_data_sem); |
961 | init_rwsem(&ei->i_mmap_sem); | ||
961 | inode_init_once(&ei->vfs_inode); | 962 | inode_init_once(&ei->vfs_inode); |
962 | } | 963 | } |
963 | 964 | ||
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index 011ba6670d99..c70d06a383e2 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h | |||
@@ -10,8 +10,10 @@ | |||
10 | */ | 10 | */ |
11 | static inline void ext4_truncate_failed_write(struct inode *inode) | 11 | static inline void ext4_truncate_failed_write(struct inode *inode) |
12 | { | 12 | { |
13 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
13 | truncate_inode_pages(inode->i_mapping, inode->i_size); | 14 | truncate_inode_pages(inode->i_mapping, inode->i_size); |
14 | ext4_truncate(inode); | 15 | ext4_truncate(inode); |
16 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
15 | } | 17 | } |
16 | 18 | ||
17 | /* | 19 | /* |