diff options
author    Jan Kara <jack@suse.com>          2015-12-07 14:31:11 -0500
committer Theodore Ts'o <tytso@mit.edu>     2015-12-07 14:31:11 -0500
commit    32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 (patch)
tree      11d7854acb61128821b948796d1eb2e2f87b925f
parent    17048e8a083fec7ad841d88ef0812707fbc7e39f (diff)
ext4: fix races between buffered IO and collapse / insert range
Current code implementing FALLOC_FL_COLLAPSE_RANGE and
FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page
faults. If a buffered write or a write via mmap manages to squeeze between
filemap_write_and_wait_range() and truncate_pagecache() in the fallocate
implementations, the written data is simply discarded by
truncate_pagecache() although it should have been shifted.
Fix the problem by moving the filemap_write_and_wait_range() call inside
i_mutex and i_mmap_sem. That way we are protected against races with
both buffered writes and page faults.
Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r-- | fs/ext4/extents.c | 59 |
1 file changed, 31 insertions(+), 28 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 65b5ada2833f..4b105c96df08 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -5487,21 +5487,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
5487 | return ret; | 5487 | return ret; |
5488 | } | 5488 | } |
5489 | 5489 | ||
5490 | /* | ||
5491 | * Need to round down offset to be aligned with page size boundary | ||
5492 | * for page size > block size. | ||
5493 | */ | ||
5494 | ioffset = round_down(offset, PAGE_SIZE); | ||
5495 | |||
5496 | /* Write out all dirty pages */ | ||
5497 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, | ||
5498 | LLONG_MAX); | ||
5499 | if (ret) | ||
5500 | return ret; | ||
5501 | |||
5502 | /* Take mutex lock */ | ||
5503 | mutex_lock(&inode->i_mutex); | 5490 | mutex_lock(&inode->i_mutex); |
5504 | |||
5505 | /* | 5491 | /* |
5506 | * There is no need to overlap collapse range with EOF, in which case | 5492 | * There is no need to overlap collapse range with EOF, in which case |
5507 | * it is effectively a truncate operation | 5493 | * it is effectively a truncate operation |
@@ -5526,6 +5512,27 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
5526 | * page cache. | 5512 | * page cache. |
5527 | */ | 5513 | */ |
5528 | down_write(&EXT4_I(inode)->i_mmap_sem); | 5514 | down_write(&EXT4_I(inode)->i_mmap_sem); |
5515 | /* | ||
5516 | * Need to round down offset to be aligned with page size boundary | ||
5517 | * for page size > block size. | ||
5518 | */ | ||
5519 | ioffset = round_down(offset, PAGE_SIZE); | ||
5520 | /* | ||
5521 | * Write tail of the last page before removed range since it will get | ||
5522 | * removed from the page cache below. | ||
5523 | */ | ||
5524 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); | ||
5525 | if (ret) | ||
5526 | goto out_mmap; | ||
5527 | /* | ||
5528 | * Write data that will be shifted to preserve them when discarding | ||
5529 | * page cache below. We are also protected from pages becoming dirty | ||
5530 | * by i_mmap_sem. | ||
5531 | */ | ||
5532 | ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, | ||
5533 | LLONG_MAX); | ||
5534 | if (ret) | ||
5535 | goto out_mmap; | ||
5529 | truncate_pagecache(inode, ioffset); | 5536 | truncate_pagecache(inode, ioffset); |
5530 | 5537 | ||
5531 | credits = ext4_writepage_trans_blocks(inode); | 5538 | credits = ext4_writepage_trans_blocks(inode); |
@@ -5626,21 +5633,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
5626 | return ret; | 5633 | return ret; |
5627 | } | 5634 | } |
5628 | 5635 | ||
5629 | /* | ||
5630 | * Need to round down to align start offset to page size boundary | ||
5631 | * for page size > block size. | ||
5632 | */ | ||
5633 | ioffset = round_down(offset, PAGE_SIZE); | ||
5634 | |||
5635 | /* Write out all dirty pages */ | ||
5636 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, | ||
5637 | LLONG_MAX); | ||
5638 | if (ret) | ||
5639 | return ret; | ||
5640 | |||
5641 | /* Take mutex lock */ | ||
5642 | mutex_lock(&inode->i_mutex); | 5636 | mutex_lock(&inode->i_mutex); |
5643 | |||
5644 | /* Currently just for extent based files */ | 5637 | /* Currently just for extent based files */ |
5645 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 5638 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
5646 | ret = -EOPNOTSUPP; | 5639 | ret = -EOPNOTSUPP; |
@@ -5668,6 +5661,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
5668 | * page cache. | 5661 | * page cache. |
5669 | */ | 5662 | */ |
5670 | down_write(&EXT4_I(inode)->i_mmap_sem); | 5663 | down_write(&EXT4_I(inode)->i_mmap_sem); |
5664 | /* | ||
5665 | * Need to round down to align start offset to page size boundary | ||
5666 | * for page size > block size. | ||
5667 | */ | ||
5668 | ioffset = round_down(offset, PAGE_SIZE); | ||
5669 | /* Write out all dirty pages */ | ||
5670 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, | ||
5671 | LLONG_MAX); | ||
5672 | if (ret) | ||
5673 | goto out_mmap; | ||
5671 | truncate_pagecache(inode, ioffset); | 5674 | truncate_pagecache(inode, ioffset); |
5672 | 5675 | ||
5673 | credits = ext4_writepage_trans_blocks(inode); | 5676 | credits = ext4_writepage_trans_blocks(inode); |