diff options
author | Jan Kara <jack@suse.cz> | 2013-08-17 10:09:31 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-08-17 10:09:31 -0400 |
commit | 90e775b71ac4e685898c7995756fe58c135adaa6 (patch) | |
tree | e7e84b7990ad61717808830726b0fce4e9c0464a /fs/ext4 | |
parent | 5208386c501276df18fee464e21d3c58d2d79517 (diff) |
ext4: fix lost truncate due to race with writeback
The following race can lead to a loss of i_disksize update from truncate
thus resulting in a wrong inode size if the inode size isn't updated
again before inode is reclaimed:
ext4_setattr() mpage_map_and_submit_extent()
EXT4_I(inode)->i_disksize = attr->ia_size;
... ...
disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT
/* False because i_size isn't
* updated yet */
if (disksize > i_size_read(inode))
/* True, because i_disksize is
* already truncated */
if (disksize > EXT4_I(inode)->i_disksize)
/* Overwrite i_disksize
* update from truncate */
ext4_update_i_disksize()
i_size_write(inode, attr->ia_size);
For other places updating i_disksize such race cannot happen because
i_mutex prevents these races. Writeback is the only place where we do
not hold i_mutex and we cannot grab it there because of lock ordering.
We fix the race by doing both i_disksize and i_size update in truncate
atomically under i_data_sem and in mpage_map_and_submit_extent() we move
the check against i_size under i_data_sem as well.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 24 | ||||
-rw-r--r-- | fs/ext4/inode.c | 17 |
2 files changed, 32 insertions, 9 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 58dede76f75f..3dbc56eb4849 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -2432,16 +2432,32 @@ do { \ | |||
2432 | #define EXT4_FREECLUSTERS_WATERMARK 0 | 2432 | #define EXT4_FREECLUSTERS_WATERMARK 0 |
2433 | #endif | 2433 | #endif |
2434 | 2434 | ||
2435 | /* Update i_disksize. Requires i_mutex to avoid races with truncate */ | ||
2435 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | 2436 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) |
2436 | { | 2437 | { |
2437 | /* | 2438 | WARN_ON_ONCE(S_ISREG(inode->i_mode) && |
2438 | * XXX: replace with spinlock if seen contended -bzzz | 2439 | !mutex_is_locked(&inode->i_mutex)); |
2439 | */ | 2440 | down_write(&EXT4_I(inode)->i_data_sem); |
2441 | if (newsize > EXT4_I(inode)->i_disksize) | ||
2442 | EXT4_I(inode)->i_disksize = newsize; | ||
2443 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2444 | } | ||
2445 | |||
2446 | /* | ||
2447 | * Update i_disksize after writeback has been started. Races with truncate | ||
2448 | * are avoided by checking i_size under i_data_sem. | ||
2449 | */ | ||
2450 | static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize) | ||
2451 | { | ||
2452 | loff_t i_size; | ||
2453 | |||
2440 | down_write(&EXT4_I(inode)->i_data_sem); | 2454 | down_write(&EXT4_I(inode)->i_data_sem); |
2455 | i_size = i_size_read(inode); | ||
2456 | if (newsize > i_size) | ||
2457 | newsize = i_size; | ||
2441 | if (newsize > EXT4_I(inode)->i_disksize) | 2458 | if (newsize > EXT4_I(inode)->i_disksize) |
2442 | EXT4_I(inode)->i_disksize = newsize; | 2459 | EXT4_I(inode)->i_disksize = newsize; |
2443 | up_write(&EXT4_I(inode)->i_data_sem); | 2460 | up_write(&EXT4_I(inode)->i_data_sem); |
2444 | return ; | ||
2445 | } | 2461 | } |
2446 | 2462 | ||
2447 | struct ext4_group_info { | 2463 | struct ext4_group_info { |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 38f430119fef..fc4051eb4e0f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2237,12 +2237,10 @@ static int mpage_map_and_submit_extent(handle_t *handle, | |||
2237 | 2237 | ||
2238 | /* Update on-disk size after IO is submitted */ | 2238 | /* Update on-disk size after IO is submitted */ |
2239 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | 2239 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; |
2240 | if (disksize > i_size_read(inode)) | ||
2241 | disksize = i_size_read(inode); | ||
2242 | if (disksize > EXT4_I(inode)->i_disksize) { | 2240 | if (disksize > EXT4_I(inode)->i_disksize) { |
2243 | int err2; | 2241 | int err2; |
2244 | 2242 | ||
2245 | ext4_update_i_disksize(inode, disksize); | 2243 | ext4_wb_update_i_disksize(inode, disksize); |
2246 | err2 = ext4_mark_inode_dirty(handle, inode); | 2244 | err2 = ext4_mark_inode_dirty(handle, inode); |
2247 | if (err2) | 2245 | if (err2) |
2248 | ext4_error(inode->i_sb, | 2246 | ext4_error(inode->i_sb, |
@@ -4627,18 +4625,27 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4627 | error = ext4_orphan_add(handle, inode); | 4625 | error = ext4_orphan_add(handle, inode); |
4628 | orphan = 1; | 4626 | orphan = 1; |
4629 | } | 4627 | } |
4628 | down_write(&EXT4_I(inode)->i_data_sem); | ||
4630 | EXT4_I(inode)->i_disksize = attr->ia_size; | 4629 | EXT4_I(inode)->i_disksize = attr->ia_size; |
4631 | rc = ext4_mark_inode_dirty(handle, inode); | 4630 | rc = ext4_mark_inode_dirty(handle, inode); |
4632 | if (!error) | 4631 | if (!error) |
4633 | error = rc; | 4632 | error = rc; |
4633 | /* | ||
4634 | * We have to update i_size under i_data_sem together | ||
4635 | * with i_disksize to avoid races with writeback code | ||
4636 | * running ext4_wb_update_i_disksize(). | ||
4637 | */ | ||
4638 | if (!error) | ||
4639 | i_size_write(inode, attr->ia_size); | ||
4640 | up_write(&EXT4_I(inode)->i_data_sem); | ||
4634 | ext4_journal_stop(handle); | 4641 | ext4_journal_stop(handle); |
4635 | if (error) { | 4642 | if (error) { |
4636 | ext4_orphan_del(NULL, inode); | 4643 | ext4_orphan_del(NULL, inode); |
4637 | goto err_out; | 4644 | goto err_out; |
4638 | } | 4645 | } |
4639 | } | 4646 | } else |
4647 | i_size_write(inode, attr->ia_size); | ||
4640 | 4648 | ||
4641 | i_size_write(inode, attr->ia_size); | ||
4642 | /* | 4649 | /* |
4643 | * Blocks are going to be removed from the inode. Wait | 4650 | * Blocks are going to be removed from the inode. Wait |
4644 | * for dio in flight. Temporarily disable | 4651 | * for dio in flight. Temporarily disable |