aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2013-08-17 10:09:31 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-08-17 10:09:31 -0400
commit90e775b71ac4e685898c7995756fe58c135adaa6 (patch)
treee7e84b7990ad61717808830726b0fce4e9c0464a /fs/ext4
parent5208386c501276df18fee464e21d3c58d2d79517 (diff)
ext4: fix lost truncate due to race with writeback
The following race can lead to a loss of i_disksize update from truncate thus resulting in a wrong inode size if the inode size isn't updated again before inode is reclaimed: ext4_setattr() mpage_map_and_submit_extent() EXT4_I(inode)->i_disksize = attr->ia_size; ... ... disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT /* False because i_size isn't * updated yet */ if (disksize > i_size_read(inode)) /* True, because i_disksize is * already truncated */ if (disksize > EXT4_I(inode)->i_disksize) /* Overwrite i_disksize * update from truncate */ ext4_update_i_disksize() i_size_write(inode, attr->ia_size); For other places updating i_disksize such race cannot happen because i_mutex prevents these races. Writeback is the only place where we do not hold i_mutex and we cannot grab it there because of lock ordering. We fix the race by doing both i_disksize and i_size update in truncate atomically under i_data_sem and in mpage_map_and_submit_extent() we move the check against i_size under i_data_sem as well. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h24
-rw-r--r--fs/ext4/inode.c17
2 files changed, 32 insertions, 9 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 58dede76f75f..3dbc56eb4849 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2432,16 +2432,32 @@ do { \
2432#define EXT4_FREECLUSTERS_WATERMARK 0 2432#define EXT4_FREECLUSTERS_WATERMARK 0
2433#endif 2433#endif
2434 2434
2435/* Update i_disksize. Requires i_mutex to avoid races with truncate */
2435static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) 2436static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2436{ 2437{
2437 /* 2438 WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
2438 * XXX: replace with spinlock if seen contended -bzzz 2439 !mutex_is_locked(&inode->i_mutex));
2439 */ 2440 down_write(&EXT4_I(inode)->i_data_sem);
2441 if (newsize > EXT4_I(inode)->i_disksize)
2442 EXT4_I(inode)->i_disksize = newsize;
2443 up_write(&EXT4_I(inode)->i_data_sem);
2444}
2445
2446/*
2447 * Update i_disksize after writeback has been started. Races with truncate
2448 * are avoided by checking i_size under i_data_sem.
2449 */
2450static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
2451{
2452 loff_t i_size;
2453
2440 down_write(&EXT4_I(inode)->i_data_sem); 2454 down_write(&EXT4_I(inode)->i_data_sem);
2455 i_size = i_size_read(inode);
2456 if (newsize > i_size)
2457 newsize = i_size;
2441 if (newsize > EXT4_I(inode)->i_disksize) 2458 if (newsize > EXT4_I(inode)->i_disksize)
2442 EXT4_I(inode)->i_disksize = newsize; 2459 EXT4_I(inode)->i_disksize = newsize;
2443 up_write(&EXT4_I(inode)->i_data_sem); 2460 up_write(&EXT4_I(inode)->i_data_sem);
2444 return ;
2445} 2461}
2446 2462
2447struct ext4_group_info { 2463struct ext4_group_info {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 38f430119fef..fc4051eb4e0f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2237,12 +2237,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2237 2237
2238 /* Update on-disk size after IO is submitted */ 2238 /* Update on-disk size after IO is submitted */
2239 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2239 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2240 if (disksize > i_size_read(inode))
2241 disksize = i_size_read(inode);
2242 if (disksize > EXT4_I(inode)->i_disksize) { 2240 if (disksize > EXT4_I(inode)->i_disksize) {
2243 int err2; 2241 int err2;
2244 2242
2245 ext4_update_i_disksize(inode, disksize); 2243 ext4_wb_update_i_disksize(inode, disksize);
2246 err2 = ext4_mark_inode_dirty(handle, inode); 2244 err2 = ext4_mark_inode_dirty(handle, inode);
2247 if (err2) 2245 if (err2)
2248 ext4_error(inode->i_sb, 2246 ext4_error(inode->i_sb,
@@ -4627,18 +4625,27 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4627 error = ext4_orphan_add(handle, inode); 4625 error = ext4_orphan_add(handle, inode);
4628 orphan = 1; 4626 orphan = 1;
4629 } 4627 }
4628 down_write(&EXT4_I(inode)->i_data_sem);
4630 EXT4_I(inode)->i_disksize = attr->ia_size; 4629 EXT4_I(inode)->i_disksize = attr->ia_size;
4631 rc = ext4_mark_inode_dirty(handle, inode); 4630 rc = ext4_mark_inode_dirty(handle, inode);
4632 if (!error) 4631 if (!error)
4633 error = rc; 4632 error = rc;
4633 /*
4634 * We have to update i_size under i_data_sem together
4635 * with i_disksize to avoid races with writeback code
4636 * running ext4_wb_update_i_disksize().
4637 */
4638 if (!error)
4639 i_size_write(inode, attr->ia_size);
4640 up_write(&EXT4_I(inode)->i_data_sem);
4634 ext4_journal_stop(handle); 4641 ext4_journal_stop(handle);
4635 if (error) { 4642 if (error) {
4636 ext4_orphan_del(NULL, inode); 4643 ext4_orphan_del(NULL, inode);
4637 goto err_out; 4644 goto err_out;
4638 } 4645 }
4639 } 4646 } else
4647 i_size_write(inode, attr->ia_size);
4640 4648
4641 i_size_write(inode, attr->ia_size);
4642 /* 4649 /*
4643 * Blocks are going to be removed from the inode. Wait 4650 * Blocks are going to be removed from the inode. Wait
4644 * for dio in flight. Temporarily disable 4651 * for dio in flight. Temporarily disable