aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2011-07-26 09:07:11 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2011-07-26 09:07:11 -0400
commit: 2d859db3e4a82a365572592d57624a5f996ed0ec (patch)
tree: d725aca3ab9555b8ee92f753a797034ff79f580c /fs/ext4
parent: b7ca1e8ec53259359db5313f923a0a20fa04bdb6 (diff)
ext4: fix data corruption in inodes with journalled data
When journalling data for an inode (either because it is a symlink or because the filesystem is mounted in data=journal mode), ext4_evict_inode() can discard unwritten data by calling truncate_inode_pages(). This is because we don't mark the buffer / page dirty when journalling data but only add the buffer to the running transaction, and thus mm does not know there is still unwritten data.

Fix the problem by carefully tracking the transaction containing the inode's data, committing this transaction, and writing uncheckpointed buffers when the inode should be reaped.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/inode.c 29
1 files changed, 29 insertions, 0 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index de50b16a8f67..43e4abd67be7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -121,6 +121,33 @@ void ext4_evict_inode(struct inode *inode)
121 121
122 trace_ext4_evict_inode(inode); 122 trace_ext4_evict_inode(inode);
123 if (inode->i_nlink) { 123 if (inode->i_nlink) {
124 /*
125 * When journalling data dirty buffers are tracked only in the
126 * journal. So although mm thinks everything is clean and
127 * ready for reaping the inode might still have some pages to
128 * write in the running transaction or waiting to be
129 * checkpointed. Thus calling jbd2_journal_invalidatepage()
130 * (via truncate_inode_pages()) to discard these buffers can
131 * cause data loss. Also even if we did not discard these
132 * buffers, we would have no way to find them after the inode
133 * is reaped and thus user could see stale data if he tries to
134 * read them before the transaction is checkpointed. So be
135 * careful and force everything to disk here... We use
136 * ei->i_datasync_tid to store the newest transaction
137 * containing inode's data.
138 *
139 * Note that directories do not have this problem because they
140 * don't use page cache.
141 */
142 if (ext4_should_journal_data(inode) &&
143 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
144 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
145 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
146
147 jbd2_log_start_commit(journal, commit_tid);
148 jbd2_log_wait_commit(journal, commit_tid);
149 filemap_write_and_wait(&inode->i_data);
150 }
124 truncate_inode_pages(&inode->i_data, 0); 151 truncate_inode_pages(&inode->i_data, 0);
125 goto no_delete; 152 goto no_delete;
126 } 153 }
@@ -970,6 +997,7 @@ static int ext4_journalled_write_end(struct file *file,
970 if (new_i_size > inode->i_size) 997 if (new_i_size > inode->i_size)
971 i_size_write(inode, pos+copied); 998 i_size_write(inode, pos+copied);
972 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 999 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1000 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
973 if (new_i_size > EXT4_I(inode)->i_disksize) { 1001 if (new_i_size > EXT4_I(inode)->i_disksize) {
974 ext4_update_i_disksize(inode, new_i_size); 1002 ext4_update_i_disksize(inode, new_i_size);
975 ret2 = ext4_mark_inode_dirty(handle, inode); 1003 ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1678,6 +1706,7 @@ static int __ext4_journalled_writepage(struct page *page,
1678 write_end_fn); 1706 write_end_fn);
1679 if (ret == 0) 1707 if (ret == 0)
1680 ret = err; 1708 ret = err;
1709 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1681 err = ext4_journal_stop(handle); 1710 err = ext4_journal_stop(handle);
1682 if (!ret) 1711 if (!ret)
1683 ret = err; 1712 ret = err;