aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2011-07-22 19:21:38 -0400
committer: Jan Kara <jack@suse.cz> 2011-07-22 19:49:00 -0400
commit: b22570d9abb3d844e65c15c8bc0d57a78129e3b4 (patch)
tree: 455217fe8cac7529c1ed6ce351cde629729c90c4 /fs
parent: 03b5bb342978f99f75fb36d69cd29bab32109fd4 (diff)
ext3: Fix data corruption in inodes with journalled data
When journalling data for an inode (either because it is a symlink or because the filesystem is mounted in data=journal mode), ext3_evict_inode() can discard unwritten data by calling truncate_inode_pages(). This is because we don't mark the buffer / page dirty when journalling data but only add the buffer to the running transaction, and thus mm does not know there is still unwritten data. Fix the problem by carefully tracking the transaction containing the inode's data, committing this transaction, and writing uncheckpointed buffers when the inode is about to be reaped. Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext3/inode.c 40
1 files changed, 35 insertions, 5 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index d2e4547c7806..f57c87b0cb83 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -197,6 +197,7 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
197 */ 197 */
198void ext3_evict_inode (struct inode *inode) 198void ext3_evict_inode (struct inode *inode)
199{ 199{
200 struct ext3_inode_info *ei = EXT3_I(inode);
200 struct ext3_block_alloc_info *rsv; 201 struct ext3_block_alloc_info *rsv;
201 handle_t *handle; 202 handle_t *handle;
202 int want_delete = 0; 203 int want_delete = 0;
@@ -207,11 +208,36 @@ void ext3_evict_inode (struct inode *inode)
207 want_delete = 1; 208 want_delete = 1;
208 } 209 }
209 210
211 /*
212 * When journalling data dirty buffers are tracked only in the journal.
213 * So although mm thinks everything is clean and ready for reaping the
214 * inode might still have some pages to write in the running
215 * transaction or waiting to be checkpointed. Thus calling
216 * journal_invalidatepage() (via truncate_inode_pages()) to discard
217 * these buffers can cause data loss. Also even if we did not discard
218 * these buffers, we would have no way to find them after the inode
219 * is reaped and thus user could see stale data if he tries to read
220 * them before the transaction is checkpointed. So be careful and
221 * force everything to disk here... We use ei->i_datasync_tid to
222 * store the newest transaction containing inode's data.
223 *
224 * Note that directories do not have this problem because they don't
225 * use page cache.
226 */
227 if (inode->i_nlink && ext3_should_journal_data(inode) &&
228 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
229 tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
230 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
231
232 log_start_commit(journal, commit_tid);
233 log_wait_commit(journal, commit_tid);
234 filemap_write_and_wait(&inode->i_data);
235 }
210 truncate_inode_pages(&inode->i_data, 0); 236 truncate_inode_pages(&inode->i_data, 0);
211 237
212 ext3_discard_reservation(inode); 238 ext3_discard_reservation(inode);
213 rsv = EXT3_I(inode)->i_block_alloc_info; 239 rsv = ei->i_block_alloc_info;
214 EXT3_I(inode)->i_block_alloc_info = NULL; 240 ei->i_block_alloc_info = NULL;
215 if (unlikely(rsv)) 241 if (unlikely(rsv))
216 kfree(rsv); 242 kfree(rsv);
217 243
@@ -241,7 +267,7 @@ void ext3_evict_inode (struct inode *inode)
241 * have removed the record. 267 * have removed the record.
242 */ 268 */
243 ext3_orphan_del(handle, inode); 269 ext3_orphan_del(handle, inode);
244 EXT3_I(inode)->i_dtime = get_seconds(); 270 ei->i_dtime = get_seconds();
245 271
246 /* 272 /*
247 * One subtle ordering requirement: if anything has gone wrong 273 * One subtle ordering requirement: if anything has gone wrong
@@ -1411,6 +1437,7 @@ static int ext3_journalled_write_end(struct file *file,
1411{ 1437{
1412 handle_t *handle = ext3_journal_current_handle(); 1438 handle_t *handle = ext3_journal_current_handle();
1413 struct inode *inode = mapping->host; 1439 struct inode *inode = mapping->host;
1440 struct ext3_inode_info *ei = EXT3_I(inode);
1414 int ret = 0, ret2; 1441 int ret = 0, ret2;
1415 int partial = 0; 1442 int partial = 0;
1416 unsigned from, to; 1443 unsigned from, to;
@@ -1440,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
1440 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1467 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1441 ext3_orphan_add(handle, inode); 1468 ext3_orphan_add(handle, inode);
1442 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1469 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1443 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1470 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
1444 EXT3_I(inode)->i_disksize = inode->i_size; 1471 if (inode->i_size > ei->i_disksize) {
1472 ei->i_disksize = inode->i_size;
1445 ret2 = ext3_mark_inode_dirty(handle, inode); 1473 ret2 = ext3_mark_inode_dirty(handle, inode);
1446 if (!ret) 1474 if (!ret)
1447 ret = ret2; 1475 ret = ret2;
@@ -1739,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page,
1739 if (ret == 0) 1767 if (ret == 0)
1740 ret = err; 1768 ret = err;
1741 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1769 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1770 atomic_set(&EXT3_I(inode)->i_datasync_tid,
1771 handle->h_transaction->t_tid);
1742 unlock_page(page); 1772 unlock_page(page);
1743 } else { 1773 } else {
1744 /* 1774 /*