aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2009-12-08 23:51:10 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2009-12-08 23:51:10 -0500
commit: b436b9bef84de6893e86346d8fbf7104bc520645 (patch)
tree: 50fb9ae167bcd622e9adf47646bcf3b4c7dd111d
parent: 194074acacebc169ded90a4657193f5180015051 (diff)
ext4: Wait for proper transaction commit on fsync
We cannot rely on buffer dirty bits during fsync because pdflush can come before fsync is called and clear dirty bits without forcing a transaction commit. What we do is that we track which transaction has last changed the inode and which transaction last changed allocation and force it to disk on fsync.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--  fs/ext4/ext4.h       7
-rw-r--r--  fs/ext4/ext4_jbd2.h  13
-rw-r--r--  fs/ext4/extents.c    14
-rw-r--r--  fs/ext4/fsync.c      46
-rw-r--r--  fs/ext4/inode.c      29
-rw-r--r--  fs/ext4/super.c      2
6 files changed, 80 insertions, 31 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4cfc2f0edb3f..ab31e65d46d0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -709,6 +709,13 @@ struct ext4_inode_info {
709 struct list_head i_aio_dio_complete_list; 709 struct list_head i_aio_dio_complete_list;
710 /* current io_end structure for async DIO write*/ 710 /* current io_end structure for async DIO write*/
711 ext4_io_end_t *cur_aio_dio; 711 ext4_io_end_t *cur_aio_dio;
712
713 /*
714 * Transactions that contain inode's metadata needed to complete
715 * fsync and fdatasync, respectively.
716 */
717 tid_t i_sync_tid;
718 tid_t i_datasync_tid;
712}; 719};
713 720
714/* 721/*
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 2c2b262bd31b..05eca817d704 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -249,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
249 return 0; 249 return 0;
250} 250}
251 251
252static inline void ext4_update_inode_fsync_trans(handle_t *handle,
253 struct inode *inode,
254 int datasync)
255{
256 struct ext4_inode_info *ei = EXT4_I(inode);
257
258 if (ext4_handle_valid(handle)) {
259 ei->i_sync_tid = handle->h_transaction->t_tid;
260 if (datasync)
261 ei->i_datasync_tid = handle->h_transaction->t_tid;
262 }
263}
264
252/* super.c */ 265/* super.c */
253int ext4_force_commit(struct super_block *sb); 266int ext4_force_commit(struct super_block *sb);
254 267
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5967f18fd7e7..700206e525da 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3058,6 +3058,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3058 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { 3058 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
3059 ret = ext4_convert_unwritten_extents_dio(handle, inode, 3059 ret = ext4_convert_unwritten_extents_dio(handle, inode,
3060 path); 3060 path);
3061 if (ret >= 0)
3062 ext4_update_inode_fsync_trans(handle, inode, 1);
3061 goto out2; 3063 goto out2;
3062 } 3064 }
3063 /* buffered IO case */ 3065 /* buffered IO case */
@@ -3085,6 +3087,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3085 ret = ext4_ext_convert_to_initialized(handle, inode, 3087 ret = ext4_ext_convert_to_initialized(handle, inode,
3086 path, iblock, 3088 path, iblock,
3087 max_blocks); 3089 max_blocks);
3090 if (ret >= 0)
3091 ext4_update_inode_fsync_trans(handle, inode, 1);
3088out: 3092out:
3089 if (ret <= 0) { 3093 if (ret <= 0) {
3090 err = ret; 3094 err = ret;
@@ -3323,10 +3327,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3323 allocated = ext4_ext_get_actual_len(&newex); 3327 allocated = ext4_ext_get_actual_len(&newex);
3324 set_buffer_new(bh_result); 3328 set_buffer_new(bh_result);
3325 3329
3326 /* Cache only when it is _not_ an uninitialized extent */ 3330 /*
3327 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) 3331 * Cache the extent and update transaction to commit on fdatasync only
3332 * when it is _not_ an uninitialized extent.
3333 */
3334 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
3328 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 3335 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
3329 EXT4_EXT_CACHE_EXTENT); 3336 EXT4_EXT_CACHE_EXTENT);
3337 ext4_update_inode_fsync_trans(handle, inode, 1);
3338 } else
3339 ext4_update_inode_fsync_trans(handle, inode, 0);
3330out: 3340out:
3331 if (allocated > max_blocks) 3341 if (allocated > max_blocks)
3332 allocated = max_blocks; 3342 allocated = max_blocks;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a3c25076aef1..0b22497d92e1 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -51,25 +51,30 @@
51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) 51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
52{ 52{
53 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
54 struct ext4_inode_info *ei = EXT4_I(inode);
54 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 55 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
55 int err, ret = 0; 56 int ret;
57 tid_t commit_tid;
56 58
57 J_ASSERT(ext4_journal_current_handle() == NULL); 59 J_ASSERT(ext4_journal_current_handle() == NULL);
58 60
59 trace_ext4_sync_file(file, dentry, datasync); 61 trace_ext4_sync_file(file, dentry, datasync);
60 62
63 if (inode->i_sb->s_flags & MS_RDONLY)
64 return 0;
65
61 ret = flush_aio_dio_completed_IO(inode); 66 ret = flush_aio_dio_completed_IO(inode);
62 if (ret < 0) 67 if (ret < 0)
63 return ret; 68 return ret;
69
70 if (!journal)
71 return simple_fsync(file, dentry, datasync);
72
64 /* 73 /*
65 * data=writeback: 74 * data=writeback,ordered:
66 * The caller's filemap_fdatawrite()/wait will sync the data. 75 * The caller's filemap_fdatawrite()/wait will sync the data.
67 * sync_inode() will sync the metadata 76 * Metadata is in the journal, we wait for proper transaction to
68 * 77 * commit here.
69 * data=ordered:
70 * The caller's filemap_fdatawrite() will write the data and
71 * sync_inode() will write the inode if it is dirty. Then the caller's
72 * filemap_fdatawait() will wait on the pages.
73 * 78 *
74 * data=journal: 79 * data=journal:
75 * filemap_fdatawrite won't do anything (the buffers are clean). 80 * filemap_fdatawrite won't do anything (the buffers are clean).
@@ -82,27 +87,10 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
82 if (ext4_should_journal_data(inode)) 87 if (ext4_should_journal_data(inode))
83 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
84 89
85 if (!journal) 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
86 ret = sync_mapping_buffers(inode->i_mapping); 91 if (jbd2_log_start_commit(journal, commit_tid))
87 92 jbd2_log_wait_commit(journal, commit_tid);
88 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 93 else if (journal->j_flags & JBD2_BARRIER)
89 goto out;
90
91 /*
92 * The VFS has written the file data. If the inode is unaltered
93 * then we need not start a commit.
94 */
95 if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
96 struct writeback_control wbc = {
97 .sync_mode = WB_SYNC_ALL,
98 .nr_to_write = 0, /* sys_fsync did this */
99 };
100 err = sync_inode(inode, &wbc);
101 if (ret == 0)
102 ret = err;
103 }
104out:
105 if (journal && (journal->j_flags & JBD2_BARRIER))
106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
107 return ret; 95 return ret;
108} 96}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 958c3ff800e9..f1bc1e338828 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -983,6 +983,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
983 goto cleanup; 983 goto cleanup;
984 984
985 set_buffer_new(bh_result); 985 set_buffer_new(bh_result);
986
987 ext4_update_inode_fsync_trans(handle, inode, 1);
986got_it: 988got_it:
987 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 989 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
988 if (count > blocks_to_boundary) 990 if (count > blocks_to_boundary)
@@ -4738,6 +4740,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4738 struct ext4_inode *raw_inode; 4740 struct ext4_inode *raw_inode;
4739 struct ext4_inode_info *ei; 4741 struct ext4_inode_info *ei;
4740 struct inode *inode; 4742 struct inode *inode;
4743 journal_t *journal = EXT4_SB(sb)->s_journal;
4741 long ret; 4744 long ret;
4742 int block; 4745 int block;
4743 4746
@@ -4802,6 +4805,31 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4802 ei->i_data[block] = raw_inode->i_block[block]; 4805 ei->i_data[block] = raw_inode->i_block[block];
4803 INIT_LIST_HEAD(&ei->i_orphan); 4806 INIT_LIST_HEAD(&ei->i_orphan);
4804 4807
4808 /*
4809 * Set transaction id's of transactions that have to be committed
4810 * to finish f[data]sync. We set them to currently running transaction
4811 * as we cannot be sure that the inode or some of its metadata isn't
4812 * part of the transaction - the inode could have been reclaimed and
4813 * now it is reread from disk.
4814 */
4815 if (journal) {
4816 transaction_t *transaction;
4817 tid_t tid;
4818
4819 spin_lock(&journal->j_state_lock);
4820 if (journal->j_running_transaction)
4821 transaction = journal->j_running_transaction;
4822 else
4823 transaction = journal->j_committing_transaction;
4824 if (transaction)
4825 tid = transaction->t_tid;
4826 else
4827 tid = journal->j_commit_sequence;
4828 spin_unlock(&journal->j_state_lock);
4829 ei->i_sync_tid = tid;
4830 ei->i_datasync_tid = tid;
4831 }
4832
4805 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4833 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4806 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4834 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4807 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4835 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
@@ -5056,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
5056 err = rc; 5084 err = rc;
5057 ei->i_state &= ~EXT4_STATE_NEW; 5085 ei->i_state &= ~EXT4_STATE_NEW;
5058 5086
5087 ext4_update_inode_fsync_trans(handle, inode, 0);
5059out_brelse: 5088out_brelse:
5060 brelse(bh); 5089 brelse(bh);
5061 ext4_std_error(inode->i_sb, err); 5090 ext4_std_error(inode->i_sb, err);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8ab0c9518473..2b13dcfcf775 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -706,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
706 spin_lock_init(&(ei->i_block_reservation_lock)); 706 spin_lock_init(&(ei->i_block_reservation_lock));
707 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 707 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
708 ei->cur_aio_dio = NULL; 708 ei->cur_aio_dio = NULL;
709 ei->i_sync_tid = 0;
710 ei->i_datasync_tid = 0;
709 711
710 return &ei->vfs_inode; 712 return &ei->vfs_inode;
711} 713}