aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/inode.c
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2014-04-03 17:46:48 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-04-03 19:20:53 -0400
commit: 2931cdcb49194503b19345c597b68fdcf78396f8 (patch)
tree: 2492e18b4aa23b3815b6f54112bc4667158e101a /fs/ocfs2/inode.c
parent: a75fe48cad2fb81e0e2671c73aea6c78ce5626d4 (diff)
ocfs2: improve fsync efficiency and fix deadlock between aio_write and sync_file
Currently, ocfs2_sync_file grabs i_mutex and forces the current journal transaction to complete. This isn't terribly efficient, since sync_file really only needs to wait for the last transaction involving that inode to complete, and this doesn't require i_mutex. Therefore, implement the necessary bits to track the newest tid associated with an inode, and teach sync_file to wait for that instead of waiting for everything in the journal to commit. Furthermore, only issue the flush request to the drive if jbd2 hasn't already done so.

This also eliminates the deadlock between ocfs2_file_aio_write() and ocfs2_sync_file(). aio_write takes i_mutex then calls ocfs2_aiodio_wait() to wait for unaligned dio writes to finish. However, if that dio completion involves calling fsync, then we can get into trouble when some ocfs2_sync_file tries to take i_mutex.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/inode.c')
-rw-r--r-- fs/ocfs2/inode.c 28
1 files changed, 28 insertions, 0 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f29a90fde619..28ab8a9e88a1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -130,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
130 struct inode *inode = NULL; 130 struct inode *inode = NULL;
131 struct super_block *sb = osb->sb; 131 struct super_block *sb = osb->sb;
132 struct ocfs2_find_inode_args args; 132 struct ocfs2_find_inode_args args;
133 journal_t *journal = OCFS2_SB(sb)->journal->j_journal;
133 134
134 trace_ocfs2_iget_begin((unsigned long long)blkno, flags, 135 trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
135 sysfile_type); 136 sysfile_type);
@@ -169,6 +170,32 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
169 goto bail; 170 goto bail;
170 } 171 }
171 172
173 /*
174 * Set transaction id's of transactions that have to be committed
175 * to finish f[data]sync. We set them to currently running transaction
176 * as we cannot be sure that the inode or some of its metadata isn't
177 * part of the transaction - the inode could have been reclaimed and
178 * now it is reread from disk.
179 */
180 if (journal) {
181 transaction_t *transaction;
182 tid_t tid;
183 struct ocfs2_inode_info *oi = OCFS2_I(inode);
184
185 read_lock(&journal->j_state_lock);
186 if (journal->j_running_transaction)
187 transaction = journal->j_running_transaction;
188 else
189 transaction = journal->j_committing_transaction;
190 if (transaction)
191 tid = transaction->t_tid;
192 else
193 tid = journal->j_commit_sequence;
194 read_unlock(&journal->j_state_lock);
195 oi->i_sync_tid = tid;
196 oi->i_datasync_tid = tid;
197 }
198
172bail: 199bail:
173 if (!IS_ERR(inode)) { 200 if (!IS_ERR(inode)) {
174 trace_ocfs2_iget_end(inode, 201 trace_ocfs2_iget_end(inode,
@@ -1260,6 +1287,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1260 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1287 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1261 1288
1262 ocfs2_journal_dirty(handle, bh); 1289 ocfs2_journal_dirty(handle, bh);
1290 ocfs2_update_inode_fsync_trans(handle, inode, 1);
1263leave: 1291leave:
1264 return status; 1292 return status;
1265} 1293}