about summary refs log tree commit diff stats
path: root/fs/xfs/xfs_file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- fs/xfs/xfs_file.c 84
1 files changed, 16 insertions, 68 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7e5bc872f2b4..54a67dd9ac0a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -163,7 +163,6 @@ xfs_file_fsync(
163 struct inode *inode = file->f_mapping->host; 163 struct inode *inode = file->f_mapping->host;
164 struct xfs_inode *ip = XFS_I(inode); 164 struct xfs_inode *ip = XFS_I(inode);
165 struct xfs_mount *mp = ip->i_mount; 165 struct xfs_mount *mp = ip->i_mount;
166 struct xfs_trans *tp;
167 int error = 0; 166 int error = 0;
168 int log_flushed = 0; 167 int log_flushed = 0;
169 xfs_lsn_t lsn = 0; 168 xfs_lsn_t lsn = 0;
@@ -194,75 +193,18 @@ xfs_file_fsync(
194 } 193 }
195 194
196 /* 195 /*
197 * We always need to make sure that the required inode state is safe on 196 * All metadata updates are logged, which means that we just have
198 * disk. The inode might be clean but we still might need to force the 197 * to flush the log up to the latest LSN that touched the inode.
199 * log because of committed transactions that haven't hit the disk yet.
200 * Likewise, there could be unflushed non-transactional changes to the
201 * inode core that have to go to disk and this requires us to issue
202 * a synchronous transaction to capture these changes correctly.
203 *
204 * This code relies on the assumption that if the i_update_core field
205 * of the inode is clear and the inode is unpinned then it is clean
206 * and no action is required.
207 */ 198 */
208 xfs_ilock(ip, XFS_ILOCK_SHARED); 199 xfs_ilock(ip, XFS_ILOCK_SHARED);
209 200 if (xfs_ipincount(ip)) {
210 /* 201 if (!datasync ||
211 * First check if the VFS inode is marked dirty. All the dirtying 202 (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
212 * of non-transactional updates do not go through mark_inode_dirty*,
213 * which allows us to distinguish between pure timestamp updates
214 * and i_size updates which need to be caught for fdatasync.
215 * After that also check for the dirty state in the XFS inode, which
216 * might gets cleared when the inode gets written out via the AIL
217 * or xfs_iflush_cluster.
218 */
219 if (((inode->i_state & I_DIRTY_DATASYNC) ||
220 ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
221 ip->i_update_core) {
222 /*
223 * Kick off a transaction to log the inode core to get the
224 * updates. The sync transaction will also force the log.
225 */
226 xfs_iunlock(ip, XFS_ILOCK_SHARED);
227 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
228 error = xfs_trans_reserve(tp, 0,
229 XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
230 if (error) {
231 xfs_trans_cancel(tp, 0);
232 return -error;
233 }
234 xfs_ilock(ip, XFS_ILOCK_EXCL);
235
236 /*
237 * Note - it's possible that we might have pushed ourselves out
238 * of the way during trans_reserve which would flush the inode.
239 * But there's no guarantee that the inode buffer has actually
240 * gone out yet (it's delwri). Plus the buffer could be pinned
241 * anyway if it's part of an inode in another recent
242 * transaction. So we play it safe and fire off the
243 * transaction anyway.
244 */
245 xfs_trans_ijoin(tp, ip, 0);
246 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
247 error = xfs_trans_commit(tp, 0);
248
249 lsn = ip->i_itemp->ili_last_lsn;
250 xfs_iunlock(ip, XFS_ILOCK_EXCL);
251 } else {
252 /*
253 * Timestamps/size haven't changed since last inode flush or
254 * inode transaction commit. That means either nothing got
255 * written or a transaction committed which caught the updates.
256 * If the latter happened and the transaction hasn't hit the
257 * disk yet, the inode will be still be pinned. If it is,
258 * force the log.
259 */
260 if (xfs_ipincount(ip))
261 lsn = ip->i_itemp->ili_last_lsn; 203 lsn = ip->i_itemp->ili_last_lsn;
262 xfs_iunlock(ip, XFS_ILOCK_SHARED);
263 } 204 }
205 xfs_iunlock(ip, XFS_ILOCK_SHARED);
264 206
265 if (!error && lsn) 207 if (lsn)
266 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 208 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
267 209
268 /* 210 /*
@@ -659,9 +601,6 @@ restart:
659 return error; 601 return error;
660 } 602 }
661 603
662 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
663 file_update_time(file);
664
665 /* 604 /*
666 * If the offset is beyond the size of the file, we need to zero any 605 * If the offset is beyond the size of the file, we need to zero any
667 * blocks that fall between the existing EOF and the start of this 606 * blocks that fall between the existing EOF and the start of this
@@ -685,6 +624,15 @@ restart:
685 return error; 624 return error;
686 625
687 /* 626 /*
627 * Updating the timestamps will grab the ilock again from
628 * xfs_fs_dirty_inode, so we have to call it after dropping the
629 * lock above. Eventually we should look into a way to avoid
630 * the pointless lock roundtrip.
631 */
632 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
633 file_update_time(file);
634
635 /*
688 * If we're writing the file then make sure to clear the setuid and 636 * If we're writing the file then make sure to clear the setuid and
689 * setgid bits if the process is not being run by root. This keeps 637 * setgid bits if the process is not being run by root. This keeps
690 * people from modifying setuid and setgid binaries. 638 * people from modifying setuid and setgid binaries.