1 files changed, 16 insertions, 68 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7e5bc872f2b4..54a67dd9ac0a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -163,7 +163,6 @@ xfs_file_fsync(
        struct inode            *inode = file->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
-        struct xfs_trans        *tp;
        int                     error = 0;
        int                     log_flushed = 0;
        xfs_lsn_t               lsn = 0;
@@ -194,75 +193,18 @@ xfs_file_fsync(
        }
        /*
-         * We always need to make sure that the required inode state is safe on
+         * All metadata updates are logged, which means that we just have
-         * disk.  The inode might be clean but we still might need to force the
+         * to flush the log up to the latest LSN that touched the inode.
-         * log because of committed transactions that haven't hit the disk yet.
-         * Likewise, there could be unflushed non-transactional changes to the
-         * inode core that have to go to disk and this requires us to issue
-         * a synchronous transaction to capture these changes correctly.
-         *
-         * This code relies on the assumption that if the i_update_core field
-         * of the inode is clear and the inode is unpinned then it is clean
-         * and no action is required.
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
+        if (xfs_ipincount(ip)) {
-        /*
+                if (!datasync ||
-         * First check if the VFS inode is marked dirty.  All the dirtying
+                    (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
-         * of non-transactional updates do not go through mark_inode_dirty*,
-         * which allows us to distinguish between pure timestamp updates
-         * and i_size updates which need to be caught for fdatasync.
-         * After that also check for the dirty state in the XFS inode, which
-         * might gets cleared when the inode gets written out via the AIL
-         * or xfs_iflush_cluster.
-         */
-        if (((inode->i_state & I_DIRTY_DATASYNC) ||
-            ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
-            ip->i_update_core) {
-                /*
-                 * Kick off a transaction to log the inode core to get the
-                 * updates.  The sync transaction will also force the log.
-                 */
-                xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-                error = xfs_trans_reserve(tp, 0,
-                                XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-                if (error) {
-                        xfs_trans_cancel(tp, 0);
-                        return -error;
-                }
-                xfs_ilock(ip, XFS_ILOCK_EXCL);
-                /*
-                 * Note - it's possible that we might have pushed ourselves out
-                 * of the way during trans_reserve which would flush the inode.
-                 * But there's no guarantee that the inode buffer has actually
-                 * gone out yet (it's delwri).  Plus the buffer could be pinned
-                 * anyway if it's part of an inode in another recent
-                 * transaction.  So we play it safe and fire off the
-                 * transaction anyway.
-                 */
-                xfs_trans_ijoin(tp, ip, 0);
-                xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-                error = xfs_trans_commit(tp, 0);
-                lsn = ip->i_itemp->ili_last_lsn;
-                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-        } else {
-                /*
-                 * Timestamps/size haven't changed since last inode flush or
-                 * inode transaction commit.  That means either nothing got
-                 * written or a transaction committed which caught the updates.
-                 * If the latter happened and the transaction hasn't hit the
-                 * disk yet, the inode will be still be pinned.  If it is,
-                 * force the log.
-                 */
-                if (xfs_ipincount(ip))
                        lsn = ip->i_itemp->ili_last_lsn;
-                xfs_iunlock(ip, XFS_ILOCK_SHARED);
        }
+        xfs_iunlock(ip, XFS_ILOCK_SHARED);
-        if (!error && lsn)
+        if (lsn)
                error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
        /*
@@ -659,9 +601,6 @@ restart:
                return error;
        }
-        if (likely(!(file->f_mode & FMODE_NOCMTIME)))
-                file_update_time(file);
        /*
         * If the offset is beyond the size of the file, we need to zero any
         * blocks that fall between the existing EOF and the start of this
@@ -685,6 +624,15 @@ restart:
                return error;
        /*
+         * Updating the timestamps will grab the ilock again from
+         * xfs_fs_dirty_inode, so we have to call it after dropping the
+         * lock above.  Eventually we should look into a way to avoid
+         * the pointless lock roundtrip.
+         */
+        if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+                file_update_time(file);
+        /*
         * If we're writing the file then make sure to clear the setuid and
         * setgid bits if the process is not being run by root.  This keeps
         * people from modifying setuid and setgid binaries.

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7e5bc872f2b4..54a67dd9ac0a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c
@@ -163,7 +163,6 @@ xfs_file_fsync(
163	struct inode *inode = file->f_mapping->host;	163	struct inode *inode = file->f_mapping->host;
164	struct xfs_inode *ip = XFS_I(inode);	164	struct xfs_inode *ip = XFS_I(inode);
165	struct xfs_mount *mp = ip->i_mount;	165	struct xfs_mount *mp = ip->i_mount;
166	struct xfs_trans *tp;
167	int error = 0;	166	int error = 0;
168	int log_flushed = 0;	167	int log_flushed = 0;
169	xfs_lsn_t lsn = 0;	168	xfs_lsn_t lsn = 0;
@@ -194,75 +193,18 @@ xfs_file_fsync(
194	}	193	}
195		194
196	/*	195	/*
197	* We always need to make sure that the required inode state is safe on	196	* All metadata updates are logged, which means that we just have
198	* disk. The inode might be clean but we still might need to force the	197	* to flush the log up to the latest LSN that touched the inode.
199	* log because of committed transactions that haven't hit the disk yet.
200	* Likewise, there could be unflushed non-transactional changes to the
201	* inode core that have to go to disk and this requires us to issue
202	* a synchronous transaction to capture these changes correctly.
203	*
204	* This code relies on the assumption that if the i_update_core field
205	* of the inode is clear and the inode is unpinned then it is clean
206	* and no action is required.
207	*/	198	*/
208	xfs_ilock(ip, XFS_ILOCK_SHARED);	199	xfs_ilock(ip, XFS_ILOCK_SHARED);
209		200	if (xfs_ipincount(ip)) {
210	/*	201	if (!datasync ||
211	* First check if the VFS inode is marked dirty. All the dirtying	202	(ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
212	* of non-transactional updates do not go through mark_inode_dirty*,
213	* which allows us to distinguish between pure timestamp updates
214	* and i_size updates which need to be caught for fdatasync.
215	* After that also check for the dirty state in the XFS inode, which
216	* might gets cleared when the inode gets written out via the AIL
217	* or xfs_iflush_cluster.
218	*/
219	if (((inode->i_state & I_DIRTY_DATASYNC) ||
220	((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
221	ip->i_update_core) {
222	/*
223	* Kick off a transaction to log the inode core to get the
224	* updates. The sync transaction will also force the log.
225	*/
226	xfs_iunlock(ip, XFS_ILOCK_SHARED);
227	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
228	error = xfs_trans_reserve(tp, 0,
229	XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
230	if (error) {
231	xfs_trans_cancel(tp, 0);
232	return -error;
233	}
234	xfs_ilock(ip, XFS_ILOCK_EXCL);
235
236	/*
237	* Note - it's possible that we might have pushed ourselves out
238	* of the way during trans_reserve which would flush the inode.
239	* But there's no guarantee that the inode buffer has actually
240	* gone out yet (it's delwri). Plus the buffer could be pinned
241	* anyway if it's part of an inode in another recent
242	* transaction. So we play it safe and fire off the
243	* transaction anyway.
244	*/
245	xfs_trans_ijoin(tp, ip, 0);
246	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
247	error = xfs_trans_commit(tp, 0);
248
249	lsn = ip->i_itemp->ili_last_lsn;
250	xfs_iunlock(ip, XFS_ILOCK_EXCL);
251	} else {
252	/*
253	* Timestamps/size haven't changed since last inode flush or
254	* inode transaction commit. That means either nothing got
255	* written or a transaction committed which caught the updates.
256	* If the latter happened and the transaction hasn't hit the
257	* disk yet, the inode will be still be pinned. If it is,
258	* force the log.
259	*/
260	if (xfs_ipincount(ip))
261	lsn = ip->i_itemp->ili_last_lsn;	203	lsn = ip->i_itemp->ili_last_lsn;
262	xfs_iunlock(ip, XFS_ILOCK_SHARED);
263	}	204	}
		205	xfs_iunlock(ip, XFS_ILOCK_SHARED);
264		206
265	if (!error && lsn)	207	if (lsn)
266	error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);	208	error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
267		209
268	/*	210	/*
@@ -659,9 +601,6 @@ restart:
659	return error;	601	return error;
660	}	602	}
661		603
662	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
663	file_update_time(file);
664
665	/*	604	/*
666	* If the offset is beyond the size of the file, we need to zero any	605	* If the offset is beyond the size of the file, we need to zero any
667	* blocks that fall between the existing EOF and the start of this	606	* blocks that fall between the existing EOF and the start of this
@@ -685,6 +624,15 @@ restart:
685	return error;	624	return error;
686		625
687	/*	626	/*
		627	* Updating the timestamps will grab the ilock again from
		628	* xfs_fs_dirty_inode, so we have to call it after dropping the
		629	* lock above. Eventually we should look into a way to avoid
		630	* the pointless lock roundtrip.
		631	*/
		632	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		633	file_update_time(file);
		634
		635	/*
688	* If we're writing the file then make sure to clear the setuid and	636	* If we're writing the file then make sure to clear the setuid and
689	* setgid bits if the process is not being run by root. This keeps	637	* setgid bits if the process is not being run by root. This keeps
690	* people from modifying setuid and setgid binaries.	638	* people from modifying setuid and setgid binaries.