aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2010-02-08 19:43:49 -0500
committerDave Chinner <david@fromorbit.com>2010-02-08 19:43:49 -0500
commit07fec73625dc0db6f9aed68019918208a2ca53f5 (patch)
tree04f4ad3f829c3d64d6b2a3c0bddf11a4374eb93d
parente8b217e7530c6a073ac69f1c85b922d93fdf5647 (diff)
xfs: log changed inodes instead of writing them synchronously
When an inode has already been flushed delayed write, xfs_inode_clean() returns true and hence xfs_fs_write_inode() can return on a synchronous inode write without having written the inode. Currently these synchronous writes only come from sync(1), unmount, a synchronous NFS export and cachefiles so should be relatively rare and out of common performance paths. Realistically, a synchronous inode write is not necessary here; we can avoid writing the inode by logging any non-transactional changes that are pending. This needs to be done with synchronous transactions, but it avoids seeking between the log and inode clusters as we do now. We don't force the log if the inode is pinned, though, so this differs from the fsync case. For normal sys_sync and unmount behaviour this is fine because we do a synchronous log force in xfs_sync_data which is called from the ->sync_fs code. It does however break the NFS synchronous export guarantees for now, but work is under way to fix this at a higher level or for the higher level to provide an additional flag in the writeback control to tell us that a log force is needed. Portions of this patch are based on work from Dave Chinner. Signed-off-by: Christoph Hellwig <hch@infradead.org> Reviewed-by: Dave Chinner <david@fromorbit.com> Reviewed-by: Alex Elder <aelder@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c111
1 files changed, 82 insertions, 29 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 3b5b46b8e3b9..25ea2408118f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1021,12 +1021,45 @@ xfs_fs_dirty_inode(
1021 XFS_I(inode)->i_update_core = 1; 1021 XFS_I(inode)->i_update_core = 1;
1022} 1022}
1023 1023
1024/* 1024STATIC int
1025 * Attempt to flush the inode, this will actually fail 1025xfs_log_inode(
1026 * if the inode is pinned, but we dirty the inode again 1026 struct xfs_inode *ip)
1027 * at the point when it is unpinned after a log write, 1027{
1028 * since this is when the inode itself becomes flushable. 1028 struct xfs_mount *mp = ip->i_mount;
1029 */ 1029 struct xfs_trans *tp;
1030 int error;
1031
1032 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1033 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1034 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
1035
1036 if (error) {
1037 xfs_trans_cancel(tp, 0);
1038 /* we need to return with the lock hold shared */
1039 xfs_ilock(ip, XFS_ILOCK_SHARED);
1040 return error;
1041 }
1042
1043 xfs_ilock(ip, XFS_ILOCK_EXCL);
1044
1045 /*
1046 * Note - it's possible that we might have pushed ourselves out of the
1047 * way during trans_reserve which would flush the inode. But there's
1048 * no guarantee that the inode buffer has actually gone out yet (it's
1049 * delwri). Plus the buffer could be pinned anyway if it's part of
1050 * an inode in another recent transaction. So we play it safe and
1051 * fire off the transaction anyway.
1052 */
1053 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1054 xfs_trans_ihold(tp, ip);
1055 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1056 xfs_trans_set_sync(tp);
1057 error = xfs_trans_commit(tp, 0);
1058 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1059
1060 return error;
1061}
1062
1030STATIC int 1063STATIC int
1031xfs_fs_write_inode( 1064xfs_fs_write_inode(
1032 struct inode *inode, 1065 struct inode *inode,
@@ -1034,7 +1067,7 @@ xfs_fs_write_inode(
1034{ 1067{
1035 struct xfs_inode *ip = XFS_I(inode); 1068 struct xfs_inode *ip = XFS_I(inode);
1036 struct xfs_mount *mp = ip->i_mount; 1069 struct xfs_mount *mp = ip->i_mount;
1037 int error = 0; 1070 int error = EAGAIN;
1038 1071
1039 xfs_itrace_entry(ip); 1072 xfs_itrace_entry(ip);
1040 1073
@@ -1045,35 +1078,55 @@ xfs_fs_write_inode(
1045 error = xfs_wait_on_pages(ip, 0, -1); 1078 error = xfs_wait_on_pages(ip, 0, -1);
1046 if (error) 1079 if (error)
1047 goto out; 1080 goto out;
1048 }
1049
1050 /*
1051 * Bypass inodes which have already been cleaned by
1052 * the inode flush clustering code inside xfs_iflush
1053 */
1054 if (xfs_inode_clean(ip))
1055 goto out;
1056 1081
1057 /* 1082 /*
1058 * We make this non-blocking if the inode is contended, return 1083 * Make sure the inode has hit stable storage. By using the
1059 * EAGAIN to indicate to the caller that they did not succeed. 1084 * log and the fsync transactions we reduce the IOs we have
1060 * This prevents the flush path from blocking on inodes inside 1085 * to do here from two (log and inode) to just the log.
1061 * another operation right now, they get caught later by xfs_sync. 1086 *
1062 */ 1087 * Note: We still need to do a delwri write of the inode after
1063 if (sync) { 1088 * this to flush it to the backing buffer so that bulkstat
1089 * works properly if this is the first time the inode has been
1090 * written. Because we hold the ilock atomically over the
1091 * transaction commit and the inode flush we are guaranteed
1092 * that the inode is not pinned when it returns. If the flush
1093 * lock is already held, then the inode has already been
1094 * flushed once and we don't need to flush it again. Hence
1095 * the code will only flush the inode if it isn't already
1096 * being flushed.
1097 */
1064 xfs_ilock(ip, XFS_ILOCK_SHARED); 1098 xfs_ilock(ip, XFS_ILOCK_SHARED);
1065 xfs_iflock(ip); 1099 if (ip->i_update_core) {
1066 1100 error = xfs_log_inode(ip);
1067 error = xfs_iflush(ip, SYNC_WAIT); 1101 if (error)
1102 goto out_unlock;
1103 }
1068 } else { 1104 } else {
1069 error = EAGAIN; 1105 /*
1106 * We make this non-blocking if the inode is contended, return
1107 * EAGAIN to indicate to the caller that they did not succeed.
1108 * This prevents the flush path from blocking on inodes inside
1109 * another operation right now, they get caught later by xfs_sync.
1110 */
1070 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 1111 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1071 goto out; 1112 goto out;
1072 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 1113 }
1073 goto out_unlock; 1114
1115 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1116 goto out_unlock;
1074 1117
1075 error = xfs_iflush(ip, 0); 1118 /*
1119 * Now we have the flush lock and the inode is not pinned, we can check
1120 * if the inode is really clean as we know that there are no pending
1121 * transaction completions, it is not waiting on the delayed write
1122 * queue and there is no IO in progress.
1123 */
1124 if (xfs_inode_clean(ip)) {
1125 xfs_ifunlock(ip);
1126 error = 0;
1127 goto out_unlock;
1076 } 1128 }
1129 error = xfs_iflush(ip, 0);
1077 1130
1078 out_unlock: 1131 out_unlock:
1079 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1132 xfs_iunlock(ip, XFS_ILOCK_SHARED);