diff options
author | Christoph Hellwig <hch@lst.de> | 2009-08-31 20:00:31 -0400 |
---|---|---|
committer | Felix Blyakher <felixb@sgi.com> | 2009-09-01 13:45:57 -0400 |
commit | 13e6d5cdde0e785aa943810f08b801cadd0935df (patch) | |
tree | 72b62d1e3e4b35f1613458b6e1dbbadd74534a92 /fs/xfs/xfs_rw.c | |
parent | bd169565993b39b9b4b102cdac8b13e0a259ce2f (diff) |
xfs: merge fsync and O_SYNC handling
The guarantees for O_SYNC are exactly the same as the ones we need to
make for an fsync call (and given that Linux O_SYNC is O_DSYNC the
equivalent is fdadatasync, but we treat both the same in XFS), except
with a range data writeout. Jan Kara has started unifying these two
path for filesystems using the generic helpers, and I've started to
look at XFS.
The actual transaction commited by xfs_fsync and xfs_write_sync_logforce
has a different transaction number, but actually is exactly the same.
We'll only use the fsync transaction going forward. One major difference
is that xfs_write_sync_logforce never issues a cache flush unless we
commit a transaction causing that as a side-effect, which is an obvious
bug in the O_SYNC handling. Second all the locking and i_update_size
vs i_update_core changes from 978b7237123d007b9fa983af6e0e2fa8f97f9934
never made it to xfs_write_sync_logforce, so we add them back.
To make xfs_fsync easily usable from the O_SYNC path, the filemap_fdatawait
call is moved up to xfs_file_fsync, so that we don't wait on the whole
file after we already waited for our portion in xfs_write.
We'll also use a plain call to filemap_write_and_wait_range instead
of the previous sync_page_rang which did it in two steps including
an half-hearted inode write out that doesn't help us.
Once we're done with this also remove the now useless i_update_size
tracking.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_rw.c')
-rw-r--r-- | fs/xfs/xfs_rw.c | 84 |
1 files changed, 0 insertions, 84 deletions
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index fea68615ed23..3f816ad7ff19 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -88,90 +88,6 @@ xfs_write_clear_setuid( | |||
88 | } | 88 | } |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * Handle logging requirements of various synchronous types of write. | ||
92 | */ | ||
93 | int | ||
94 | xfs_write_sync_logforce( | ||
95 | xfs_mount_t *mp, | ||
96 | xfs_inode_t *ip) | ||
97 | { | ||
98 | int error = 0; | ||
99 | |||
100 | /* | ||
101 | * If we're treating this as O_DSYNC and we have not updated the | ||
102 | * size, force the log. | ||
103 | */ | ||
104 | if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && | ||
105 | !(ip->i_update_size)) { | ||
106 | xfs_inode_log_item_t *iip = ip->i_itemp; | ||
107 | |||
108 | /* | ||
109 | * If an allocation transaction occurred | ||
110 | * without extending the size, then we have to force | ||
111 | * the log up the proper point to ensure that the | ||
112 | * allocation is permanent. We can't count on | ||
113 | * the fact that buffered writes lock out direct I/O | ||
114 | * writes - the direct I/O write could have extended | ||
115 | * the size nontransactionally, then finished before | ||
116 | * we started. xfs_write_file will think that the file | ||
117 | * didn't grow but the update isn't safe unless the | ||
118 | * size change is logged. | ||
119 | * | ||
120 | * Force the log if we've committed a transaction | ||
121 | * against the inode or if someone else has and | ||
122 | * the commit record hasn't gone to disk (e.g. | ||
123 | * the inode is pinned). This guarantees that | ||
124 | * all changes affecting the inode are permanent | ||
125 | * when we return. | ||
126 | */ | ||
127 | if (iip && iip->ili_last_lsn) { | ||
128 | error = _xfs_log_force(mp, iip->ili_last_lsn, | ||
129 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | ||
130 | } else if (xfs_ipincount(ip) > 0) { | ||
131 | error = _xfs_log_force(mp, (xfs_lsn_t)0, | ||
132 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | ||
133 | } | ||
134 | |||
135 | } else { | ||
136 | xfs_trans_t *tp; | ||
137 | |||
138 | /* | ||
139 | * O_SYNC or O_DSYNC _with_ a size update are handled | ||
140 | * the same way. | ||
141 | * | ||
142 | * If the write was synchronous then we need to make | ||
143 | * sure that the inode modification time is permanent. | ||
144 | * We'll have updated the timestamp above, so here | ||
145 | * we use a synchronous transaction to log the inode. | ||
146 | * It's not fast, but it's necessary. | ||
147 | * | ||
148 | * If this a dsync write and the size got changed | ||
149 | * non-transactionally, then we need to ensure that | ||
150 | * the size change gets logged in a synchronous | ||
151 | * transaction. | ||
152 | */ | ||
153 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); | ||
154 | if ((error = xfs_trans_reserve(tp, 0, | ||
155 | XFS_SWRITE_LOG_RES(mp), | ||
156 | 0, 0, 0))) { | ||
157 | /* Transaction reserve failed */ | ||
158 | xfs_trans_cancel(tp, 0); | ||
159 | } else { | ||
160 | /* Transaction reserve successful */ | ||
161 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
162 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
163 | xfs_trans_ihold(tp, ip); | ||
164 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
165 | xfs_trans_set_sync(tp); | ||
166 | error = xfs_trans_commit(tp, 0); | ||
167 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | return error; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Force a shutdown of the filesystem instantly while keeping | 91 | * Force a shutdown of the filesystem instantly while keeping |
176 | * the filesystem consistent. We don't do an unmount here; just shutdown | 92 | * the filesystem consistent. We don't do an unmount here; just shutdown |
177 | * the shop, make sure that absolutely nothing persistent happens to | 93 | * the shop, make sure that absolutely nothing persistent happens to |