diff options
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- | fs/xfs/xfs_file.c | 84 |
1 files changed, 16 insertions, 68 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7e5bc872f2b4..54a67dd9ac0a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -163,7 +163,6 @@ xfs_file_fsync( | |||
163 | struct inode *inode = file->f_mapping->host; | 163 | struct inode *inode = file->f_mapping->host; |
164 | struct xfs_inode *ip = XFS_I(inode); | 164 | struct xfs_inode *ip = XFS_I(inode); |
165 | struct xfs_mount *mp = ip->i_mount; | 165 | struct xfs_mount *mp = ip->i_mount; |
166 | struct xfs_trans *tp; | ||
167 | int error = 0; | 166 | int error = 0; |
168 | int log_flushed = 0; | 167 | int log_flushed = 0; |
169 | xfs_lsn_t lsn = 0; | 168 | xfs_lsn_t lsn = 0; |
@@ -194,75 +193,18 @@ xfs_file_fsync( | |||
194 | } | 193 | } |
195 | 194 | ||
196 | /* | 195 | /* |
197 | * We always need to make sure that the required inode state is safe on | 196 | * All metadata updates are logged, which means that we just have |
198 | * disk. The inode might be clean but we still might need to force the | 197 | * to flush the log up to the latest LSN that touched the inode. |
199 | * log because of committed transactions that haven't hit the disk yet. | ||
200 | * Likewise, there could be unflushed non-transactional changes to the | ||
201 | * inode core that have to go to disk and this requires us to issue | ||
202 | * a synchronous transaction to capture these changes correctly. | ||
203 | * | ||
204 | * This code relies on the assumption that if the i_update_core field | ||
205 | * of the inode is clear and the inode is unpinned then it is clean | ||
206 | * and no action is required. | ||
207 | */ | 198 | */ |
208 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 199 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
209 | 200 | if (xfs_ipincount(ip)) { | |
210 | /* | 201 | if (!datasync || |
211 | * First check if the VFS inode is marked dirty. All the dirtying | 202 | (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) |
212 | * of non-transactional updates do not go through mark_inode_dirty*, | ||
213 | * which allows us to distinguish between pure timestamp updates | ||
214 | * and i_size updates which need to be caught for fdatasync. | ||
215 | * After that also check for the dirty state in the XFS inode, which | ||
216 | * might gets cleared when the inode gets written out via the AIL | ||
217 | * or xfs_iflush_cluster. | ||
218 | */ | ||
219 | if (((inode->i_state & I_DIRTY_DATASYNC) || | ||
220 | ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && | ||
221 | ip->i_update_core) { | ||
222 | /* | ||
223 | * Kick off a transaction to log the inode core to get the | ||
224 | * updates. The sync transaction will also force the log. | ||
225 | */ | ||
226 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
227 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
228 | error = xfs_trans_reserve(tp, 0, | ||
229 | XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); | ||
230 | if (error) { | ||
231 | xfs_trans_cancel(tp, 0); | ||
232 | return -error; | ||
233 | } | ||
234 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
235 | |||
236 | /* | ||
237 | * Note - it's possible that we might have pushed ourselves out | ||
238 | * of the way during trans_reserve which would flush the inode. | ||
239 | * But there's no guarantee that the inode buffer has actually | ||
240 | * gone out yet (it's delwri). Plus the buffer could be pinned | ||
241 | * anyway if it's part of an inode in another recent | ||
242 | * transaction. So we play it safe and fire off the | ||
243 | * transaction anyway. | ||
244 | */ | ||
245 | xfs_trans_ijoin(tp, ip, 0); | ||
246 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
247 | error = xfs_trans_commit(tp, 0); | ||
248 | |||
249 | lsn = ip->i_itemp->ili_last_lsn; | ||
250 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
251 | } else { | ||
252 | /* | ||
253 | * Timestamps/size haven't changed since last inode flush or | ||
254 | * inode transaction commit. That means either nothing got | ||
255 | * written or a transaction committed which caught the updates. | ||
256 | * If the latter happened and the transaction hasn't hit the | ||
257 | * disk yet, the inode will be still be pinned. If it is, | ||
258 | * force the log. | ||
259 | */ | ||
260 | if (xfs_ipincount(ip)) | ||
261 | lsn = ip->i_itemp->ili_last_lsn; | 203 | lsn = ip->i_itemp->ili_last_lsn; |
262 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
263 | } | 204 | } |
205 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
264 | 206 | ||
265 | if (!error && lsn) | 207 | if (lsn) |
266 | error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); | 208 | error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); |
267 | 209 | ||
268 | /* | 210 | /* |
@@ -659,9 +601,6 @@ restart: | |||
659 | return error; | 601 | return error; |
660 | } | 602 | } |
661 | 603 | ||
662 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) | ||
663 | file_update_time(file); | ||
664 | |||
665 | /* | 604 | /* |
666 | * If the offset is beyond the size of the file, we need to zero any | 605 | * If the offset is beyond the size of the file, we need to zero any |
667 | * blocks that fall between the existing EOF and the start of this | 606 | * blocks that fall between the existing EOF and the start of this |
@@ -685,6 +624,15 @@ restart: | |||
685 | return error; | 624 | return error; |
686 | 625 | ||
687 | /* | 626 | /* |
627 | * Updating the timestamps will grab the ilock again from | ||
628 | * xfs_fs_dirty_inode, so we have to call it after dropping the | ||
629 | * lock above. Eventually we should look into a way to avoid | ||
630 | * the pointless lock roundtrip. | ||
631 | */ | ||
632 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) | ||
633 | file_update_time(file); | ||
634 | |||
635 | /* | ||
688 | * If we're writing the file then make sure to clear the setuid and | 636 | * If we're writing the file then make sure to clear the setuid and |
689 | * setgid bits if the process is not being run by root. This keeps | 637 | * setgid bits if the process is not being run by root. This keeps |
690 | * people from modifying setuid and setgid binaries. | 638 | * people from modifying setuid and setgid binaries. |