aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@sgi.com>2005-11-01 18:26:59 -0500
committerNathan Scott <nathans@sgi.com>2005-11-01 18:26:59 -0500
commitf538d4da8d521746ca5ebf8c1a8105eb49bfb45e (patch)
tree5516e1d2df01e412709284e379085b348122c501
parent739cafd316235fc55463849e80710f2ca308b9ae (diff)
[XFS] write barrier support Issue all log sync operations as ordered
writes. In addition flush the disk cache on fsync if the sync cached operation didn't sync the log to disk (this requires some additional bookkeeping in the transaction and log code). If the device doesn't claim to support barriers, the filesystem has an external log volume or the trial superblock write with barriers enabled failed, we disable barriers and print a warning. We should probably fail the mount completely, but that could lead to nasty boot failures for the root filesystem. Not enabled by default yet, needs more destructive testing first. SGI-PV: 912426 SGI-Modid: xfs-linux:xfs-kern:198723a Signed-off-by: Christoph Hellwig <hch@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c66
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h1
-rw-r--r--fs/xfs/xfs_clnt.h2
-rw-r--r--fs/xfs/xfs_log.c65
-rw-r--r--fs/xfs/xfs_log.h9
-rw-r--r--fs/xfs/xfs_mount.h3
-rw-r--r--fs/xfs/xfs_trans.c13
-rw-r--r--fs/xfs/xfs_trans.h7
-rw-r--r--fs/xfs/xfs_vfsops.c38
-rw-r--r--fs/xfs/xfs_vnodeops.c32
12 files changed, 190 insertions, 59 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 188cbbd5b74a..4663f7dbff1c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1295,6 +1295,11 @@ _pagebuf_ioapply(
1295 rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; 1295 rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
1296 } 1296 }
1297 1297
1298 if (pb->pb_flags & PBF_ORDERED) {
1299 ASSERT(!(pb->pb_flags & PBF_READ));
1300 rw = WRITE_BARRIER;
1301 }
1302
1298 /* Special code path for reading a sub page size pagebuf in -- 1303 /* Special code path for reading a sub page size pagebuf in --
1299 * we populate up the whole page, and hence the other metadata 1304 * we populate up the whole page, and hence the other metadata
1300 * in the same page. This optimization is only valid when the 1305 * in the same page. This optimization is only valid when the
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 39c8ca122534..fa21d1f9cb0b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -74,7 +74,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */
74 PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ 74 PBF_DELWRI = (1 << 6), /* buffer has dirty pages */
75 PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ 75 PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
76 PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 76 PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
77 PBF_FLUSH = (1 << 11), /* flush disk write cache */ 77 PBF_ORDERED = (1 << 11), /* use ordered writes */
78 PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 78 PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
79 79
80 /* flags used only as arguments to access routines */ 80 /* flags used only as arguments to access routines */
@@ -383,9 +383,9 @@ extern void pagebuf_trace(
383#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC) 383#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
384#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC) 384#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
385 385
386#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH) 386#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)
387#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH) 387#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)
388#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH) 388#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)
389 389
390#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n") 390#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
391#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n") 391#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2302454d8d47..d2701cc624b9 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -278,6 +278,72 @@ xfs_blkdev_put(
278 close_bdev_excl(bdev); 278 close_bdev_excl(bdev);
279} 279}
280 280
281/*
282 * Try to write out the superblock using barriers.
283 */
284STATIC int
285xfs_barrier_test(
286 xfs_mount_t *mp)
287{
288 xfs_buf_t *sbp = xfs_getsb(mp, 0);
289 int error;
290
291 XFS_BUF_UNDONE(sbp);
292 XFS_BUF_UNREAD(sbp);
293 XFS_BUF_UNDELAYWRITE(sbp);
294 XFS_BUF_WRITE(sbp);
295 XFS_BUF_UNASYNC(sbp);
296 XFS_BUF_ORDERED(sbp);
297
298 xfsbdstrat(mp, sbp);
299 error = xfs_iowait(sbp);
300
301 /*
302 * Clear all the flags we set and possible error state in the
303 * buffer. We only did the write to try out whether barriers
304 * worked and shouldn't leave any traces in the superblock
305 * buffer.
306 */
307 XFS_BUF_DONE(sbp);
308 XFS_BUF_ERROR(sbp, 0);
309 XFS_BUF_UNORDERED(sbp);
310
311 xfs_buf_relse(sbp);
312 return error;
313}
314
315void
316xfs_mountfs_check_barriers(xfs_mount_t *mp)
317{
318 int error;
319
320 if (mp->m_logdev_targp != mp->m_ddev_targp) {
321 xfs_fs_cmn_err(CE_NOTE, mp,
322 "Disabling barriers, not supported with external log device");
323 mp->m_flags &= ~XFS_MOUNT_BARRIER;
324 }
325
326 if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
327 QUEUE_ORDERED_NONE) {
328 xfs_fs_cmn_err(CE_NOTE, mp,
329 "Disabling barriers, not supported by the underlying device");
330 mp->m_flags &= ~XFS_MOUNT_BARRIER;
331 }
332
333 error = xfs_barrier_test(mp);
334 if (error) {
335 xfs_fs_cmn_err(CE_NOTE, mp,
336 "Disabling barriers, trial barrier write failed");
337 mp->m_flags &= ~XFS_MOUNT_BARRIER;
338 }
339}
340
341void
342xfs_blkdev_issue_flush(
343 xfs_buftarg_t *buftarg)
344{
345 blkdev_issue_flush(buftarg->pbr_bdev, NULL);
346}
281 347
282STATIC struct inode * 348STATIC struct inode *
283linvfs_alloc_inode( 349linvfs_alloc_inode(
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index ec7e0035c731..ad77e3743e04 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -132,6 +132,7 @@ extern void xfs_flush_device(struct xfs_inode *);
132extern int xfs_blkdev_get(struct xfs_mount *, const char *, 132extern int xfs_blkdev_get(struct xfs_mount *, const char *,
133 struct block_device **); 133 struct block_device **);
134extern void xfs_blkdev_put(struct block_device *); 134extern void xfs_blkdev_put(struct block_device *);
135extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
135 136
136extern struct export_operations linvfs_export_ops; 137extern struct export_operations linvfs_export_ops;
137 138
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index b3215ffe0be8..c93cb282f3d8 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -99,7 +99,7 @@ struct xfs_mount_args {
99 * enforcement */ 99 * enforcement */
100#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */ 100#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
101#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */ 101#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
102#define XFSMNT_NOLOGFLUSH 0x04000000 /* Don't flush for log blocks */ 102#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
103#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */ 103#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */
104#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width 104#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
105 * allocation */ 105 * allocation */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 51814c32eddf..b9d3ad35240e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -93,8 +93,11 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
93STATIC void xlog_state_switch_iclogs(xlog_t *log, 93STATIC void xlog_state_switch_iclogs(xlog_t *log,
94 xlog_in_core_t *iclog, 94 xlog_in_core_t *iclog,
95 int eventual_size); 95 int eventual_size);
96STATIC int xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags); 96STATIC int xlog_state_sync(xlog_t *log,
97STATIC int xlog_state_sync_all(xlog_t *log, uint flags); 97 xfs_lsn_t lsn,
98 uint flags,
99 int *log_flushed);
100STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
98STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 101STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
99 102
100/* local functions to manipulate grant head */ 103/* local functions to manipulate grant head */
@@ -312,12 +315,17 @@ xfs_log_done(xfs_mount_t *mp,
312 * semaphore. 315 * semaphore.
313 */ 316 */
314int 317int
315xfs_log_force(xfs_mount_t *mp, 318_xfs_log_force(
316 xfs_lsn_t lsn, 319 xfs_mount_t *mp,
317 uint flags) 320 xfs_lsn_t lsn,
321 uint flags,
322 int *log_flushed)
318{ 323{
319 int rval; 324 xlog_t *log = mp->m_log;
320 xlog_t *log = mp->m_log; 325 int dummy;
326
327 if (!log_flushed)
328 log_flushed = &dummy;
321 329
322#if defined(DEBUG) || defined(XLOG_NOLOG) 330#if defined(DEBUG) || defined(XLOG_NOLOG)
323 if (!xlog_debug && xlog_target == log->l_targ) 331 if (!xlog_debug && xlog_target == log->l_targ)
@@ -328,17 +336,12 @@ xfs_log_force(xfs_mount_t *mp,
328 336
329 XFS_STATS_INC(xs_log_force); 337 XFS_STATS_INC(xs_log_force);
330 338
331 if ((log->l_flags & XLOG_IO_ERROR) == 0) { 339 if (log->l_flags & XLOG_IO_ERROR)
332 if (lsn == 0) 340 return XFS_ERROR(EIO);
333 rval = xlog_state_sync_all(log, flags); 341 if (lsn == 0)
334 else 342 return xlog_state_sync_all(log, flags, log_flushed);
335 rval = xlog_state_sync(log, lsn, flags); 343 else
336 } else { 344 return xlog_state_sync(log, lsn, flags, log_flushed);
337 rval = XFS_ERROR(EIO);
338 }
339
340 return rval;
341
342} /* xfs_log_force */ 345} /* xfs_log_force */
343 346
344/* 347/*
@@ -1467,14 +1470,13 @@ xlog_sync(xlog_t *log,
1467 XFS_BUF_BUSY(bp); 1470 XFS_BUF_BUSY(bp);
1468 XFS_BUF_ASYNC(bp); 1471 XFS_BUF_ASYNC(bp);
1469 /* 1472 /*
1470 * Do a disk write cache flush for the log block. 1473 * Do an ordered write for the log block.
1471 * This is a bit of a sledgehammer, it would be better 1474 *
1472 * to use a tag barrier here that just prevents reordering.
1473 * It may not be needed to flush the first split block in the log wrap 1475 * It may not be needed to flush the first split block in the log wrap
1474 * case, but do it anyways to be safe -AK 1476 * case, but do it anyways to be safe -AK
1475 */ 1477 */
1476 if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH)) 1478 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1477 XFS_BUF_FLUSH(bp); 1479 XFS_BUF_ORDERED(bp);
1478 1480
1479 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1481 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1480 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1482 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@@ -1505,8 +1507,8 @@ xlog_sync(xlog_t *log,
1505 XFS_BUF_SET_FSPRIVATE(bp, iclog); 1507 XFS_BUF_SET_FSPRIVATE(bp, iclog);
1506 XFS_BUF_BUSY(bp); 1508 XFS_BUF_BUSY(bp);
1507 XFS_BUF_ASYNC(bp); 1509 XFS_BUF_ASYNC(bp);
1508 if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH)) 1510 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1509 XFS_BUF_FLUSH(bp); 1511 XFS_BUF_ORDERED(bp);
1510 dptr = XFS_BUF_PTR(bp); 1512 dptr = XFS_BUF_PTR(bp);
1511 /* 1513 /*
1512 * Bump the cycle numbers at the start of each block 1514 * Bump the cycle numbers at the start of each block
@@ -2951,7 +2953,7 @@ xlog_state_switch_iclogs(xlog_t *log,
2951 * not in the active nor dirty state. 2953 * not in the active nor dirty state.
2952 */ 2954 */
2953STATIC int 2955STATIC int
2954xlog_state_sync_all(xlog_t *log, uint flags) 2956xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2955{ 2957{
2956 xlog_in_core_t *iclog; 2958 xlog_in_core_t *iclog;
2957 xfs_lsn_t lsn; 2959 xfs_lsn_t lsn;
@@ -3000,6 +3002,7 @@ xlog_state_sync_all(xlog_t *log, uint flags)
3000 3002
3001 if (xlog_state_release_iclog(log, iclog)) 3003 if (xlog_state_release_iclog(log, iclog))
3002 return XFS_ERROR(EIO); 3004 return XFS_ERROR(EIO);
3005 *log_flushed = 1;
3003 s = LOG_LOCK(log); 3006 s = LOG_LOCK(log);
3004 if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn && 3007 if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn &&
3005 iclog->ic_state != XLOG_STATE_DIRTY) 3008 iclog->ic_state != XLOG_STATE_DIRTY)
@@ -3043,6 +3046,7 @@ maybe_sleep:
3043 */ 3046 */
3044 if (iclog->ic_state & XLOG_STATE_IOERROR) 3047 if (iclog->ic_state & XLOG_STATE_IOERROR)
3045 return XFS_ERROR(EIO); 3048 return XFS_ERROR(EIO);
3049 *log_flushed = 1;
3046 3050
3047 } else { 3051 } else {
3048 3052
@@ -3068,7 +3072,8 @@ no_sleep:
3068int 3072int
3069xlog_state_sync(xlog_t *log, 3073xlog_state_sync(xlog_t *log,
3070 xfs_lsn_t lsn, 3074 xfs_lsn_t lsn,
3071 uint flags) 3075 uint flags,
3076 int *log_flushed)
3072{ 3077{
3073 xlog_in_core_t *iclog; 3078 xlog_in_core_t *iclog;
3074 int already_slept = 0; 3079 int already_slept = 0;
@@ -3120,6 +3125,7 @@ try_again:
3120 XFS_STATS_INC(xs_log_force_sleep); 3125 XFS_STATS_INC(xs_log_force_sleep);
3121 sv_wait(&iclog->ic_prev->ic_writesema, PSWP, 3126 sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
3122 &log->l_icloglock, s); 3127 &log->l_icloglock, s);
3128 *log_flushed = 1;
3123 already_slept = 1; 3129 already_slept = 1;
3124 goto try_again; 3130 goto try_again;
3125 } else { 3131 } else {
@@ -3128,6 +3134,7 @@ try_again:
3128 LOG_UNLOCK(log, s); 3134 LOG_UNLOCK(log, s);
3129 if (xlog_state_release_iclog(log, iclog)) 3135 if (xlog_state_release_iclog(log, iclog))
3130 return XFS_ERROR(EIO); 3136 return XFS_ERROR(EIO);
3137 *log_flushed = 1;
3131 s = LOG_LOCK(log); 3138 s = LOG_LOCK(log);
3132 } 3139 }
3133 } 3140 }
@@ -3152,6 +3159,7 @@ try_again:
3152 */ 3159 */
3153 if (iclog->ic_state & XLOG_STATE_IOERROR) 3160 if (iclog->ic_state & XLOG_STATE_IOERROR)
3154 return XFS_ERROR(EIO); 3161 return XFS_ERROR(EIO);
3162 *log_flushed = 1;
3155 } else { /* just return */ 3163 } else { /* just return */
3156 LOG_UNLOCK(log, s); 3164 LOG_UNLOCK(log, s);
3157 } 3165 }
@@ -3606,6 +3614,7 @@ xfs_log_force_umount(
3606 xlog_ticket_t *tic; 3614 xlog_ticket_t *tic;
3607 xlog_t *log; 3615 xlog_t *log;
3608 int retval; 3616 int retval;
3617 int dummy;
3609 SPLDECL(s); 3618 SPLDECL(s);
3610 SPLDECL(s2); 3619 SPLDECL(s2);
3611 3620
@@ -3684,7 +3693,7 @@ xfs_log_force_umount(
3684 * Force the incore logs to disk before shutting the 3693 * Force the incore logs to disk before shutting the
3685 * log down completely. 3694 * log down completely.
3686 */ 3695 */
3687 xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC); 3696 xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
3688 s2 = LOG_LOCK(log); 3697 s2 = LOG_LOCK(log);
3689 retval = xlog_state_ioerror(log); 3698 retval = xlog_state_ioerror(log);
3690 LOG_UNLOCK(log, s2); 3699 LOG_UNLOCK(log, s2);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 18961119fc65..dc920f83412d 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -174,9 +174,12 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
174 xfs_log_ticket_t ticket, 174 xfs_log_ticket_t ticket,
175 void **iclog, 175 void **iclog,
176 uint flags); 176 uint flags);
177int xfs_log_force(struct xfs_mount *mp, 177int _xfs_log_force(struct xfs_mount *mp,
178 xfs_lsn_t lsn, 178 xfs_lsn_t lsn,
179 uint flags); 179 uint flags,
180 int *log_forced);
/*
 * Compatibility wrapper for callers that do not care whether the log
 * was actually flushed: forwards to _xfs_log_force() with a NULL
 * log_flushed pointer.
 *
 * The expansion deliberately has no trailing semicolon so the macro
 * can be used as an expression and in un-braced if/else statements.
 */
#define xfs_log_force(mp, lsn, flags) \
	_xfs_log_force(mp, lsn, flags, NULL)
180int xfs_log_mount(struct xfs_mount *mp, 183int xfs_log_mount(struct xfs_mount *mp,
181 struct xfs_buftarg *log_target, 184 struct xfs_buftarg *log_target,
182 xfs_daddr_t start_block, 185 xfs_daddr_t start_block,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5affba38a577..bc55931ac74e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -415,7 +415,7 @@ typedef struct xfs_mount {
415 * 32 bits in size */ 415 * 32 bits in size */
416#define XFS_MOUNT_32BITINOOPT 0x00008000 /* saved mount option state */ 416#define XFS_MOUNT_32BITINOOPT 0x00008000 /* saved mount option state */
417#define XFS_MOUNT_NOUUID 0x00010000 /* ignore uuid during mount */ 417#define XFS_MOUNT_NOUUID 0x00010000 /* ignore uuid during mount */
418#define XFS_MOUNT_NOLOGFLUSH 0x00020000 418#define XFS_MOUNT_BARRIER 0x00020000
419#define XFS_MOUNT_IDELETE 0x00040000 /* delete empty inode clusters*/ 419#define XFS_MOUNT_IDELETE 0x00040000 /* delete empty inode clusters*/
420#define XFS_MOUNT_SWALLOC 0x00080000 /* turn on stripe width 420#define XFS_MOUNT_SWALLOC 0x00080000 /* turn on stripe width
421 * allocation */ 421 * allocation */
@@ -542,6 +542,7 @@ extern xfs_mount_t *xfs_mount_init(void);
542extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 542extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
543extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); 543extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
544extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int); 544extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int);
545extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
545 546
546extern int xfs_unmountfs(xfs_mount_t *, struct cred *); 547extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
547extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *); 548extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 92efe272b83d..5e33891b8049 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -661,10 +661,11 @@ xfs_trans_unreserve_and_mod_sb(
661 */ 661 */
662 /*ARGSUSED*/ 662 /*ARGSUSED*/
663int 663int
664xfs_trans_commit( 664_xfs_trans_commit(
665 xfs_trans_t *tp, 665 xfs_trans_t *tp,
666 uint flags, 666 uint flags,
667 xfs_lsn_t *commit_lsn_p) 667 xfs_lsn_t *commit_lsn_p,
668 int *log_flushed)
668{ 669{
669 xfs_log_iovec_t *log_vector; 670 xfs_log_iovec_t *log_vector;
670 int nvec; 671 int nvec;
@@ -893,9 +894,11 @@ shut_us_down:
893 * log out now and wait for it. 894 * log out now and wait for it.
894 */ 895 */
895 if (sync) { 896 if (sync) {
896 if (!error) 897 if (!error) {
897 error = xfs_log_force(mp, commit_lsn, 898 error = _xfs_log_force(mp, commit_lsn,
898 XFS_LOG_FORCE | XFS_LOG_SYNC); 899 XFS_LOG_FORCE | XFS_LOG_SYNC,
900 log_flushed);
901 }
899 XFS_STATS_INC(xs_trans_sync); 902 XFS_STATS_INC(xs_trans_sync);
900 } else { 903 } else {
901 XFS_STATS_INC(xs_trans_async); 904 XFS_STATS_INC(xs_trans_async);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a263aec8b3a6..0cc7af5c1f00 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -1025,7 +1025,12 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
1025 struct xfs_efd_log_item *, 1025 struct xfs_efd_log_item *,
1026 xfs_fsblock_t, 1026 xfs_fsblock_t,
1027 xfs_extlen_t); 1027 xfs_extlen_t);
1028int xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *); 1028int _xfs_trans_commit(xfs_trans_t *,
1029 uint flags,
1030 xfs_lsn_t *,
1031 int *);
1032#define xfs_trans_commit(tp, flags, lsn) \
1033 _xfs_trans_commit(tp, flags, lsn, NULL)
1029void xfs_trans_cancel(xfs_trans_t *, int); 1034void xfs_trans_cancel(xfs_trans_t *, int);
1030void xfs_trans_ail_init(struct xfs_mount *); 1035void xfs_trans_ail_init(struct xfs_mount *);
1031xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); 1036xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index f1a904e23ade..8238c7517822 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -321,8 +321,8 @@ xfs_start_flags(
321 321
322 if (ap->flags & XFSMNT_NOUUID) 322 if (ap->flags & XFSMNT_NOUUID)
323 mp->m_flags |= XFS_MOUNT_NOUUID; 323 mp->m_flags |= XFS_MOUNT_NOUUID;
324 if (ap->flags & XFSMNT_NOLOGFLUSH) 324 if (ap->flags & XFSMNT_BARRIER)
325 mp->m_flags |= XFS_MOUNT_NOLOGFLUSH; 325 mp->m_flags |= XFS_MOUNT_BARRIER;
326 326
327 return 0; 327 return 0;
328} 328}
@@ -512,8 +512,14 @@ xfs_mount(
512 goto error2; 512 goto error2;
513 513
514 error = XFS_IOINIT(vfsp, args, flags); 514 error = XFS_IOINIT(vfsp, args, flags);
515 if (!error) 515 if (error)
516 return 0; 516 goto error2;
517
518 if ((args->flags & XFSMNT_BARRIER) &&
519 !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
520 xfs_mountfs_check_barriers(mp);
521 return 0;
522
517error2: 523error2:
518 if (mp->m_sb_bp) 524 if (mp->m_sb_bp)
519 xfs_freesb(mp); 525 xfs_freesb(mp);
@@ -656,19 +662,24 @@ xfs_mntupdate(
656 else 662 else
657 mp->m_flags &= ~XFS_MOUNT_NOATIME; 663 mp->m_flags &= ~XFS_MOUNT_NOATIME;
658 664
659 if (!(vfsp->vfs_flag & VFS_RDONLY)) { 665 if ((vfsp->vfs_flag & VFS_RDONLY) &&
660 VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error); 666 !(*flags & MS_RDONLY)) {
667 vfsp->vfs_flag &= ~VFS_RDONLY;
668
669 if (args->flags & XFSMNT_BARRIER)
670 xfs_mountfs_check_barriers(mp);
661 } 671 }
662 672
663 if (*flags & MS_RDONLY) { 673 if (!(vfsp->vfs_flag & VFS_RDONLY) &&
674 (*flags & MS_RDONLY)) {
675 VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
676
664 xfs_quiesce_fs(mp); 677 xfs_quiesce_fs(mp);
665 678
666 /* Ok now write out an unmount record */ 679 /* Ok now write out an unmount record */
667 xfs_log_unmount_write(mp); 680 xfs_log_unmount_write(mp);
668 xfs_unmountfs_writesb(mp); 681 xfs_unmountfs_writesb(mp);
669 vfsp->vfs_flag |= VFS_RDONLY; 682 vfsp->vfs_flag |= VFS_RDONLY;
670 } else {
671 vfsp->vfs_flag &= ~VFS_RDONLY;
672 } 683 }
673 684
674 return 0; 685 return 0;
@@ -1628,7 +1639,8 @@ xfs_vget(
1628#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ 1639#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
1629#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ 1640#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */
1630#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ 1641#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
1631#define MNTOPT_NOLOGFLUSH "nologflush" /* don't hard flush on log writes */ 1642#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
1643 unwritten extent conversion */
1632#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ 1644#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
1633#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 1645#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
1634#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 1646#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
@@ -1791,8 +1803,8 @@ xfs_parseargs(
1791#endif 1803#endif
1792 } else if (!strcmp(this_char, MNTOPT_NOUUID)) { 1804 } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
1793 args->flags |= XFSMNT_NOUUID; 1805 args->flags |= XFSMNT_NOUUID;
1794 } else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) { 1806 } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
1795 args->flags |= XFSMNT_NOLOGFLUSH; 1807 args->flags |= XFSMNT_BARRIER;
1796 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 1808 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
1797 args->flags &= ~XFSMNT_IDELETE; 1809 args->flags &= ~XFSMNT_IDELETE;
1798 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 1810 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
@@ -1866,7 +1878,7 @@ xfs_showargs(
1866 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, 1878 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
1867 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, 1879 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
1868 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, 1880 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
1869 { XFS_MOUNT_NOLOGFLUSH, "," MNTOPT_NOLOGFLUSH }, 1881 { XFS_MOUNT_BARRIER, "," MNTOPT_BARRIER },
1870 { XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP }, 1882 { XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP },
1871 { 0, NULL } 1883 { 0, NULL }
1872 }; 1884 };
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 58bfe629b933..e2bf2ef58b66 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1118,6 +1118,7 @@ xfs_fsync(
1118 xfs_inode_t *ip; 1118 xfs_inode_t *ip;
1119 xfs_trans_t *tp; 1119 xfs_trans_t *tp;
1120 int error; 1120 int error;
1121 int log_flushed = 0, changed = 1;
1121 1122
1122 vn_trace_entry(BHV_TO_VNODE(bdp), 1123 vn_trace_entry(BHV_TO_VNODE(bdp),
1123 __FUNCTION__, (inst_t *)__return_address); 1124 __FUNCTION__, (inst_t *)__return_address);
@@ -1171,10 +1172,18 @@ xfs_fsync(
1171 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1172 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1172 1173
1173 if (xfs_ipincount(ip)) { 1174 if (xfs_ipincount(ip)) {
1174 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 1175 _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
1175 XFS_LOG_FORCE | 1176 XFS_LOG_FORCE |
1176 ((flag & FSYNC_WAIT) 1177 ((flag & FSYNC_WAIT)
1177 ? XFS_LOG_SYNC : 0)); 1178 ? XFS_LOG_SYNC : 0),
1179 &log_flushed);
1180 } else {
1181 /*
1182 * If the inode is not pinned and nothing
1183 * has changed we don't need to flush the
1184 * cache.
1185 */
1186 changed = 0;
1178 } 1187 }
1179 error = 0; 1188 error = 0;
1180 } else { 1189 } else {
@@ -1210,10 +1219,27 @@ xfs_fsync(
1210 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1219 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1211 if (flag & FSYNC_WAIT) 1220 if (flag & FSYNC_WAIT)
1212 xfs_trans_set_sync(tp); 1221 xfs_trans_set_sync(tp);
1213 error = xfs_trans_commit(tp, 0, NULL); 1222 error = _xfs_trans_commit(tp, 0, NULL, &log_flushed);
1214 1223
1215 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1224 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1216 } 1225 }
1226
1227 if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
1228 /*
1229 * If the log write didn't issue an ordered tag we need
1230 * to flush the disk cache for the data device now.
1231 */
1232 if (!log_flushed)
1233 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
1234
1235 /*
1236 * If this inode is on the RT dev we need to flush that
1237 * cache aswell.
1238 */
1239 if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
1240 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
1241 }
1242
1217 return error; 1243 return error;
1218} 1244}
1219 1245