path: root/fs/xfs/xfs_log.c
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--	fs/xfs/xfs_log.c	525
1 file changed, 265 insertions(+), 260 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 600b5b06aaeb..2be019136287 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone;
 	(off) += (bytes);}
 
 /* Local miscellaneous function prototypes */
-STATIC int	 xlog_bdstrat_cb(struct xfs_buf *);
 STATIC int	 xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
 				    xlog_in_core_t **, xfs_lsn_t *);
 STATIC xlog_t *  xlog_alloc_log(xfs_mount_t *mp,
@@ -61,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
 STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
 STATIC void	 xlog_dealloc_log(xlog_t *log);
 STATIC int	 xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
-		    int nentries, xfs_log_ticket_t tic,
+		    int nentries, struct xlog_ticket *tic,
 		    xfs_lsn_t *start_lsn,
 		    xlog_in_core_t **commit_iclog,
 		    uint flags);
@@ -80,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
 STATIC void xlog_state_switch_iclogs(xlog_t		*log,
 				     xlog_in_core_t	*iclog,
 				     int		eventual_size);
-STATIC int  xlog_state_sync(xlog_t			*log,
-			    xfs_lsn_t			lsn,
-			    uint			flags,
-			    int				*log_flushed);
-STATIC int  xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
 STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog);
 
 /* local functions to manipulate grant head */
@@ -249,14 +243,14 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
  * out when the next write occurs.
  */
 xfs_lsn_t
-xfs_log_done(xfs_mount_t	*mp,
-	     xfs_log_ticket_t	xtic,
-	     void		**iclog,
-	     uint		flags)
+xfs_log_done(
+	struct xfs_mount	*mp,
+	struct xlog_ticket	*ticket,
+	struct xlog_in_core	**iclog,
+	uint			flags)
 {
-	xlog_t		*log = mp->m_log;
-	xlog_ticket_t	*ticket = (xfs_log_ticket_t) xtic;
-	xfs_lsn_t	lsn = 0;
+	struct log		*log = mp->m_log;
+	xfs_lsn_t		lsn = 0;
 
 	if (XLOG_FORCED_SHUTDOWN(log) ||
 	    /*
@@ -264,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp,
 	 * If we get an error, just continue and give back the log ticket.
 	 */
 	    (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
-	     (xlog_commit_record(mp, ticket,
-				 (xlog_in_core_t **)iclog, &lsn)))) {
+	     (xlog_commit_record(mp, ticket, iclog, &lsn)))) {
 		lsn = (xfs_lsn_t) -1;
 		if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
 			flags |= XFS_LOG_REL_PERM_RESERV;
@@ -295,67 +288,8 @@ xfs_log_done(xfs_mount_t *mp,
 	}
 
 	return lsn;
-}	/* xfs_log_done */
-
-
-/*
- * Force the in-core log to disk.  If flags == XFS_LOG_SYNC,
- *	the force is done synchronously.
- *
- * Asynchronous forces are implemented by setting the WANT_SYNC
- * bit in the appropriate in-core log and then returning.
- *
- * Synchronous forces are implemented with a signal variable. All callers
- * to force a given lsn to disk will wait on a the sv attached to the
- * specific in-core log.  When given in-core log finally completes its
- * write to disk, that thread will wake up all threads waiting on the
- * sv.
- */
-int
-_xfs_log_force(
-	xfs_mount_t	*mp,
-	xfs_lsn_t	lsn,
-	uint		flags,
-	int		*log_flushed)
-{
-	xlog_t		*log = mp->m_log;
-	int		dummy;
-
-	if (!log_flushed)
-		log_flushed = &dummy;
-
-	ASSERT(flags & XFS_LOG_FORCE);
-
-	XFS_STATS_INC(xs_log_force);
-
-	if (log->l_flags & XLOG_IO_ERROR)
-		return XFS_ERROR(EIO);
-	if (lsn == 0)
-		return xlog_state_sync_all(log, flags, log_flushed);
-	else
-		return xlog_state_sync(log, lsn, flags, log_flushed);
-}	/* _xfs_log_force */
-
-/*
- * Wrapper for _xfs_log_force(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
- */
-void
-xfs_log_force(
-	xfs_mount_t	*mp,
-	xfs_lsn_t	lsn,
-	uint		flags)
-{
-	int	error;
-	error = _xfs_log_force(mp, lsn, flags, NULL);
-	if (error) {
-		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
-			"error %d returned.", error);
-	}
-}
+}
 
-
 /*
  * Attaches a new iclog I/O completion callback routine during
  * transaction commit.  If the log is in error state, a non-zero
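The two functions deleted above come back in restructured form later in this patch: _xfs_log_force() is rebuilt on the old xlog_state_sync_all() body and loses its lsn argument, while the lsn != 0 case becomes the new _xfs_log_force_lsn(). The XFS_LOG_FORCE flag, asserted in the deleted code, is dropped from the calling convention entirely. A rough before/after sketch of hypothetical call sites (illustrative, not lines from this patch):

	/* Before: one entry point; lsn == 0 meant "force the whole log",
	 * and XFS_LOG_FORCE had to be passed even though it was implied. */
	error = _xfs_log_force(mp, 0, XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
	error = _xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);

	/* After: two entry points, and the redundant flag is gone. */
	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
	error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);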
@@ -363,11 +297,11 @@ xfs_log_force(
  * executing the callback at an appropriate time.
  */
 int
-xfs_log_notify(xfs_mount_t	  *mp,		/* mount of partition */
-	       void		  *iclog_hndl,	/* iclog to hang callback off */
-	       xfs_log_callback_t *cb)
+xfs_log_notify(
+	struct xfs_mount	*mp,
+	struct xlog_in_core	*iclog,
+	xfs_log_callback_t	*cb)
 {
-	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
 	int	abortflg;
 
 	spin_lock(&iclog->ic_callback_lock);
@@ -381,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
 	}
 	spin_unlock(&iclog->ic_callback_lock);
 	return abortflg;
-}	/* xfs_log_notify */
+}
 
 int
-xfs_log_release_iclog(xfs_mount_t *mp,
-		      void	  *iclog_hndl)
+xfs_log_release_iclog(
+	struct xfs_mount	*mp,
+	struct xlog_in_core	*iclog)
 {
-	xlog_t		*log = mp->m_log;
-	xlog_in_core_t	*iclog = (xlog_in_core_t *)iclog_hndl;
-
-	if (xlog_state_release_iclog(log, iclog)) {
+	if (xlog_state_release_iclog(mp->m_log, iclog)) {
 		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 		return EIO;
 	}
@@ -409,17 +341,18 @@ xfs_log_release_iclog(xfs_mount_t *mp,
  *	reservation, we prevent over allocation problems.
  */
 int
-xfs_log_reserve(xfs_mount_t	 *mp,
-		int		 unit_bytes,
-		int		 cnt,
-		xfs_log_ticket_t *ticket,
-		__uint8_t	 client,
-		uint		 flags,
-		uint		 t_type)
+xfs_log_reserve(
+	struct xfs_mount	*mp,
+	int			unit_bytes,
+	int			cnt,
+	struct xlog_ticket	**ticket,
+	__uint8_t		client,
+	uint			flags,
+	uint			t_type)
 {
-	xlog_t		*log = mp->m_log;
-	xlog_ticket_t	*internal_ticket;
-	int		retval = 0;
+	struct log		*log = mp->m_log;
+	struct xlog_ticket	*internal_ticket;
+	int			retval = 0;
 
 	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
 	ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
@@ -432,7 +365,7 @@ xfs_log_reserve(xfs_mount_t *mp,
 
 	if (*ticket != NULL) {
 		ASSERT(flags & XFS_LOG_PERM_RESERV);
-		internal_ticket = (xlog_ticket_t *)*ticket;
+		internal_ticket = *ticket;
 
 		trace_xfs_log_reserve(log, internal_ticket);
 
@@ -584,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	xlog_in_core_t	 *first_iclog;
 #endif
 	xfs_log_iovec_t  reg[1];
-	xfs_log_ticket_t tic = NULL;
+	xlog_ticket_t	*tic = NULL;
 	xfs_lsn_t	 lsn;
 	int		 error;
 
@@ -602,7 +535,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return 0;
 
-	error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
+	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
 	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
 
 #ifdef DEBUG
@@ -618,7 +551,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	if (! (XLOG_FORCED_SHUTDOWN(log))) {
 		reg[0].i_addr = (void*)&magic;
 		reg[0].i_len  = sizeof(magic);
-		XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
+		reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
 
 		error = xfs_log_reserve(mp, 600, 1, &tic,
 					XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
@@ -721,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp)
  * transaction occur with one call to xfs_log_write().
  */
 int
-xfs_log_write(xfs_mount_t *	mp,
-	      xfs_log_iovec_t	reg[],
-	      int		nentries,
-	      xfs_log_ticket_t	tic,
-	      xfs_lsn_t		*start_lsn)
+xfs_log_write(
+	struct xfs_mount	*mp,
+	struct xfs_log_iovec	reg[],
+	int			nentries,
+	struct xlog_ticket	*tic,
+	xfs_lsn_t		*start_lsn)
 {
-	int	error;
-	xlog_t	*log = mp->m_log;
+	struct log		*log = mp->m_log;
+	int			error;
 
 	if (XLOG_FORCED_SHUTDOWN(log))
 		return XFS_ERROR(EIO);
 
-	if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
+	error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
+	if (error)
 		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-	}
 	return error;
-}	/* xfs_log_write */
-
+}
 
 void
 xfs_log_move_tail(xfs_mount_t	*mp,
@@ -812,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp,
 
 /*
  * Determine if we have a transaction that has gone to disk
- * that needs to be covered. Log activity needs to be idle (no AIL and
- * nothing in the iclogs). And, we need to be in the right state indicating
- * something has gone out.
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
+ *
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurs avoids replay
+ * of transactions where the metadata is already on disk.
 */
 int
 xfs_log_need_covered(xfs_mount_t *mp)
@@ -826,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
 		return 0;
 
 	spin_lock(&log->l_icloglock);
-	if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
-		(log->l_covered_state == XLOG_STATE_COVER_NEED2))
-			&& !xfs_trans_ail_tail(log->l_ailp)
-			&& xlog_iclogs_empty(log)) {
-		if (log->l_covered_state == XLOG_STATE_COVER_NEED)
-			log->l_covered_state = XLOG_STATE_COVER_DONE;
-		else {
-			ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
-			log->l_covered_state = XLOG_STATE_COVER_DONE2;
-		}
+	switch (log->l_covered_state) {
+	case XLOG_STATE_COVER_DONE:
+	case XLOG_STATE_COVER_DONE2:
+	case XLOG_STATE_COVER_IDLE:
+		break;
+	case XLOG_STATE_COVER_NEED:
+	case XLOG_STATE_COVER_NEED2:
+		if (!xfs_trans_ail_tail(log->l_ailp) &&
+		    xlog_iclogs_empty(log)) {
+			if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+				log->l_covered_state = XLOG_STATE_COVER_DONE;
+			else
+				log->l_covered_state = XLOG_STATE_COVER_DONE2;
+		}
+		/* FALLTHRU */
+	default:
 		needed = 1;
+		break;
 	}
 	spin_unlock(&log->l_icloglock);
 	return needed;
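The switch makes the covering state machine explicit: the DONE, DONE2 and IDLE states need no dummy transaction, NEED and NEED2 advance to DONE and DONE2 once the log is idle, and every state except those three quiescent ones keeps requesting dummy transactions. A minimal userspace model of that decision (a sketch: the idle test the kernel performs with xfs_trans_ail_tail() and xlog_iclogs_empty() is collapsed into a boolean parameter, and all names are illustrative, not kernel API):

	#include <stdbool.h>
	#include <stdio.h>

	enum cover_state { COVER_IDLE, COVER_NEED, COVER_DONE, COVER_NEED2, COVER_DONE2 };

	/* Returns true when a dummy transaction should be issued, and
	 * advances NEED -> DONE and NEED2 -> DONE2 once the log is idle. */
	static bool need_covered(enum cover_state *state, bool log_idle)
	{
		switch (*state) {
		case COVER_DONE:
		case COVER_DONE2:
		case COVER_IDLE:
			return false;	/* covered, or nothing to cover */
		case COVER_NEED:
		case COVER_NEED2:
			if (log_idle)
				*state = (*state == COVER_NEED) ? COVER_DONE : COVER_DONE2;
			/* fall through */
		default:
			return true;	/* keep issuing dummy transactions */
		}
	}

	int main(void)
	{
		enum cover_state s = COVER_NEED;

		printf("busy: %d\n", need_covered(&s, false));	/* 1, still NEED */
		printf("idle: %d\n", need_covered(&s, true));	/* 1, NEED -> DONE */
		printf("done: %d\n", need_covered(&s, false));	/* 0 */
		return 0;
	}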
@@ -988,35 +935,6 @@ xlog_iodone(xfs_buf_t *bp)
 }	/* xlog_iodone */
 
 /*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- */
-STATIC int
-xlog_bdstrat_cb(struct xfs_buf *bp)
-{
-	xlog_in_core_t *iclog;
-
-	iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
-
-	if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
-		/* note for irix bstrat will need struct bdevsw passed
-		 * Fix the following macro if the code ever is merged
-		 */
-		XFS_bdstrat(bp);
-		return 0;
-	}
-
-	XFS_BUF_ERROR(bp, EIO);
-	XFS_BUF_STALE(bp);
-	xfs_biodone(bp);
-	return XFS_ERROR(EIO);
-}
-
-/*
  * Return size of each in-core log record buffer.
  *
  * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1158,7 +1076,6 @@ xlog_alloc_log(xfs_mount_t *mp,
 	if (!bp)
 		goto out_free_log;
 	XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-	XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
 	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
 	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
@@ -1196,7 +1113,6 @@ xlog_alloc_log(xfs_mount_t *mp,
 		if (!XFS_BUF_CPSEMA(bp))
 			ASSERT(0);
 		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-		XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
 		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
 		iclog->ic_bp = bp;
 		iclog->ic_data = bp->b_addr;
@@ -1268,7 +1184,7 @@ xlog_commit_record(xfs_mount_t *mp,
 
 	reg[0].i_addr = NULL;
 	reg[0].i_len = 0;
-	XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
+	reg[0].i_type = XLOG_REG_TYPE_COMMIT;
 
 	ASSERT_ALWAYS(iclog);
 	if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1343,6 +1259,37 @@ xlog_grant_push_ail(xfs_mount_t *mp,
 	xfs_trans_ail_push(log->l_ailp, threshold_lsn);
 }	/* xlog_grant_push_ail */
 
+/*
+ * The bdstrat callback function for log bufs. This gives us a central
+ * place to trap bufs in case we get hit by a log I/O error and need to
+ * shutdown. Actually, in practice, even when we didn't get a log error,
+ * we transition the iclogs to IOERROR state *after* flushing all existing
+ * iclogs to disk. This is because we don't want anymore new transactions to be
+ * started or completed afterwards.
+ */
+STATIC int
+xlog_bdstrat(
+	struct xfs_buf		*bp)
+{
+	struct xlog_in_core	*iclog;
+
+	iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
+	if (iclog->ic_state & XLOG_STATE_IOERROR) {
+		XFS_BUF_ERROR(bp, EIO);
+		XFS_BUF_STALE(bp);
+		xfs_biodone(bp);
+		/*
+		 * It would seem logical to return EIO here, but we rely on
+		 * the log state machine to propagate I/O errors instead of
+		 * doing it here.
+		 */
+		return 0;
+	}
+
+	bp->b_flags |= _XBF_RUN_QUEUES;
+	xfs_buf_iorequest(bp);
+	return 0;
+}
 
 /*
  * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
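The move is not purely mechanical: where the deleted xlog_bdstrat_cb() returned XFS_ERROR(EIO) for an iclog already in IOERROR state, the new xlog_bdstrat() stales the buffer, completes it through xfs_biodone() and returns 0, leaving error propagation to the log state machine, as its new comment explains. Condensed from the two versions above (a sketch of the caller-visible difference, not patch lines):

	/* Old: a shut-down log could surface as a synchronous error. */
	error = XFS_bwrite(bp);		/* could return EIO immediately */

	/* New: submission returns 0 even on a shut-down log; the error
	 * is reported through the iclog I/O completion path instead. */
	error = xlog_bdstrat(bp);

The next two hunks switch xlog_sync() over to the new helper; its error check is kept even though both paths of the new helper currently return 0.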
@@ -1462,7 +1409,7 @@ xlog_sync(xlog_t *log,
 	 */
 	XFS_BUF_WRITE(bp);
 
-	if ((error = XFS_bwrite(bp))) {
+	if ((error = xlog_bdstrat(bp))) {
 		xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
 				  XFS_BUF_ADDR(bp));
 		return error;
@@ -1502,7 +1449,7 @@ xlog_sync(xlog_t *log,
 		/* account for internal log which doesn't start at block #0 */
 		XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
 		XFS_BUF_WRITE(bp);
-		if ((error = XFS_bwrite(bp))) {
+		if ((error = xlog_bdstrat(bp))) {
 			xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
 					  bp, XFS_BUF_ADDR(bp));
 			return error;
@@ -1707,16 +1654,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
  *	bytes have been written out.
  */
 STATIC int
-xlog_write(xfs_mount_t *	mp,
-	   xfs_log_iovec_t	reg[],
-	   int			nentries,
-	   xfs_log_ticket_t	tic,
-	   xfs_lsn_t		*start_lsn,
-	   xlog_in_core_t	**commit_iclog,
-	   uint			flags)
+xlog_write(
+	struct xfs_mount	*mp,
+	struct xfs_log_iovec	reg[],
+	int			nentries,
+	struct xlog_ticket	*ticket,
+	xfs_lsn_t		*start_lsn,
+	struct xlog_in_core	**commit_iclog,
+	uint			flags)
 {
 	xlog_t		*log = mp->m_log;
-	xlog_ticket_t	*ticket = (xlog_ticket_t *)tic;
 	xlog_in_core_t	*iclog = NULL;	/* ptr to current in-core log */
 	xlog_op_header_t *logop_head;	/* ptr to log operation header */
 	__psint_t	ptr;		/* copy address into data region */
@@ -1830,7 +1777,7 @@ xlog_write(xfs_mount_t * mp,
 	    default:
 		xfs_fs_cmn_err(CE_WARN, mp,
 		    "Bad XFS transaction clientid 0x%x in ticket 0x%p",
-		    logop_head->oh_clientid, tic);
+		    logop_head->oh_clientid, ticket);
 		return XFS_ERROR(EIO);
 	    }
 
@@ -2854,7 +2801,6 @@ xlog_state_switch_iclogs(xlog_t *log,
 	log->l_iclog = iclog->ic_next;
 }	/* xlog_state_switch_iclogs */
 
-
 /*
  * Write out all data in the in-core log as of this exact moment in time.
  *
@@ -2882,11 +2828,17 @@ xlog_state_switch_iclogs(xlog_t *log,
  * b) when we return from flushing out this iclog, it is still
  *	not in the active nor dirty state.
  */
-STATIC int
-xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
+int
+_xfs_log_force(
+	struct xfs_mount	*mp,
+	uint			flags,
+	int			*log_flushed)
 {
-	xlog_in_core_t	*iclog;
-	xfs_lsn_t	lsn;
+	struct log		*log = mp->m_log;
+	struct xlog_in_core	*iclog;
+	xfs_lsn_t		lsn;
+
+	XFS_STATS_INC(xs_log_force);
 
 	spin_lock(&log->l_icloglock);
 
@@ -2932,7 +2884,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 
 		if (xlog_state_release_iclog(log, iclog))
 			return XFS_ERROR(EIO);
-		*log_flushed = 1;
+
+		if (log_flushed)
+			*log_flushed = 1;
 		spin_lock(&log->l_icloglock);
 		if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
 		    iclog->ic_state != XLOG_STATE_DIRTY)
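A detail worth noting in this rewrite: the old _xfs_log_force() redirected a NULL log_flushed to a local dummy variable, while the new code simply tests for NULL at each site that would set it, as the added lines above show. Hypothetical callers under the new convention:

	int	flushed = 0;

	error = _xfs_log_force(mp, XFS_LOG_SYNC, &flushed);	/* wants to know */
	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);		/* doesn't care */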
@@ -2976,19 +2930,37 @@ maybe_sleep:
 		 */
 		if (iclog->ic_state & XLOG_STATE_IOERROR)
 			return XFS_ERROR(EIO);
-		*log_flushed = 1;
-
+		if (log_flushed)
+			*log_flushed = 1;
 	} else {
 
 no_sleep:
 		spin_unlock(&log->l_icloglock);
 	}
 	return 0;
-}	/* xlog_state_sync_all */
+}
+
+/*
+ * Wrapper for _xfs_log_force(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	int	error;
 
+	error = _xfs_log_force(mp, flags, NULL);
+	if (error) {
+		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+			"error %d returned.", error);
+	}
+}
 
 /*
- * Used by code which implements synchronous log forces.
+ * Force the in-core log to disk for a specific LSN.
  *
  * Find in-core log with lsn.
  * If it is in the DIRTY state, just return.
@@ -2996,109 +2968,142 @@ no_sleep:
 * state and go to sleep or return.
 * If it is in any other state, go to sleep or return.
 *
- * If filesystem activity goes to zero, the iclog will get flushed only by
- * bdflush().
- */
-STATIC int
-xlog_state_sync(xlog_t	  *log,
-		xfs_lsn_t lsn,
-		uint	  flags,
-		int	  *log_flushed)
-{
-	xlog_in_core_t	*iclog;
-	int		already_slept = 0;
-
-try_again:
-	spin_lock(&log->l_icloglock);
-	iclog = log->l_iclog;
-
-	if (iclog->ic_state & XLOG_STATE_IOERROR) {
-		spin_unlock(&log->l_icloglock);
-		return XFS_ERROR(EIO);
-	}
-
-	do {
-		if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
-			iclog = iclog->ic_next;
-			continue;
-		}
-
-		if (iclog->ic_state == XLOG_STATE_DIRTY) {
-			spin_unlock(&log->l_icloglock);
-			return 0;
-		}
-
-		if (iclog->ic_state == XLOG_STATE_ACTIVE) {
-			/*
-			 * We sleep here if we haven't already slept (e.g.
-			 * this is the first time we've looked at the correct
-			 * iclog buf) and the buffer before us is going to
-			 * be sync'ed. The reason for this is that if we
-			 * are doing sync transactions here, by waiting for
-			 * the previous I/O to complete, we can allow a few
-			 * more transactions into this iclog before we close
-			 * it down.
-			 *
-			 * Otherwise, we mark the buffer WANT_SYNC, and bump
-			 * up the refcnt so we can release the log (which drops
-			 * the ref count). The state switch keeps new transaction
-			 * commits from using this buffer. When the current commits
-			 * finish writing into the buffer, the refcount will drop to
-			 * zero and the buffer will go out then.
-			 */
-			if (!already_slept &&
-			    (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC |
-							 XLOG_STATE_SYNCING))) {
-				ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-				XFS_STATS_INC(xs_log_force_sleep);
-				sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
-					&log->l_icloglock, s);
-				*log_flushed = 1;
-				already_slept = 1;
-				goto try_again;
-			} else {
-				atomic_inc(&iclog->ic_refcnt);
-				xlog_state_switch_iclogs(log, iclog, 0);
-				spin_unlock(&log->l_icloglock);
-				if (xlog_state_release_iclog(log, iclog))
-					return XFS_ERROR(EIO);
-				*log_flushed = 1;
-				spin_lock(&log->l_icloglock);
-			}
-		}
-
-		if ((flags & XFS_LOG_SYNC) && /* sleep */
-		    !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
-
-			/*
-			 * Don't wait on completion if we know that we've
-			 * gotten a log write error.
-			 */
-			if (iclog->ic_state & XLOG_STATE_IOERROR) {
-				spin_unlock(&log->l_icloglock);
-				return XFS_ERROR(EIO);
-			}
-			XFS_STATS_INC(xs_log_force_sleep);
-			sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
-			/*
-			 * No need to grab the log lock here since we're
-			 * only deciding whether or not to return EIO
-			 * and the memory read should be atomic.
-			 */
-			if (iclog->ic_state & XLOG_STATE_IOERROR)
-				return XFS_ERROR(EIO);
-			*log_flushed = 1;
-		} else {		/* just return */
-			spin_unlock(&log->l_icloglock);
-		}
-		return 0;
+ * Synchronous forces are implemented with a signal variable. All callers
+ * to force a given lsn to disk will wait on a the sv attached to the
+ * specific in-core log.  When given in-core log finally completes its
+ * write to disk, that thread will wake up all threads waiting on the
+ * sv.
+ */
+int
+_xfs_log_force_lsn(
+	struct xfs_mount	*mp,
+	xfs_lsn_t		lsn,
+	uint			flags,
+	int			*log_flushed)
+{
+	struct log		*log = mp->m_log;
+	struct xlog_in_core	*iclog;
+	int			already_slept = 0;
+
+	ASSERT(lsn != 0);
+
+	XFS_STATS_INC(xs_log_force);
+
+try_again:
+	spin_lock(&log->l_icloglock);
+	iclog = log->l_iclog;
+	if (iclog->ic_state & XLOG_STATE_IOERROR) {
+		spin_unlock(&log->l_icloglock);
+		return XFS_ERROR(EIO);
+	}
+
+	do {
+		if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
+			iclog = iclog->ic_next;
+			continue;
+		}
+
+		if (iclog->ic_state == XLOG_STATE_DIRTY) {
+			spin_unlock(&log->l_icloglock);
+			return 0;
+		}
+
+		if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+			/*
+			 * We sleep here if we haven't already slept (e.g.
+			 * this is the first time we've looked at the correct
+			 * iclog buf) and the buffer before us is going to
+			 * be sync'ed. The reason for this is that if we
+			 * are doing sync transactions here, by waiting for
+			 * the previous I/O to complete, we can allow a few
+			 * more transactions into this iclog before we close
+			 * it down.
+			 *
+			 * Otherwise, we mark the buffer WANT_SYNC, and bump
+			 * up the refcnt so we can release the log (which
+			 * drops the ref count).  The state switch keeps new
+			 * transaction commits from using this buffer.  When
+			 * the current commits finish writing into the buffer,
+			 * the refcount will drop to zero and the buffer will
+			 * go out then.
+			 */
+			if (!already_slept &&
+			    (iclog->ic_prev->ic_state &
+			     (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
+				ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
+
+				XFS_STATS_INC(xs_log_force_sleep);
+
+				sv_wait(&iclog->ic_prev->ic_write_wait,
+					PSWP, &log->l_icloglock, s);
+				if (log_flushed)
+					*log_flushed = 1;
+				already_slept = 1;
+				goto try_again;
+			}
+			atomic_inc(&iclog->ic_refcnt);
+			xlog_state_switch_iclogs(log, iclog, 0);
+			spin_unlock(&log->l_icloglock);
+			if (xlog_state_release_iclog(log, iclog))
+				return XFS_ERROR(EIO);
+			if (log_flushed)
+				*log_flushed = 1;
+			spin_lock(&log->l_icloglock);
+		}
+
+		if ((flags & XFS_LOG_SYNC) && /* sleep */
+		    !(iclog->ic_state &
+		      (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
+			/*
+			 * Don't wait on completion if we know that we've
+			 * gotten a log write error.
+			 */
+			if (iclog->ic_state & XLOG_STATE_IOERROR) {
+				spin_unlock(&log->l_icloglock);
+				return XFS_ERROR(EIO);
+			}
+			XFS_STATS_INC(xs_log_force_sleep);
+			sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
+			/*
+			 * No need to grab the log lock here since we're
+			 * only deciding whether or not to return EIO
+			 * and the memory read should be atomic.
+			 */
+			if (iclog->ic_state & XLOG_STATE_IOERROR)
+				return XFS_ERROR(EIO);
+
+			if (log_flushed)
+				*log_flushed = 1;
+		} else {		/* just return */
+			spin_unlock(&log->l_icloglock);
+		}
 
-	} while (iclog != log->l_iclog);
+		return 0;
+	} while (iclog != log->l_iclog);
 
 	spin_unlock(&log->l_icloglock);
 	return 0;
-}	/* xlog_state_sync */
+}
 
+/*
+ * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force_lsn(
+	xfs_mount_t	*mp,
+	xfs_lsn_t	lsn,
+	uint		flags)
+{
+	int	error;
+
+	error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
+	if (error) {
+		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+			"error %d returned.", error);
+	}
+}
 
 /*
  * Called when we want to mark the current iclog as being ready to sync to
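The retained comment describes the synchronous-force protocol: every caller forcing a given LSN sleeps on the sync variable (sv) attached to that specific in-core log, and I/O completion wakes all of them at once. A rough userspace analogue using POSIX condition variables (illustrative only; the kernel's sv_wait() and l_icloglock are not pthreads, and one visible difference is that sv_wait() does not retake the lock on wakeup while pthread_cond_wait() does):

	#include <pthread.h>
	#include <stdbool.h>

	/* Toy stand-in for an in-core log buffer (iclog). */
	struct iclog {
		pthread_mutex_t	lock;		/* plays the role of l_icloglock */
		pthread_cond_t	force_wait;	/* plays the role of ic_force_wait */
		bool		synced;		/* set once the write to disk is done */
	};

	/* Caller side: like the XFS_LOG_SYNC branch above, sleep until done. */
	static void force_wait_sync(struct iclog *ic)
	{
		pthread_mutex_lock(&ic->lock);
		while (!ic->synced)
			pthread_cond_wait(&ic->force_wait, &ic->lock);
		pthread_mutex_unlock(&ic->lock);
	}

	/* Completion side: mark the iclog written and wake every waiter. */
	static void io_completion(struct iclog *ic)
	{
		pthread_mutex_lock(&ic->lock);
		ic->synced = true;
		pthread_cond_broadcast(&ic->force_wait);
		pthread_mutex_unlock(&ic->lock);
	}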
@@ -3463,7 +3468,6 @@ xfs_log_force_umount(
 	xlog_ticket_t	*tic;
 	xlog_t		*log;
 	int		retval;
-	int		dummy;
 
 	log = mp->m_log;
 
@@ -3537,13 +3541,14 @@ xfs_log_force_umount(
 	}
 	spin_unlock(&log->l_grant_lock);
 
-	if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
+	if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
 		ASSERT(!logerror);
 		/*
 		 * Force the incore logs to disk before shutting the
 		 * log down completely.
 		 */
-		xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
+		_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+
 		spin_lock(&log->l_icloglock);
 		retval = xlog_state_ioerror(log);
 		spin_unlock(&log->l_icloglock);