about | summary | refs | log | tree | commit | diff | stats
path: root/fs/xfs/xfs_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r-- fs/xfs/xfs_log.c 487
1 files changed, 239 insertions, 248 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 600b5b06aaeb..e8fba92d7cd9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone;
50 (off) += (bytes);} 50 (off) += (bytes);}
51 51
52/* Local miscellaneous function prototypes */ 52/* Local miscellaneous function prototypes */
53STATIC int xlog_bdstrat_cb(struct xfs_buf *);
54STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, 53STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
55 xlog_in_core_t **, xfs_lsn_t *); 54 xlog_in_core_t **, xfs_lsn_t *);
56STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, 55STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
@@ -61,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
61STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 60STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
62STATIC void xlog_dealloc_log(xlog_t *log); 61STATIC void xlog_dealloc_log(xlog_t *log);
63STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], 62STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
64 int nentries, xfs_log_ticket_t tic, 63 int nentries, struct xlog_ticket *tic,
65 xfs_lsn_t *start_lsn, 64 xfs_lsn_t *start_lsn,
66 xlog_in_core_t **commit_iclog, 65 xlog_in_core_t **commit_iclog,
67 uint flags); 66 uint flags);
@@ -80,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
80STATIC void xlog_state_switch_iclogs(xlog_t *log, 79STATIC void xlog_state_switch_iclogs(xlog_t *log,
81 xlog_in_core_t *iclog, 80 xlog_in_core_t *iclog,
82 int eventual_size); 81 int eventual_size);
83STATIC int xlog_state_sync(xlog_t *log,
84 xfs_lsn_t lsn,
85 uint flags,
86 int *log_flushed);
87STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
88STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 82STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
89 83
90/* local functions to manipulate grant head */ 84/* local functions to manipulate grant head */
@@ -249,14 +243,14 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
249 * out when the next write occurs. 243 * out when the next write occurs.
250 */ 244 */
251xfs_lsn_t 245xfs_lsn_t
252xfs_log_done(xfs_mount_t *mp, 246xfs_log_done(
253 xfs_log_ticket_t xtic, 247 struct xfs_mount *mp,
254 void **iclog, 248 struct xlog_ticket *ticket,
255 uint flags) 249 struct xlog_in_core **iclog,
250 uint flags)
256{ 251{
257 xlog_t *log = mp->m_log; 252 struct log *log = mp->m_log;
258 xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic; 253 xfs_lsn_t lsn = 0;
259 xfs_lsn_t lsn = 0;
260 254
261 if (XLOG_FORCED_SHUTDOWN(log) || 255 if (XLOG_FORCED_SHUTDOWN(log) ||
262 /* 256 /*
@@ -264,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp,
264 * If we get an error, just continue and give back the log ticket. 258 * If we get an error, just continue and give back the log ticket.
265 */ 259 */
266 (((ticket->t_flags & XLOG_TIC_INITED) == 0) && 260 (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
267 (xlog_commit_record(mp, ticket, 261 (xlog_commit_record(mp, ticket, iclog, &lsn)))) {
268 (xlog_in_core_t **)iclog, &lsn)))) {
269 lsn = (xfs_lsn_t) -1; 262 lsn = (xfs_lsn_t) -1;
270 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { 263 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
271 flags |= XFS_LOG_REL_PERM_RESERV; 264 flags |= XFS_LOG_REL_PERM_RESERV;
@@ -295,67 +288,8 @@ xfs_log_done(xfs_mount_t *mp,
295 } 288 }
296 289
297 return lsn; 290 return lsn;
298} /* xfs_log_done */
299
300
301/*
302 * Force the in-core log to disk. If flags == XFS_LOG_SYNC,
303 * the force is done synchronously.
304 *
305 * Asynchronous forces are implemented by setting the WANT_SYNC
306 * bit in the appropriate in-core log and then returning.
307 *
308 * Synchronous forces are implemented with a signal variable. All callers
309 * to force a given lsn to disk will wait on a the sv attached to the
310 * specific in-core log. When given in-core log finally completes its
311 * write to disk, that thread will wake up all threads waiting on the
312 * sv.
313 */
314int
315_xfs_log_force(
316 xfs_mount_t *mp,
317 xfs_lsn_t lsn,
318 uint flags,
319 int *log_flushed)
320{
321 xlog_t *log = mp->m_log;
322 int dummy;
323
324 if (!log_flushed)
325 log_flushed = &dummy;
326
327 ASSERT(flags & XFS_LOG_FORCE);
328
329 XFS_STATS_INC(xs_log_force);
330
331 if (log->l_flags & XLOG_IO_ERROR)
332 return XFS_ERROR(EIO);
333 if (lsn == 0)
334 return xlog_state_sync_all(log, flags, log_flushed);
335 else
336 return xlog_state_sync(log, lsn, flags, log_flushed);
337} /* _xfs_log_force */
338
339/*
340 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
341 * about errors or whether the log was flushed or not. This is the normal
342 * interface to use when trying to unpin items or move the log forward.
343 */
344void
345xfs_log_force(
346 xfs_mount_t *mp,
347 xfs_lsn_t lsn,
348 uint flags)
349{
350 int error;
351 error = _xfs_log_force(mp, lsn, flags, NULL);
352 if (error) {
353 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
354 "error %d returned.", error);
355 }
356} 291}
357 292
358
359/* 293/*
360 * Attaches a new iclog I/O completion callback routine during 294 * Attaches a new iclog I/O completion callback routine during
361 * transaction commit. If the log is in error state, a non-zero 295 * transaction commit. If the log is in error state, a non-zero
@@ -363,11 +297,11 @@ xfs_log_force(
363 * executing the callback at an appropriate time. 297 * executing the callback at an appropriate time.
364 */ 298 */
365int 299int
366xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ 300xfs_log_notify(
367 void *iclog_hndl, /* iclog to hang callback off */ 301 struct xfs_mount *mp,
368 xfs_log_callback_t *cb) 302 struct xlog_in_core *iclog,
303 xfs_log_callback_t *cb)
369{ 304{
370 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
371 int abortflg; 305 int abortflg;
372 306
373 spin_lock(&iclog->ic_callback_lock); 307 spin_lock(&iclog->ic_callback_lock);
@@ -381,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
381 } 315 }
382 spin_unlock(&iclog->ic_callback_lock); 316 spin_unlock(&iclog->ic_callback_lock);
383 return abortflg; 317 return abortflg;
384} /* xfs_log_notify */ 318}
385 319
386int 320int
387xfs_log_release_iclog(xfs_mount_t *mp, 321xfs_log_release_iclog(
388 void *iclog_hndl) 322 struct xfs_mount *mp,
323 struct xlog_in_core *iclog)
389{ 324{
390 xlog_t *log = mp->m_log; 325 if (xlog_state_release_iclog(mp->m_log, iclog)) {
391 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
392
393 if (xlog_state_release_iclog(log, iclog)) {
394 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 326 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
395 return EIO; 327 return EIO;
396 } 328 }
@@ -409,17 +341,18 @@ xfs_log_release_iclog(xfs_mount_t *mp,
409 * reservation, we prevent over allocation problems. 341 * reservation, we prevent over allocation problems.
410 */ 342 */
411int 343int
412xfs_log_reserve(xfs_mount_t *mp, 344xfs_log_reserve(
413 int unit_bytes, 345 struct xfs_mount *mp,
414 int cnt, 346 int unit_bytes,
415 xfs_log_ticket_t *ticket, 347 int cnt,
416 __uint8_t client, 348 struct xlog_ticket **ticket,
417 uint flags, 349 __uint8_t client,
418 uint t_type) 350 uint flags,
351 uint t_type)
419{ 352{
420 xlog_t *log = mp->m_log; 353 struct log *log = mp->m_log;
421 xlog_ticket_t *internal_ticket; 354 struct xlog_ticket *internal_ticket;
422 int retval = 0; 355 int retval = 0;
423 356
424 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 357 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
425 ASSERT((flags & XFS_LOG_NOSLEEP) == 0); 358 ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
@@ -432,7 +365,7 @@ xfs_log_reserve(xfs_mount_t *mp,
432 365
433 if (*ticket != NULL) { 366 if (*ticket != NULL) {
434 ASSERT(flags & XFS_LOG_PERM_RESERV); 367 ASSERT(flags & XFS_LOG_PERM_RESERV);
435 internal_ticket = (xlog_ticket_t *)*ticket; 368 internal_ticket = *ticket;
436 369
437 trace_xfs_log_reserve(log, internal_ticket); 370 trace_xfs_log_reserve(log, internal_ticket);
438 371
@@ -584,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
584 xlog_in_core_t *first_iclog; 517 xlog_in_core_t *first_iclog;
585#endif 518#endif
586 xfs_log_iovec_t reg[1]; 519 xfs_log_iovec_t reg[1];
587 xfs_log_ticket_t tic = NULL; 520 xlog_ticket_t *tic = NULL;
588 xfs_lsn_t lsn; 521 xfs_lsn_t lsn;
589 int error; 522 int error;
590 523
@@ -602,7 +535,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
602 if (mp->m_flags & XFS_MOUNT_RDONLY) 535 if (mp->m_flags & XFS_MOUNT_RDONLY)
603 return 0; 536 return 0;
604 537
605 error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); 538 error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
606 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); 539 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
607 540
608#ifdef DEBUG 541#ifdef DEBUG
@@ -618,7 +551,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
618 if (! (XLOG_FORCED_SHUTDOWN(log))) { 551 if (! (XLOG_FORCED_SHUTDOWN(log))) {
619 reg[0].i_addr = (void*)&magic; 552 reg[0].i_addr = (void*)&magic;
620 reg[0].i_len = sizeof(magic); 553 reg[0].i_len = sizeof(magic);
621 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); 554 reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
622 555
623 error = xfs_log_reserve(mp, 600, 1, &tic, 556 error = xfs_log_reserve(mp, 600, 1, &tic,
624 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); 557 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
@@ -721,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp)
721 * transaction occur with one call to xfs_log_write(). 654 * transaction occur with one call to xfs_log_write().
722 */ 655 */
723int 656int
724xfs_log_write(xfs_mount_t * mp, 657xfs_log_write(
725 xfs_log_iovec_t reg[], 658 struct xfs_mount *mp,
726 int nentries, 659 struct xfs_log_iovec reg[],
727 xfs_log_ticket_t tic, 660 int nentries,
728 xfs_lsn_t *start_lsn) 661 struct xlog_ticket *tic,
662 xfs_lsn_t *start_lsn)
729{ 663{
730 int error; 664 struct log *log = mp->m_log;
731 xlog_t *log = mp->m_log; 665 int error;
732 666
733 if (XLOG_FORCED_SHUTDOWN(log)) 667 if (XLOG_FORCED_SHUTDOWN(log))
734 return XFS_ERROR(EIO); 668 return XFS_ERROR(EIO);
735 669
736 if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { 670 error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
671 if (error)
737 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 672 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
738 }
739 return error; 673 return error;
740} /* xfs_log_write */ 674}
741
742 675
743void 676void
744xfs_log_move_tail(xfs_mount_t *mp, 677xfs_log_move_tail(xfs_mount_t *mp,
@@ -988,35 +921,6 @@ xlog_iodone(xfs_buf_t *bp)
988} /* xlog_iodone */ 921} /* xlog_iodone */
989 922
990/* 923/*
991 * The bdstrat callback function for log bufs. This gives us a central
992 * place to trap bufs in case we get hit by a log I/O error and need to
993 * shutdown. Actually, in practice, even when we didn't get a log error,
994 * we transition the iclogs to IOERROR state *after* flushing all existing
995 * iclogs to disk. This is because we don't want anymore new transactions to be
996 * started or completed afterwards.
997 */
998STATIC int
999xlog_bdstrat_cb(struct xfs_buf *bp)
1000{
1001 xlog_in_core_t *iclog;
1002
1003 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1004
1005 if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
1006 /* note for irix bstrat will need struct bdevsw passed
1007 * Fix the following macro if the code ever is merged
1008 */
1009 XFS_bdstrat(bp);
1010 return 0;
1011 }
1012
1013 XFS_BUF_ERROR(bp, EIO);
1014 XFS_BUF_STALE(bp);
1015 xfs_biodone(bp);
1016 return XFS_ERROR(EIO);
1017}
1018
1019/*
1020 * Return size of each in-core log record buffer. 924 * Return size of each in-core log record buffer.
1021 * 925 *
1022 * All machines get 8 x 32kB buffers by default, unless tuned otherwise. 926 * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1158,7 +1062,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1158 if (!bp) 1062 if (!bp)
1159 goto out_free_log; 1063 goto out_free_log;
1160 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1064 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1161 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1162 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1065 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1163 ASSERT(XFS_BUF_ISBUSY(bp)); 1066 ASSERT(XFS_BUF_ISBUSY(bp));
1164 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 1067 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
@@ -1196,7 +1099,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1196 if (!XFS_BUF_CPSEMA(bp)) 1099 if (!XFS_BUF_CPSEMA(bp))
1197 ASSERT(0); 1100 ASSERT(0);
1198 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1101 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1199 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1200 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1102 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1201 iclog->ic_bp = bp; 1103 iclog->ic_bp = bp;
1202 iclog->ic_data = bp->b_addr; 1104 iclog->ic_data = bp->b_addr;
@@ -1268,7 +1170,7 @@ xlog_commit_record(xfs_mount_t *mp,
1268 1170
1269 reg[0].i_addr = NULL; 1171 reg[0].i_addr = NULL;
1270 reg[0].i_len = 0; 1172 reg[0].i_len = 0;
1271 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); 1173 reg[0].i_type = XLOG_REG_TYPE_COMMIT;
1272 1174
1273 ASSERT_ALWAYS(iclog); 1175 ASSERT_ALWAYS(iclog);
1274 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1176 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1343,6 +1245,37 @@ xlog_grant_push_ail(xfs_mount_t *mp,
1343 xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1245 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1344} /* xlog_grant_push_ail */ 1246} /* xlog_grant_push_ail */
1345 1247
1248/*
1249 * The bdstrat callback function for log bufs. This gives us a central
1250 * place to trap bufs in case we get hit by a log I/O error and need to
1251 * shutdown. Actually, in practice, even when we didn't get a log error,
1252 * we transition the iclogs to IOERROR state *after* flushing all existing
1253 * iclogs to disk. This is because we don't want anymore new transactions to be
1254 * started or completed afterwards.
1255 */
1256STATIC int
1257xlog_bdstrat(
1258 struct xfs_buf *bp)
1259{
1260 struct xlog_in_core *iclog;
1261
1262 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1263 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1264 XFS_BUF_ERROR(bp, EIO);
1265 XFS_BUF_STALE(bp);
1266 xfs_biodone(bp);
1267 /*
1268 * It would seem logical to return EIO here, but we rely on
1269 * the log state machine to propagate I/O errors instead of
1270 * doing it here.
1271 */
1272 return 0;
1273 }
1274
1275 bp->b_flags |= _XBF_RUN_QUEUES;
1276 xfs_buf_iorequest(bp);
1277 return 0;
1278}
1346 1279
1347/* 1280/*
1348 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous 1281 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
@@ -1462,7 +1395,7 @@ xlog_sync(xlog_t *log,
1462 */ 1395 */
1463 XFS_BUF_WRITE(bp); 1396 XFS_BUF_WRITE(bp);
1464 1397
1465 if ((error = XFS_bwrite(bp))) { 1398 if ((error = xlog_bdstrat(bp))) {
1466 xfs_ioerror_alert("xlog_sync", log->l_mp, bp, 1399 xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
1467 XFS_BUF_ADDR(bp)); 1400 XFS_BUF_ADDR(bp));
1468 return error; 1401 return error;
@@ -1502,7 +1435,7 @@ xlog_sync(xlog_t *log,
1502 /* account for internal log which doesn't start at block #0 */ 1435 /* account for internal log which doesn't start at block #0 */
1503 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1436 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1504 XFS_BUF_WRITE(bp); 1437 XFS_BUF_WRITE(bp);
1505 if ((error = XFS_bwrite(bp))) { 1438 if ((error = xlog_bdstrat(bp))) {
1506 xfs_ioerror_alert("xlog_sync (split)", log->l_mp, 1439 xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
1507 bp, XFS_BUF_ADDR(bp)); 1440 bp, XFS_BUF_ADDR(bp));
1508 return error; 1441 return error;
@@ -1707,16 +1640,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1707 * bytes have been written out. 1640 * bytes have been written out.
1708 */ 1641 */
1709STATIC int 1642STATIC int
1710xlog_write(xfs_mount_t * mp, 1643xlog_write(
1711 xfs_log_iovec_t reg[], 1644 struct xfs_mount *mp,
1712 int nentries, 1645 struct xfs_log_iovec reg[],
1713 xfs_log_ticket_t tic, 1646 int nentries,
1714 xfs_lsn_t *start_lsn, 1647 struct xlog_ticket *ticket,
1715 xlog_in_core_t **commit_iclog, 1648 xfs_lsn_t *start_lsn,
1716 uint flags) 1649 struct xlog_in_core **commit_iclog,
1650 uint flags)
1717{ 1651{
1718 xlog_t *log = mp->m_log; 1652 xlog_t *log = mp->m_log;
1719 xlog_ticket_t *ticket = (xlog_ticket_t *)tic;
1720 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ 1653 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */
1721 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1654 xlog_op_header_t *logop_head; /* ptr to log operation header */
1722 __psint_t ptr; /* copy address into data region */ 1655 __psint_t ptr; /* copy address into data region */
@@ -1830,7 +1763,7 @@ xlog_write(xfs_mount_t * mp,
1830 default: 1763 default:
1831 xfs_fs_cmn_err(CE_WARN, mp, 1764 xfs_fs_cmn_err(CE_WARN, mp,
1832 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1765 "Bad XFS transaction clientid 0x%x in ticket 0x%p",
1833 logop_head->oh_clientid, tic); 1766 logop_head->oh_clientid, ticket);
1834 return XFS_ERROR(EIO); 1767 return XFS_ERROR(EIO);
1835 } 1768 }
1836 1769
@@ -2854,7 +2787,6 @@ xlog_state_switch_iclogs(xlog_t *log,
2854 log->l_iclog = iclog->ic_next; 2787 log->l_iclog = iclog->ic_next;
2855} /* xlog_state_switch_iclogs */ 2788} /* xlog_state_switch_iclogs */
2856 2789
2857
2858/* 2790/*
2859 * Write out all data in the in-core log as of this exact moment in time. 2791 * Write out all data in the in-core log as of this exact moment in time.
2860 * 2792 *
@@ -2882,11 +2814,17 @@ xlog_state_switch_iclogs(xlog_t *log,
2882 * b) when we return from flushing out this iclog, it is still 2814 * b) when we return from flushing out this iclog, it is still
2883 * not in the active nor dirty state. 2815 * not in the active nor dirty state.
2884 */ 2816 */
2885STATIC int 2817int
2886xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) 2818_xfs_log_force(
2819 struct xfs_mount *mp,
2820 uint flags,
2821 int *log_flushed)
2887{ 2822{
2888 xlog_in_core_t *iclog; 2823 struct log *log = mp->m_log;
2889 xfs_lsn_t lsn; 2824 struct xlog_in_core *iclog;
2825 xfs_lsn_t lsn;
2826
2827 XFS_STATS_INC(xs_log_force);
2890 2828
2891 spin_lock(&log->l_icloglock); 2829 spin_lock(&log->l_icloglock);
2892 2830
@@ -2932,7 +2870,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2932 2870
2933 if (xlog_state_release_iclog(log, iclog)) 2871 if (xlog_state_release_iclog(log, iclog))
2934 return XFS_ERROR(EIO); 2872 return XFS_ERROR(EIO);
2935 *log_flushed = 1; 2873
2874 if (log_flushed)
2875 *log_flushed = 1;
2936 spin_lock(&log->l_icloglock); 2876 spin_lock(&log->l_icloglock);
2937 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && 2877 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
2938 iclog->ic_state != XLOG_STATE_DIRTY) 2878 iclog->ic_state != XLOG_STATE_DIRTY)
@@ -2976,19 +2916,37 @@ maybe_sleep:
2976 */ 2916 */
2977 if (iclog->ic_state & XLOG_STATE_IOERROR) 2917 if (iclog->ic_state & XLOG_STATE_IOERROR)
2978 return XFS_ERROR(EIO); 2918 return XFS_ERROR(EIO);
2979 *log_flushed = 1; 2919 if (log_flushed)
2980 2920 *log_flushed = 1;
2981 } else { 2921 } else {
2982 2922
2983no_sleep: 2923no_sleep:
2984 spin_unlock(&log->l_icloglock); 2924 spin_unlock(&log->l_icloglock);
2985 } 2925 }
2986 return 0; 2926 return 0;
2987} /* xlog_state_sync_all */ 2927}
2988 2928
2929/*
2930 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
2931 * about errors or whether the log was flushed or not. This is the normal
2932 * interface to use when trying to unpin items or move the log forward.
2933 */
2934void
2935xfs_log_force(
2936 xfs_mount_t *mp,
2937 uint flags)
2938{
2939 int error;
2940
2941 error = _xfs_log_force(mp, flags, NULL);
2942 if (error) {
2943 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
2944 "error %d returned.", error);
2945 }
2946}
2989 2947
2990/* 2948/*
2991 * Used by code which implements synchronous log forces. 2949 * Force the in-core log to disk for a specific LSN.
2992 * 2950 *
2993 * Find in-core log with lsn. 2951 * Find in-core log with lsn.
2994 * If it is in the DIRTY state, just return. 2952 * If it is in the DIRTY state, just return.
@@ -2996,109 +2954,142 @@ no_sleep:
2996 * state and go to sleep or return. 2954 * state and go to sleep or return.
2997 * If it is in any other state, go to sleep or return. 2955 * If it is in any other state, go to sleep or return.
2998 * 2956 *
2999 * If filesystem activity goes to zero, the iclog will get flushed only by 2957 * Synchronous forces are implemented with a signal variable. All callers
3000 * bdflush(). 2958 * to force a given lsn to disk will wait on a the sv attached to the
2959 * specific in-core log. When given in-core log finally completes its
2960 * write to disk, that thread will wake up all threads waiting on the
2961 * sv.
3001 */ 2962 */
3002STATIC int 2963int
3003xlog_state_sync(xlog_t *log, 2964_xfs_log_force_lsn(
3004 xfs_lsn_t lsn, 2965 struct xfs_mount *mp,
3005 uint flags, 2966 xfs_lsn_t lsn,
3006 int *log_flushed) 2967 uint flags,
2968 int *log_flushed)
3007{ 2969{
3008 xlog_in_core_t *iclog; 2970 struct log *log = mp->m_log;
3009 int already_slept = 0; 2971 struct xlog_in_core *iclog;
3010 2972 int already_slept = 0;
3011try_again:
3012 spin_lock(&log->l_icloglock);
3013 iclog = log->l_iclog;
3014 2973
3015 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2974 ASSERT(lsn != 0);
3016 spin_unlock(&log->l_icloglock);
3017 return XFS_ERROR(EIO);
3018 }
3019 2975
3020 do { 2976 XFS_STATS_INC(xs_log_force);
3021 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3022 iclog = iclog->ic_next;
3023 continue;
3024 }
3025 2977
3026 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2978try_again:
2979 spin_lock(&log->l_icloglock);
2980 iclog = log->l_iclog;
2981 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3027 spin_unlock(&log->l_icloglock); 2982 spin_unlock(&log->l_icloglock);
3028 return 0; 2983 return XFS_ERROR(EIO);
3029 } 2984 }
3030 2985
3031 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 2986 do {
3032 /* 2987 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3033 * We sleep here if we haven't already slept (e.g. 2988 iclog = iclog->ic_next;
3034 * this is the first time we've looked at the correct 2989 continue;
3035 * iclog buf) and the buffer before us is going to 2990 }
3036 * be sync'ed. The reason for this is that if we 2991
3037 * are doing sync transactions here, by waiting for 2992 if (iclog->ic_state == XLOG_STATE_DIRTY) {
3038 * the previous I/O to complete, we can allow a few 2993 spin_unlock(&log->l_icloglock);
3039 * more transactions into this iclog before we close 2994 return 0;
3040 * it down. 2995 }
3041 * 2996
3042 * Otherwise, we mark the buffer WANT_SYNC, and bump 2997 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3043 * up the refcnt so we can release the log (which drops 2998 /*
3044 * the ref count). The state switch keeps new transaction 2999 * We sleep here if we haven't already slept (e.g.
3045 * commits from using this buffer. When the current commits 3000 * this is the first time we've looked at the correct
3046 * finish writing into the buffer, the refcount will drop to 3001 * iclog buf) and the buffer before us is going to
3047 * zero and the buffer will go out then. 3002 * be sync'ed. The reason for this is that if we
3048 */ 3003 * are doing sync transactions here, by waiting for
3049 if (!already_slept && 3004 * the previous I/O to complete, we can allow a few
3050 (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | 3005 * more transactions into this iclog before we close
3051 XLOG_STATE_SYNCING))) { 3006 * it down.
3052 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3007 *
3053 XFS_STATS_INC(xs_log_force_sleep); 3008 * Otherwise, we mark the buffer WANT_SYNC, and bump
3054 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, 3009 * up the refcnt so we can release the log (which
3055 &log->l_icloglock, s); 3010 * drops the ref count). The state switch keeps new
3056 *log_flushed = 1; 3011 * transaction commits from using this buffer. When
3057 already_slept = 1; 3012 * the current commits finish writing into the buffer,
3058 goto try_again; 3013 * the refcount will drop to zero and the buffer will
3059 } else { 3014 * go out then.
3015 */
3016 if (!already_slept &&
3017 (iclog->ic_prev->ic_state &
3018 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3019 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3020
3021 XFS_STATS_INC(xs_log_force_sleep);
3022
3023 sv_wait(&iclog->ic_prev->ic_write_wait,
3024 PSWP, &log->l_icloglock, s);
3025 if (log_flushed)
3026 *log_flushed = 1;
3027 already_slept = 1;
3028 goto try_again;
3029 }
3060 atomic_inc(&iclog->ic_refcnt); 3030 atomic_inc(&iclog->ic_refcnt);
3061 xlog_state_switch_iclogs(log, iclog, 0); 3031 xlog_state_switch_iclogs(log, iclog, 0);
3062 spin_unlock(&log->l_icloglock); 3032 spin_unlock(&log->l_icloglock);
3063 if (xlog_state_release_iclog(log, iclog)) 3033 if (xlog_state_release_iclog(log, iclog))
3064 return XFS_ERROR(EIO); 3034 return XFS_ERROR(EIO);
3065 *log_flushed = 1; 3035 if (log_flushed)
3036 *log_flushed = 1;
3066 spin_lock(&log->l_icloglock); 3037 spin_lock(&log->l_icloglock);
3067 } 3038 }
3068 }
3069 3039
3070 if ((flags & XFS_LOG_SYNC) && /* sleep */ 3040 if ((flags & XFS_LOG_SYNC) && /* sleep */
3071 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3041 !(iclog->ic_state &
3042 (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3043 /*
3044 * Don't wait on completion if we know that we've
3045 * gotten a log write error.
3046 */
3047 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3048 spin_unlock(&log->l_icloglock);
3049 return XFS_ERROR(EIO);
3050 }
3051 XFS_STATS_INC(xs_log_force_sleep);
3052 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3053 /*
3054 * No need to grab the log lock here since we're
3055 * only deciding whether or not to return EIO
3056 * and the memory read should be atomic.
3057 */
3058 if (iclog->ic_state & XLOG_STATE_IOERROR)
3059 return XFS_ERROR(EIO);
3072 3060
3073 /* 3061 if (log_flushed)
3074 * Don't wait on completion if we know that we've 3062 *log_flushed = 1;
3075 * gotten a log write error. 3063 } else { /* just return */
3076 */
3077 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3078 spin_unlock(&log->l_icloglock); 3064 spin_unlock(&log->l_icloglock);
3079 return XFS_ERROR(EIO);
3080 } 3065 }
3081 XFS_STATS_INC(xs_log_force_sleep);
3082 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3083 /*
3084 * No need to grab the log lock here since we're
3085 * only deciding whether or not to return EIO
3086 * and the memory read should be atomic.
3087 */
3088 if (iclog->ic_state & XLOG_STATE_IOERROR)
3089 return XFS_ERROR(EIO);
3090 *log_flushed = 1;
3091 } else { /* just return */
3092 spin_unlock(&log->l_icloglock);
3093 }
3094 return 0;
3095 3066
3096 } while (iclog != log->l_iclog); 3067 return 0;
3068 } while (iclog != log->l_iclog);
3097 3069
3098 spin_unlock(&log->l_icloglock); 3070 spin_unlock(&log->l_icloglock);
3099 return 0; 3071 return 0;
3100} /* xlog_state_sync */ 3072}
3073
3074/*
3075 * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
3076 * about errors or whether the log was flushed or not. This is the normal
3077 * interface to use when trying to unpin items or move the log forward.
3078 */
3079void
3080xfs_log_force_lsn(
3081 xfs_mount_t *mp,
3082 xfs_lsn_t lsn,
3083 uint flags)
3084{
3085 int error;
3101 3086
3087 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3088 if (error) {
3089 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
3090 "error %d returned.", error);
3091 }
3092}
3102 3093
3103/* 3094/*
3104 * Called when we want to mark the current iclog as being ready to sync to 3095 * Called when we want to mark the current iclog as being ready to sync to
@@ -3463,7 +3454,6 @@ xfs_log_force_umount(
3463 xlog_ticket_t *tic; 3454 xlog_ticket_t *tic;
3464 xlog_t *log; 3455 xlog_t *log;
3465 int retval; 3456 int retval;
3466 int dummy;
3467 3457
3468 log = mp->m_log; 3458 log = mp->m_log;
3469 3459
@@ -3537,13 +3527,14 @@ xfs_log_force_umount(
3537 } 3527 }
3538 spin_unlock(&log->l_grant_lock); 3528 spin_unlock(&log->l_grant_lock);
3539 3529
3540 if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3530 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3541 ASSERT(!logerror); 3531 ASSERT(!logerror);
3542 /* 3532 /*
3543 * Force the incore logs to disk before shutting the 3533 * Force the incore logs to disk before shutting the
3544 * log down completely. 3534 * log down completely.
3545 */ 3535 */
3546 xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); 3536 _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
3537
3547 spin_lock(&log->l_icloglock); 3538 spin_lock(&log->l_icloglock);
3548 retval = xlog_state_ioerror(log); 3539 retval = xlog_state_ioerror(log);
3549 spin_unlock(&log->l_icloglock); 3540 spin_unlock(&log->l_icloglock);