diff options
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r-- | fs/xfs/xfs_log.c | 487 |
1 files changed, 239 insertions, 248 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 600b5b06aaeb..e8fba92d7cd9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone; | |||
50 | (off) += (bytes);} | 50 | (off) += (bytes);} |
51 | 51 | ||
52 | /* Local miscellaneous function prototypes */ | 52 | /* Local miscellaneous function prototypes */ |
53 | STATIC int xlog_bdstrat_cb(struct xfs_buf *); | ||
54 | STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, | 53 | STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, |
55 | xlog_in_core_t **, xfs_lsn_t *); | 54 | xlog_in_core_t **, xfs_lsn_t *); |
56 | STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | 55 | STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, |
@@ -61,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | |||
61 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 60 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
62 | STATIC void xlog_dealloc_log(xlog_t *log); | 61 | STATIC void xlog_dealloc_log(xlog_t *log); |
63 | STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], | 62 | STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], |
64 | int nentries, xfs_log_ticket_t tic, | 63 | int nentries, struct xlog_ticket *tic, |
65 | xfs_lsn_t *start_lsn, | 64 | xfs_lsn_t *start_lsn, |
66 | xlog_in_core_t **commit_iclog, | 65 | xlog_in_core_t **commit_iclog, |
67 | uint flags); | 66 | uint flags); |
@@ -80,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log, | |||
80 | STATIC void xlog_state_switch_iclogs(xlog_t *log, | 79 | STATIC void xlog_state_switch_iclogs(xlog_t *log, |
81 | xlog_in_core_t *iclog, | 80 | xlog_in_core_t *iclog, |
82 | int eventual_size); | 81 | int eventual_size); |
83 | STATIC int xlog_state_sync(xlog_t *log, | ||
84 | xfs_lsn_t lsn, | ||
85 | uint flags, | ||
86 | int *log_flushed); | ||
87 | STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed); | ||
88 | STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); | 82 | STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); |
89 | 83 | ||
90 | /* local functions to manipulate grant head */ | 84 | /* local functions to manipulate grant head */ |
@@ -249,14 +243,14 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) | |||
249 | * out when the next write occurs. | 243 | * out when the next write occurs. |
250 | */ | 244 | */ |
251 | xfs_lsn_t | 245 | xfs_lsn_t |
252 | xfs_log_done(xfs_mount_t *mp, | 246 | xfs_log_done( |
253 | xfs_log_ticket_t xtic, | 247 | struct xfs_mount *mp, |
254 | void **iclog, | 248 | struct xlog_ticket *ticket, |
255 | uint flags) | 249 | struct xlog_in_core **iclog, |
250 | uint flags) | ||
256 | { | 251 | { |
257 | xlog_t *log = mp->m_log; | 252 | struct log *log = mp->m_log; |
258 | xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic; | 253 | xfs_lsn_t lsn = 0; |
259 | xfs_lsn_t lsn = 0; | ||
260 | 254 | ||
261 | if (XLOG_FORCED_SHUTDOWN(log) || | 255 | if (XLOG_FORCED_SHUTDOWN(log) || |
262 | /* | 256 | /* |
@@ -264,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp, | |||
264 | * If we get an error, just continue and give back the log ticket. | 258 | * If we get an error, just continue and give back the log ticket. |
265 | */ | 259 | */ |
266 | (((ticket->t_flags & XLOG_TIC_INITED) == 0) && | 260 | (((ticket->t_flags & XLOG_TIC_INITED) == 0) && |
267 | (xlog_commit_record(mp, ticket, | 261 | (xlog_commit_record(mp, ticket, iclog, &lsn)))) { |
268 | (xlog_in_core_t **)iclog, &lsn)))) { | ||
269 | lsn = (xfs_lsn_t) -1; | 262 | lsn = (xfs_lsn_t) -1; |
270 | if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { | 263 | if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { |
271 | flags |= XFS_LOG_REL_PERM_RESERV; | 264 | flags |= XFS_LOG_REL_PERM_RESERV; |
@@ -295,67 +288,8 @@ xfs_log_done(xfs_mount_t *mp, | |||
295 | } | 288 | } |
296 | 289 | ||
297 | return lsn; | 290 | return lsn; |
298 | } /* xfs_log_done */ | ||
299 | |||
300 | |||
301 | /* | ||
302 | * Force the in-core log to disk. If flags == XFS_LOG_SYNC, | ||
303 | * the force is done synchronously. | ||
304 | * | ||
305 | * Asynchronous forces are implemented by setting the WANT_SYNC | ||
306 | * bit in the appropriate in-core log and then returning. | ||
307 | * | ||
308 | * Synchronous forces are implemented with a signal variable. All callers | ||
309 | * to force a given lsn to disk will wait on a the sv attached to the | ||
310 | * specific in-core log. When given in-core log finally completes its | ||
311 | * write to disk, that thread will wake up all threads waiting on the | ||
312 | * sv. | ||
313 | */ | ||
314 | int | ||
315 | _xfs_log_force( | ||
316 | xfs_mount_t *mp, | ||
317 | xfs_lsn_t lsn, | ||
318 | uint flags, | ||
319 | int *log_flushed) | ||
320 | { | ||
321 | xlog_t *log = mp->m_log; | ||
322 | int dummy; | ||
323 | |||
324 | if (!log_flushed) | ||
325 | log_flushed = &dummy; | ||
326 | |||
327 | ASSERT(flags & XFS_LOG_FORCE); | ||
328 | |||
329 | XFS_STATS_INC(xs_log_force); | ||
330 | |||
331 | if (log->l_flags & XLOG_IO_ERROR) | ||
332 | return XFS_ERROR(EIO); | ||
333 | if (lsn == 0) | ||
334 | return xlog_state_sync_all(log, flags, log_flushed); | ||
335 | else | ||
336 | return xlog_state_sync(log, lsn, flags, log_flushed); | ||
337 | } /* _xfs_log_force */ | ||
338 | |||
339 | /* | ||
340 | * Wrapper for _xfs_log_force(), to be used when caller doesn't care | ||
341 | * about errors or whether the log was flushed or not. This is the normal | ||
342 | * interface to use when trying to unpin items or move the log forward. | ||
343 | */ | ||
344 | void | ||
345 | xfs_log_force( | ||
346 | xfs_mount_t *mp, | ||
347 | xfs_lsn_t lsn, | ||
348 | uint flags) | ||
349 | { | ||
350 | int error; | ||
351 | error = _xfs_log_force(mp, lsn, flags, NULL); | ||
352 | if (error) { | ||
353 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
354 | "error %d returned.", error); | ||
355 | } | ||
356 | } | 291 | } |
357 | 292 | ||
358 | |||
359 | /* | 293 | /* |
360 | * Attaches a new iclog I/O completion callback routine during | 294 | * Attaches a new iclog I/O completion callback routine during |
361 | * transaction commit. If the log is in error state, a non-zero | 295 | * transaction commit. If the log is in error state, a non-zero |
@@ -363,11 +297,11 @@ xfs_log_force( | |||
363 | * executing the callback at an appropriate time. | 297 | * executing the callback at an appropriate time. |
364 | */ | 298 | */ |
365 | int | 299 | int |
366 | xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | 300 | xfs_log_notify( |
367 | void *iclog_hndl, /* iclog to hang callback off */ | 301 | struct xfs_mount *mp, |
368 | xfs_log_callback_t *cb) | 302 | struct xlog_in_core *iclog, |
303 | xfs_log_callback_t *cb) | ||
369 | { | 304 | { |
370 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; | ||
371 | int abortflg; | 305 | int abortflg; |
372 | 306 | ||
373 | spin_lock(&iclog->ic_callback_lock); | 307 | spin_lock(&iclog->ic_callback_lock); |
@@ -381,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | |||
381 | } | 315 | } |
382 | spin_unlock(&iclog->ic_callback_lock); | 316 | spin_unlock(&iclog->ic_callback_lock); |
383 | return abortflg; | 317 | return abortflg; |
384 | } /* xfs_log_notify */ | 318 | } |
385 | 319 | ||
386 | int | 320 | int |
387 | xfs_log_release_iclog(xfs_mount_t *mp, | 321 | xfs_log_release_iclog( |
388 | void *iclog_hndl) | 322 | struct xfs_mount *mp, |
323 | struct xlog_in_core *iclog) | ||
389 | { | 324 | { |
390 | xlog_t *log = mp->m_log; | 325 | if (xlog_state_release_iclog(mp->m_log, iclog)) { |
391 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; | ||
392 | |||
393 | if (xlog_state_release_iclog(log, iclog)) { | ||
394 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 326 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
395 | return EIO; | 327 | return EIO; |
396 | } | 328 | } |
@@ -409,17 +341,18 @@ xfs_log_release_iclog(xfs_mount_t *mp, | |||
409 | * reservation, we prevent over allocation problems. | 341 | * reservation, we prevent over allocation problems. |
410 | */ | 342 | */ |
411 | int | 343 | int |
412 | xfs_log_reserve(xfs_mount_t *mp, | 344 | xfs_log_reserve( |
413 | int unit_bytes, | 345 | struct xfs_mount *mp, |
414 | int cnt, | 346 | int unit_bytes, |
415 | xfs_log_ticket_t *ticket, | 347 | int cnt, |
416 | __uint8_t client, | 348 | struct xlog_ticket **ticket, |
417 | uint flags, | 349 | __uint8_t client, |
418 | uint t_type) | 350 | uint flags, |
351 | uint t_type) | ||
419 | { | 352 | { |
420 | xlog_t *log = mp->m_log; | 353 | struct log *log = mp->m_log; |
421 | xlog_ticket_t *internal_ticket; | 354 | struct xlog_ticket *internal_ticket; |
422 | int retval = 0; | 355 | int retval = 0; |
423 | 356 | ||
424 | ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); | 357 | ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); |
425 | ASSERT((flags & XFS_LOG_NOSLEEP) == 0); | 358 | ASSERT((flags & XFS_LOG_NOSLEEP) == 0); |
@@ -432,7 +365,7 @@ xfs_log_reserve(xfs_mount_t *mp, | |||
432 | 365 | ||
433 | if (*ticket != NULL) { | 366 | if (*ticket != NULL) { |
434 | ASSERT(flags & XFS_LOG_PERM_RESERV); | 367 | ASSERT(flags & XFS_LOG_PERM_RESERV); |
435 | internal_ticket = (xlog_ticket_t *)*ticket; | 368 | internal_ticket = *ticket; |
436 | 369 | ||
437 | trace_xfs_log_reserve(log, internal_ticket); | 370 | trace_xfs_log_reserve(log, internal_ticket); |
438 | 371 | ||
@@ -584,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
584 | xlog_in_core_t *first_iclog; | 517 | xlog_in_core_t *first_iclog; |
585 | #endif | 518 | #endif |
586 | xfs_log_iovec_t reg[1]; | 519 | xfs_log_iovec_t reg[1]; |
587 | xfs_log_ticket_t tic = NULL; | 520 | xlog_ticket_t *tic = NULL; |
588 | xfs_lsn_t lsn; | 521 | xfs_lsn_t lsn; |
589 | int error; | 522 | int error; |
590 | 523 | ||
@@ -602,7 +535,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
602 | if (mp->m_flags & XFS_MOUNT_RDONLY) | 535 | if (mp->m_flags & XFS_MOUNT_RDONLY) |
603 | return 0; | 536 | return 0; |
604 | 537 | ||
605 | error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); | 538 | error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); |
606 | ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); | 539 | ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); |
607 | 540 | ||
608 | #ifdef DEBUG | 541 | #ifdef DEBUG |
@@ -618,7 +551,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
618 | if (! (XLOG_FORCED_SHUTDOWN(log))) { | 551 | if (! (XLOG_FORCED_SHUTDOWN(log))) { |
619 | reg[0].i_addr = (void*)&magic; | 552 | reg[0].i_addr = (void*)&magic; |
620 | reg[0].i_len = sizeof(magic); | 553 | reg[0].i_len = sizeof(magic); |
621 | XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); | 554 | reg[0].i_type = XLOG_REG_TYPE_UNMOUNT; |
622 | 555 | ||
623 | error = xfs_log_reserve(mp, 600, 1, &tic, | 556 | error = xfs_log_reserve(mp, 600, 1, &tic, |
624 | XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); | 557 | XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); |
@@ -721,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp) | |||
721 | * transaction occur with one call to xfs_log_write(). | 654 | * transaction occur with one call to xfs_log_write(). |
722 | */ | 655 | */ |
723 | int | 656 | int |
724 | xfs_log_write(xfs_mount_t * mp, | 657 | xfs_log_write( |
725 | xfs_log_iovec_t reg[], | 658 | struct xfs_mount *mp, |
726 | int nentries, | 659 | struct xfs_log_iovec reg[], |
727 | xfs_log_ticket_t tic, | 660 | int nentries, |
728 | xfs_lsn_t *start_lsn) | 661 | struct xlog_ticket *tic, |
662 | xfs_lsn_t *start_lsn) | ||
729 | { | 663 | { |
730 | int error; | 664 | struct log *log = mp->m_log; |
731 | xlog_t *log = mp->m_log; | 665 | int error; |
732 | 666 | ||
733 | if (XLOG_FORCED_SHUTDOWN(log)) | 667 | if (XLOG_FORCED_SHUTDOWN(log)) |
734 | return XFS_ERROR(EIO); | 668 | return XFS_ERROR(EIO); |
735 | 669 | ||
736 | if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { | 670 | error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0); |
671 | if (error) | ||
737 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 672 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
738 | } | ||
739 | return error; | 673 | return error; |
740 | } /* xfs_log_write */ | 674 | } |
741 | |||
742 | 675 | ||
743 | void | 676 | void |
744 | xfs_log_move_tail(xfs_mount_t *mp, | 677 | xfs_log_move_tail(xfs_mount_t *mp, |
@@ -988,35 +921,6 @@ xlog_iodone(xfs_buf_t *bp) | |||
988 | } /* xlog_iodone */ | 921 | } /* xlog_iodone */ |
989 | 922 | ||
990 | /* | 923 | /* |
991 | * The bdstrat callback function for log bufs. This gives us a central | ||
992 | * place to trap bufs in case we get hit by a log I/O error and need to | ||
993 | * shutdown. Actually, in practice, even when we didn't get a log error, | ||
994 | * we transition the iclogs to IOERROR state *after* flushing all existing | ||
995 | * iclogs to disk. This is because we don't want anymore new transactions to be | ||
996 | * started or completed afterwards. | ||
997 | */ | ||
998 | STATIC int | ||
999 | xlog_bdstrat_cb(struct xfs_buf *bp) | ||
1000 | { | ||
1001 | xlog_in_core_t *iclog; | ||
1002 | |||
1003 | iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); | ||
1004 | |||
1005 | if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) { | ||
1006 | /* note for irix bstrat will need struct bdevsw passed | ||
1007 | * Fix the following macro if the code ever is merged | ||
1008 | */ | ||
1009 | XFS_bdstrat(bp); | ||
1010 | return 0; | ||
1011 | } | ||
1012 | |||
1013 | XFS_BUF_ERROR(bp, EIO); | ||
1014 | XFS_BUF_STALE(bp); | ||
1015 | xfs_biodone(bp); | ||
1016 | return XFS_ERROR(EIO); | ||
1017 | } | ||
1018 | |||
1019 | /* | ||
1020 | * Return size of each in-core log record buffer. | 924 | * Return size of each in-core log record buffer. |
1021 | * | 925 | * |
1022 | * All machines get 8 x 32kB buffers by default, unless tuned otherwise. | 926 | * All machines get 8 x 32kB buffers by default, unless tuned otherwise. |
@@ -1158,7 +1062,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1158 | if (!bp) | 1062 | if (!bp) |
1159 | goto out_free_log; | 1063 | goto out_free_log; |
1160 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); | 1064 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); |
1161 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); | ||
1162 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | 1065 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); |
1163 | ASSERT(XFS_BUF_ISBUSY(bp)); | 1066 | ASSERT(XFS_BUF_ISBUSY(bp)); |
1164 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 1067 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
@@ -1196,7 +1099,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1196 | if (!XFS_BUF_CPSEMA(bp)) | 1099 | if (!XFS_BUF_CPSEMA(bp)) |
1197 | ASSERT(0); | 1100 | ASSERT(0); |
1198 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); | 1101 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); |
1199 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); | ||
1200 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | 1102 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); |
1201 | iclog->ic_bp = bp; | 1103 | iclog->ic_bp = bp; |
1202 | iclog->ic_data = bp->b_addr; | 1104 | iclog->ic_data = bp->b_addr; |
@@ -1268,7 +1170,7 @@ xlog_commit_record(xfs_mount_t *mp, | |||
1268 | 1170 | ||
1269 | reg[0].i_addr = NULL; | 1171 | reg[0].i_addr = NULL; |
1270 | reg[0].i_len = 0; | 1172 | reg[0].i_len = 0; |
1271 | XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); | 1173 | reg[0].i_type = XLOG_REG_TYPE_COMMIT; |
1272 | 1174 | ||
1273 | ASSERT_ALWAYS(iclog); | 1175 | ASSERT_ALWAYS(iclog); |
1274 | if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, | 1176 | if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, |
@@ -1343,6 +1245,37 @@ xlog_grant_push_ail(xfs_mount_t *mp, | |||
1343 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); | 1245 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); |
1344 | } /* xlog_grant_push_ail */ | 1246 | } /* xlog_grant_push_ail */ |
1345 | 1247 | ||
1248 | /* | ||
1249 | * The bdstrat callback function for log bufs. This gives us a central | ||
1250 | * place to trap bufs in case we get hit by a log I/O error and need to | ||
1251 | * shutdown. Actually, in practice, even when we didn't get a log error, | ||
1252 | * we transition the iclogs to IOERROR state *after* flushing all existing | ||
1253 | * iclogs to disk. This is because we don't want anymore new transactions to be | ||
1254 | * started or completed afterwards. | ||
1255 | */ | ||
1256 | STATIC int | ||
1257 | xlog_bdstrat( | ||
1258 | struct xfs_buf *bp) | ||
1259 | { | ||
1260 | struct xlog_in_core *iclog; | ||
1261 | |||
1262 | iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); | ||
1263 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
1264 | XFS_BUF_ERROR(bp, EIO); | ||
1265 | XFS_BUF_STALE(bp); | ||
1266 | xfs_biodone(bp); | ||
1267 | /* | ||
1268 | * It would seem logical to return EIO here, but we rely on | ||
1269 | * the log state machine to propagate I/O errors instead of | ||
1270 | * doing it here. | ||
1271 | */ | ||
1272 | return 0; | ||
1273 | } | ||
1274 | |||
1275 | bp->b_flags |= _XBF_RUN_QUEUES; | ||
1276 | xfs_buf_iorequest(bp); | ||
1277 | return 0; | ||
1278 | } | ||
1346 | 1279 | ||
1347 | /* | 1280 | /* |
1348 | * Flush out the in-core log (iclog) to the on-disk log in an asynchronous | 1281 | * Flush out the in-core log (iclog) to the on-disk log in an asynchronous |
@@ -1462,7 +1395,7 @@ xlog_sync(xlog_t *log, | |||
1462 | */ | 1395 | */ |
1463 | XFS_BUF_WRITE(bp); | 1396 | XFS_BUF_WRITE(bp); |
1464 | 1397 | ||
1465 | if ((error = XFS_bwrite(bp))) { | 1398 | if ((error = xlog_bdstrat(bp))) { |
1466 | xfs_ioerror_alert("xlog_sync", log->l_mp, bp, | 1399 | xfs_ioerror_alert("xlog_sync", log->l_mp, bp, |
1467 | XFS_BUF_ADDR(bp)); | 1400 | XFS_BUF_ADDR(bp)); |
1468 | return error; | 1401 | return error; |
@@ -1502,7 +1435,7 @@ xlog_sync(xlog_t *log, | |||
1502 | /* account for internal log which doesn't start at block #0 */ | 1435 | /* account for internal log which doesn't start at block #0 */ |
1503 | XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); | 1436 | XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); |
1504 | XFS_BUF_WRITE(bp); | 1437 | XFS_BUF_WRITE(bp); |
1505 | if ((error = XFS_bwrite(bp))) { | 1438 | if ((error = xlog_bdstrat(bp))) { |
1506 | xfs_ioerror_alert("xlog_sync (split)", log->l_mp, | 1439 | xfs_ioerror_alert("xlog_sync (split)", log->l_mp, |
1507 | bp, XFS_BUF_ADDR(bp)); | 1440 | bp, XFS_BUF_ADDR(bp)); |
1508 | return error; | 1441 | return error; |
@@ -1707,16 +1640,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | |||
1707 | * bytes have been written out. | 1640 | * bytes have been written out. |
1708 | */ | 1641 | */ |
1709 | STATIC int | 1642 | STATIC int |
1710 | xlog_write(xfs_mount_t * mp, | 1643 | xlog_write( |
1711 | xfs_log_iovec_t reg[], | 1644 | struct xfs_mount *mp, |
1712 | int nentries, | 1645 | struct xfs_log_iovec reg[], |
1713 | xfs_log_ticket_t tic, | 1646 | int nentries, |
1714 | xfs_lsn_t *start_lsn, | 1647 | struct xlog_ticket *ticket, |
1715 | xlog_in_core_t **commit_iclog, | 1648 | xfs_lsn_t *start_lsn, |
1716 | uint flags) | 1649 | struct xlog_in_core **commit_iclog, |
1650 | uint flags) | ||
1717 | { | 1651 | { |
1718 | xlog_t *log = mp->m_log; | 1652 | xlog_t *log = mp->m_log; |
1719 | xlog_ticket_t *ticket = (xlog_ticket_t *)tic; | ||
1720 | xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ | 1653 | xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ |
1721 | xlog_op_header_t *logop_head; /* ptr to log operation header */ | 1654 | xlog_op_header_t *logop_head; /* ptr to log operation header */ |
1722 | __psint_t ptr; /* copy address into data region */ | 1655 | __psint_t ptr; /* copy address into data region */ |
@@ -1830,7 +1763,7 @@ xlog_write(xfs_mount_t * mp, | |||
1830 | default: | 1763 | default: |
1831 | xfs_fs_cmn_err(CE_WARN, mp, | 1764 | xfs_fs_cmn_err(CE_WARN, mp, |
1832 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", | 1765 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", |
1833 | logop_head->oh_clientid, tic); | 1766 | logop_head->oh_clientid, ticket); |
1834 | return XFS_ERROR(EIO); | 1767 | return XFS_ERROR(EIO); |
1835 | } | 1768 | } |
1836 | 1769 | ||
@@ -2854,7 +2787,6 @@ xlog_state_switch_iclogs(xlog_t *log, | |||
2854 | log->l_iclog = iclog->ic_next; | 2787 | log->l_iclog = iclog->ic_next; |
2855 | } /* xlog_state_switch_iclogs */ | 2788 | } /* xlog_state_switch_iclogs */ |
2856 | 2789 | ||
2857 | |||
2858 | /* | 2790 | /* |
2859 | * Write out all data in the in-core log as of this exact moment in time. | 2791 | * Write out all data in the in-core log as of this exact moment in time. |
2860 | * | 2792 | * |
@@ -2882,11 +2814,17 @@ xlog_state_switch_iclogs(xlog_t *log, | |||
2882 | * b) when we return from flushing out this iclog, it is still | 2814 | * b) when we return from flushing out this iclog, it is still |
2883 | * not in the active nor dirty state. | 2815 | * not in the active nor dirty state. |
2884 | */ | 2816 | */ |
2885 | STATIC int | 2817 | int |
2886 | xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | 2818 | _xfs_log_force( |
2819 | struct xfs_mount *mp, | ||
2820 | uint flags, | ||
2821 | int *log_flushed) | ||
2887 | { | 2822 | { |
2888 | xlog_in_core_t *iclog; | 2823 | struct log *log = mp->m_log; |
2889 | xfs_lsn_t lsn; | 2824 | struct xlog_in_core *iclog; |
2825 | xfs_lsn_t lsn; | ||
2826 | |||
2827 | XFS_STATS_INC(xs_log_force); | ||
2890 | 2828 | ||
2891 | spin_lock(&log->l_icloglock); | 2829 | spin_lock(&log->l_icloglock); |
2892 | 2830 | ||
@@ -2932,7 +2870,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | |||
2932 | 2870 | ||
2933 | if (xlog_state_release_iclog(log, iclog)) | 2871 | if (xlog_state_release_iclog(log, iclog)) |
2934 | return XFS_ERROR(EIO); | 2872 | return XFS_ERROR(EIO); |
2935 | *log_flushed = 1; | 2873 | |
2874 | if (log_flushed) | ||
2875 | *log_flushed = 1; | ||
2936 | spin_lock(&log->l_icloglock); | 2876 | spin_lock(&log->l_icloglock); |
2937 | if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && | 2877 | if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && |
2938 | iclog->ic_state != XLOG_STATE_DIRTY) | 2878 | iclog->ic_state != XLOG_STATE_DIRTY) |
@@ -2976,19 +2916,37 @@ maybe_sleep: | |||
2976 | */ | 2916 | */ |
2977 | if (iclog->ic_state & XLOG_STATE_IOERROR) | 2917 | if (iclog->ic_state & XLOG_STATE_IOERROR) |
2978 | return XFS_ERROR(EIO); | 2918 | return XFS_ERROR(EIO); |
2979 | *log_flushed = 1; | 2919 | if (log_flushed) |
2980 | 2920 | *log_flushed = 1; | |
2981 | } else { | 2921 | } else { |
2982 | 2922 | ||
2983 | no_sleep: | 2923 | no_sleep: |
2984 | spin_unlock(&log->l_icloglock); | 2924 | spin_unlock(&log->l_icloglock); |
2985 | } | 2925 | } |
2986 | return 0; | 2926 | return 0; |
2987 | } /* xlog_state_sync_all */ | 2927 | } |
2988 | 2928 | ||
2929 | /* | ||
2930 | * Wrapper for _xfs_log_force(), to be used when caller doesn't care | ||
2931 | * about errors or whether the log was flushed or not. This is the normal | ||
2932 | * interface to use when trying to unpin items or move the log forward. | ||
2933 | */ | ||
2934 | void | ||
2935 | xfs_log_force( | ||
2936 | xfs_mount_t *mp, | ||
2937 | uint flags) | ||
2938 | { | ||
2939 | int error; | ||
2940 | |||
2941 | error = _xfs_log_force(mp, flags, NULL); | ||
2942 | if (error) { | ||
2943 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
2944 | "error %d returned.", error); | ||
2945 | } | ||
2946 | } | ||
2989 | 2947 | ||
2990 | /* | 2948 | /* |
2991 | * Used by code which implements synchronous log forces. | 2949 | * Force the in-core log to disk for a specific LSN. |
2992 | * | 2950 | * |
2993 | * Find in-core log with lsn. | 2951 | * Find in-core log with lsn. |
2994 | * If it is in the DIRTY state, just return. | 2952 | * If it is in the DIRTY state, just return. |
@@ -2996,109 +2954,142 @@ no_sleep: | |||
2996 | * state and go to sleep or return. | 2954 | * state and go to sleep or return. |
2997 | * If it is in any other state, go to sleep or return. | 2955 | * If it is in any other state, go to sleep or return. |
2998 | * | 2956 | * |
2999 | * If filesystem activity goes to zero, the iclog will get flushed only by | 2957 | * Synchronous forces are implemented with a signal variable. All callers |
3000 | * bdflush(). | 2958 | * to force a given lsn to disk will wait on a the sv attached to the |
2959 | * specific in-core log. When given in-core log finally completes its | ||
2960 | * write to disk, that thread will wake up all threads waiting on the | ||
2961 | * sv. | ||
3001 | */ | 2962 | */ |
3002 | STATIC int | 2963 | int |
3003 | xlog_state_sync(xlog_t *log, | 2964 | _xfs_log_force_lsn( |
3004 | xfs_lsn_t lsn, | 2965 | struct xfs_mount *mp, |
3005 | uint flags, | 2966 | xfs_lsn_t lsn, |
3006 | int *log_flushed) | 2967 | uint flags, |
2968 | int *log_flushed) | ||
3007 | { | 2969 | { |
3008 | xlog_in_core_t *iclog; | 2970 | struct log *log = mp->m_log; |
3009 | int already_slept = 0; | 2971 | struct xlog_in_core *iclog; |
3010 | 2972 | int already_slept = 0; | |
3011 | try_again: | ||
3012 | spin_lock(&log->l_icloglock); | ||
3013 | iclog = log->l_iclog; | ||
3014 | 2973 | ||
3015 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 2974 | ASSERT(lsn != 0); |
3016 | spin_unlock(&log->l_icloglock); | ||
3017 | return XFS_ERROR(EIO); | ||
3018 | } | ||
3019 | 2975 | ||
3020 | do { | 2976 | XFS_STATS_INC(xs_log_force); |
3021 | if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { | ||
3022 | iclog = iclog->ic_next; | ||
3023 | continue; | ||
3024 | } | ||
3025 | 2977 | ||
3026 | if (iclog->ic_state == XLOG_STATE_DIRTY) { | 2978 | try_again: |
2979 | spin_lock(&log->l_icloglock); | ||
2980 | iclog = log->l_iclog; | ||
2981 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
3027 | spin_unlock(&log->l_icloglock); | 2982 | spin_unlock(&log->l_icloglock); |
3028 | return 0; | 2983 | return XFS_ERROR(EIO); |
3029 | } | 2984 | } |
3030 | 2985 | ||
3031 | if (iclog->ic_state == XLOG_STATE_ACTIVE) { | 2986 | do { |
3032 | /* | 2987 | if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { |
3033 | * We sleep here if we haven't already slept (e.g. | 2988 | iclog = iclog->ic_next; |
3034 | * this is the first time we've looked at the correct | 2989 | continue; |
3035 | * iclog buf) and the buffer before us is going to | 2990 | } |
3036 | * be sync'ed. The reason for this is that if we | 2991 | |
3037 | * are doing sync transactions here, by waiting for | 2992 | if (iclog->ic_state == XLOG_STATE_DIRTY) { |
3038 | * the previous I/O to complete, we can allow a few | 2993 | spin_unlock(&log->l_icloglock); |
3039 | * more transactions into this iclog before we close | 2994 | return 0; |
3040 | * it down. | 2995 | } |
3041 | * | 2996 | |
3042 | * Otherwise, we mark the buffer WANT_SYNC, and bump | 2997 | if (iclog->ic_state == XLOG_STATE_ACTIVE) { |
3043 | * up the refcnt so we can release the log (which drops | 2998 | /* |
3044 | * the ref count). The state switch keeps new transaction | 2999 | * We sleep here if we haven't already slept (e.g. |
3045 | * commits from using this buffer. When the current commits | 3000 | * this is the first time we've looked at the correct |
3046 | * finish writing into the buffer, the refcount will drop to | 3001 | * iclog buf) and the buffer before us is going to |
3047 | * zero and the buffer will go out then. | 3002 | * be sync'ed. The reason for this is that if we |
3048 | */ | 3003 | * are doing sync transactions here, by waiting for |
3049 | if (!already_slept && | 3004 | * the previous I/O to complete, we can allow a few |
3050 | (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | | 3005 | * more transactions into this iclog before we close |
3051 | XLOG_STATE_SYNCING))) { | 3006 | * it down. |
3052 | ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); | 3007 | * |
3053 | XFS_STATS_INC(xs_log_force_sleep); | 3008 | * Otherwise, we mark the buffer WANT_SYNC, and bump |
3054 | sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, | 3009 | * up the refcnt so we can release the log (which |
3055 | &log->l_icloglock, s); | 3010 | * drops the ref count). The state switch keeps new |
3056 | *log_flushed = 1; | 3011 | * transaction commits from using this buffer. When |
3057 | already_slept = 1; | 3012 | * the current commits finish writing into the buffer, |
3058 | goto try_again; | 3013 | * the refcount will drop to zero and the buffer will |
3059 | } else { | 3014 | * go out then. |
3015 | */ | ||
3016 | if (!already_slept && | ||
3017 | (iclog->ic_prev->ic_state & | ||
3018 | (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { | ||
3019 | ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); | ||
3020 | |||
3021 | XFS_STATS_INC(xs_log_force_sleep); | ||
3022 | |||
3023 | sv_wait(&iclog->ic_prev->ic_write_wait, | ||
3024 | PSWP, &log->l_icloglock, s); | ||
3025 | if (log_flushed) | ||
3026 | *log_flushed = 1; | ||
3027 | already_slept = 1; | ||
3028 | goto try_again; | ||
3029 | } | ||
3060 | atomic_inc(&iclog->ic_refcnt); | 3030 | atomic_inc(&iclog->ic_refcnt); |
3061 | xlog_state_switch_iclogs(log, iclog, 0); | 3031 | xlog_state_switch_iclogs(log, iclog, 0); |
3062 | spin_unlock(&log->l_icloglock); | 3032 | spin_unlock(&log->l_icloglock); |
3063 | if (xlog_state_release_iclog(log, iclog)) | 3033 | if (xlog_state_release_iclog(log, iclog)) |
3064 | return XFS_ERROR(EIO); | 3034 | return XFS_ERROR(EIO); |
3065 | *log_flushed = 1; | 3035 | if (log_flushed) |
3036 | *log_flushed = 1; | ||
3066 | spin_lock(&log->l_icloglock); | 3037 | spin_lock(&log->l_icloglock); |
3067 | } | 3038 | } |
3068 | } | ||
3069 | 3039 | ||
3070 | if ((flags & XFS_LOG_SYNC) && /* sleep */ | 3040 | if ((flags & XFS_LOG_SYNC) && /* sleep */ |
3071 | !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { | 3041 | !(iclog->ic_state & |
3042 | (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { | ||
3043 | /* | ||
3044 | * Don't wait on completion if we know that we've | ||
3045 | * gotten a log write error. | ||
3046 | */ | ||
3047 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
3048 | spin_unlock(&log->l_icloglock); | ||
3049 | return XFS_ERROR(EIO); | ||
3050 | } | ||
3051 | XFS_STATS_INC(xs_log_force_sleep); | ||
3052 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | ||
3053 | /* | ||
3054 | * No need to grab the log lock here since we're | ||
3055 | * only deciding whether or not to return EIO | ||
3056 | * and the memory read should be atomic. | ||
3057 | */ | ||
3058 | if (iclog->ic_state & XLOG_STATE_IOERROR) | ||
3059 | return XFS_ERROR(EIO); | ||
3072 | 3060 | ||
3073 | /* | 3061 | if (log_flushed) |
3074 | * Don't wait on completion if we know that we've | 3062 | *log_flushed = 1; |
3075 | * gotten a log write error. | 3063 | } else { /* just return */ |
3076 | */ | ||
3077 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
3078 | spin_unlock(&log->l_icloglock); | 3064 | spin_unlock(&log->l_icloglock); |
3079 | return XFS_ERROR(EIO); | ||
3080 | } | 3065 | } |
3081 | XFS_STATS_INC(xs_log_force_sleep); | ||
3082 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | ||
3083 | /* | ||
3084 | * No need to grab the log lock here since we're | ||
3085 | * only deciding whether or not to return EIO | ||
3086 | * and the memory read should be atomic. | ||
3087 | */ | ||
3088 | if (iclog->ic_state & XLOG_STATE_IOERROR) | ||
3089 | return XFS_ERROR(EIO); | ||
3090 | *log_flushed = 1; | ||
3091 | } else { /* just return */ | ||
3092 | spin_unlock(&log->l_icloglock); | ||
3093 | } | ||
3094 | return 0; | ||
3095 | 3066 | ||
3096 | } while (iclog != log->l_iclog); | 3067 | return 0; |
3068 | } while (iclog != log->l_iclog); | ||
3097 | 3069 | ||
3098 | spin_unlock(&log->l_icloglock); | 3070 | spin_unlock(&log->l_icloglock); |
3099 | return 0; | 3071 | return 0; |
3100 | } /* xlog_state_sync */ | 3072 | } |
3073 | |||
3074 | /* | ||
3075 | * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care | ||
3076 | * about errors or whether the log was flushed or not. This is the normal | ||
3077 | * interface to use when trying to unpin items or move the log forward. | ||
3078 | */ | ||
3079 | void | ||
3080 | xfs_log_force_lsn( | ||
3081 | xfs_mount_t *mp, | ||
3082 | xfs_lsn_t lsn, | ||
3083 | uint flags) | ||
3084 | { | ||
3085 | int error; | ||
3101 | 3086 | ||
3087 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); | ||
3088 | if (error) { | ||
3089 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
3090 | "error %d returned.", error); | ||
3091 | } | ||
3092 | } | ||
3102 | 3093 | ||
3103 | /* | 3094 | /* |
3104 | * Called when we want to mark the current iclog as being ready to sync to | 3095 | * Called when we want to mark the current iclog as being ready to sync to |
@@ -3463,7 +3454,6 @@ xfs_log_force_umount( | |||
3463 | xlog_ticket_t *tic; | 3454 | xlog_ticket_t *tic; |
3464 | xlog_t *log; | 3455 | xlog_t *log; |
3465 | int retval; | 3456 | int retval; |
3466 | int dummy; | ||
3467 | 3457 | ||
3468 | log = mp->m_log; | 3458 | log = mp->m_log; |
3469 | 3459 | ||
@@ -3537,13 +3527,14 @@ xfs_log_force_umount( | |||
3537 | } | 3527 | } |
3538 | spin_unlock(&log->l_grant_lock); | 3528 | spin_unlock(&log->l_grant_lock); |
3539 | 3529 | ||
3540 | if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | 3530 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { |
3541 | ASSERT(!logerror); | 3531 | ASSERT(!logerror); |
3542 | /* | 3532 | /* |
3543 | * Force the incore logs to disk before shutting the | 3533 | * Force the incore logs to disk before shutting the |
3544 | * log down completely. | 3534 | * log down completely. |
3545 | */ | 3535 | */ |
3546 | xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); | 3536 | _xfs_log_force(mp, XFS_LOG_SYNC, NULL); |
3537 | |||
3547 | spin_lock(&log->l_icloglock); | 3538 | spin_lock(&log->l_icloglock); |
3548 | retval = xlog_state_ioerror(log); | 3539 | retval = xlog_state_ioerror(log); |
3549 | spin_unlock(&log->l_icloglock); | 3540 | spin_unlock(&log->l_icloglock); |