diff options
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r-- | fs/xfs/xfs_log.c | 525 |
1 files changed, 265 insertions, 260 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 600b5b06aaeb..2be019136287 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone; | |||
50 | (off) += (bytes);} | 50 | (off) += (bytes);} |
51 | 51 | ||
52 | /* Local miscellaneous function prototypes */ | 52 | /* Local miscellaneous function prototypes */ |
53 | STATIC int xlog_bdstrat_cb(struct xfs_buf *); | ||
54 | STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, | 53 | STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, |
55 | xlog_in_core_t **, xfs_lsn_t *); | 54 | xlog_in_core_t **, xfs_lsn_t *); |
56 | STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | 55 | STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, |
@@ -61,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | |||
61 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 60 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
62 | STATIC void xlog_dealloc_log(xlog_t *log); | 61 | STATIC void xlog_dealloc_log(xlog_t *log); |
63 | STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], | 62 | STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], |
64 | int nentries, xfs_log_ticket_t tic, | 63 | int nentries, struct xlog_ticket *tic, |
65 | xfs_lsn_t *start_lsn, | 64 | xfs_lsn_t *start_lsn, |
66 | xlog_in_core_t **commit_iclog, | 65 | xlog_in_core_t **commit_iclog, |
67 | uint flags); | 66 | uint flags); |
@@ -80,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log, | |||
80 | STATIC void xlog_state_switch_iclogs(xlog_t *log, | 79 | STATIC void xlog_state_switch_iclogs(xlog_t *log, |
81 | xlog_in_core_t *iclog, | 80 | xlog_in_core_t *iclog, |
82 | int eventual_size); | 81 | int eventual_size); |
83 | STATIC int xlog_state_sync(xlog_t *log, | ||
84 | xfs_lsn_t lsn, | ||
85 | uint flags, | ||
86 | int *log_flushed); | ||
87 | STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed); | ||
88 | STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); | 82 | STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); |
89 | 83 | ||
90 | /* local functions to manipulate grant head */ | 84 | /* local functions to manipulate grant head */ |
@@ -249,14 +243,14 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) | |||
249 | * out when the next write occurs. | 243 | * out when the next write occurs. |
250 | */ | 244 | */ |
251 | xfs_lsn_t | 245 | xfs_lsn_t |
252 | xfs_log_done(xfs_mount_t *mp, | 246 | xfs_log_done( |
253 | xfs_log_ticket_t xtic, | 247 | struct xfs_mount *mp, |
254 | void **iclog, | 248 | struct xlog_ticket *ticket, |
255 | uint flags) | 249 | struct xlog_in_core **iclog, |
250 | uint flags) | ||
256 | { | 251 | { |
257 | xlog_t *log = mp->m_log; | 252 | struct log *log = mp->m_log; |
258 | xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic; | 253 | xfs_lsn_t lsn = 0; |
259 | xfs_lsn_t lsn = 0; | ||
260 | 254 | ||
261 | if (XLOG_FORCED_SHUTDOWN(log) || | 255 | if (XLOG_FORCED_SHUTDOWN(log) || |
262 | /* | 256 | /* |
@@ -264,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp, | |||
264 | * If we get an error, just continue and give back the log ticket. | 258 | * If we get an error, just continue and give back the log ticket. |
265 | */ | 259 | */ |
266 | (((ticket->t_flags & XLOG_TIC_INITED) == 0) && | 260 | (((ticket->t_flags & XLOG_TIC_INITED) == 0) && |
267 | (xlog_commit_record(mp, ticket, | 261 | (xlog_commit_record(mp, ticket, iclog, &lsn)))) { |
268 | (xlog_in_core_t **)iclog, &lsn)))) { | ||
269 | lsn = (xfs_lsn_t) -1; | 262 | lsn = (xfs_lsn_t) -1; |
270 | if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { | 263 | if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { |
271 | flags |= XFS_LOG_REL_PERM_RESERV; | 264 | flags |= XFS_LOG_REL_PERM_RESERV; |
@@ -295,67 +288,8 @@ xfs_log_done(xfs_mount_t *mp, | |||
295 | } | 288 | } |
296 | 289 | ||
297 | return lsn; | 290 | return lsn; |
298 | } /* xfs_log_done */ | ||
299 | |||
300 | |||
301 | /* | ||
302 | * Force the in-core log to disk. If flags == XFS_LOG_SYNC, | ||
303 | * the force is done synchronously. | ||
304 | * | ||
305 | * Asynchronous forces are implemented by setting the WANT_SYNC | ||
306 | * bit in the appropriate in-core log and then returning. | ||
307 | * | ||
308 | * Synchronous forces are implemented with a signal variable. All callers | ||
309 | * to force a given lsn to disk will wait on a the sv attached to the | ||
310 | * specific in-core log. When given in-core log finally completes its | ||
311 | * write to disk, that thread will wake up all threads waiting on the | ||
312 | * sv. | ||
313 | */ | ||
314 | int | ||
315 | _xfs_log_force( | ||
316 | xfs_mount_t *mp, | ||
317 | xfs_lsn_t lsn, | ||
318 | uint flags, | ||
319 | int *log_flushed) | ||
320 | { | ||
321 | xlog_t *log = mp->m_log; | ||
322 | int dummy; | ||
323 | |||
324 | if (!log_flushed) | ||
325 | log_flushed = &dummy; | ||
326 | |||
327 | ASSERT(flags & XFS_LOG_FORCE); | ||
328 | |||
329 | XFS_STATS_INC(xs_log_force); | ||
330 | |||
331 | if (log->l_flags & XLOG_IO_ERROR) | ||
332 | return XFS_ERROR(EIO); | ||
333 | if (lsn == 0) | ||
334 | return xlog_state_sync_all(log, flags, log_flushed); | ||
335 | else | ||
336 | return xlog_state_sync(log, lsn, flags, log_flushed); | ||
337 | } /* _xfs_log_force */ | ||
338 | |||
339 | /* | ||
340 | * Wrapper for _xfs_log_force(), to be used when caller doesn't care | ||
341 | * about errors or whether the log was flushed or not. This is the normal | ||
342 | * interface to use when trying to unpin items or move the log forward. | ||
343 | */ | ||
344 | void | ||
345 | xfs_log_force( | ||
346 | xfs_mount_t *mp, | ||
347 | xfs_lsn_t lsn, | ||
348 | uint flags) | ||
349 | { | ||
350 | int error; | ||
351 | error = _xfs_log_force(mp, lsn, flags, NULL); | ||
352 | if (error) { | ||
353 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
354 | "error %d returned.", error); | ||
355 | } | ||
356 | } | 291 | } |
357 | 292 | ||
358 | |||
359 | /* | 293 | /* |
360 | * Attaches a new iclog I/O completion callback routine during | 294 | * Attaches a new iclog I/O completion callback routine during |
361 | * transaction commit. If the log is in error state, a non-zero | 295 | * transaction commit. If the log is in error state, a non-zero |
@@ -363,11 +297,11 @@ xfs_log_force( | |||
363 | * executing the callback at an appropriate time. | 297 | * executing the callback at an appropriate time. |
364 | */ | 298 | */ |
365 | int | 299 | int |
366 | xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | 300 | xfs_log_notify( |
367 | void *iclog_hndl, /* iclog to hang callback off */ | 301 | struct xfs_mount *mp, |
368 | xfs_log_callback_t *cb) | 302 | struct xlog_in_core *iclog, |
303 | xfs_log_callback_t *cb) | ||
369 | { | 304 | { |
370 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; | ||
371 | int abortflg; | 305 | int abortflg; |
372 | 306 | ||
373 | spin_lock(&iclog->ic_callback_lock); | 307 | spin_lock(&iclog->ic_callback_lock); |
@@ -381,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | |||
381 | } | 315 | } |
382 | spin_unlock(&iclog->ic_callback_lock); | 316 | spin_unlock(&iclog->ic_callback_lock); |
383 | return abortflg; | 317 | return abortflg; |
384 | } /* xfs_log_notify */ | 318 | } |
385 | 319 | ||
386 | int | 320 | int |
387 | xfs_log_release_iclog(xfs_mount_t *mp, | 321 | xfs_log_release_iclog( |
388 | void *iclog_hndl) | 322 | struct xfs_mount *mp, |
323 | struct xlog_in_core *iclog) | ||
389 | { | 324 | { |
390 | xlog_t *log = mp->m_log; | 325 | if (xlog_state_release_iclog(mp->m_log, iclog)) { |
391 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; | ||
392 | |||
393 | if (xlog_state_release_iclog(log, iclog)) { | ||
394 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 326 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
395 | return EIO; | 327 | return EIO; |
396 | } | 328 | } |
@@ -409,17 +341,18 @@ xfs_log_release_iclog(xfs_mount_t *mp, | |||
409 | * reservation, we prevent over allocation problems. | 341 | * reservation, we prevent over allocation problems. |
410 | */ | 342 | */ |
411 | int | 343 | int |
412 | xfs_log_reserve(xfs_mount_t *mp, | 344 | xfs_log_reserve( |
413 | int unit_bytes, | 345 | struct xfs_mount *mp, |
414 | int cnt, | 346 | int unit_bytes, |
415 | xfs_log_ticket_t *ticket, | 347 | int cnt, |
416 | __uint8_t client, | 348 | struct xlog_ticket **ticket, |
417 | uint flags, | 349 | __uint8_t client, |
418 | uint t_type) | 350 | uint flags, |
351 | uint t_type) | ||
419 | { | 352 | { |
420 | xlog_t *log = mp->m_log; | 353 | struct log *log = mp->m_log; |
421 | xlog_ticket_t *internal_ticket; | 354 | struct xlog_ticket *internal_ticket; |
422 | int retval = 0; | 355 | int retval = 0; |
423 | 356 | ||
424 | ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); | 357 | ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); |
425 | ASSERT((flags & XFS_LOG_NOSLEEP) == 0); | 358 | ASSERT((flags & XFS_LOG_NOSLEEP) == 0); |
@@ -432,7 +365,7 @@ xfs_log_reserve(xfs_mount_t *mp, | |||
432 | 365 | ||
433 | if (*ticket != NULL) { | 366 | if (*ticket != NULL) { |
434 | ASSERT(flags & XFS_LOG_PERM_RESERV); | 367 | ASSERT(flags & XFS_LOG_PERM_RESERV); |
435 | internal_ticket = (xlog_ticket_t *)*ticket; | 368 | internal_ticket = *ticket; |
436 | 369 | ||
437 | trace_xfs_log_reserve(log, internal_ticket); | 370 | trace_xfs_log_reserve(log, internal_ticket); |
438 | 371 | ||
@@ -584,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
584 | xlog_in_core_t *first_iclog; | 517 | xlog_in_core_t *first_iclog; |
585 | #endif | 518 | #endif |
586 | xfs_log_iovec_t reg[1]; | 519 | xfs_log_iovec_t reg[1]; |
587 | xfs_log_ticket_t tic = NULL; | 520 | xlog_ticket_t *tic = NULL; |
588 | xfs_lsn_t lsn; | 521 | xfs_lsn_t lsn; |
589 | int error; | 522 | int error; |
590 | 523 | ||
@@ -602,7 +535,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
602 | if (mp->m_flags & XFS_MOUNT_RDONLY) | 535 | if (mp->m_flags & XFS_MOUNT_RDONLY) |
603 | return 0; | 536 | return 0; |
604 | 537 | ||
605 | error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); | 538 | error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); |
606 | ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); | 539 | ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); |
607 | 540 | ||
608 | #ifdef DEBUG | 541 | #ifdef DEBUG |
@@ -618,7 +551,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
618 | if (! (XLOG_FORCED_SHUTDOWN(log))) { | 551 | if (! (XLOG_FORCED_SHUTDOWN(log))) { |
619 | reg[0].i_addr = (void*)&magic; | 552 | reg[0].i_addr = (void*)&magic; |
620 | reg[0].i_len = sizeof(magic); | 553 | reg[0].i_len = sizeof(magic); |
621 | XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); | 554 | reg[0].i_type = XLOG_REG_TYPE_UNMOUNT; |
622 | 555 | ||
623 | error = xfs_log_reserve(mp, 600, 1, &tic, | 556 | error = xfs_log_reserve(mp, 600, 1, &tic, |
624 | XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); | 557 | XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); |
@@ -721,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp) | |||
721 | * transaction occur with one call to xfs_log_write(). | 654 | * transaction occur with one call to xfs_log_write(). |
722 | */ | 655 | */ |
723 | int | 656 | int |
724 | xfs_log_write(xfs_mount_t * mp, | 657 | xfs_log_write( |
725 | xfs_log_iovec_t reg[], | 658 | struct xfs_mount *mp, |
726 | int nentries, | 659 | struct xfs_log_iovec reg[], |
727 | xfs_log_ticket_t tic, | 660 | int nentries, |
728 | xfs_lsn_t *start_lsn) | 661 | struct xlog_ticket *tic, |
662 | xfs_lsn_t *start_lsn) | ||
729 | { | 663 | { |
730 | int error; | 664 | struct log *log = mp->m_log; |
731 | xlog_t *log = mp->m_log; | 665 | int error; |
732 | 666 | ||
733 | if (XLOG_FORCED_SHUTDOWN(log)) | 667 | if (XLOG_FORCED_SHUTDOWN(log)) |
734 | return XFS_ERROR(EIO); | 668 | return XFS_ERROR(EIO); |
735 | 669 | ||
736 | if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { | 670 | error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0); |
671 | if (error) | ||
737 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 672 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
738 | } | ||
739 | return error; | 673 | return error; |
740 | } /* xfs_log_write */ | 674 | } |
741 | |||
742 | 675 | ||
743 | void | 676 | void |
744 | xfs_log_move_tail(xfs_mount_t *mp, | 677 | xfs_log_move_tail(xfs_mount_t *mp, |
@@ -812,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
812 | 745 | ||
813 | /* | 746 | /* |
814 | * Determine if we have a transaction that has gone to disk | 747 | * Determine if we have a transaction that has gone to disk |
815 | * that needs to be covered. Log activity needs to be idle (no AIL and | 748 | * that needs to be covered. To begin the transition to the idle state |
816 | * nothing in the iclogs). And, we need to be in the right state indicating | 749 | * firstly the log needs to be idle (no AIL and nothing in the iclogs). |
817 | * something has gone out. | 750 | * If we are then in a state where covering is needed, the caller is informed |
751 | * that dummy transactions are required to move the log into the idle state. | ||
752 | * | ||
753 | * Because this is called as part of the sync process, we should also indicate | ||
754 | * that dummy transactions should be issued in anything but the covered or | ||
755 | * idle states. This ensures that the log tail is accurately reflected in | ||
756 | * the log at the end of the sync, hence if a crash occurs avoids replay | ||
757 | * of transactions where the metadata is already on disk. | ||
818 | */ | 758 | */ |
819 | int | 759 | int |
820 | xfs_log_need_covered(xfs_mount_t *mp) | 760 | xfs_log_need_covered(xfs_mount_t *mp) |
@@ -826,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
826 | return 0; | 766 | return 0; |
827 | 767 | ||
828 | spin_lock(&log->l_icloglock); | 768 | spin_lock(&log->l_icloglock); |
829 | if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || | 769 | switch (log->l_covered_state) { |
830 | (log->l_covered_state == XLOG_STATE_COVER_NEED2)) | 770 | case XLOG_STATE_COVER_DONE: |
831 | && !xfs_trans_ail_tail(log->l_ailp) | 771 | case XLOG_STATE_COVER_DONE2: |
832 | && xlog_iclogs_empty(log)) { | 772 | case XLOG_STATE_COVER_IDLE: |
833 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) | 773 | break; |
834 | log->l_covered_state = XLOG_STATE_COVER_DONE; | 774 | case XLOG_STATE_COVER_NEED: |
835 | else { | 775 | case XLOG_STATE_COVER_NEED2: |
836 | ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2); | 776 | if (!xfs_trans_ail_tail(log->l_ailp) && |
837 | log->l_covered_state = XLOG_STATE_COVER_DONE2; | 777 | xlog_iclogs_empty(log)) { |
778 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) | ||
779 | log->l_covered_state = XLOG_STATE_COVER_DONE; | ||
780 | else | ||
781 | log->l_covered_state = XLOG_STATE_COVER_DONE2; | ||
838 | } | 782 | } |
783 | /* FALLTHRU */ | ||
784 | default: | ||
839 | needed = 1; | 785 | needed = 1; |
786 | break; | ||
840 | } | 787 | } |
841 | spin_unlock(&log->l_icloglock); | 788 | spin_unlock(&log->l_icloglock); |
842 | return needed; | 789 | return needed; |
@@ -988,35 +935,6 @@ xlog_iodone(xfs_buf_t *bp) | |||
988 | } /* xlog_iodone */ | 935 | } /* xlog_iodone */ |
989 | 936 | ||
990 | /* | 937 | /* |
991 | * The bdstrat callback function for log bufs. This gives us a central | ||
992 | * place to trap bufs in case we get hit by a log I/O error and need to | ||
993 | * shutdown. Actually, in practice, even when we didn't get a log error, | ||
994 | * we transition the iclogs to IOERROR state *after* flushing all existing | ||
995 | * iclogs to disk. This is because we don't want anymore new transactions to be | ||
996 | * started or completed afterwards. | ||
997 | */ | ||
998 | STATIC int | ||
999 | xlog_bdstrat_cb(struct xfs_buf *bp) | ||
1000 | { | ||
1001 | xlog_in_core_t *iclog; | ||
1002 | |||
1003 | iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); | ||
1004 | |||
1005 | if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) { | ||
1006 | /* note for irix bstrat will need struct bdevsw passed | ||
1007 | * Fix the following macro if the code ever is merged | ||
1008 | */ | ||
1009 | XFS_bdstrat(bp); | ||
1010 | return 0; | ||
1011 | } | ||
1012 | |||
1013 | XFS_BUF_ERROR(bp, EIO); | ||
1014 | XFS_BUF_STALE(bp); | ||
1015 | xfs_biodone(bp); | ||
1016 | return XFS_ERROR(EIO); | ||
1017 | } | ||
1018 | |||
1019 | /* | ||
1020 | * Return size of each in-core log record buffer. | 938 | * Return size of each in-core log record buffer. |
1021 | * | 939 | * |
1022 | * All machines get 8 x 32kB buffers by default, unless tuned otherwise. | 940 | * All machines get 8 x 32kB buffers by default, unless tuned otherwise. |
@@ -1158,7 +1076,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1158 | if (!bp) | 1076 | if (!bp) |
1159 | goto out_free_log; | 1077 | goto out_free_log; |
1160 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); | 1078 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); |
1161 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); | ||
1162 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | 1079 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); |
1163 | ASSERT(XFS_BUF_ISBUSY(bp)); | 1080 | ASSERT(XFS_BUF_ISBUSY(bp)); |
1164 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 1081 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
@@ -1196,7 +1113,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1196 | if (!XFS_BUF_CPSEMA(bp)) | 1113 | if (!XFS_BUF_CPSEMA(bp)) |
1197 | ASSERT(0); | 1114 | ASSERT(0); |
1198 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); | 1115 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); |
1199 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); | ||
1200 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | 1116 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); |
1201 | iclog->ic_bp = bp; | 1117 | iclog->ic_bp = bp; |
1202 | iclog->ic_data = bp->b_addr; | 1118 | iclog->ic_data = bp->b_addr; |
@@ -1268,7 +1184,7 @@ xlog_commit_record(xfs_mount_t *mp, | |||
1268 | 1184 | ||
1269 | reg[0].i_addr = NULL; | 1185 | reg[0].i_addr = NULL; |
1270 | reg[0].i_len = 0; | 1186 | reg[0].i_len = 0; |
1271 | XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); | 1187 | reg[0].i_type = XLOG_REG_TYPE_COMMIT; |
1272 | 1188 | ||
1273 | ASSERT_ALWAYS(iclog); | 1189 | ASSERT_ALWAYS(iclog); |
1274 | if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, | 1190 | if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, |
@@ -1343,6 +1259,37 @@ xlog_grant_push_ail(xfs_mount_t *mp, | |||
1343 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); | 1259 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); |
1344 | } /* xlog_grant_push_ail */ | 1260 | } /* xlog_grant_push_ail */ |
1345 | 1261 | ||
1262 | /* | ||
1263 | * The bdstrat callback function for log bufs. This gives us a central | ||
1264 | * place to trap bufs in case we get hit by a log I/O error and need to | ||
1265 | * shutdown. Actually, in practice, even when we didn't get a log error, | ||
1266 | * we transition the iclogs to IOERROR state *after* flushing all existing | ||
1267 | * iclogs to disk. This is because we don't want anymore new transactions to be | ||
1268 | * started or completed afterwards. | ||
1269 | */ | ||
1270 | STATIC int | ||
1271 | xlog_bdstrat( | ||
1272 | struct xfs_buf *bp) | ||
1273 | { | ||
1274 | struct xlog_in_core *iclog; | ||
1275 | |||
1276 | iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); | ||
1277 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
1278 | XFS_BUF_ERROR(bp, EIO); | ||
1279 | XFS_BUF_STALE(bp); | ||
1280 | xfs_biodone(bp); | ||
1281 | /* | ||
1282 | * It would seem logical to return EIO here, but we rely on | ||
1283 | * the log state machine to propagate I/O errors instead of | ||
1284 | * doing it here. | ||
1285 | */ | ||
1286 | return 0; | ||
1287 | } | ||
1288 | |||
1289 | bp->b_flags |= _XBF_RUN_QUEUES; | ||
1290 | xfs_buf_iorequest(bp); | ||
1291 | return 0; | ||
1292 | } | ||
1346 | 1293 | ||
1347 | /* | 1294 | /* |
1348 | * Flush out the in-core log (iclog) to the on-disk log in an asynchronous | 1295 | * Flush out the in-core log (iclog) to the on-disk log in an asynchronous |
@@ -1462,7 +1409,7 @@ xlog_sync(xlog_t *log, | |||
1462 | */ | 1409 | */ |
1463 | XFS_BUF_WRITE(bp); | 1410 | XFS_BUF_WRITE(bp); |
1464 | 1411 | ||
1465 | if ((error = XFS_bwrite(bp))) { | 1412 | if ((error = xlog_bdstrat(bp))) { |
1466 | xfs_ioerror_alert("xlog_sync", log->l_mp, bp, | 1413 | xfs_ioerror_alert("xlog_sync", log->l_mp, bp, |
1467 | XFS_BUF_ADDR(bp)); | 1414 | XFS_BUF_ADDR(bp)); |
1468 | return error; | 1415 | return error; |
@@ -1502,7 +1449,7 @@ xlog_sync(xlog_t *log, | |||
1502 | /* account for internal log which doesn't start at block #0 */ | 1449 | /* account for internal log which doesn't start at block #0 */ |
1503 | XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); | 1450 | XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); |
1504 | XFS_BUF_WRITE(bp); | 1451 | XFS_BUF_WRITE(bp); |
1505 | if ((error = XFS_bwrite(bp))) { | 1452 | if ((error = xlog_bdstrat(bp))) { |
1506 | xfs_ioerror_alert("xlog_sync (split)", log->l_mp, | 1453 | xfs_ioerror_alert("xlog_sync (split)", log->l_mp, |
1507 | bp, XFS_BUF_ADDR(bp)); | 1454 | bp, XFS_BUF_ADDR(bp)); |
1508 | return error; | 1455 | return error; |
@@ -1707,16 +1654,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | |||
1707 | * bytes have been written out. | 1654 | * bytes have been written out. |
1708 | */ | 1655 | */ |
1709 | STATIC int | 1656 | STATIC int |
1710 | xlog_write(xfs_mount_t * mp, | 1657 | xlog_write( |
1711 | xfs_log_iovec_t reg[], | 1658 | struct xfs_mount *mp, |
1712 | int nentries, | 1659 | struct xfs_log_iovec reg[], |
1713 | xfs_log_ticket_t tic, | 1660 | int nentries, |
1714 | xfs_lsn_t *start_lsn, | 1661 | struct xlog_ticket *ticket, |
1715 | xlog_in_core_t **commit_iclog, | 1662 | xfs_lsn_t *start_lsn, |
1716 | uint flags) | 1663 | struct xlog_in_core **commit_iclog, |
1664 | uint flags) | ||
1717 | { | 1665 | { |
1718 | xlog_t *log = mp->m_log; | 1666 | xlog_t *log = mp->m_log; |
1719 | xlog_ticket_t *ticket = (xlog_ticket_t *)tic; | ||
1720 | xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ | 1667 | xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ |
1721 | xlog_op_header_t *logop_head; /* ptr to log operation header */ | 1668 | xlog_op_header_t *logop_head; /* ptr to log operation header */ |
1722 | __psint_t ptr; /* copy address into data region */ | 1669 | __psint_t ptr; /* copy address into data region */ |
@@ -1830,7 +1777,7 @@ xlog_write(xfs_mount_t * mp, | |||
1830 | default: | 1777 | default: |
1831 | xfs_fs_cmn_err(CE_WARN, mp, | 1778 | xfs_fs_cmn_err(CE_WARN, mp, |
1832 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", | 1779 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", |
1833 | logop_head->oh_clientid, tic); | 1780 | logop_head->oh_clientid, ticket); |
1834 | return XFS_ERROR(EIO); | 1781 | return XFS_ERROR(EIO); |
1835 | } | 1782 | } |
1836 | 1783 | ||
@@ -2854,7 +2801,6 @@ xlog_state_switch_iclogs(xlog_t *log, | |||
2854 | log->l_iclog = iclog->ic_next; | 2801 | log->l_iclog = iclog->ic_next; |
2855 | } /* xlog_state_switch_iclogs */ | 2802 | } /* xlog_state_switch_iclogs */ |
2856 | 2803 | ||
2857 | |||
2858 | /* | 2804 | /* |
2859 | * Write out all data in the in-core log as of this exact moment in time. | 2805 | * Write out all data in the in-core log as of this exact moment in time. |
2860 | * | 2806 | * |
@@ -2882,11 +2828,17 @@ xlog_state_switch_iclogs(xlog_t *log, | |||
2882 | * b) when we return from flushing out this iclog, it is still | 2828 | * b) when we return from flushing out this iclog, it is still |
2883 | * not in the active nor dirty state. | 2829 | * not in the active nor dirty state. |
2884 | */ | 2830 | */ |
2885 | STATIC int | 2831 | int |
2886 | xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | 2832 | _xfs_log_force( |
2833 | struct xfs_mount *mp, | ||
2834 | uint flags, | ||
2835 | int *log_flushed) | ||
2887 | { | 2836 | { |
2888 | xlog_in_core_t *iclog; | 2837 | struct log *log = mp->m_log; |
2889 | xfs_lsn_t lsn; | 2838 | struct xlog_in_core *iclog; |
2839 | xfs_lsn_t lsn; | ||
2840 | |||
2841 | XFS_STATS_INC(xs_log_force); | ||
2890 | 2842 | ||
2891 | spin_lock(&log->l_icloglock); | 2843 | spin_lock(&log->l_icloglock); |
2892 | 2844 | ||
@@ -2932,7 +2884,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | |||
2932 | 2884 | ||
2933 | if (xlog_state_release_iclog(log, iclog)) | 2885 | if (xlog_state_release_iclog(log, iclog)) |
2934 | return XFS_ERROR(EIO); | 2886 | return XFS_ERROR(EIO); |
2935 | *log_flushed = 1; | 2887 | |
2888 | if (log_flushed) | ||
2889 | *log_flushed = 1; | ||
2936 | spin_lock(&log->l_icloglock); | 2890 | spin_lock(&log->l_icloglock); |
2937 | if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && | 2891 | if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && |
2938 | iclog->ic_state != XLOG_STATE_DIRTY) | 2892 | iclog->ic_state != XLOG_STATE_DIRTY) |
@@ -2976,19 +2930,37 @@ maybe_sleep: | |||
2976 | */ | 2930 | */ |
2977 | if (iclog->ic_state & XLOG_STATE_IOERROR) | 2931 | if (iclog->ic_state & XLOG_STATE_IOERROR) |
2978 | return XFS_ERROR(EIO); | 2932 | return XFS_ERROR(EIO); |
2979 | *log_flushed = 1; | 2933 | if (log_flushed) |
2980 | 2934 | *log_flushed = 1; | |
2981 | } else { | 2935 | } else { |
2982 | 2936 | ||
2983 | no_sleep: | 2937 | no_sleep: |
2984 | spin_unlock(&log->l_icloglock); | 2938 | spin_unlock(&log->l_icloglock); |
2985 | } | 2939 | } |
2986 | return 0; | 2940 | return 0; |
2987 | } /* xlog_state_sync_all */ | 2941 | } |
2942 | |||
2943 | /* | ||
2944 | * Wrapper for _xfs_log_force(), to be used when caller doesn't care | ||
2945 | * about errors or whether the log was flushed or not. This is the normal | ||
2946 | * interface to use when trying to unpin items or move the log forward. | ||
2947 | */ | ||
2948 | void | ||
2949 | xfs_log_force( | ||
2950 | xfs_mount_t *mp, | ||
2951 | uint flags) | ||
2952 | { | ||
2953 | int error; | ||
2988 | 2954 | ||
2955 | error = _xfs_log_force(mp, flags, NULL); | ||
2956 | if (error) { | ||
2957 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
2958 | "error %d returned.", error); | ||
2959 | } | ||
2960 | } | ||
2989 | 2961 | ||
2990 | /* | 2962 | /* |
2991 | * Used by code which implements synchronous log forces. | 2963 | * Force the in-core log to disk for a specific LSN. |
2992 | * | 2964 | * |
2993 | * Find in-core log with lsn. | 2965 | * Find in-core log with lsn. |
2994 | * If it is in the DIRTY state, just return. | 2966 | * If it is in the DIRTY state, just return. |
@@ -2996,109 +2968,142 @@ no_sleep: | |||
2996 | * state and go to sleep or return. | 2968 | * state and go to sleep or return. |
2997 | * If it is in any other state, go to sleep or return. | 2969 | * If it is in any other state, go to sleep or return. |
2998 | * | 2970 | * |
2999 | * If filesystem activity goes to zero, the iclog will get flushed only by | 2971 | * Synchronous forces are implemented with a signal variable. All callers |
3000 | * bdflush(). | 2972 | * to force a given lsn to disk will wait on the sv attached to the |
2973 | * specific in-core log. When the given in-core log finally completes its | ||
2974 | * write to disk, that thread will wake up all threads waiting on the | ||
2975 | * sv. | ||
3001 | */ | 2976 | */ |
3002 | STATIC int | 2977 | int |
3003 | xlog_state_sync(xlog_t *log, | 2978 | _xfs_log_force_lsn( |
3004 | xfs_lsn_t lsn, | 2979 | struct xfs_mount *mp, |
3005 | uint flags, | 2980 | xfs_lsn_t lsn, |
3006 | int *log_flushed) | 2981 | uint flags, |
2982 | int *log_flushed) | ||
3007 | { | 2983 | { |
3008 | xlog_in_core_t *iclog; | 2984 | struct log *log = mp->m_log; |
3009 | int already_slept = 0; | 2985 | struct xlog_in_core *iclog; |
2986 | int already_slept = 0; | ||
3010 | 2987 | ||
3011 | try_again: | 2988 | ASSERT(lsn != 0); |
3012 | spin_lock(&log->l_icloglock); | ||
3013 | iclog = log->l_iclog; | ||
3014 | 2989 | ||
3015 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 2990 | XFS_STATS_INC(xs_log_force); |
3016 | spin_unlock(&log->l_icloglock); | ||
3017 | return XFS_ERROR(EIO); | ||
3018 | } | ||
3019 | |||
3020 | do { | ||
3021 | if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { | ||
3022 | iclog = iclog->ic_next; | ||
3023 | continue; | ||
3024 | } | ||
3025 | 2991 | ||
3026 | if (iclog->ic_state == XLOG_STATE_DIRTY) { | 2992 | try_again: |
2993 | spin_lock(&log->l_icloglock); | ||
2994 | iclog = log->l_iclog; | ||
2995 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
3027 | spin_unlock(&log->l_icloglock); | 2996 | spin_unlock(&log->l_icloglock); |
3028 | return 0; | 2997 | return XFS_ERROR(EIO); |
3029 | } | 2998 | } |
3030 | 2999 | ||
3031 | if (iclog->ic_state == XLOG_STATE_ACTIVE) { | 3000 | do { |
3032 | /* | 3001 | if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { |
3033 | * We sleep here if we haven't already slept (e.g. | 3002 | iclog = iclog->ic_next; |
3034 | * this is the first time we've looked at the correct | 3003 | continue; |
3035 | * iclog buf) and the buffer before us is going to | 3004 | } |
3036 | * be sync'ed. The reason for this is that if we | 3005 | |
3037 | * are doing sync transactions here, by waiting for | 3006 | if (iclog->ic_state == XLOG_STATE_DIRTY) { |
3038 | * the previous I/O to complete, we can allow a few | 3007 | spin_unlock(&log->l_icloglock); |
3039 | * more transactions into this iclog before we close | 3008 | return 0; |
3040 | * it down. | 3009 | } |
3041 | * | 3010 | |
3042 | * Otherwise, we mark the buffer WANT_SYNC, and bump | 3011 | if (iclog->ic_state == XLOG_STATE_ACTIVE) { |
3043 | * up the refcnt so we can release the log (which drops | 3012 | /* |
3044 | * the ref count). The state switch keeps new transaction | 3013 | * We sleep here if we haven't already slept (e.g. |
3045 | * commits from using this buffer. When the current commits | 3014 | * this is the first time we've looked at the correct |
3046 | * finish writing into the buffer, the refcount will drop to | 3015 | * iclog buf) and the buffer before us is going to |
3047 | * zero and the buffer will go out then. | 3016 | * be sync'ed. The reason for this is that if we |
3048 | */ | 3017 | * are doing sync transactions here, by waiting for |
3049 | if (!already_slept && | 3018 | * the previous I/O to complete, we can allow a few |
3050 | (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | | 3019 | * more transactions into this iclog before we close |
3051 | XLOG_STATE_SYNCING))) { | 3020 | * it down. |
3052 | ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); | 3021 | * |
3053 | XFS_STATS_INC(xs_log_force_sleep); | 3022 | * Otherwise, we mark the buffer WANT_SYNC, and bump |
3054 | sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, | 3023 | * up the refcnt so we can release the log (which |
3055 | &log->l_icloglock, s); | 3024 | * drops the ref count). The state switch keeps new |
3056 | *log_flushed = 1; | 3025 | * transaction commits from using this buffer. When |
3057 | already_slept = 1; | 3026 | * the current commits finish writing into the buffer, |
3058 | goto try_again; | 3027 | * the refcount will drop to zero and the buffer will |
3059 | } else { | 3028 | * go out then. |
3029 | */ | ||
3030 | if (!already_slept && | ||
3031 | (iclog->ic_prev->ic_state & | ||
3032 | (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { | ||
3033 | ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); | ||
3034 | |||
3035 | XFS_STATS_INC(xs_log_force_sleep); | ||
3036 | |||
3037 | sv_wait(&iclog->ic_prev->ic_write_wait, | ||
3038 | PSWP, &log->l_icloglock, s); | ||
3039 | if (log_flushed) | ||
3040 | *log_flushed = 1; | ||
3041 | already_slept = 1; | ||
3042 | goto try_again; | ||
3043 | } | ||
3060 | atomic_inc(&iclog->ic_refcnt); | 3044 | atomic_inc(&iclog->ic_refcnt); |
3061 | xlog_state_switch_iclogs(log, iclog, 0); | 3045 | xlog_state_switch_iclogs(log, iclog, 0); |
3062 | spin_unlock(&log->l_icloglock); | 3046 | spin_unlock(&log->l_icloglock); |
3063 | if (xlog_state_release_iclog(log, iclog)) | 3047 | if (xlog_state_release_iclog(log, iclog)) |
3064 | return XFS_ERROR(EIO); | 3048 | return XFS_ERROR(EIO); |
3065 | *log_flushed = 1; | 3049 | if (log_flushed) |
3050 | *log_flushed = 1; | ||
3066 | spin_lock(&log->l_icloglock); | 3051 | spin_lock(&log->l_icloglock); |
3067 | } | 3052 | } |
3068 | } | ||
3069 | 3053 | ||
3070 | if ((flags & XFS_LOG_SYNC) && /* sleep */ | 3054 | if ((flags & XFS_LOG_SYNC) && /* sleep */ |
3071 | !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { | 3055 | !(iclog->ic_state & |
3056 | (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { | ||
3057 | /* | ||
3058 | * Don't wait on completion if we know that we've | ||
3059 | * gotten a log write error. | ||
3060 | */ | ||
3061 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
3062 | spin_unlock(&log->l_icloglock); | ||
3063 | return XFS_ERROR(EIO); | ||
3064 | } | ||
3065 | XFS_STATS_INC(xs_log_force_sleep); | ||
3066 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | ||
3067 | /* | ||
3068 | * No need to grab the log lock here since we're | ||
3069 | * only deciding whether or not to return EIO | ||
3070 | * and the memory read should be atomic. | ||
3071 | */ | ||
3072 | if (iclog->ic_state & XLOG_STATE_IOERROR) | ||
3073 | return XFS_ERROR(EIO); | ||
3072 | 3074 | ||
3073 | /* | 3075 | if (log_flushed) |
3074 | * Don't wait on completion if we know that we've | 3076 | *log_flushed = 1; |
3075 | * gotten a log write error. | 3077 | } else { /* just return */ |
3076 | */ | ||
3077 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | ||
3078 | spin_unlock(&log->l_icloglock); | 3078 | spin_unlock(&log->l_icloglock); |
3079 | return XFS_ERROR(EIO); | ||
3080 | } | 3079 | } |
3081 | XFS_STATS_INC(xs_log_force_sleep); | ||
3082 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | ||
3083 | /* | ||
3084 | * No need to grab the log lock here since we're | ||
3085 | * only deciding whether or not to return EIO | ||
3086 | * and the memory read should be atomic. | ||
3087 | */ | ||
3088 | if (iclog->ic_state & XLOG_STATE_IOERROR) | ||
3089 | return XFS_ERROR(EIO); | ||
3090 | *log_flushed = 1; | ||
3091 | } else { /* just return */ | ||
3092 | spin_unlock(&log->l_icloglock); | ||
3093 | } | ||
3094 | return 0; | ||
3095 | 3080 | ||
3096 | } while (iclog != log->l_iclog); | 3081 | return 0; |
3082 | } while (iclog != log->l_iclog); | ||
3097 | 3083 | ||
3098 | spin_unlock(&log->l_icloglock); | 3084 | spin_unlock(&log->l_icloglock); |
3099 | return 0; | 3085 | return 0; |
3100 | } /* xlog_state_sync */ | 3086 | } |
3101 | 3087 | ||
3088 | /* | ||
3089 | * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care | ||
3090 | * about errors or whether the log was flushed or not. This is the normal | ||
3091 | * interface to use when trying to unpin items or move the log forward. | ||
3092 | */ | ||
3093 | void | ||
3094 | xfs_log_force_lsn( | ||
3095 | xfs_mount_t *mp, | ||
3096 | xfs_lsn_t lsn, | ||
3097 | uint flags) | ||
3098 | { | ||
3099 | int error; | ||
3100 | |||
3101 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); | ||
3102 | if (error) { | ||
3103 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
3104 | "error %d returned.", error); | ||
3105 | } | ||
3106 | } | ||
3102 | 3107 | ||
3103 | /* | 3108 | /* |
3104 | * Called when we want to mark the current iclog as being ready to sync to | 3109 | * Called when we want to mark the current iclog as being ready to sync to |
@@ -3463,7 +3468,6 @@ xfs_log_force_umount( | |||
3463 | xlog_ticket_t *tic; | 3468 | xlog_ticket_t *tic; |
3464 | xlog_t *log; | 3469 | xlog_t *log; |
3465 | int retval; | 3470 | int retval; |
3466 | int dummy; | ||
3467 | 3471 | ||
3468 | log = mp->m_log; | 3472 | log = mp->m_log; |
3469 | 3473 | ||
@@ -3537,13 +3541,14 @@ xfs_log_force_umount( | |||
3537 | } | 3541 | } |
3538 | spin_unlock(&log->l_grant_lock); | 3542 | spin_unlock(&log->l_grant_lock); |
3539 | 3543 | ||
3540 | if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | 3544 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { |
3541 | ASSERT(!logerror); | 3545 | ASSERT(!logerror); |
3542 | /* | 3546 | /* |
3543 | * Force the incore logs to disk before shutting the | 3547 | * Force the incore logs to disk before shutting the |
3544 | * log down completely. | 3548 | * log down completely. |
3545 | */ | 3549 | */ |
3546 | xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); | 3550 | _xfs_log_force(mp, XFS_LOG_SYNC, NULL); |
3551 | |||
3547 | spin_lock(&log->l_icloglock); | 3552 | spin_lock(&log->l_icloglock); |
3548 | retval = xlog_state_ioerror(log); | 3553 | retval = xlog_state_ioerror(log); |
3549 | spin_unlock(&log->l_icloglock); | 3554 | spin_unlock(&log->l_icloglock); |