Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--  fs/xfs/xfs_log.c  708
1 file changed, 320 insertions(+), 388 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9dbdff3ea484..2be019136287 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -40,6 +40,7 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_rw.h"
+#include "xfs_trace.h"
 
 kmem_zone_t *xfs_log_ticket_zone;
 
@@ -49,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone;
       (off) += (bytes);}
 
 /* Local miscellaneous function prototypes */
-STATIC int   xlog_bdstrat_cb(struct xfs_buf *);
 STATIC int   xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
                                 xlog_in_core_t **, xfs_lsn_t *);
 STATIC xlog_t *  xlog_alloc_log(xfs_mount_t *mp,
@@ -60,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
 STATIC int   xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
 STATIC void  xlog_dealloc_log(xlog_t *log);
 STATIC int   xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
-                        int nentries, xfs_log_ticket_t tic,
+                        int nentries, struct xlog_ticket *tic,
                         xfs_lsn_t *start_lsn,
                         xlog_in_core_t **commit_iclog,
                         uint flags);
@@ -79,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
 STATIC void xlog_state_switch_iclogs(xlog_t *log,
                                      xlog_in_core_t *iclog,
                                      int eventual_size);
-STATIC int  xlog_state_sync(xlog_t *log,
-                            xfs_lsn_t lsn,
-                            uint flags,
-                            int *log_flushed);
-STATIC int  xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
 STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
 
 /* local functions to manipulate grant head */
@@ -122,85 +117,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
 
 STATIC int  xlog_iclogs_empty(xlog_t *log);
 
-#if defined(XFS_LOG_TRACE)
-
-#define XLOG_TRACE_LOGGRANT_SIZE    2048
-#define XLOG_TRACE_ICLOG_SIZE       256
-
-void
-xlog_trace_loggrant_alloc(xlog_t *log)
-{
-    log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
-}
-
-void
-xlog_trace_loggrant_dealloc(xlog_t *log)
-{
-    ktrace_free(log->l_grant_trace);
-}
-
-void
-xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
-{
-    unsigned long cnts;
-
-    /* ticket counts are 1 byte each */
-    cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
-
-    ktrace_enter(log->l_grant_trace,
-                 (void *)tic,
-                 (void *)log->l_reserve_headq,
-                 (void *)log->l_write_headq,
-                 (void *)((unsigned long)log->l_grant_reserve_cycle),
-                 (void *)((unsigned long)log->l_grant_reserve_bytes),
-                 (void *)((unsigned long)log->l_grant_write_cycle),
-                 (void *)((unsigned long)log->l_grant_write_bytes),
-                 (void *)((unsigned long)log->l_curr_cycle),
-                 (void *)((unsigned long)log->l_curr_block),
-                 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
-                 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
-                 (void *)string,
-                 (void *)((unsigned long)tic->t_trans_type),
-                 (void *)cnts,
-                 (void *)((unsigned long)tic->t_curr_res),
-                 (void *)((unsigned long)tic->t_unit_res));
-}
-
-void
-xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
-{
-    iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
-}
-
-void
-xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
-{
-    ktrace_free(iclog->ic_trace);
-}
-
-void
-xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
-{
-    ktrace_enter(iclog->ic_trace,
-                 (void *)((unsigned long)state),
-                 (void *)((unsigned long)current_pid()),
-                 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-                 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-                 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
-                 (void *)NULL, (void *)NULL);
-}
-#else
-
-#define xlog_trace_loggrant_alloc(log)
-#define xlog_trace_loggrant_dealloc(log)
-#define xlog_trace_loggrant(log,tic,string)
-
-#define xlog_trace_iclog_alloc(iclog)
-#define xlog_trace_iclog_dealloc(iclog)
-#define xlog_trace_iclog(iclog,state)
-
-#endif /* XFS_LOG_TRACE */
-
 
 static void
 xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
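
[Editor's note] The hunk above deletes the old XFS_LOG_TRACE/ktrace ring-buffer
helpers; later hunks replace them with named trace events (the trace_xfs_log_*
calls). A minimal userspace sketch of that shape of change — swapping an
untyped hand-rolled ring buffer for typed per-event hooks — is below. All
names here are hypothetical illustrations, not kernel APIs.

/* Illustrative only: contrasts a ktrace-style untyped ring buffer with
 * typed per-event trace hooks. Names are hypothetical, not from the kernel. */
#include <stdio.h>

struct ticket { int t_cnt, t_curr_res; };

/* Old style: one entry point, untyped slots, caller encodes everything. */
static void *trace_ring[256][4];
static unsigned trace_idx;

static void ktrace_style(void *a, void *b, void *c, void *d)
{
    void **slot = trace_ring[trace_idx++ % 256];
    slot[0] = a; slot[1] = b; slot[2] = c; slot[3] = d;
}

/* New style: one named, typed hook per event; self-describing at the call site. */
static void trace_log_grant_enter(const struct ticket *tic)
{
    printf("log_grant_enter: cnt=%d curr_res=%d\n",
           tic->t_cnt, tic->t_curr_res);
}

int main(void)
{
    struct ticket tic = { .t_cnt = 2, .t_curr_res = 512 };

    ktrace_style(&tic, "enter", 0, 0);  /* stringly-typed, easy to get wrong */
    trace_log_grant_enter(&tic);        /* typed, greppable event name */
    return 0;
}
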
@@ -327,14 +243,14 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
  * out when the next write occurs.
  */
 xfs_lsn_t
-xfs_log_done(xfs_mount_t    *mp,
-             xfs_log_ticket_t xtic,
-             void           **iclog,
-             uint           flags)
+xfs_log_done(
+    struct xfs_mount    *mp,
+    struct xlog_ticket  *ticket,
+    struct xlog_in_core **iclog,
+    uint                flags)
 {
-    xlog_t          *log = mp->m_log;
-    xlog_ticket_t   *ticket = (xfs_log_ticket_t) xtic;
-    xfs_lsn_t       lsn = 0;
+    struct log          *log = mp->m_log;
+    xfs_lsn_t           lsn = 0;
 
     if (XLOG_FORCED_SHUTDOWN(log) ||
         /*
@@ -342,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp,
          * If we get an error, just continue and give back the log ticket.
          */
         (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
-         (xlog_commit_record(mp, ticket,
-                             (xlog_in_core_t **)iclog, &lsn)))) {
+         (xlog_commit_record(mp, ticket, iclog, &lsn)))) {
         lsn = (xfs_lsn_t) -1;
         if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
             flags |= XFS_LOG_REL_PERM_RESERV;
@@ -353,15 +268,17 @@ xfs_log_done(xfs_mount_t *mp,
 
     if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
         (flags & XFS_LOG_REL_PERM_RESERV)) {
+        trace_xfs_log_done_nonperm(log, ticket);
+
         /*
          * Release ticket if not permanent reservation or a specific
          * request has been made to release a permanent reservation.
          */
-        xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
         xlog_ungrant_log_space(log, ticket);
         xfs_log_ticket_put(ticket);
     } else {
-        xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
+        trace_xfs_log_done_perm(log, ticket);
+
         xlog_regrant_reserve_log_space(log, ticket);
         /* If this ticket was a permanent reservation and we aren't
          * trying to release it, reset the inited flags; so next time
@@ -371,67 +288,8 @@ xfs_log_done(xfs_mount_t *mp,
     }
 
     return lsn;
-}    /* xfs_log_done */
-
-
-/*
- * Force the in-core log to disk.  If flags == XFS_LOG_SYNC,
- *    the force is done synchronously.
- *
- * Asynchronous forces are implemented by setting the WANT_SYNC
- * bit in the appropriate in-core log and then returning.
- *
- * Synchronous forces are implemented with a signal variable. All callers
- * to force a given lsn to disk will wait on a the sv attached to the
- * specific in-core log.  When given in-core log finally completes its
- * write to disk, that thread will wake up all threads waiting on the
- * sv.
- */
-int
-_xfs_log_force(
-    xfs_mount_t *mp,
-    xfs_lsn_t   lsn,
-    uint        flags,
-    int         *log_flushed)
-{
-    xlog_t      *log = mp->m_log;
-    int         dummy;
-
-    if (!log_flushed)
-        log_flushed = &dummy;
-
-    ASSERT(flags & XFS_LOG_FORCE);
-
-    XFS_STATS_INC(xs_log_force);
-
-    if (log->l_flags & XLOG_IO_ERROR)
-        return XFS_ERROR(EIO);
-    if (lsn == 0)
-        return xlog_state_sync_all(log, flags, log_flushed);
-    else
-        return xlog_state_sync(log, lsn, flags, log_flushed);
-}    /* _xfs_log_force */
-
-/*
- * Wrapper for _xfs_log_force(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
- */
-void
-xfs_log_force(
-    xfs_mount_t *mp,
-    xfs_lsn_t   lsn,
-    uint        flags)
-{
-    int error;
-    error = _xfs_log_force(mp, lsn, flags, NULL);
-    if (error) {
-        xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
-            "error %d returned.", error);
-    }
-}
 }
 
-
 /*
  * Attaches a new iclog I/O completion callback routine during
  * transaction commit.  If the log is in error state, a non-zero
@@ -439,11 +297,11 @@ xfs_log_force(
  * executing the callback at an appropriate time.
  */
 int
-xfs_log_notify(xfs_mount_t        *mp,          /* mount of partition */
-               void               *iclog_hndl,  /* iclog to hang callback off */
-               xfs_log_callback_t *cb)
+xfs_log_notify(
+    struct xfs_mount    *mp,
+    struct xlog_in_core *iclog,
+    xfs_log_callback_t  *cb)
 {
-    xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
     int abortflg;
 
     spin_lock(&iclog->ic_callback_lock);
@@ -457,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
     }
     spin_unlock(&iclog->ic_callback_lock);
     return abortflg;
-}    /* xfs_log_notify */
+}
 
 int
-xfs_log_release_iclog(xfs_mount_t *mp,
-                      void        *iclog_hndl)
+xfs_log_release_iclog(
+    struct xfs_mount    *mp,
+    struct xlog_in_core *iclog)
 {
-    xlog_t         *log = mp->m_log;
-    xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
-
-    if (xlog_state_release_iclog(log, iclog)) {
+    if (xlog_state_release_iclog(mp->m_log, iclog)) {
         xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
         return EIO;
     }
@@ -485,17 +341,18 @@ xfs_log_release_iclog(xfs_mount_t *mp,
  * reservation, we prevent over allocation problems.
  */
 int
-xfs_log_reserve(xfs_mount_t      *mp,
-                int              unit_bytes,
-                int              cnt,
-                xfs_log_ticket_t *ticket,
-                __uint8_t        client,
-                uint             flags,
-                uint             t_type)
+xfs_log_reserve(
+    struct xfs_mount    *mp,
+    int                 unit_bytes,
+    int                 cnt,
+    struct xlog_ticket  **ticket,
+    __uint8_t           client,
+    uint                flags,
+    uint                t_type)
 {
-    xlog_t          *log = mp->m_log;
-    xlog_ticket_t   *internal_ticket;
-    int             retval = 0;
+    struct log          *log = mp->m_log;
+    struct xlog_ticket  *internal_ticket;
+    int                 retval = 0;
 
     ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
     ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
@@ -505,10 +362,13 @@ xfs_log_reserve(xfs_mount_t *mp,
 
     XFS_STATS_INC(xs_try_logspace);
 
+
     if (*ticket != NULL) {
         ASSERT(flags & XFS_LOG_PERM_RESERV);
-        internal_ticket = (xlog_ticket_t *)*ticket;
-        xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)");
+        internal_ticket = *ticket;
+
+        trace_xfs_log_reserve(log, internal_ticket);
+
         xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
         retval = xlog_regrant_write_log_space(log, internal_ticket);
     } else {
@@ -519,10 +379,9 @@ xfs_log_reserve(xfs_mount_t *mp,
             return XFS_ERROR(ENOMEM);
         internal_ticket->t_trans_type = t_type;
         *ticket = internal_ticket;
-        xlog_trace_loggrant(log, internal_ticket,
-            (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
-            "xfs_log_reserve: create new ticket (permanent trans)" :
-            "xfs_log_reserve: create new ticket");
+
+        trace_xfs_log_reserve(log, internal_ticket);
+
         xlog_grant_push_ail(mp,
                             (internal_ticket->t_unit_res *
                              internal_ticket->t_cnt));
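
[Editor's note] The hunks above replace the void*-based xfs_log_ticket_t
handle with struct xlog_ticket ** in xfs_log_reserve(), so mismatched callers
fail to compile instead of failing at run time. A toy sketch of the
difference, with hypothetical names that are not the kernel's:

/* Sketch of the handle-typing change; all names are hypothetical. */
#include <stddef.h>

struct xlog_ticket { int t_unit_res; };

/* Old shape: an opaque void* "handle" that accepts anything. */
typedef void *log_ticket_handle;
static int reserve_old(log_ticket_handle *ticket) { (void)ticket; return 0; }

/* New shape: the real struct type, forward-declarable and type-checked. */
static int reserve_new(struct xlog_ticket **ticket) { (void)ticket; return 0; }

int main(void)
{
    struct xlog_ticket *tic = NULL;
    int wrong = 0;

    reserve_old((void *)&wrong);  /* compiles: the wrong pointer slips through */
    reserve_new(&tic);            /* only the correct type compiles here */
    return wrong;
}
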
@@ -658,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
     xlog_in_core_t   *first_iclog;
 #endif
     xfs_log_iovec_t  reg[1];
-    xfs_log_ticket_t tic = NULL;
+    xlog_ticket_t    *tic = NULL;
     xfs_lsn_t        lsn;
     int              error;
 
@@ -676,7 +535,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
     if (mp->m_flags & XFS_MOUNT_RDONLY)
         return 0;
 
-    error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
+    error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
     ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
 
 #ifdef DEBUG
@@ -692,7 +551,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
     if (! (XLOG_FORCED_SHUTDOWN(log))) {
         reg[0].i_addr = (void*)&magic;
         reg[0].i_len  = sizeof(magic);
-        XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
+        reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
 
         error = xfs_log_reserve(mp, 600, 1, &tic,
                                 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
@@ -734,7 +593,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
         spin_unlock(&log->l_icloglock);
     }
     if (tic) {
-        xlog_trace_loggrant(log, tic, "unmount rec");
+        trace_xfs_log_umount_write(log, tic);
         xlog_ungrant_log_space(log, tic);
         xfs_log_ticket_put(tic);
     }
@@ -795,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp)
  * transaction occur with one call to xfs_log_write().
  */
 int
-xfs_log_write(xfs_mount_t      *mp,
-              xfs_log_iovec_t  reg[],
-              int              nentries,
-              xfs_log_ticket_t tic,
-              xfs_lsn_t        *start_lsn)
+xfs_log_write(
+    struct xfs_mount     *mp,
+    struct xfs_log_iovec reg[],
+    int                  nentries,
+    struct xlog_ticket   *tic,
+    xfs_lsn_t            *start_lsn)
 {
-    int    error;
-    xlog_t *log = mp->m_log;
+    struct log           *log = mp->m_log;
+    int                  error;
 
     if (XLOG_FORCED_SHUTDOWN(log))
         return XFS_ERROR(EIO);
 
-    if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
+    error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
+    if (error)
         xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-    }
     return error;
-}    /* xfs_log_write */
-
+}
 
 void
 xfs_log_move_tail(xfs_mount_t *mp,
@@ -886,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp,
 
 /*
  * Determine if we have a transaction that has gone to disk
- * that needs to be covered. Log activity needs to be idle (no AIL and
- * nothing in the iclogs). And, we need to be in the right state indicating
- * something has gone out.
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
+ *
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurs it avoids replay
+ * of transactions where the metadata is already on disk.
  */
 int
 xfs_log_need_covered(xfs_mount_t *mp)
@@ -900,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
         return 0;
 
     spin_lock(&log->l_icloglock);
-    if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
-        (log->l_covered_state == XLOG_STATE_COVER_NEED2))
-            && !xfs_trans_ail_tail(log->l_ailp)
-            && xlog_iclogs_empty(log)) {
-        if (log->l_covered_state == XLOG_STATE_COVER_NEED)
-            log->l_covered_state = XLOG_STATE_COVER_DONE;
-        else {
-            ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
-            log->l_covered_state = XLOG_STATE_COVER_DONE2;
+    switch (log->l_covered_state) {
+    case XLOG_STATE_COVER_DONE:
+    case XLOG_STATE_COVER_DONE2:
+    case XLOG_STATE_COVER_IDLE:
+        break;
+    case XLOG_STATE_COVER_NEED:
+    case XLOG_STATE_COVER_NEED2:
+        if (!xfs_trans_ail_tail(log->l_ailp) &&
+            xlog_iclogs_empty(log)) {
+            if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+                log->l_covered_state = XLOG_STATE_COVER_DONE;
+            else
+                log->l_covered_state = XLOG_STATE_COVER_DONE2;
         }
+        /* FALLTHRU */
+    default:
         needed = 1;
+        break;
     }
     spin_unlock(&log->l_icloglock);
     return needed;
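
[Editor's note] The rewritten xfs_log_need_covered() turns the two-state if
into a switch over every covering state: DONE/DONE2/IDLE report "no dummy
transaction needed", NEED/NEED2 try to advance the state and then fall
through, and everything else reports needed. A compact standalone model of
that decision, using hypothetical states and helpers rather than kernel code:

/* Standalone model of the covering-state decision; not kernel code. */
#include <stdio.h>

enum cover_state {
    COVER_IDLE, COVER_NEED, COVER_DONE, COVER_NEED2, COVER_DONE2
};

static int need_covered(enum cover_state *state, int ail_empty, int iclogs_empty)
{
    int needed = 0;

    switch (*state) {
    case COVER_DONE:
    case COVER_DONE2:
    case COVER_IDLE:
        break;              /* already covered or transitioning: nothing to do */
    case COVER_NEED:
    case COVER_NEED2:
        if (ail_empty && iclogs_empty)
            *state = (*state == COVER_NEED) ? COVER_DONE : COVER_DONE2;
        /* FALLTHRU: a dummy transaction is still required */
    default:
        needed = 1;
        break;
    }
    return needed;
}

int main(void)
{
    enum cover_state s = COVER_NEED;
    int needed = need_covered(&s, 1, 1);

    printf("needed=%d state=%d\n", needed, s);
    return 0;
}
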
@@ -1030,7 +903,6 @@ xlog_iodone(xfs_buf_t *bp)
         xfs_fs_cmn_err(CE_WARN, l->l_mp,
                 "xlog_iodone: Barriers are no longer supported"
                 " by device. Disabling barriers\n");
-        xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp);
     }
 
     /*
@@ -1063,38 +935,6 @@ xlog_iodone(xfs_buf_t *bp)
 }    /* xlog_iodone */
 
 /*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- */
-STATIC int
-xlog_bdstrat_cb(struct xfs_buf *bp)
-{
-    xlog_in_core_t *iclog;
-
-    iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
-
-    if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
-        /* note for irix bstrat will need struct bdevsw passed
-         * Fix the following macro if the code ever is merged
-         */
-        XFS_bdstrat(bp);
-        return 0;
-    }
-
-    xfs_buftrace("XLOG__BDSTRAT IOERROR", bp);
-    XFS_BUF_ERROR(bp, EIO);
-    XFS_BUF_STALE(bp);
-    xfs_biodone(bp);
-    return XFS_ERROR(EIO);
-
-
-}
-
-/*
  * Return size of each in-core log record buffer.
  *
  * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1236,7 +1076,6 @@ xlog_alloc_log(xfs_mount_t *mp,
     if (!bp)
         goto out_free_log;
     XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-    XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
     XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
     ASSERT(XFS_BUF_ISBUSY(bp));
     ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
@@ -1246,7 +1085,6 @@ xlog_alloc_log(xfs_mount_t *mp,
     spin_lock_init(&log->l_grant_lock);
     sv_init(&log->l_flush_wait, 0, "flush_wait");
 
-    xlog_trace_loggrant_alloc(log);
     /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
     ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
 
@@ -1275,7 +1113,6 @@ xlog_alloc_log(xfs_mount_t *mp,
         if (!XFS_BUF_CPSEMA(bp))
             ASSERT(0);
         XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-        XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
         XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
         iclog->ic_bp = bp;
         iclog->ic_data = bp->b_addr;
@@ -1305,8 +1142,6 @@ xlog_alloc_log(xfs_mount_t *mp,
         sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
         sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
 
-        xlog_trace_iclog_alloc(iclog);
-
         iclogp = &iclog->ic_next;
     }
     *iclogp = log->l_iclog;            /* complete ring */
@@ -1321,13 +1156,11 @@ out_free_iclog:
             sv_destroy(&iclog->ic_force_wait);
             sv_destroy(&iclog->ic_write_wait);
             xfs_buf_free(iclog->ic_bp);
-            xlog_trace_iclog_dealloc(iclog);
         }
         kmem_free(iclog);
     }
     spinlock_destroy(&log->l_icloglock);
     spinlock_destroy(&log->l_grant_lock);
-    xlog_trace_loggrant_dealloc(log);
     xfs_buf_free(log->l_xbuf);
 out_free_log:
     kmem_free(log);
@@ -1351,7 +1184,7 @@ xlog_commit_record(xfs_mount_t *mp,
 
     reg[0].i_addr = NULL;
     reg[0].i_len = 0;
-    XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
+    reg[0].i_type = XLOG_REG_TYPE_COMMIT;
 
     ASSERT_ALWAYS(iclog);
     if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1426,6 +1259,37 @@ xlog_grant_push_ail(xfs_mount_t *mp,
     xfs_trans_ail_push(log->l_ailp, threshold_lsn);
 }    /* xlog_grant_push_ail */
 
+/*
+ * The bdstrat callback function for log bufs. This gives us a central
+ * place to trap bufs in case we get hit by a log I/O error and need to
+ * shutdown. Actually, in practice, even when we didn't get a log error,
+ * we transition the iclogs to IOERROR state *after* flushing all existing
+ * iclogs to disk. This is because we don't want anymore new transactions to be
+ * started or completed afterwards.
+ */
+STATIC int
+xlog_bdstrat(
+    struct xfs_buf      *bp)
+{
+    struct xlog_in_core *iclog;
+
+    iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
+    if (iclog->ic_state & XLOG_STATE_IOERROR) {
+        XFS_BUF_ERROR(bp, EIO);
+        XFS_BUF_STALE(bp);
+        xfs_biodone(bp);
+        /*
+         * It would seem logical to return EIO here, but we rely on
+         * the log state machine to propagate I/O errors instead of
+         * doing it here.
+         */
+        return 0;
+    }
+
+    bp->b_flags |= _XBF_RUN_QUEUES;
+    xfs_buf_iorequest(bp);
+    return 0;
+}
 
 /*
  * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
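
[Editor's note] Note the error-handling choice in the new xlog_bdstrat()
above: for an iclog already in IOERROR it stales and completes the buffer but
still returns 0, deferring EIO propagation to the log state machine. A
userspace sketch of that "trap the I/O, complete it, propagate the error
elsewhere" pattern follows; every name in it is hypothetical.

/* Sketch of the bdstrat error-trap pattern; names are hypothetical. */
#include <stdio.h>

struct buf   { int error; int stale; };
struct iclog { int ioerror; };

static void biodone(struct buf *bp)
{
    printf("completed, error=%d stale=%d\n", bp->error, bp->stale);
}

static int log_bdstrat(struct iclog *ic, struct buf *bp)
{
    if (ic->ioerror) {
        bp->error = 5;      /* record EIO on the buffer */
        bp->stale = 1;
        biodone(bp);
        /*
         * Return 0 anyway: the caller's state machine, not this
         * submission path, is responsible for surfacing the EIO.
         */
        return 0;
    }
    /* normal case: submit the I/O (modelled as immediate completion) */
    biodone(bp);
    return 0;
}

int main(void)
{
    struct iclog ic = { .ioerror = 1 };
    struct buf bp = { 0 };

    return log_bdstrat(&ic, &bp);
}
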
@@ -1524,6 +1388,7 @@ xlog_sync(xlog_t *log,
     XFS_BUF_ZEROFLAGS(bp);
     XFS_BUF_BUSY(bp);
     XFS_BUF_ASYNC(bp);
+    bp->b_flags |= XBF_LOG_BUFFER;
     /*
      * Do an ordered write for the log block.
      * Its unnecessary to flush the first split block in the log wrap case.
@@ -1544,7 +1409,7 @@ xlog_sync(xlog_t *log,
      */
     XFS_BUF_WRITE(bp);
 
-    if ((error = XFS_bwrite(bp))) {
+    if ((error = xlog_bdstrat(bp))) {
         xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
                           XFS_BUF_ADDR(bp));
         return error;
@@ -1561,6 +1426,7 @@ xlog_sync(xlog_t *log,
         XFS_BUF_ZEROFLAGS(bp);
         XFS_BUF_BUSY(bp);
         XFS_BUF_ASYNC(bp);
+        bp->b_flags |= XBF_LOG_BUFFER;
         if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
             XFS_BUF_ORDERED(bp);
         dptr = XFS_BUF_PTR(bp);
@@ -1583,7 +1449,7 @@ xlog_sync(xlog_t *log,
         /* account for internal log which doesn't start at block #0 */
         XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
         XFS_BUF_WRITE(bp);
-        if ((error = XFS_bwrite(bp))) {
+        if ((error = xlog_bdstrat(bp))) {
             xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
                               bp, XFS_BUF_ADDR(bp));
             return error;
@@ -1607,7 +1473,6 @@ xlog_dealloc_log(xlog_t *log)
         sv_destroy(&iclog->ic_force_wait);
         sv_destroy(&iclog->ic_write_wait);
         xfs_buf_free(iclog->ic_bp);
-        xlog_trace_iclog_dealloc(iclog);
         next_iclog = iclog->ic_next;
         kmem_free(iclog);
         iclog = next_iclog;
@@ -1616,7 +1481,6 @@ xlog_dealloc_log(xlog_t *log)
     spinlock_destroy(&log->l_grant_lock);
 
     xfs_buf_free(log->l_xbuf);
-    xlog_trace_loggrant_dealloc(log);
     log->l_mp->m_log = NULL;
     kmem_free(log);
 }    /* xlog_dealloc_log */
@@ -1790,16 +1654,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
  * bytes have been written out.
  */
 STATIC int
-xlog_write(xfs_mount_t      *mp,
-           xfs_log_iovec_t  reg[],
-           int              nentries,
-           xfs_log_ticket_t tic,
-           xfs_lsn_t        *start_lsn,
-           xlog_in_core_t   **commit_iclog,
-           uint             flags)
+xlog_write(
+    struct xfs_mount     *mp,
+    struct xfs_log_iovec reg[],
+    int                  nentries,
+    struct xlog_ticket   *ticket,
+    xfs_lsn_t            *start_lsn,
+    struct xlog_in_core  **commit_iclog,
+    uint                 flags)
 {
     xlog_t           *log = mp->m_log;
-    xlog_ticket_t    *ticket = (xlog_ticket_t *)tic;
     xlog_in_core_t   *iclog = NULL;  /* ptr to current in-core log */
     xlog_op_header_t *logop_head;    /* ptr to log operation header */
     __psint_t        ptr;            /* copy address into data region */
@@ -1913,7 +1777,7 @@ xlog_write(xfs_mount_t * mp,
     default:
         xfs_fs_cmn_err(CE_WARN, mp,
             "Bad XFS transaction clientid 0x%x in ticket 0x%p",
-            logop_head->oh_clientid, tic);
+            logop_head->oh_clientid, ticket);
         return XFS_ERROR(EIO);
     }
 
@@ -2414,7 +2278,6 @@ restart:
 
     iclog = log->l_iclog;
     if (iclog->ic_state != XLOG_STATE_ACTIVE) {
-        xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
         XFS_STATS_INC(xs_log_noiclogs);
 
         /* Wait for log writes to have flushed */
@@ -2520,13 +2383,15 @@ xlog_grant_log_space(xlog_t *log,
 
     /* Is there space or do we need to sleep? */
     spin_lock(&log->l_grant_lock);
-    xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter");
+
+    trace_xfs_log_grant_enter(log, tic);
 
     /* something is already sleeping; insert new transaction at end */
     if (log->l_reserve_headq) {
         xlog_ins_ticketq(&log->l_reserve_headq, tic);
-        xlog_trace_loggrant(log, tic,
-                            "xlog_grant_log_space: sleep 1");
+
+        trace_xfs_log_grant_sleep1(log, tic);
+
         /*
          * Gotta check this before going to sleep, while we're
          * holding the grant lock.
@@ -2540,8 +2405,7 @@ xlog_grant_log_space(xlog_t *log,
          * If we got an error, and the filesystem is shutting down,
          * we'll catch it down below. So just continue...
          */
-        xlog_trace_loggrant(log, tic,
-                            "xlog_grant_log_space: wake 1");
+        trace_xfs_log_grant_wake1(log, tic);
         spin_lock(&log->l_grant_lock);
     }
     if (tic->t_flags & XFS_LOG_PERM_RESERV)
@@ -2558,8 +2422,9 @@ redo:
     if (free_bytes < need_bytes) {
         if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
             xlog_ins_ticketq(&log->l_reserve_headq, tic);
-        xlog_trace_loggrant(log, tic,
-                            "xlog_grant_log_space: sleep 2");
+
+        trace_xfs_log_grant_sleep2(log, tic);
+
         spin_unlock(&log->l_grant_lock);
         xlog_grant_push_ail(log->l_mp, need_bytes);
         spin_lock(&log->l_grant_lock);
@@ -2571,8 +2436,8 @@ redo:
         if (XLOG_FORCED_SHUTDOWN(log))
             goto error_return;
 
-        xlog_trace_loggrant(log, tic,
-                            "xlog_grant_log_space: wake 2");
+        trace_xfs_log_grant_wake2(log, tic);
+
         goto redo;
     } else if (tic->t_flags & XLOG_TIC_IN_Q)
         xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2592,7 +2457,7 @@ redo:
         ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
     }
 #endif
-    xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit");
+    trace_xfs_log_grant_exit(log, tic);
     xlog_verify_grant_head(log, 1);
     spin_unlock(&log->l_grant_lock);
     return 0;
@@ -2600,7 +2465,9 @@ redo:
  error_return:
     if (tic->t_flags & XLOG_TIC_IN_Q)
         xlog_del_ticketq(&log->l_reserve_headq, tic);
-    xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
+
+    trace_xfs_log_grant_error(log, tic);
+
     /*
      * If we are failing, make sure the ticket doesn't have any
      * current reservations. We don't want to add this back when
@@ -2640,7 +2507,8 @@ xlog_regrant_write_log_space(xlog_t *log,
 #endif
 
     spin_lock(&log->l_grant_lock);
-    xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter");
+
+    trace_xfs_log_regrant_write_enter(log, tic);
 
     if (XLOG_FORCED_SHUTDOWN(log))
         goto error_return;
@@ -2669,8 +2537,8 @@ xlog_regrant_write_log_space(xlog_t *log,
             if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
                 xlog_ins_ticketq(&log->l_write_headq, tic);
 
-            xlog_trace_loggrant(log, tic,
-                        "xlog_regrant_write_log_space: sleep 1");
+            trace_xfs_log_regrant_write_sleep1(log, tic);
+
             spin_unlock(&log->l_grant_lock);
             xlog_grant_push_ail(log->l_mp, need_bytes);
             spin_lock(&log->l_grant_lock);
@@ -2685,8 +2553,7 @@ xlog_regrant_write_log_space(xlog_t *log,
             if (XLOG_FORCED_SHUTDOWN(log))
                 goto error_return;
 
-            xlog_trace_loggrant(log, tic,
-                        "xlog_regrant_write_log_space: wake 1");
+            trace_xfs_log_regrant_write_wake1(log, tic);
         }
     }
 
@@ -2704,6 +2571,8 @@ redo:
         spin_lock(&log->l_grant_lock);
 
         XFS_STATS_INC(xs_sleep_logspace);
+        trace_xfs_log_regrant_write_sleep2(log, tic);
+
         sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
 
         /* If we're shutting down, this tic is already off the queue */
@@ -2711,8 +2580,7 @@ redo:
         if (XLOG_FORCED_SHUTDOWN(log))
             goto error_return;
 
-        xlog_trace_loggrant(log, tic,
-                        "xlog_regrant_write_log_space: wake 2");
+        trace_xfs_log_regrant_write_wake2(log, tic);
         goto redo;
     } else if (tic->t_flags & XLOG_TIC_IN_Q)
         xlog_del_ticketq(&log->l_write_headq, tic);
@@ -2727,7 +2595,8 @@ redo:
     }
 #endif
 
-    xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit");
+    trace_xfs_log_regrant_write_exit(log, tic);
+
     xlog_verify_grant_head(log, 1);
     spin_unlock(&log->l_grant_lock);
     return 0;
@@ -2736,7 +2605,9 @@ redo:
  error_return:
     if (tic->t_flags & XLOG_TIC_IN_Q)
         xlog_del_ticketq(&log->l_reserve_headq, tic);
-    xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
+
+    trace_xfs_log_regrant_write_error(log, tic);
+
     /*
      * If we are failing, make sure the ticket doesn't have any
      * current reservations. We don't want to add this back when
@@ -2760,8 +2631,8 @@ STATIC void
 xlog_regrant_reserve_log_space(xlog_t        *log,
                                xlog_ticket_t *ticket)
 {
-    xlog_trace_loggrant(log, ticket,
-                        "xlog_regrant_reserve_log_space: enter");
+    trace_xfs_log_regrant_reserve_enter(log, ticket);
+
     if (ticket->t_cnt > 0)
         ticket->t_cnt--;
 
@@ -2769,8 +2640,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
     xlog_grant_sub_space(log, ticket->t_curr_res);
     ticket->t_curr_res = ticket->t_unit_res;
     xlog_tic_reset_res(ticket);
-    xlog_trace_loggrant(log, ticket,
-                        "xlog_regrant_reserve_log_space: sub current res");
+
+    trace_xfs_log_regrant_reserve_sub(log, ticket);
+
     xlog_verify_grant_head(log, 1);
 
     /* just return if we still have some of the pre-reserved space */
@@ -2780,8 +2652,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
     }
 
     xlog_grant_add_space_reserve(log, ticket->t_unit_res);
-    xlog_trace_loggrant(log, ticket,
-                        "xlog_regrant_reserve_log_space: exit");
+
+    trace_xfs_log_regrant_reserve_exit(log, ticket);
+
     xlog_verify_grant_head(log, 0);
     spin_unlock(&log->l_grant_lock);
     ticket->t_curr_res = ticket->t_unit_res;
@@ -2811,11 +2684,11 @@ xlog_ungrant_log_space(xlog_t *log,
         ticket->t_cnt--;
 
     spin_lock(&log->l_grant_lock);
-    xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
+    trace_xfs_log_ungrant_enter(log, ticket);
 
     xlog_grant_sub_space(log, ticket->t_curr_res);
 
-    xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
+    trace_xfs_log_ungrant_sub(log, ticket);
 
     /* If this is a permanent reservation ticket, we may be able to free
      * up more space based on the remaining count.
@@ -2825,7 +2698,8 @@ xlog_ungrant_log_space(xlog_t *log,
         xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
     }
 
-    xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
+    trace_xfs_log_ungrant_exit(log, ticket);
+
     xlog_verify_grant_head(log, 1);
     spin_unlock(&log->l_grant_lock);
     xfs_log_move_tail(log->l_mp, 1);
@@ -2927,7 +2801,6 @@ xlog_state_switch_iclogs(xlog_t *log,
     log->l_iclog = iclog->ic_next;
 }    /* xlog_state_switch_iclogs */
 
-
 /*
  * Write out all data in the in-core log as of this exact moment in time.
  *
@@ -2955,11 +2828,17 @@ xlog_state_switch_iclogs(xlog_t *log,
  *    b) when we return from flushing out this iclog, it is still
  *       not in the active nor dirty state.
  */
-STATIC int
-xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
+int
+_xfs_log_force(
+    struct xfs_mount    *mp,
+    uint                flags,
+    int                 *log_flushed)
 {
-    xlog_in_core_t  *iclog;
-    xfs_lsn_t       lsn;
+    struct log          *log = mp->m_log;
+    struct xlog_in_core *iclog;
+    xfs_lsn_t           lsn;
+
+    XFS_STATS_INC(xs_log_force);
 
     spin_lock(&log->l_icloglock);
 
@@ -3005,7 +2884,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 
             if (xlog_state_release_iclog(log, iclog))
                 return XFS_ERROR(EIO);
-            *log_flushed = 1;
+
+            if (log_flushed)
+                *log_flushed = 1;
             spin_lock(&log->l_icloglock);
             if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
                 iclog->ic_state != XLOG_STATE_DIRTY)
@@ -3049,19 +2930,37 @@ maybe_sleep:
          */
         if (iclog->ic_state & XLOG_STATE_IOERROR)
             return XFS_ERROR(EIO);
-        *log_flushed = 1;
-
+        if (log_flushed)
+            *log_flushed = 1;
     } else {
 
 no_sleep:
         spin_unlock(&log->l_icloglock);
     }
     return 0;
-}    /* xlog_state_sync_all */
+}
+
+/*
+ * Wrapper for _xfs_log_force(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force(
+    xfs_mount_t *mp,
+    uint        flags)
+{
+    int error;
 
+    error = _xfs_log_force(mp, flags, NULL);
+    if (error) {
+        xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+            "error %d returned.", error);
+    }
+}
 
 /*
- * Used by code which implements synchronous log forces.
+ * Force the in-core log to disk for a specific LSN.
  *
  * Find in-core log with lsn.
  * If it is in the DIRTY state, just return.
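
[Editor's note] _xfs_log_force() now takes flags plus an optional log_flushed
out-pointer instead of an lsn and a mandatory pointer, and the void wrapper
simply logs and discards the error. The guarded "if (log_flushed)
*log_flushed = 1;" idiom replaces the old dummy-variable trick. A small
sketch of that optional-out-parameter pattern, with hypothetical names:

/* Sketch of the optional out-parameter idiom used by _xfs_log_force. */
#include <stdio.h>

static int log_force(unsigned flags, int *log_flushed)
{
    (void)flags;
    /* ... force the log; report the flush only if the caller asked ... */
    if (log_flushed)
        *log_flushed = 1;
    return 0;
}

/* Convenience wrapper for callers that ignore errors and the flush state. */
static void log_force_quiet(unsigned flags)
{
    int error = log_force(flags, NULL);

    if (error)
        fprintf(stderr, "log_force: error %d returned.\n", error);
}

int main(void)
{
    int flushed = 0;

    log_force(0, &flushed);   /* caller wants to know whether I/O happened */
    log_force_quiet(0);       /* caller does not */
    printf("flushed=%d\n", flushed);
    return 0;
}
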
@@ -3069,109 +2968,142 @@ no_sleep:
  * state and go to sleep or return.
  * If it is in any other state, go to sleep or return.
  *
- * If filesystem activity goes to zero, the iclog will get flushed only by
- * bdflush().
+ * Synchronous forces are implemented with a signal variable. All callers
+ * to force a given lsn to disk will wait on a the sv attached to the
+ * specific in-core log.  When given in-core log finally completes its
+ * write to disk, that thread will wake up all threads waiting on the
+ * sv.
  */
-STATIC int
-xlog_state_sync(xlog_t    *log,
-                xfs_lsn_t lsn,
-                uint      flags,
-                int       *log_flushed)
+int
+_xfs_log_force_lsn(
+    struct xfs_mount    *mp,
+    xfs_lsn_t           lsn,
+    uint                flags,
+    int                 *log_flushed)
 {
-    xlog_in_core_t  *iclog;
-    int             already_slept = 0;
+    struct log          *log = mp->m_log;
+    struct xlog_in_core *iclog;
+    int                 already_slept = 0;
+
+    ASSERT(lsn != 0);
+
+    XFS_STATS_INC(xs_log_force);
 
 try_again:
     spin_lock(&log->l_icloglock);
     iclog = log->l_iclog;
-
     if (iclog->ic_state & XLOG_STATE_IOERROR) {
         spin_unlock(&log->l_icloglock);
         return XFS_ERROR(EIO);
     }
 
     do {
         if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
             iclog = iclog->ic_next;
             continue;
         }
 
         if (iclog->ic_state == XLOG_STATE_DIRTY) {
             spin_unlock(&log->l_icloglock);
             return 0;
         }
 
         if (iclog->ic_state == XLOG_STATE_ACTIVE) {
             /*
              * We sleep here if we haven't already slept (e.g.
              * this is the first time we've looked at the correct
              * iclog buf) and the buffer before us is going to
              * be sync'ed. The reason for this is that if we
              * are doing sync transactions here, by waiting for
              * the previous I/O to complete, we can allow a few
              * more transactions into this iclog before we close
              * it down.
              *
              * Otherwise, we mark the buffer WANT_SYNC, and bump
-             * up the refcnt so we can release the log (which drops
-             * the ref count). The state switch keeps new transaction
-             * commits from using this buffer. When the current commits
-             * finish writing into the buffer, the refcount will drop to
-             * zero and the buffer will go out then.
+             * up the refcnt so we can release the log (which
+             * drops the ref count). The state switch keeps new
+             * transaction commits from using this buffer. When
+             * the current commits finish writing into the buffer,
+             * the refcount will drop to zero and the buffer will
+             * go out then.
              */
             if (!already_slept &&
-                (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC |
-                             XLOG_STATE_SYNCING))) {
+                (iclog->ic_prev->ic_state &
+                 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
                 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
+
                 XFS_STATS_INC(xs_log_force_sleep);
-                sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
-                        &log->l_icloglock, s);
-                *log_flushed = 1;
+
+                sv_wait(&iclog->ic_prev->ic_write_wait,
+                        PSWP, &log->l_icloglock, s);
+                if (log_flushed)
+                    *log_flushed = 1;
                 already_slept = 1;
                 goto try_again;
-            } else {
+            }
             atomic_inc(&iclog->ic_refcnt);
             xlog_state_switch_iclogs(log, iclog, 0);
             spin_unlock(&log->l_icloglock);
             if (xlog_state_release_iclog(log, iclog))
                 return XFS_ERROR(EIO);
-            *log_flushed = 1;
+            if (log_flushed)
+                *log_flushed = 1;
             spin_lock(&log->l_icloglock);
         }
-        }
 
         if ((flags & XFS_LOG_SYNC) && /* sleep */
-            !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
-
+            !(iclog->ic_state &
+              (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
             /*
              * Don't wait on completion if we know that we've
              * gotten a log write error.
              */
             if (iclog->ic_state & XLOG_STATE_IOERROR) {
                 spin_unlock(&log->l_icloglock);
                 return XFS_ERROR(EIO);
             }
             XFS_STATS_INC(xs_log_force_sleep);
             sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
             /*
              * No need to grab the log lock here since we're
              * only deciding whether or not to return EIO
              * and the memory read should be atomic.
              */
             if (iclog->ic_state & XLOG_STATE_IOERROR)
                 return XFS_ERROR(EIO);
-            *log_flushed = 1;
+
+            if (log_flushed)
+                *log_flushed = 1;
         } else {        /* just return */
             spin_unlock(&log->l_icloglock);
         }
-        return 0;
 
+        return 0;
     } while (iclog != log->l_iclog);
 
     spin_unlock(&log->l_icloglock);
     return 0;
-}    /* xlog_state_sync */
+}
+
+/*
+ * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force_lsn(
+    xfs_mount_t *mp,
+    xfs_lsn_t   lsn,
+    uint        flags)
+{
+    int error;
 
+    error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
+    if (error) {
+        xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+            "error %d returned.", error);
+    }
+}
 
 /*
  * Called when we want to mark the current iclog as being ready to sync to
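
[Editor's note] The comment on _xfs_log_force_lsn() above describes the
synchronous-force protocol: callers sleep on the sleep variable attached to a
specific in-core log and are woken en masse when its write completes. A
condensed pthread model of that wait/wakeup protocol follows; it is a toy
under stated assumptions, with hypothetical names, not kernel code.

/* Toy pthread model of the iclog force wait/wakeup; not kernel code. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t icloglock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t force_wait = PTHREAD_COND_INITIALIZER;
static int iclog_synced;

/* I/O completion: wake every thread waiting on this iclog. */
static void *iodone(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&icloglock);
    iclog_synced = 1;
    pthread_cond_broadcast(&force_wait);
    pthread_mutex_unlock(&icloglock);
    return NULL;
}

/* Synchronous force: sleep until the iclog write has completed. */
static void force_lsn_sync(void)
{
    pthread_mutex_lock(&icloglock);
    while (!iclog_synced)
        pthread_cond_wait(&force_wait, &icloglock);
    pthread_mutex_unlock(&icloglock);
}

int main(void)
{
    pthread_t io;

    pthread_create(&io, NULL, iodone, NULL);
    force_lsn_sync();
    pthread_join(io, NULL);
    printf("log forced to disk\n");
    return 0;
}
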
@@ -3536,7 +3468,6 @@ xfs_log_force_umount(
     xlog_ticket_t *tic;
     xlog_t        *log;
     int           retval;
-    int           dummy;
 
     log = mp->m_log;
 
@@ -3610,13 +3541,14 @@ xfs_log_force_umount(
     }
     spin_unlock(&log->l_grant_lock);
 
-    if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
+    if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
         ASSERT(!logerror);
         /*
          * Force the incore logs to disk before shutting the
          * log down completely.
          */
-        xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
+        _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+
         spin_lock(&log->l_icloglock);
         retval = xlog_state_ioerror(log);
         spin_unlock(&log->l_icloglock);