diff options
Diffstat (limited to 'fs/xfs/xfs_log_priv.h')
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 118 |
1 files changed, 114 insertions, 4 deletions
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 9cf695154451..8c072618965c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -152,8 +152,6 @@ static inline uint xlog_get_client_id(__be32 i) | |||
152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
154 | shutdown */ | 154 | shutdown */ |
155 | typedef __uint32_t xlog_tid_t; | ||
156 | |||
157 | 155 | ||
158 | #ifdef __KERNEL__ | 156 | #ifdef __KERNEL__ |
159 | /* | 157 | /* |
@@ -379,6 +377,99 @@ typedef struct xlog_in_core { | |||
379 | } xlog_in_core_t; | 377 | } xlog_in_core_t; |
380 | 378 | ||
381 | /* | 379 | /* |
380 | * The CIL context is used to aggregate per-transaction details as well as be | ||
381 | * passed to the iclog for checkpoint post-commit processing. After being | ||
382 | * passed to the iclog, another context needs to be allocated for tracking the | ||
383 | * next set of transactions to be aggregated into a checkpoint. | ||
384 | */ | ||
385 | struct xfs_cil; | ||
386 | |||
387 | struct xfs_cil_ctx { | ||
388 | struct xfs_cil *cil; | ||
389 | xfs_lsn_t sequence; /* chkpt sequence # */ | ||
390 | xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ | ||
391 | xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ | ||
392 | struct xlog_ticket *ticket; /* chkpt ticket */ | ||
393 | int nvecs; /* number of regions */ | ||
394 | int space_used; /* aggregate size of regions */ | ||
395 | struct list_head busy_extents; /* busy extents in chkpt */ | ||
396 | struct xfs_log_vec *lv_chain; /* logvecs being pushed */ | ||
397 | xfs_log_callback_t log_cb; /* completion callback hook. */ | ||
398 | struct list_head committing; /* ctx committing list */ | ||
399 | }; | ||
400 | |||
401 | /* | ||
402 | * Committed Item List structure | ||
403 | * | ||
404 | * This structure is used to track log items that have been committed but not | ||
405 | * yet written into the log. It is used only when the delayed logging mount | ||
406 | * option is enabled. | ||
407 | * | ||
408 | * This structure tracks the list of committing checkpoint contexts so | ||
409 | * we can avoid the problem of having to hold out new transactions during a | ||
410 | * flush until we have the commit record LSN of the checkpoint. We can | ||
411 | * traverse the list of committing contexts in xlog_cil_push_lsn() to find a | ||
412 | * sequence match and extract the commit LSN directly from there. If the | ||
413 | * checkpoint is still in the process of committing, we can block waiting for | ||
414 | * the commit LSN to be determined as well. This should make synchronous | ||
415 | * operations almost as efficient as the old logging methods. | ||
416 | */ | ||
417 | struct xfs_cil { | ||
418 | struct log *xc_log; | ||
419 | struct list_head xc_cil; | ||
420 | spinlock_t xc_cil_lock; | ||
421 | struct xfs_cil_ctx *xc_ctx; | ||
422 | struct rw_semaphore xc_ctx_lock; | ||
423 | struct list_head xc_committing; | ||
424 | sv_t xc_commit_wait; | ||
425 | }; | ||
426 | |||
427 | /* | ||
428 | * The amount of log space we should allow the CIL to aggregate is difficult to size. | ||
429 | * Whatever we choose, we have to make sure we can get a reservation for the log space | ||
430 | * effectively, that it is large enough to capture sufficient relogging to | ||
431 | * reduce log buffer IO significantly, but it is not too large for the log or | ||
432 | * induces too much latency when writing out through the iclogs. We track both | ||
433 | * space consumed and the number of vectors in the checkpoint context, so we | ||
434 | * need to decide which to use for limiting. | ||
435 | * | ||
436 | * Every log buffer we write out during a push needs a header reserved, which | ||
437 | * is at least one sector and more for v2 logs. Hence we need a reservation of | ||
438 | * at least 512 bytes per 32k of log space just for the LR headers. That means | ||
439 | * 16KB of reservation per megabyte of delayed logging space we will consume, | ||
440 | * plus various headers. The number of headers will vary based on the num of | ||
441 | * io vectors, so limiting on a specific number of vectors is going to result | ||
442 | * in transactions of varying size. IOWs, it is more consistent to track and | ||
443 | * limit space consumed in the log rather than by the number of objects being | ||
444 | * logged in order to prevent checkpoint ticket overruns. | ||
445 | * | ||
446 | * Further, use of static reservations through the log grant mechanism is | ||
447 | * problematic. It introduces a lot of complexity (e.g. reserve grant vs write | ||
448 | * grant) and a significant deadlock potential because regranting write space | ||
449 | * can block on log pushes. Hence if we have to regrant log space during a log | ||
450 | * push, we can deadlock. | ||
451 | * | ||
452 | * However, we can avoid this by use of a dynamic "reservation stealing" | ||
453 | * technique during transaction commit whereby unused reservation space in the | ||
454 | * transaction ticket is transferred to the CIL ctx commit ticket to cover the | ||
455 | * space needed by the checkpoint transaction. This means that we never need to | ||
456 | * specifically reserve space for the CIL checkpoint transaction, nor do we | ||
457 | * need to regrant space once the checkpoint completes. This also means the | ||
458 | * checkpoint transaction ticket is specific to the checkpoint context, rather | ||
459 | * than the CIL itself. | ||
460 | * | ||
461 | * With dynamic reservations, we can basically make up arbitrary limits for the | ||
462 | * checkpoint size so long as they don't violate any other size rules. Hence | ||
463 | * the initial maximum size for the checkpoint transaction will be set to a | ||
464 | * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit | ||
465 | * right now based on the latency of writing out a large amount of data through | ||
466 | * the circular iclog buffers. | ||
467 | */ | ||
468 | |||
469 | #define XLOG_CIL_SPACE_LIMIT(log) \ | ||
470 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
471 | |||
472 | /* | ||
382 | * The reservation head lsn is not made up of a cycle number and block number. | 473 | * The reservation head lsn is not made up of a cycle number and block number. |
383 | * Instead, it uses a cycle number and byte number. Logs don't expect to | 474 | * Instead, it uses a cycle number and byte number. Logs don't expect to |
384 | * overflow 31 bits worth of byte offset, so using a byte number will mean | 475 | * overflow 31 bits worth of byte offset, so using a byte number will mean |
@@ -388,6 +479,7 @@ typedef struct log { | |||
388 | /* The following fields don't need locking */ | 479 | /* The following fields don't need locking */ |
389 | struct xfs_mount *l_mp; /* mount point */ | 480 | struct xfs_mount *l_mp; /* mount point */ |
390 | struct xfs_ail *l_ailp; /* AIL log is working with */ | 481 | struct xfs_ail *l_ailp; /* AIL log is working with */ |
482 | struct xfs_cil *l_cilp; /* CIL log is working with */ | ||
391 | struct xfs_buf *l_xbuf; /* extra buffer for log | 483 | struct xfs_buf *l_xbuf; /* extra buffer for log |
392 | * wrapping */ | 484 | * wrapping */ |
393 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 485 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
@@ -438,14 +530,17 @@ typedef struct log { | |||
438 | 530 | ||
439 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 531 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
440 | 532 | ||
441 | |||
442 | /* common routines */ | 533 | /* common routines */ |
443 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); | 534 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); |
444 | extern int xlog_recover(xlog_t *log); | 535 | extern int xlog_recover(xlog_t *log); |
445 | extern int xlog_recover_finish(xlog_t *log); | 536 | extern int xlog_recover_finish(xlog_t *log); |
446 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 537 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
447 | 538 | ||
448 | extern kmem_zone_t *xfs_log_ticket_zone; | 539 | extern kmem_zone_t *xfs_log_ticket_zone; |
540 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, | ||
541 | int count, char client, uint xflags, | ||
542 | int alloc_flags); | ||
543 | |||
449 | 544 | ||
450 | static inline void | 545 | static inline void |
451 | xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) | 546 | xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) |
@@ -455,6 +550,21 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) | |||
455 | *off += bytes; | 550 | *off += bytes; |
456 | } | 551 | } |
457 | 552 | ||
553 | void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); | ||
554 | int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | ||
555 | struct xlog_ticket *tic, xfs_lsn_t *start_lsn, | ||
556 | xlog_in_core_t **commit_iclog, uint flags); | ||
557 | |||
558 | /* | ||
559 | * Committed Item List interfaces | ||
560 | */ | ||
561 | int xlog_cil_init(struct log *log); | ||
562 | void xlog_cil_init_post_recovery(struct log *log); | ||
563 | void xlog_cil_destroy(struct log *log); | ||
564 | |||
565 | int xlog_cil_push(struct log *log, int push_now); | ||
566 | xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); | ||
567 | |||
458 | /* | 568 | /* |
459 | * Unmount record type is used as a pseudo transaction type for the ticket. | 569 | * Unmount record type is used as a pseudo transaction type for the ticket. |
460 | * Its value must be outside the range of XFS_TRANS_* values. | 570 | * Its value must be outside the range of XFS_TRANS_* values. |