aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
author: Dave Chinner <david@fromorbit.com> 2013-06-27 02:04:52 -0400
committer: Ben Myers <bpm@sgi.com> 2013-06-27 14:33:11 -0400
commit: 5f6bed76c0c85cb4d04885a5de00b629deee550b (patch)
tree: d2a39b931b367da3a797472c95d886d9e3b6c989 /fs/xfs
parent: fd63875cc4cd60b9e5c609c24d75eaaad3e6d1c4 (diff)
xfs: Introduce an ordered buffer item
If we have a buffer that we have modified but we do not wish to physically log in a transaction (e.g. we've logged a logical change), we still need to ensure that transactional integrity is maintained. Hence we must not move the tail of the log past the transaction that the buffer is associated with before the buffer is written to disk.

This means these special buffers still need to be included in the transaction and added to the AIL just like a normal buffer, but we do not want the modifications to the buffer written into the transaction. In other words, what we want is an "ordered buffer" that maintains the same transactional life cycle as a physically logged buffer, just without the transcribing of the modifications to the log.

Hence we need to flag the buffer as an "ordered buffer" to avoid including it in vector size calculations or formatting during the transaction. Once the transaction is committed, the buffer appears for all intents to be the same as a physically logged buffer as it transitions through the log and AIL.

Relogging will also work just fine for such an ordered buffer - the logical transaction will be replayed before the subsequent modifications that relog the buffer, so everything will be reconstructed correctly by recovery.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/xfs_buf_item.c 75
-rw-r--r-- fs/xfs/xfs_buf_item.h 4
-rw-r--r-- fs/xfs/xfs_trace.h 4
-rw-r--r-- fs/xfs/xfs_trans.h 1
-rw-r--r-- fs/xfs/xfs_trans_buf.c 34
5 files changed, 87 insertions, 31 deletions
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 4ec431777048..61f68768ee84 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -140,6 +140,16 @@ xfs_buf_item_size(
140 140
141 ASSERT(bip->bli_flags & XFS_BLI_LOGGED); 141 ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
142 142
143 if (bip->bli_flags & XFS_BLI_ORDERED) {
144 /*
145 * The buffer has been logged just to order it.
146 * It is not being included in the transaction
147 * commit, so no vectors are used at all.
148 */
149 trace_xfs_buf_item_size_ordered(bip);
150 return XFS_LOG_VEC_ORDERED;
151 }
152
143 /* 153 /*
144 * the vector count is based on the number of buffer vectors we have 154 * the vector count is based on the number of buffer vectors we have
145 * dirty bits in. This will only be greater than one when we have a 155 * dirty bits in. This will only be greater than one when we have a
@@ -212,6 +222,7 @@ xfs_buf_item_format_segment(
212 goto out; 222 goto out;
213 } 223 }
214 224
225
215 /* 226 /*
216 * Fill in an iovec for each set of contiguous chunks. 227 * Fill in an iovec for each set of contiguous chunks.
217 */ 228 */
@@ -311,6 +322,16 @@ xfs_buf_item_format(
311 bip->bli_flags &= ~XFS_BLI_INODE_BUF; 322 bip->bli_flags &= ~XFS_BLI_INODE_BUF;
312 } 323 }
313 324
325 if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) ==
326 XFS_BLI_ORDERED) {
327 /*
328 * The buffer has been logged just to order it. It is not being
329 * included in the transaction commit, so don't format it.
330 */
331 trace_xfs_buf_item_format_ordered(bip);
332 return;
333 }
334
314 for (i = 0; i < bip->bli_format_count; i++) { 335 for (i = 0; i < bip->bli_format_count; i++) {
315 vecp = xfs_buf_item_format_segment(bip, vecp, offset, 336 vecp = xfs_buf_item_format_segment(bip, vecp, offset,
316 &bip->bli_formats[i]); 337 &bip->bli_formats[i]);
@@ -340,6 +361,7 @@ xfs_buf_item_pin(
340 361
341 ASSERT(atomic_read(&bip->bli_refcount) > 0); 362 ASSERT(atomic_read(&bip->bli_refcount) > 0);
342 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 363 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
364 (bip->bli_flags & XFS_BLI_ORDERED) ||
343 (bip->bli_flags & XFS_BLI_STALE)); 365 (bip->bli_flags & XFS_BLI_STALE));
344 366
345 trace_xfs_buf_item_pin(bip); 367 trace_xfs_buf_item_pin(bip);
@@ -512,8 +534,9 @@ xfs_buf_item_unlock(
512{ 534{
513 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 535 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
514 struct xfs_buf *bp = bip->bli_buf; 536 struct xfs_buf *bp = bip->bli_buf;
515 int aborted, clean, i; 537 bool clean;
516 uint hold; 538 bool aborted;
539 int flags;
517 540
518 /* Clear the buffer's association with this transaction. */ 541 /* Clear the buffer's association with this transaction. */
519 bp->b_transp = NULL; 542 bp->b_transp = NULL;
@@ -524,23 +547,21 @@ xfs_buf_item_unlock(
524 * (cancelled) buffers at unpin time, but we'll never go through the 547 * (cancelled) buffers at unpin time, but we'll never go through the
525 * pin/unpin cycle if we abort inside commit. 548 * pin/unpin cycle if we abort inside commit.
526 */ 549 */
527 aborted = (lip->li_flags & XFS_LI_ABORTED) != 0; 550 aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false;
528
529 /* 551 /*
530 * Before possibly freeing the buf item, determine if we should 552 * Before possibly freeing the buf item, copy the per-transaction state
531 * release the buffer at the end of this routine. 553 * so we can reference it safely later after clearing it from the
554 * buffer log item.
532 */ 555 */
533 hold = bip->bli_flags & XFS_BLI_HOLD; 556 flags = bip->bli_flags;
534 557 bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
535 /* Clear the per transaction state. */
536 bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD);
537 558
538 /* 559 /*
539 * If the buf item is marked stale, then don't do anything. We'll 560 * If the buf item is marked stale, then don't do anything. We'll
540 * unlock the buffer and free the buf item when the buffer is unpinned 561 * unlock the buffer and free the buf item when the buffer is unpinned
541 * for the last time. 562 * for the last time.
542 */ 563 */
543 if (bip->bli_flags & XFS_BLI_STALE) { 564 if (flags & XFS_BLI_STALE) {
544 trace_xfs_buf_item_unlock_stale(bip); 565 trace_xfs_buf_item_unlock_stale(bip);
545 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 566 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
546 if (!aborted) { 567 if (!aborted) {
@@ -557,13 +578,19 @@ xfs_buf_item_unlock(
557 * be the only reference to the buf item, so we free it anyway 578 * be the only reference to the buf item, so we free it anyway
558 * regardless of whether it is dirty or not. A dirty abort implies a 579 * regardless of whether it is dirty or not. A dirty abort implies a
559 * shutdown, anyway. 580 * shutdown, anyway.
581 *
582 * Ordered buffers are dirty but may have no recorded changes, so ensure
583 * we only release clean items here.
560 */ 584 */
561 clean = 1; 585 clean = (flags & XFS_BLI_DIRTY) ? false : true;
562 for (i = 0; i < bip->bli_format_count; i++) { 586 if (clean) {
563 if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, 587 int i;
564 bip->bli_formats[i].blf_map_size)) { 588 for (i = 0; i < bip->bli_format_count; i++) {
565 clean = 0; 589 if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
566 break; 590 bip->bli_formats[i].blf_map_size)) {
591 clean = false;
592 break;
593 }
567 } 594 }
568 } 595 }
569 if (clean) 596 if (clean)
@@ -576,7 +603,7 @@ xfs_buf_item_unlock(
576 } else 603 } else
577 atomic_dec(&bip->bli_refcount); 604 atomic_dec(&bip->bli_refcount);
578 605
579 if (!hold) 606 if (!(flags & XFS_BLI_HOLD))
580 xfs_buf_relse(bp); 607 xfs_buf_relse(bp);
581} 608}
582 609
@@ -842,12 +869,6 @@ xfs_buf_item_log(
842 struct xfs_buf *bp = bip->bli_buf; 869 struct xfs_buf *bp = bip->bli_buf;
843 870
844 /* 871 /*
845 * Mark the item as having some dirty data for
846 * quick reference in xfs_buf_item_dirty.
847 */
848 bip->bli_flags |= XFS_BLI_DIRTY;
849
850 /*
851 * walk each buffer segment and mark them dirty appropriately. 872 * walk each buffer segment and mark them dirty appropriately.
852 */ 873 */
853 start = 0; 874 start = 0;
@@ -873,7 +894,7 @@ xfs_buf_item_log(
873 894
874 895
875/* 896/*
876 * Return 1 if the buffer has some data that has been logged (at any 897 * Return 1 if the buffer has been logged or ordered in a transaction (at any
877 * point, not just the current transaction) and 0 if not. 898 * point, not just the current transaction) and 0 if not.
878 */ 899 */
879uint 900uint
@@ -907,11 +928,11 @@ void
907xfs_buf_item_relse( 928xfs_buf_item_relse(
908 xfs_buf_t *bp) 929 xfs_buf_t *bp)
909{ 930{
910 xfs_buf_log_item_t *bip; 931 xfs_buf_log_item_t *bip = bp->b_fspriv;
911 932
912 trace_xfs_buf_item_relse(bp, _RET_IP_); 933 trace_xfs_buf_item_relse(bp, _RET_IP_);
934 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
913 935
914 bip = bp->b_fspriv;
915 bp->b_fspriv = bip->bli_item.li_bio_list; 936 bp->b_fspriv = bip->bli_item.li_bio_list;
916 if (bp->b_fspriv == NULL) 937 if (bp->b_fspriv == NULL)
917 bp->b_iodone = NULL; 938 bp->b_iodone = NULL;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 2573d2a75fc8..0f1c247dc680 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -120,6 +120,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
120#define XFS_BLI_INODE_ALLOC_BUF 0x10 120#define XFS_BLI_INODE_ALLOC_BUF 0x10
121#define XFS_BLI_STALE_INODE 0x20 121#define XFS_BLI_STALE_INODE 0x20
122#define XFS_BLI_INODE_BUF 0x40 122#define XFS_BLI_INODE_BUF 0x40
123#define XFS_BLI_ORDERED 0x80
123 124
124#define XFS_BLI_FLAGS \ 125#define XFS_BLI_FLAGS \
125 { XFS_BLI_HOLD, "HOLD" }, \ 126 { XFS_BLI_HOLD, "HOLD" }, \
@@ -128,7 +129,8 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
128 { XFS_BLI_LOGGED, "LOGGED" }, \ 129 { XFS_BLI_LOGGED, "LOGGED" }, \
129 { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ 130 { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
130 { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ 131 { XFS_BLI_STALE_INODE, "STALE_INODE" }, \
131 { XFS_BLI_INODE_BUF, "INODE_BUF" } 132 { XFS_BLI_INODE_BUF, "INODE_BUF" }, \
133 { XFS_BLI_ORDERED, "ORDERED" }
132 134
133 135
134#ifdef __KERNEL__ 136#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e31867270077..ee8b3a3b5d65 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -486,9 +486,12 @@ DEFINE_EVENT(xfs_buf_item_class, name, \
486 TP_PROTO(struct xfs_buf_log_item *bip), \ 486 TP_PROTO(struct xfs_buf_log_item *bip), \
487 TP_ARGS(bip)) 487 TP_ARGS(bip))
488DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); 488DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
489DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered);
489DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); 490DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
490DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); 491DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
492DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered);
491DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); 493DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
494DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
492DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); 495DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
493DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); 496DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
494DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); 497DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
@@ -508,6 +511,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
508DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); 511DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
509DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); 512DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
510DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); 513DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
514DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);
511 515
512DECLARE_EVENT_CLASS(xfs_lock_class, 516DECLARE_EVENT_CLASS(xfs_lock_class,
513 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, 517 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 6d526569820c..822570ec605a 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -498,6 +498,7 @@ void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
498void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); 498void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
499void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); 499void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
500void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); 500void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
501void xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
501void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); 502void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
502void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 503void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
503void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); 504void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 73a5fa457e16..aa5a04b844d6 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -397,7 +397,6 @@ shutdown_abort:
397 return XFS_ERROR(EIO); 397 return XFS_ERROR(EIO);
398} 398}
399 399
400
401/* 400/*
402 * Release the buffer bp which was previously acquired with one of the 401 * Release the buffer bp which was previously acquired with one of the
403 * xfs_trans_... buffer allocation routines if the buffer has not 402 * xfs_trans_... buffer allocation routines if the buffer has not
@@ -603,8 +602,14 @@ xfs_trans_log_buf(xfs_trans_t *tp,
603 602
604 tp->t_flags |= XFS_TRANS_DIRTY; 603 tp->t_flags |= XFS_TRANS_DIRTY;
605 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; 604 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
606 bip->bli_flags |= XFS_BLI_LOGGED; 605
607 xfs_buf_item_log(bip, first, last); 606 /*
607 * If we have an ordered buffer we are not logging any dirty range, but
608 * the item still needs to be flagged as both dirty and logged.
609 */
610 bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
611 if (!(bip->bli_flags & XFS_BLI_ORDERED))
612 xfs_buf_item_log(bip, first, last);
608} 613}
609 614
610 615
@@ -757,6 +762,29 @@ xfs_trans_inode_alloc_buf(
757} 762}
758 763
759/* 764/*
765 * Mark the buffer as ordered for this transaction. This means
766 * that the contents of the buffer are not recorded in the transaction
767 * but it is tracked in the AIL as though it was. This allows us
768 * to record logical changes in transactions rather than the physical
769 * changes we make to the buffer without changing writeback ordering
770 * constraints of metadata buffers.
771 */
772void
773xfs_trans_ordered_buf(
774 struct xfs_trans *tp,
775 struct xfs_buf *bp)
776{
777 struct xfs_buf_log_item *bip = bp->b_fspriv;
778
779 ASSERT(bp->b_transp == tp);
780 ASSERT(bip != NULL);
781 ASSERT(atomic_read(&bip->bli_refcount) > 0);
782
783 bip->bli_flags |= XFS_BLI_ORDERED;
784 trace_xfs_buf_item_ordered(bip);
785}
786
787/*
760 * Set the type of the buffer for log recovery so that it can correctly identify 788 * Set the type of the buffer for log recovery so that it can correctly identify
761 * and hence attach the correct buffer ops to the buffer after replay. 789 * and hence attach the correct buffer ops to the buffer after replay.
762 */ 790 */