Diffstat (limited to 'fs/xfs')
-rw-r--r--   fs/xfs/linux-2.6/xfs_buf.c     | 16
-rw-r--r--   fs/xfs/linux-2.6/xfs_buf.h     | 11
-rw-r--r--   fs/xfs/linux-2.6/xfs_super.c   |  3
-rw-r--r--   fs/xfs/linux-2.6/xfs_sync.c    | 19
-rw-r--r--   fs/xfs/linux-2.6/xfs_trace.h   |  1
-rw-r--r--   fs/xfs/xfs_log.c               | 13
-rw-r--r--   fs/xfs/xfs_log_cil.c           | 12
-rw-r--r--   fs/xfs/xfs_log_priv.h          | 37
8 files changed, 48 insertions, 64 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6838aefca71f..f3ccaec5760a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -924,19 +924,7 @@ xfs_buf_iodone_work(
 	xfs_buf_t		*bp =
 		container_of(work, xfs_buf_t, b_iodone_work);
 
-	/*
-	 * We can get an EOPNOTSUPP to ordered writes.  Here we clear the
-	 * ordered flag and reissue them.  Because we can't tell the higher
-	 * layers directly that they should not issue ordered I/O anymore, they
-	 * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
-	 */
-	if ((bp->b_error == EOPNOTSUPP) &&
-	    (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
-		trace_xfs_buf_ordered_retry(bp, _RET_IP_);
-		bp->b_flags &= ~XBF_ORDERED;
-		bp->b_flags |= _XFS_BARRIER_FAILED;
-		xfs_buf_iorequest(bp);
-	} else if (bp->b_iodone)
+	if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
 	else if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
@@ -1195,7 +1183,7 @@ _xfs_buf_ioapply(
 
 	if (bp->b_flags & XBF_ORDERED) {
 		ASSERT(!(bp->b_flags & XBF_READ));
-		rw = WRITE_BARRIER;
+		rw = WRITE_FLUSH_FUA;
 	} else if (bp->b_flags & XBF_LOG_BUFFER) {
 		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
 		bp->b_flags &= ~_XBF_RUN_QUEUES;
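The hunk above removes the EOPNOTSUPP retry path because ordered writes are now issued with explicit flush/FUA semantics instead of barriers, so there is no failure mode left for the caller to fall back from. As a rough, hedged analogue in user space (not the kernel path itself; the file name and record contents are made up), the same "write it, make it durable, no fallback dance" pattern looks like this:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char record[] = "log record";
        int fd = open("logfile.bin", O_WRONLY | O_CREAT | O_APPEND, 0600);

        if (fd < 0)
                return 1;
        if (write(fd, record, sizeof(record)) != (ssize_t)sizeof(record))
                return 1;
        /* make the record durable; there is no "barrier failed, retry" path */
        if (fdatasync(fd) != 0)
                return 1;
        return close(fd);
}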
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 2a05614f0b92..9d021c73ea52 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -86,14 +86,6 @@ typedef enum {
  */
 #define _XBF_PAGE_LOCKED	(1 << 22)
 
-/*
- * If we try a barrier write, but it fails we have to communicate
- * this to the upper layers.  Unfortunately b_error gets overwritten
- * when the buffer is re-issued so we have to add another flag to
- * keep this information.
- */
-#define _XFS_BARRIER_FAILED	(1 << 23)
-
 typedef unsigned int xfs_buf_flags_t;
 
 #define XFS_BUF_FLAGS \
@@ -114,8 +106,7 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_RUN_QUEUES,	"RUN_QUEUES" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
-	{ _XBF_PAGE_LOCKED,	"PAGE_LOCKED" }, \
-	{ _XFS_BARRIER_FAILED,	"BARRIER_FAILED" }
+	{ _XBF_PAGE_LOCKED,	"PAGE_LOCKED" }
 
 
 typedef enum {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a4e07974955b..08fd3102128c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -693,8 +693,7 @@ void
 xfs_blkdev_issue_flush(
 	xfs_buftarg_t		*buftarg)
 {
-	blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
-			BLKDEV_IFL_WAIT);
+	blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
 }
 
 STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index d59c4a65d492..81976ffed7d6 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -668,14 +668,11 @@ xfs_inode_set_reclaim_tag(
 	xfs_perag_put(pag);
 }
 
-void
-__xfs_inode_clear_reclaim_tag(
-	xfs_mount_t	*mp,
+STATIC void
+__xfs_inode_clear_reclaim(
 	xfs_perag_t	*pag,
 	xfs_inode_t	*ip)
 {
-	radix_tree_tag_clear(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 	pag->pag_ici_reclaimable--;
 	if (!pag->pag_ici_reclaimable) {
 		/* clear the reclaim tag from the perag radix tree */
@@ -689,6 +686,17 @@ __xfs_inode_clear_reclaim_tag(
 	}
 }
 
+void
+__xfs_inode_clear_reclaim_tag(
+	xfs_mount_t	*mp,
+	xfs_perag_t	*pag,
+	xfs_inode_t	*ip)
+{
+	radix_tree_tag_clear(&pag->pag_ici_root,
+			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	__xfs_inode_clear_reclaim(pag, ip);
+}
+
 /*
  * Inodes in different states need to be treated differently, and the return
  * value of xfs_iflush is not sufficient to get this right. The following table
@@ -838,6 +846,7 @@ reclaim:
 	if (!radix_tree_delete(&pag->pag_ici_root,
 				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
 		ASSERT(0);
+	__xfs_inode_clear_reclaim(pag, ip);
 	write_unlock(&pag->pag_ici_lock);
 
 	/*
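The xfs_sync.c change above fixes reclaimable-inode accounting by splitting the counter bookkeeping into __xfs_inode_clear_reclaim(), which both the tag-clearing path and the direct reclaim path (the one that calls radix_tree_delete()) now share. A minimal user-space sketch of that pattern, with hypothetical names standing in for the perag fields in the diff, compiles and runs on its own:

#include <assert.h>
#include <stdio.h>

struct perag {
        int     ici_reclaimable;        /* stand-in for pag_ici_reclaimable */
};

/* common accounting, analogous to __xfs_inode_clear_reclaim() */
static void clear_reclaim(struct perag *pag)
{
        pag->ici_reclaimable--;
        assert(pag->ici_reclaimable >= 0);
}

/* analogous to __xfs_inode_clear_reclaim_tag(): clear the tag, fix the count */
static void clear_reclaim_tag(struct perag *pag)
{
        /* ... radix_tree_tag_clear() would go here ... */
        clear_reclaim(pag);
}

/* analogous to the reclaim path that deletes the radix tree entry outright */
static void reclaim_inode(struct perag *pag)
{
        /* ... radix_tree_delete() would go here ... */
        clear_reclaim(pag);     /* the decrement this path used to miss */
}

int main(void)
{
        struct perag pag = { .ici_reclaimable = 2 };

        clear_reclaim_tag(&pag);
        reclaim_inode(&pag);
        printf("reclaimable inodes left: %d\n", pag.ici_reclaimable);   /* 0 */
        return 0;
}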
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index be5dffd282a1..8fe311a456e2 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -325,7 +325,6 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
 DEFINE_BUF_EVENT(xfs_buf_lock_done);
 DEFINE_BUF_EVENT(xfs_buf_cond_lock);
 DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
 DEFINE_BUF_EVENT(xfs_buf_iowait);
 DEFINE_BUF_EVENT(xfs_buf_iowait_done);
 DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 33f718f92a48..ba8e36e0b4e7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -917,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp)
 	l = iclog->ic_log;
 
 	/*
-	 * If the _XFS_BARRIER_FAILED flag was set by a lower
-	 * layer, it means the underlying device no longer supports
-	 * barrier I/O. Warn loudly and turn off barriers.
-	 */
-	if (bp->b_flags & _XFS_BARRIER_FAILED) {
-		bp->b_flags &= ~_XFS_BARRIER_FAILED;
-		l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
-		xfs_fs_cmn_err(CE_WARN, l->l_mp,
-			"xlog_iodone: Barriers are no longer supported"
-			" by device. Disabling barriers\n");
-	}
-
-	/*
 	 * Race to shutdown the filesystem if we see an error.
 	 */
 	if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ed575fb4b495..7e206fc1fa36 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -405,9 +405,15 @@ xlog_cil_push(
 	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
 	new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
-	/* lock out transaction commit, but don't block on background push */
+	/*
+	 * Lock out transaction commit, but don't block for background pushes
+	 * unless we are well over the CIL space limit. See the definition of
+	 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
+	 * used here.
+	 */
 	if (!down_write_trylock(&cil->xc_ctx_lock)) {
-		if (!push_seq)
+		if (!push_seq &&
+		    cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
 			goto out_free_ticket;
 		down_write(&cil->xc_ctx_lock);
 	}
@@ -422,7 +428,7 @@ xlog_cil_push(
 		goto out_skip;
 
 	/* check for a previously pushed seqeunce */
-	if (push_seq < cil->xc_ctx->sequence)
+	if (push_seq && push_seq < cil->xc_ctx->sequence)
 		goto out_skip;
 
 	/*
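The new check in xlog_cil_push() lets a contended background push (push_seq == 0) back off only while the CIL is still below the hard space limit; past that limit it blocks on xc_ctx_lock and pushes anyway. A hedged, self-contained sketch of that decision, using stand-in names rather than the kernel structures:

#include <stdbool.h>
#include <stdio.h>

/* stand-in for XLOG_CIL_HARD_SPACE_LIMIT(); logsize in bytes */
#define CIL_HARD_SPACE_LIMIT(logsize)   (3 * ((logsize) >> 4))

/* true if a background push that failed the trylock may simply give up */
static bool background_push_can_back_off(unsigned long push_seq,
                                         unsigned long space_used,
                                         unsigned long logsize)
{
        return push_seq == 0 && space_used < CIL_HARD_SPACE_LIMIT(logsize);
}

int main(void)
{
        unsigned long logsize = 128UL << 20;    /* assume a 128MB log */

        /* below the 24MB hard limit: back off, a later push will catch up */
        printf("%d\n", background_push_can_back_off(0, 20UL << 20, logsize));
        /* over the hard limit: must block and push now */
        printf("%d\n", background_push_can_back_off(0, 30UL << 20, logsize));
        return 0;
}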
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ced52b98b322..edcdfe01617f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -426,13 +426,13 @@ struct xfs_cil {
 };
 
 /*
- * The amount of log space we should the CIL to aggregate is difficult to size.
- * Whatever we chose we have to make we can get a reservation for the log space
- * effectively, that it is large enough to capture sufficient relogging to
- * reduce log buffer IO significantly, but it is not too large for the log or
- * induces too much latency when writing out through the iclogs. We track both
- * space consumed and the number of vectors in the checkpoint context, so we
- * need to decide which to use for limiting.
+ * The amount of log space we allow the CIL to aggregate is difficult to size.
+ * Whatever we choose, we have to make sure we can get a reservation for the
+ * log space effectively, that it is large enough to capture sufficient
+ * relogging to reduce log buffer IO significantly, but it is not too large for
+ * the log or induces too much latency when writing out through the iclogs. We
+ * track both space consumed and the number of vectors in the checkpoint
+ * context, so we need to decide which to use for limiting.
  *
  * Every log buffer we write out during a push needs a header reserved, which
  * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -459,16 +459,21 @@ struct xfs_cil {
  * checkpoint transaction ticket is specific to the checkpoint context, rather
  * than the CIL itself.
  *
- * With dynamic reservations, we can basically make up arbitrary limits for the
- * checkpoint size so long as they don't violate any other size rules. Hence
- * the initial maximum size for the checkpoint transaction will be set to a
- * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit
- * right now based on the latency of writing out a large amount of data through
- * the circular iclog buffers.
+ * With dynamic reservations, we can effectively make up arbitrary limits for
+ * the checkpoint size so long as they don't violate any other size rules.
+ * Recovery imposes a rule that no transaction exceed half the log, so we are
+ * limited by that. Furthermore, the log transaction reservation subsystem
+ * tries to keep 25% of the log free, so we need to keep below that limit or we
+ * risk running out of free log space to start any new transactions.
+ *
+ * In order to keep background CIL push efficient, we will set a lower
+ * threshold at which background pushing is attempted without blocking current
+ * transaction commits. A separate, higher bound defines when CIL pushes are
+ * enforced to ensure we stay within our maximum checkpoint size bounds.
+ * threshold, yet give us plenty of space for aggregation on large logs.
  */
-
-#define XLOG_CIL_SPACE_LIMIT(log) \
-	(min((log->l_logsize >> 2), (8 * 1024 * 1024)))
+#define XLOG_CIL_SPACE_LIMIT(log)	(log->l_logsize >> 3)
+#define XLOG_CIL_HARD_SPACE_LIMIT(log)	(3 * (log->l_logsize >> 4))
 
 /*
  * The reservation head lsn is not made up of a cycle number and block number.
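For a concrete feel of the new macros, here is a small, hedged example (plain user-space C, not kernel code) that evaluates both limits for an assumed 128MB log: the background push threshold works out to 1/8 of the log (16MB) and the hard, blocking threshold to 3/16 of the log (24MB), comfortably under the 25% the reservation subsystem tries to keep free.

#include <stdio.h>

/* same arithmetic as XLOG_CIL_SPACE_LIMIT()/XLOG_CIL_HARD_SPACE_LIMIT() above */
#define CIL_SPACE_LIMIT(logsize)        ((logsize) >> 3)
#define CIL_HARD_SPACE_LIMIT(logsize)   (3 * ((logsize) >> 4))

int main(void)
{
        unsigned long logsize = 128UL << 20;    /* assumed 128MB log */

        printf("background push threshold: %lu MB\n",
               CIL_SPACE_LIMIT(logsize) >> 20);         /* 16 MB */
        printf("hard (blocking) threshold: %lu MB\n",
               CIL_HARD_SPACE_LIMIT(logsize) >> 20);    /* 24 MB */
        return 0;
}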