aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h11
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_log.c13
-rw-r--r--fs/xfs/xfs_log_cil.c12
-rw-r--r--fs/xfs/xfs_log_priv.h37
8 files changed, 48 insertions, 64 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6838aefca71f..f3ccaec5760a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -924,19 +924,7 @@ xfs_buf_iodone_work(
924 xfs_buf_t *bp = 924 xfs_buf_t *bp =
925 container_of(work, xfs_buf_t, b_iodone_work); 925 container_of(work, xfs_buf_t, b_iodone_work);
926 926
927 /* 927 if (bp->b_iodone)
928 * We can get an EOPNOTSUPP to ordered writes. Here we clear the
929 * ordered flag and reissue them. Because we can't tell the higher
930 * layers directly that they should not issue ordered I/O anymore, they
931 * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
932 */
933 if ((bp->b_error == EOPNOTSUPP) &&
934 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
935 trace_xfs_buf_ordered_retry(bp, _RET_IP_);
936 bp->b_flags &= ~XBF_ORDERED;
937 bp->b_flags |= _XFS_BARRIER_FAILED;
938 xfs_buf_iorequest(bp);
939 } else if (bp->b_iodone)
940 (*(bp->b_iodone))(bp); 928 (*(bp->b_iodone))(bp);
941 else if (bp->b_flags & XBF_ASYNC) 929 else if (bp->b_flags & XBF_ASYNC)
942 xfs_buf_relse(bp); 930 xfs_buf_relse(bp);
@@ -1195,7 +1183,7 @@ _xfs_buf_ioapply(
1195 1183
1196 if (bp->b_flags & XBF_ORDERED) { 1184 if (bp->b_flags & XBF_ORDERED) {
1197 ASSERT(!(bp->b_flags & XBF_READ)); 1185 ASSERT(!(bp->b_flags & XBF_READ));
1198 rw = WRITE_BARRIER; 1186 rw = WRITE_FLUSH_FUA;
1199 } else if (bp->b_flags & XBF_LOG_BUFFER) { 1187 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1200 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1188 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1201 bp->b_flags &= ~_XBF_RUN_QUEUES; 1189 bp->b_flags &= ~_XBF_RUN_QUEUES;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 2a05614f0b92..9d021c73ea52 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -86,14 +86,6 @@ typedef enum {
86 */ 86 */
87#define _XBF_PAGE_LOCKED (1 << 22) 87#define _XBF_PAGE_LOCKED (1 << 22)
88 88
89/*
90 * If we try a barrier write, but it fails we have to communicate
91 * this to the upper layers. Unfortunately b_error gets overwritten
92 * when the buffer is re-issued so we have to add another flag to
93 * keep this information.
94 */
95#define _XFS_BARRIER_FAILED (1 << 23)
96
97typedef unsigned int xfs_buf_flags_t; 89typedef unsigned int xfs_buf_flags_t;
98 90
99#define XFS_BUF_FLAGS \ 91#define XFS_BUF_FLAGS \
@@ -114,8 +106,7 @@ typedef unsigned int xfs_buf_flags_t;
114 { _XBF_PAGES, "PAGES" }, \ 106 { _XBF_PAGES, "PAGES" }, \
115 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ 107 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
116 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 108 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
117 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ 109 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
118 { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
119 110
120 111
121typedef enum { 112typedef enum {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a4e07974955b..08fd3102128c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -693,8 +693,7 @@ void
693xfs_blkdev_issue_flush( 693xfs_blkdev_issue_flush(
694 xfs_buftarg_t *buftarg) 694 xfs_buftarg_t *buftarg)
695{ 695{
696 blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, 696 blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
697 BLKDEV_IFL_WAIT);
698} 697}
699 698
700STATIC void 699STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index d59c4a65d492..81976ffed7d6 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -668,14 +668,11 @@ xfs_inode_set_reclaim_tag(
668 xfs_perag_put(pag); 668 xfs_perag_put(pag);
669} 669}
670 670
671void 671STATIC void
672__xfs_inode_clear_reclaim_tag( 672__xfs_inode_clear_reclaim(
673 xfs_mount_t *mp,
674 xfs_perag_t *pag, 673 xfs_perag_t *pag,
675 xfs_inode_t *ip) 674 xfs_inode_t *ip)
676{ 675{
677 radix_tree_tag_clear(&pag->pag_ici_root,
678 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
679 pag->pag_ici_reclaimable--; 676 pag->pag_ici_reclaimable--;
680 if (!pag->pag_ici_reclaimable) { 677 if (!pag->pag_ici_reclaimable) {
681 /* clear the reclaim tag from the perag radix tree */ 678 /* clear the reclaim tag from the perag radix tree */
@@ -689,6 +686,17 @@ __xfs_inode_clear_reclaim_tag(
689 } 686 }
690} 687}
691 688
689void
690__xfs_inode_clear_reclaim_tag(
691 xfs_mount_t *mp,
692 xfs_perag_t *pag,
693 xfs_inode_t *ip)
694{
695 radix_tree_tag_clear(&pag->pag_ici_root,
696 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
697 __xfs_inode_clear_reclaim(pag, ip);
698}
699
692/* 700/*
693 * Inodes in different states need to be treated differently, and the return 701 * Inodes in different states need to be treated differently, and the return
694 * value of xfs_iflush is not sufficient to get this right. The following table 702 * value of xfs_iflush is not sufficient to get this right. The following table
@@ -838,6 +846,7 @@ reclaim:
838 if (!radix_tree_delete(&pag->pag_ici_root, 846 if (!radix_tree_delete(&pag->pag_ici_root,
839 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) 847 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
840 ASSERT(0); 848 ASSERT(0);
849 __xfs_inode_clear_reclaim(pag, ip);
841 write_unlock(&pag->pag_ici_lock); 850 write_unlock(&pag->pag_ici_lock);
842 851
843 /* 852 /*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index be5dffd282a1..8fe311a456e2 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -325,7 +325,6 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
325DEFINE_BUF_EVENT(xfs_buf_lock_done); 325DEFINE_BUF_EVENT(xfs_buf_lock_done);
326DEFINE_BUF_EVENT(xfs_buf_cond_lock); 326DEFINE_BUF_EVENT(xfs_buf_cond_lock);
327DEFINE_BUF_EVENT(xfs_buf_unlock); 327DEFINE_BUF_EVENT(xfs_buf_unlock);
328DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
329DEFINE_BUF_EVENT(xfs_buf_iowait); 328DEFINE_BUF_EVENT(xfs_buf_iowait);
330DEFINE_BUF_EVENT(xfs_buf_iowait_done); 329DEFINE_BUF_EVENT(xfs_buf_iowait_done);
331DEFINE_BUF_EVENT(xfs_buf_delwri_queue); 330DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 33f718f92a48..ba8e36e0b4e7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -917,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp)
917 l = iclog->ic_log; 917 l = iclog->ic_log;
918 918
919 /* 919 /*
920 * If the _XFS_BARRIER_FAILED flag was set by a lower
921 * layer, it means the underlying device no longer supports
922 * barrier I/O. Warn loudly and turn off barriers.
923 */
924 if (bp->b_flags & _XFS_BARRIER_FAILED) {
925 bp->b_flags &= ~_XFS_BARRIER_FAILED;
926 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
927 xfs_fs_cmn_err(CE_WARN, l->l_mp,
928 "xlog_iodone: Barriers are no longer supported"
929 " by device. Disabling barriers\n");
930 }
931
932 /*
933 * Race to shutdown the filesystem if we see an error. 920 * Race to shutdown the filesystem if we see an error.
934 */ 921 */
935 if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, 922 if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ed575fb4b495..7e206fc1fa36 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -405,9 +405,15 @@ xlog_cil_push(
405 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 405 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
406 new_ctx->ticket = xlog_cil_ticket_alloc(log); 406 new_ctx->ticket = xlog_cil_ticket_alloc(log);
407 407
408 /* lock out transaction commit, but don't block on background push */ 408 /*
409 * Lock out transaction commit, but don't block for background pushes
410 * unless we are well over the CIL space limit. See the definition of
411 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
412 * used here.
413 */
409 if (!down_write_trylock(&cil->xc_ctx_lock)) { 414 if (!down_write_trylock(&cil->xc_ctx_lock)) {
410 if (!push_seq) 415 if (!push_seq &&
416 cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
411 goto out_free_ticket; 417 goto out_free_ticket;
412 down_write(&cil->xc_ctx_lock); 418 down_write(&cil->xc_ctx_lock);
413 } 419 }
@@ -422,7 +428,7 @@ xlog_cil_push(
422 goto out_skip; 428 goto out_skip;
423 429
424 /* check for a previously pushed sequence */ 430 /* check for a previously pushed sequence */
425 if (push_seq < cil->xc_ctx->sequence) 431 if (push_seq && push_seq < cil->xc_ctx->sequence)
426 goto out_skip; 432 goto out_skip;
427 433
428 /* 434 /*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ced52b98b322..edcdfe01617f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -426,13 +426,13 @@ struct xfs_cil {
426}; 426};
427 427
428/* 428/*
429 * The amount of log space we should the CIL to aggregate is difficult to size. 429 * The amount of log space we allow the CIL to aggregate is difficult to size.
430 * Whatever we chose we have to make we can get a reservation for the log space 430 * Whatever we choose, we have to make sure we can get a reservation for the
431 * effectively, that it is large enough to capture sufficient relogging to 431 * log space effectively, that it is large enough to capture sufficient
432 * reduce log buffer IO significantly, but it is not too large for the log or 432 * relogging to reduce log buffer IO significantly, but it is not too large for
433 * induces too much latency when writing out through the iclogs. We track both 433 * the log or induces too much latency when writing out through the iclogs. We
434 * space consumed and the number of vectors in the checkpoint context, so we 434 * track both space consumed and the number of vectors in the checkpoint
435 * need to decide which to use for limiting. 435 * context, so we need to decide which to use for limiting.
436 * 436 *
437 * Every log buffer we write out during a push needs a header reserved, which 437 * Every log buffer we write out during a push needs a header reserved, which
438 * is at least one sector and more for v2 logs. Hence we need a reservation of 438 * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -459,16 +459,21 @@ struct xfs_cil {
459 * checkpoint transaction ticket is specific to the checkpoint context, rather 459 * checkpoint transaction ticket is specific to the checkpoint context, rather
460 * than the CIL itself. 460 * than the CIL itself.
461 * 461 *
462 * With dynamic reservations, we can basically make up arbitrary limits for the 462 * With dynamic reservations, we can effectively make up arbitrary limits for
463 * checkpoint size so long as they don't violate any other size rules. Hence 463 * the checkpoint size so long as they don't violate any other size rules.
464 * the initial maximum size for the checkpoint transaction will be set to a 464 * Recovery imposes a rule that no transaction exceed half the log, so we are
465 * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit 465 * limited by that. Furthermore, the log transaction reservation subsystem
466 * right now based on the latency of writing out a large amount of data through 466 * tries to keep 25% of the log free, so we need to keep below that limit or we
467 * the circular iclog buffers. 467 * risk running out of free log space to start any new transactions.
468 *
469 * In order to keep background CIL push efficient, we will set a lower
470 * threshold at which background pushing is attempted without blocking current
471 * transaction commits. A separate, higher bound defines when CIL pushes are
472 * enforced to ensure we stay within our maximum checkpoint size bounds,
473 * yet still give us plenty of space for aggregation on large logs.
468 */ 474 */
469 475#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
470#define XLOG_CIL_SPACE_LIMIT(log) \ 476#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
471 (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
472 477
473/* 478/*
474 * The reservation head lsn is not made up of a cycle number and block number. 479 * The reservation head lsn is not made up of a cycle number and block number.