diff options
author | Dave Chinner <david@fromorbit.com> | 2010-01-01 21:38:56 -0500 |
---|---|---|
committer | Alex Elder <aelder@sgi.com> | 2010-01-10 13:22:02 -0500 |
commit | fd45e4784164d1017521086524e3442318c67370 (patch) | |
tree | aaaf8f524d4bf094f6debadc6e580f81038f4915 /fs | |
parent | 44e08c45cc14e6190a424be8d450070c8e508fad (diff) |
xfs: Ensure we force all busy extents in range to disk
When we search for and find a busy extent during allocation we
force the log out to ensure the extent free transaction is on
disk before the allocation transaction. The current implementation
has a subtle bug in it--it does not handle multiple overlapping
ranges.
That is, if we free lots of little extents into a single
contiguous extent, then allocate the contiguous extent, the busy
search code stops searching at the first extent it finds that
overlaps the allocated range. It then forces the log out to the
commit LSN of that first extent's transaction.
Unfortunately, the other busy ranges might have more recent
commit LSNs than the first busy extent that is found, and this
results in xfs_alloc_search_busy() returning before all the
extent free transactions are on disk for the range being
allocated. This can lead to metadata corruption or
stale data exposure after a crash because log replay won't replay
all the extent free transactions that cover the allocation range.
Modified-by: Alex Elder <aelder@sgi.com>
(Dropped the "found" argument from the xfs_alloc_busysearch trace
event.)
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.c | 44 |
2 files changed, 27 insertions, 29 deletions
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 8cb42b4656fb..c22a608321a3 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1079,28 +1079,28 @@ TRACE_EVENT(xfs_alloc_unbusy, | |||
1079 | 1079 | ||
1080 | TRACE_EVENT(xfs_alloc_busysearch, | 1080 | TRACE_EVENT(xfs_alloc_busysearch, |
1081 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1081 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, |
1082 | xfs_extlen_t len, int found), | 1082 | xfs_extlen_t len, xfs_lsn_t lsn), |
1083 | TP_ARGS(mp, agno, agbno, len, found), | 1083 | TP_ARGS(mp, agno, agbno, len, lsn), |
1084 | TP_STRUCT__entry( | 1084 | TP_STRUCT__entry( |
1085 | __field(dev_t, dev) | 1085 | __field(dev_t, dev) |
1086 | __field(xfs_agnumber_t, agno) | 1086 | __field(xfs_agnumber_t, agno) |
1087 | __field(xfs_agblock_t, agbno) | 1087 | __field(xfs_agblock_t, agbno) |
1088 | __field(xfs_extlen_t, len) | 1088 | __field(xfs_extlen_t, len) |
1089 | __field(int, found) | 1089 | __field(xfs_lsn_t, lsn) |
1090 | ), | 1090 | ), |
1091 | TP_fast_assign( | 1091 | TP_fast_assign( |
1092 | __entry->dev = mp->m_super->s_dev; | 1092 | __entry->dev = mp->m_super->s_dev; |
1093 | __entry->agno = agno; | 1093 | __entry->agno = agno; |
1094 | __entry->agbno = agbno; | 1094 | __entry->agbno = agbno; |
1095 | __entry->len = len; | 1095 | __entry->len = len; |
1096 | __entry->found = found; | 1096 | __entry->lsn = lsn; |
1097 | ), | 1097 | ), |
1098 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", | 1098 | TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx", |
1099 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1099 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1100 | __entry->agno, | 1100 | __entry->agno, |
1101 | __entry->agbno, | 1101 | __entry->agbno, |
1102 | __entry->len, | 1102 | __entry->len, |
1103 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1103 | __entry->lsn) |
1104 | ); | 1104 | ); |
1105 | 1105 | ||
1106 | TRACE_EVENT(xfs_agf, | 1106 | TRACE_EVENT(xfs_agf, |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index a1c65fc6d9c4..275b1f4f9430 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -2563,43 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
2563 | xfs_mount_t *mp; | 2563 | xfs_mount_t *mp; |
2564 | xfs_perag_busy_t *bsy; | 2564 | xfs_perag_busy_t *bsy; |
2565 | xfs_agblock_t uend, bend; | 2565 | xfs_agblock_t uend, bend; |
2566 | xfs_lsn_t lsn; | 2566 | xfs_lsn_t lsn = 0; |
2567 | int cnt; | 2567 | int cnt; |
2568 | 2568 | ||
2569 | mp = tp->t_mountp; | 2569 | mp = tp->t_mountp; |
2570 | 2570 | ||
2571 | spin_lock(&mp->m_perag[agno].pagb_lock); | 2571 | spin_lock(&mp->m_perag[agno].pagb_lock); |
2572 | cnt = mp->m_perag[agno].pagb_count; | ||
2573 | 2572 | ||
2574 | uend = bno + len - 1; | 2573 | uend = bno + len - 1; |
2575 | 2574 | ||
2576 | /* search pagb_list for this slot, skipping open slots */ | 2575 | /* |
2577 | for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { | 2576 | * search pagb_list for this slot, skipping open slots. We have to |
2577 | * search the entire array as there may be multiple overlaps and | ||
2578 | * we have to get the most recent LSN for the log force to push out | ||
2579 | * all the transactions that span the range. | ||
2580 | */ | ||
2581 | for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) { | ||
2582 | bsy = &mp->m_perag[agno].pagb_list[cnt]; | ||
2583 | if (!bsy->busy_tp) | ||
2584 | continue; | ||
2578 | 2585 | ||
2579 | /* | 2586 | bend = bsy->busy_start + bsy->busy_length - 1; |
2580 | * (start1,length1) within (start2, length2) | 2587 | if (bno > bend || uend < bsy->busy_start) |
2581 | */ | 2588 | continue; |
2582 | if (bsy->busy_tp != NULL) { | ||
2583 | bend = bsy->busy_start + bsy->busy_length - 1; | ||
2584 | if ((bno > bend) || (uend < bsy->busy_start)) { | ||
2585 | cnt--; | ||
2586 | } else { | ||
2587 | break; | ||
2588 | } | ||
2589 | } | ||
2590 | } | ||
2591 | 2589 | ||
2592 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt); | 2590 | /* (start1,length1) within (start2, length2) */ |
2591 | if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) | ||
2592 | lsn = bsy->busy_tp->t_commit_lsn; | ||
2593 | } | ||
2594 | spin_unlock(&mp->m_perag[agno].pagb_lock); | ||
2595 | trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); | ||
2593 | 2596 | ||
2594 | /* | 2597 | /* |
2595 | * If a block was found, force the log through the LSN of the | 2598 | * If a block was found, force the log through the LSN of the |
2596 | * transaction that freed the block | 2599 | * transaction that freed the block |
2597 | */ | 2600 | */ |
2598 | if (cnt) { | 2601 | if (lsn) |
2599 | lsn = bsy->busy_tp->t_commit_lsn; | ||
2600 | spin_unlock(&mp->m_perag[agno].pagb_lock); | ||
2601 | xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); | 2602 | xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); |
2602 | } else { | ||
2603 | spin_unlock(&mp->m_perag[agno].pagb_lock); | ||
2604 | } | ||
2605 | } | 2603 | } |