aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2010-01-01 21:38:56 -0500
committerAlex Elder <aelder@sgi.com>2010-01-10 13:22:02 -0500
commitfd45e4784164d1017521086524e3442318c67370 (patch)
treeaaaf8f524d4bf094f6debadc6e580f81038f4915 /fs
parent44e08c45cc14e6190a424be8d450070c8e508fad (diff)
xfs: Ensure we force all busy extents in range to disk
When we search for and find a busy extent during allocation we force the log out to ensure the extent free transaction is on disk before the allocation transaction. The current implementation has a subtle bug in it--it does not handle multiple overlapping ranges. That is, if we free lots of little extents into a single contiguous extent, then allocate the contiguous extent, the busy search code stops searching at the first extent it finds that overlaps the allocated range. It then uses the commit LSN of the transaction to force the log out to. Unfortunately, the other busy ranges might have more recent commit LSNs than the first busy extent that is found, and this results in xfs_alloc_search_busy() returning before all the extent free transactions are on disk for the range being allocated. This can lead to potential metadata corruption or stale data exposure after a crash because log replay won't replay all the extent free transactions that cover the allocation range. Modified-by: Alex Elder <aelder@sgi.com> (Dropped the "found" argument from the xfs_alloc_busysearch trace event.) Signed-off-by: Dave Chinner <david@fromorbit.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h12
-rw-r--r--fs/xfs/xfs_alloc.c44
2 files changed, 27 insertions, 29 deletions
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 8cb42b4656fb..c22a608321a3 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1079,28 +1079,28 @@ TRACE_EVENT(xfs_alloc_unbusy,
1079 1079
1080TRACE_EVENT(xfs_alloc_busysearch, 1080TRACE_EVENT(xfs_alloc_busysearch,
1081 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, 1081 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1082 xfs_extlen_t len, int found), 1082 xfs_extlen_t len, xfs_lsn_t lsn),
1083 TP_ARGS(mp, agno, agbno, len, found), 1083 TP_ARGS(mp, agno, agbno, len, lsn),
1084 TP_STRUCT__entry( 1084 TP_STRUCT__entry(
1085 __field(dev_t, dev) 1085 __field(dev_t, dev)
1086 __field(xfs_agnumber_t, agno) 1086 __field(xfs_agnumber_t, agno)
1087 __field(xfs_agblock_t, agbno) 1087 __field(xfs_agblock_t, agbno)
1088 __field(xfs_extlen_t, len) 1088 __field(xfs_extlen_t, len)
1089 __field(int, found) 1089 __field(xfs_lsn_t, lsn)
1090 ), 1090 ),
1091 TP_fast_assign( 1091 TP_fast_assign(
1092 __entry->dev = mp->m_super->s_dev; 1092 __entry->dev = mp->m_super->s_dev;
1093 __entry->agno = agno; 1093 __entry->agno = agno;
1094 __entry->agbno = agbno; 1094 __entry->agbno = agbno;
1095 __entry->len = len; 1095 __entry->len = len;
1096 __entry->found = found; 1096 __entry->lsn = lsn;
1097 ), 1097 ),
1098 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1098 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1099 MAJOR(__entry->dev), MINOR(__entry->dev), 1099 MAJOR(__entry->dev), MINOR(__entry->dev),
1100 __entry->agno, 1100 __entry->agno,
1101 __entry->agbno, 1101 __entry->agbno,
1102 __entry->len, 1102 __entry->len,
1103 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1103 __entry->lsn)
1104); 1104);
1105 1105
1106TRACE_EVENT(xfs_agf, 1106TRACE_EVENT(xfs_agf,
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc6d9c4..275b1f4f9430 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2563,43 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2563 xfs_mount_t *mp; 2563 xfs_mount_t *mp;
2564 xfs_perag_busy_t *bsy; 2564 xfs_perag_busy_t *bsy;
2565 xfs_agblock_t uend, bend; 2565 xfs_agblock_t uend, bend;
2566 xfs_lsn_t lsn; 2566 xfs_lsn_t lsn = 0;
2567 int cnt; 2567 int cnt;
2568 2568
2569 mp = tp->t_mountp; 2569 mp = tp->t_mountp;
2570 2570
2571 spin_lock(&mp->m_perag[agno].pagb_lock); 2571 spin_lock(&mp->m_perag[agno].pagb_lock);
2572 cnt = mp->m_perag[agno].pagb_count;
2573 2572
2574 uend = bno + len - 1; 2573 uend = bno + len - 1;
2575 2574
2576 /* search pagb_list for this slot, skipping open slots */ 2575 /*
2577 for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { 2576 * search pagb_list for this slot, skipping open slots. We have to
2577 * search the entire array as there may be multiple overlaps and
2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range.
2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
2582 bsy = &mp->m_perag[agno].pagb_list[cnt];
2583 if (!bsy->busy_tp)
2584 continue;
2578 2585
2579 /* 2586 bend = bsy->busy_start + bsy->busy_length - 1;
2580 * (start1,length1) within (start2, length2) 2587 if (bno > bend || uend < bsy->busy_start)
2581 */ 2588 continue;
2582 if (bsy->busy_tp != NULL) {
2583 bend = bsy->busy_start + bsy->busy_length - 1;
2584 if ((bno > bend) || (uend < bsy->busy_start)) {
2585 cnt--;
2586 } else {
2587 break;
2588 }
2589 }
2590 }
2591 2589
2592 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt); 2590 /* (start1,length1) within (start2, length2) */
2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2592 lsn = bsy->busy_tp->t_commit_lsn;
2593 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2593 2596
2594 /* 2597 /*
2595 * If a block was found, force the log through the LSN of the 2598 * If a block was found, force the log through the LSN of the
2596 * transaction that freed the block 2599 * transaction that freed the block
2597 */ 2600 */
2598 if (cnt) { 2601 if (lsn)
2599 lsn = bsy->busy_tp->t_commit_lsn;
2600 spin_unlock(&mp->m_perag[agno].pagb_lock);
2601 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2602 } else {
2603 spin_unlock(&mp->m_perag[agno].pagb_lock);
2604 }
2605} 2603}