aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2018-03-15 13:51:58 -0400
committerDarrick J. Wong <darrick.wong@oracle.com>2018-03-23 21:05:06 -0400
commita27ba2607e60312554cbcd43fc660b2c7f29dc9c (patch)
tree109eaa214b6c2a6887c843a0caa0b8998000eb48
parent3e4da466bfa1dea40b2b23130a9dc4acebcc9f14 (diff)
xfs: detect agfl count corruption and reset agfl
The struct xfs_agfl v5 header was originally introduced with unexpected padding that caused the AGFL to operate with one less slot than intended. The header has since been packed, but the fix left an incompatibility for users who upgrade from an old kernel with the unpacked header to a newer kernel with the packed header while the AGFL happens to wrap around the end. The newer kernel recognizes one extra slot at the physical end of the AGFL that the previous kernel did not. The new kernel will eventually attempt to allocate a block from that slot, which contains invalid data, and cause a crash. This condition can be detected by comparing the active range of the AGFL to the count. While this detects a padding mismatch, it can also trigger false positives for unrelated flcount corruption. Since we cannot distinguish a size mismatch due to padding from unrelated corruption, we can't trust the AGFL enough to simply repopulate the empty slot. Instead, avoid unnecessarily complex detection logic and use a solution that can handle any form of flcount corruption that slips through read verifiers: distrust the entire AGFL and reset it to an empty state. Any valid blocks within the AGFL are intentionally leaked. This requires xfs_repair to rectify (which was already necessary based on the state the AGFL was found in). The reset mitigates the side effect of the padding mismatch problem from a filesystem crash to a free space accounting inconsistency. The generic approach also means that this patch can be safely backported to kernels with or without a packed struct xfs_agfl. Check the AGF for an invalid freelist count on initial read from disk. If detected, set a flag on the xfs_perag to indicate that a reset is required before the AGFL can be used. In the first transaction that attempts to use a flagged AGFL, reset it to empty, warn the user about the inconsistency and allow the freelist fixup code to repopulate the AGFL with new blocks. 
The xfs_perag flag is cleared to eliminate the need for repeated checks on each block allocation operation. This allows kernels that include the packing fix commit 96f859d52bcb ("libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct") to handle older unpacked AGFL formats without a filesystem crash. Suggested-by: Dave Chinner <david@fromorbit.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chiluk <chiluk+linuxxfs@indeed.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c94
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_trace.h9
3 files changed, 103 insertions, 1 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3db90b707fb2..39387bdd225d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2063,6 +2063,93 @@ xfs_alloc_space_available(
2063} 2063}
2064 2064
2065/* 2065/*
2066 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2067 * is to detect an agfl header padding mismatch between current and early v5
2068 * kernels. This problem manifests as a 1-slot size difference between the
2069 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2070 * may also catch variants of agfl count corruption unrelated to padding. Either
2071 * way, we'll reset the agfl and warn the user.
2072 *
2073 * Return true if a reset is required before the agfl can be used, false
2074 * otherwise.
2075 */
2076static bool
2077xfs_agfl_needs_reset(
2078 struct xfs_mount *mp,
2079 struct xfs_agf *agf)
2080{
2081 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2082 uint32_t l = be32_to_cpu(agf->agf_fllast);
2083 uint32_t c = be32_to_cpu(agf->agf_flcount);
2084 int agfl_size = xfs_agfl_size(mp);
2085 int active;
2086
2087 /* no agfl header on v4 supers */
2088 if (!xfs_sb_version_hascrc(&mp->m_sb))
2089 return false;
2090
2091 /*
2092 * The agf read verifier catches severe corruption of these fields.
2093 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2094 * the verifier allows it.
2095 */
2096 if (f >= agfl_size || l >= agfl_size)
2097 return true;
2098 if (c > agfl_size)
2099 return true;
2100
2101 /*
2102 * Check consistency between the on-disk count and the active range. An
2103 * agfl padding mismatch manifests as an inconsistent flcount.
2104 */
2105 if (c && l >= f)
2106 active = l - f + 1;
2107 else if (c)
2108 active = agfl_size - f + l + 1;
2109 else
2110 active = 0;
2111
2112 return active != c;
2113}
2114
2115/*
2116 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2117 * agfl content cannot be trusted. Warn the user that a repair is required to
2118 * recover leaked blocks.
2119 *
2120 * The purpose of this mechanism is to handle filesystems affected by the agfl
2121 * header padding mismatch problem. A reset keeps the filesystem online with a
2122 * relatively minor free space accounting inconsistency rather than suffer the
2123 * inevitable crash from use of an invalid agfl block.
2124 */
2125static void
2126xfs_agfl_reset(
2127 struct xfs_trans *tp,
2128 struct xfs_buf *agbp,
2129 struct xfs_perag *pag)
2130{
2131 struct xfs_mount *mp = tp->t_mountp;
2132 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
2133
2134 ASSERT(pag->pagf_agflreset);
2135 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2136
2137 xfs_warn(mp,
2138 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2139 "Please unmount and run xfs_repair.",
2140 pag->pag_agno, pag->pagf_flcount);
2141
2142 agf->agf_flfirst = 0;
2143 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2144 agf->agf_flcount = 0;
2145 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2146 XFS_AGF_FLCOUNT);
2147
2148 pag->pagf_flcount = 0;
2149 pag->pagf_agflreset = false;
2150}
2151
2152/*
2066 * Decide whether to use this allocation group for this allocation. 2153 * Decide whether to use this allocation group for this allocation.
2067 * If so, fix up the btree freelist's size. 2154 * If so, fix up the btree freelist's size.
2068 */ 2155 */
@@ -2123,6 +2210,10 @@ xfs_alloc_fix_freelist(
2123 } 2210 }
2124 } 2211 }
2125 2212
2213 /* reset a padding mismatched agfl before final free space check */
2214 if (pag->pagf_agflreset)
2215 xfs_agfl_reset(tp, agbp, pag);
2216
2126 /* If there isn't enough total space or single-extent, reject it. */ 2217 /* If there isn't enough total space or single-extent, reject it. */
2127 need = xfs_alloc_min_freelist(mp, pag); 2218 need = xfs_alloc_min_freelist(mp, pag);
2128 if (!xfs_alloc_space_available(args, need, flags)) 2219 if (!xfs_alloc_space_available(args, need, flags))
@@ -2279,6 +2370,7 @@ xfs_alloc_get_freelist(
2279 agf->agf_flfirst = 0; 2370 agf->agf_flfirst = 0;
2280 2371
2281 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2372 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2373 ASSERT(!pag->pagf_agflreset);
2282 be32_add_cpu(&agf->agf_flcount, -1); 2374 be32_add_cpu(&agf->agf_flcount, -1);
2283 xfs_trans_agflist_delta(tp, -1); 2375 xfs_trans_agflist_delta(tp, -1);
2284 pag->pagf_flcount--; 2376 pag->pagf_flcount--;
@@ -2390,6 +2482,7 @@ xfs_alloc_put_freelist(
2390 agf->agf_fllast = 0; 2482 agf->agf_fllast = 0;
2391 2483
2392 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2484 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2485 ASSERT(!pag->pagf_agflreset);
2393 be32_add_cpu(&agf->agf_flcount, 1); 2486 be32_add_cpu(&agf->agf_flcount, 1);
2394 xfs_trans_agflist_delta(tp, 1); 2487 xfs_trans_agflist_delta(tp, 1);
2395 pag->pagf_flcount++; 2488 pag->pagf_flcount++;
@@ -2597,6 +2690,7 @@ xfs_alloc_read_agf(
2597 pag->pagb_count = 0; 2690 pag->pagb_count = 0;
2598 pag->pagb_tree = RB_ROOT; 2691 pag->pagb_tree = RB_ROOT;
2599 pag->pagf_init = 1; 2692 pag->pagf_init = 1;
2693 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
2600 } 2694 }
2601#ifdef DEBUG 2695#ifdef DEBUG
2602 else if (!XFS_FORCED_SHUTDOWN(mp)) { 2696 else if (!XFS_FORCED_SHUTDOWN(mp)) {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1808f56decaa..10b90bbc5162 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -353,6 +353,7 @@ typedef struct xfs_perag {
353 char pagi_inodeok; /* The agi is ok for inodes */ 353 char pagi_inodeok; /* The agi is ok for inodes */
354 uint8_t pagf_levels[XFS_BTNUM_AGF]; 354 uint8_t pagf_levels[XFS_BTNUM_AGF];
355 /* # of levels in bno & cnt btree */ 355 /* # of levels in bno & cnt btree */
356 bool pagf_agflreset; /* agfl requires reset before use */
356 uint32_t pagf_flcount; /* count of blocks in freelist */ 357 uint32_t pagf_flcount; /* count of blocks in freelist */
357 xfs_extlen_t pagf_freeblks; /* total free blocks */ 358 xfs_extlen_t pagf_freeblks; /* total free blocks */
358 xfs_extlen_t pagf_longest; /* longest free space */ 359 xfs_extlen_t pagf_longest; /* longest free space */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 945de08af7ba..a982c0b623d0 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1477,7 +1477,7 @@ TRACE_EVENT(xfs_extent_busy_trim,
1477 __entry->tlen) 1477 __entry->tlen)
1478); 1478);
1479 1479
1480TRACE_EVENT(xfs_agf, 1480DECLARE_EVENT_CLASS(xfs_agf_class,
1481 TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, 1481 TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
1482 unsigned long caller_ip), 1482 unsigned long caller_ip),
1483 TP_ARGS(mp, agf, flags, caller_ip), 1483 TP_ARGS(mp, agf, flags, caller_ip),
@@ -1533,6 +1533,13 @@ TRACE_EVENT(xfs_agf,
1533 __entry->longest, 1533 __entry->longest,
1534 (void *)__entry->caller_ip) 1534 (void *)__entry->caller_ip)
1535); 1535);
/*
 * Helper to declare a tracepoint called "name" as an instance of the
 * xfs_agf_class event class. Every instance takes the same arguments:
 * (mp, agf, flags, caller_ip). Two events are instantiated below:
 * xfs_agf and xfs_agfl_reset.
 */
1536#define DEFINE_AGF_EVENT(name) \
1537DEFINE_EVENT(xfs_agf_class, name, \
1538 TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \
1539 unsigned long caller_ip), \
1540 TP_ARGS(mp, agf, flags, caller_ip))
1541DEFINE_AGF_EVENT(xfs_agf);
1542DEFINE_AGF_EVENT(xfs_agfl_reset);
1536 1543
1537TRACE_EVENT(xfs_free_extent, 1544TRACE_EVENT(xfs_free_extent,
1538 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, 1545 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,