aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Brian Foster <bfoster@redhat.com> 2018-03-09 17:02:32 -0500
committer: Darrick J. Wong <darrick.wong@oracle.com> 2018-03-11 23:27:57 -0400
commit: 0ab32086d0becee56c75a8ba21f16ac08b80f304 (patch)
tree: dc34a6a17938d1fa4797074f8903dfaf6988ea81
parent: 215928633502a7296fec42614463bb49859787d6 (diff)
xfs: account only rmapbt-used blocks against rmapbt perag res
The rmapbt perag metadata reservation reserves blocks for the reverse mapping btree (rmapbt). Since the rmapbt uses blocks from the agfl and perag accounting is updated as blocks are allocated from the allocation btrees, the reservation actually accounts blocks as they are allocated to (or freed from) the agfl rather than the rmapbt itself. While this works for blocks that are eventually used for the rmapbt, not all agfl blocks are destined for the rmapbt. Blocks that are allocated to the agfl (and thus "reserved" for the rmapbt) but then used by another structure lead to a growing inconsistency over time between the runtime tracking of rmapbt usage vs. actual rmapbt usage. Since the runtime tracking thinks all agfl blocks are rmapbt blocks, it essentially believes that less future reservation is required to satisfy the rmapbt than what is actually necessary.

The inconsistency is rectified across mount cycles because the perag reservation is initialized based on the actual rmapbt usage at mount time. The problem, however, is that the excessive drain of the reservation at runtime opens a window to allocate blocks for other purposes that might be required for the rmapbt on a subsequent mount. This problem can be demonstrated by a simple test that runs an allocation workload to consume agfl blocks over time and then observes the difference in the agfl reservation requirement across an unmount/mount cycle:

  mount ...: xfs_ag_resv_init: ... resv 3193 ask 3194 len 3194
  ...
  ...      : xfs_ag_resv_alloc_extent: ... resv 2957 ask 3194 len 1
  umount...: xfs_ag_resv_free: ... resv 2956 ask 3194 len 0
  mount ...: xfs_ag_resv_init: ... resv 3052 ask 3194 len 3194

As the above tracepoints show, the reservation requirement reduces from 3194 blocks to 2956 blocks as the workload runs. Without any other changes in the filesystem, the same reservation requirement jumps from 2956 to 3052 blocks over a umount/mount cycle.
To address this divergence, update the RMAPBT reservation to account blocks used for the rmapbt only rather than all blocks filled into the agfl. This patch makes several high-level changes toward that end:

1.) Reintroduce an AGFL reservation type to serve as an accounting no-op for blocks allocated to (or freed from) the AGFL.
2.) Invoke RMAPBT usage accounting from the actual rmapbt block allocation path rather than the AGFL allocation path.

The first change is required because agfl blocks are considered free blocks throughout their lifetime. The perag reservation subsystem is invoked unconditionally by the allocation subsystem, so we need a way to tell the perag subsystem (via the allocation subsystem) to not make any accounting changes for blocks filled into the AGFL. The second change causes the in-core RMAPBT reservation usage accounting to remain consistent with the on-disk state at all times and eliminates the risk of leaving the rmapbt reservation underfilled.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.c 4
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.h 31
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c 18
-rw-r--r-- fs/xfs/libxfs/xfs_rmap_btree.c 4
-rw-r--r-- fs/xfs/xfs_mount.h 1
5 files changed, 46 insertions, 12 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 0ca2e680034a..03885a968de8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -326,6 +326,8 @@ xfs_ag_resv_alloc_extent(
326 trace_xfs_ag_resv_alloc_extent(pag, type, args->len); 326 trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
327 327
328 switch (type) { 328 switch (type) {
329 case XFS_AG_RESV_AGFL:
330 return;
329 case XFS_AG_RESV_METADATA: 331 case XFS_AG_RESV_METADATA:
330 case XFS_AG_RESV_RMAPBT: 332 case XFS_AG_RESV_RMAPBT:
331 resv = xfs_perag_resv(pag, type); 333 resv = xfs_perag_resv(pag, type);
@@ -366,6 +368,8 @@ xfs_ag_resv_free_extent(
366 trace_xfs_ag_resv_free_extent(pag, type, len); 368 trace_xfs_ag_resv_free_extent(pag, type, len);
367 369
368 switch (type) { 370 switch (type) {
371 case XFS_AG_RESV_AGFL:
372 return;
369 case XFS_AG_RESV_METADATA: 373 case XFS_AG_RESV_METADATA:
370 case XFS_AG_RESV_RMAPBT: 374 case XFS_AG_RESV_RMAPBT:
371 resv = xfs_perag_resv(pag, type); 375 resv = xfs_perag_resv(pag, type);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index 8d6c687deef3..938f2f96c5e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -32,4 +32,35 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
32void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, 32void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
33 struct xfs_trans *tp, xfs_extlen_t len); 33 struct xfs_trans *tp, xfs_extlen_t len);
34 34
35/*
36 * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from
37 * the AGFL, they are allocated one at a time and the reservation updates don't
38 * require a transaction.
39 */
40static inline void
41xfs_ag_resv_rmapbt_alloc(
42 struct xfs_mount *mp,
43 xfs_agnumber_t agno)
44{
45 struct xfs_alloc_arg args = {0};
46 struct xfs_perag *pag;
47
48 args.len = 1;
49 pag = xfs_perag_get(mp, agno);
50 xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
51 xfs_perag_put(pag);
52}
53
54static inline void
55xfs_ag_resv_rmapbt_free(
56 struct xfs_mount *mp,
57 xfs_agnumber_t agno)
58{
59 struct xfs_perag *pag;
60
61 pag = xfs_perag_get(mp, agno);
62 xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
63 xfs_perag_put(pag);
64}
65
35#endif /* __XFS_AG_RESV_H__ */ 66#endif /* __XFS_AG_RESV_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 1dc244d15a75..3db90b707fb2 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -728,7 +728,7 @@ xfs_alloc_ag_vextent(
728 728
729 ASSERT(args->len >= args->minlen); 729 ASSERT(args->len >= args->minlen);
730 ASSERT(args->len <= args->maxlen); 730 ASSERT(args->len <= args->maxlen);
731 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_RMAPBT); 731 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
732 ASSERT(args->agbno % args->alignment == 0); 732 ASSERT(args->agbno % args->alignment == 0);
733 733
734 /* if not file data, insert new block into the reverse map btree */ 734 /* if not file data, insert new block into the reverse map btree */
@@ -1581,7 +1581,6 @@ xfs_alloc_ag_vextent_small(
1581 int *stat) /* status: 0-freelist, 1-normal/none */ 1581 int *stat) /* status: 0-freelist, 1-normal/none */
1582{ 1582{
1583 struct xfs_owner_info oinfo; 1583 struct xfs_owner_info oinfo;
1584 struct xfs_perag *pag;
1585 int error; 1584 int error;
1586 xfs_agblock_t fbno; 1585 xfs_agblock_t fbno;
1587 xfs_extlen_t flen; 1586 xfs_extlen_t flen;
@@ -1600,7 +1599,7 @@ xfs_alloc_ag_vextent_small(
1600 * freelist. 1599 * freelist.
1601 */ 1600 */
1602 else if (args->minlen == 1 && args->alignment == 1 && 1601 else if (args->minlen == 1 && args->alignment == 1 &&
1603 args->resv != XFS_AG_RESV_RMAPBT && 1602 args->resv != XFS_AG_RESV_AGFL &&
1604 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1603 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1605 > args->minleft)) { 1604 > args->minleft)) {
1606 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1605 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
@@ -1633,18 +1632,13 @@ xfs_alloc_ag_vextent_small(
1633 /* 1632 /*
1634 * If we're feeding an AGFL block to something that 1633 * If we're feeding an AGFL block to something that
1635 * doesn't live in the free space, we need to clear 1634 * doesn't live in the free space, we need to clear
1636 * out the OWN_AG rmap and add the block back to 1635 * out the OWN_AG rmap.
1637 * the RMAPBT per-AG reservation.
1638 */ 1636 */
1639 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 1637 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1640 error = xfs_rmap_free(args->tp, args->agbp, args->agno, 1638 error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1641 fbno, 1, &oinfo); 1639 fbno, 1, &oinfo);
1642 if (error) 1640 if (error)
1643 goto error0; 1641 goto error0;
1644 pag = xfs_perag_get(args->mp, args->agno);
1645 xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT,
1646 args->tp, 1);
1647 xfs_perag_put(pag);
1648 1642
1649 *stat = 0; 1643 *stat = 0;
1650 return 0; 1644 return 0;
@@ -2170,7 +2164,7 @@ xfs_alloc_fix_freelist(
2170 if (error) 2164 if (error)
2171 goto out_agbp_relse; 2165 goto out_agbp_relse;
2172 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 2166 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2173 &targs.oinfo, XFS_AG_RESV_RMAPBT); 2167 &targs.oinfo, XFS_AG_RESV_AGFL);
2174 if (error) 2168 if (error)
2175 goto out_agbp_relse; 2169 goto out_agbp_relse;
2176 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 2170 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
@@ -2196,7 +2190,7 @@ xfs_alloc_fix_freelist(
2196 while (pag->pagf_flcount < need) { 2190 while (pag->pagf_flcount < need) {
2197 targs.agbno = 0; 2191 targs.agbno = 0;
2198 targs.maxlen = need - pag->pagf_flcount; 2192 targs.maxlen = need - pag->pagf_flcount;
2199 targs.resv = XFS_AG_RESV_RMAPBT; 2193 targs.resv = XFS_AG_RESV_AGFL;
2200 2194
2201 /* Allocate as many blocks as possible at once. */ 2195 /* Allocate as many blocks as possible at once. */
2202 error = xfs_alloc_ag_vextent(&targs); 2196 error = xfs_alloc_ag_vextent(&targs);
@@ -2877,7 +2871,7 @@ xfs_free_extent(
2877 int error; 2871 int error;
2878 2872
2879 ASSERT(len != 0); 2873 ASSERT(len != 0);
2880 ASSERT(type != XFS_AG_RESV_RMAPBT); 2874 ASSERT(type != XFS_AG_RESV_AGFL);
2881 2875
2882 if (XFS_TEST_ERROR(false, mp, 2876 if (XFS_TEST_ERROR(false, mp,
2883 XFS_ERRTAG_FREE_EXTENT)) 2877 XFS_ERRTAG_FREE_EXTENT))
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 738df3f9b5f2..8b0d0de1cd11 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -125,6 +125,8 @@ xfs_rmapbt_alloc_block(
125 be32_add_cpu(&agf->agf_rmap_blocks, 1); 125 be32_add_cpu(&agf->agf_rmap_blocks, 1);
126 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 126 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
127 127
128 xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno);
129
128 *stat = 1; 130 *stat = 1;
129 return 0; 131 return 0;
130} 132}
@@ -152,6 +154,8 @@ xfs_rmapbt_free_block(
152 XFS_EXTENT_BUSY_SKIP_DISCARD); 154 XFS_EXTENT_BUSY_SKIP_DISCARD);
153 xfs_trans_agbtree_delta(cur->bc_tp, -1); 155 xfs_trans_agbtree_delta(cur->bc_tp, -1);
154 156
157 xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno);
158
155 return 0; 159 return 0;
156} 160}
157 161
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a2cf3718bea9..1808f56decaa 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -325,6 +325,7 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
325/* per-AG block reservation data structures*/ 325/* per-AG block reservation data structures*/
326enum xfs_ag_resv_type { 326enum xfs_ag_resv_type {
327 XFS_AG_RESV_NONE = 0, 327 XFS_AG_RESV_NONE = 0,
328 XFS_AG_RESV_AGFL,
328 XFS_AG_RESV_METADATA, 329 XFS_AG_RESV_METADATA,
329 XFS_AG_RESV_RMAPBT, 330 XFS_AG_RESV_RMAPBT,
330}; 331};