summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOmar Sandoval <osandov@fb.com>2018-12-12 11:46:32 -0500
committerDarrick J. Wong <darrick.wong@oracle.com>2018-12-12 11:47:17 -0500
commit355e3532132b487ebf6a4900fad8f3525fa3e137 (patch)
tree97b63d782ed5e36a9a0e953a0ceb7d1fca30f15b
parent2c2d9d3a205afa93bf6105e4ab6f1ff536291dc6 (diff)
xfs: cache minimum realtime summary level
The realtime summary is a two-dimensional array on disk, effectively: u32 rsum[log2(number of realtime extents) + 1][number of blocks in the bitmap] rsum[log][bbno] is the number of extents of size 2**log which start in bitmap block bbno. xfs_rtallocate_extent_near() uses xfs_rtany_summary() to check whether rsum[log][bbno] != 0 for any log level. However, the summary array is stored in row-major order (i.e., like an array in C), so all of these entries are not adjacent, but rather spread across the entire summary file. In the worst case (a full bitmap block), xfs_rtany_summary() has to check every level. This means that on a moderately-used realtime device, an allocation will waste a lot of time finding, reading, and releasing buffers for the realtime summary. In particular, one of our storage services (which runs on servers with 8 very slow CPUs and 15 8 TB XFS realtime filesystems) spends almost 5% of its CPU cycles in xfs_rtbuf_get() and xfs_trans_brelse() called from xfs_rtany_summary(). One solution would be to also store the summary with the dimensions swapped. However, this would require a disk format change to a very old component of XFS. Instead, we can cache the minimum size which contains any extents. We do so lazily; rather than guaranteeing that the cache contains the precise minimum, it always contains a loose lower bound which we tighten when we read or update a summary block. This only uses a few kilobytes of memory and is already serialized via the realtime bitmap and summary inode locks, so the cost is minimal. With this change, the same workload only spends 0.2% of its CPU cycles in the realtime allocator. Signed-off-by: Omar Sandoval <osandov@fb.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c6
-rw-r--r--fs/xfs/xfs_mount.h7
-rw-r--r--fs/xfs/xfs_rtalloc.c25
3 files changed, 34 insertions, 4 deletions
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index b228c821bae6..eaaff67e9626 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -505,6 +505,12 @@ xfs_rtmodify_summary_int(
505 uint first = (uint)((char *)sp - (char *)bp->b_addr); 505 uint first = (uint)((char *)sp - (char *)bp->b_addr);
506 506
507 *sp += delta; 507 *sp += delta;
508 if (mp->m_rsum_cache) {
509 if (*sp == 0 && log == mp->m_rsum_cache[bbno])
510 mp->m_rsum_cache[bbno]++;
511 if (*sp != 0 && log < mp->m_rsum_cache[bbno])
512 mp->m_rsum_cache[bbno] = log;
513 }
508 xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1); 514 xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
509 } 515 }
510 if (sum) 516 if (sum)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0ad025e7f3cf..7daafe064af8 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -89,6 +89,13 @@ typedef struct xfs_mount {
89 int m_logbsize; /* size of each log buffer */ 89 int m_logbsize; /* size of each log buffer */
90 uint m_rsumlevels; /* rt summary levels */ 90 uint m_rsumlevels; /* rt summary levels */
91 uint m_rsumsize; /* size of rt summary, bytes */ 91 uint m_rsumsize; /* size of rt summary, bytes */
92 /*
93 * Optional cache of rt summary level per bitmap block with the
94 * invariant that m_rsum_cache[bbno] <= the minimum i for which
95 * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
96 * inode lock.
97 */
98 uint8_t *m_rsum_cache;
92 struct xfs_inode *m_rbmip; /* pointer to bitmap inode */ 99 struct xfs_inode *m_rbmip; /* pointer to bitmap inode */
93 struct xfs_inode *m_rsumip; /* pointer to summary inode */ 100 struct xfs_inode *m_rsumip; /* pointer to summary inode */
94 struct xfs_inode *m_rootip; /* pointer to root directory */ 101 struct xfs_inode *m_rootip; /* pointer to root directory */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 926ed314ffba..aefd63d46397 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -64,8 +64,12 @@ xfs_rtany_summary(
64 int log; /* loop counter, log2 of ext. size */ 64 int log; /* loop counter, log2 of ext. size */
65 xfs_suminfo_t sum; /* summary data */ 65 xfs_suminfo_t sum; /* summary data */
66 66
67 /* There are no extents at levels < m_rsum_cache[bbno]. */
68 if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno])
69 low = mp->m_rsum_cache[bbno];
70
67 /* 71 /*
68 * Loop over logs of extent sizes. Order is irrelevant. 72 * Loop over logs of extent sizes.
69 */ 73 */
70 for (log = low; log <= high; log++) { 74 for (log = low; log <= high; log++) {
71 /* 75 /*
@@ -80,13 +84,17 @@ xfs_rtany_summary(
80 */ 84 */
81 if (sum) { 85 if (sum) {
82 *stat = 1; 86 *stat = 1;
83 return 0; 87 goto out;
84 } 88 }
85 } 89 }
86 /* 90 /*
87 * Found nothing, return failure. 91 * Found nothing, return failure.
88 */ 92 */
89 *stat = 0; 93 *stat = 0;
94out:
95 /* There were no extents at levels < log. */
96 if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno])
97 mp->m_rsum_cache[bbno] = log;
90 return 0; 98 return 0;
91} 99}
92 100
@@ -1187,8 +1195,8 @@ xfs_rtmount_init(
1187} 1195}
1188 1196
1189/* 1197/*
1190 * Get the bitmap and summary inodes into the mount structure 1198 * Get the bitmap and summary inodes and the summary cache into the mount
1191 * at mount time. 1199 * structure at mount time.
1192 */ 1200 */
1193int /* error */ 1201int /* error */
1194xfs_rtmount_inodes( 1202xfs_rtmount_inodes(
@@ -1211,6 +1219,14 @@ xfs_rtmount_inodes(
1211 return error; 1219 return error;
1212 } 1220 }
1213 ASSERT(mp->m_rsumip != NULL); 1221 ASSERT(mp->m_rsumip != NULL);
1222 /*
1223 * The rsum cache is initialized to all zeroes, which is trivially a
1224 * lower bound on the minimum level with any free extents. We can
1225 * continue without the cache if it couldn't be allocated.
1226 */
1227 mp->m_rsum_cache = kmem_zalloc_large(sbp->sb_rbmblocks, KM_SLEEP);
1228 if (!mp->m_rsum_cache)
1229 xfs_warn(mp, "could not allocate realtime summary cache");
1214 return 0; 1230 return 0;
1215} 1231}
1216 1232
@@ -1218,6 +1234,7 @@ void
1218xfs_rtunmount_inodes( 1234xfs_rtunmount_inodes(
1219 struct xfs_mount *mp) 1235 struct xfs_mount *mp)
1220{ 1236{
1237 kmem_free(mp->m_rsum_cache);
1221 if (mp->m_rbmip) 1238 if (mp->m_rbmip)
1222 xfs_irele(mp->m_rbmip); 1239 xfs_irele(mp->m_rbmip);
1223 if (mp->m_rsumip) 1240 if (mp->m_rsumip)