aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2013-11-01 00:27:20 -0400
committerBen Myers <bpm@sgi.com>2013-11-18 10:29:36 -0500
commit8f80587bacb6eb893df0ee4e35fefa0dfcfdf9f4 (patch)
tree0b1eb078933bbf6f76a240dd21aedc90b967fb21
parent9e3908e342eba6684621e616529669c17e271e7e (diff)
xfs: increase inode cluster size for v5 filesystems
v5 filesystems use 512 byte inodes as a minimum, so read inodes in clusters that are effectively half the size of a v4 filesystem with 256 byte inodes. For v5 fielsystems, scale the inode cluster size with the size of the inode so that we keep a constant 32 inodes per cluster ratio for all inode IO. This only works if mkfs.xfs sets the inode alignment appropriately for larger inode clusters, so this functionality is made conditional on mkfs doing the right thing. xfs_repair needs to know about the inode alignment changes, too. Wall time: create bulkstat find+stat ls -R unlink v4 237s 161s 173s 201s 299s v5 235s 163s 205s 31s 356s patched 234s 160s 182s 29s 317s System time: create bulkstat find+stat ls -R unlink v4 2601s 2490s 1653s 1656s 2960s v5 2637s 2497s 1681s 20s 3216s patched 2613s 2451s 1658s 20s 3007s So, wall time same or down across the board, system time same or down across the board, and cache hit rates all improve except for the ls -R case which is a pure cold cache directory read workload on v5 filesystems... So, this patch removes most of the performance and CPU usage differential between v4 and v5 filesystems on traversal related workloads. Note: while this patch is currently for v5 filesystems only, there is no reason it can't be ported back to v4 filesystems. This hasn't been done here because bringing the code back to v4 requires forwards and backwards kernel compatibility testing. i.e. to deterine if older kernels(*) do the right thing with larger inode alignments but still only using 8k inode cluster sizes. None of this testing and validation on v4 filesystems has been done, so for the moment larger inode clusters is limited to v5 superblocks. (*) a current default config v4 filesystem should mount just fine on 2.6.23 (when lazy-count support was introduced), and so if we change the alignment emitted by mkfs without a feature bit then we have to make sure it works properly on all kernels since 2.6.23. And if we allow it to be changed when the lazy-count bit is not set, then it's all kernels since v2 logs were introduced that need to be tested for compatibility... Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Eric Sandeen <sandeen@redhat.com> Signed-off-by: Ben Myers <bpm@sgi.com>
-rw-r--r--fs/xfs/xfs_mount.c15
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_trans_resv.c3
3 files changed, 17 insertions, 3 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da88f167af78..02df7b408a26 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -41,6 +41,7 @@
41#include "xfs_fsops.h" 41#include "xfs_fsops.h"
42#include "xfs_trace.h" 42#include "xfs_trace.h"
43#include "xfs_icache.h" 43#include "xfs_icache.h"
44#include "xfs_dinode.h"
44 45
45 46
46#ifdef HAVE_PERCPU_SB 47#ifdef HAVE_PERCPU_SB
@@ -718,8 +719,22 @@ xfs_mountfs(
718 * Set the inode cluster size. 719 * Set the inode cluster size.
719 * This may still be overridden by the file system 720 * This may still be overridden by the file system
720 * block size if it is larger than the chosen cluster size. 721 * block size if it is larger than the chosen cluster size.
722 *
723 * For v5 filesystems, scale the cluster size with the inode size to
724 * keep a constant ratio of inode per cluster buffer, but only if mkfs
725 * has set the inode alignment value appropriately for larger cluster
726 * sizes.
721 */ 727 */
722 mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; 728 mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
729 if (xfs_sb_version_hascrc(&mp->m_sb)) {
730 int new_size = mp->m_inode_cluster_size;
731
732 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
733 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
734 mp->m_inode_cluster_size = new_size;
735 xfs_info(mp, "Using inode cluster size of %d bytes",
736 mp->m_inode_cluster_size);
737 }
723 738
724 /* 739 /*
725 * Set inode alignment fields 740 * Set inode alignment fields
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d8101a10d8e..a466c5e5826e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -112,7 +112,7 @@ typedef struct xfs_mount {
112 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 112 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
113 __uint8_t m_agno_log; /* log #ag's */ 113 __uint8_t m_agno_log; /* log #ag's */
114 __uint8_t m_agino_log; /* #bits for agino in inum */ 114 __uint8_t m_agino_log; /* #bits for agino in inum */
115 __uint16_t m_inode_cluster_size;/* min inode buf size */ 115 uint m_inode_cluster_size;/* min inode buf size */
116 uint m_blockmask; /* sb_blocksize-1 */ 116 uint m_blockmask; /* sb_blocksize-1 */
117 uint m_blockwsize; /* sb_blocksize in words */ 117 uint m_blockwsize; /* sb_blocksize in words */
118 uint m_blockwmask; /* blockwsize-1 */ 118 uint m_blockwmask; /* blockwsize-1 */
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index d53d9f0627a7..2fd59c0dae66 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -385,8 +385,7 @@ xfs_calc_ifree_reservation(
385 xfs_calc_inode_res(mp, 1) + 385 xfs_calc_inode_res(mp, 1) +
386 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 386 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
387 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + 387 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
388 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), 388 max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) +
389 XFS_INODE_CLUSTER_SIZE(mp)) +
390 xfs_calc_buf_res(1, 0) + 389 xfs_calc_buf_res(1, 0) +
391 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + 390 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
392 mp->m_in_maxlevels, 0) + 391 mp->m_in_maxlevels, 0) +