diff options
author | David Chinner <dgc@sgi.com> | 2007-08-28 00:00:13 -0400 |
---|---|---|
committer | Tim Shimmin <tes@chook.melbourne.sgi.com> | 2007-10-15 02:50:50 -0400 |
commit | da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb (patch) | |
tree | 84454023d649df67cc6b125c73746ddb341ac34e /fs/xfs/xfs_vfsops.c | |
parent | 39cd9f877e63ce7e02cdc7f5dbf1b908451c9532 (diff) |
[XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing its incore
inodes. We currently use hashes, and the default hash sizes chosen can
only ever be a tradeoff between memory consumption and the maximum
realistic size of the cache.
As a result, anyone who has millions of inodes cached on a filesystem
needs to tune the size of the cache via the ihashsize mount option to
allow decent scalability with inode cache operations.
A further problem is the separate inode cluster hash, whose size is based
on the ihashsize but is smaller, and so under certain conditions (sparse
cluster cache population) this can become a limitation long before the
inode hash is causing issues.
The following patchset removes the inode hash and cluster hash and
replaces them with radix trees to avoid the scalability limitations of the
hashes. It also reduces the size of the inodes by 3 pointers....
SGI-PV: 969561
SGI-Modid: xfs-linux-melb:xfs-kern:29481a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_vfsops.c')
-rw-r--r-- | fs/xfs/xfs_vfsops.c | 27 |
1 files changed, 7 insertions, 20 deletions
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 4a27648b5446..1644be14a144 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -117,8 +117,8 @@ xfs_init(void) | |||
117 | xfs_ili_zone = | 117 | xfs_ili_zone = |
118 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", | 118 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", |
119 | KM_ZONE_SPREAD, NULL); | 119 | KM_ZONE_SPREAD, NULL); |
120 | xfs_chashlist_zone = | 120 | xfs_icluster_zone = |
121 | kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist", | 121 | kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster", |
122 | KM_ZONE_SPREAD, NULL); | 122 | KM_ZONE_SPREAD, NULL); |
123 | 123 | ||
124 | /* | 124 | /* |
@@ -163,7 +163,7 @@ xfs_cleanup(void) | |||
163 | extern kmem_zone_t *xfs_efd_zone; | 163 | extern kmem_zone_t *xfs_efd_zone; |
164 | extern kmem_zone_t *xfs_efi_zone; | 164 | extern kmem_zone_t *xfs_efi_zone; |
165 | extern kmem_zone_t *xfs_buf_item_zone; | 165 | extern kmem_zone_t *xfs_buf_item_zone; |
166 | extern kmem_zone_t *xfs_chashlist_zone; | 166 | extern kmem_zone_t *xfs_icluster_zone; |
167 | 167 | ||
168 | xfs_cleanup_procfs(); | 168 | xfs_cleanup_procfs(); |
169 | xfs_sysctl_unregister(); | 169 | xfs_sysctl_unregister(); |
@@ -199,7 +199,7 @@ xfs_cleanup(void) | |||
199 | kmem_zone_destroy(xfs_efi_zone); | 199 | kmem_zone_destroy(xfs_efi_zone); |
200 | kmem_zone_destroy(xfs_ifork_zone); | 200 | kmem_zone_destroy(xfs_ifork_zone); |
201 | kmem_zone_destroy(xfs_ili_zone); | 201 | kmem_zone_destroy(xfs_ili_zone); |
202 | kmem_zone_destroy(xfs_chashlist_zone); | 202 | kmem_zone_destroy(xfs_icluster_zone); |
203 | } | 203 | } |
204 | 204 | ||
205 | /* | 205 | /* |
@@ -246,7 +246,6 @@ xfs_start_flags( | |||
246 | ap->logbufsize); | 246 | ap->logbufsize); |
247 | return XFS_ERROR(EINVAL); | 247 | return XFS_ERROR(EINVAL); |
248 | } | 248 | } |
249 | mp->m_ihsize = ap->ihashsize; | ||
250 | mp->m_logbsize = ap->logbufsize; | 249 | mp->m_logbsize = ap->logbufsize; |
251 | mp->m_fsname_len = strlen(ap->fsname) + 1; | 250 | mp->m_fsname_len = strlen(ap->fsname) + 1; |
252 | mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); | 251 | mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); |
@@ -293,8 +292,6 @@ xfs_start_flags( | |||
293 | mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; | 292 | mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; |
294 | } | 293 | } |
295 | 294 | ||
296 | if (ap->flags & XFSMNT_IHASHSIZE) | ||
297 | mp->m_flags |= XFS_MOUNT_IHASHSIZE; | ||
298 | if (ap->flags & XFSMNT_IDELETE) | 295 | if (ap->flags & XFSMNT_IDELETE) |
299 | mp->m_flags |= XFS_MOUNT_IDELETE; | 296 | mp->m_flags |= XFS_MOUNT_IDELETE; |
300 | if (ap->flags & XFSMNT_DIRSYNC) | 297 | if (ap->flags & XFSMNT_DIRSYNC) |
@@ -1673,7 +1670,6 @@ xfs_vget( | |||
1673 | #define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ | 1670 | #define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ |
1674 | #define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ | 1671 | #define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ |
1675 | #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ | 1672 | #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ |
1676 | #define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ | ||
1677 | #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ | 1673 | #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ |
1678 | #define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and | 1674 | #define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and |
1679 | * unwritten extent conversion */ | 1675 | * unwritten extent conversion */ |
@@ -1799,15 +1795,6 @@ xfs_parseargs( | |||
1799 | iosize = suffix_strtoul(value, &eov, 10); | 1795 | iosize = suffix_strtoul(value, &eov, 10); |
1800 | args->flags |= XFSMNT_IOSIZE; | 1796 | args->flags |= XFSMNT_IOSIZE; |
1801 | args->iosizelog = ffs(iosize) - 1; | 1797 | args->iosizelog = ffs(iosize) - 1; |
1802 | } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) { | ||
1803 | if (!value || !*value) { | ||
1804 | cmn_err(CE_WARN, | ||
1805 | "XFS: %s option requires an argument", | ||
1806 | this_char); | ||
1807 | return EINVAL; | ||
1808 | } | ||
1809 | args->flags |= XFSMNT_IHASHSIZE; | ||
1810 | args->ihashsize = simple_strtoul(value, &eov, 10); | ||
1811 | } else if (!strcmp(this_char, MNTOPT_GRPID) || | 1798 | } else if (!strcmp(this_char, MNTOPT_GRPID) || |
1812 | !strcmp(this_char, MNTOPT_BSDGROUPS)) { | 1799 | !strcmp(this_char, MNTOPT_BSDGROUPS)) { |
1813 | vfsp->vfs_flag |= VFS_GRPID; | 1800 | vfsp->vfs_flag |= VFS_GRPID; |
@@ -1876,6 +1863,9 @@ xfs_parseargs( | |||
1876 | args->flags &= ~XFSMNT_ATTR2; | 1863 | args->flags &= ~XFSMNT_ATTR2; |
1877 | } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { | 1864 | } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { |
1878 | args->flags2 |= XFSMNT2_FILESTREAMS; | 1865 | args->flags2 |= XFSMNT2_FILESTREAMS; |
1866 | } else if (!strcmp(this_char, "ihashsize")) { | ||
1867 | cmn_err(CE_WARN, | ||
1868 | "XFS: ihashsize no longer used, option is deprecated."); | ||
1879 | } else if (!strcmp(this_char, "osyncisdsync")) { | 1869 | } else if (!strcmp(this_char, "osyncisdsync")) { |
1880 | /* no-op, this is now the default */ | 1870 | /* no-op, this is now the default */ |
1881 | cmn_err(CE_WARN, | 1871 | cmn_err(CE_WARN, |
@@ -1966,9 +1956,6 @@ xfs_showargs( | |||
1966 | seq_puts(m, xfs_infop->str); | 1956 | seq_puts(m, xfs_infop->str); |
1967 | } | 1957 | } |
1968 | 1958 | ||
1969 | if (mp->m_flags & XFS_MOUNT_IHASHSIZE) | ||
1970 | seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize); | ||
1971 | |||
1972 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) | 1959 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) |
1973 | seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", | 1960 | seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", |
1974 | (int)(1 << mp->m_writeio_log) >> 10); | 1961 | (int)(1 << mp->m_writeio_log) >> 10); |