aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_vfsops.c
diff options
context:
space:
mode:
author David Chinner <dgc@sgi.com> 2007-08-28 00:00:13 -0400
committer Tim Shimmin <tes@chook.melbourne.sgi.com> 2007-10-15 02:50:50 -0400
commit da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb (patch)
tree 84454023d649df67cc6b125c73746ddb341ac34e /fs/xfs/xfs_vfsops.c
parent 39cd9f877e63ce7e02cdc7f5dbf1b908451c9532 (diff)
[XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing its incore inodes. We currently use hashes, and the default hash sizes chosen can only ever be a tradeoff between memory consumption and the maximum realistic size of the cache. As a result, anyone who has millions of inodes cached on a filesystem needs to tune the size of the cache via the ihashsize mount option to allow decent scalability with inode cache operations. A further problem is the separate inode cluster hash, whose size is based on the ihashsize but is smaller, and so under certain conditions (sparse cluster cache population) this can become a limitation long before the inode hash is causing issues. The following patchset removes the inode hash and cluster hash and replaces them with radix trees to avoid the scalability limitations of the hashes. It also reduces the size of the inodes by 3 pointers.... SGI-PV: 969561 SGI-Modid: xfs-linux-melb:xfs-kern:29481a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_vfsops.c')
-rw-r--r-- fs/xfs/xfs_vfsops.c 27
1 files changed, 7 insertions, 20 deletions
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 4a27648b5446..1644be14a144 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -117,8 +117,8 @@ xfs_init(void)
117 xfs_ili_zone = 117 xfs_ili_zone =
118 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", 118 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
119 KM_ZONE_SPREAD, NULL); 119 KM_ZONE_SPREAD, NULL);
120 xfs_chashlist_zone = 120 xfs_icluster_zone =
121 kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist", 121 kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
122 KM_ZONE_SPREAD, NULL); 122 KM_ZONE_SPREAD, NULL);
123 123
124 /* 124 /*
@@ -163,7 +163,7 @@ xfs_cleanup(void)
163 extern kmem_zone_t *xfs_efd_zone; 163 extern kmem_zone_t *xfs_efd_zone;
164 extern kmem_zone_t *xfs_efi_zone; 164 extern kmem_zone_t *xfs_efi_zone;
165 extern kmem_zone_t *xfs_buf_item_zone; 165 extern kmem_zone_t *xfs_buf_item_zone;
166 extern kmem_zone_t *xfs_chashlist_zone; 166 extern kmem_zone_t *xfs_icluster_zone;
167 167
168 xfs_cleanup_procfs(); 168 xfs_cleanup_procfs();
169 xfs_sysctl_unregister(); 169 xfs_sysctl_unregister();
@@ -199,7 +199,7 @@ xfs_cleanup(void)
199 kmem_zone_destroy(xfs_efi_zone); 199 kmem_zone_destroy(xfs_efi_zone);
200 kmem_zone_destroy(xfs_ifork_zone); 200 kmem_zone_destroy(xfs_ifork_zone);
201 kmem_zone_destroy(xfs_ili_zone); 201 kmem_zone_destroy(xfs_ili_zone);
202 kmem_zone_destroy(xfs_chashlist_zone); 202 kmem_zone_destroy(xfs_icluster_zone);
203} 203}
204 204
205/* 205/*
@@ -246,7 +246,6 @@ xfs_start_flags(
246 ap->logbufsize); 246 ap->logbufsize);
247 return XFS_ERROR(EINVAL); 247 return XFS_ERROR(EINVAL);
248 } 248 }
249 mp->m_ihsize = ap->ihashsize;
250 mp->m_logbsize = ap->logbufsize; 249 mp->m_logbsize = ap->logbufsize;
251 mp->m_fsname_len = strlen(ap->fsname) + 1; 250 mp->m_fsname_len = strlen(ap->fsname) + 1;
252 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); 251 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
@@ -293,8 +292,6 @@ xfs_start_flags(
293 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; 292 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
294 } 293 }
295 294
296 if (ap->flags & XFSMNT_IHASHSIZE)
297 mp->m_flags |= XFS_MOUNT_IHASHSIZE;
298 if (ap->flags & XFSMNT_IDELETE) 295 if (ap->flags & XFSMNT_IDELETE)
299 mp->m_flags |= XFS_MOUNT_IDELETE; 296 mp->m_flags |= XFS_MOUNT_IDELETE;
300 if (ap->flags & XFSMNT_DIRSYNC) 297 if (ap->flags & XFSMNT_DIRSYNC)
@@ -1673,7 +1670,6 @@ xfs_vget(
1673#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ 1670#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */
1674#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ 1671#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */
1675#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ 1672#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
1676#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */
1677#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ 1673#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
1678#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and 1674#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
1679 * unwritten extent conversion */ 1675 * unwritten extent conversion */
@@ -1799,15 +1795,6 @@ xfs_parseargs(
1799 iosize = suffix_strtoul(value, &eov, 10); 1795 iosize = suffix_strtoul(value, &eov, 10);
1800 args->flags |= XFSMNT_IOSIZE; 1796 args->flags |= XFSMNT_IOSIZE;
1801 args->iosizelog = ffs(iosize) - 1; 1797 args->iosizelog = ffs(iosize) - 1;
1802 } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
1803 if (!value || !*value) {
1804 cmn_err(CE_WARN,
1805 "XFS: %s option requires an argument",
1806 this_char);
1807 return EINVAL;
1808 }
1809 args->flags |= XFSMNT_IHASHSIZE;
1810 args->ihashsize = simple_strtoul(value, &eov, 10);
1811 } else if (!strcmp(this_char, MNTOPT_GRPID) || 1798 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
1812 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 1799 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
1813 vfsp->vfs_flag |= VFS_GRPID; 1800 vfsp->vfs_flag |= VFS_GRPID;
@@ -1876,6 +1863,9 @@ xfs_parseargs(
1876 args->flags &= ~XFSMNT_ATTR2; 1863 args->flags &= ~XFSMNT_ATTR2;
1877 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 1864 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
1878 args->flags2 |= XFSMNT2_FILESTREAMS; 1865 args->flags2 |= XFSMNT2_FILESTREAMS;
1866 } else if (!strcmp(this_char, "ihashsize")) {
1867 cmn_err(CE_WARN,
1868 "XFS: ihashsize no longer used, option is deprecated.");
1879 } else if (!strcmp(this_char, "osyncisdsync")) { 1869 } else if (!strcmp(this_char, "osyncisdsync")) {
1880 /* no-op, this is now the default */ 1870 /* no-op, this is now the default */
1881 cmn_err(CE_WARN, 1871 cmn_err(CE_WARN,
@@ -1966,9 +1956,6 @@ xfs_showargs(
1966 seq_puts(m, xfs_infop->str); 1956 seq_puts(m, xfs_infop->str);
1967 } 1957 }
1968 1958
1969 if (mp->m_flags & XFS_MOUNT_IHASHSIZE)
1970 seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize);
1971
1972 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) 1959 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
1973 seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", 1960 seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
1974 (int)(1 << mp->m_writeio_log) >> 10); 1961 (int)(1 << mp->m_writeio_log) >> 10);