diff options
author | David Chinner <dgc@sgi.com> | 2007-08-28 00:00:13 -0400 |
---|---|---|
committer | Tim Shimmin <tes@chook.melbourne.sgi.com> | 2007-10-15 02:50:50 -0400 |
commit | da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb (patch) | |
tree | 84454023d649df67cc6b125c73746ddb341ac34e /fs/xfs/xfs_inode.h | |
parent | 39cd9f877e63ce7e02cdc7f5dbf1b908451c9532 (diff) |
[XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing its incore
inodes. We currently use hashes, and the default hash sizes chosen can
only ever be a tradeoff between memory consumption and the maximum
realistic size of the cache.
As a result, anyone who has millions of inodes cached on a filesystem
needs to tune the size of the cache via the ihashsize mount option to
allow decent scalability with inode cache operations.
A further problem is the separate inode cluster hash, whose size is based
on the ihashsize but is smaller, and so under certain conditions (sparse
cluster cache population) this can become a limitation long before the
inode hash is causing issues.
The following patchset removes the inode hash and cluster hash and
replaces them with radix trees to avoid the scalability limitations of the
hashes. It also reduces the size of the inodes by 3 pointers....
SGI-PV: 969561
SGI-Modid: xfs-linux-melb:xfs-kern:29481a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_inode.h')
-rw-r--r-- | fs/xfs/xfs_inode.h | 56 |
1 files changed, 12 insertions, 44 deletions
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 873b9f783d29..b6dd23d9b3d6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -172,41 +172,18 @@ typedef struct xfs_iocore { | |||
172 | extern void xfs_iocore_inode_init(struct xfs_inode *); | 172 | extern void xfs_iocore_inode_init(struct xfs_inode *); |
173 | extern void xfs_iocore_inode_reinit(struct xfs_inode *); | 173 | extern void xfs_iocore_inode_reinit(struct xfs_inode *); |
174 | 174 | ||
175 | |||
176 | /* | ||
177 | * This is the type used in the xfs inode hash table. | ||
178 | * An array of these is allocated for each mounted | ||
179 | * file system to hash the inodes for that file system. | ||
180 | */ | ||
181 | typedef struct xfs_ihash { | ||
182 | struct xfs_inode *ih_next; | ||
183 | rwlock_t ih_lock; | ||
184 | uint ih_version; | ||
185 | } xfs_ihash_t; | ||
186 | |||
187 | #define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize)) | ||
188 | |||
189 | /* | 175 | /* |
190 | * This is the xfs inode cluster hash. This hash is used by xfs_iflush to | 176 | * This is the xfs inode cluster structure. This structure is used by |
191 | * find inodes that share a cluster and can be flushed to disk at the same | 177 | * xfs_iflush to find inodes that share a cluster and can be flushed to disk at |
192 | * time. | 178 | * the same time. |
193 | */ | 179 | */ |
194 | typedef struct xfs_chashlist { | 180 | typedef struct xfs_icluster { |
195 | struct xfs_chashlist *chl_next; | 181 | struct hlist_head icl_inodes; /* list of inodes on cluster */ |
196 | struct xfs_chashlist *chl_prev; | 182 | xfs_daddr_t icl_blkno; /* starting block number of |
197 | struct xfs_inode *chl_ip; | ||
198 | xfs_daddr_t chl_blkno; /* starting block number of | ||
199 | * the cluster */ | 183 | * the cluster */ |
200 | struct xfs_buf *chl_buf; /* the inode buffer */ | 184 | struct xfs_buf *icl_buf; /* the inode buffer */ |
201 | } xfs_chashlist_t; | 185 | lock_t icl_lock; /* inode list lock */ |
202 | 186 | } xfs_icluster_t; | |
203 | typedef struct xfs_chash { | ||
204 | xfs_chashlist_t *ch_list; | ||
205 | lock_t ch_lock; | ||
206 | } xfs_chash_t; | ||
207 | |||
208 | #define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize)) | ||
209 | |||
210 | 187 | ||
211 | /* | 188 | /* |
212 | * This is the xfs in-core inode structure. | 189 | * This is the xfs in-core inode structure. |
@@ -269,21 +246,15 @@ typedef struct xfs_icdinode { | |||
269 | } xfs_icdinode_t; | 246 | } xfs_icdinode_t; |
270 | 247 | ||
271 | typedef struct { | 248 | typedef struct { |
272 | struct xfs_ihash *ip_hash; /* pointer to hash header */ | ||
273 | struct xfs_inode *ip_next; /* inode hash link forw */ | ||
274 | struct xfs_inode *ip_mnext; /* next inode in mount list */ | 249 | struct xfs_inode *ip_mnext; /* next inode in mount list */ |
275 | struct xfs_inode *ip_mprev; /* ptr to prev inode */ | 250 | struct xfs_inode *ip_mprev; /* ptr to prev inode */ |
276 | struct xfs_inode **ip_prevp; /* ptr to prev i_next */ | ||
277 | struct xfs_mount *ip_mount; /* fs mount struct ptr */ | 251 | struct xfs_mount *ip_mount; /* fs mount struct ptr */ |
278 | } xfs_iptr_t; | 252 | } xfs_iptr_t; |
279 | 253 | ||
280 | typedef struct xfs_inode { | 254 | typedef struct xfs_inode { |
281 | /* Inode linking and identification information. */ | 255 | /* Inode linking and identification information. */ |
282 | struct xfs_ihash *i_hash; /* pointer to hash header */ | ||
283 | struct xfs_inode *i_next; /* inode hash link forw */ | ||
284 | struct xfs_inode *i_mnext; /* next inode in mount list */ | 256 | struct xfs_inode *i_mnext; /* next inode in mount list */ |
285 | struct xfs_inode *i_mprev; /* ptr to prev inode */ | 257 | struct xfs_inode *i_mprev; /* ptr to prev inode */ |
286 | struct xfs_inode **i_prevp; /* ptr to prev i_next */ | ||
287 | struct xfs_mount *i_mount; /* fs mount struct ptr */ | 258 | struct xfs_mount *i_mount; /* fs mount struct ptr */ |
288 | struct list_head i_reclaim; /* reclaim list */ | 259 | struct list_head i_reclaim; /* reclaim list */ |
289 | struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/ | 260 | struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/ |
@@ -324,9 +295,8 @@ typedef struct xfs_inode { | |||
324 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 295 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
325 | 296 | ||
326 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 297 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
327 | xfs_chashlist_t *i_chash; /* cluster hash list header */ | 298 | xfs_icluster_t *i_cluster; /* cluster list header */ |
328 | struct xfs_inode *i_cnext; /* cluster hash link forward */ | 299 | struct hlist_node i_cnode; /* cluster link node */ |
329 | struct xfs_inode *i_cprev; /* cluster hash link backward */ | ||
330 | 300 | ||
331 | xfs_fsize_t i_size; /* in-memory size */ | 301 | xfs_fsize_t i_size; /* in-memory size */ |
332 | /* Trace buffers per inode. */ | 302 | /* Trace buffers per inode. */ |
@@ -521,8 +491,6 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) | |||
521 | */ | 491 | */ |
522 | void xfs_ihash_init(struct xfs_mount *); | 492 | void xfs_ihash_init(struct xfs_mount *); |
523 | void xfs_ihash_free(struct xfs_mount *); | 493 | void xfs_ihash_free(struct xfs_mount *); |
524 | void xfs_chash_init(struct xfs_mount *); | ||
525 | void xfs_chash_free(struct xfs_mount *); | ||
526 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, | 494 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, |
527 | struct xfs_trans *); | 495 | struct xfs_trans *); |
528 | void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); | 496 | void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); |
@@ -633,7 +601,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | |||
633 | #define xfs_inobp_check(mp, bp) | 601 | #define xfs_inobp_check(mp, bp) |
634 | #endif /* DEBUG */ | 602 | #endif /* DEBUG */ |
635 | 603 | ||
636 | extern struct kmem_zone *xfs_chashlist_zone; | 604 | extern struct kmem_zone *xfs_icluster_zone; |
637 | extern struct kmem_zone *xfs_ifork_zone; | 605 | extern struct kmem_zone *xfs_ifork_zone; |
638 | extern struct kmem_zone *xfs_inode_zone; | 606 | extern struct kmem_zone *xfs_inode_zone; |
639 | extern struct kmem_zone *xfs_ili_zone; | 607 | extern struct kmem_zone *xfs_ili_zone; |