aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.h
diff options
context:
space:
mode:
author David Chinner <dgc@sgi.com> 2007-08-28 00:00:13 -0400
committer Tim Shimmin <tes@chook.melbourne.sgi.com> 2007-10-15 02:50:50 -0400
commit da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb (patch)
tree 84454023d649df67cc6b125c73746ddb341ac34e /fs/xfs/xfs_inode.h
parent 39cd9f877e63ce7e02cdc7f5dbf1b908451c9532 (diff)
[XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing its incore inodes. We currently use hashes, and the default hash sizes chosen can only ever be a tradeoff between memory consumption and the maximum realistic size of the cache. As a result, anyone who has millions of inodes cached on a filesystem needs to tune the size of the cache via the ihashsize mount option to allow decent scalability with inode cache operations. A further problem is the separate inode cluster hash, whose size is based on the ihashsize but is smaller, and so under certain conditions (sparse cluster cache population) this can become a limitation long before the inode hash is causing issues. The following patchset removes the inode hash and cluster hash and replaces them with radix trees to avoid the scalability limitations of the hashes. It also reduces the size of the inodes by 3 pointers... SGI-PV: 969561 SGI-Modid: xfs-linux-melb:xfs-kern:29481a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_inode.h')
-rw-r--r--fs/xfs/xfs_inode.h56
1 files changed, 12 insertions, 44 deletions
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 873b9f783d29..b6dd23d9b3d6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -172,41 +172,18 @@ typedef struct xfs_iocore {
172extern void xfs_iocore_inode_init(struct xfs_inode *); 172extern void xfs_iocore_inode_init(struct xfs_inode *);
173extern void xfs_iocore_inode_reinit(struct xfs_inode *); 173extern void xfs_iocore_inode_reinit(struct xfs_inode *);
174 174
175
176/*
177 * This is the type used in the xfs inode hash table.
178 * An array of these is allocated for each mounted
179 * file system to hash the inodes for that file system.
180 */
181typedef struct xfs_ihash {
182 struct xfs_inode *ih_next;
183 rwlock_t ih_lock;
184 uint ih_version;
185} xfs_ihash_t;
186
187#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
188
189/* 175/*
190 * This is the xfs inode cluster hash. This hash is used by xfs_iflush to 176 * This is the xfs inode cluster structure. This structure is used by
191 * find inodes that share a cluster and can be flushed to disk at the same 177 * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
192 * time. 178 * the same time.
193 */ 179 */
194typedef struct xfs_chashlist { 180typedef struct xfs_icluster {
195 struct xfs_chashlist *chl_next; 181 struct hlist_head icl_inodes; /* list of inodes on cluster */
196 struct xfs_chashlist *chl_prev; 182 xfs_daddr_t icl_blkno; /* starting block number of
197 struct xfs_inode *chl_ip;
198 xfs_daddr_t chl_blkno; /* starting block number of
199 * the cluster */ 183 * the cluster */
200 struct xfs_buf *chl_buf; /* the inode buffer */ 184 struct xfs_buf *icl_buf; /* the inode buffer */
201} xfs_chashlist_t; 185 lock_t icl_lock; /* inode list lock */
202 186} xfs_icluster_t;
203typedef struct xfs_chash {
204 xfs_chashlist_t *ch_list;
205 lock_t ch_lock;
206} xfs_chash_t;
207
208#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize))
209
210 187
211/* 188/*
212 * This is the xfs in-core inode structure. 189 * This is the xfs in-core inode structure.
@@ -269,21 +246,15 @@ typedef struct xfs_icdinode {
269} xfs_icdinode_t; 246} xfs_icdinode_t;
270 247
271typedef struct { 248typedef struct {
272 struct xfs_ihash *ip_hash; /* pointer to hash header */
273 struct xfs_inode *ip_next; /* inode hash link forw */
274 struct xfs_inode *ip_mnext; /* next inode in mount list */ 249 struct xfs_inode *ip_mnext; /* next inode in mount list */
275 struct xfs_inode *ip_mprev; /* ptr to prev inode */ 250 struct xfs_inode *ip_mprev; /* ptr to prev inode */
276 struct xfs_inode **ip_prevp; /* ptr to prev i_next */
277 struct xfs_mount *ip_mount; /* fs mount struct ptr */ 251 struct xfs_mount *ip_mount; /* fs mount struct ptr */
278} xfs_iptr_t; 252} xfs_iptr_t;
279 253
280typedef struct xfs_inode { 254typedef struct xfs_inode {
281 /* Inode linking and identification information. */ 255 /* Inode linking and identification information. */
282 struct xfs_ihash *i_hash; /* pointer to hash header */
283 struct xfs_inode *i_next; /* inode hash link forw */
284 struct xfs_inode *i_mnext; /* next inode in mount list */ 256 struct xfs_inode *i_mnext; /* next inode in mount list */
285 struct xfs_inode *i_mprev; /* ptr to prev inode */ 257 struct xfs_inode *i_mprev; /* ptr to prev inode */
286 struct xfs_inode **i_prevp; /* ptr to prev i_next */
287 struct xfs_mount *i_mount; /* fs mount struct ptr */ 258 struct xfs_mount *i_mount; /* fs mount struct ptr */
288 struct list_head i_reclaim; /* reclaim list */ 259 struct list_head i_reclaim; /* reclaim list */
289 struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/ 260 struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/
@@ -324,9 +295,8 @@ typedef struct xfs_inode {
324 unsigned int i_delayed_blks; /* count of delay alloc blks */ 295 unsigned int i_delayed_blks; /* count of delay alloc blks */
325 296
326 xfs_icdinode_t i_d; /* most of ondisk inode */ 297 xfs_icdinode_t i_d; /* most of ondisk inode */
327 xfs_chashlist_t *i_chash; /* cluster hash list header */ 298 xfs_icluster_t *i_cluster; /* cluster list header */
328 struct xfs_inode *i_cnext; /* cluster hash link forward */ 299 struct hlist_node i_cnode; /* cluster link node */
329 struct xfs_inode *i_cprev; /* cluster hash link backward */
330 300
331 xfs_fsize_t i_size; /* in-memory size */ 301 xfs_fsize_t i_size; /* in-memory size */
332 /* Trace buffers per inode. */ 302 /* Trace buffers per inode. */
@@ -521,8 +491,6 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
521 */ 491 */
522void xfs_ihash_init(struct xfs_mount *); 492void xfs_ihash_init(struct xfs_mount *);
523void xfs_ihash_free(struct xfs_mount *); 493void xfs_ihash_free(struct xfs_mount *);
524void xfs_chash_init(struct xfs_mount *);
525void xfs_chash_free(struct xfs_mount *);
526xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, 494xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
527 struct xfs_trans *); 495 struct xfs_trans *);
528void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); 496void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *);
@@ -633,7 +601,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
633#define xfs_inobp_check(mp, bp) 601#define xfs_inobp_check(mp, bp)
634#endif /* DEBUG */ 602#endif /* DEBUG */
635 603
636extern struct kmem_zone *xfs_chashlist_zone; 604extern struct kmem_zone *xfs_icluster_zone;
637extern struct kmem_zone *xfs_ifork_zone; 605extern struct kmem_zone *xfs_ifork_zone;
638extern struct kmem_zone *xfs_inode_zone; 606extern struct kmem_zone *xfs_inode_zone;
639extern struct kmem_zone *xfs_ili_zone; 607extern struct kmem_zone *xfs_ili_zone;