aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6/xfs_buf.h
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-09-24 05:59:04 -0400
committerAlex Elder <aelder@sgi.com>2010-10-18 16:07:56 -0400
commit74f75a0cb7033918eb0fa4a50df25091ac75c16e (patch)
tree3885c0b357c760152d14df03ef88839fdbf5f964 /fs/xfs/linux-2.6/xfs_buf.h
parent69b491c214d7fd4d4df972ae5377be99ca3753db (diff)
xfs: convert buffer cache hash to rbtree
The buffer cache hash is showing typical hash scalability problems. In large scale testing the number of cached items growing far larger than the hash can efficiently handle. Hence we need to move to a self-scaling cache indexing mechanism. I have selected rbtrees for indexing becuse they can have O(log n) search scalability, and insert and remove cost is not excessive, even on large trees. Hence we should be able to cache large numbers of buffers without incurring the excessive cache miss search penalties that the hash is imposing on us. To ensure we still have parallel access to the cache, we need multiple trees. Rather than hashing the buffers by disk address to select a tree, it seems more sensible to separate trees by typical access patterns. Most operations use buffers from within a single AG at a time, so rather than searching lots of different lists, separate the buffer indexes out into per-AG rbtrees. This means that searches during metadata operation have a much higher chance of hitting cache resident nodes, and that updates of the tree are less likely to disturb trees being accessed on other CPUs doing independent operations. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.h')
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h8
1 files changed, 2 insertions, 6 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 1f109cee136c..c79882ecb513 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -135,10 +135,6 @@ typedef struct xfs_buftarg {
135 unsigned int bt_sshift; 135 unsigned int bt_sshift;
136 size_t bt_smask; 136 size_t bt_smask;
137 137
138 /* per device buffer hash table */
139 uint bt_hashshift;
140 xfs_bufhash_t *bt_hash;
141
142 /* per device delwri queue */ 138 /* per device delwri queue */
143 struct task_struct *bt_task; 139 struct task_struct *bt_task;
144 struct list_head bt_list; 140 struct list_head bt_list;
@@ -172,8 +168,8 @@ typedef struct xfs_buf {
172 wait_queue_head_t b_waiters; /* unpin waiters */ 168 wait_queue_head_t b_waiters; /* unpin waiters */
173 struct list_head b_list; 169 struct list_head b_list;
174 xfs_buf_flags_t b_flags; /* status flags */ 170 xfs_buf_flags_t b_flags; /* status flags */
175 struct list_head b_hash_list; /* hash table list */ 171 struct rb_node b_rbnode; /* rbtree node */
176 xfs_bufhash_t *b_hash; /* hash table list start */ 172 struct xfs_perag *b_pag; /* contains rbtree root */
177 xfs_buftarg_t *b_target; /* buffer target (device) */ 173 xfs_buftarg_t *b_target; /* buffer target (device) */
178 atomic_t b_hold; /* reference count */ 174 atomic_t b_hold; /* reference count */
179 xfs_daddr_t b_bn; /* block number for I/O */ 175 xfs_daddr_t b_bn; /* block number for I/O */