diff options
author | Dave Chinner <david@fromorbit.com> | 2010-09-02 01:14:38 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2010-09-02 01:14:38 -0400 |
commit | 9bc08a45fb117c696e4940cfa1208cb1cc7a2f25 (patch) | |
tree | 610e4cb520d62c4ad6ae0f20ddd64cd15520c33a /fs/xfs/linux-2.6 | |
parent | 2bfc96a127bc1cc94d26bfaa40159966064f9c8c (diff) |
xfs: improve buffer cache hash scalability
When doing large parallel file creates on a 16p machines, large amounts of
time is being spent in _xfs_buf_find(). A system wide profile with perf top
shows this:
1134740.00 19.3% _xfs_buf_find
733142.00 12.5% __ticket_spin_lock
The problem is that the hash contains 45,000 buffers, and the hash table width
is only 256 buffers. That means we've got around 200 buffers per chain, and
searching it is quite expensive. The hash table size needs to increase.
Secondly, every time we do a lookup, we promote the buffer we find to the head
of the hash chain. This is causing cachelines to be dirtied and causes
invalidation of cachelines across all CPUs that may have walked the hash chain
recently. hence every walk of the hash chain is effectively a cold cache walk.
Remove the promotion to avoid this invalidation.
The results are:
1045043.00 21.2% __ticket_spin_lock
326184.00 6.6% _xfs_buf_find
A 70% drop in the CPU usage when looking up buffers. Unfortunately that does
not result in an increase in performance underthis workload as contention on
the inode_lock soaks up most of the reduction in CPU usage.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 8 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 1 |
2 files changed, 1 insertions, 8 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ea79072f5210..d72cf2bb054a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -440,12 +440,7 @@ _xfs_buf_find( | |||
440 | ASSERT(btp == bp->b_target); | 440 | ASSERT(btp == bp->b_target); |
441 | if (bp->b_file_offset == range_base && | 441 | if (bp->b_file_offset == range_base && |
442 | bp->b_buffer_length == range_length) { | 442 | bp->b_buffer_length == range_length) { |
443 | /* | ||
444 | * If we look at something, bring it to the | ||
445 | * front of the list for next time. | ||
446 | */ | ||
447 | atomic_inc(&bp->b_hold); | 443 | atomic_inc(&bp->b_hold); |
448 | list_move(&bp->b_hash_list, &hash->bh_list); | ||
449 | goto found; | 444 | goto found; |
450 | } | 445 | } |
451 | } | 446 | } |
@@ -1443,8 +1438,7 @@ xfs_alloc_bufhash( | |||
1443 | { | 1438 | { |
1444 | unsigned int i; | 1439 | unsigned int i; |
1445 | 1440 | ||
1446 | btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ | 1441 | btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ |
1447 | btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; | ||
1448 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * | 1442 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * |
1449 | sizeof(xfs_bufhash_t)); | 1443 | sizeof(xfs_bufhash_t)); |
1450 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1444 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index d072e5ff923b..2a05614f0b92 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -137,7 +137,6 @@ typedef struct xfs_buftarg { | |||
137 | size_t bt_smask; | 137 | size_t bt_smask; |
138 | 138 | ||
139 | /* per device buffer hash table */ | 139 | /* per device buffer hash table */ |
140 | uint bt_hashmask; | ||
141 | uint bt_hashshift; | 140 | uint bt_hashshift; |
142 | xfs_bufhash_t *bt_hash; | 141 | xfs_bufhash_t *bt_hash; |
143 | 142 | ||