aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/include/asm
diff options
context:
space:
mode:
author Anton Blanchard <anton@samba.org> 2012-06-07 14:14:48 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2012-07-03 00:14:48 -0400
commit b4c3a8729ae57b4f84d661e16a192f828eca1d03 (patch)
tree 03ff960dc63b7c60ed54cbf88f98c8b6df1823ec /arch/powerpc/include/asm
parent d362213722c8875b40d712796392682968ce685e (diff)
powerpc/iommu: Implement IOMMU pools to improve multiqueue adapter performance
At the moment all queues in a multiqueue adapter will serialise against the IOMMU table lock. This is proving to be a big issue, especially with 10Gbit ethernet.

This patch creates 4 pools and tries to spread the load across them. If the table is under 1GB in size we revert to the original behaviour of 1 pool and 1 largealloc pool.

We create a hash to map CPUs to pools. Since we prefer interrupts to be affinitised to primary CPUs, without some form of hashing we are very likely to end up using the same pool. As an example, POWER7 has 4-way SMT and with 4 pools all primary threads will map to the same pool.

The largealloc pool is reduced from 1/2 to 1/4 of the space to partially offset the overhead of breaking the table up into pools.

Some performance numbers were obtained with a Chelsio T3 adapter on two POWER7 boxes, running a 100 session TCP round robin test. Performance improved 69% with this patch applied.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/include/asm')
-rw-r--r-- arch/powerpc/include/asm/iommu.h 18
1 files changed, 14 insertions, 4 deletions
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 957a83f43646..cbfe678e3dbe 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -53,6 +53,16 @@ static __inline__ __attribute_const__ int get_iommu_order(unsigned long size)
53 */ 53 */
54#define IOMAP_MAX_ORDER 13 54#define IOMAP_MAX_ORDER 13
55 55
56#define IOMMU_POOL_HASHBITS 2
57#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS)
58
59struct iommu_pool {
60 unsigned long start;
61 unsigned long end;
62 unsigned long hint;
63 spinlock_t lock;
64} ____cacheline_aligned_in_smp;
65
56struct iommu_table { 66struct iommu_table {
57 unsigned long it_busno; /* Bus number this table belongs to */ 67 unsigned long it_busno; /* Bus number this table belongs to */
58 unsigned long it_size; /* Size of iommu table in entries */ 68 unsigned long it_size; /* Size of iommu table in entries */
@@ -61,10 +71,10 @@ struct iommu_table {
61 unsigned long it_index; /* which iommu table this is */ 71 unsigned long it_index; /* which iommu table this is */
62 unsigned long it_type; /* type: PCI or Virtual Bus */ 72 unsigned long it_type; /* type: PCI or Virtual Bus */
63 unsigned long it_blocksize; /* Entries in each block (cacheline) */ 73 unsigned long it_blocksize; /* Entries in each block (cacheline) */
64 unsigned long it_hint; /* Hint for next alloc */ 74 unsigned long poolsize;
65 unsigned long it_largehint; /* Hint for large allocs */ 75 unsigned long nr_pools;
66 unsigned long it_halfpoint; /* Breaking point for small/large allocs */ 76 struct iommu_pool large_pool;
67 spinlock_t it_lock; /* Protects it_map */ 77 struct iommu_pool pools[IOMMU_NR_POOLS];
68 unsigned long *it_map; /* A simple allocation bitmap for now */ 78 unsigned long *it_map; /* A simple allocation bitmap for now */
69}; 79};
70 80