author      Sowmini Varadhan <sowmini.varadhan@oracle.com>    2015-03-12 20:02:35 -0400
committer   David S. Miller <davem@davemloft.net>             2015-04-16 15:44:55 -0400
commit      10b88a4b17d31a7409494b179dcb76e7ab2fcaea (patch)
tree        1322d01102b150a8c04d2815fee1935efc951b76
parent      497a5df7bf6ffd136ae21c49d1a01292930d7ca2 (diff)
sparc: Break up monolithic iommu table/lock into finer granularity pools and lock
Investigation of multithreaded iperf experiments on an ethernet interface shows the iommu->lock to be the hottest lock identified by lockstat, with on the order of 21M contentions out of 27M acquisitions and an average wait time of 26 us for the lock. This is not efficient.

A more scalable design is to follow the ppc model, where the iommu_table has multiple pools, each stretching over a segment of the map and each with its own lock. This model allows better parallelization of the iommu map search.

This patch adds the iommu range alloc/free function infrastructure.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--   include/linux/iommu-common.h    55
-rw-r--r--   lib/Makefile                     2
-rw-r--r--   lib/iommu-common.c             220
3 files changed, 276 insertions(+), 1 deletion(-)
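To make the locking change in the commit message concrete before the diffs themselves, here is a minimal sketch of the pool-selection pattern the new allocator implements: hash to a starting pool, and on contention move to the next pool instead of spinning on one global lock. This sketch is not part of the patch; the function name pick_pool is invented for illustration, and the real code below uses the _irqsave lock variants and folds this logic into iommu_tbl_range_alloc.

    #include <linux/spinlock.h>
    #include <linux/iommu-common.h>

    /* Hash to a preferred pool, then walk forward until an uncontended
     * pool is found.  nr_pools is a power of two, so the mask wraps the
     * search around the table.
     */
    static struct iommu_pool *pick_pool(struct iommu_table *iommu,
                                        unsigned int pool_hash)
    {
            unsigned int pool_nr = pool_hash & (iommu->nr_pools - 1);
            struct iommu_pool *pool = &iommu->arena_pool[pool_nr];

            while (!spin_trylock(&pool->lock)) {
                    pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
                    pool = &iommu->arena_pool[pool_nr];
            }
            /* caller allocates from [pool->start, pool->end], then unlocks */
            return pool;
    }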
diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h
new file mode 100644
index 000000000000..6be5c863f329
--- /dev/null
+++ b/include/linux/iommu-common.h
@@ -0,0 +1,55 @@
#ifndef _LINUX_IOMMU_COMMON_H
#define _LINUX_IOMMU_COMMON_H

#include <linux/spinlock_types.h>
#include <linux/device.h>
#include <asm/page.h>

#define IOMMU_POOL_HASHBITS     4
#define IOMMU_NR_POOLS          (1 << IOMMU_POOL_HASHBITS)

struct iommu_pool {
        unsigned long   start;
        unsigned long   end;
        unsigned long   hint;
        spinlock_t      lock;
};

struct iommu_table;

struct iommu_tbl_ops {
        unsigned long   (*cookie_to_index)(u64, void *);
        void            (*demap)(void *, unsigned long, unsigned long);
        void            (*reset)(struct iommu_table *);
};

struct iommu_table {
        unsigned long           page_table_map_base;
        unsigned long           page_table_shift;
        unsigned long           nr_pools;
        const struct iommu_tbl_ops *iommu_tbl_ops;
        unsigned long           poolsize;
        struct iommu_pool       arena_pool[IOMMU_NR_POOLS];
        u32                     flags;
#define IOMMU_HAS_LARGE_POOL    0x00000001
        struct iommu_pool       large_pool;
        unsigned long           *map;
};

extern void iommu_tbl_pool_init(struct iommu_table *iommu,
                                unsigned long num_entries,
                                u32 page_table_shift,
                                const struct iommu_tbl_ops *iommu_tbl_ops,
                                bool large_pool, u32 npools);

extern unsigned long iommu_tbl_range_alloc(struct device *dev,
                                           struct iommu_table *iommu,
                                           unsigned long npages,
                                           unsigned long *handle,
                                           unsigned int pool_hash);

extern void iommu_tbl_range_free(struct iommu_table *iommu,
                                 u64 dma_addr, unsigned long npages,
                                 bool do_demap, void *demap_arg);

#endif
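As a usage illustration of the header above (an assumption on my part, not code from this series): a backend fills in an iommu_tbl_ops and hands its table to iommu_tbl_pool_init. The example_* names below are hypothetical, and note that the init function only carves the table into pools; the caller remains responsible for setting tbl->map and tbl->page_table_map_base itself.

    #include <linux/iommu-common.h>
    #include <asm/page.h>

    /* Hypothetical backend: DMA cookies are plain table indices. */
    static unsigned long example_cookie_to_index(u64 cookie, void *arg)
    {
            return (unsigned long)cookie;
    }

    static const struct iommu_tbl_ops example_tbl_ops = {
            .cookie_to_index = example_cookie_to_index,
            /* .demap and .reset may stay NULL; the core checks for NULL */
    };

    static void example_table_setup(struct iommu_table *tbl,
                                    unsigned long num_entries)
    {
            /*
             * npools == 0 selects the default IOMMU_NR_POOLS pools;
             * large_pool == true reserves the top quarter of the table
             * for allocations larger than IOMMU_LARGE_ALLOC pages.
             */
            iommu_tbl_pool_init(tbl, num_entries, PAGE_SHIFT,
                                &example_tbl_ops, true, 0);
    }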
diff --git a/lib/Makefile b/lib/Makefile
index 58f74d2dd396..60c22e65b793 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
new file mode 100644
index 000000000000..7583f9b7846b
--- /dev/null
+++ b/lib/iommu-common.c
@@ -0,0 +1,220 @@
/*
 * IOMMU mmap management and range allocation functions.
 * Based almost entirely upon the powerpc iommu allocator.
 */

#include <linux/export.h>
#include <linux/bitmap.h>
#include <linux/bug.h>
#include <linux/iommu-helper.h>
#include <linux/iommu-common.h>
#include <linux/dma-mapping.h>

#define IOMMU_LARGE_ALLOC       15

/*
 * Initialize iommu_pool entries for the iommu_table. `num_entries'
 * is the number of table entries. If `large_pool' is set to true,
 * the top 1/4 of the table will be set aside for pool allocations
 * of more than IOMMU_LARGE_ALLOC pages.
 */
void iommu_tbl_pool_init(struct iommu_table *iommu,
                         unsigned long num_entries,
                         u32 page_table_shift,
                         const struct iommu_tbl_ops *iommu_tbl_ops,
                         bool large_pool, u32 npools)
{
        unsigned int start, i;
        struct iommu_pool *p = &(iommu->large_pool);

        if (npools == 0)
                iommu->nr_pools = IOMMU_NR_POOLS;
        else
                iommu->nr_pools = npools;
        BUG_ON(npools > IOMMU_NR_POOLS);

        iommu->page_table_shift = page_table_shift;
        iommu->iommu_tbl_ops = iommu_tbl_ops;
        start = 0;
        if (large_pool)
                iommu->flags |= IOMMU_HAS_LARGE_POOL;

        if (!large_pool)
                iommu->poolsize = num_entries/iommu->nr_pools;
        else
                iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
        for (i = 0; i < iommu->nr_pools; i++) {
                spin_lock_init(&(iommu->arena_pool[i].lock));
                iommu->arena_pool[i].start = start;
                iommu->arena_pool[i].hint = start;
                start += iommu->poolsize; /* start for next pool */
                iommu->arena_pool[i].end = start - 1;
        }
        if (!large_pool)
                return;
        /* initialize large_pool */
        spin_lock_init(&(p->lock));
        p->start = start;
        p->hint = p->start;
        p->end = num_entries;
}
EXPORT_SYMBOL(iommu_tbl_pool_init);

unsigned long iommu_tbl_range_alloc(struct device *dev,
                                    struct iommu_table *iommu,
                                    unsigned long npages,
                                    unsigned long *handle,
                                    unsigned int pool_hash)
{
        unsigned long n, end, start, limit, boundary_size;
        struct iommu_pool *arena;
        int pass = 0;
        unsigned int pool_nr;
        unsigned int npools = iommu->nr_pools;
        unsigned long flags;
        bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
        bool largealloc = (large_pool && npages > IOMMU_LARGE_ALLOC);
        unsigned long shift;

        /* Sanity check */
        if (unlikely(npages == 0)) {
                printk_ratelimited("npages == 0\n");
                return DMA_ERROR_CODE;
        }

        if (largealloc) {
                arena = &(iommu->large_pool);
                spin_lock_irqsave(&arena->lock, flags);
                pool_nr = 0; /* to keep compiler happy */
        } else {
                /* pick out pool_nr */
                pool_nr = pool_hash & (npools - 1);
                arena = &(iommu->arena_pool[pool_nr]);

                /* find first available unlocked pool */
                while (!spin_trylock_irqsave(&(arena->lock), flags)) {
                        pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
                        arena = &(iommu->arena_pool[pool_nr]);
                }
        }

 again:
        if (pass == 0 && handle && *handle &&
            (*handle >= arena->start) && (*handle < arena->end))
                start = *handle;
        else
                start = arena->hint;

        limit = arena->end;

        /* The case below can happen if we have a small segment appended
         * to a large, or when the previous alloc was at the very end of
         * the available space. If so, go back to the beginning and flush.
         */
        if (start >= limit) {
                start = arena->start;
                if (iommu->iommu_tbl_ops->reset != NULL)
                        iommu->iommu_tbl_ops->reset(iommu);
        }

        if (dev)
                boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
                                      1 << iommu->page_table_shift);
        else
                boundary_size = ALIGN(1UL << 32, 1 << iommu->page_table_shift);

        shift = iommu->page_table_map_base >> iommu->page_table_shift;
        boundary_size = boundary_size >> iommu->page_table_shift;
        /*
         * if the iommu has a non-trivial cookie <-> index mapping, we set
         * things up so that iommu_is_span_boundary() merely checks if the
         * (index + npages) < num_tsb_entries
         */
        if (iommu->iommu_tbl_ops->cookie_to_index != NULL) {
                shift = 0;
                boundary_size = iommu->poolsize * iommu->nr_pools;
        }
        n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
                             boundary_size, 0);
        if (n == -1) {
                if (likely(pass == 0)) {
                        /* First failure, rescan from the beginning. */
                        arena->hint = arena->start;
                        if (iommu->iommu_tbl_ops->reset != NULL)
                                iommu->iommu_tbl_ops->reset(iommu);
                        pass++;
                        goto again;
                } else if (!largealloc && pass <= iommu->nr_pools) {
                        spin_unlock(&(arena->lock));
                        pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
                        arena = &(iommu->arena_pool[pool_nr]);
                        while (!spin_trylock(&(arena->lock))) {
                                pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
                                arena = &(iommu->arena_pool[pool_nr]);
                        }
                        arena->hint = arena->start;
                        pass++;
                        goto again;
                } else {
                        /* give up */
                        spin_unlock_irqrestore(&(arena->lock), flags);
                        return DMA_ERROR_CODE;
                }
        }

        end = n + npages;

        arena->hint = end;

        /* Update handle for SG allocations */
        if (handle)
                *handle = end;
        spin_unlock_irqrestore(&(arena->lock), flags);

        return n;
}
EXPORT_SYMBOL(iommu_tbl_range_alloc);

static struct iommu_pool *get_pool(struct iommu_table *tbl,
                                   unsigned long entry)
{
        struct iommu_pool *p;
        unsigned long largepool_start = tbl->large_pool.start;
        bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);

        /* The large pool is the last pool at the top of the table */
        if (large_pool && entry >= largepool_start) {
                p = &tbl->large_pool;
        } else {
                unsigned int pool_nr = entry / tbl->poolsize;

                BUG_ON(pool_nr >= tbl->nr_pools);
                p = &tbl->arena_pool[pool_nr];
        }
        return p;
}

void iommu_tbl_range_free(struct iommu_table *iommu, u64 dma_addr,
                          unsigned long npages, bool do_demap, void *demap_arg)
{
        unsigned long entry;
        struct iommu_pool *pool;
        unsigned long flags;
        unsigned long shift = iommu->page_table_shift;

        if (iommu->iommu_tbl_ops->cookie_to_index != NULL) {
                entry = (*iommu->iommu_tbl_ops->cookie_to_index)(dma_addr,
                                                                 demap_arg);
        } else {
                entry = (dma_addr - iommu->page_table_map_base) >> shift;
        }
        pool = get_pool(iommu, entry);

        spin_lock_irqsave(&(pool->lock), flags);
        if (do_demap && iommu->iommu_tbl_ops->demap != NULL)
                (*iommu->iommu_tbl_ops->demap)(demap_arg, entry, npages);

        bitmap_clear(iommu->map, entry, npages);
        spin_unlock_irqrestore(&(pool->lock), flags);
}
EXPORT_SYMBOL(iommu_tbl_range_free);
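Finally, a hedged sketch of the alloc/free round trip through the two exported entry points. The example_map/example_unmap helpers and the identity-cookie assumption are mine, not part of the patch; DMA_ERROR_CODE is whatever the architecture defines, as in the code above, and pool_hash only needs to spread concurrent callers across pools (a per-CPU value is one option).

    #include <linux/dma-mapping.h>
    #include <linux/iommu-common.h>

    /* Allocate npages of table entries and turn the index into a DMA cookie
     * using the table's map base and shift, mirroring the free path above. */
    static dma_addr_t example_map(struct device *dev, struct iommu_table *tbl,
                                  unsigned long npages, unsigned int pool_hash)
    {
            unsigned long entry;

            entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, pool_hash);
            if (entry == DMA_ERROR_CODE)
                    return DMA_ERROR_CODE;

            return tbl->page_table_map_base + (entry << tbl->page_table_shift);
    }

    /* Release the range; do_demap == false clears only the bitmap and skips
     * the backend's demap hook. */
    static void example_unmap(struct iommu_table *tbl, dma_addr_t dma_addr,
                              unsigned long npages)
    {
            iommu_tbl_range_free(tbl, dma_addr, npages, false, NULL);
    }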