aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorSowmini Varadhan <sowmini.varadhan@oracle.com>2015-04-09 15:33:30 -0400
committerDavid S. Miller <davem@davemloft.net>2015-04-18 15:32:59 -0400
commitff7d37a502022149655c18035b99a53391be0383 (patch)
tree262ef5021055702d1edfb2982c618720cde4489c /lib
parentc12f048ffdf3a5802239426dc290290929268dc9 (diff)
Break up monolithic iommu table/lock into finer granularity pools and locks
Investigation of multithreaded iperf experiments on an ethernet interface shows the iommu->lock as the hottest lock identified by lockstat, with something on the order of 21M contentions out of 27M acquisitions, and an average wait time of 26 us for the lock. This is not efficient. A more scalable design is to follow the ppc model, where the iommu_map_table has multiple pools, each stretching over a segment of the map, and with a separate lock for each pool. This model allows for better parallelization of the iommu map search. This patch adds the iommu range alloc/free function infrastructure. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile2
-rw-r--r--lib/iommu-common.c266
2 files changed, 267 insertions, 1 deletions
diff --git a/lib/Makefile b/lib/Makefile
index da6116b21555..6c37933336a0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
106obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o 106obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
107 107
108obj-$(CONFIG_SWIOTLB) += swiotlb.o 108obj-$(CONFIG_SWIOTLB) += swiotlb.o
109obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o 109obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
110obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o 110obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
111obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o 111obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
112obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o 112obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
new file mode 100644
index 000000000000..b99f1d744a8d
--- /dev/null
+++ b/lib/iommu-common.c
@@ -0,0 +1,266 @@
1/*
2 * IOMMU mmap management and range allocation functions.
3 * Based almost entirely upon the powerpc iommu allocator.
4 */
5
6#include <linux/export.h>
7#include <linux/bitmap.h>
8#include <linux/bug.h>
9#include <linux/iommu-helper.h>
10#include <linux/iommu-common.h>
11#include <linux/dma-mapping.h>
12#include <linux/hash.h>
13
/*
 * Page-count threshold for the large pool: iommu_tbl_range_alloc() sends a
 * request to the table's large pool only when the table has one and the
 * request is for more than this many pages.
 */
14unsigned long iommu_large_alloc = 15;
15
/*
 * Per-CPU hash value, filled in by setup_iommu_pool_hash() and read by
 * iommu_tbl_range_alloc() to choose the pool an allocation starts in.
 */
16static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
17
18static inline bool need_flush(struct iommu_map_table *iommu)
19{
20 return (iommu->lazy_flush != NULL &&
21 (iommu->flags & IOMMU_NEED_FLUSH) != 0);
22}
23
24static inline void set_flush(struct iommu_map_table *iommu)
25{
26 iommu->flags |= IOMMU_NEED_FLUSH;
27}
28
29static inline void clear_flush(struct iommu_map_table *iommu)
30{
31 iommu->flags &= ~IOMMU_NEED_FLUSH;
32}
33
34static void setup_iommu_pool_hash(void)
35{
36 unsigned int i;
37 static bool do_once;
38
39 if (do_once)
40 return;
41 do_once = true;
42 for_each_possible_cpu(i)
43 per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
44}
45
46/*
47 * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
48 * is the number of table entries. If `large_pool' is set to true,
49 * the top 1/4 of the table will be set aside for pool allocations
50 * of more than iommu_large_alloc pages.
51 */
52extern void iommu_tbl_pool_init(struct iommu_map_table *iommu,
53 unsigned long num_entries,
54 u32 table_shift,
55 void (*lazy_flush)(struct iommu_map_table *),
56 bool large_pool, u32 npools,
57 bool skip_span_boundary_check)
58{
59 unsigned int start, i;
60 struct iommu_pool *p = &(iommu->large_pool);
61
62 setup_iommu_pool_hash();
63 if (npools == 0)
64 iommu->nr_pools = IOMMU_NR_POOLS;
65 else
66 iommu->nr_pools = npools;
67 BUG_ON(npools > IOMMU_NR_POOLS);
68
69 iommu->table_shift = table_shift;
70 iommu->lazy_flush = lazy_flush;
71 start = 0;
72 if (skip_span_boundary_check)
73 iommu->flags |= IOMMU_NO_SPAN_BOUND;
74 if (large_pool)
75 iommu->flags |= IOMMU_HAS_LARGE_POOL;
76
77 if (!large_pool)
78 iommu->poolsize = num_entries/iommu->nr_pools;
79 else
80 iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
81 for (i = 0; i < iommu->nr_pools; i++) {
82 spin_lock_init(&(iommu->pools[i].lock));
83 iommu->pools[i].start = start;
84 iommu->pools[i].hint = start;
85 start += iommu->poolsize; /* start for next pool */
86 iommu->pools[i].end = start - 1;
87 }
88 if (!large_pool)
89 return;
90 /* initialize large_pool */
91 spin_lock_init(&(p->lock));
92 p->start = start;
93 p->hint = p->start;
94 p->end = num_entries;
95}
96EXPORT_SYMBOL(iommu_tbl_pool_init);
97
98unsigned long iommu_tbl_range_alloc(struct device *dev,
99 struct iommu_map_table *iommu,
100 unsigned long npages,
101 unsigned long *handle,
102 unsigned long mask,
103 unsigned int align_order)
104{
105 unsigned int pool_hash = __this_cpu_read(iommu_pool_hash);
106 unsigned long n, end, start, limit, boundary_size;
107 struct iommu_pool *pool;
108 int pass = 0;
109 unsigned int pool_nr;
110 unsigned int npools = iommu->nr_pools;
111 unsigned long flags;
112 bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
113 bool largealloc = (large_pool && npages > iommu_large_alloc);
114 unsigned long shift;
115 unsigned long align_mask = 0;
116
117 if (align_order > 0)
118 align_mask = 0xffffffffffffffffl >> (64 - align_order);
119
120 /* Sanity check */
121 if (unlikely(npages == 0)) {
122 WARN_ON_ONCE(1);
123 return DMA_ERROR_CODE;
124 }
125
126 if (largealloc) {
127 pool = &(iommu->large_pool);
128 pool_nr = 0; /* to keep compiler happy */
129 } else {
130 /* pick out pool_nr */
131 pool_nr = pool_hash & (npools - 1);
132 pool = &(iommu->pools[pool_nr]);
133 }
134 spin_lock_irqsave(&pool->lock, flags);
135
136 again:
137 if (pass == 0 && handle && *handle &&
138 (*handle >= pool->start) && (*handle < pool->end))
139 start = *handle;
140 else
141 start = pool->hint;
142
143 limit = pool->end;
144
145 /* The case below can happen if we have a small segment appended
146 * to a large, or when the previous alloc was at the very end of
147 * the available space. If so, go back to the beginning. If a
148 * flush is needed, it will get done based on the return value
149 * from iommu_area_alloc() below.
150 */
151 if (start >= limit)
152 start = pool->start;
153 shift = iommu->table_map_base >> iommu->table_shift;
154 if (limit + shift > mask) {
155 limit = mask - shift + 1;
156 /* If we're constrained on address range, first try
157 * at the masked hint to avoid O(n) search complexity,
158 * but on second pass, start at 0 in pool 0.
159 */
160 if ((start & mask) >= limit || pass > 0) {
161 spin_unlock(&(pool->lock));
162 pool = &(iommu->pools[0]);
163 spin_lock(&(pool->lock));
164 start = pool->start;
165 } else {
166 start &= mask;
167 }
168 }
169
170 if (dev)
171 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
172 1 << iommu->table_shift);
173 else
174 boundary_size = ALIGN(1UL << 32, 1 << iommu->table_shift);
175
176 boundary_size = boundary_size >> iommu->table_shift;
177 /*
178 * if the skip_span_boundary_check had been set during init, we set
179 * things up so that iommu_is_span_boundary() merely checks if the
180 * (index + npages) < num_tsb_entries
181 */
182 if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
183 shift = 0;
184 boundary_size = iommu->poolsize * iommu->nr_pools;
185 }
186 n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
187 boundary_size, align_mask);
188 if (n == -1) {
189 if (likely(pass == 0)) {
190 /* First failure, rescan from the beginning. */
191 pool->hint = pool->start;
192 set_flush(iommu);
193 pass++;
194 goto again;
195 } else if (!largealloc && pass <= iommu->nr_pools) {
196 spin_unlock(&(pool->lock));
197 pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
198 pool = &(iommu->pools[pool_nr]);
199 spin_lock(&(pool->lock));
200 pool->hint = pool->start;
201 set_flush(iommu);
202 pass++;
203 goto again;
204 } else {
205 /* give up */
206 n = DMA_ERROR_CODE;
207 goto bail;
208 }
209 }
210 if (n < pool->hint || need_flush(iommu)) {
211 clear_flush(iommu);
212 iommu->lazy_flush(iommu);
213 }
214
215 end = n + npages;
216 pool->hint = end;
217
218 /* Update handle for SG allocations */
219 if (handle)
220 *handle = end;
221bail:
222 spin_unlock_irqrestore(&(pool->lock), flags);
223
224 return n;
225}
226EXPORT_SYMBOL(iommu_tbl_range_alloc);
227
228static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
229 unsigned long entry)
230{
231 struct iommu_pool *p;
232 unsigned long largepool_start = tbl->large_pool.start;
233 bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
234
235 /* The large pool is the last pool at the top of the table */
236 if (large_pool && entry >= largepool_start) {
237 p = &tbl->large_pool;
238 } else {
239 unsigned int pool_nr = entry / tbl->poolsize;
240
241 BUG_ON(pool_nr >= tbl->nr_pools);
242 p = &tbl->pools[pool_nr];
243 }
244 return p;
245}
246
247/* Caller supplies the index of the entry into the iommu map table
248 * itself when the mapping from dma_addr to the entry is not the
249 * default addr->entry mapping below.
250 */
251void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
252 unsigned long npages, unsigned long entry)
253{
254 struct iommu_pool *pool;
255 unsigned long flags;
256 unsigned long shift = iommu->table_shift;
257
258 if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
259 entry = (dma_addr - iommu->table_map_base) >> shift;
260 pool = get_pool(iommu, entry);
261
262 spin_lock_irqsave(&(pool->lock), flags);
263 bitmap_clear(iommu->map, entry, npages);
264 spin_unlock_irqrestore(&(pool->lock), flags);
265}
266EXPORT_SYMBOL(iommu_tbl_range_free);