aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc64/kernel')
-rw-r--r--arch/sparc64/kernel/sbus.c560
1 files changed, 232 insertions, 328 deletions
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
index 01d6d869ea2b..14f78fb5e890 100644
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -24,48 +24,25 @@
24 24
25#include "iommu_common.h" 25#include "iommu_common.h"
26 26
27/* These should be allocated on an SMP_CACHE_BYTES
28 * aligned boundary for optimal performance.
29 *
30 * On SYSIO, using an 8K page size we have 1GB of SBUS
31 * DMA space mapped. We divide this space into equally
32 * sized clusters. We allocate a DMA mapping from the
33 * cluster that matches the order of the allocation, or
34 * if the order is greater than the number of clusters,
35 * we try to allocate from the last cluster.
36 */
37
38#define NCLUSTERS 8UL
39#define ONE_GIG (1UL * 1024UL * 1024UL * 1024UL)
40#define CLUSTER_SIZE (ONE_GIG / NCLUSTERS)
41#define CLUSTER_MASK (CLUSTER_SIZE - 1)
42#define CLUSTER_NPAGES (CLUSTER_SIZE >> IO_PAGE_SHIFT)
43#define MAP_BASE ((u32)0xc0000000) 27#define MAP_BASE ((u32)0xc0000000)
44 28
29struct sbus_iommu_arena {
30 unsigned long *map;
31 unsigned int hint;
32 unsigned int limit;
33};
34
45struct sbus_iommu { 35struct sbus_iommu {
46/*0x00*/spinlock_t lock; 36 spinlock_t lock;
47 37
48/*0x08*/iopte_t *page_table; 38 struct sbus_iommu_arena arena;
49/*0x10*/unsigned long strbuf_regs;
50/*0x18*/unsigned long iommu_regs;
51/*0x20*/unsigned long sbus_control_reg;
52 39
53/*0x28*/volatile unsigned long strbuf_flushflag; 40 iopte_t *page_table;
41 unsigned long strbuf_regs;
42 unsigned long iommu_regs;
43 unsigned long sbus_control_reg;
54 44
55 /* If NCLUSTERS is ever decresed to 4 or lower, 45 volatile unsigned long strbuf_flushflag;
56 * you must increase the size of the type of
57 * these counters. You have been duly warned. -DaveM
58 */
59/*0x30*/struct {
60 u16 next;
61 u16 flush;
62 } alloc_info[NCLUSTERS];
63
64 /* The lowest used consistent mapping entry. Since
65 * we allocate consistent maps out of cluster 0 this
66 * is relative to the beginning of closter 0.
67 */
68/*0x50*/u32 lowest_consistent_map;
69}; 46};
70 47
71/* Offsets from iommu_regs */ 48/* Offsets from iommu_regs */
@@ -91,19 +68,6 @@ static void __iommu_flushall(struct sbus_iommu *iommu)
91 tag += 8UL; 68 tag += 8UL;
92 } 69 }
93 upa_readq(iommu->sbus_control_reg); 70 upa_readq(iommu->sbus_control_reg);
94
95 for (entry = 0; entry < NCLUSTERS; entry++) {
96 iommu->alloc_info[entry].flush =
97 iommu->alloc_info[entry].next;
98 }
99}
100
101static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
102{
103 while (npages--)
104 upa_writeq(base + (npages << IO_PAGE_SHIFT),
105 iommu->iommu_regs + IOMMU_FLUSH);
106 upa_readq(iommu->sbus_control_reg);
107} 71}
108 72
109/* Offsets from strbuf_regs */ 73/* Offsets from strbuf_regs */
@@ -156,178 +120,115 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long
156 base, npages); 120 base, npages);
157} 121}
158 122
159static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages) 123/* Based largely upon the ppc64 iommu allocator. */
124static long sbus_arena_alloc(struct sbus_iommu *iommu, unsigned long npages)
160{ 125{
161 iopte_t *iopte, *limit, *first, *cluster; 126 struct sbus_iommu_arena *arena = &iommu->arena;
162 unsigned long cnum, ent, nent, flush_point, found; 127 unsigned long n, i, start, end, limit;
163 128 int pass;
164 cnum = 0; 129
165 nent = 1; 130 limit = arena->limit;
166 while ((1UL << cnum) < npages) 131 start = arena->hint;
167 cnum++; 132 pass = 0;
168 if(cnum >= NCLUSTERS) { 133
169 nent = 1UL << (cnum - NCLUSTERS); 134again:
170 cnum = NCLUSTERS - 1; 135 n = find_next_zero_bit(arena->map, limit, start);
171 } 136 end = n + npages;
172 iopte = iommu->page_table + (cnum * CLUSTER_NPAGES); 137 if (unlikely(end >= limit)) {
173 138 if (likely(pass < 1)) {
174 if (cnum == 0) 139 limit = start;
175 limit = (iommu->page_table + 140 start = 0;
176 iommu->lowest_consistent_map); 141 __iommu_flushall(iommu);
177 else 142 pass++;
178 limit = (iopte + CLUSTER_NPAGES); 143 goto again;
179
180 iopte += ((ent = iommu->alloc_info[cnum].next) << cnum);
181 flush_point = iommu->alloc_info[cnum].flush;
182
183 first = iopte;
184 cluster = NULL;
185 found = 0;
186 for (;;) {
187 if (iopte_val(*iopte) == 0UL) {
188 found++;
189 if (!cluster)
190 cluster = iopte;
191 } else { 144 } else {
192 /* Used cluster in the way */ 145 /* Scanned the whole thing, give up. */
193 cluster = NULL; 146 return -1;
194 found = 0;
195 } 147 }
148 }
196 149
197 if (found == nent) 150 for (i = n; i < end; i++) {
198 break; 151 if (test_bit(i, arena->map)) {
199 152 start = i + 1;
200 iopte += (1 << cnum); 153 goto again;
201 ent++;
202 if (iopte >= limit) {
203 iopte = (iommu->page_table + (cnum * CLUSTER_NPAGES));
204 ent = 0;
205
206 /* Multiple cluster allocations must not wrap */
207 cluster = NULL;
208 found = 0;
209 } 154 }
210 if (ent == flush_point)
211 __iommu_flushall(iommu);
212 if (iopte == first)
213 goto bad;
214 } 155 }
215 156
216 /* ent/iopte points to the last cluster entry we're going to use, 157 for (i = n; i < end; i++)
217 * so save our place for the next allocation. 158 __set_bit(i, arena->map);
218 */ 159
219 if ((iopte + (1 << cnum)) >= limit) 160 arena->hint = end;
220 ent = 0; 161
221 else 162 return n;
222 ent = ent + 1;
223 iommu->alloc_info[cnum].next = ent;
224 if (ent == flush_point)
225 __iommu_flushall(iommu);
226
227 /* I've got your streaming cluster right here buddy boy... */
228 return cluster;
229
230bad:
231 printk(KERN_EMERG "sbus: alloc_streaming_cluster of npages(%ld) failed!\n",
232 npages);
233 return NULL;
234} 163}
235 164
236static void free_streaming_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages) 165static void sbus_arena_free(struct sbus_iommu_arena *arena, unsigned long base, unsigned long npages)
237{ 166{
238 unsigned long cnum, ent, nent; 167 unsigned long i;
239 iopte_t *iopte;
240 168
241 cnum = 0; 169 for (i = base; i < (base + npages); i++)
242 nent = 1; 170 __clear_bit(i, arena->map);
243 while ((1UL << cnum) < npages)
244 cnum++;
245 if(cnum >= NCLUSTERS) {
246 nent = 1UL << (cnum - NCLUSTERS);
247 cnum = NCLUSTERS - 1;
248 }
249 ent = (base & CLUSTER_MASK) >> (IO_PAGE_SHIFT + cnum);
250 iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT);
251 do {
252 iopte_val(*iopte) = 0UL;
253 iopte += 1 << cnum;
254 } while(--nent);
255
256 /* If the global flush might not have caught this entry,
257 * adjust the flush point such that we will flush before
258 * ever trying to reuse it.
259 */
260#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y)))
261 if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush))
262 iommu->alloc_info[cnum].flush = ent;
263#undef between
264} 171}
265 172
266/* We allocate consistent mappings from the end of cluster zero. */ 173static void sbus_iommu_table_init(struct sbus_iommu *iommu, unsigned int tsbsize)
267static iopte_t *alloc_consistent_cluster(struct sbus_iommu *iommu, unsigned long npages)
268{ 174{
269 iopte_t *iopte; 175 unsigned long tsbbase, order, sz, num_tsb_entries;
270 176
271 iopte = iommu->page_table + (1 * CLUSTER_NPAGES); 177 num_tsb_entries = tsbsize / sizeof(iopte_t);
272 while (iopte > iommu->page_table) {
273 iopte--;
274 if (!(iopte_val(*iopte) & IOPTE_VALID)) {
275 unsigned long tmp = npages;
276 178
277 while (--tmp) { 179 /* Setup initial software IOMMU state. */
278 iopte--; 180 spin_lock_init(&iommu->lock);
279 if (iopte_val(*iopte) & IOPTE_VALID)
280 break;
281 }
282 if (tmp == 0) {
283 u32 entry = (iopte - iommu->page_table);
284 181
285 if (entry < iommu->lowest_consistent_map) 182 /* Allocate and initialize the free area map. */
286 iommu->lowest_consistent_map = entry; 183 sz = num_tsb_entries / 8;
287 return iopte; 184 sz = (sz + 7UL) & ~7UL;
288 } 185 iommu->arena.map = kzalloc(sz, GFP_KERNEL);
289 } 186 if (!iommu->arena.map) {
187 prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
188 prom_halt();
189 }
190 iommu->arena.limit = num_tsb_entries;
191
192 /* Now allocate and setup the IOMMU page table itself. */
193 order = get_order(tsbsize);
194 tsbbase = __get_free_pages(GFP_KERNEL, order);
195 if (!tsbbase) {
196 prom_printf("IOMMU: Error, gfp(tsb) failed.\n");
197 prom_halt();
290 } 198 }
291 return NULL; 199 iommu->page_table = (iopte_t *)tsbbase;
200 memset(iommu->page_table, 0, tsbsize);
292} 201}
293 202
294static void free_consistent_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages) 203static inline iopte_t *alloc_npages(struct sbus_iommu *iommu, unsigned long npages)
295{ 204{
296 iopte_t *iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT); 205 long entry;
297 206
298 if ((iopte - iommu->page_table) == iommu->lowest_consistent_map) { 207 entry = sbus_arena_alloc(iommu, npages);
299 iopte_t *walk = iopte + npages; 208 if (unlikely(entry < 0))
300 iopte_t *limit; 209 return NULL;
301 210
302 limit = iommu->page_table + CLUSTER_NPAGES; 211 return iommu->page_table + entry;
303 while (walk < limit) { 212}
304 if (iopte_val(*walk) != 0UL)
305 break;
306 walk++;
307 }
308 iommu->lowest_consistent_map =
309 (walk - iommu->page_table);
310 }
311 213
312 while (npages--) 214static inline void free_npages(struct sbus_iommu *iommu, dma_addr_t base, unsigned long npages)
313 *iopte++ = __iopte(0UL); 215{
216 sbus_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
314} 217}
315 218
316void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma_addr) 219void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma_addr)
317{ 220{
318 unsigned long order, first_page, flags;
319 struct sbus_iommu *iommu; 221 struct sbus_iommu *iommu;
320 iopte_t *iopte; 222 iopte_t *iopte;
223 unsigned long flags, order, first_page;
321 void *ret; 224 void *ret;
322 int npages; 225 int npages;
323 226
324 if (size <= 0 || sdev == NULL || dvma_addr == NULL)
325 return NULL;
326
327 size = IO_PAGE_ALIGN(size); 227 size = IO_PAGE_ALIGN(size);
328 order = get_order(size); 228 order = get_order(size);
329 if (order >= 10) 229 if (order >= 10)
330 return NULL; 230 return NULL;
231
331 first_page = __get_free_pages(GFP_KERNEL|__GFP_COMP, order); 232 first_page = __get_free_pages(GFP_KERNEL|__GFP_COMP, order);
332 if (first_page == 0UL) 233 if (first_page == 0UL)
333 return NULL; 234 return NULL;
@@ -336,108 +237,121 @@ void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma
336 iommu = sdev->bus->iommu; 237 iommu = sdev->bus->iommu;
337 238
338 spin_lock_irqsave(&iommu->lock, flags); 239 spin_lock_irqsave(&iommu->lock, flags);
339 iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT); 240 iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
340 if (iopte == NULL) { 241 spin_unlock_irqrestore(&iommu->lock, flags);
341 spin_unlock_irqrestore(&iommu->lock, flags); 242
243 if (unlikely(iopte == NULL)) {
342 free_pages(first_page, order); 244 free_pages(first_page, order);
343 return NULL; 245 return NULL;
344 } 246 }
345 247
346 /* Ok, we're committed at this point. */ 248 *dvma_addr = (MAP_BASE +
347 *dvma_addr = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT); 249 ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
348 ret = (void *) first_page; 250 ret = (void *) first_page;
349 npages = size >> IO_PAGE_SHIFT; 251 npages = size >> IO_PAGE_SHIFT;
252 first_page = __pa(first_page);
350 while (npages--) { 253 while (npages--) {
351 *iopte++ = __iopte(IOPTE_VALID | IOPTE_CACHE | IOPTE_WRITE | 254 iopte_val(*iopte) = (IOPTE_VALID | IOPTE_CACHE |
352 (__pa(first_page) & IOPTE_PAGE)); 255 IOPTE_WRITE |
256 (first_page & IOPTE_PAGE));
257 iopte++;
353 first_page += IO_PAGE_SIZE; 258 first_page += IO_PAGE_SIZE;
354 } 259 }
355 iommu_flush(iommu, *dvma_addr, size >> IO_PAGE_SHIFT);
356 spin_unlock_irqrestore(&iommu->lock, flags);
357 260
358 return ret; 261 return ret;
359} 262}
360 263
361void sbus_free_consistent(struct sbus_dev *sdev, size_t size, void *cpu, dma_addr_t dvma) 264void sbus_free_consistent(struct sbus_dev *sdev, size_t size, void *cpu, dma_addr_t dvma)
362{ 265{
363 unsigned long order, npages;
364 struct sbus_iommu *iommu; 266 struct sbus_iommu *iommu;
365 267 iopte_t *iopte;
366 if (size <= 0 || sdev == NULL || cpu == NULL) 268 unsigned long flags, order, npages;
367 return;
368 269
369 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; 270 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
370 iommu = sdev->bus->iommu; 271 iommu = sdev->bus->iommu;
272 iopte = iommu->page_table +
273 ((dvma - MAP_BASE) >> IO_PAGE_SHIFT);
274
275 spin_lock_irqsave(&iommu->lock, flags);
276
277 free_npages(iommu, dvma - MAP_BASE, npages);
371 278
372 spin_lock_irq(&iommu->lock); 279 spin_unlock_irqrestore(&iommu->lock, flags);
373 free_consistent_cluster(iommu, dvma, npages);
374 iommu_flush(iommu, dvma, npages);
375 spin_unlock_irq(&iommu->lock);
376 280
377 order = get_order(size); 281 order = get_order(size);
378 if (order < 10) 282 if (order < 10)
379 free_pages((unsigned long)cpu, order); 283 free_pages((unsigned long)cpu, order);
380} 284}
381 285
382dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t size, int dir) 286dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t sz, int direction)
383{ 287{
384 struct sbus_iommu *iommu = sdev->bus->iommu; 288 struct sbus_iommu *iommu;
385 unsigned long npages, pbase, flags; 289 iopte_t *base;
386 iopte_t *iopte; 290 unsigned long flags, npages, oaddr;
387 u32 dma_base, offset; 291 unsigned long i, base_paddr;
388 unsigned long iopte_bits; 292 u32 bus_addr, ret;
293 unsigned long iopte_protection;
294
295 iommu = sdev->bus->iommu;
389 296
390 if (dir == SBUS_DMA_NONE) 297 if (unlikely(direction == SBUS_DMA_NONE))
391 BUG(); 298 BUG();
392 299
393 pbase = (unsigned long) ptr; 300 oaddr = (unsigned long)ptr;
394 offset = (u32) (pbase & ~IO_PAGE_MASK); 301 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
395 size = (IO_PAGE_ALIGN(pbase + size) - (pbase & IO_PAGE_MASK)); 302 npages >>= IO_PAGE_SHIFT;
396 pbase = (unsigned long) __pa(pbase & IO_PAGE_MASK);
397 303
398 spin_lock_irqsave(&iommu->lock, flags); 304 spin_lock_irqsave(&iommu->lock, flags);
399 npages = size >> IO_PAGE_SHIFT; 305 base = alloc_npages(iommu, npages);
400 iopte = alloc_streaming_cluster(iommu, npages);
401 if (iopte == NULL)
402 goto bad;
403 dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
404 npages = size >> IO_PAGE_SHIFT;
405 iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
406 if (dir != SBUS_DMA_TODEVICE)
407 iopte_bits |= IOPTE_WRITE;
408 while (npages--) {
409 *iopte++ = __iopte(iopte_bits | (pbase & IOPTE_PAGE));
410 pbase += IO_PAGE_SIZE;
411 }
412 npages = size >> IO_PAGE_SHIFT;
413 spin_unlock_irqrestore(&iommu->lock, flags); 306 spin_unlock_irqrestore(&iommu->lock, flags);
414 307
415 return (dma_base | offset); 308 if (unlikely(!base))
309 BUG();
416 310
417bad: 311 bus_addr = (MAP_BASE +
418 spin_unlock_irqrestore(&iommu->lock, flags); 312 ((base - iommu->page_table) << IO_PAGE_SHIFT));
419 BUG(); 313 ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
420 return 0; 314 base_paddr = __pa(oaddr & IO_PAGE_MASK);
315
316 iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
317 if (direction != SBUS_DMA_TODEVICE)
318 iopte_protection |= IOPTE_WRITE;
319
320 for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
321 iopte_val(*base) = iopte_protection | base_paddr;
322
323 return ret;
421} 324}
422 325
423void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, int direction) 326void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
424{ 327{
425 struct sbus_iommu *iommu = sdev->bus->iommu; 328 struct sbus_iommu *iommu = sdev->bus->iommu;
426 u32 dma_base = dma_addr & IO_PAGE_MASK; 329 iopte_t *base;
427 unsigned long flags; 330 unsigned long flags, npages, i;
331
332 if (unlikely(direction == SBUS_DMA_NONE))
333 BUG();
334
335 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
336 npages >>= IO_PAGE_SHIFT;
337 base = iommu->page_table +
338 ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
428 339
429 size = (IO_PAGE_ALIGN(dma_addr + size) - dma_base); 340 bus_addr &= IO_PAGE_MASK;
430 341
431 spin_lock_irqsave(&iommu->lock, flags); 342 spin_lock_irqsave(&iommu->lock, flags);
432 free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT); 343 sbus_strbuf_flush(iommu, bus_addr, npages, direction);
433 sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction); 344 for (i = 0; i < npages; i++)
345 iopte_val(base[i]) = 0UL;
346 free_npages(iommu, bus_addr - MAP_BASE, npages);
434 spin_unlock_irqrestore(&iommu->lock, flags); 347 spin_unlock_irqrestore(&iommu->lock, flags);
435} 348}
436 349
437#define SG_ENT_PHYS_ADDRESS(SG) \ 350#define SG_ENT_PHYS_ADDRESS(SG) \
438 (__pa(page_address((SG)->page)) + (SG)->offset) 351 (__pa(page_address((SG)->page)) + (SG)->offset)
439 352
440static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, unsigned long iopte_bits) 353static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
354 int nused, int nelems, unsigned long iopte_protection)
441{ 355{
442 struct scatterlist *dma_sg = sg; 356 struct scatterlist *dma_sg = sg;
443 struct scatterlist *sg_end = sg + nelems; 357 struct scatterlist *sg_end = sg + nelems;
@@ -462,7 +376,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in
462 for (;;) { 376 for (;;) {
463 unsigned long tmp; 377 unsigned long tmp;
464 378
465 tmp = (unsigned long) SG_ENT_PHYS_ADDRESS(sg); 379 tmp = SG_ENT_PHYS_ADDRESS(sg);
466 len = sg->length; 380 len = sg->length;
467 if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) { 381 if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
468 pteval = tmp & IO_PAGE_MASK; 382 pteval = tmp & IO_PAGE_MASK;
@@ -478,7 +392,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in
478 sg++; 392 sg++;
479 } 393 }
480 394
481 pteval = ((pteval & IOPTE_PAGE) | iopte_bits); 395 pteval = iopte_protection | (pteval & IOPTE_PAGE);
482 while (len > 0) { 396 while (len > 0) {
483 *iopte++ = __iopte(pteval); 397 *iopte++ = __iopte(pteval);
484 pteval += IO_PAGE_SIZE; 398 pteval += IO_PAGE_SIZE;
@@ -509,103 +423,111 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in
509 } 423 }
510} 424}
511 425
512int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int dir) 426int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
513{ 427{
514 struct sbus_iommu *iommu = sdev->bus->iommu; 428 struct sbus_iommu *iommu;
515 unsigned long flags, npages; 429 unsigned long flags, npages, iopte_protection;
516 iopte_t *iopte; 430 iopte_t *base;
517 u32 dma_base; 431 u32 dma_base;
518 struct scatterlist *sgtmp; 432 struct scatterlist *sgtmp;
519 int used; 433 int used;
520 unsigned long iopte_bits;
521
522 if (dir == SBUS_DMA_NONE)
523 BUG();
524 434
525 /* Fast path single entry scatterlists. */ 435 /* Fast path single entry scatterlists. */
526 if (nents == 1) { 436 if (nelems == 1) {
527 sg->dma_address = 437 sglist->dma_address =
528 sbus_map_single(sdev, 438 sbus_map_single(sdev,
529 (page_address(sg->page) + sg->offset), 439 (page_address(sglist->page) + sglist->offset),
530 sg->length, dir); 440 sglist->length, direction);
531 sg->dma_length = sg->length; 441 sglist->dma_length = sglist->length;
532 return 1; 442 return 1;
533 } 443 }
534 444
535 npages = prepare_sg(sg, nents); 445 iommu = sdev->bus->iommu;
446
447 if (unlikely(direction == SBUS_DMA_NONE))
448 BUG();
449
450 npages = prepare_sg(sglist, nelems);
536 451
537 spin_lock_irqsave(&iommu->lock, flags); 452 spin_lock_irqsave(&iommu->lock, flags);
538 iopte = alloc_streaming_cluster(iommu, npages); 453 base = alloc_npages(iommu, npages);
539 if (iopte == NULL) 454 spin_unlock_irqrestore(&iommu->lock, flags);
540 goto bad; 455
541 dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT); 456 if (unlikely(base == NULL))
457 BUG();
458
459 dma_base = MAP_BASE +
460 ((base - iommu->page_table) << IO_PAGE_SHIFT);
542 461
543 /* Normalize DVMA addresses. */ 462 /* Normalize DVMA addresses. */
544 sgtmp = sg; 463 used = nelems;
545 used = nents;
546 464
465 sgtmp = sglist;
547 while (used && sgtmp->dma_length) { 466 while (used && sgtmp->dma_length) {
548 sgtmp->dma_address += dma_base; 467 sgtmp->dma_address += dma_base;
549 sgtmp++; 468 sgtmp++;
550 used--; 469 used--;
551 } 470 }
552 used = nents - used; 471 used = nelems - used;
553 472
554 iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE; 473 iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
555 if (dir != SBUS_DMA_TODEVICE) 474 if (direction != SBUS_DMA_TODEVICE)
556 iopte_bits |= IOPTE_WRITE; 475 iopte_protection |= IOPTE_WRITE;
476
477 fill_sg(base, sglist, used, nelems, iopte_protection);
557 478
558 fill_sg(iopte, sg, used, nents, iopte_bits);
559#ifdef VERIFY_SG 479#ifdef VERIFY_SG
560 verify_sglist(sg, nents, iopte, npages); 480 verify_sglist(sglist, nelems, base, npages);
561#endif 481#endif
562 spin_unlock_irqrestore(&iommu->lock, flags);
563 482
564 return used; 483 return used;
565
566bad:
567 spin_unlock_irqrestore(&iommu->lock, flags);
568 BUG();
569 return 0;
570} 484}
571 485
572void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction) 486void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
573{ 487{
574 unsigned long size, flags;
575 struct sbus_iommu *iommu; 488 struct sbus_iommu *iommu;
576 u32 dvma_base; 489 iopte_t *base;
577 int i; 490 unsigned long flags, i, npages;
491 u32 bus_addr;
578 492
579 /* Fast path single entry scatterlists. */ 493 if (unlikely(direction == SBUS_DMA_NONE))
580 if (nents == 1) { 494 BUG();
581 sbus_unmap_single(sdev, sg->dma_address, sg->dma_length, direction); 495
582 return; 496 iommu = sdev->bus->iommu;
583 } 497
498 bus_addr = sglist->dma_address & IO_PAGE_MASK;
584 499
585 dvma_base = sg[0].dma_address & IO_PAGE_MASK; 500 for (i = 1; i < nelems; i++)
586 for (i = 0; i < nents; i++) { 501 if (sglist[i].dma_length == 0)
587 if (sg[i].dma_length == 0)
588 break; 502 break;
589 }
590 i--; 503 i--;
591 size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - dvma_base; 504 npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
505 bus_addr) >> IO_PAGE_SHIFT;
506
507 base = iommu->page_table +
508 ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
592 509
593 iommu = sdev->bus->iommu;
594 spin_lock_irqsave(&iommu->lock, flags); 510 spin_lock_irqsave(&iommu->lock, flags);
595 free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT); 511 sbus_strbuf_flush(iommu, bus_addr, npages, direction);
596 sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction); 512 for (i = 0; i < npages; i++)
513 iopte_val(base[i]) = 0UL;
514 free_npages(iommu, bus_addr - MAP_BASE, npages);
597 spin_unlock_irqrestore(&iommu->lock, flags); 515 spin_unlock_irqrestore(&iommu->lock, flags);
598} 516}
599 517
600void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction) 518void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
601{ 519{
602 struct sbus_iommu *iommu = sdev->bus->iommu; 520 struct sbus_iommu *iommu;
603 unsigned long flags; 521 unsigned long flags, npages;
522
523 iommu = sdev->bus->iommu;
604 524
605 size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK)); 525 npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
526 npages >>= IO_PAGE_SHIFT;
527 bus_addr &= IO_PAGE_MASK;
606 528
607 spin_lock_irqsave(&iommu->lock, flags); 529 spin_lock_irqsave(&iommu->lock, flags);
608 sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction); 530 sbus_strbuf_flush(iommu, bus_addr, npages, direction);
609 spin_unlock_irqrestore(&iommu->lock, flags); 531 spin_unlock_irqrestore(&iommu->lock, flags);
610} 532}
611 533
@@ -613,23 +535,25 @@ void sbus_dma_sync_single_for_device(struct sbus_dev *sdev, dma_addr_t base, siz
613{ 535{
614} 536}
615 537
616void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction) 538void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
617{ 539{
618 struct sbus_iommu *iommu = sdev->bus->iommu; 540 struct sbus_iommu *iommu;
619 unsigned long flags, size; 541 unsigned long flags, npages, i;
620 u32 base; 542 u32 bus_addr;
621 int i; 543
544 iommu = sdev->bus->iommu;
622 545
623 base = sg[0].dma_address & IO_PAGE_MASK; 546 bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
624 for (i = 0; i < nents; i++) { 547 for (i = 0; i < nelems; i++) {
625 if (sg[i].dma_length == 0) 548 if (!sglist[i].dma_length)
626 break; 549 break;
627 } 550 }
628 i--; 551 i--;
629 size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; 552 npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
553 - bus_addr) >> IO_PAGE_SHIFT;
630 554
631 spin_lock_irqsave(&iommu->lock, flags); 555 spin_lock_irqsave(&iommu->lock, flags);
632 sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction); 556 sbus_strbuf_flush(iommu, bus_addr, npages, direction);
633 spin_unlock_irqrestore(&iommu->lock, flags); 557 spin_unlock_irqrestore(&iommu->lock, flags);
634} 558}
635 559
@@ -1104,7 +1028,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
1104 struct linux_prom64_registers *pr; 1028 struct linux_prom64_registers *pr;
1105 struct device_node *dp; 1029 struct device_node *dp;
1106 struct sbus_iommu *iommu; 1030 struct sbus_iommu *iommu;
1107 unsigned long regs, tsb_base; 1031 unsigned long regs;
1108 u64 control; 1032 u64 control;
1109 int i; 1033 int i;
1110 1034
@@ -1132,14 +1056,6 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
1132 1056
1133 memset(iommu, 0, sizeof(*iommu)); 1057 memset(iommu, 0, sizeof(*iommu));
1134 1058
1135 /* We start with no consistent mappings. */
1136 iommu->lowest_consistent_map = CLUSTER_NPAGES;
1137
1138 for (i = 0; i < NCLUSTERS; i++) {
1139 iommu->alloc_info[i].flush = 0;
1140 iommu->alloc_info[i].next = 0;
1141 }
1142
1143 /* Setup spinlock. */ 1059 /* Setup spinlock. */
1144 spin_lock_init(&iommu->lock); 1060 spin_lock_init(&iommu->lock);
1145 1061
@@ -1159,25 +1075,13 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
1159 sbus->portid, regs); 1075 sbus->portid, regs);
1160 1076
1161 /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */ 1077 /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
1078 sbus_iommu_table_init(iommu, IO_TSB_SIZE);
1079
1162 control = upa_readq(iommu->iommu_regs + IOMMU_CONTROL); 1080 control = upa_readq(iommu->iommu_regs + IOMMU_CONTROL);
1163 control = ((7UL << 16UL) | 1081 control = ((7UL << 16UL) |
1164 (0UL << 2UL) | 1082 (0UL << 2UL) |
1165 (1UL << 1UL) | 1083 (1UL << 1UL) |
1166 (1UL << 0UL)); 1084 (1UL << 0UL));
1167
1168 /* Using the above configuration we need 1MB iommu page
1169 * table (128K ioptes * 8 bytes per iopte). This is
1170 * page order 7 on UltraSparc.
1171 */
1172 tsb_base = __get_free_pages(GFP_ATOMIC, get_order(IO_TSB_SIZE));
1173 if (tsb_base == 0UL) {
1174 prom_printf("sbus_iommu_init: Fatal error, cannot alloc TSB table.\n");
1175 prom_halt();
1176 }
1177
1178 iommu->page_table = (iopte_t *) tsb_base;
1179 memset(iommu->page_table, 0, IO_TSB_SIZE);
1180
1181 upa_writeq(control, iommu->iommu_regs + IOMMU_CONTROL); 1085 upa_writeq(control, iommu->iommu_regs + IOMMU_CONTROL);
1182 1086
1183 /* Clean out any cruft in the IOMMU using 1087 /* Clean out any cruft in the IOMMU using
@@ -1195,7 +1099,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
1195 upa_readq(iommu->sbus_control_reg); 1099 upa_readq(iommu->sbus_control_reg);
1196 1100
1197 /* Give the TSB to SYSIO. */ 1101 /* Give the TSB to SYSIO. */
1198 upa_writeq(__pa(tsb_base), iommu->iommu_regs + IOMMU_TSBBASE); 1102 upa_writeq(__pa(iommu->page_table), iommu->iommu_regs + IOMMU_TSBBASE);
1199 1103
1200 /* Setup streaming buffer, DE=1 SB_EN=1 */ 1104 /* Setup streaming buffer, DE=1 SB_EN=1 */
1201 control = (1UL << 1UL) | (1UL << 0UL); 1105 control = (1UL << 1UL) | (1UL << 0UL);