path: root/arch/sparc64
Diffstat (limited to 'arch/sparc64')
-rw-r--r--  arch/sparc64/Kconfig                |   4
-rw-r--r--  arch/sparc64/kernel/iommu.c         | 352
-rw-r--r--  arch/sparc64/kernel/iommu_common.h  |  33
-rw-r--r--  arch/sparc64/kernel/kprobes.c       | 113
-rw-r--r--  arch/sparc64/kernel/of_device.c     |  26
-rw-r--r--  arch/sparc64/kernel/pci_sun4v.c     | 278
6 files changed, 509 insertions(+), 297 deletions(-)
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 810755637311..3af378ddb6ae 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -41,6 +41,10 @@ config MMU
 	bool
 	default y
 
+config IOMMU_HELPER
+	bool
+	default y
+
 config QUICKLIST
 	bool
 	default y
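The new IOMMU_HELPER option pulls in the generic bitmap allocator from lib/iommu-helper.c, which the rewritten sparc64 code below calls through iommu_area_alloc()/iommu_area_free(). The one property that helper adds over a plain bitmap search is that an allocation may not straddle a segment-boundary-aligned chunk. A standalone model of that rule, for illustration only (this is not the kernel implementation):

	/* Toy model of a boundary-constrained bitmap page allocator. */
	#include <stdio.h>
	#include <string.h>

	#define MAP_BITS 64

	static unsigned char map[MAP_BITS];	/* 1 = page in use */

	static long range_alloc(unsigned long start, unsigned long npages,
				unsigned long boundary)
	{
		unsigned long n, i;

		for (n = start; n + npages <= MAP_BITS; n++) {
			/* Reject candidates that would cross a boundary-aligned chunk. */
			if ((n / boundary) != ((n + npages - 1) / boundary))
				continue;
			for (i = 0; i < npages; i++)
				if (map[n + i])
					break;
			if (i == npages) {
				memset(&map[n], 1, npages);
				return n;
			}
		}
		return -1;	/* a real caller would retry from 0, then give up */
	}

	int main(void)
	{
		memset(map, 1, 6);	/* pretend the first six pages are taken */

		/* With an 8-page boundary, a 4-page request cannot use 6..9,
		 * so the allocator skips ahead and returns 8.
		 */
		printf("got entry %ld\n", range_alloc(0, 4, 8));
		return 0;
	}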
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 5623a4d59dff..d3276ebcfb47 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -1,6 +1,6 @@
 /* iommu.c: Generic sparc64 IOMMU support.
  *
- * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
  * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
  */
 
@@ -10,6 +10,7 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
+#include <linux/iommu-helper.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
@@ -41,7 +42,7 @@
41 "i" (ASI_PHYS_BYPASS_EC_E)) 42 "i" (ASI_PHYS_BYPASS_EC_E))
42 43
43/* Must be invoked under the IOMMU lock. */ 44/* Must be invoked under the IOMMU lock. */
44static void __iommu_flushall(struct iommu *iommu) 45static void iommu_flushall(struct iommu *iommu)
45{ 46{
46 if (iommu->iommu_flushinv) { 47 if (iommu->iommu_flushinv) {
47 iommu_write(iommu->iommu_flushinv, ~(u64)0); 48 iommu_write(iommu->iommu_flushinv, ~(u64)0);
@@ -83,54 +84,91 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
 	iopte_val(*iopte) = val;
 }
 
-/* Based largely upon the ppc64 iommu allocator. */
-static long arena_alloc(struct iommu *iommu, unsigned long npages)
+/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
+ * facility it must all be done in one pass while under the iommu lock.
+ *
+ * On sun4u platforms, we only flush the IOMMU once every time we've passed
+ * over the entire page table doing allocations. Therefore we only ever advance
+ * the hint and cannot backtrack it.
+ */
+unsigned long iommu_range_alloc(struct device *dev,
+				struct iommu *iommu,
+				unsigned long npages,
+				unsigned long *handle)
 {
+	unsigned long n, end, start, limit, boundary_size;
 	struct iommu_arena *arena = &iommu->arena;
-	unsigned long n, i, start, end, limit;
-	int pass;
+	int pass = 0;
+
+	/* This allocator was derived from x86_64's bit string search */
+
+	/* Sanity check */
+	if (unlikely(npages == 0)) {
+		if (printk_ratelimit())
+			WARN_ON(1);
+		return DMA_ERROR_CODE;
+	}
+
+	if (handle && *handle)
+		start = *handle;
+	else
+		start = arena->hint;
 
 	limit = arena->limit;
-	start = arena->hint;
-	pass = 0;
 
-again:
-	n = find_next_zero_bit(arena->map, limit, start);
-	end = n + npages;
-	if (unlikely(end >= limit)) {
+	/* The case below can happen if we have a small segment appended
+	 * to a large, or when the previous alloc was at the very end of
+	 * the available space. If so, go back to the beginning and flush.
+	 */
+	if (start >= limit) {
+		start = 0;
+		if (iommu->flush_all)
+			iommu->flush_all(iommu);
+	}
+
+ again:
+
+	if (dev)
+		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				      1 << IO_PAGE_SHIFT);
+	else
+		boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
+
+	n = iommu_area_alloc(arena->map, limit, start, npages, 0,
+			     boundary_size >> IO_PAGE_SHIFT, 0);
+	if (n == -1) {
 		if (likely(pass < 1)) {
-			limit = start;
+			/* First failure, rescan from the beginning. */
 			start = 0;
-			__iommu_flushall(iommu);
+			if (iommu->flush_all)
+				iommu->flush_all(iommu);
 			pass++;
 			goto again;
 		} else {
-			/* Scanned the whole thing, give up. */
-			return -1;
-		}
-	}
-
-	for (i = n; i < end; i++) {
-		if (test_bit(i, arena->map)) {
-			start = i + 1;
-			goto again;
+			/* Second failure, give up */
+			return DMA_ERROR_CODE;
 		}
 	}
 
-	for (i = n; i < end; i++)
-		__set_bit(i, arena->map);
+	end = n + npages;
 
 	arena->hint = end;
 
+	/* Update handle for SG allocations */
+	if (handle)
+		*handle = end;
+
 	return n;
 }
 
-static void arena_free(struct iommu_arena *arena, unsigned long base, unsigned long npages)
+void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
 {
-	unsigned long i;
+	struct iommu_arena *arena = &iommu->arena;
+	unsigned long entry;
 
-	for (i = base; i < (base + npages); i++)
-		__clear_bit(i, arena->map);
+	entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+
+	iommu_area_free(arena->map, entry, npages);
 }
 
 int iommu_table_init(struct iommu *iommu, int tsbsize,
@@ -156,6 +194,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 	}
 	iommu->arena.limit = num_tsb_entries;
 
+	if (tlb_type != hypervisor)
+		iommu->flush_all = iommu_flushall;
+
 	/* Allocate and initialize the dummy page which we
 	 * set inactive IO PTEs to point to.
 	 */
@@ -192,22 +233,18 @@ out_free_map:
 	return -ENOMEM;
 }
 
-static inline iopte_t *alloc_npages(struct iommu *iommu, unsigned long npages)
+static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+				    unsigned long npages)
 {
-	long entry;
+	unsigned long entry;
 
-	entry = arena_alloc(iommu, npages);
-	if (unlikely(entry < 0))
+	entry = iommu_range_alloc(dev, iommu, npages, NULL);
+	if (unlikely(entry == DMA_ERROR_CODE))
 		return NULL;
 
 	return iommu->page_table + entry;
 }
 
-static inline void free_npages(struct iommu *iommu, dma_addr_t base, unsigned long npages)
-{
-	arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
-}
-
 static int iommu_alloc_ctx(struct iommu *iommu)
 {
 	int lowest = iommu->ctx_lowest_free;
@@ -258,7 +295,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 	iommu = dev->archdata.iommu;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
+	iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	if (unlikely(iopte == NULL)) {
@@ -296,7 +333,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	free_npages(iommu, dvma - iommu->page_table_map_base, npages);
+	iommu_range_free(iommu, dvma, npages);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
@@ -327,7 +364,7 @@ static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
 	npages >>= IO_PAGE_SHIFT;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	base = alloc_npages(iommu, npages);
+	base = alloc_npages(dev, iommu, npages);
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
 		ctx = iommu_alloc_ctx(iommu);
@@ -465,7 +502,7 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
 	for (i = 0; i < npages; i++)
 		iopte_make_dummy(iommu, base + i);
 
-	free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+	iommu_range_free(iommu, bus_addr, npages);
 
 	iommu_free_ctx(iommu, ctx);
 
@@ -475,124 +512,209 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 			 int nelems, enum dma_data_direction direction)
 {
-	unsigned long flags, ctx, i, npages, iopte_protection;
-	struct scatterlist *sg;
+	struct scatterlist *s, *outs, *segstart;
+	unsigned long flags, handle, prot, ctx;
+	dma_addr_t dma_next = 0, dma_addr;
+	unsigned int max_seg_size;
+	int outcount, incount, i;
 	struct strbuf *strbuf;
 	struct iommu *iommu;
-	iopte_t *base;
-	u32 dma_base;
-
-	/* Fast path single entry scatterlists. */
-	if (nelems == 1) {
-		sglist->dma_address =
-			dma_4u_map_single(dev, sg_virt(sglist),
-					  sglist->length, direction);
-		if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
-			return 0;
-		sglist->dma_length = sglist->length;
-		return 1;
-	}
+
+	BUG_ON(direction == DMA_NONE);
 
 	iommu = dev->archdata.iommu;
 	strbuf = dev->archdata.stc;
-
-	if (unlikely(direction == DMA_NONE))
-		goto bad_no_ctx;
-
-	npages = calc_npages(sglist, nelems);
+	if (nelems == 0 || !iommu)
+		return 0;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	base = alloc_npages(iommu, npages);
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
 		ctx = iommu_alloc_ctx(iommu);
 
-	spin_unlock_irqrestore(&iommu->lock, flags);
-
-	if (base == NULL)
-		goto bad;
-
-	dma_base = iommu->page_table_map_base +
-		((base - iommu->page_table) << IO_PAGE_SHIFT);
-
 	if (strbuf->strbuf_enabled)
-		iopte_protection = IOPTE_STREAMING(ctx);
+		prot = IOPTE_STREAMING(ctx);
 	else
-		iopte_protection = IOPTE_CONSISTENT(ctx);
+		prot = IOPTE_CONSISTENT(ctx);
 	if (direction != DMA_TO_DEVICE)
-		iopte_protection |= IOPTE_WRITE;
+		prot |= IOPTE_WRITE;
 
-	for_each_sg(sglist, sg, nelems, i) {
-		unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
-		unsigned long slen = sg->length;
-		unsigned long this_npages;
+	outs = s = segstart = &sglist[0];
+	outcount = 1;
+	incount = nelems;
+	handle = 0;
+
+	/* Init first segment length for backout at failure */
+	outs->dma_length = 0;
+
+	max_seg_size = dma_get_max_seg_size(dev);
+	for_each_sg(sglist, s, nelems, i) {
+		unsigned long paddr, npages, entry, slen;
+		iopte_t *base;
+
+		slen = s->length;
+		/* Sanity check */
+		if (slen == 0) {
+			dma_next = 0;
+			continue;
+		}
+		/* Allocate iommu entries for that segment */
+		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+		npages = iommu_num_pages(paddr, slen);
+		entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+		/* Handle failure */
+		if (unlikely(entry == DMA_ERROR_CODE)) {
+			if (printk_ratelimit())
+				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+				       " npages %lx\n", iommu, paddr, npages);
+			goto iommu_map_failed;
+		}
 
-		this_npages = iommu_num_pages(paddr, slen);
+		base = iommu->page_table + entry;
 
-		sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
-		sg->dma_length = slen;
+		/* Convert entry to a dma_addr_t */
+		dma_addr = iommu->page_table_map_base +
+			   (entry << IO_PAGE_SHIFT);
+		dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
+		/* Insert into HW table */
 		paddr &= IO_PAGE_MASK;
-		while (this_npages--) {
-			iopte_val(*base) = iopte_protection | paddr;
-
+		while (npages--) {
+			iopte_val(*base) = prot | paddr;
 			base++;
 			paddr += IO_PAGE_SIZE;
-			dma_base += IO_PAGE_SIZE;
 		}
+
+		/* If we are in an open segment, try merging */
+		if (segstart != s) {
+			/* We cannot merge if:
+			 * - allocated dma_addr isn't contiguous to previous allocation
+			 */
+			if ((dma_addr != dma_next) ||
+			    (outs->dma_length + s->length > max_seg_size)) {
+				/* Can't merge: create a new segment */
+				segstart = s;
+				outcount++;
+				outs = sg_next(outs);
+			} else {
+				outs->dma_length += s->length;
+			}
+		}
+
+		if (segstart == s) {
+			/* This is a new segment, fill entries */
+			outs->dma_address = dma_addr;
+			outs->dma_length = slen;
+		}
+
+		/* Calculate next page pointer for contiguous check */
+		dma_next = dma_addr + slen;
 	}
 
-	return nelems;
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	if (outcount < incount) {
+		outs = sg_next(outs);
+		outs->dma_address = DMA_ERROR_CODE;
+		outs->dma_length = 0;
+	}
+
+	return outcount;
+
+iommu_map_failed:
+	for_each_sg(sglist, s, nelems, i) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages, entry, i;
+			iopte_t *base;
+
+			vaddr = s->dma_address & IO_PAGE_MASK;
+			npages = iommu_num_pages(s->dma_address, s->dma_length);
+			iommu_range_free(iommu, vaddr, npages);
+
+			entry = (vaddr - iommu->page_table_map_base)
+				>> IO_PAGE_SHIFT;
+			base = iommu->page_table + entry;
+
+			for (i = 0; i < npages; i++)
+				iopte_make_dummy(iommu, base + i);
+
+			s->dma_address = DMA_ERROR_CODE;
+			s->dma_length = 0;
+		}
+		if (s == outs)
+			break;
+	}
+	spin_unlock_irqrestore(&iommu->lock, flags);
 
-bad:
-	iommu_free_ctx(iommu, ctx);
-bad_no_ctx:
-	if (printk_ratelimit())
-		WARN_ON(1);
 	return 0;
 }
 
+/* If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+	unsigned long ctx = 0;
+
+	if (iommu->iommu_ctxflush) {
+		iopte_t *base;
+		u32 bus_addr;
+
+		bus_addr = sg->dma_address & IO_PAGE_MASK;
+		base = iommu->page_table +
+			((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+	}
+	return ctx;
+}
+
 static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			    int nelems, enum dma_data_direction direction)
 {
-	unsigned long flags, ctx, i, npages;
+	unsigned long flags, ctx;
+	struct scatterlist *sg;
 	struct strbuf *strbuf;
 	struct iommu *iommu;
-	iopte_t *base;
-	u32 bus_addr;
 
-	if (unlikely(direction == DMA_NONE)) {
-		if (printk_ratelimit())
-			WARN_ON(1);
-	}
+	BUG_ON(direction == DMA_NONE);
 
 	iommu = dev->archdata.iommu;
 	strbuf = dev->archdata.stc;
 
-	bus_addr = sglist->dma_address & IO_PAGE_MASK;
+	ctx = fetch_sg_ctx(iommu, sglist);
 
-	npages = calc_npages(sglist, nelems);
+	spin_lock_irqsave(&iommu->lock, flags);
 
-	base = iommu->page_table +
-		((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+	sg = sglist;
+	while (nelems--) {
+		dma_addr_t dma_handle = sg->dma_address;
+		unsigned int len = sg->dma_length;
+		unsigned long npages, entry;
+		iopte_t *base;
+		int i;
 
-	spin_lock_irqsave(&iommu->lock, flags);
+		if (!len)
+			break;
+		npages = iommu_num_pages(dma_handle, len);
+		iommu_range_free(iommu, dma_handle, npages);
 
-	/* Record the context, if any. */
-	ctx = 0;
-	if (iommu->iommu_ctxflush)
-		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+		entry = ((dma_handle - iommu->page_table_map_base)
+			 >> IO_PAGE_SHIFT);
+		base = iommu->page_table + entry;
 
-	/* Step 1: Kick data out of streaming buffers if necessary. */
-	if (strbuf->strbuf_enabled)
-		strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+		dma_handle &= IO_PAGE_MASK;
+		if (strbuf->strbuf_enabled)
+			strbuf_flush(strbuf, iommu, dma_handle, ctx,
+				     npages, direction);
 
-	/* Step 2: Clear out the TSB entries. */
-	for (i = 0; i < npages; i++)
-		iopte_make_dummy(iommu, base + i);
+		for (i = 0; i < npages; i++)
+			iopte_make_dummy(iommu, base + i);
 
-	free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+		sg = sg_next(sg);
+	}
 
 	iommu_free_ctx(iommu, ctx);
 
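The rewritten dma_4u_map_sg() above now coalesces scatterlist entries itself instead of relying on prepare_sg(): a mapping is appended to the open segment only when its DMA address starts exactly at dma_next and the combined length stays within dma_get_max_seg_size(). A toy, self-contained model of that rule (the names below are invented for illustration, not taken from the kernel):

	#include <stdio.h>

	struct seg { unsigned long dma_addr; unsigned long len; };

	static int merge_segments(struct seg *in, int nelems,
				  unsigned long max_seg_size, struct seg *out)
	{
		unsigned long dma_next = 0;
		int outcount = 0;
		int i;

		for (i = 0; i < nelems; i++) {
			if (outcount &&
			    in[i].dma_addr == dma_next &&
			    out[outcount - 1].len + in[i].len <= max_seg_size) {
				out[outcount - 1].len += in[i].len;	/* merge */
			} else {
				out[outcount++] = in[i];		/* new segment */
			}
			dma_next = in[i].dma_addr + in[i].len;
		}
		return outcount;
	}

	int main(void)
	{
		struct seg in[3] = {
			{ 0x10000, 0x2000 },	/* contiguous with the next entry */
			{ 0x12000, 0x1000 },
			{ 0x20000, 0x1000 },	/* gap: forces a new segment */
		};
		struct seg out[3];
		int i, n = merge_segments(in, 3, 0x10000, out);

		for (i = 0; i < n; i++)
			printf("seg %d: addr=%#lx len=%#lx\n",
			       i, out[i].dma_addr, out[i].len);
		return 0;
	}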
diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h
index 4b5cafa2877a..0713bd58499c 100644
--- a/arch/sparc64/kernel/iommu_common.h
+++ b/arch/sparc64/kernel/iommu_common.h
@@ -1,9 +1,11 @@
-/* $Id: iommu_common.h,v 1.5 2001/12/11 09:41:01 davem Exp $
- * iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
+/* iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
  *
- * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999, 2008 David S. Miller (davem@davemloft.net)
  */
 
+#ifndef _IOMMU_COMMON_H
+#define _IOMMU_COMMON_H
+
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/sched.h>
@@ -56,21 +58,12 @@ static inline unsigned long calc_npages(struct scatterlist *sglist, int nelems)
 	return npages;
 }
 
-/* You are _strongly_ advised to enable the following debugging code
- * any time you make changes to the sg code below, run it for a while
- * with filesystems mounted read-only before buying the farm... -DaveM
- */
-#undef VERIFY_SG
-
-#ifdef VERIFY_SG
-extern void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages);
-#endif
-
-/* Two addresses are "virtually contiguous" if and only if:
- * 1) They are equal, or...
- * 2) They are both on a page boundary
- */
-#define VCONTIG(__X, __Y) (((__X) == (__Y)) || \
-			   (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL)
+extern unsigned long iommu_range_alloc(struct device *dev,
+				       struct iommu *iommu,
+				       unsigned long npages,
+				       unsigned long *handle);
+extern void iommu_range_free(struct iommu *iommu,
+			     dma_addr_t dma_addr,
+			     unsigned long npages);
 
-extern unsigned long prepare_sg(struct device *dev, struct scatterlist *sg, int nents);
+#endif /* _IOMMU_COMMON_H */
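These two prototypes are the whole contract between the allocator in iommu.c and its users. A minimal call sequence, modeled on the map_single paths in this patch (a sketch only, with error handling trimmed; every identifier used here appears in the diff above):

	unsigned long entry, flags;
	dma_addr_t dma_addr;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = iommu_range_alloc(dev, iommu, npages, NULL);	/* NULL: no SG handle */
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry == DMA_ERROR_CODE))
		return DMA_ERROR_CODE;

	dma_addr = iommu->page_table_map_base + (entry << IO_PAGE_SHIFT);
	/* ... program the IOPTEs or hypervisor mappings for 'entry' ... */

	/* and on unmap, again under iommu->lock: */
	iommu_range_free(iommu, dma_addr, npages);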
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index d94f901d321e..34fc3ddd5002 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -480,8 +480,117 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-/* architecture specific initialization */
-int arch_init_kprobes(void)
+/* Called with kretprobe_lock held. The value stored in the return
+ * address register is actually 2 instructions before where the
+ * callee will return to. Sequences usually look something like this
+ *
+ *		call	some_function	<--- return register points here
+ *		 nop			<--- call delay slot
+ *		whatever		<--- where callee returns to
+ *
+ * To keep trampoline_probe_handler logic simpler, we normalize the
+ * value kept in ri->ret_addr so we don't need to keep adjusting it
+ * back and forth.
+ */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				      struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *)(regs->u_regs[UREG_RETPC] + 8);
+
+	/* Replace the return addr with trampoline addr */
+	regs->u_regs[UREG_RETPC] =
+		((unsigned long)kretprobe_trampoline) - 8;
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *node, *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+	INIT_HLIST_HEAD(&empty_rp);
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *	- instances are always inserted at the head of the list
+	 *	- when multiple return probes are registered for the same
+	 *	  function, the first instance's ret_addr will point to the
+	 *	  real return address, and all the rest will point to
+	 *	  kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+	regs->tpc = orig_ret_address;
+	regs->tnpc = orig_ret_address + 4;
+
+	reset_current_kprobe();
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	preempt_enable_no_resched();
+
+	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
+	return 1;
+}
+
+void kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+		     "kretprobe_trampoline:\n"
+		     "\tnop\n"
+		     "\tnop\n");
+}
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
 {
+	return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+		return 1;
+
 	return 0;
 }
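With arch_prepare_kretprobe() and the trampoline in place, sparc64 can use return probes through the generic kprobes API. A minimal, illustrative module (not part of this patch; the probed symbol and the message are arbitrary choices):

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/kprobes.h>

	/* Runs when the probed function returns, via kretprobe_trampoline. */
	static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		/* ri->ret_addr is the normalized return address that
		 * arch_prepare_kretprobe() recorded.
		 */
		printk(KERN_INFO "probed function returned to %p\n", ri->ret_addr);
		return 0;
	}

	static struct kretprobe my_kretprobe = {
		.handler	= ret_handler,
		.maxactive	= 20,	/* concurrent instances to allow */
	};

	static int __init kret_example_init(void)
	{
		my_kretprobe.kp.symbol_name = "do_fork";	/* example target */
		return register_kretprobe(&my_kretprobe);
	}

	static void __exit kret_example_exit(void)
	{
		unregister_kretprobe(&my_kretprobe);
	}

	module_init(kret_example_init);
	module_exit(kret_example_exit);
	MODULE_LICENSE("GPL");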
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index fc5c0cc793b8..0fd9db95b896 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -868,29 +868,3 @@ static int __init of_debug(char *str)
 }
 
 __setup("of_debug=", of_debug);
-
-struct of_device* of_platform_device_create(struct device_node *np,
-					    const char *bus_id,
-					    struct device *parent,
-					    struct bus_type *bus)
-{
-	struct of_device *dev;
-
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev)
-		return NULL;
-
-	dev->dev.parent = parent;
-	dev->dev.bus = bus;
-	dev->dev.release = of_release_dev;
-
-	strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
-
-	if (of_device_register(dev) != 0) {
-		kfree(dev);
-		return NULL;
-	}
-
-	return dev;
-}
-EXPORT_SYMBOL(of_platform_device_create);
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index 61baf8dc095e..ddca6c6c0b49 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -1,6 +1,6 @@
 /* pci_sun4v.c: SUN4V specific PCI controller support.
  *
- * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
  */
 
 #include <linux/kernel.h>
@@ -89,6 +89,17 @@ static long iommu_batch_flush(struct iommu_batch *p)
 	return 0;
 }
 
+static inline void iommu_batch_new_entry(unsigned long entry)
+{
+	struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+	if (p->entry + p->npages == entry)
+		return;
+	if (p->entry != ~0UL)
+		iommu_batch_flush(p);
+	p->entry = entry;
+}
+
 /* Interrupts must be disabled. */
 static inline long iommu_batch_add(u64 phys_page)
 {
@@ -113,54 +124,6 @@ static inline long iommu_batch_end(void)
 	return iommu_batch_flush(p);
 }
 
-static long arena_alloc(struct iommu_arena *arena, unsigned long npages)
-{
-	unsigned long n, i, start, end, limit;
-	int pass;
-
-	limit = arena->limit;
-	start = arena->hint;
-	pass = 0;
-
-again:
-	n = find_next_zero_bit(arena->map, limit, start);
-	end = n + npages;
-	if (unlikely(end >= limit)) {
-		if (likely(pass < 1)) {
-			limit = start;
-			start = 0;
-			pass++;
-			goto again;
-		} else {
-			/* Scanned the whole thing, give up. */
-			return -1;
-		}
-	}
-
-	for (i = n; i < end; i++) {
-		if (test_bit(i, arena->map)) {
-			start = i + 1;
-			goto again;
-		}
-	}
-
-	for (i = n; i < end; i++)
-		__set_bit(i, arena->map);
-
-	arena->hint = end;
-
-	return n;
-}
-
-static void arena_free(struct iommu_arena *arena, unsigned long base,
-		       unsigned long npages)
-{
-	unsigned long i;
-
-	for (i = base; i < (base + npages); i++)
-		__clear_bit(i, arena->map);
-}
-
 static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 				   dma_addr_t *dma_addrp, gfp_t gfp)
 {
@@ -185,11 +148,11 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 	iommu = dev->archdata.iommu;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	entry = arena_alloc(&iommu->arena, npages);
+	entry = iommu_range_alloc(dev, iommu, npages, NULL);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	if (unlikely(entry < 0L))
-		goto arena_alloc_fail;
+	if (unlikely(entry == DMA_ERROR_CODE))
+		goto range_alloc_fail;
 
 	*dma_addrp = (iommu->page_table_map_base +
 		      (entry << IO_PAGE_SHIFT));
@@ -219,10 +182,10 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 iommu_map_fail:
 	/* Interrupts are disabled. */
 	spin_lock(&iommu->lock);
-	arena_free(&iommu->arena, entry, npages);
+	iommu_range_free(iommu, *dma_addrp, npages);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-arena_alloc_fail:
+range_alloc_fail:
 	free_pages(first_page, order);
 	return NULL;
 }
@@ -243,7 +206,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	arena_free(&iommu->arena, entry, npages);
+	iommu_range_free(iommu, dvma, npages);
 
 	do {
 		unsigned long num;
@@ -281,10 +244,10 @@ static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
 	npages >>= IO_PAGE_SHIFT;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	entry = arena_alloc(&iommu->arena, npages);
+	entry = iommu_range_alloc(dev, iommu, npages, NULL);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	if (unlikely(entry < 0L))
+	if (unlikely(entry == DMA_ERROR_CODE))
 		goto bad;
 
 	bus_addr = (iommu->page_table_map_base +
@@ -319,7 +282,7 @@ bad:
 iommu_map_fail:
 	/* Interrupts are disabled. */
 	spin_lock(&iommu->lock);
-	arena_free(&iommu->arena, entry, npages);
+	iommu_range_free(iommu, bus_addr, npages);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return DMA_ERROR_CODE;
@@ -350,9 +313,9 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
-	arena_free(&iommu->arena, entry, npages);
+	iommu_range_free(iommu, bus_addr, npages);
 
+	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
 	do {
 		unsigned long num;
 
@@ -368,88 +331,131 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 			 int nelems, enum dma_data_direction direction)
 {
-	unsigned long flags, npages, i, prot;
-	struct scatterlist *sg;
+	struct scatterlist *s, *outs, *segstart;
+	unsigned long flags, handle, prot;
+	dma_addr_t dma_next = 0, dma_addr;
+	unsigned int max_seg_size;
+	int outcount, incount, i;
 	struct iommu *iommu;
-	long entry, err;
-	u32 dma_base;
-
-	/* Fast path single entry scatterlists. */
-	if (nelems == 1) {
-		sglist->dma_address =
-			dma_4v_map_single(dev, sg_virt(sglist),
-					  sglist->length, direction);
-		if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
-			return 0;
-		sglist->dma_length = sglist->length;
-		return 1;
-	}
+	long err;
+
+	BUG_ON(direction == DMA_NONE);
 
 	iommu = dev->archdata.iommu;
+	if (nelems == 0 || !iommu)
+		return 0;
 
-	if (unlikely(direction == DMA_NONE))
-		goto bad;
-
-	npages = calc_npages(sglist, nelems);
+	prot = HV_PCI_MAP_ATTR_READ;
+	if (direction != DMA_TO_DEVICE)
+		prot |= HV_PCI_MAP_ATTR_WRITE;
 
-	spin_lock_irqsave(&iommu->lock, flags);
-	entry = arena_alloc(&iommu->arena, npages);
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	outs = s = segstart = &sglist[0];
+	outcount = 1;
+	incount = nelems;
+	handle = 0;
 
-	if (unlikely(entry < 0L))
-		goto bad;
+	/* Init first segment length for backout at failure */
+	outs->dma_length = 0;
 
-	dma_base = iommu->page_table_map_base +
-		(entry << IO_PAGE_SHIFT);
+	spin_lock_irqsave(&iommu->lock, flags);
 
-	prot = HV_PCI_MAP_ATTR_READ;
-	if (direction != DMA_TO_DEVICE)
-		prot |= HV_PCI_MAP_ATTR_WRITE;
+	iommu_batch_start(dev, prot, ~0UL);
 
-	local_irq_save(flags);
+	max_seg_size = dma_get_max_seg_size(dev);
+	for_each_sg(sglist, s, nelems, i) {
+		unsigned long paddr, npages, entry, slen;
 
-	iommu_batch_start(dev, prot, entry);
+		slen = s->length;
+		/* Sanity check */
+		if (slen == 0) {
+			dma_next = 0;
+			continue;
+		}
+		/* Allocate iommu entries for that segment */
+		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+		npages = iommu_num_pages(paddr, slen);
+		entry = iommu_range_alloc(dev, iommu, npages, &handle);
 
-	for_each_sg(sglist, sg, nelems, i) {
-		unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
-		unsigned long slen = sg->length;
-		unsigned long this_npages;
+		/* Handle failure */
+		if (unlikely(entry == DMA_ERROR_CODE)) {
+			if (printk_ratelimit())
+				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+				       " npages %lx\n", iommu, paddr, npages);
+			goto iommu_map_failed;
+		}
 
-		this_npages = iommu_num_pages(paddr, slen);
+		iommu_batch_new_entry(entry);
 
-		sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
-		sg->dma_length = slen;
+		/* Convert entry to a dma_addr_t */
+		dma_addr = iommu->page_table_map_base +
+			   (entry << IO_PAGE_SHIFT);
+		dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
+		/* Insert into HW table */
 		paddr &= IO_PAGE_MASK;
-		while (this_npages--) {
+		while (npages--) {
 			err = iommu_batch_add(paddr);
-			if (unlikely(err < 0L)) {
-				local_irq_restore(flags);
+			if (unlikely(err < 0L))
 				goto iommu_map_failed;
+			paddr += IO_PAGE_SIZE;
+		}
+
+		/* If we are in an open segment, try merging */
+		if (segstart != s) {
+			/* We cannot merge if:
+			 * - allocated dma_addr isn't contiguous to previous allocation
+			 */
+			if ((dma_addr != dma_next) ||
+			    (outs->dma_length + s->length > max_seg_size)) {
+				/* Can't merge: create a new segment */
+				segstart = s;
+				outcount++;
+				outs = sg_next(outs);
+			} else {
+				outs->dma_length += s->length;
 			}
+		}
 
-			paddr += IO_PAGE_SIZE;
-			dma_base += IO_PAGE_SIZE;
+		if (segstart == s) {
+			/* This is a new segment, fill entries */
+			outs->dma_address = dma_addr;
+			outs->dma_length = slen;
 		}
+
+		/* Calculate next page pointer for contiguous check */
+		dma_next = dma_addr + slen;
 	}
 
 	err = iommu_batch_end();
 
-	local_irq_restore(flags);
-
 	if (unlikely(err < 0L))
 		goto iommu_map_failed;
 
-	return nelems;
+	spin_unlock_irqrestore(&iommu->lock, flags);
 
-bad:
-	if (printk_ratelimit())
-		WARN_ON(1);
-	return 0;
+	if (outcount < incount) {
+		outs = sg_next(outs);
+		outs->dma_address = DMA_ERROR_CODE;
+		outs->dma_length = 0;
+	}
+
+	return outcount;
 
 iommu_map_failed:
-	spin_lock_irqsave(&iommu->lock, flags);
-	arena_free(&iommu->arena, entry, npages);
+	for_each_sg(sglist, s, nelems, i) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages;
+
+			vaddr = s->dma_address & IO_PAGE_MASK;
+			npages = iommu_num_pages(s->dma_address, s->dma_length);
+			iommu_range_free(iommu, vaddr, npages);
+			/* XXX demap? XXX */
+			s->dma_address = DMA_ERROR_CODE;
+			s->dma_length = 0;
+		}
+		if (s == outs)
+			break;
+	}
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return 0;
@@ -458,39 +464,43 @@ iommu_map_failed:
 static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			    int nelems, enum dma_data_direction direction)
 {
-	unsigned long flags, npages;
 	struct pci_pbm_info *pbm;
-	u32 devhandle, bus_addr;
+	struct scatterlist *sg;
 	struct iommu *iommu;
-	long entry;
+	unsigned long flags;
+	u32 devhandle;
 
-	if (unlikely(direction == DMA_NONE)) {
-		if (printk_ratelimit())
-			WARN_ON(1);
-	}
+	BUG_ON(direction == DMA_NONE);
 
 	iommu = dev->archdata.iommu;
 	pbm = dev->archdata.host_controller;
 	devhandle = pbm->devhandle;
 
-	bus_addr = sglist->dma_address & IO_PAGE_MASK;
-
-	npages = calc_npages(sglist, nelems);
-
-	entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	arena_free(&iommu->arena, entry, npages);
-
-	do {
-		unsigned long num;
+	sg = sglist;
+	while (nelems--) {
+		dma_addr_t dma_handle = sg->dma_address;
+		unsigned int len = sg->dma_length;
+		unsigned long npages, entry;
+
+		if (!len)
+			break;
+		npages = iommu_num_pages(dma_handle, len);
+		iommu_range_free(iommu, dma_handle, npages);
+
+		entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+		while (npages) {
+			unsigned long num;
+
+			num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+						    npages);
+			entry += num;
+			npages -= num;
+		}
 
-		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
-					    npages);
-		entry += num;
-		npages -= num;
-	} while (npages != 0);
+		sg = sg_next(sg);
+	}
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
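One detail worth keeping in mind for the sun4v unmap paths above: pci_sun4v_iommu_demap() may tear down fewer entries than requested, so each loop advances by the count the hypervisor actually processed and retries until npages reaches zero. A standalone model of that retry pattern (partial_demap() is invented purely for illustration):

	#include <stdio.h>

	/* Pretend the "hypervisor" handles at most three entries per call. */
	static unsigned long partial_demap(unsigned long entry, unsigned long npages)
	{
		return npages < 3 ? npages : 3;
	}

	int main(void)
	{
		unsigned long entry = 100, npages = 8;

		while (npages) {
			unsigned long num = partial_demap(entry, npages);

			printf("demapped %lu entries starting at %lu\n", num, entry);
			entry += num;
			npages -= num;
		}
		return 0;
	}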