aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/kernel
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2005-10-18 21:23:11 -0400
committerJeff Garzik <jgarzik@pobox.com>2005-10-18 21:23:11 -0400
commit4e0e329d9a2011f9f7a7c0a378dc3bff7b0a0283 (patch)
treea802614e01460631c694dfa118642d54c3d5fc79 /arch/sparc64/kernel
parente33b9dfa3008fcaa908dc0c8c472a812c400f839 (diff)
parent59a10b172fccaea793352c00fd9065f0a5b4ef70 (diff)
Merge branch 'upstream'
Diffstat (limited to 'arch/sparc64/kernel')
-rw-r--r--arch/sparc64/kernel/dtlb_base.S14
-rw-r--r--arch/sparc64/kernel/dtlb_prot.S12
-rw-r--r--arch/sparc64/kernel/entry.S39
-rw-r--r--arch/sparc64/kernel/head.S166
-rw-r--r--arch/sparc64/kernel/irq.c1
-rw-r--r--arch/sparc64/kernel/itlb_base.S26
-rw-r--r--arch/sparc64/kernel/ktlb.S92
-rw-r--r--arch/sparc64/kernel/pci_iommu.c363
-rw-r--r--arch/sparc64/kernel/pci_psycho.c44
-rw-r--r--arch/sparc64/kernel/pci_sabre.c39
-rw-r--r--arch/sparc64/kernel/pci_schizo.c57
-rw-r--r--arch/sparc64/kernel/power.c64
-rw-r--r--arch/sparc64/kernel/rtrap.S7
-rw-r--r--arch/sparc64/kernel/smp.c7
14 files changed, 422 insertions, 509 deletions
diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S
index 702d349c1e88..6528786840c0 100644
--- a/arch/sparc64/kernel/dtlb_base.S
+++ b/arch/sparc64/kernel/dtlb_base.S
@@ -53,19 +53,18 @@
53 * be guaranteed to be 0 ... mmu_context.h does guarantee this 53 * be guaranteed to be 0 ... mmu_context.h does guarantee this
54 * by only using 10 bits in the hwcontext value. 54 * by only using 10 bits in the hwcontext value.
55 */ 55 */
56#define CREATE_VPTE_OFFSET1(r1, r2) 56#define CREATE_VPTE_OFFSET1(r1, r2) nop
57#define CREATE_VPTE_OFFSET2(r1, r2) \ 57#define CREATE_VPTE_OFFSET2(r1, r2) \
58 srax r1, 10, r2 58 srax r1, 10, r2
59#define CREATE_VPTE_NOP nop
60#else 59#else
61#define CREATE_VPTE_OFFSET1(r1, r2) \ 60#define CREATE_VPTE_OFFSET1(r1, r2) \
62 srax r1, PAGE_SHIFT, r2 61 srax r1, PAGE_SHIFT, r2
63#define CREATE_VPTE_OFFSET2(r1, r2) \ 62#define CREATE_VPTE_OFFSET2(r1, r2) \
64 sllx r2, 3, r2 63 sllx r2, 3, r2
65#define CREATE_VPTE_NOP
66#endif 64#endif
67 65
68/* DTLB ** ICACHE line 1: Quick user TLB misses */ 66/* DTLB ** ICACHE line 1: Quick user TLB misses */
67 mov TLB_SFSR, %g1
69 ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS 68 ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS
70 andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus? 69 andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus?
71from_tl1_trap: 70from_tl1_trap:
@@ -74,18 +73,16 @@ from_tl1_trap:
74 be,pn %xcc, kvmap ! Yep, special processing 73 be,pn %xcc, kvmap ! Yep, special processing
75 CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset 74 CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset
76 cmp %g5, 4 ! Last trap level? 75 cmp %g5, 4 ! Last trap level?
77 be,pn %xcc, longpath ! Yep, cannot risk VPTE miss
78 nop ! delay slot
79 76
80/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */ 77/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */
78 be,pn %xcc, longpath ! Yep, cannot risk VPTE miss
79 nop ! delay slot
81 ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE 80 ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE
821: brgez,pn %g5, longpath ! Invalid, branch out 811: brgez,pn %g5, longpath ! Invalid, branch out
83 nop ! Delay-slot 82 nop ! Delay-slot
849: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB 839: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
85 retry ! Trap return 84 retry ! Trap return
86 nop 85 nop
87 nop
88 nop
89 86
90/* DTLB ** ICACHE line 3: winfixups+real_faults */ 87/* DTLB ** ICACHE line 3: winfixups+real_faults */
91longpath: 88longpath:
@@ -106,8 +103,7 @@ longpath:
106 nop 103 nop
107 nop 104 nop
108 nop 105 nop
109 CREATE_VPTE_NOP 106 nop
110 107
111#undef CREATE_VPTE_OFFSET1 108#undef CREATE_VPTE_OFFSET1
112#undef CREATE_VPTE_OFFSET2 109#undef CREATE_VPTE_OFFSET2
113#undef CREATE_VPTE_NOP
diff --git a/arch/sparc64/kernel/dtlb_prot.S b/arch/sparc64/kernel/dtlb_prot.S
index d848bb7374bb..e0a920162604 100644
--- a/arch/sparc64/kernel/dtlb_prot.S
+++ b/arch/sparc64/kernel/dtlb_prot.S
@@ -14,14 +14,14 @@
14 */ 14 */
15 15
16/* PROT ** ICACHE line 1: User DTLB protection trap */ 16/* PROT ** ICACHE line 1: User DTLB protection trap */
17 stxa %g0, [%g1] ASI_DMMU ! Clear SFSR FaultValid bit 17 mov TLB_SFSR, %g1
18 membar #Sync ! Synchronize ASI stores 18 stxa %g0, [%g1] ASI_DMMU ! Clear FaultValid bit
19 rdpr %pstate, %g5 ! Move into alternate globals 19 membar #Sync ! Synchronize stores
20 rdpr %pstate, %g5 ! Move into alt-globals
20 wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate 21 wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate
21 rdpr %tl, %g1 ! Need to do a winfixup? 22 rdpr %tl, %g1 ! Need a winfixup?
22 cmp %g1, 1 ! Trap level >1? 23 cmp %g1, 1 ! Trap level >1?
23 mov TLB_TAG_ACCESS, %g4 ! Prepare reload of vaddr 24 mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr
24 nop
25 25
26/* PROT ** ICACHE line 2: More real fault processing */ 26/* PROT ** ICACHE line 2: More real fault processing */
27 bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup 27 bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index f685035dbdb8..11a848402fb1 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -33,7 +33,7 @@
33 /* This is trivial with the new code... */ 33 /* This is trivial with the new code... */
34 .globl do_fpdis 34 .globl do_fpdis
35do_fpdis: 35do_fpdis:
36 sethi %hi(TSTATE_PEF), %g4 ! IEU0 36 sethi %hi(TSTATE_PEF), %g4
37 rdpr %tstate, %g5 37 rdpr %tstate, %g5
38 andcc %g5, %g4, %g0 38 andcc %g5, %g4, %g0
39 be,pt %xcc, 1f 39 be,pt %xcc, 1f
@@ -50,18 +50,18 @@ do_fpdis:
50 add %g0, %g0, %g0 50 add %g0, %g0, %g0
51 ba,a,pt %xcc, rtrap_clr_l6 51 ba,a,pt %xcc, rtrap_clr_l6
52 52
531: ldub [%g6 + TI_FPSAVED], %g5 ! Load Group 531: ldub [%g6 + TI_FPSAVED], %g5
54 wr %g0, FPRS_FEF, %fprs ! LSU Group+4bubbles 54 wr %g0, FPRS_FEF, %fprs
55 andcc %g5, FPRS_FEF, %g0 ! IEU1 Group 55 andcc %g5, FPRS_FEF, %g0
56 be,a,pt %icc, 1f ! CTI 56 be,a,pt %icc, 1f
57 clr %g7 ! IEU0 57 clr %g7
58 ldx [%g6 + TI_GSR], %g7 ! Load Group 58 ldx [%g6 + TI_GSR], %g7
591: andcc %g5, FPRS_DL, %g0 ! IEU1 591: andcc %g5, FPRS_DL, %g0
60 bne,pn %icc, 2f ! CTI 60 bne,pn %icc, 2f
61 fzero %f0 ! FPA 61 fzero %f0
62 andcc %g5, FPRS_DU, %g0 ! IEU1 Group 62 andcc %g5, FPRS_DU, %g0
63 bne,pn %icc, 1f ! CTI 63 bne,pn %icc, 1f
64 fzero %f2 ! FPA 64 fzero %f2
65 faddd %f0, %f2, %f4 65 faddd %f0, %f2, %f4
66 fmuld %f0, %f2, %f6 66 fmuld %f0, %f2, %f6
67 faddd %f0, %f2, %f8 67 faddd %f0, %f2, %f8
@@ -104,8 +104,10 @@ do_fpdis:
104 add %g6, TI_FPREGS + 0xc0, %g2 104 add %g6, TI_FPREGS + 0xc0, %g2
105 faddd %f0, %f2, %f8 105 faddd %f0, %f2, %f8
106 fmuld %f0, %f2, %f10 106 fmuld %f0, %f2, %f10
107 ldda [%g1] ASI_BLK_S, %f32 ! grrr, where is ASI_BLK_NUCLEUS 8-( 107 membar #Sync
108 ldda [%g1] ASI_BLK_S, %f32
108 ldda [%g2] ASI_BLK_S, %f48 109 ldda [%g2] ASI_BLK_S, %f48
110 membar #Sync
109 faddd %f0, %f2, %f12 111 faddd %f0, %f2, %f12
110 fmuld %f0, %f2, %f14 112 fmuld %f0, %f2, %f14
111 faddd %f0, %f2, %f16 113 faddd %f0, %f2, %f16
@@ -116,7 +118,6 @@ do_fpdis:
116 fmuld %f0, %f2, %f26 118 fmuld %f0, %f2, %f26
117 faddd %f0, %f2, %f28 119 faddd %f0, %f2, %f28
118 fmuld %f0, %f2, %f30 120 fmuld %f0, %f2, %f30
119 membar #Sync
120 b,pt %xcc, fpdis_exit 121 b,pt %xcc, fpdis_exit
121 nop 122 nop
1222: andcc %g5, FPRS_DU, %g0 1232: andcc %g5, FPRS_DU, %g0
@@ -133,8 +134,10 @@ do_fpdis:
133 add %g6, TI_FPREGS + 0x40, %g2 134 add %g6, TI_FPREGS + 0x40, %g2
134 faddd %f32, %f34, %f36 135 faddd %f32, %f34, %f36
135 fmuld %f32, %f34, %f38 136 fmuld %f32, %f34, %f38
136 ldda [%g1] ASI_BLK_S, %f0 ! grrr, where is ASI_BLK_NUCLEUS 8-( 137 membar #Sync
138 ldda [%g1] ASI_BLK_S, %f0
137 ldda [%g2] ASI_BLK_S, %f16 139 ldda [%g2] ASI_BLK_S, %f16
140 membar #Sync
138 faddd %f32, %f34, %f40 141 faddd %f32, %f34, %f40
139 fmuld %f32, %f34, %f42 142 fmuld %f32, %f34, %f42
140 faddd %f32, %f34, %f44 143 faddd %f32, %f34, %f44
@@ -147,7 +150,6 @@ do_fpdis:
147 fmuld %f32, %f34, %f58 150 fmuld %f32, %f34, %f58
148 faddd %f32, %f34, %f60 151 faddd %f32, %f34, %f60
149 fmuld %f32, %f34, %f62 152 fmuld %f32, %f34, %f62
150 membar #Sync
151 ba,pt %xcc, fpdis_exit 153 ba,pt %xcc, fpdis_exit
152 nop 154 nop
1533: mov SECONDARY_CONTEXT, %g3 1553: mov SECONDARY_CONTEXT, %g3
@@ -158,7 +160,8 @@ do_fpdis:
158 stxa %g2, [%g3] ASI_DMMU 160 stxa %g2, [%g3] ASI_DMMU
159 membar #Sync 161 membar #Sync
160 mov 0x40, %g2 162 mov 0x40, %g2
161 ldda [%g1] ASI_BLK_S, %f0 ! grrr, where is ASI_BLK_NUCLEUS 8-( 163 membar #Sync
164 ldda [%g1] ASI_BLK_S, %f0
162 ldda [%g1 + %g2] ASI_BLK_S, %f16 165 ldda [%g1 + %g2] ASI_BLK_S, %f16
163 add %g1, 0x80, %g1 166 add %g1, 0x80, %g1
164 ldda [%g1] ASI_BLK_S, %f32 167 ldda [%g1] ASI_BLK_S, %f32
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 24340496cdd3..b49dcd4504b0 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -28,19 +28,14 @@
28#include <asm/mmu.h> 28#include <asm/mmu.h>
29 29
30/* This section from _start to sparc64_boot_end should fit into 30/* This section from _start to sparc64_boot_end should fit into
31 * 0x0000.0000.0040.4000 to 0x0000.0000.0040.8000 and will be sharing space 31 * 0x0000000000404000 to 0x0000000000408000.
32 * with bootup_user_stack, which is from 0x0000.0000.0040.4000 to
33 * 0x0000.0000.0040.6000 and empty_bad_page, which is from
34 * 0x0000.0000.0040.6000 to 0x0000.0000.0040.8000.
35 */ 32 */
36
37 .text 33 .text
38 .globl start, _start, stext, _stext 34 .globl start, _start, stext, _stext
39_start: 35_start:
40start: 36start:
41_stext: 37_stext:
42stext: 38stext:
43bootup_user_stack:
44! 0x0000000000404000 39! 0x0000000000404000
45 b sparc64_boot 40 b sparc64_boot
46 flushw /* Flush register file. */ 41 flushw /* Flush register file. */
@@ -191,8 +186,9 @@ prom_boot_mapping_phys_low:
191 stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5 186 stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5
192 stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate" 187 stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate"
193 stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache 188 stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache
194 srlx %l0, 22, %l3 189 /* PAGE align */
195 sllx %l3, 22, %l3 190 srlx %l0, 13, %l3
191 sllx %l3, 13, %l3
196 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC 192 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC
197 stx %g0, [%sp + 2047 + 128 + 0x30] ! res1 193 stx %g0, [%sp + 2047 + 128 + 0x30] ! res1
198 stx %g0, [%sp + 2047 + 128 + 0x38] ! res2 194 stx %g0, [%sp + 2047 + 128 + 0x38] ! res2
@@ -211,6 +207,9 @@ prom_boot_mapping_phys_low:
211 ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high 207 ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high
212 stx %l2, [%l4 + 0x0] 208 stx %l2, [%l4 + 0x0]
213 ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low 209 ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low
210 /* 4MB align */
211 srlx %l3, 22, %l3
212 sllx %l3, 22, %l3
214 stx %l3, [%l4 + 0x8] 213 stx %l3, [%l4 + 0x8]
215 214
216 /* Leave service as-is, "call-method" */ 215 /* Leave service as-is, "call-method" */
@@ -382,32 +381,78 @@ tlb_fixup_done:
382 nop 381 nop
383 /* Not reached... */ 382 /* Not reached... */
384 383
385/* IMPORTANT NOTE: Whenever making changes here, check 384 /* This is meant to allow the sharing of this code between
386 * trampoline.S as well. -jj */ 385 * boot processor invocation (via setup_tba() below) and
387 .globl setup_tba 386 * secondary processor startup (via trampoline.S). The
388setup_tba: /* i0 = is_starfire */ 387 * former does use this code, the latter does not yet due
389 save %sp, -160, %sp 388 * to some complexities. That should be fixed up at some
389 * point.
390 *
391 * There used to be enormous complexity wrt. transferring
392 * over from the firmware's trap table to the Linux kernel's.
393 * For example, there was a chicken & egg problem wrt. building
394 * the OBP page tables, yet needing to be on the Linux kernel
395 * trap table (to translate PAGE_OFFSET addresses) in order to
396 * do that.
397 *
398 * We now handle OBP tlb misses differently, via linear lookups
399 * into the prom_trans[] array. So that specific problem no
400 * longer exists. Yet, unfortunately there are still some issues
401 * preventing trampoline.S from using this code... ho hum.
402 */
403 .globl setup_trap_table
404setup_trap_table:
405 save %sp, -192, %sp
390 406
391 rdpr %tba, %g7 407 /* Force interrupts to be disabled. */
392 sethi %hi(prom_tba), %o1 408 rdpr %pstate, %o1
393 or %o1, %lo(prom_tba), %o1 409 andn %o1, PSTATE_IE, %o1
394 stx %g7, [%o1] 410 wrpr %o1, 0x0, %pstate
411 wrpr %g0, 15, %pil
412
413 /* Make the firmware call to jump over to the Linux trap table. */
414 call prom_set_trap_table
415 sethi %hi(sparc64_ttable_tl0), %o0
416
417 /* Start using proper page size encodings in ctx register. */
418 sethi %hi(sparc64_kern_pri_context), %g3
419 ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
420 mov PRIMARY_CONTEXT, %g1
421 stxa %g2, [%g1] ASI_DMMU
422 membar #Sync
423
424 /* The Linux trap handlers expect various trap global registers
425 * to be setup with some fixed values. So here we set these
426 * up very carefully. These globals are:
427 *
428 * Alternate Globals (PSTATE_AG):
429 *
430 * %g6 --> current_thread_info()
431 *
432 * MMU Globals (PSTATE_MG):
433 *
434 * %g1 --> TLB_SFSR
435 * %g2 --> ((_PAGE_VALID | _PAGE_SZ4MB |
436 * _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
437 * ^ 0xfffff80000000000)
438 * (this %g2 value is used for computing the PAGE_OFFSET kernel
439 * TLB entries quickly, the virtual address of the fault XOR'd
440 * with this %g2 value is the PTE to load into the TLB)
441 * %g3 --> VPTE_BASE_CHEETAH or VPTE_BASE_SPITFIRE
442 *
443 * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()):
444 *
445 * %g6 --> __irq_work[smp_processor_id()]
446 */
395 447
396 /* Setup "Linux" globals 8-) */
397 rdpr %pstate, %o1 448 rdpr %pstate, %o1
398 mov %g6, %o2 449 mov %g6, %o2
399 wrpr %o1, (PSTATE_AG|PSTATE_IE), %pstate 450 wrpr %o1, PSTATE_AG, %pstate
400 sethi %hi(sparc64_ttable_tl0), %g1
401 wrpr %g1, %tba
402 mov %o2, %g6 451 mov %o2, %g6
403 452
404 /* Set up MMU globals */
405 wrpr %o1, (PSTATE_MG|PSTATE_IE), %pstate
406
407 /* Set fixed globals used by dTLB miss handler. */
408#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000) 453#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
409#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) 454#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
410 455 wrpr %o1, PSTATE_MG, %pstate
411 mov TSB_REG, %g1 456 mov TSB_REG, %g1
412 stxa %g0, [%g1] ASI_DMMU 457 stxa %g0, [%g1] ASI_DMMU
413 membar #Sync 458 membar #Sync
@@ -419,17 +464,17 @@ setup_tba: /* i0 = is_starfire */
419 sllx %g2, 32, %g2 464 sllx %g2, 32, %g2
420 or %g2, KERN_LOWBITS, %g2 465 or %g2, KERN_LOWBITS, %g2
421 466
422 BRANCH_IF_ANY_CHEETAH(g3,g7,cheetah_vpte_base) 467 BRANCH_IF_ANY_CHEETAH(g3,g7,8f)
423 ba,pt %xcc, spitfire_vpte_base 468 ba,pt %xcc, 9f
424 nop 469 nop
425 470
426cheetah_vpte_base: 4718:
427 sethi %uhi(VPTE_BASE_CHEETAH), %g3 472 sethi %uhi(VPTE_BASE_CHEETAH), %g3
428 or %g3, %ulo(VPTE_BASE_CHEETAH), %g3 473 or %g3, %ulo(VPTE_BASE_CHEETAH), %g3
429 ba,pt %xcc, 2f 474 ba,pt %xcc, 2f
430 sllx %g3, 32, %g3 475 sllx %g3, 32, %g3
431 476
432spitfire_vpte_base: 4779:
433 sethi %uhi(VPTE_BASE_SPITFIRE), %g3 478 sethi %uhi(VPTE_BASE_SPITFIRE), %g3
434 or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3 479 or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3
435 sllx %g3, 32, %g3 480 sllx %g3, 32, %g3
@@ -455,41 +500,55 @@ spitfire_vpte_base:
455 sllx %o2, 32, %o2 500 sllx %o2, 32, %o2
456 wr %o2, %asr25 501 wr %o2, %asr25
457 502
458 /* Ok, we're done setting up all the state our trap mechanism needs,
459 * now get back into normal globals and let the PROM know what is up.
460 */
4612: 5032:
462 wrpr %g0, %g0, %wstate 504 wrpr %g0, %g0, %wstate
463 wrpr %o1, PSTATE_IE, %pstate 505 wrpr %o1, 0x0, %pstate
464 506
465 call init_irqwork_curcpu 507 call init_irqwork_curcpu
466 nop 508 nop
467 509
468 call prom_set_trap_table 510 /* Now we can turn interrupts back on. */
469 sethi %hi(sparc64_ttable_tl0), %o0
470
471 /* Start using proper page size encodings in ctx register. */
472 sethi %hi(sparc64_kern_pri_context), %g3
473 ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
474 mov PRIMARY_CONTEXT, %g1
475 stxa %g2, [%g1] ASI_DMMU
476 membar #Sync
477
478 rdpr %pstate, %o1 511 rdpr %pstate, %o1
479 or %o1, PSTATE_IE, %o1 512 or %o1, PSTATE_IE, %o1
480 wrpr %o1, 0, %pstate 513 wrpr %o1, 0, %pstate
514 wrpr %g0, 0x0, %pil
481 515
482 ret 516 ret
483 restore 517 restore
484 518
519 .globl setup_tba
520setup_tba: /* i0 = is_starfire */
521 save %sp, -192, %sp
522
523 /* The boot processor is the only cpu which invokes this
524 * routine, the other cpus set things up via trampoline.S.
525 * So save the OBP trap table address here.
526 */
527 rdpr %tba, %g7
528 sethi %hi(prom_tba), %o1
529 or %o1, %lo(prom_tba), %o1
530 stx %g7, [%o1]
531
532 call setup_trap_table
533 nop
534
535 ret
536 restore
537sparc64_boot_end:
538
539#include "systbls.S"
540#include "ktlb.S"
541#include "etrap.S"
542#include "rtrap.S"
543#include "winfixup.S"
544#include "entry.S"
545
485/* 546/*
486 * The following skips make sure the trap table in ttable.S is aligned 547 * The following skip makes sure the trap table in ttable.S is aligned
487 * on a 32K boundary as required by the v9 specs for TBA register. 548 * on a 32K boundary as required by the v9 specs for TBA register.
488 */ 549 */
489sparc64_boot_end: 5501:
490 .skip 0x2000 + _start - sparc64_boot_end 551 .skip 0x4000 + _start - 1b
491bootup_user_stack_end:
492 .skip 0x2000
493 552
494#ifdef CONFIG_SBUS 553#ifdef CONFIG_SBUS
495/* This is just a hack to fool make depend config.h discovering 554/* This is just a hack to fool make depend config.h discovering
@@ -501,15 +560,6 @@ bootup_user_stack_end:
501! 0x0000000000408000 560! 0x0000000000408000
502 561
503#include "ttable.S" 562#include "ttable.S"
504#include "systbls.S"
505#include "ktlb.S"
506#include "etrap.S"
507#include "rtrap.S"
508#include "winfixup.S"
509#include "entry.S"
510
511 /* This is just anal retentiveness on my part... */
512 .align 16384
513 563
514 .data 564 .data
515 .align 8 565 .align 8
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index c9b69167632a..233526ba3abe 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -27,6 +27,7 @@
27#include <asm/atomic.h> 27#include <asm/atomic.h>
28#include <asm/system.h> 28#include <asm/system.h>
29#include <asm/irq.h> 29#include <asm/irq.h>
30#include <asm/io.h>
30#include <asm/sbus.h> 31#include <asm/sbus.h>
31#include <asm/iommu.h> 32#include <asm/iommu.h>
32#include <asm/upa.h> 33#include <asm/upa.h>
diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S
index b5e32dfa4fbc..4951ff8f6877 100644
--- a/arch/sparc64/kernel/itlb_base.S
+++ b/arch/sparc64/kernel/itlb_base.S
@@ -15,14 +15,12 @@
15 */ 15 */
16#define CREATE_VPTE_OFFSET1(r1, r2) \ 16#define CREATE_VPTE_OFFSET1(r1, r2) \
17 srax r1, 10, r2 17 srax r1, 10, r2
18#define CREATE_VPTE_OFFSET2(r1, r2) 18#define CREATE_VPTE_OFFSET2(r1, r2) nop
19#define CREATE_VPTE_NOP nop
20#else /* PAGE_SHIFT */ 19#else /* PAGE_SHIFT */
21#define CREATE_VPTE_OFFSET1(r1, r2) \ 20#define CREATE_VPTE_OFFSET1(r1, r2) \
22 srax r1, PAGE_SHIFT, r2 21 srax r1, PAGE_SHIFT, r2
23#define CREATE_VPTE_OFFSET2(r1, r2) \ 22#define CREATE_VPTE_OFFSET2(r1, r2) \
24 sllx r2, 3, r2 23 sllx r2, 3, r2
25#define CREATE_VPTE_NOP
26#endif /* PAGE_SHIFT */ 24#endif /* PAGE_SHIFT */
27 25
28 26
@@ -36,6 +34,7 @@
36 */ 34 */
37 35
38/* ITLB ** ICACHE line 1: Quick user TLB misses */ 36/* ITLB ** ICACHE line 1: Quick user TLB misses */
37 mov TLB_SFSR, %g1
39 ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS 38 ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS
40 CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset 39 CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset
41 CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset 40 CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset
@@ -43,41 +42,38 @@
431: brgez,pn %g5, 3f ! Not valid, branch out 421: brgez,pn %g5, 3f ! Not valid, branch out
44 sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot 43 sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot
45 andcc %g5, %g4, %g0 ! Executable? 44 andcc %g5, %g4, %g0 ! Executable?
45
46/* ITLB ** ICACHE line 2: Real faults */
46 be,pn %xcc, 3f ! Nope, branch. 47 be,pn %xcc, 3f ! Nope, branch.
47 nop ! Delay-slot 48 nop ! Delay-slot
482: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB 492: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB
49 retry ! Trap return 50 retry ! Trap return
503: rdpr %pstate, %g4 ! Move into alternate globals 513: rdpr %pstate, %g4 ! Move into alt-globals
51
52/* ITLB ** ICACHE line 2: Real faults */
53 wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate 52 wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate
54 rdpr %tpc, %g5 ! And load faulting VA 53 rdpr %tpc, %g5 ! And load faulting VA
55 mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB 54 mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB
56sparc64_realfault_common: ! Called by TL0 dtlb_miss too 55
56/* ITLB ** ICACHE line 3: Finish faults */
57sparc64_realfault_common: ! Called by dtlb_miss
57 stb %g4, [%g6 + TI_FAULT_CODE] 58 stb %g4, [%g6 + TI_FAULT_CODE]
58 stx %g5, [%g6 + TI_FAULT_ADDR] 59 stx %g5, [%g6 + TI_FAULT_ADDR]
59 ba,pt %xcc, etrap ! Save state 60 ba,pt %xcc, etrap ! Save state
601: rd %pc, %g7 ! ... 611: rd %pc, %g7 ! ...
61 nop
62
63/* ITLB ** ICACHE line 3: Finish faults + window fixups */
64 call do_sparc64_fault ! Call fault handler 62 call do_sparc64_fault ! Call fault handler
65 add %sp, PTREGS_OFF, %o0! Compute pt_regs arg 63 add %sp, PTREGS_OFF, %o0! Compute pt_regs arg
66 ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state 64 ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state
67 nop 65 nop
66
67/* ITLB ** ICACHE line 4: Window fixups */
68winfix_trampoline: 68winfix_trampoline:
69 rdpr %tpc, %g3 ! Prepare winfixup TNPC 69 rdpr %tpc, %g3 ! Prepare winfixup TNPC
70 or %g3, 0x7c, %g3 ! Compute offset to branch 70 or %g3, 0x7c, %g3 ! Compute branch offset
71 wrpr %g3, %tnpc ! Write it into TNPC 71 wrpr %g3, %tnpc ! Write it into TNPC
72 done ! Do it to it 72 done ! Do it to it
73
74/* ITLB ** ICACHE line 4: Unused... */
75 nop 73 nop
76 nop 74 nop
77 nop 75 nop
78 nop 76 nop
79 CREATE_VPTE_NOP
80 77
81#undef CREATE_VPTE_OFFSET1 78#undef CREATE_VPTE_OFFSET1
82#undef CREATE_VPTE_OFFSET2 79#undef CREATE_VPTE_OFFSET2
83#undef CREATE_VPTE_NOP
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
index 7796b37f478c..d9244d3c9f73 100644
--- a/arch/sparc64/kernel/ktlb.S
+++ b/arch/sparc64/kernel/ktlb.S
@@ -58,9 +58,6 @@ vpte_noent:
58 done 58 done
59 59
60vpte_insn_obp: 60vpte_insn_obp:
61 sethi %hi(prom_pmd_phys), %g5
62 ldx [%g5 + %lo(prom_pmd_phys)], %g5
63
64 /* Behave as if we are at TL0. */ 61 /* Behave as if we are at TL0. */
65 wrpr %g0, 1, %tl 62 wrpr %g0, 1, %tl
66 rdpr %tpc, %g4 /* Find original faulting iaddr */ 63 rdpr %tpc, %g4 /* Find original faulting iaddr */
@@ -71,58 +68,57 @@ vpte_insn_obp:
71 mov TLB_SFSR, %g1 68 mov TLB_SFSR, %g1
72 stxa %g4, [%g1 + %g1] ASI_IMMU 69 stxa %g4, [%g1 + %g1] ASI_IMMU
73 70
74 /* Get PMD offset. */ 71 sethi %hi(prom_trans), %g5
75 srlx %g4, 23, %g6 72 or %g5, %lo(prom_trans), %g5
76 and %g6, 0x7ff, %g6 73
77 sllx %g6, 2, %g6 741: ldx [%g5 + 0x00], %g6 ! base
78 75 brz,a,pn %g6, longpath ! no more entries, fail
79 /* Load PMD, is it valid? */ 76 mov TLB_SFSR, %g1 ! and restore %g1
80 lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 77 ldx [%g5 + 0x08], %g1 ! len
81 brz,pn %g5, longpath 78 add %g6, %g1, %g1 ! end
82 sllx %g5, 11, %g5 79 cmp %g6, %g4
83 80 bgu,pt %xcc, 2f
84 /* Get PTE offset. */ 81 cmp %g4, %g1
85 srlx %g4, 13, %g6 82 bgeu,pt %xcc, 2f
86 and %g6, 0x3ff, %g6 83 ldx [%g5 + 0x10], %g1 ! PTE
87 sllx %g6, 3, %g6 84
88 85 /* TLB load, restore %g1, and return from trap. */
89 /* Load PTE. */ 86 sub %g4, %g6, %g6
90 ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 87 add %g1, %g6, %g5
91 brgez,pn %g5, longpath 88 mov TLB_SFSR, %g1
92 nop
93
94 /* TLB load and return from trap. */
95 stxa %g5, [%g0] ASI_ITLB_DATA_IN 89 stxa %g5, [%g0] ASI_ITLB_DATA_IN
96 retry 90 retry
97 91
98kvmap_do_obp: 922: ba,pt %xcc, 1b
99 sethi %hi(prom_pmd_phys), %g5 93 add %g5, (3 * 8), %g5 ! next entry
100 ldx [%g5 + %lo(prom_pmd_phys)], %g5
101
102 /* Get PMD offset. */
103 srlx %g4, 23, %g6
104 and %g6, 0x7ff, %g6
105 sllx %g6, 2, %g6
106
107 /* Load PMD, is it valid? */
108 lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5
109 brz,pn %g5, longpath
110 sllx %g5, 11, %g5
111
112 /* Get PTE offset. */
113 srlx %g4, 13, %g6
114 and %g6, 0x3ff, %g6
115 sllx %g6, 3, %g6
116
117 /* Load PTE. */
118 ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5
119 brgez,pn %g5, longpath
120 nop
121 94
122 /* TLB load and return from trap. */ 95kvmap_do_obp:
96 sethi %hi(prom_trans), %g5
97 or %g5, %lo(prom_trans), %g5
98 srlx %g4, 13, %g4
99 sllx %g4, 13, %g4
100
1011: ldx [%g5 + 0x00], %g6 ! base
102 brz,a,pn %g6, longpath ! no more entries, fail
103 mov TLB_SFSR, %g1 ! and restore %g1
104 ldx [%g5 + 0x08], %g1 ! len
105 add %g6, %g1, %g1 ! end
106 cmp %g6, %g4
107 bgu,pt %xcc, 2f
108 cmp %g4, %g1
109 bgeu,pt %xcc, 2f
110 ldx [%g5 + 0x10], %g1 ! PTE
111
112 /* TLB load, restore %g1, and return from trap. */
113 sub %g4, %g6, %g6
114 add %g1, %g6, %g5
115 mov TLB_SFSR, %g1
123 stxa %g5, [%g0] ASI_DTLB_DATA_IN 116 stxa %g5, [%g0] ASI_DTLB_DATA_IN
124 retry 117 retry
125 118
1192: ba,pt %xcc, 1b
120 add %g5, (3 * 8), %g5 ! next entry
121
126/* 122/*
127 * On a first level data miss, check whether this is to the OBP range (note 123 * On a first level data miss, check whether this is to the OBP range (note
128 * that such accesses can be made by prom, as well as by kernel using 124 * that such accesses can be made by prom, as well as by kernel using
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c
index 425c60cfea19..a11910be1013 100644
--- a/arch/sparc64/kernel/pci_iommu.c
+++ b/arch/sparc64/kernel/pci_iommu.c
@@ -49,12 +49,6 @@ static void __iommu_flushall(struct pci_iommu *iommu)
49 49
50 /* Ensure completion of previous PIO writes. */ 50 /* Ensure completion of previous PIO writes. */
51 (void) pci_iommu_read(iommu->write_complete_reg); 51 (void) pci_iommu_read(iommu->write_complete_reg);
52
53 /* Now update everyone's flush point. */
54 for (entry = 0; entry < PBM_NCLUSTERS; entry++) {
55 iommu->alloc_info[entry].flush =
56 iommu->alloc_info[entry].next;
57 }
58} 52}
59 53
60#define IOPTE_CONSISTENT(CTX) \ 54#define IOPTE_CONSISTENT(CTX) \
@@ -80,120 +74,117 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte)
80 iopte_val(*iopte) = val; 74 iopte_val(*iopte) = val;
81} 75}
82 76
83void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize) 77/* Based largely upon the ppc64 iommu allocator. */
78static long pci_arena_alloc(struct pci_iommu *iommu, unsigned long npages)
84{ 79{
85 int i; 80 struct pci_iommu_arena *arena = &iommu->arena;
86 81 unsigned long n, i, start, end, limit;
87 tsbsize /= sizeof(iopte_t); 82 int pass;
88 83
89 for (i = 0; i < tsbsize; i++) 84 limit = arena->limit;
90 iopte_make_dummy(iommu, &iommu->page_table[i]); 85 start = arena->hint;
91} 86 pass = 0;
92 87
93static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages) 88again:
94{ 89 n = find_next_zero_bit(arena->map, limit, start);
95 iopte_t *iopte, *limit, *first; 90 end = n + npages;
96 unsigned long cnum, ent, flush_point; 91 if (unlikely(end >= limit)) {
97 92 if (likely(pass < 1)) {
98 cnum = 0; 93 limit = start;
99 while ((1UL << cnum) < npages) 94 start = 0;
100 cnum++; 95 __iommu_flushall(iommu);
101 iopte = (iommu->page_table + 96 pass++;
102 (cnum << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); 97 goto again;
103 98 } else {
104 if (cnum == 0) 99 /* Scanned the whole thing, give up. */
105 limit = (iommu->page_table + 100 return -1;
106 iommu->lowest_consistent_map);
107 else
108 limit = (iopte +
109 (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
110
111 iopte += ((ent = iommu->alloc_info[cnum].next) << cnum);
112 flush_point = iommu->alloc_info[cnum].flush;
113
114 first = iopte;
115 for (;;) {
116 if (IOPTE_IS_DUMMY(iommu, iopte)) {
117 if ((iopte + (1 << cnum)) >= limit)
118 ent = 0;
119 else
120 ent = ent + 1;
121 iommu->alloc_info[cnum].next = ent;
122 if (ent == flush_point)
123 __iommu_flushall(iommu);
124 break;
125 } 101 }
126 iopte += (1 << cnum); 102 }
127 ent++; 103
128 if (iopte >= limit) { 104 for (i = n; i < end; i++) {
129 iopte = (iommu->page_table + 105 if (test_bit(i, arena->map)) {
130 (cnum << 106 start = i + 1;
131 (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); 107 goto again;
132 ent = 0;
133 } 108 }
134 if (ent == flush_point)
135 __iommu_flushall(iommu);
136 if (iopte == first)
137 goto bad;
138 } 109 }
139 110
140 /* I've got your streaming cluster right here buddy boy... */ 111 for (i = n; i < end; i++)
141 return iopte; 112 __set_bit(i, arena->map);
142 113
143bad: 114 arena->hint = end;
144 printk(KERN_EMERG "pci_iommu: alloc_streaming_cluster of npages(%ld) failed!\n", 115
145 npages); 116 return n;
146 return NULL;
147} 117}
148 118
149static void free_streaming_cluster(struct pci_iommu *iommu, dma_addr_t base, 119static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages)
150 unsigned long npages, unsigned long ctx)
151{ 120{
152 unsigned long cnum, ent; 121 unsigned long i;
153 122
154 cnum = 0; 123 for (i = base; i < (base + npages); i++)
155 while ((1UL << cnum) < npages) 124 __clear_bit(i, arena->map);
156 cnum++; 125}
157 126
158 ent = (base << (32 - IO_PAGE_SHIFT + PBM_LOGCLUSTERS - iommu->page_table_sz_bits)) 127void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask)
159 >> (32 + PBM_LOGCLUSTERS + cnum - iommu->page_table_sz_bits); 128{
129 unsigned long i, tsbbase, order, sz, num_tsb_entries;
130
131 num_tsb_entries = tsbsize / sizeof(iopte_t);
132
133 /* Setup initial software IOMMU state. */
134 spin_lock_init(&iommu->lock);
135 iommu->ctx_lowest_free = 1;
136 iommu->page_table_map_base = dma_offset;
137 iommu->dma_addr_mask = dma_addr_mask;
138
139 /* Allocate and initialize the free area map. */
140 sz = num_tsb_entries / 8;
141 sz = (sz + 7UL) & ~7UL;
142 iommu->arena.map = kmalloc(sz, GFP_KERNEL);
143 if (!iommu->arena.map) {
144 prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
145 prom_halt();
146 }
147 memset(iommu->arena.map, 0, sz);
148 iommu->arena.limit = num_tsb_entries;
160 149
161 /* If the global flush might not have caught this entry, 150 /* Allocate and initialize the dummy page which we
162 * adjust the flush point such that we will flush before 151 * set inactive IO PTEs to point to.
163 * ever trying to reuse it.
164 */ 152 */
165#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y))) 153 iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
166 if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush)) 154 if (!iommu->dummy_page) {
167 iommu->alloc_info[cnum].flush = ent; 155 prom_printf("PCI_IOMMU: Error, gfp(dummy_page) failed.\n");
168#undef between 156 prom_halt();
157 }
158 memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
159 iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
160
161 /* Now allocate and setup the IOMMU page table itself. */
162 order = get_order(tsbsize);
163 tsbbase = __get_free_pages(GFP_KERNEL, order);
164 if (!tsbbase) {
165 prom_printf("PCI_IOMMU: Error, gfp(tsb) failed.\n");
166 prom_halt();
167 }
168 iommu->page_table = (iopte_t *)tsbbase;
169
170 for (i = 0; i < num_tsb_entries; i++)
171 iopte_make_dummy(iommu, &iommu->page_table[i]);
169} 172}
170 173
171/* We allocate consistent mappings from the end of cluster zero. */ 174static inline iopte_t *alloc_npages(struct pci_iommu *iommu, unsigned long npages)
172static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long npages)
173{ 175{
174 iopte_t *iopte; 176 long entry;
175 177
176 iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)); 178 entry = pci_arena_alloc(iommu, npages);
177 while (iopte > iommu->page_table) { 179 if (unlikely(entry < 0))
178 iopte--; 180 return NULL;
179 if (IOPTE_IS_DUMMY(iommu, iopte)) {
180 unsigned long tmp = npages;
181 181
182 while (--tmp) { 182 return iommu->page_table + entry;
183 iopte--; 183}
184 if (!IOPTE_IS_DUMMY(iommu, iopte))
185 break;
186 }
187 if (tmp == 0) {
188 u32 entry = (iopte - iommu->page_table);
189 184
190 if (entry < iommu->lowest_consistent_map) 185static inline void free_npages(struct pci_iommu *iommu, dma_addr_t base, unsigned long npages)
191 iommu->lowest_consistent_map = entry; 186{
192 return iopte; 187 pci_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
193 }
194 }
195 }
196 return NULL;
197} 188}
198 189
199static int iommu_alloc_ctx(struct pci_iommu *iommu) 190static int iommu_alloc_ctx(struct pci_iommu *iommu)
@@ -233,7 +224,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
233 struct pcidev_cookie *pcp; 224 struct pcidev_cookie *pcp;
234 struct pci_iommu *iommu; 225 struct pci_iommu *iommu;
235 iopte_t *iopte; 226 iopte_t *iopte;
236 unsigned long flags, order, first_page, ctx; 227 unsigned long flags, order, first_page;
237 void *ret; 228 void *ret;
238 int npages; 229 int npages;
239 230
@@ -251,9 +242,10 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
251 iommu = pcp->pbm->iommu; 242 iommu = pcp->pbm->iommu;
252 243
253 spin_lock_irqsave(&iommu->lock, flags); 244 spin_lock_irqsave(&iommu->lock, flags);
254 iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT); 245 iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
255 if (iopte == NULL) { 246 spin_unlock_irqrestore(&iommu->lock, flags);
256 spin_unlock_irqrestore(&iommu->lock, flags); 247
248 if (unlikely(iopte == NULL)) {
257 free_pages(first_page, order); 249 free_pages(first_page, order);
258 return NULL; 250 return NULL;
259 } 251 }
@@ -262,31 +254,15 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
262 ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); 254 ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
263 ret = (void *) first_page; 255 ret = (void *) first_page;
264 npages = size >> IO_PAGE_SHIFT; 256 npages = size >> IO_PAGE_SHIFT;
265 ctx = 0;
266 if (iommu->iommu_ctxflush)
267 ctx = iommu_alloc_ctx(iommu);
268 first_page = __pa(first_page); 257 first_page = __pa(first_page);
269 while (npages--) { 258 while (npages--) {
270 iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) | 259 iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
271 IOPTE_WRITE | 260 IOPTE_WRITE |
272 (first_page & IOPTE_PAGE)); 261 (first_page & IOPTE_PAGE));
273 iopte++; 262 iopte++;
274 first_page += IO_PAGE_SIZE; 263 first_page += IO_PAGE_SIZE;
275 } 264 }
276 265
277 {
278 int i;
279 u32 daddr = *dma_addrp;
280
281 npages = size >> IO_PAGE_SHIFT;
282 for (i = 0; i < npages; i++) {
283 pci_iommu_write(iommu->iommu_flush, daddr);
284 daddr += IO_PAGE_SIZE;
285 }
286 }
287
288 spin_unlock_irqrestore(&iommu->lock, flags);
289
290 return ret; 266 return ret;
291} 267}
292 268
@@ -296,7 +272,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
296 struct pcidev_cookie *pcp; 272 struct pcidev_cookie *pcp;
297 struct pci_iommu *iommu; 273 struct pci_iommu *iommu;
298 iopte_t *iopte; 274 iopte_t *iopte;
299 unsigned long flags, order, npages, i, ctx; 275 unsigned long flags, order, npages;
300 276
301 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; 277 npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
302 pcp = pdev->sysdata; 278 pcp = pdev->sysdata;
@@ -306,46 +282,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
306 282
307 spin_lock_irqsave(&iommu->lock, flags); 283 spin_lock_irqsave(&iommu->lock, flags);
308 284
309 if ((iopte - iommu->page_table) == 285 free_npages(iommu, dvma, npages);
310 iommu->lowest_consistent_map) {
311 iopte_t *walk = iopte + npages;
312 iopte_t *limit;
313
314 limit = (iommu->page_table +
315 (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
316 while (walk < limit) {
317 if (!IOPTE_IS_DUMMY(iommu, walk))
318 break;
319 walk++;
320 }
321 iommu->lowest_consistent_map =
322 (walk - iommu->page_table);
323 }
324
325 /* Data for consistent mappings cannot enter the streaming
326 * buffers, so we only need to update the TSB. We flush
327 * the IOMMU here as well to prevent conflicts with the
328 * streaming mapping deferred tlb flush scheme.
329 */
330
331 ctx = 0;
332 if (iommu->iommu_ctxflush)
333 ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
334
335 for (i = 0; i < npages; i++, iopte++)
336 iopte_make_dummy(iommu, iopte);
337
338 if (iommu->iommu_ctxflush) {
339 pci_iommu_write(iommu->iommu_ctxflush, ctx);
340 } else {
341 for (i = 0; i < npages; i++) {
342 u32 daddr = dvma + (i << IO_PAGE_SHIFT);
343
344 pci_iommu_write(iommu->iommu_flush, daddr);
345 }
346 }
347
348 iommu_free_ctx(iommu, ctx);
349 286
350 spin_unlock_irqrestore(&iommu->lock, flags); 287 spin_unlock_irqrestore(&iommu->lock, flags);
351 288
@@ -372,25 +309,27 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct
372 iommu = pcp->pbm->iommu; 309 iommu = pcp->pbm->iommu;
373 strbuf = &pcp->pbm->stc; 310 strbuf = &pcp->pbm->stc;
374 311
375 if (direction == PCI_DMA_NONE) 312 if (unlikely(direction == PCI_DMA_NONE))
376 BUG(); 313 goto bad_no_ctx;
377 314
378 oaddr = (unsigned long)ptr; 315 oaddr = (unsigned long)ptr;
379 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); 316 npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
380 npages >>= IO_PAGE_SHIFT; 317 npages >>= IO_PAGE_SHIFT;
381 318
382 spin_lock_irqsave(&iommu->lock, flags); 319 spin_lock_irqsave(&iommu->lock, flags);
320 base = alloc_npages(iommu, npages);
321 ctx = 0;
322 if (iommu->iommu_ctxflush)
323 ctx = iommu_alloc_ctx(iommu);
324 spin_unlock_irqrestore(&iommu->lock, flags);
383 325
384 base = alloc_streaming_cluster(iommu, npages); 326 if (unlikely(!base))
385 if (base == NULL)
386 goto bad; 327 goto bad;
328
387 bus_addr = (iommu->page_table_map_base + 329 bus_addr = (iommu->page_table_map_base +
388 ((base - iommu->page_table) << IO_PAGE_SHIFT)); 330 ((base - iommu->page_table) << IO_PAGE_SHIFT));
389 ret = bus_addr | (oaddr & ~IO_PAGE_MASK); 331 ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
390 base_paddr = __pa(oaddr & IO_PAGE_MASK); 332 base_paddr = __pa(oaddr & IO_PAGE_MASK);
391 ctx = 0;
392 if (iommu->iommu_ctxflush)
393 ctx = iommu_alloc_ctx(iommu);
394 if (strbuf->strbuf_enabled) 333 if (strbuf->strbuf_enabled)
395 iopte_protection = IOPTE_STREAMING(ctx); 334 iopte_protection = IOPTE_STREAMING(ctx);
396 else 335 else
@@ -401,12 +340,13 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct
401 for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE) 340 for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
402 iopte_val(*base) = iopte_protection | base_paddr; 341 iopte_val(*base) = iopte_protection | base_paddr;
403 342
404 spin_unlock_irqrestore(&iommu->lock, flags);
405
406 return ret; 343 return ret;
407 344
408bad: 345bad:
409 spin_unlock_irqrestore(&iommu->lock, flags); 346 iommu_free_ctx(iommu, ctx);
347bad_no_ctx:
348 if (printk_ratelimit())
349 WARN_ON(1);
410 return PCI_DMA_ERROR_CODE; 350 return PCI_DMA_ERROR_CODE;
411} 351}
412 352
@@ -481,10 +421,13 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
481 struct pci_iommu *iommu; 421 struct pci_iommu *iommu;
482 struct pci_strbuf *strbuf; 422 struct pci_strbuf *strbuf;
483 iopte_t *base; 423 iopte_t *base;
484 unsigned long flags, npages, ctx; 424 unsigned long flags, npages, ctx, i;
485 425
486 if (direction == PCI_DMA_NONE) 426 if (unlikely(direction == PCI_DMA_NONE)) {
487 BUG(); 427 if (printk_ratelimit())
428 WARN_ON(1);
429 return;
430 }
488 431
489 pcp = pdev->sysdata; 432 pcp = pdev->sysdata;
490 iommu = pcp->pbm->iommu; 433 iommu = pcp->pbm->iommu;
@@ -510,13 +453,14 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
510 453
511 /* Step 1: Kick data out of streaming buffers if necessary. */ 454 /* Step 1: Kick data out of streaming buffers if necessary. */
512 if (strbuf->strbuf_enabled) 455 if (strbuf->strbuf_enabled)
513 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); 456 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx,
457 npages, direction);
514 458
515 /* Step 2: Clear out first TSB entry. */ 459 /* Step 2: Clear out TSB entries. */
516 iopte_make_dummy(iommu, base); 460 for (i = 0; i < npages; i++)
461 iopte_make_dummy(iommu, base + i);
517 462
518 free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, 463 free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
519 npages, ctx);
520 464
521 iommu_free_ctx(iommu, ctx); 465 iommu_free_ctx(iommu, ctx);
522 466
@@ -621,6 +565,8 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
621 pci_map_single(pdev, 565 pci_map_single(pdev,
622 (page_address(sglist->page) + sglist->offset), 566 (page_address(sglist->page) + sglist->offset),
623 sglist->length, direction); 567 sglist->length, direction);
568 if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
569 return 0;
624 sglist->dma_length = sglist->length; 570 sglist->dma_length = sglist->length;
625 return 1; 571 return 1;
626 } 572 }
@@ -629,21 +575,29 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
629 iommu = pcp->pbm->iommu; 575 iommu = pcp->pbm->iommu;
630 strbuf = &pcp->pbm->stc; 576 strbuf = &pcp->pbm->stc;
631 577
632 if (direction == PCI_DMA_NONE) 578 if (unlikely(direction == PCI_DMA_NONE))
633 BUG(); 579 goto bad_no_ctx;
634 580
635 /* Step 1: Prepare scatter list. */ 581 /* Step 1: Prepare scatter list. */
636 582
637 npages = prepare_sg(sglist, nelems); 583 npages = prepare_sg(sglist, nelems);
638 584
639 /* Step 2: Allocate a cluster. */ 585 /* Step 2: Allocate a cluster and context, if necessary. */
640 586
641 spin_lock_irqsave(&iommu->lock, flags); 587 spin_lock_irqsave(&iommu->lock, flags);
642 588
643 base = alloc_streaming_cluster(iommu, npages); 589 base = alloc_npages(iommu, npages);
590 ctx = 0;
591 if (iommu->iommu_ctxflush)
592 ctx = iommu_alloc_ctx(iommu);
593
594 spin_unlock_irqrestore(&iommu->lock, flags);
595
644 if (base == NULL) 596 if (base == NULL)
645 goto bad; 597 goto bad;
646 dma_base = iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT); 598
599 dma_base = iommu->page_table_map_base +
600 ((base - iommu->page_table) << IO_PAGE_SHIFT);
647 601
648 /* Step 3: Normalize DMA addresses. */ 602 /* Step 3: Normalize DMA addresses. */
649 used = nelems; 603 used = nelems;
@@ -656,30 +610,28 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
656 } 610 }
657 used = nelems - used; 611 used = nelems - used;
658 612
659 /* Step 4: Choose a context if necessary. */ 613 /* Step 4: Create the mappings. */
660 ctx = 0;
661 if (iommu->iommu_ctxflush)
662 ctx = iommu_alloc_ctx(iommu);
663
664 /* Step 5: Create the mappings. */
665 if (strbuf->strbuf_enabled) 614 if (strbuf->strbuf_enabled)
666 iopte_protection = IOPTE_STREAMING(ctx); 615 iopte_protection = IOPTE_STREAMING(ctx);
667 else 616 else
668 iopte_protection = IOPTE_CONSISTENT(ctx); 617 iopte_protection = IOPTE_CONSISTENT(ctx);
669 if (direction != PCI_DMA_TODEVICE) 618 if (direction != PCI_DMA_TODEVICE)
670 iopte_protection |= IOPTE_WRITE; 619 iopte_protection |= IOPTE_WRITE;
671 fill_sg (base, sglist, used, nelems, iopte_protection); 620
621 fill_sg(base, sglist, used, nelems, iopte_protection);
622
672#ifdef VERIFY_SG 623#ifdef VERIFY_SG
673 verify_sglist(sglist, nelems, base, npages); 624 verify_sglist(sglist, nelems, base, npages);
674#endif 625#endif
675 626
676 spin_unlock_irqrestore(&iommu->lock, flags);
677
678 return used; 627 return used;
679 628
680bad: 629bad:
681 spin_unlock_irqrestore(&iommu->lock, flags); 630 iommu_free_ctx(iommu, ctx);
682 return PCI_DMA_ERROR_CODE; 631bad_no_ctx:
632 if (printk_ratelimit())
633 WARN_ON(1);
634 return 0;
683} 635}
684 636
685/* Unmap a set of streaming mode DMA translations. */ 637/* Unmap a set of streaming mode DMA translations. */
@@ -692,8 +644,10 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
692 unsigned long flags, ctx, i, npages; 644 unsigned long flags, ctx, i, npages;
693 u32 bus_addr; 645 u32 bus_addr;
694 646
695 if (direction == PCI_DMA_NONE) 647 if (unlikely(direction == PCI_DMA_NONE)) {
696 BUG(); 648 if (printk_ratelimit())
649 WARN_ON(1);
650 }
697 651
698 pcp = pdev->sysdata; 652 pcp = pdev->sysdata;
699 iommu = pcp->pbm->iommu; 653 iommu = pcp->pbm->iommu;
@@ -705,7 +659,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
705 if (sglist[i].dma_length == 0) 659 if (sglist[i].dma_length == 0)
706 break; 660 break;
707 i--; 661 i--;
708 npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; 662 npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
663 bus_addr) >> IO_PAGE_SHIFT;
709 664
710 base = iommu->page_table + 665 base = iommu->page_table +
711 ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); 666 ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
@@ -726,11 +681,11 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
726 if (strbuf->strbuf_enabled) 681 if (strbuf->strbuf_enabled)
727 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); 682 pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
728 683
729 /* Step 2: Clear out first TSB entry. */ 684 /* Step 2: Clear out the TSB entries. */
730 iopte_make_dummy(iommu, base); 685 for (i = 0; i < npages; i++)
686 iopte_make_dummy(iommu, base + i);
731 687
732 free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, 688 free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
733 npages, ctx);
734 689
735 iommu_free_ctx(iommu, ctx); 690 iommu_free_ctx(iommu, ctx);
736 691
diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c
index 6ed1ef25e0ac..c03ed5f49d31 100644
--- a/arch/sparc64/kernel/pci_psycho.c
+++ b/arch/sparc64/kernel/pci_psycho.c
@@ -1207,13 +1207,9 @@ static void psycho_scan_bus(struct pci_controller_info *p)
1207static void psycho_iommu_init(struct pci_controller_info *p) 1207static void psycho_iommu_init(struct pci_controller_info *p)
1208{ 1208{
1209 struct pci_iommu *iommu = p->pbm_A.iommu; 1209 struct pci_iommu *iommu = p->pbm_A.iommu;
1210 unsigned long tsbbase, i; 1210 unsigned long i;
1211 u64 control; 1211 u64 control;
1212 1212
1213 /* Setup initial software IOMMU state. */
1214 spin_lock_init(&iommu->lock);
1215 iommu->ctx_lowest_free = 1;
1216
1217 /* Register addresses. */ 1213 /* Register addresses. */
1218 iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL; 1214 iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL;
1219 iommu->iommu_tsbbase = p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE; 1215 iommu->iommu_tsbbase = p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE;
@@ -1240,40 +1236,10 @@ static void psycho_iommu_init(struct pci_controller_info *p)
1240 /* Leave diag mode enabled for full-flushing done 1236 /* Leave diag mode enabled for full-flushing done
1241 * in pci_iommu.c 1237 * in pci_iommu.c
1242 */ 1238 */
1239 pci_iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff);
1243 1240
1244 iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); 1241 psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE,
1245 if (!iommu->dummy_page) { 1242 __pa(iommu->page_table));
1246 prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
1247 prom_halt();
1248 }
1249 memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
1250 iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
1251
1252 /* Using assumed page size 8K with 128K entries we need 1MB iommu page
1253 * table (128K ioptes * 8 bytes per iopte). This is
1254 * page order 7 on UltraSparc.
1255 */
1256 tsbbase = __get_free_pages(GFP_KERNEL, get_order(IO_TSB_SIZE));
1257 if (!tsbbase) {
1258 prom_printf("PSYCHO_IOMMU: Error, gfp(tsb) failed.\n");
1259 prom_halt();
1260 }
1261 iommu->page_table = (iopte_t *)tsbbase;
1262 iommu->page_table_sz_bits = 17;
1263 iommu->page_table_map_base = 0xc0000000;
1264 iommu->dma_addr_mask = 0xffffffff;
1265 pci_iommu_table_init(iommu, IO_TSB_SIZE);
1266
1267 /* We start with no consistent mappings. */
1268 iommu->lowest_consistent_map =
1269 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS);
1270
1271 for (i = 0; i < PBM_NCLUSTERS; i++) {
1272 iommu->alloc_info[i].flush = 0;
1273 iommu->alloc_info[i].next = 0;
1274 }
1275
1276 psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, __pa(tsbbase));
1277 1243
1278 control = psycho_read(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL); 1244 control = psycho_read(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL);
1279 control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ); 1245 control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ);
@@ -1281,7 +1247,7 @@ static void psycho_iommu_init(struct pci_controller_info *p)
1281 psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL, control); 1247 psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL, control);
1282 1248
1283 /* If necessary, hook us up for starfire IRQ translations. */ 1249 /* If necessary, hook us up for starfire IRQ translations. */
1284 if(this_is_starfire) 1250 if (this_is_starfire)
1285 p->starfire_cookie = starfire_hookup(p->pbm_A.portid); 1251 p->starfire_cookie = starfire_hookup(p->pbm_A.portid);
1286 else 1252 else
1287 p->starfire_cookie = NULL; 1253 p->starfire_cookie = NULL;
diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c
index 0ee6bd5b9ac6..da8e1364194f 100644
--- a/arch/sparc64/kernel/pci_sabre.c
+++ b/arch/sparc64/kernel/pci_sabre.c
@@ -1267,13 +1267,9 @@ static void sabre_iommu_init(struct pci_controller_info *p,
1267 u32 dma_mask) 1267 u32 dma_mask)
1268{ 1268{
1269 struct pci_iommu *iommu = p->pbm_A.iommu; 1269 struct pci_iommu *iommu = p->pbm_A.iommu;
1270 unsigned long tsbbase, i, order; 1270 unsigned long i;
1271 u64 control; 1271 u64 control;
1272 1272
1273 /* Setup initial software IOMMU state. */
1274 spin_lock_init(&iommu->lock);
1275 iommu->ctx_lowest_free = 1;
1276
1277 /* Register addresses. */ 1273 /* Register addresses. */
1278 iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL; 1274 iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL;
1279 iommu->iommu_tsbbase = p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE; 1275 iommu->iommu_tsbbase = p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE;
@@ -1295,26 +1291,10 @@ static void sabre_iommu_init(struct pci_controller_info *p,
1295 /* Leave diag mode enabled for full-flushing done 1291 /* Leave diag mode enabled for full-flushing done
1296 * in pci_iommu.c 1292 * in pci_iommu.c
1297 */ 1293 */
1294 pci_iommu_table_init(iommu, tsbsize * 1024 * 8, dvma_offset, dma_mask);
1298 1295
1299 iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); 1296 sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE,
1300 if (!iommu->dummy_page) { 1297 __pa(iommu->page_table));
1301 prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
1302 prom_halt();
1303 }
1304 memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
1305 iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
1306
1307 tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8));
1308 if (!tsbbase) {
1309 prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n");
1310 prom_halt();
1311 }
1312 iommu->page_table = (iopte_t *)tsbbase;
1313 iommu->page_table_map_base = dvma_offset;
1314 iommu->dma_addr_mask = dma_mask;
1315 pci_iommu_table_init(iommu, PAGE_SIZE << order);
1316
1317 sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase));
1318 1298
1319 control = sabre_read(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL); 1299 control = sabre_read(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL);
1320 control &= ~(SABRE_IOMMUCTRL_TSBSZ | SABRE_IOMMUCTRL_TBWSZ); 1300 control &= ~(SABRE_IOMMUCTRL_TSBSZ | SABRE_IOMMUCTRL_TBWSZ);
@@ -1322,11 +1302,9 @@ static void sabre_iommu_init(struct pci_controller_info *p,
1322 switch(tsbsize) { 1302 switch(tsbsize) {
1323 case 64: 1303 case 64:
1324 control |= SABRE_IOMMU_TSBSZ_64K; 1304 control |= SABRE_IOMMU_TSBSZ_64K;
1325 iommu->page_table_sz_bits = 16;
1326 break; 1305 break;
1327 case 128: 1306 case 128:
1328 control |= SABRE_IOMMU_TSBSZ_128K; 1307 control |= SABRE_IOMMU_TSBSZ_128K;
1329 iommu->page_table_sz_bits = 17;
1330 break; 1308 break;
1331 default: 1309 default:
1332 prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); 1310 prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize);
@@ -1334,15 +1312,6 @@ static void sabre_iommu_init(struct pci_controller_info *p,
1334 break; 1312 break;
1335 } 1313 }
1336 sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL, control); 1314 sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL, control);
1337
1338 /* We start with no consistent mappings. */
1339 iommu->lowest_consistent_map =
1340 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS);
1341
1342 for (i = 0; i < PBM_NCLUSTERS; i++) {
1343 iommu->alloc_info[i].flush = 0;
1344 iommu->alloc_info[i].next = 0;
1345 }
1346} 1315}
1347 1316
1348static void pbm_register_toplevel_resources(struct pci_controller_info *p, 1317static void pbm_register_toplevel_resources(struct pci_controller_info *p,
diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c
index cae5b61fe2f0..d8c4e0919b4e 100644
--- a/arch/sparc64/kernel/pci_schizo.c
+++ b/arch/sparc64/kernel/pci_schizo.c
@@ -1765,7 +1765,7 @@ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm)
1765static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) 1765static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
1766{ 1766{
1767 struct pci_iommu *iommu = pbm->iommu; 1767 struct pci_iommu *iommu = pbm->iommu;
1768 unsigned long tsbbase, i, tagbase, database, order; 1768 unsigned long i, tagbase, database;
1769 u32 vdma[2], dma_mask; 1769 u32 vdma[2], dma_mask;
1770 u64 control; 1770 u64 control;
1771 int err, tsbsize; 1771 int err, tsbsize;
@@ -1800,10 +1800,6 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
1800 prom_halt(); 1800 prom_halt();
1801 }; 1801 };
1802 1802
1803 /* Setup initial software IOMMU state. */
1804 spin_lock_init(&iommu->lock);
1805 iommu->ctx_lowest_free = 1;
1806
1807 /* Register addresses, SCHIZO has iommu ctx flushing. */ 1803 /* Register addresses, SCHIZO has iommu ctx flushing. */
1808 iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; 1804 iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;
1809 iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE; 1805 iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE;
@@ -1832,56 +1828,9 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
1832 /* Leave diag mode enabled for full-flushing done 1828 /* Leave diag mode enabled for full-flushing done
1833 * in pci_iommu.c 1829 * in pci_iommu.c
1834 */ 1830 */
1831 pci_iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
1835 1832
1836 iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); 1833 schizo_write(iommu->iommu_tsbbase, __pa(iommu->page_table));
1837 if (!iommu->dummy_page) {
1838 prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
1839 prom_halt();
1840 }
1841 memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
1842 iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
1843
1844 /* Using assumed page size 8K with 128K entries we need 1MB iommu page
1845 * table (128K ioptes * 8 bytes per iopte). This is
1846 * page order 7 on UltraSparc.
1847 */
1848 order = get_order(tsbsize * 8 * 1024);
1849 tsbbase = __get_free_pages(GFP_KERNEL, order);
1850 if (!tsbbase) {
1851 prom_printf("%s: Error, gfp(tsb) failed.\n", pbm->name);
1852 prom_halt();
1853 }
1854
1855 iommu->page_table = (iopte_t *)tsbbase;
1856 iommu->page_table_map_base = vdma[0];
1857 iommu->dma_addr_mask = dma_mask;
1858 pci_iommu_table_init(iommu, PAGE_SIZE << order);
1859
1860 switch (tsbsize) {
1861 case 64:
1862 iommu->page_table_sz_bits = 16;
1863 break;
1864
1865 case 128:
1866 iommu->page_table_sz_bits = 17;
1867 break;
1868
1869 default:
1870 prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize);
1871 prom_halt();
1872 break;
1873 };
1874
1875 /* We start with no consistent mappings. */
1876 iommu->lowest_consistent_map =
1877 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS);
1878
1879 for (i = 0; i < PBM_NCLUSTERS; i++) {
1880 iommu->alloc_info[i].flush = 0;
1881 iommu->alloc_info[i].next = 0;
1882 }
1883
1884 schizo_write(iommu->iommu_tsbbase, __pa(tsbbase));
1885 1834
1886 control = schizo_read(iommu->iommu_control); 1835 control = schizo_read(iommu->iommu_control);
1887 control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ); 1836 control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ);
diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c
index 946cee0257ea..9e8362ea3104 100644
--- a/arch/sparc64/kernel/power.c
+++ b/arch/sparc64/kernel/power.c
@@ -17,6 +17,7 @@
17 17
18#include <asm/system.h> 18#include <asm/system.h>
19#include <asm/ebus.h> 19#include <asm/ebus.h>
20#include <asm/isa.h>
20#include <asm/auxio.h> 21#include <asm/auxio.h>
21 22
22#include <linux/unistd.h> 23#include <linux/unistd.h>
@@ -100,46 +101,83 @@ again:
100 return 0; 101 return 0;
101} 102}
102 103
103static int __init has_button_interrupt(struct linux_ebus_device *edev) 104static int __init has_button_interrupt(unsigned int irq, int prom_node)
104{ 105{
105 if (edev->irqs[0] == PCI_IRQ_NONE) 106 if (irq == PCI_IRQ_NONE)
106 return 0; 107 return 0;
107 if (!prom_node_has_property(edev->prom_node, "button")) 108 if (!prom_node_has_property(prom_node, "button"))
108 return 0; 109 return 0;
109 110
110 return 1; 111 return 1;
111} 112}
112 113
113void __init power_init(void) 114static int __init power_probe_ebus(struct resource **resp, unsigned int *irq_p, int *prom_node_p)
114{ 115{
115 struct linux_ebus *ebus; 116 struct linux_ebus *ebus;
116 struct linux_ebus_device *edev; 117 struct linux_ebus_device *edev;
118
119 for_each_ebus(ebus) {
120 for_each_ebusdev(edev, ebus) {
121 if (!strcmp(edev->prom_name, "power")) {
122 *resp = &edev->resource[0];
123 *irq_p = edev->irqs[0];
124 *prom_node_p = edev->prom_node;
125 return 0;
126 }
127 }
128 }
129 return -ENODEV;
130}
131
132static int __init power_probe_isa(struct resource **resp, unsigned int *irq_p, int *prom_node_p)
133{
134 struct sparc_isa_bridge *isa_bus;
135 struct sparc_isa_device *isa_dev;
136
137 for_each_isa(isa_bus) {
138 for_each_isadev(isa_dev, isa_bus) {
139 if (!strcmp(isa_dev->prom_name, "power")) {
140 *resp = &isa_dev->resource;
141 *irq_p = isa_dev->irq;
142 *prom_node_p = isa_dev->prom_node;
143 return 0;
144 }
145 }
146 }
147 return -ENODEV;
148}
149
150void __init power_init(void)
151{
152 struct resource *res = NULL;
153 unsigned int irq;
154 int prom_node;
117 static int invoked; 155 static int invoked;
118 156
119 if (invoked) 157 if (invoked)
120 return; 158 return;
121 invoked = 1; 159 invoked = 1;
122 160
123 for_each_ebus(ebus) { 161 if (!power_probe_ebus(&res, &irq, &prom_node))
124 for_each_ebusdev(edev, ebus) { 162 goto found;
125 if (!strcmp(edev->prom_name, "power")) 163
126 goto found; 164 if (!power_probe_isa(&res, &irq, &prom_node))
127 } 165 goto found;
128 } 166
129 return; 167 return;
130 168
131found: 169found:
132 power_reg = ioremap(edev->resource[0].start, 0x4); 170 power_reg = ioremap(res->start, 0x4);
133 printk("power: Control reg at %p ... ", power_reg); 171 printk("power: Control reg at %p ... ", power_reg);
134 poweroff_method = machine_halt; /* able to use the standard halt */ 172 poweroff_method = machine_halt; /* able to use the standard halt */
135 if (has_button_interrupt(edev)) { 173 if (has_button_interrupt(irq, prom_node)) {
136 if (kernel_thread(powerd, NULL, CLONE_FS) < 0) { 174 if (kernel_thread(powerd, NULL, CLONE_FS) < 0) {
137 printk("Failed to start power daemon.\n"); 175 printk("Failed to start power daemon.\n");
138 return; 176 return;
139 } 177 }
140 printk("powerd running.\n"); 178 printk("powerd running.\n");
141 179
142 if (request_irq(edev->irqs[0], 180 if (request_irq(irq,
143 power_handler, SA_SHIRQ, "power", NULL) < 0) 181 power_handler, SA_SHIRQ, "power", NULL) < 0)
144 printk("power: Error, cannot register IRQ handler.\n"); 182 printk("power: Error, cannot register IRQ handler.\n");
145 } else { 183 } else {
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index ecfb42a69a44..090dcca00d2a 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -312,32 +312,33 @@ kern_fpucheck: ldub [%g6 + TI_FPDEPTH], %l5
312 wr %g1, FPRS_FEF, %fprs 312 wr %g1, FPRS_FEF, %fprs
313 ldx [%o1 + %o5], %g1 313 ldx [%o1 + %o5], %g1
314 add %g6, TI_XFSR, %o1 314 add %g6, TI_XFSR, %o1
315 membar #StoreLoad | #LoadLoad
316 sll %o0, 8, %o2 315 sll %o0, 8, %o2
317 add %g6, TI_FPREGS, %o3 316 add %g6, TI_FPREGS, %o3
318 brz,pn %l6, 1f 317 brz,pn %l6, 1f
319 add %g6, TI_FPREGS+0x40, %o4 318 add %g6, TI_FPREGS+0x40, %o4
320 319
320 membar #Sync
321 ldda [%o3 + %o2] ASI_BLK_P, %f0 321 ldda [%o3 + %o2] ASI_BLK_P, %f0
322 ldda [%o4 + %o2] ASI_BLK_P, %f16 322 ldda [%o4 + %o2] ASI_BLK_P, %f16
323 membar #Sync
3231: andcc %l2, FPRS_DU, %g0 3241: andcc %l2, FPRS_DU, %g0
324 be,pn %icc, 1f 325 be,pn %icc, 1f
325 wr %g1, 0, %gsr 326 wr %g1, 0, %gsr
326 add %o2, 0x80, %o2 327 add %o2, 0x80, %o2
328 membar #Sync
327 ldda [%o3 + %o2] ASI_BLK_P, %f32 329 ldda [%o3 + %o2] ASI_BLK_P, %f32
328 ldda [%o4 + %o2] ASI_BLK_P, %f48 330 ldda [%o4 + %o2] ASI_BLK_P, %f48
329
3301: membar #Sync 3311: membar #Sync
331 ldx [%o1 + %o5], %fsr 332 ldx [%o1 + %o5], %fsr
3322: stb %l5, [%g6 + TI_FPDEPTH] 3332: stb %l5, [%g6 + TI_FPDEPTH]
333 ba,pt %xcc, rt_continue 334 ba,pt %xcc, rt_continue
334 nop 335 nop
3355: wr %g0, FPRS_FEF, %fprs 3365: wr %g0, FPRS_FEF, %fprs
336 membar #StoreLoad | #LoadLoad
337 sll %o0, 8, %o2 337 sll %o0, 8, %o2
338 338
339 add %g6, TI_FPREGS+0x80, %o3 339 add %g6, TI_FPREGS+0x80, %o3
340 add %g6, TI_FPREGS+0xc0, %o4 340 add %g6, TI_FPREGS+0xc0, %o4
341 membar #Sync
341 ldda [%o3 + %o2] ASI_BLK_P, %f32 342 ldda [%o3 + %o2] ASI_BLK_P, %f32
342 ldda [%o4 + %o2] ASI_BLK_P, %f48 343 ldda [%o4 + %o2] ASI_BLK_P, %f48
343 membar #Sync 344 membar #Sync
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 590df5a16f5a..b137fd63f5e1 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1001,13 +1001,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
1001 preempt_enable(); 1001 preempt_enable();
1002} 1002}
1003 1003
1004extern unsigned long xcall_promstop;
1005
1006void smp_promstop_others(void)
1007{
1008 smp_cross_call(&xcall_promstop, 0, 0, 0);
1009}
1010
1011#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier 1004#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier
1012#define prof_counter(__cpu) cpu_data(__cpu).counter 1005#define prof_counter(__cpu) cpu_data(__cpu).counter
1013 1006