author		David S. Miller <davem@davemloft.net>	2006-02-27 02:24:22 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2006-03-20 04:11:16 -0500
commit		56fb4df6da76c35dca22036174e2d1edef83ff1f (patch)
tree		b39f152ec9ed682edceca965a85680fd4bf736a7 /arch
parent		3c936465249f863f322154ff1aaa628b84ee5750 (diff)
[SPARC64]: Eliminate all usage of hard-coded trap globals.
UltraSPARC has special sets of global registers which are switched to
for certain trap types.  There is one set for MMU related traps, one
set for Interrupt Vector processing, and another set (called the
Alternate globals) for all other trap types.

For what seems like forever we've hard-coded the values in some of
these trap registers.  Some examples include:

1) Interrupt Vector global %g6 holds the current processor's interrupt
   work struct where received interrupts are managed for IRQ handler
   dispatch.

2) MMU global %g7 holds the base of the page tables of the currently
   active address space.

3) Alternate global %g6 held the current_thread_info() value.

Such hard-coding has resulted in serious issues in many areas.  There
are some code sequences where having another register available would
help clean up the implementation.  Taking traps such as cross-calls
from the OBP firmware requires some tricky code sequences wherein we
have to save away and restore all of the special sets of global
registers when we enter/exit OBP.  We were also using the IMMU TSB
register on SMP to hold the per-cpu area base address, which doesn't
work any longer now that we actually use the TSB facility of the cpu.

The implementation is pretty straightforward.  One tricky bit is
getting the current processor ID, as that is different on different
cpu variants.  We use a stub with a fancy calling convention which we
patch at boot time.  The calling convention is that the stub is
branched to and the (PC - 4) to return to is in register %g1.  The
cpu number is left in %g6.  This stub can be invoked by using the
__GET_CPUID macro.

We use an array of per-cpu trap state to store the current thread and
the physical address of the current address space's page tables.  The
TRAP_LOAD_THREAD_REG macro loads %g6 with the current thread from this
table; it uses __GET_CPUID and also clobbers %g1.

TRAP_LOAD_IRQ_WORK is used by the interrupt vector processing to load
the current processor's IRQ software state into %g6.  It also uses
__GET_CPUID and clobbers %g1.

Finally, TRAP_LOAD_PGD_PHYS loads the physical address base of the
current address space's page tables into %g7; it clobbers %g1 and
uses __GET_CPUID.

Many refinements and some tuning are possible with this in place.

Signed-off-by: David S. Miller <davem@davemloft.net>
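[Editorial note: a rough sketch of how the new macros might compose.
The macro definitions themselves live outside arch/ (note the new
#include <asm/cpudata.h> in head.S below) and so are not part of this
diffstat; the names TRAP_BLOCK_SZ_SHIFT and TRAP_PER_CPU_THREAD and
the exact scratch register usage are inferred from their uses in
tsb.S and traps.c, not quoted from the patch.]

	/* Sketch only: branch to the boot-time patched stub, which
	 * returns to %g1 + 4 with the cpu number left in %g6.
	 */
	#define __GET_CPUID				\
		ba,pt	%xcc, __get_cpu_id;		\
		 rd	%pc, %g1;

	/* Sketch only: index trap_block[] by cpu and load the current
	 * thread pointer into %g6, clobbering %g1 as described above.
	 */
	#define TRAP_LOAD_THREAD_REG			\
		__GET_CPUID				\
		sllx	%g6, TRAP_BLOCK_SZ_SHIFT, %g6;	\
		sethi	%hi(trap_block), %g1;		\
		or	%g1, %lo(trap_block), %g1;	\
		add	%g1, %g6, %g6;			\
		ldx	[%g6 + TRAP_PER_CPU_THREAD], %g6;

TRAP_LOAD_IRQ_WORK and TRAP_LOAD_PGD_PHYS would follow the same
pattern, differing in the per-cpu table indexed and in leaving their
results in %g6 and %g7 respectively.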
Diffstat (limited to 'arch')
-rw-r--r--	arch/sparc64/kernel/entry.S	| 122
-rw-r--r--	arch/sparc64/kernel/etrap.S	|  18
-rw-r--r--	arch/sparc64/kernel/head.S	|  20
-rw-r--r--	arch/sparc64/kernel/irq.c	|  26
-rw-r--r--	arch/sparc64/kernel/rtrap.S	|  10
-rw-r--r--	arch/sparc64/kernel/setup.c	|   8
-rw-r--r--	arch/sparc64/kernel/smp.c	|  55
-rw-r--r--	arch/sparc64/kernel/trampoline.S	|   9
-rw-r--r--	arch/sparc64/kernel/traps.c	|  19
-rw-r--r--	arch/sparc64/kernel/tsb.S	|  26
-rw-r--r--	arch/sparc64/kernel/ttable.S	|   2
-rw-r--r--	arch/sparc64/kernel/winfixup.S	|  24
-rw-r--r--	arch/sparc64/mm/ultra.S	|  10
13 files changed, 192 insertions(+), 157 deletions(-)
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index a73553ae7e53..906b64ffdb1b 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -50,7 +50,8 @@ do_fpdis:
 	add		%g0, %g0, %g0
 	ba,a,pt		%xcc, rtrap_clr_l6
 
-1:	ldub		[%g6 + TI_FPSAVED], %g5
+1:	TRAP_LOAD_THREAD_REG
+	ldub		[%g6 + TI_FPSAVED], %g5
 	wr		%g0, FPRS_FEF, %fprs
 	andcc		%g5, FPRS_FEF, %g0
 	be,a,pt		%icc, 1f
@@ -189,6 +190,7 @@ fp_other_bounce:
 	.globl		do_fpother_check_fitos
 	.align		32
 do_fpother_check_fitos:
+	TRAP_LOAD_THREAD_REG
 	sethi		%hi(fp_other_bounce - 4), %g7
 	or		%g7, %lo(fp_other_bounce - 4), %g7
 
@@ -353,8 +355,6 @@ do_fptrap_after_fsr:
 	 *
 	 * With this method we can do most of the cross-call tlb/cache
 	 * flushing very quickly.
-	 *
-	 * Current CPU's IRQ worklist table is locked into %g6, don't touch.
 	 */
 	.text
 	.align		32
@@ -378,6 +378,8 @@ do_ivec:
 	sllx		%g2, %g4, %g2
 	sllx		%g4, 2, %g4
 
+	TRAP_LOAD_IRQ_WORK
+
 	lduw		[%g6 + %g4], %g5	/* g5 = irq_work(cpu, pil) */
 	stw		%g5, [%g3 + 0x00]	/* bucket->irq_chain = g5 */
 	stw		%g3, [%g6 + %g4]	/* irq_work(cpu, pil) = bucket */
@@ -488,9 +490,24 @@ setcc:
 	retl
 	 stx		%o1, [%o0 + PT_V9_TSTATE]
 
-	.globl		utrap, utrap_ill
-utrap:	brz,pn		%g1, etrap
+	.globl		utrap_trap
+utrap_trap:		/* %g3=handler,%g4=level */
+	TRAP_LOAD_THREAD_REG
+	ldx		[%g6 + TI_UTRAPS], %g1
+	brnz,pt		%g1, invoke_utrap
 	 nop
+
+	ba,pt		%xcc, etrap
+	 rd		%pc, %g7
+	mov		%l4, %o1
+	call		bad_trap
+	 add		%sp, PTREGS_OFF, %o0
+	ba,pt		%xcc, rtrap
+	 clr		%l6
+
+invoke_utrap:
+	sllx		%g3, 3, %g3
+	ldx		[%g1 + %g3], %g1
 	save		%sp, -128, %sp
 	rdpr		%tstate, %l6
 	rdpr		%cwp, %l7
@@ -500,17 +517,6 @@ utrap:	brz,pn		%g1, etrap
 	rdpr		%tnpc, %l7
 	wrpr		%g1, 0, %tnpc
 	done
-utrap_ill:
-	call		bad_trap
-	 add		%sp, PTREGS_OFF, %o0
-	ba,pt		%xcc, rtrap
-	 clr		%l6
-
-	/* XXX Here is stuff we still need to write... -DaveM XXX */
-	.globl		netbsd_syscall
-netbsd_syscall:
-	retl
-	 nop
 
 	/* We need to carefully read the error status, ACK
 	 * the errors, prevent recursive traps, and pass the
@@ -1001,7 +1007,7 @@ dcpe_icpe_tl1_common:
 	 * %g3:		scratch
 	 * %g4:		AFSR
 	 * %g5:		AFAR
-	 * %g6:		current thread ptr
+	 * %g6:		unused, will have current thread ptr after etrap
 	 * %g7:		scratch
 	 */
 __cheetah_log_error:
@@ -1690,3 +1696,85 @@ __flushw_user:
 	 restore	%g0, %g0, %g0
 2:	retl
 	 nop
+
+	/* Read cpu ID from hardware, return in %g6.
+	 * (callers_pc - 4) is in %g1.  Patched at boot time.
+	 *
+	 * Default is spitfire implementation.
+	 *
+	 * The instruction sequence needs to be 5 instructions
+	 * in order to fit the longest implementation, which is
+	 * currently starfire.
+	 */
+	.align		32
+	.globl		__get_cpu_id
+__get_cpu_id:
+	ldxa		[%g0] ASI_UPA_CONFIG, %g6
+	srlx		%g6, 17, %g6
+	jmpl		%g1 + 0x4, %g0
+	 and		%g6, 0x1f, %g6
+	nop
+
+__get_cpu_id_cheetah_safari:
+	ldxa		[%g0] ASI_SAFARI_CONFIG, %g6
+	srlx		%g6, 17, %g6
+	jmpl		%g1 + 0x4, %g0
+	 and		%g6, 0x3ff, %g6
+	nop
+
+__get_cpu_id_cheetah_jbus:
+	ldxa		[%g0] ASI_JBUS_CONFIG, %g6
+	srlx		%g6, 17, %g6
+	jmpl		%g1 + 0x4, %g0
+	 and		%g6, 0x1f, %g6
+	nop
+
+__get_cpu_id_starfire:
+	sethi		%hi(0x1fff40000d0 >> 9), %g6
+	sllx		%g6, 9, %g6
+	or		%g6, 0xd0, %g6
+	jmpl		%g1 + 0x4, %g0
+	 lduwa		[%g6] ASI_PHYS_BYPASS_EC_E, %g6
+
+	.globl		per_cpu_patch
+per_cpu_patch:
+	sethi		%hi(this_is_starfire), %o0
+	lduw		[%o0 + %lo(this_is_starfire)], %o1
+	sethi		%hi(__get_cpu_id_starfire), %o0
+	brnz,pn		%o1, 10f
+	 or		%o0, %lo(__get_cpu_id_starfire), %o0
+	sethi		%hi(tlb_type), %o0
+	lduw		[%o0 + %lo(tlb_type)], %o1
+	brz,pt		%o1, 11f
+	 nop
+	rdpr		%ver, %o0
+	srlx		%o0, 32, %o0
+	sethi		%hi(0x003e0016), %o1
+	or		%o1, %lo(0x003e0016), %o1
+	cmp		%o0, %o1
+	sethi		%hi(__get_cpu_id_cheetah_jbus), %o0
+	be,pn		%icc, 10f
+	 or		%o0, %lo(__get_cpu_id_cheetah_jbus), %o0
+	sethi		%hi(__get_cpu_id_cheetah_safari), %o0
+	or		%o0, %lo(__get_cpu_id_cheetah_safari), %o0
+10:
+	sethi		%hi(__get_cpu_id), %o1
+	or		%o1, %lo(__get_cpu_id), %o1
+	lduw		[%o0 + 0x00], %o2
+	stw		%o2, [%o1 + 0x00]
+	flush		%o1 + 0x00
+	lduw		[%o0 + 0x04], %o2
+	stw		%o2, [%o1 + 0x04]
+	flush		%o1 + 0x04
+	lduw		[%o0 + 0x08], %o2
+	stw		%o2, [%o1 + 0x08]
+	flush		%o1 + 0x08
+	lduw		[%o0 + 0x0c], %o2
+	stw		%o2, [%o1 + 0x0c]
+	flush		%o1 + 0x0c
+	lduw		[%o0 + 0x10], %o2
+	stw		%o2, [%o1 + 0x10]
+	flush		%o1 + 0x10
+11:
+	retl
+	 nop
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 567dbb765c34..8b3b6d720ed5 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -31,6 +31,7 @@
 	.globl	etrap, etrap_irq, etraptl1
 etrap:	rdpr	%pil, %g2
 etrap_irq:
+	TRAP_LOAD_THREAD_REG
 	rdpr	%tstate, %g1
 	sllx	%g2, 20, %g3
 	andcc	%g1, TSTATE_PRIV, %g0
@@ -98,11 +99,7 @@ etrap_irq:
 	stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
 	wrpr	%g0, ETRAP_PSTATE2, %pstate
 	mov	%l6, %g6
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
-	mov	TSB_REG, %g3
-	ldxa	[%g3] ASI_IMMU, %g5
-#endif
+	LOAD_PER_CPU_BASE(%g4, %g3)
 	jmpl	%l2 + 0x4, %g0
 	 ldx	[%g6 + TI_TASK], %g4
 
@@ -126,6 +123,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
 	 * 0x58	TL4's TT
 	 * 0x60	TL
 	 */
+	TRAP_LOAD_THREAD_REG
 	sub	%sp, ((4 * 8) * 4) + 8, %g2
 	rdpr	%tl, %g1
 
@@ -179,7 +177,9 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
 
 	.align	64
 	.globl	scetrap
-scetrap:	rdpr	%pil, %g2
+scetrap:
+	TRAP_LOAD_THREAD_REG
+	rdpr	%pil, %g2
 	rdpr	%tstate, %g1
 	sllx	%g2, 20, %g3
 	andcc	%g1, TSTATE_PRIV, %g0
@@ -248,11 +248,7 @@ scetrap:	rdpr	%pil, %g2
 	stx	%i6, [%sp + PTREGS_OFF + PT_V9_I6]
 	mov	%l6, %g6
 	stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
-	mov	TSB_REG, %g3
-	ldxa	[%g3] ASI_IMMU, %g5
-#endif
+	LOAD_PER_CPU_BASE(%g4, %g3)
 	ldx	[%g6 + TI_TASK], %g4
 	done
 
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index d00e20693be1..82ce5bced9c7 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -26,6 +26,7 @@
 #include <asm/head.h>
 #include <asm/ttable.h>
 #include <asm/mmu.h>
+#include <asm/cpudata.h>
 
 /* This section from from _start to sparc64_boot_end should fit into
  * 0x0000000000404000 to 0x0000000000408000.
@@ -421,24 +422,6 @@ setup_trap_table:
 	stxa	%g2, [%g1] ASI_DMMU
 	membar	#Sync
 
-	/* The Linux trap handlers expect various trap global registers
-	 * to be setup with some fixed values.  So here we set these
-	 * up very carefully.  These globals are:
-	 *
-	 * Alternate Globals (PSTATE_AG):
-	 *
-	 * %g6		--> current_thread_info()
-	 *
-	 * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()):
-	 *
-	 * %g6		--> __irq_work[smp_processor_id()]
-	 */
-
-	rdpr	%pstate, %o1
-	mov	%g6, %o2
-	wrpr	%o1, PSTATE_AG, %pstate
-	mov	%o2, %g6
-
 	/* Kill PROM timer */
 	sethi	%hi(0x80000000), %o2
 	sllx	%o2, 32, %o2
@@ -457,7 +440,6 @@ setup_trap_table:
 
 2:
 	wrpr	%g0, %g0, %wstate
-	wrpr	%o1, 0x0, %pstate
 
 	call	init_irqwork_curcpu
 	 nop
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index f7490ef629b9..3e48af2769d4 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -848,33 +848,9 @@ static void kill_prom_timer(void)
 
 void init_irqwork_curcpu(void)
 {
-	register struct irq_work_struct *workp asm("o2");
-	register unsigned long tmp asm("o3");
 	int cpu = hard_smp_processor_id();
 
-	memset(__irq_work + cpu, 0, sizeof(*workp));
-
-	/* Make sure we are called with PSTATE_IE disabled.  */
-	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
-			     : "=r" (tmp));
-	if (tmp & PSTATE_IE) {
-		prom_printf("BUG: init_irqwork_curcpu() called with "
-			    "PSTATE_IE enabled, bailing.\n");
-		__asm__ __volatile__("mov	%%i7, %0\n\t"
-				     : "=r" (tmp));
-		prom_printf("BUG: Called from %lx\n", tmp);
-		prom_halt();
-	}
-
-	/* Set interrupt globals.  */
-	workp = &__irq_work[cpu];
-	__asm__ __volatile__(
-	"rdpr	%%pstate, %0\n\t"
-	"wrpr	%0, %1, %%pstate\n\t"
-	"mov	%2, %%g6\n\t"
-	"wrpr	%0, 0x0, %%pstate\n\t"
-	: "=&r" (tmp)
-	: "i" (PSTATE_IG), "r" (workp));
+	memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct));
 }
 
 /* Only invoked on boot processor. */
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index 213eb4a9d8a4..5a62ec5d531c 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -223,12 +223,10 @@ rt_continue: ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
 	ldx	[%sp + PTREGS_OFF + PT_V9_G3], %g3
 	ldx	[%sp + PTREGS_OFF + PT_V9_G4], %g4
 	ldx	[%sp + PTREGS_OFF + PT_V9_G5], %g5
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
-	mov	TSB_REG, %g6
-	brnz,a,pn	%l3, 1f
-	 ldxa	[%g6] ASI_IMMU, %g5
-#endif
+	brz,pt	%l3, 1f
+	 nop
+	/* Must do this before thread reg is clobbered below.  */
+	LOAD_PER_CPU_BASE(%g6, %g7)
 1:
 	ldx	[%sp + PTREGS_OFF + PT_V9_G6], %g6
 	ldx	[%sp + PTREGS_OFF + PT_V9_G7], %g7
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 158bd31e15b7..59a70301a6cf 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -507,6 +507,11 @@ void __init setup_arch(char **cmdline_p)
 	/* Work out if we are starfire early on */
 	check_if_starfire();
 
+	/* Now we know enough to patch the __get_cpu_id()
+	 * trampoline used by trap code.
+	 */
+	per_cpu_patch();
+
 	boot_flags_init(*cmdline_p);
 
 	idprom_init();
@@ -545,6 +550,9 @@ void __init setup_arch(char **cmdline_p)
 	smp_setup_cpu_possible_map();
 
 	paging_init();
+
+	/* Get boot processor trap_block[] setup.  */
+	init_cur_cpu_trap();
 }
 
 static int __init set_preferred_console(void)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index d2d3369e7b5d..8c245859d212 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -38,6 +38,7 @@
 #include <asm/timer.h>
 #include <asm/starfire.h>
 #include <asm/tlb.h>
+#include <asm/sections.h>
 
 extern void calibrate_delay(void);
 
@@ -87,10 +88,6 @@ void __init smp_store_cpu_info(int id)
 	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 						     "clock-frequency", 0);
 
-	cpu_data(id).pgcache_size = 0;
-	cpu_data(id).pte_cache[0] = NULL;
-	cpu_data(id).pte_cache[1] = NULL;
-	cpu_data(id).pgd_cache = NULL;
 	cpu_data(id).idle_volume = 1;
 
 	cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",
@@ -121,26 +118,15 @@ static volatile unsigned long callin_flag = 0;
 
 extern void inherit_locked_prom_mappings(int save_p);
 
-static inline void cpu_setup_percpu_base(unsigned long cpu_id)
-{
-#error IMMU TSB usage must be fixed
-	__asm__ __volatile__("mov	%0, %%g5\n\t"
-			     "stxa	%0, [%1] %2\n\t"
-			     "membar	#Sync"
-			     : /* no outputs */
-			     : "r" (__per_cpu_offset(cpu_id)),
-			       "r" (TSB_REG), "i" (ASI_IMMU));
-}
-
 void __init smp_callin(void)
 {
 	int cpuid = hard_smp_processor_id();
 
 	inherit_locked_prom_mappings(0);
 
-	__flush_tlb_all();
+	__local_per_cpu_offset = __per_cpu_offset(cpuid);
 
-	cpu_setup_percpu_base(cpuid);
+	__flush_tlb_all();
 
 	smp_setup_percpu_timer();
 
@@ -1107,12 +1093,15 @@ void __init smp_setup_cpu_possible_map(void)
 
 void __devinit smp_prepare_boot_cpu(void)
 {
-	if (hard_smp_processor_id() >= NR_CPUS) {
+	int cpu = hard_smp_processor_id();
+
+	if (cpu >= NR_CPUS) {
 		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
 		prom_halt();
 	}
 
-	current_thread_info()->cpu = hard_smp_processor_id();
+	current_thread_info()->cpu = cpu;
+	__local_per_cpu_offset = __per_cpu_offset(cpu);
 
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), phys_cpu_present_map);
@@ -1173,12 +1162,9 @@ void __init setup_per_cpu_areas(void)
 {
 	unsigned long goal, size, i;
 	char *ptr;
-	/* Created by linker magic */
-	extern char __per_cpu_start[], __per_cpu_end[];
 
 	/* Copy section for each CPU (we discard the original) */
-	goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
-
+	goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
 #ifdef CONFIG_MODULES
 	if (goal < PERCPU_ENOUGH_ROOM)
 		goal = PERCPU_ENOUGH_ROOM;
@@ -1187,31 +1173,10 @@ void __init setup_per_cpu_areas(void)
 	for (size = 1UL; size < goal; size <<= 1UL)
 		__per_cpu_shift++;
 
-	/* Make sure the resulting __per_cpu_base value
-	 * will fit in the 43-bit sign extended IMMU
-	 * TSB register.
-	 */
-	ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE,
-			      (unsigned long) __per_cpu_start);
+	ptr = alloc_bootmem(size * NR_CPUS);
 
 	__per_cpu_base = ptr - __per_cpu_start;
 
-	if ((__per_cpu_shift < PAGE_SHIFT) ||
-	    (__per_cpu_base & ~PAGE_MASK) ||
-	    (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) {
-		prom_printf("PER_CPU: Invalid layout, "
-			    "ptr[%p] shift[%lx] base[%lx]\n",
-			    ptr, __per_cpu_shift, __per_cpu_base);
-		prom_halt();
-	}
-
 	for (i = 0; i < NR_CPUS; i++, ptr += size)
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
-	/* Finally, load in the boot cpu's base value.
-	 * We abuse the IMMU TSB register for trap handler
-	 * entry and exit loading of %g5.  That is why it
-	 * has to be page aligned.
-	 */
-	cpu_setup_percpu_base(hard_smp_processor_id());
 }
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 782d8c4973e4..18c333f841e3 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -287,21 +287,18 @@ do_unlock:
 	wrpr		%g0, 0, %wstate
 	wrpr		%g0, 0, %tl
 
-	/* Setup the trap globals, then we can resurface. */
-	rdpr		%pstate, %o1
-	mov		%g6, %o2
-	wrpr		%o1, PSTATE_AG, %pstate
+	/* Load TBA, then we can resurface. */
 	sethi		%hi(sparc64_ttable_tl0), %g5
 	wrpr		%g5, %tba
-	mov		%o2, %g6
 
-	wrpr		%o1, 0x0, %pstate
 	ldx		[%g6 + TI_TASK], %g4
 
 	wrpr		%g0, 0, %wstate
 
 	call		init_irqwork_curcpu
 	 nop
+	call		init_cur_cpu_trap
+	 nop
 
 	/* Start using proper page size encodings in ctx register. */
 	sethi		%hi(sparc64_kern_pri_context), %g3
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 8d44ae5a15e3..f47f4874253c 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2130,7 +2130,22 @@ void do_getpsr(struct pt_regs *regs)
 	}
 }
 
+struct trap_per_cpu trap_block[NR_CPUS];
+
+/* This can get invoked before sched_init() so play it super safe
+ * and use hard_smp_processor_id().
+ */
+void init_cur_cpu_trap(void)
+{
+	int cpu = hard_smp_processor_id();
+	struct trap_per_cpu *p = &trap_block[cpu];
+
+	p->thread = current_thread_info();
+	p->pgd_paddr = 0;
+}
+
 extern void thread_info_offsets_are_bolixed_dave(void);
+extern void trap_per_cpu_offsets_are_bolixed_dave(void);
 
 /* Only invoked on boot processor. */
 void __init trap_init(void)
@@ -2165,6 +2180,10 @@ void __init trap_init(void)
 	    (TI_FPREGS & (64 - 1)))
 		thread_info_offsets_are_bolixed_dave();
 
+	if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
+	    TRAP_PER_CPU_PGD_PADDR != offsetof(struct trap_per_cpu, pgd_paddr))
+		trap_per_cpu_offsets_are_bolixed_dave();
+
 	/* Attach to the address space of init_task.  On SMP we
 	 * do this in smp.c:smp_callin for other cpus.
 	 */
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index 44b9e6fed09f..50752c518773 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -36,6 +36,15 @@ tsb_miss_itlb:
 	 nop
 
 tsb_miss_page_table_walk:
+	/* This clobbers %g1 and %g6, preserve them... */
+	mov		%g1, %g5
+	mov		%g6, %g2
+
+	TRAP_LOAD_PGD_PHYS
+
+	mov		%g2, %g6
+	mov		%g5, %g1
+
 	USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
 
 tsb_reload:
@@ -112,15 +121,20 @@ winfix_trampoline:
 	 * %o0: page table physical address
 	 * %o1: TSB address
 	 */
+	.align	32
 	.globl	tsb_context_switch
 tsb_context_switch:
-	wrpr	%g0, PSTATE_MG | PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV, %pstate
+	rdpr	%pstate, %o5
+	wrpr	%o5, PSTATE_IE, %pstate
 
-	/* Set page table base alternate global. */
-	mov	%o0, %g7
+	ldub	[%g6 + TI_CPU], %o3
+	sethi	%hi(trap_block), %o4
+	sllx	%o3, TRAP_BLOCK_SZ_SHIFT, %o3
+	or	%o4, %lo(trap_block), %o4
+	add	%o4, %o3, %o4
+	stx	%o0, [%o4 + TRAP_PER_CPU_PGD_PADDR]
 
-	/* XXX can this happen? */
-	brz,pn	%o1, 9f
+	brgez	%o1, 9f
 	 nop
 
 	/* Lock TSB into D-TLB. */
@@ -163,7 +177,7 @@ tsb_context_switch:
 	 membar	#Sync
 
 9:
-	wrpr	%g0, PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE, %pstate
+	wrpr	%o5, %pstate
 
 	retl
 	 mov	%o2, %o0
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
index 56f060c8fbf0..2fb7a33993c0 100644
--- a/arch/sparc64/kernel/ttable.S
+++ b/arch/sparc64/kernel/ttable.S
@@ -128,7 +128,7 @@ tl0_flushw:	FLUSH_WINDOW_TRAP
 tl0_resv104:	BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107)
 	.globl tl0_solaris
 tl0_solaris:	SOLARIS_SYSCALL_TRAP
-tl0_netbsd:	NETBSD_SYSCALL_TRAP
+tl0_resv109:	BTRAP(0x109)
 tl0_resv10a:	BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) BTRAP(0x10e)
 tl0_resv10f:	BTRAP(0x10f)
 tl0_linux32:	LINUX_32BIT_SYSCALL_TRAP
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index f5d93aa99cbb..de588036df43 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -39,6 +39,7 @@ set_pcontext:
 	 */
 	.globl	fill_fixup, spill_fixup
 fill_fixup:
+	TRAP_LOAD_THREAD_REG
 	rdpr	%tstate, %g1
 	andcc	%g1, TSTATE_PRIV, %g0
 	or	%g4, FAULT_CODE_WINFIXUP, %g4
@@ -84,11 +85,7 @@ fill_fixup:
 	wrpr	%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov	%o7, %g6
 	ldx	[%g6 + TI_TASK], %g4
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
-	mov	TSB_REG, %g1
-	ldxa	[%g1] ASI_IMMU, %g5
-#endif
+	LOAD_PER_CPU_BASE(%g1, %g2)
 
 	/* This is the same as below, except we handle this a bit special
 	 * since we must preserve %l5 and %l6, see comment above.
@@ -107,6 +104,7 @@ fill_fixup:
 	 * do not touch %g7 or %g2 so we handle the two cases fine.
 	 */
 spill_fixup:
+	TRAP_LOAD_THREAD_REG
 	ldx	[%g6 + TI_FLAGS], %g1
 	andcc	%g1, _TIF_32BIT, %g0
 	ldub	[%g6 + TI_WSAVED], %g1
@@ -182,6 +180,7 @@ winfix_mna:
 	wrpr	%g3, %tnpc
 	done
 fill_fixup_mna:
+	TRAP_LOAD_THREAD_REG
 	rdpr	%tstate, %g1
 	andcc	%g1, TSTATE_PRIV, %g0
 	be,pt	%xcc, window_mna_from_user_common
@@ -209,17 +208,14 @@ fill_fixup_mna:
 	wrpr	%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov	%o7, %g6		! Get current back.
 	ldx	[%g6 + TI_TASK], %g4	! Finish it.
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
-	mov	TSB_REG, %g1
-	ldxa	[%g1] ASI_IMMU, %g5
-#endif
+	LOAD_PER_CPU_BASE(%g1, %g2)
 	call	mem_address_unaligned
 	 add	%sp, PTREGS_OFF, %o0
 
 	b,pt	%xcc, rtrap
 	 nop			! yes, the nop is correct
 spill_fixup_mna:
+	TRAP_LOAD_THREAD_REG
 	ldx	[%g6 + TI_FLAGS], %g1
 	andcc	%g1, _TIF_32BIT, %g0
 	ldub	[%g6 + TI_WSAVED], %g1
@@ -287,6 +283,7 @@ winfix_dax:
 	wrpr	%g3, %tnpc
 	done
 fill_fixup_dax:
+	TRAP_LOAD_THREAD_REG
 	rdpr	%tstate, %g1
 	andcc	%g1, TSTATE_PRIV, %g0
 	be,pt	%xcc, window_dax_from_user_common
@@ -314,17 +311,14 @@ fill_fixup_dax:
 	wrpr	%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov	%o7, %g6		! Get current back.
 	ldx	[%g6 + TI_TASK], %g4	! Finish it.
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
-	mov	TSB_REG, %g1
-	ldxa	[%g1] ASI_IMMU, %g5
-#endif
+	LOAD_PER_CPU_BASE(%g1, %g2)
 	call	spitfire_data_access_exception
 	 add	%sp, PTREGS_OFF, %o0
 
 	b,pt	%xcc, rtrap
 	 nop			! yes, the nop is correct
 spill_fixup_dax:
+	TRAP_LOAD_THREAD_REG
 	ldx	[%g6 + TI_FLAGS], %g1
 	andcc	%g1, _TIF_32BIT, %g0
 	ldub	[%g6 + TI_WSAVED], %g1
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 22791f29552e..a87394824ec2 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -295,12 +295,10 @@ cheetah_patch_cachetlbops:
 	 * %g1	address arg 1 (tlb page and range flushes)
 	 * %g7	address arg 2 (tlb range flush only)
 	 *
-	 * %g6	ivector table, don't touch
-	 * %g2	scratch 1
-	 * %g3	scratch 2
-	 * %g4	scratch 3
-	 *
-	 * TODO: Make xcall TLB range flushes use the tricks above... -DaveM
+	 * %g6	scratch 1
+	 * %g2	scratch 2
+	 * %g3	scratch 3
+	 * %g4	scratch 4
 	 */
 	.align		32
 	.globl		xcall_flush_tlb_mm