diff options
author | David S. Miller <davem@davemloft.net> | 2006-02-27 02:24:22 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-03-20 04:11:16 -0500 |
commit | 56fb4df6da76c35dca22036174e2d1edef83ff1f (patch) | |
tree | b39f152ec9ed682edceca965a85680fd4bf736a7 /include/asm-sparc64 | |
parent | 3c936465249f863f322154ff1aaa628b84ee5750 (diff) |
[SPARC64]: Eliminate all usage of hard-coded trap globals.
UltraSPARC has special sets of global registers which are switched to
for certain trap types. There is one set for MMU related traps, one
set for Interrupt Vector processing, and another set (called the
Alternate globals) for all other trap types.
For what seems like forever we've hard coded the values in some of
these trap registers. Some examples include:
1) Interrupt Vector global %g6 holds current processor's interrupt
work struct where received interrupts are managed for IRQ handler
dispatch.
2) MMU global %g7 holds the base of the page tables of the currently
active address space.
3) Alternate global %g6 holds the current_thread_info() value.
Such hardcoding has resulted in some serious issues in many areas.
There are some code sequences where having another register available
would help clean up the implementation. Taking traps such as
cross-calls from the OBP firmware requires some trick code sequences
wherein we have to save away and restore all of the special sets of
global registers when we enter/exit OBP.
We were also using the IMMU TSB register on SMP to hold the per-cpu
area base address, which doesn't work any longer now that we actually
use the TSB facility of the cpu.
The implementation is pretty straightforward. One tricky bit is
getting the current processor ID as that is different on different cpu
variants. We use a stub with a fancy calling convention which we
patch at boot time. The calling convention is that the stub is
branched to and the (PC - 4) to return to is in register %g1. The cpu
number is left in %g6. This stub can be invoked by using the
__GET_CPUID macro.
We use an array of per-cpu trap state to store the current thread and
physical address of the current address space's page tables. The
TRAP_LOAD_THREAD_REG loads %g6 with the current thread from this
table; it uses __GET_CPUID and also clobbers %g1.
TRAP_LOAD_IRQ_WORK is used by the interrupt vector processing to load
the current processor's IRQ software state into %g6. It also uses
__GET_CPUID and clobbers %g1.
Finally, TRAP_LOAD_PGD_PHYS loads the physical address base of the
current address space's page tables into %g7; it clobbers %g1 and uses
__GET_CPUID.
Many refinements are possible, as well as some tuning, with this stuff
in place.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/asm-sparc64')
-rw-r--r-- | include/asm-sparc64/cpudata.h | 86 | ||||
-rw-r--r-- | include/asm-sparc64/system.h | 2 | ||||
-rw-r--r-- | include/asm-sparc64/ttable.h | 18 |
3 files changed, 95 insertions, 11 deletions
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index f7c0faede8b8..6c57cbb9a7d1 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h | |||
@@ -1,12 +1,15 @@ | |||
1 | /* cpudata.h: Per-cpu parameters. | 1 | /* cpudata.h: Per-cpu parameters. |
2 | * | 2 | * |
3 | * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com) | 3 | * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #ifndef _SPARC64_CPUDATA_H | 6 | #ifndef _SPARC64_CPUDATA_H |
7 | #define _SPARC64_CPUDATA_H | 7 | #define _SPARC64_CPUDATA_H |
8 | 8 | ||
9 | #ifndef __ASSEMBLY__ | ||
10 | |||
9 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
12 | #include <linux/threads.h> | ||
10 | 13 | ||
11 | typedef struct { | 14 | typedef struct { |
12 | /* Dcache line 1 */ | 15 | /* Dcache line 1 */ |
@@ -32,4 +35,85 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); | |||
32 | #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) | 35 | #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) |
33 | #define local_cpu_data() __get_cpu_var(__cpu_data) | 36 | #define local_cpu_data() __get_cpu_var(__cpu_data) |
34 | 37 | ||
38 | /* Trap handling code needs to get at a few critical values upon | ||
39 | * trap entry and to process TSB misses. These cannot be in the | ||
40 | * per_cpu() area as we really need to lock them into the TLB and | ||
41 | * thus make them part of the main kernel image. As a result we | ||
42 | * try to make this as small as possible. | ||
43 | * | ||
44 | * This is padded out and aligned to 64-bytes to avoid false sharing | ||
45 | * on SMP. | ||
46 | */ | ||
47 | |||
48 | /* If you modify the size of this structure, please update | ||
49 | * TRAP_BLOCK_SZ_SHIFT below. | ||
50 | */ | ||
51 | struct thread_info; | ||
52 | struct trap_per_cpu { | ||
53 | /* D-cache line 1 */ | ||
54 | struct thread_info *thread; | ||
55 | unsigned long pgd_paddr; | ||
56 | unsigned long __pad1[2]; | ||
57 | |||
58 | /* D-cache line 2 */ | ||
59 | unsigned long __pad2[4]; | ||
60 | } __attribute__((aligned(64))); | ||
61 | extern struct trap_per_cpu trap_block[NR_CPUS]; | ||
62 | extern void init_cur_cpu_trap(void); | ||
63 | extern void per_cpu_patch(void); | ||
64 | |||
65 | #endif /* !(__ASSEMBLY__) */ | ||
66 | |||
67 | #define TRAP_PER_CPU_THREAD 0x00 | ||
68 | #define TRAP_PER_CPU_PGD_PADDR 0x08 | ||
69 | |||
70 | #define TRAP_BLOCK_SZ_SHIFT 6 | ||
71 | |||
72 | /* Clobbers %g1, loads %g6 with local processor's cpuid */ | ||
73 | #define __GET_CPUID \ | ||
74 | ba,pt %xcc, __get_cpu_id; \ | ||
75 | rd %pc, %g1; | ||
76 | |||
77 | /* Clobbers %g1, loads current address space PGD phys address into %g7. */ | ||
78 | #define TRAP_LOAD_PGD_PHYS \ | ||
79 | __GET_CPUID \ | ||
80 | sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ | ||
81 | sethi %hi(trap_block), %g7; \ | ||
82 | or %g7, %lo(trap_block), %g7; \ | ||
83 | add %g7, %g6, %g7; \ | ||
84 | ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; | ||
85 | |||
86 | /* Clobbers %g1, loads local processor's IRQ work area into %g6. */ | ||
87 | #define TRAP_LOAD_IRQ_WORK \ | ||
88 | __GET_CPUID \ | ||
89 | sethi %hi(__irq_work), %g1; \ | ||
90 | sllx %g6, 6, %g6; \ | ||
91 | or %g1, %lo(__irq_work), %g1; \ | ||
92 | add %g1, %g6, %g6; | ||
93 | |||
94 | /* Clobbers %g1, loads %g6 with current thread info pointer. */ | ||
95 | #define TRAP_LOAD_THREAD_REG \ | ||
96 | __GET_CPUID \ | ||
97 | sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ | ||
98 | sethi %hi(trap_block), %g1; \ | ||
99 | or %g1, %lo(trap_block), %g1; \ | ||
100 | ldx [%g1 + %g6], %g6; | ||
101 | |||
102 | /* Given the current thread info pointer in %g6, load the per-cpu | ||
103 | * area base of the current processor into %g5. REG1 and REG2 are | ||
104 | * clobbered. | ||
105 | */ | ||
106 | #ifdef CONFIG_SMP | ||
107 | #define LOAD_PER_CPU_BASE(REG1, REG2) \ | ||
108 | ldub [%g6 + TI_CPU], REG1; \ | ||
109 | sethi %hi(__per_cpu_shift), %g5; \ | ||
110 | sethi %hi(__per_cpu_base), REG2; \ | ||
111 | ldx [%g5 + %lo(__per_cpu_shift)], %g5; \ | ||
112 | ldx [REG2 + %lo(__per_cpu_base)], REG2; \ | ||
113 | sllx REG1, %g5, %g5; \ | ||
114 | add %g5, REG2, %g5; | ||
115 | #else | ||
116 | #define LOAD_PER_CPU_BASE(REG1, REG2) | ||
117 | #endif | ||
118 | |||
35 | #endif /* _SPARC64_CPUDATA_H */ | 119 | #endif /* _SPARC64_CPUDATA_H */ |
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index af254e581834..26c0807af3e4 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h | |||
@@ -209,6 +209,8 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ | |||
209 | /* so that ASI is only written if it changes, think again. */ \ | 209 | /* so that ASI is only written if it changes, think again. */ \ |
210 | __asm__ __volatile__("wr %%g0, %0, %%asi" \ | 210 | __asm__ __volatile__("wr %%g0, %0, %%asi" \ |
211 | : : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\ | 211 | : : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\ |
212 | trap_block[current_thread_info()->cpu].thread = \ | ||
213 | task_thread_info(next); \ | ||
212 | __asm__ __volatile__( \ | 214 | __asm__ __volatile__( \ |
213 | "mov %%g4, %%g7\n\t" \ | 215 | "mov %%g4, %%g7\n\t" \ |
214 | "wrpr %%g0, 0x95, %%pstate\n\t" \ | 216 | "wrpr %%g0, 0x95, %%pstate\n\t" \ |
diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h index 2784f80094c3..f557db4faf84 100644 --- a/include/asm-sparc64/ttable.h +++ b/include/asm-sparc64/ttable.h | |||
@@ -109,14 +109,14 @@ | |||
109 | nop;nop;nop; | 109 | nop;nop;nop; |
110 | 110 | ||
111 | #define TRAP_UTRAP(handler,lvl) \ | 111 | #define TRAP_UTRAP(handler,lvl) \ |
112 | ldx [%g6 + TI_UTRAPS], %g1; \ | 112 | mov handler, %g3; \ |
113 | sethi %hi(109f), %g7; \ | 113 | ba,pt %xcc, utrap_trap; \ |
114 | brz,pn %g1, utrap; \ | 114 | mov lvl, %g4; \ |
115 | or %g7, %lo(109f), %g7; \ | 115 | nop; \ |
116 | ba,pt %xcc, utrap; \ | 116 | nop; \ |
117 | 109: ldx [%g1 + handler*8], %g1; \ | 117 | nop; \ |
118 | ba,pt %xcc, utrap_ill; \ | 118 | nop; \ |
119 | mov lvl, %o1; | 119 | nop; |
120 | 120 | ||
121 | #ifdef CONFIG_SUNOS_EMUL | 121 | #ifdef CONFIG_SUNOS_EMUL |
122 | #define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table) | 122 | #define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table) |
@@ -136,8 +136,6 @@ | |||
136 | #else | 136 | #else |
137 | #define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall) | 137 | #define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall) |
138 | #endif | 138 | #endif |
139 | /* FIXME: Write these actually */ | ||
140 | #define NETBSD_SYSCALL_TRAP TRAP(netbsd_syscall) | ||
141 | #define BREAKPOINT_TRAP TRAP(breakpoint_trap) | 139 | #define BREAKPOINT_TRAP TRAP(breakpoint_trap) |
142 | 140 | ||
143 | #define TRAP_IRQ(routine, level) \ | 141 | #define TRAP_IRQ(routine, level) \ |