aboutsummaryrefslogtreecommitdiffstats
path: root/include/asm-sparc64/cpudata.h
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2006-02-27 02:27:19 -0500
committer: David S. Miller <davem@sunset.davemloft.net> 2006-03-20 04:11:35 -0500
commit: 92704a1c63c3b481870d02636d0b5a70c7e21cd1 (patch)
tree: 098f96da6ab50a1d878425e2b91a9cf22f78ac80 /include/asm-sparc64/cpudata.h
parent: f4e841da30b4bcbb8f1cc20a01157a788ff58b21 (diff)
[SPARC64]: Refine code sequences to get the cpu id.
On uniprocessor, it's always zero, so optimize for that. On SMP, the jmpl to the stub kills the return address stack in the cpu branch prediction logic, so expand the code sequence inline and use a code patching section to fix things up. This also allows better and explicit register selection, which will be taken advantage of in a future changeset. The hard_smp_processor_id() function is big, so do not inline it. Fix up tests for Jalapeno to also test for Serrano chips. These tests want "jbus Ultra-IIIi" cases to match, so that is what we should test for. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/asm-sparc64/cpudata.h')
-rw-r--r-- include/asm-sparc64/cpudata.h 89
1 files changed, 69 insertions, 20 deletions
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index f83768883e98..da54b4f35403 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -60,9 +60,18 @@ struct trap_per_cpu {
60} __attribute__((aligned(64))); 60} __attribute__((aligned(64)));
61extern struct trap_per_cpu trap_block[NR_CPUS]; 61extern struct trap_per_cpu trap_block[NR_CPUS];
62extern void init_cur_cpu_trap(void); 62extern void init_cur_cpu_trap(void);
63extern void per_cpu_patch(void);
64extern void setup_tba(void); 63extern void setup_tba(void);
65 64
65#ifdef CONFIG_SMP
66struct cpuid_patch_entry {
67 unsigned int addr;
68 unsigned int cheetah_safari[4];
69 unsigned int cheetah_jbus[4];
70 unsigned int starfire[4];
71};
72extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
73#endif
74
66#endif /* !(__ASSEMBLY__) */ 75#endif /* !(__ASSEMBLY__) */
67 76
68#define TRAP_PER_CPU_THREAD 0x00 77#define TRAP_PER_CPU_THREAD 0x00
@@ -70,35 +79,58 @@ extern void setup_tba(void);
70 79
71#define TRAP_BLOCK_SZ_SHIFT 6 80#define TRAP_BLOCK_SZ_SHIFT 6
72 81
73/* Clobbers %g1, loads %g6 with local processor's cpuid */ 82#ifdef CONFIG_SMP
74#define __GET_CPUID \ 83
75 ba,pt %xcc, __get_cpu_id; \ 84#define __GET_CPUID(REG) \
76 rd %pc, %g1; 85 /* Spitfire implementation (default). */ \
86661: ldxa [%g0] ASI_UPA_CONFIG, REG; \
87 srlx REG, 17, REG; \
88 and REG, 0x1f, REG; \
89 nop; \
90 .section .cpuid_patch, "ax"; \
91 /* Instruction location. */ \
92 .word 661b; \
93 /* Cheetah Safari implementation. */ \
94 ldxa [%g0] ASI_SAFARI_CONFIG, REG; \
95 srlx REG, 17, REG; \
96 and REG, 0x3ff, REG; \
97 nop; \
98 /* Cheetah JBUS implementation. */ \
99 ldxa [%g0] ASI_JBUS_CONFIG, REG; \
100 srlx REG, 17, REG; \
101 and REG, 0x1f, REG; \
102 nop; \
103 /* Starfire implementation. */ \
104 sethi %hi(0x1fff40000d0 >> 9), REG; \
105 sllx REG, 9, REG; \
106 or REG, 0xd0, REG; \
107 lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\
108 .previous;
77 109
78/* Clobbers %g1, current address space PGD phys address into %g7. */ 110/* Clobbers %g1, current address space PGD phys address into %g7. */
79#define TRAP_LOAD_PGD_PHYS \ 111#define TRAP_LOAD_PGD_PHYS \
80 __GET_CPUID \ 112 __GET_CPUID(%g1) \
81 sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \
82 sethi %hi(trap_block), %g7; \ 113 sethi %hi(trap_block), %g7; \
114 sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \
83 or %g7, %lo(trap_block), %g7; \ 115 or %g7, %lo(trap_block), %g7; \
84 add %g7, %g6, %g7; \ 116 add %g7, %g1, %g7; \
85 ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; 117 ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
86 118
87/* Clobbers %g1, loads local processor's IRQ work area into %g6. */ 119/* Clobbers %g1, loads local processor's IRQ work area into %g6. */
88#define TRAP_LOAD_IRQ_WORK \ 120#define TRAP_LOAD_IRQ_WORK \
89 __GET_CPUID \ 121 __GET_CPUID(%g1) \
90 sethi %hi(__irq_work), %g1; \ 122 sethi %hi(__irq_work), %g6; \
91 sllx %g6, 6, %g6; \ 123 sllx %g1, 6, %g1; \
92 or %g1, %lo(__irq_work), %g1; \ 124 or %g6, %lo(__irq_work), %g6; \
93 add %g1, %g6, %g6; 125 add %g6, %g1, %g6;
94 126
95/* Clobbers %g1, loads %g6 with current thread info pointer. */ 127/* Clobbers %g1, loads %g6 with current thread info pointer. */
96#define TRAP_LOAD_THREAD_REG \ 128#define TRAP_LOAD_THREAD_REG \
97 __GET_CPUID \ 129 __GET_CPUID(%g1) \
98 sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ 130 sethi %hi(trap_block), %g6; \
99 sethi %hi(trap_block), %g1; \ 131 sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \
100 or %g1, %lo(trap_block), %g1; \ 132 or %g6, %lo(trap_block), %g6; \
101 ldx [%g1 + %g6], %g6; 133 ldx [%g6 + %g1], %g6;
102 134
103/* Given the current thread info pointer in %g6, load the per-cpu 135/* Given the current thread info pointer in %g6, load the per-cpu
104 * area base of the current processor into %g5. REG1, REG2, and REG3 are 136 * area base of the current processor into %g5. REG1, REG2, and REG3 are
@@ -109,7 +141,6 @@ extern void setup_tba(void);
109 * trap will load the fully resolved %g5 per-cpu base. This can corrupt 141 * trap will load the fully resolved %g5 per-cpu base. This can corrupt
110 * the calculations done by the macro mid-stream. 142 * the calculations done by the macro mid-stream.
111 */ 143 */
112#ifdef CONFIG_SMP
113#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \ 144#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \
114 ldub [%g6 + TI_CPU], REG1; \ 145 ldub [%g6 + TI_CPU], REG1; \
115 sethi %hi(__per_cpu_shift), REG3; \ 146 sethi %hi(__per_cpu_shift), REG3; \
@@ -118,8 +149,26 @@ extern void setup_tba(void);
118 ldx [REG2 + %lo(__per_cpu_base)], REG2; \ 149 ldx [REG2 + %lo(__per_cpu_base)], REG2; \
119 sllx REG1, REG3, REG3; \ 150 sllx REG1, REG3, REG3; \
120 add REG3, REG2, %g5; 151 add REG3, REG2, %g5;
152
121#else 153#else
154
155/* Uniprocessor versions, we know the cpuid is zero. */
156#define TRAP_LOAD_PGD_PHYS \
157 sethi %hi(trap_block), %g7; \
158 or %g7, %lo(trap_block), %g7; \
159 ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
160
161#define TRAP_LOAD_IRQ_WORK \
162 sethi %hi(__irq_work), %g6; \
163 or %g6, %lo(__irq_work), %g6;
164
165#define TRAP_LOAD_THREAD_REG \
166 sethi %hi(trap_block), %g6; \
167 ldx [%g6 + %lo(trap_block)], %g6;
168
169/* No per-cpu areas on uniprocessor, so no need to load %g5. */
122#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) 170#define LOAD_PER_CPU_BASE(REG1, REG2, REG3)
123#endif 171
172#endif /* !(CONFIG_SMP) */
124 173
125#endif /* _SPARC64_CPUDATA_H */ 174#endif /* _SPARC64_CPUDATA_H */