path: root/arch/sparc64/kernel/smp.c
author    David S. Miller <davem@davemloft.net>  2006-02-27 02:24:22 -0500
committer David S. Miller <davem@sunset.davemloft.net>  2006-03-20 04:11:16 -0500
commit    56fb4df6da76c35dca22036174e2d1edef83ff1f (patch)
tree      b39f152ec9ed682edceca965a85680fd4bf736a7 /arch/sparc64/kernel/smp.c
parent    3c936465249f863f322154ff1aaa628b84ee5750 (diff)
[SPARC64]: Eliminate all usage of hard-coded trap globals.
UltraSPARC has special sets of global registers which are switched to for certain trap types. There is one set for MMU related traps, one set for Interrupt Vector processing, and another set (called the Alternate globals) for all other trap types.

For what seems like forever we've hard-coded the values in some of these trap registers. Some examples include:

1) Interrupt Vector global %g6 holds the current processor's interrupt work struct, where received interrupts are managed for IRQ handler dispatch.

2) MMU global %g7 holds the base of the page tables of the currently active address space.

3) Alternate global %g6 held the current_thread_info() value.

Such hard-coding has resulted in serious issues in many areas. There are code sequences where having another register available would help clean up the implementation. Taking traps such as cross-calls from the OBP firmware requires tricky code sequences wherein we have to save away and restore all of the special sets of global registers when we enter/exit OBP. We were also using the IMMU TSB register on SMP to hold the per-cpu area base address, which no longer works now that we actually use the TSB facility of the cpu.

The implementation is pretty straightforward. One tricky bit is getting the current processor ID, as that differs across cpu variants. We use a stub with a fancy calling convention which we patch at boot time: the stub is branched to with the return address minus 4 in register %g1, and it leaves the cpu number in %g6. This stub can be invoked via the __GET_CPUID macro.

We use an array of per-cpu trap state to store the current thread and the physical address of the current address space's page tables. TRAP_LOAD_THREAD_REG loads %g6 with the current thread from this table; it uses __GET_CPUID and also clobbers %g1. TRAP_LOAD_IRQ_WORK is used by interrupt vector processing to load the current processor's IRQ software state into %g6; it also uses __GET_CPUID and clobbers %g1. Finally, TRAP_LOAD_PGD_PHYS loads the physical address base of the current address space's page tables into %g7; it clobbers %g1 and uses __GET_CPUID.

Many refinements and some tuning are possible with this in place.

Signed-off-by: David S. Miller <davem@davemloft.net>
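For orientation, a minimal standalone C sketch of the per-cpu trap state table described above follows. Every identifier in it (trap_state, trap_state_table, NR_CPUS_DEMO, get_cpuid_stub) is an illustrative stand-in, not the kernel's actual definition; in the real patch the lookup happens in assembly, with the cpu number arriving in %g6 and %g1 clobbered.

/* Sketch only: one slot of per-cpu trap state per processor, holding
 * the two values the commit message says the TRAP_LOAD_* macros fetch. */
#include <stdio.h>

#define NR_CPUS_DEMO 4                  /* assumed cpu count for the demo */

struct trap_state {
        void *thread;                   /* what TRAP_LOAD_THREAD_REG leaves in %g6 */
        unsigned long pgd_paddr;        /* what TRAP_LOAD_PGD_PHYS leaves in %g7 */
};

static struct trap_state trap_state_table[NR_CPUS_DEMO];

/* Stand-in for the boot-patched stub: the real one is reached by a
 * branch, with the return address minus 4 in %g1, and answers in %g6. */
static unsigned int get_cpuid_stub(void)
{
        return 0;                       /* demo value; the real stub reads a cpu register */
}

int main(void)
{
        unsigned int cpu = get_cpuid_stub();

        /* C-level equivalent of TRAP_LOAD_THREAD_REG / TRAP_LOAD_PGD_PHYS:
         * index the table by cpu id and pull out the cached fields. */
        printf("cpu %u: thread=%p pgd_paddr=%#lx\n",
               cpu, trap_state_table[cpu].thread,
               trap_state_table[cpu].pgd_paddr);
        return 0;
}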
Diffstat (limited to 'arch/sparc64/kernel/smp.c')
 -rw-r--r--  arch/sparc64/kernel/smp.c  |  55
 1 file changed, 10 insertions(+), 45 deletions(-)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index d2d3369e7b5d..8c245859d212 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -38,6 +38,7 @@
 #include <asm/timer.h>
 #include <asm/starfire.h>
 #include <asm/tlb.h>
+#include <asm/sections.h>
 
 extern void calibrate_delay(void);
 
@@ -87,10 +88,6 @@ void __init smp_store_cpu_info(int id)
 	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 						     "clock-frequency", 0);
 
-	cpu_data(id).pgcache_size = 0;
-	cpu_data(id).pte_cache[0] = NULL;
-	cpu_data(id).pte_cache[1] = NULL;
-	cpu_data(id).pgd_cache = NULL;
 	cpu_data(id).idle_volume = 1;
 
 	cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",
@@ -121,26 +118,15 @@ static volatile unsigned long callin_flag = 0;
 
 extern void inherit_locked_prom_mappings(int save_p);
 
-static inline void cpu_setup_percpu_base(unsigned long cpu_id)
-{
-#error IMMU TSB usage must be fixed
-	__asm__ __volatile__("mov	%0, %%g5\n\t"
-			     "stxa	%0, [%1] %2\n\t"
-			     "membar	#Sync"
-			     : /* no outputs */
-			     : "r" (__per_cpu_offset(cpu_id)),
-			       "r" (TSB_REG), "i" (ASI_IMMU));
-}
-
 void __init smp_callin(void)
 {
 	int cpuid = hard_smp_processor_id();
 
 	inherit_locked_prom_mappings(0);
 
-	__flush_tlb_all();
+	__local_per_cpu_offset = __per_cpu_offset(cpuid);
 
-	cpu_setup_percpu_base(cpuid);
+	__flush_tlb_all();
 
 	smp_setup_percpu_timer();
 
@@ -1107,12 +1093,15 @@ void __init smp_setup_cpu_possible_map(void)
 
 void __devinit smp_prepare_boot_cpu(void)
 {
-	if (hard_smp_processor_id() >= NR_CPUS) {
+	int cpu = hard_smp_processor_id();
+
+	if (cpu >= NR_CPUS) {
 		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
 		prom_halt();
 	}
 
-	current_thread_info()->cpu = hard_smp_processor_id();
+	current_thread_info()->cpu = cpu;
+	__local_per_cpu_offset = __per_cpu_offset(cpu);
 
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), phys_cpu_present_map);
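Both hunks above cache the cpu's per-cpu offset in __local_per_cpu_offset at boot. A hedged sketch of how such a cached offset is consumed: demo_get_cpu_var and per_cpu_demo_counter are invented names, and only __local_per_cpu_offset comes from the patch itself.

#include <stdio.h>

static unsigned long __local_per_cpu_offset;    /* set once per cpu at boot */

/* Illustrative accessor: a per-cpu reference reduces to "address of the
 * prototype variable + this cpu's cached offset", no cpu-id lookup needed. */
#define demo_get_cpu_var(var) \
        (*(unsigned long *)((char *)&(var) + __local_per_cpu_offset))

unsigned long per_cpu_demo_counter;             /* stand-in per-cpu variable */

int main(void)
{
        __local_per_cpu_offset = 0;             /* demo: point at our own copy */
        demo_get_cpu_var(per_cpu_demo_counter) = 42;
        printf("%lu\n", demo_get_cpu_var(per_cpu_demo_counter));
        return 0;
}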
@@ -1173,12 +1162,9 @@ void __init setup_per_cpu_areas(void)
 {
 	unsigned long goal, size, i;
 	char *ptr;
-	/* Created by linker magic */
-	extern char __per_cpu_start[], __per_cpu_end[];
 
 	/* Copy section for each CPU (we discard the original) */
-	goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
-
+	goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
 #ifdef CONFIG_MODULES
 	if (goal < PERCPU_ENOUGH_ROOM)
 		goal = PERCPU_ENOUGH_ROOM;
@@ -1187,31 +1173,10 @@ void __init setup_per_cpu_areas(void)
 	for (size = 1UL; size < goal; size <<= 1UL)
 		__per_cpu_shift++;
 
-	/* Make sure the resulting __per_cpu_base value
-	 * will fit in the 43-bit sign extended IMMU
-	 * TSB register.
-	 */
-	ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE,
-			      (unsigned long) __per_cpu_start);
+	ptr = alloc_bootmem(size * NR_CPUS);
 
 	__per_cpu_base = ptr - __per_cpu_start;
 
-	if ((__per_cpu_shift < PAGE_SHIFT) ||
-	    (__per_cpu_base & ~PAGE_MASK) ||
-	    (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) {
-		prom_printf("PER_CPU: Invalid layout, "
-			    "ptr[%p] shift[%lx] base[%lx]\n",
-			    ptr, __per_cpu_shift, __per_cpu_base);
-		prom_halt();
-	}
-
 	for (i = 0; i < NR_CPUS; i++, ptr += size)
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
-	/* Finally, load in the boot cpu's base value.
-	 * We abuse the IMMU TSB register for trap handler
-	 * entry and exit loading of %g5.  That is why it
-	 * has to be page aligned.
-	 */
-	cpu_setup_percpu_base(hard_smp_processor_id());
 }
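The sizing loop in the final hunk rounds the per-cpu copy up to a power of two so that a cpu's offset becomes a pure shift. A standalone illustration of that arithmetic, where the 37 KB section size is an assumed example value:

#include <stdio.h>

int main(void)
{
        unsigned long goal = 37 * 1024; /* assumed __per_cpu_end - __per_cpu_start */
        unsigned long size, shift = 0;

        /* Same loop shape as setup_per_cpu_areas() above. */
        for (size = 1UL; size < goal; size <<= 1UL)
                shift++;

        /* 37 KB rounds up to size = 64 KB and shift = 16, so cpu N's copy
         * starts at __per_cpu_base + ((unsigned long)N << 16). */
        printf("size=%lu shift=%lu\n", size, shift);
        return 0;
}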