aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author: David S. Miller <davem@sunset.davemloft.net> 2006-02-17 04:29:17 -0500
committer: David S. Miller <davem@sunset.davemloft.net> 2006-03-20 04:13:22 -0500
commit: 72aff53f1fe74153eccef303ab2f79de888d248c (patch)
tree: a229645be99274d36bed04bed355d74ec3c0baa2 /arch
parent: 19a0d585e80e84b54bb9bf120bf0c826045dd3dd (diff)
[SPARC64]: Get SUN4V SMP working.
The sibling cpu bringup is extremely fragile. We can only perform the most basic calls until we take over the trap table from the firmware/hypervisor on the new cpu. This means no accesses to %g4, %g5, %g6 since those can't be TLB translated without our trap handlers.

In order to achieve this:

1) Change sun4v_init_mondo_queues() so that it can operate in several modes. It can allocate the queues, or install them in the current processor, or both. The boot cpu does both in its call early on. Later, the boot cpu allocates the sibling cpu queue, starts the sibling cpu, then the sibling cpu loads them in.

2) init_cur_cpu_trap() is changed to take the current_thread_info() as an argument instead of reading %g6 directly on the current cpu.

3) Create a trampoline stack for the sibling cpus. We do our basic kernel calls using this stack, which is locked into the kernel image, then go to our proper thread stack after taking over the trap table.

4) While we are in this delicate startup state, we put 0xdeadbeef into %g4/%g5/%g6 in order to catch accidental accesses.

5) On the final prom_set_trap_table*() call, we put &init_thread_union into %g6. This is a hack to make prom_world(0) work. All that wants to do is restore the %asi register using get_thread_current_ds().

Longer term we should just do the OBP calls to set the trap table by hand just like we do for everything else. This would avoid that silly prom_world(0) issue, then we can remove the init_thread_union hack.

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch')
-rw-r--r-- arch/sparc64/kernel/irq.c 30
-rw-r--r-- arch/sparc64/kernel/setup.c 2
-rw-r--r-- arch/sparc64/kernel/smp.c 6
-rw-r--r-- arch/sparc64/kernel/trampoline.S 92
-rw-r--r-- arch/sparc64/kernel/traps.c 4
5 files changed, 84 insertions, 50 deletions
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 6eb44ca5dba6..bb0bb34555da 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -1018,21 +1018,29 @@ static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_
1018} 1018}
1019 1019
1020/* Allocate and register the mondo and error queues for this cpu. */ 1020/* Allocate and register the mondo and error queues for this cpu. */
1021void __cpuinit sun4v_init_mondo_queues(int use_bootmem) 1021void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load)
1022{ 1022{
1023 int cpu = hard_smp_processor_id();
1024 struct trap_per_cpu *tb = &trap_block[cpu]; 1023 struct trap_per_cpu *tb = &trap_block[cpu];
1025 1024
1026 alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem); 1025 if (alloc) {
1027 alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem); 1026 alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem);
1028 alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem); 1027 alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem);
1029 alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem); 1028 alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem);
1030 alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem); 1029 alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem);
1031 alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem); 1030 alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem);
1031 alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem);
1032 1032
1033 init_cpu_send_mondo_info(tb, use_bootmem); 1033 init_cpu_send_mondo_info(tb, use_bootmem);
1034 }
1034 1035
1035 sun4v_register_mondo_queues(cpu); 1036 if (load) {
1037 if (cpu != hard_smp_processor_id()) {
1038 prom_printf("SUN4V: init mondo on cpu %d not %d\n",
1039 cpu, hard_smp_processor_id());
1040 prom_halt();
1041 }
1042 sun4v_register_mondo_queues(cpu);
1043 }
1036} 1044}
1037 1045
1038/* Only invoked on boot processor. */ 1046/* Only invoked on boot processor. */
@@ -1043,7 +1051,7 @@ void __init init_IRQ(void)
1043 memset(&ivector_table[0], 0, sizeof(ivector_table)); 1051 memset(&ivector_table[0], 0, sizeof(ivector_table));
1044 1052
1045 if (tlb_type == hypervisor) 1053 if (tlb_type == hypervisor)
1046 sun4v_init_mondo_queues(1); 1054 sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1);
1047 1055
1048 /* We need to clear any IRQ's pending in the soft interrupt 1056 /* We need to clear any IRQ's pending in the soft interrupt
1049 * registers, a spurious one could be left around from the 1057 * registers, a spurious one could be left around from the
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 06807cf95ee1..9b0c409d5b6a 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -384,7 +384,7 @@ void __init setup_arch(char **cmdline_p)
384 paging_init(); 384 paging_init();
385 385
386 /* Get boot processor trap_block[] setup. */ 386 /* Get boot processor trap_block[] setup. */
387 init_cur_cpu_trap(); 387 init_cur_cpu_trap(current_thread_info());
388} 388}
389 389
390static int __init set_preferred_console(void) 390static int __init set_preferred_console(void)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 527dfd7ae210..b586345fe3b9 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -316,6 +316,8 @@ static void smp_synchronize_one_tick(int cpu)
316 spin_unlock_irqrestore(&itc_sync_lock, flags); 316 spin_unlock_irqrestore(&itc_sync_lock, flags);
317} 317}
318 318
319extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
320
319extern unsigned long sparc64_cpu_startup; 321extern unsigned long sparc64_cpu_startup;
320 322
321/* The OBP cpu startup callback truncates the 3rd arg cookie to 323/* The OBP cpu startup callback truncates the 3rd arg cookie to
@@ -339,6 +341,9 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
339 cpu_set(cpu, cpu_callout_map); 341 cpu_set(cpu, cpu_callout_map);
340 342
341 if (tlb_type == hypervisor) { 343 if (tlb_type == hypervisor) {
344 /* Alloc the mondo queues, cpu will load them. */
345 sun4v_init_mondo_queues(0, cpu, 1, 0);
346
342 prom_startcpu_cpuid(cpu, entry, cookie); 347 prom_startcpu_cpuid(cpu, entry, cookie);
343 } else { 348 } else {
344 int cpu_node; 349 int cpu_node;
@@ -352,6 +357,7 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
352 break; 357 break;
353 udelay(100); 358 udelay(100);
354 } 359 }
360
355 if (callin_flag) { 361 if (callin_flag) {
356 ret = 0; 362 ret = 0;
357 } else { 363 } else {
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index b9c9f54b0a00..a4dc01a3d238 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -30,12 +30,16 @@ itlb_load:
30dtlb_load: 30dtlb_load:
31 .asciz "SUNW,dtlb-load" 31 .asciz "SUNW,dtlb-load"
32 32
33 /* XXX __cpuinit this thing XXX */
34#define TRAMP_STACK_SIZE 1024
35 .align 16
36tramp_stack:
37 .skip TRAMP_STACK_SIZE
38
33 .text 39 .text
34 .align 8 40 .align 8
35 .globl sparc64_cpu_startup, sparc64_cpu_startup_end 41 .globl sparc64_cpu_startup, sparc64_cpu_startup_end
36sparc64_cpu_startup: 42sparc64_cpu_startup:
37 flushw
38
39 BRANCH_IF_SUN4V(g1, niagara_startup) 43 BRANCH_IF_SUN4V(g1, niagara_startup)
40 BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup) 44 BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup)
41 BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup) 45 BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup)
@@ -58,6 +62,7 @@ cheetah_startup:
58 or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 62 or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
59 stxa %g5, [%g0] ASI_DCU_CONTROL_REG 63 stxa %g5, [%g0] ASI_DCU_CONTROL_REG
60 membar #Sync 64 membar #Sync
65 /* fallthru */
61 66
62cheetah_generic_startup: 67cheetah_generic_startup:
63 mov TSB_EXTENSION_P, %g3 68 mov TSB_EXTENSION_P, %g3
@@ -90,19 +95,17 @@ spitfire_startup:
90 membar #Sync 95 membar #Sync
91 96
92startup_continue: 97startup_continue:
93 wrpr %g0, 15, %pil
94
95 sethi %hi(0x80000000), %g2 98 sethi %hi(0x80000000), %g2
96 sllx %g2, 32, %g2 99 sllx %g2, 32, %g2
97 wr %g2, 0, %tick_cmpr 100 wr %g2, 0, %tick_cmpr
98 101
102 mov %o0, %l0
103
99 BRANCH_IF_SUN4V(g1, niagara_lock_tlb) 104 BRANCH_IF_SUN4V(g1, niagara_lock_tlb)
100 105
101 /* Call OBP by hand to lock KERNBASE into i/d tlbs. 106 /* Call OBP by hand to lock KERNBASE into i/d tlbs.
102 * We lock 2 consequetive entries if we are 'bigkernel'. 107 * We lock 2 consequetive entries if we are 'bigkernel'.
103 */ 108 */
104 mov %o0, %l0
105
106 sethi %hi(prom_entry_lock), %g2 109 sethi %hi(prom_entry_lock), %g2
1071: ldstub [%g2 + %lo(prom_entry_lock)], %g1 1101: ldstub [%g2 + %lo(prom_entry_lock)], %g1
108 membar #StoreLoad | #StoreStore 111 membar #StoreLoad | #StoreStore
@@ -112,7 +115,6 @@ startup_continue:
112 sethi %hi(p1275buf), %g2 115 sethi %hi(p1275buf), %g2
113 or %g2, %lo(p1275buf), %g2 116 or %g2, %lo(p1275buf), %g2
114 ldx [%g2 + 0x10], %l2 117 ldx [%g2 + 0x10], %l2
115 mov %sp, %l1
116 add %l2, -(192 + 128), %sp 118 add %l2, -(192 + 128), %sp
117 flushw 119 flushw
118 120
@@ -308,18 +310,9 @@ niagara_lock_tlb:
308 ta HV_FAST_TRAP 310 ta HV_FAST_TRAP
309 311
310after_lock_tlb: 312after_lock_tlb:
311 mov %l1, %sp
312 flushw
313
314 mov %l0, %o0
315
316 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate 313 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
317 wr %g0, 0, %fprs 314 wr %g0, 0, %fprs
318 315
319 /* XXX Buggy PROM... */
320 srl %o0, 0, %o0
321 ldx [%o0], %g6
322
323 wr %g0, ASI_P, %asi 316 wr %g0, ASI_P, %asi
324 317
325 mov PRIMARY_CONTEXT, %g7 318 mov PRIMARY_CONTEXT, %g7
@@ -341,22 +334,25 @@ after_lock_tlb:
341 334
342 membar #Sync 335 membar #Sync
343 336
344 mov 1, %g5 337 /* Everything we do here, until we properly take over the
345 sllx %g5, THREAD_SHIFT, %g5 338 * trap table, must be done with extreme care. We cannot
346 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 339 * make any references to %g6 (current thread pointer),
347 add %g6, %g5, %sp 340 * %g4 (current task pointer), or %g5 (base of current cpu's
341 * per-cpu area) until we properly take over the trap table
342 * from the firmware and hypervisor.
343 *
344 * Get onto temporary stack which is in the locked kernel image.
345 */
346 sethi %hi(tramp_stack), %g1
347 or %g1, %lo(tramp_stack), %g1
348 add %g1, TRAMP_STACK_SIZE, %g1
349 sub %g1, STACKFRAME_SZ + STACK_BIAS, %sp
348 mov 0, %fp 350 mov 0, %fp
349 351
350 wrpr %g0, 0, %wstate 352 /* Put garbage in these registers to trap any access to them. */
351 wrpr %g0, 0, %tl 353 set 0xdeadbeef, %g4
352 354 set 0xdeadbeef, %g5
353 /* Load TBA, then we can resurface. */ 355 set 0xdeadbeef, %g6
354 sethi %hi(sparc64_ttable_tl0), %g5
355 wrpr %g5, %tba
356
357 ldx [%g6 + TI_TASK], %g4
358
359 wrpr %g0, 0, %wstate
360 356
361 call init_irqwork_curcpu 357 call init_irqwork_curcpu
362 nop 358 nop
@@ -367,11 +363,17 @@ after_lock_tlb:
367 bne,pt %icc, 1f 363 bne,pt %icc, 1f
368 nop 364 nop
369 365
366 call hard_smp_processor_id
367 nop
368
369 mov %o0, %o1
370 mov 0, %o0
371 mov 0, %o2
370 call sun4v_init_mondo_queues 372 call sun4v_init_mondo_queues
371 mov 0, %o0 373 mov 1, %o3
372 374
3731: call init_cur_cpu_trap 3751: call init_cur_cpu_trap
374 nop 376 ldx [%l0], %o0
375 377
376 /* Start using proper page size encodings in ctx register. */ 378 /* Start using proper page size encodings in ctx register. */
377 sethi %hi(sparc64_kern_pri_context), %g3 379 sethi %hi(sparc64_kern_pri_context), %g3
@@ -386,9 +388,14 @@ after_lock_tlb:
386 388
387 membar #Sync 389 membar #Sync
388 390
389 rdpr %pstate, %o1 391 wrpr %g0, 0, %wstate
390 or %o1, PSTATE_IE, %o1 392
391 wrpr %o1, 0, %pstate 393 /* As a hack, put &init_thread_union into %g6.
394 * prom_world() loads from here to restore the %asi
395 * register.
396 */
397 sethi %hi(init_thread_union), %g6
398 or %g6, %lo(init_thread_union), %g6
392 399
393 sethi %hi(is_sun4v), %o0 400 sethi %hi(is_sun4v), %o0
394 lduw [%o0 + %lo(is_sun4v)], %o0 401 lduw [%o0 + %lo(is_sun4v)], %o0
@@ -418,7 +425,20 @@ after_lock_tlb:
4181: call prom_set_trap_table 4251: call prom_set_trap_table
419 sethi %hi(sparc64_ttable_tl0), %o0 426 sethi %hi(sparc64_ttable_tl0), %o0
420 427
4212: call smp_callin 4282: ldx [%l0], %g6
429 ldx [%g6 + TI_TASK], %g4
430
431 mov 1, %g5
432 sllx %g5, THREAD_SHIFT, %g5
433 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
434 add %g6, %g5, %sp
435 mov 0, %fp
436
437 rdpr %pstate, %o1
438 or %o1, PSTATE_IE, %o1
439 wrpr %o1, 0, %pstate
440
441 call smp_callin
422 nop 442 nop
423 call cpu_idle 443 call cpu_idle
424 mov 0, %o0 444 mov 0, %o0
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 5956d0a94009..c9484ae5bb8f 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2413,12 +2413,12 @@ struct trap_per_cpu trap_block[NR_CPUS];
2413/* This can get invoked before sched_init() so play it super safe 2413/* This can get invoked before sched_init() so play it super safe
2414 * and use hard_smp_processor_id(). 2414 * and use hard_smp_processor_id().
2415 */ 2415 */
2416void init_cur_cpu_trap(void) 2416void init_cur_cpu_trap(struct thread_info *t)
2417{ 2417{
2418 int cpu = hard_smp_processor_id(); 2418 int cpu = hard_smp_processor_id();
2419 struct trap_per_cpu *p = &trap_block[cpu]; 2419 struct trap_per_cpu *p = &trap_block[cpu];
2420 2420
2421 p->thread = current_thread_info(); 2421 p->thread = t;
2422 p->pgd_paddr = 0; 2422 p->pgd_paddr = 0;
2423} 2423}
2424 2424