aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/kernel/smp_64.c
diff options
context:
space:
mode:
authorSam Ravnborg <sam@ravnborg.org>2008-12-03 06:11:52 -0500
committerDavid S. Miller <davem@davemloft.net>2008-12-04 12:17:21 -0500
commita88b5ba8bd8ac18aad65ee6c6a254e2e74876db3 (patch)
treeeb3d0ffaf53c3f7ec6083752c2097cecd1cb892a /arch/sparc/kernel/smp_64.c
parentd670bd4f803c8b646acd20f3ba21e65458293faf (diff)
sparc,sparc64: unify kernel/
o Move all files from sparc64/kernel/ to sparc/kernel - rename as appropriate o Update sparc/Makefile to the changes o Update sparc/kernel/Makefile to include the sparc64 files NOTE: This commit changes link order on sparc64! Link order had to change for either of sparc32 and sparc64. And assuming sparc64 see more testing than sparc32 change link order on sparc64 where issues will be caught faster. Signed-off-by: Sam Ravnborg <sam@ravnborg.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/kernel/smp_64.c')
-rw-r--r--arch/sparc/kernel/smp_64.c1412
1 files changed, 1412 insertions, 0 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
new file mode 100644
index 000000000000..b5225c81556c
--- /dev/null
+++ b/arch/sparc/kernel/smp_64.c
@@ -0,0 +1,1412 @@
1/* smp.c: Sparc64 SMP support.
2 *
3 * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
4 */
5
6#include <linux/module.h>
7#include <linux/kernel.h>
8#include <linux/sched.h>
9#include <linux/mm.h>
10#include <linux/pagemap.h>
11#include <linux/threads.h>
12#include <linux/smp.h>
13#include <linux/interrupt.h>
14#include <linux/kernel_stat.h>
15#include <linux/delay.h>
16#include <linux/init.h>
17#include <linux/spinlock.h>
18#include <linux/fs.h>
19#include <linux/seq_file.h>
20#include <linux/cache.h>
21#include <linux/jiffies.h>
22#include <linux/profile.h>
23#include <linux/lmb.h>
24#include <linux/cpu.h>
25
26#include <asm/head.h>
27#include <asm/ptrace.h>
28#include <asm/atomic.h>
29#include <asm/tlbflush.h>
30#include <asm/mmu_context.h>
31#include <asm/cpudata.h>
32#include <asm/hvtramp.h>
33#include <asm/io.h>
34#include <asm/timer.h>
35
36#include <asm/irq.h>
37#include <asm/irq_regs.h>
38#include <asm/page.h>
39#include <asm/pgtable.h>
40#include <asm/oplib.h>
41#include <asm/uaccess.h>
42#include <asm/starfire.h>
43#include <asm/tlb.h>
44#include <asm/sections.h>
45#include <asm/prom.h>
46#include <asm/mdesc.h>
47#include <asm/ldc.h>
48#include <asm/hypervisor.h>
49
/* NOTE(review): not referenced in the visible part of this file; presumably
 * set elsewhere when the machine has multi-core cpus -- confirm.
 */
50int sparc64_multi_core __read_mostly;
51
/* Global cpu masks, all starting out empty.  A cpu sets its own bit in
 * cpu_online_map at the end of smp_callin().
 */
52cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
53cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
/* Topology masks, rebuilt by smp_fill_in_sib_core_maps() from each cpu's
 * cpu_data(i).proc_id (sibling map) and cpu_data(i).core_id (core map).
 */
54DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
55cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
56 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
57
58EXPORT_SYMBOL(cpu_possible_map);
59EXPORT_SYMBOL(cpu_online_map);
60EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
61EXPORT_SYMBOL(cpu_core_map);
62
/* smp_callin() spins until the calling cpu's bit shows up in this mask
 * before it marks itself online.
 */
63static cpumask_t smp_commenced_mask;
64
65void smp_info(struct seq_file *m)
66{
67 int i;
68
69 seq_printf(m, "State:\n");
70 for_each_online_cpu(i)
71 seq_printf(m, "CPU%d:\t\tonline\n", i);
72}
73
74void smp_bogo(struct seq_file *m)
75{
76 int i;
77
78 for_each_online_cpu(i)
79 seq_printf(m,
80 "Cpu%dClkTck\t: %016lx\n",
81 i, cpu_data(i).clock_tick);
82}
83
/* Defined outside this file; each cpu calls it once from smp_callin(). */
84extern void setup_sparc64_timer(void);
85
/* Boot handshake flag: smp_boot_one_cpu() clears it before starting a cpu
 * and then polls it; the started cpu sets it to 1 in smp_callin().
 */
86static volatile unsigned long callin_flag = 0;
87
/* Per-cpu bring-up sequence executed by a freshly started cpu.
 *
 * Sets up per-cpu state (per-cpu offset, kernel TSB registration on
 * hypervisor systems, TLB flush, timer, optional Cheetah P-cache),
 * enables interrupts, signals the boot cpu through callin_flag, adopts
 * init_mm, runs the cpu-starting notifiers, then waits for its bit in
 * smp_commenced_mask before marking itself online.  Returns with
 * preemption disabled.
 *
 * NOTE(review): presumably reached from the low-level cpu startup
 * trampoline -- the caller is not visible in this file.
 */
88void __cpuinit smp_callin(void)
89{
90 int cpuid = hard_smp_processor_id();
91
92 __local_per_cpu_offset = __per_cpu_offset(cpuid);
93
94 if (tlb_type == hypervisor)
95 sun4v_ktsb_register();
96
97 __flush_tlb_all();
98
99 setup_sparc64_timer();
100
101 if (cheetah_pcache_forced_on)
102 cheetah_enable_pcache();
103
104 local_irq_enable();
105
 /* Tell smp_boot_one_cpu(), which is polling callin_flag, that this
  * cpu has made it this far; the membar/flush follows the store.
  */
106 callin_flag = 1;
107 __asm__ __volatile__("membar #Sync\n\t"
108 "flush %%g6" : : : "memory");
109
110 /* Clear this or we will die instantly when we
111 * schedule back to this idler...
112 */
113 current_thread_info()->new_child = 0;
114
115 /* Attach to the address space of init_task. */
116 atomic_inc(&init_mm.mm_count);
117 current->active_mm = &init_mm;
118
119 /* inform the notifiers about the new cpu */
120 notify_cpu_starting(cpuid);
121
 /* Busy-wait until this cpu's bit is set in smp_commenced_mask. */
122 while (!cpu_isset(cpuid, smp_commenced_mask))
123 rmb();
124
 /* The online bit is flipped while holding the IPI call lock. */
125 ipi_call_lock();
126 cpu_set(cpuid, cpu_online_map);
127 ipi_call_unlock();
128
129 /* idle thread is expected to have preempt disabled */
130 preempt_disable();
131}
132
/* Report which cpu returned from cpu_idle() and then panic. */
void cpu_panic(void)
{
	int cpu = smp_processor_id();

	printk("CPU[%d]: Returns from cpu_idle!\n", cpu);
	panic("SMP bolixed\n");
}
138
139/* This tick register synchronization scheme is taken entirely from
140 * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
141 *
142 * The only change I've made is to rework it so that the master
143 * initiates the synchronization instead of the slave. -DaveM
144 */
145
146#define MASTER 0
147#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))
148
149#define NUM_ROUNDS 64 /* magic value */
150#define NUM_ITERS 5 /* likewise */
151
152static DEFINE_SPINLOCK(itc_sync_lock);
153static unsigned long go[SLAVE + 1];
154
155#define DEBUG_TICK_SYNC 0
156
157static inline long get_delta (long *rt, long *master)
158{
159 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
160 unsigned long tcenter, t0, t1, tm;
161 unsigned long i;
162
163 for (i = 0; i < NUM_ITERS; i++) {
164 t0 = tick_ops->get_tick();
165 go[MASTER] = 1;
166 membar_safe("#StoreLoad");
167 while (!(tm = go[SLAVE]))
168 rmb();
169 go[SLAVE] = 0;
170 wmb();
171 t1 = tick_ops->get_tick();
172
173 if (t1 - t0 < best_t1 - best_t0)
174 best_t0 = t0, best_t1 = t1, best_tm = tm;
175 }
176
177 *rt = best_t1 - best_t0;
178 *master = best_tm - best_t0;
179
180 /* average best_t0 and best_t1 without overflow: */
181 tcenter = (best_t0/2 + best_t1/2);
182 if (best_t0 % 2 + best_t1 % 2 == 2)
183 tcenter++;
184 return tcenter - best_tm;
185}
186
187void smp_synchronize_tick_client(void)
188{
189 long i, delta, adj, adjust_latency = 0, done = 0;
190 unsigned long flags, rt, master_time_stamp, bound;
191#if DEBUG_TICK_SYNC
192 struct {
193 long rt; /* roundtrip time */
194 long master; /* master's timestamp */
195 long diff; /* difference between midpoint and master's timestamp */
196 long lat; /* estimate of itc adjustment latency */
197 } t[NUM_ROUNDS];
198#endif
199
200 go[MASTER] = 1;
201
202 while (go[MASTER])
203 rmb();
204
205 local_irq_save(flags);
206 {
207 for (i = 0; i < NUM_ROUNDS; i++) {
208 delta = get_delta(&rt, &master_time_stamp);
209 if (delta == 0) {
210 done = 1; /* let's lock on to this... */
211 bound = rt;
212 }
213
214 if (!done) {
215 if (i > 0) {
216 adjust_latency += -delta;
217 adj = -delta + adjust_latency/4;
218 } else
219 adj = -delta;
220
221 tick_ops->add_tick(adj);
222 }
223#if DEBUG_TICK_SYNC
224 t[i].rt = rt;
225 t[i].master = master_time_stamp;
226 t[i].diff = delta;
227 t[i].lat = adjust_latency/4;
228#endif
229 }
230 }
231 local_irq_restore(flags);
232
233#if DEBUG_TICK_SYNC
234 for (i = 0; i < NUM_ROUNDS; i++)
235 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
236 t[i].rt, t[i].master, t[i].diff, t[i].lat);
237#endif
238
239 printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
240 "(last diff %ld cycles, maxerr %lu cycles)\n",
241 smp_processor_id(), delta, rt);
242}
243
244static void smp_start_sync_tick_client(int cpu);
245
246static void smp_synchronize_one_tick(int cpu)
247{
248 unsigned long flags, i;
249
250 go[MASTER] = 0;
251
252 smp_start_sync_tick_client(cpu);
253
254 /* wait for client to be ready */
255 while (!go[MASTER])
256 rmb();
257
258 /* now let the client proceed into his loop */
259 go[MASTER] = 0;
260 membar_safe("#StoreLoad");
261
262 spin_lock_irqsave(&itc_sync_lock, flags);
263 {
264 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
265 while (!go[MASTER])
266 rmb();
267 go[MASTER] = 0;
268 wmb();
269 go[SLAVE] = tick_ops->get_tick();
270 membar_safe("#StoreLoad");
271 }
272 }
273 spin_unlock_irqrestore(&itc_sync_lock, flags);
274}
275
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
/* XXX Put this in some common place. XXX */
/* Translate a kernel image virtual address into the address handed to
 * the hypervisor: kern_base plus the offset of @p from KERNBASE.
 */
static unsigned long kimage_addr_to_ra(void *p)
{
	unsigned long val = (unsigned long) p;

	return kern_base + (val - KERNBASE);
}

/* Start @cpu through the sun4v hypervisor.
 *
 * Builds a hvtramp_descr describing the kernel image mappings (one
 * entry per 0x400000-byte step starting at KERNBASE), the cpu's fault
 * info area and @thread_reg, stores it in trap_block[cpu].hdesc, and
 * passes it to sun4v_cpu_start() together with the startup trampoline
 * and trap table addresses.  Failures are only logged.  The descriptor
 * is not freed here; smp_boot_one_cpu() frees tb->hdesc once the boot
 * attempt is over.
 */
static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
{
	extern unsigned long sparc64_ttable_tl0;
	extern unsigned long kern_locked_tte_data;
	struct hvtramp_descr *hdesc;
	unsigned long trampoline_ra;
	struct trap_per_cpu *tb;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;
	int i;

	/* Room for the header plus num_kernel_image_mappings entries.
	 * The previous expression, "sizeof(...) * n - 1", subtracted a
	 * single byte rather than one element because of operator
	 * precedence.  Sizing for all n entries keeps the fill loop
	 * below in bounds whatever the trailing-array layout is.
	 * NOTE(review): if struct hvtramp_descr already embeds the
	 * first entry this over-allocates by one mapping, which is
	 * harmless for this short-lived descriptor.
	 */
	hdesc = kzalloc(sizeof(*hdesc) +
			(sizeof(struct hvtramp_mapping) *
			 num_kernel_image_mappings),
			GFP_KERNEL);
	if (!hdesc) {
		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
		       "hvtramp_descr.\n");
		return;
	}

	hdesc->cpu = cpu;
	hdesc->num_mappings = num_kernel_image_mappings;

	tb = &trap_block[cpu];
	tb->hdesc = hdesc;

	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);

	hdesc->thread_reg = thread_reg;

	tte_vaddr = (unsigned long) KERNBASE;
	tte_data = kern_locked_tte_data;

	for (i = 0; i < hdesc->num_mappings; i++) {
		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte   = tte_data;
		tte_vaddr += 0x400000;
		tte_data  += 0x400000;
	}

	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);

	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
				 kimage_addr_to_ra(&sparc64_ttable_tl0),
				 __pa(hdesc));
	if (hv_err)
		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
		       "gives error %lu\n", hv_err);
}
#endif
337
338extern unsigned long sparc64_cpu_startup;
339
340/* The OBP cpu startup callback truncates the 3rd arg cookie to
341 * 32-bits (I think) so to be safe we have it read the pointer
342 * contained here so we work on >4GB machines. -DaveM
343 */
344static struct thread_info *cpu_new_thread = NULL;
345
346static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
347{
348 struct trap_per_cpu *tb = &trap_block[cpu];
349 unsigned long entry =
350 (unsigned long)(&sparc64_cpu_startup);
351 unsigned long cookie =
352 (unsigned long)(&cpu_new_thread);
353 struct task_struct *p;
354 int timeout, ret;
355
356 p = fork_idle(cpu);
357 if (IS_ERR(p))
358 return PTR_ERR(p);
359 callin_flag = 0;
360 cpu_new_thread = task_thread_info(p);
361
362 if (tlb_type == hypervisor) {
363#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
364 if (ldom_domaining_enabled)
365 ldom_startcpu_cpuid(cpu,
366 (unsigned long) cpu_new_thread);
367 else
368#endif
369 prom_startcpu_cpuid(cpu, entry, cookie);
370 } else {
371 struct device_node *dp = of_find_node_by_cpuid(cpu);
372
373 prom_startcpu(dp->node, entry, cookie);
374 }
375
376 for (timeout = 0; timeout < 50000; timeout++) {
377 if (callin_flag)
378 break;
379 udelay(100);
380 }
381
382 if (callin_flag) {
383 ret = 0;
384 } else {
385 printk("Processor %d is stuck.\n", cpu);
386 ret = -ENODEV;
387 }
388 cpu_new_thread = NULL;
389
390 if (tb->hdesc) {
391 kfree(tb->hdesc);
392 tb->hdesc = NULL;
393 }
394
395 return ret;
396}
397
/* Send one three-word mondo (data0..data2) to a single cpu on Spitfire.
 *
 * @pstate is the caller's saved %pstate value; it is written back
 * unchanged before every return and before every retry delay.
 * @cpu is the target cpu number (remapped on Starfire machines).
 *
 * The function dispatches, then polls ASI_INTR_DISPATCH_STAT: a zero
 * status means success; if bit 0 stays set for 100000 polls the attempt
 * is reported as stuck and abandoned; any other non-zero status leads
 * to a 2us delay and a fresh dispatch.
 * NOTE(review): bit 0 is presumably the BUSY flag and the other status
 * bits NACK -- confirm against the UltraSPARC manual.
 */
398static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
399{
400 u64 result, target;
401 int stuck, tmp;
402
403 if (this_is_starfire) {
404 /* map to real upaid */
405 cpu = (((cpu & 0x3c) << 1) |
406 ((cpu & 0x40) >> 4) |
407 (cpu & 0x3));
408 }
409
 /* Dispatch address: cpu id shifted into bits 14 and up, plus 0x70. */
410 target = (cpu << 14) | 0x70;
411again:
412 /* Ok, this is the real Spitfire Errata #54.
413 * One must read back from a UDB internal register
414 * after writes to the UDB interrupt dispatch, but
415 * before the membar Sync for that write.
416 * So we use the high UDB control register (ASI 0x7f,
417 * ADDR 0x20) for the dummy read. -DaveM
418 */
 /* Operand map for the asm below: %0 = tmp (starts at 0x40 and is
  * advanced by %8 = 0x10), %1 = pstate, %2 = PSTATE_IE,
  * %3 = ASI_INTR_W, %4..%6 = data0..data2, %7 = target.
  * The three data words are stored at 0x40, 0x50 and 0x60, then %g0
  * is stored to the target address.  The wrpr at the top changes
  * %pstate using PSTATE_IE; per the note after the asm, PSTATE_IE is
  * clear afterwards.
  */
419 tmp = 0x40;
420 __asm__ __volatile__(
421 "wrpr %1, %2, %%pstate\n\t"
422 "stxa %4, [%0] %3\n\t"
423 "stxa %5, [%0+%8] %3\n\t"
424 "add %0, %8, %0\n\t"
425 "stxa %6, [%0+%8] %3\n\t"
426 "membar #Sync\n\t"
427 "stxa %%g0, [%7] %3\n\t"
428 "membar #Sync\n\t"
429 "mov 0x20, %%g1\n\t"
430 "ldxa [%%g1] 0x7f, %%g0\n\t"
431 "membar #Sync"
432 : "=r" (tmp)
433 : "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
434 "r" (data0), "r" (data1), "r" (data2), "r" (target),
435 "r" (0x10), "0" (tmp)
436 : "g1");
437
438 /* NOTE: PSTATE_IE is still clear. */
 /* Poll the dispatch status, giving up after 100000 reads. */
439 stuck = 100000;
440 do {
441 __asm__ __volatile__("ldxa [%%g0] %1, %0"
442 : "=r" (result)
443 : "i" (ASI_INTR_DISPATCH_STAT));
444 if (result == 0) {
 /* Delivered: put back the caller's %pstate and finish. */
445 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
446 : : "r" (pstate));
447 return;
448 }
449 stuck -= 1;
450 if (stuck == 0)
451 break;
452 } while (result & 0x1);
 /* Restore the caller's %pstate before reporting or delaying. */
453 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
454 : : "r" (pstate));
455 if (stuck == 0) {
456 printk("CPU[%d]: mondo stuckage result[%016lx]\n",
457 smp_processor_id(), result);
458 } else {
 /* Status is non-zero with bit 0 clear: back off and resend. */
459 udelay(2);
460 goto again;
461 }
462}
463
464static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
465{
466 u64 *mondo, data0, data1, data2;
467 u16 *cpu_list;
468 u64 pstate;
469 int i;
470
471 __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
472 cpu_list = __va(tb->cpu_list_pa);
473 mondo = __va(tb->cpu_mondo_block_pa);
474 data0 = mondo[0];
475 data1 = mondo[1];
476 data2 = mondo[2];
477 for (i = 0; i < cnt; i++)
478 spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
479}
480
481/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
482 * packet, but we have no use for that. However we do take advantage of
483 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
484 */
/* Cheetah delivery: send the mondo in @tb's mondo block to the first
 * @cnt entries of @tb's cpu list, dispatching up to 32 targets per pass.
 *
 * Entries equal to 0xffff are treated as already delivered and skipped.
 * After each pass the dispatch status is polled; targets that were
 * delivered are overwritten with 0xffff in the cpu list and the pass is
 * repeated for whatever is left (NACKed targets or targets beyond the
 * first 32).  If busy bits never clear, the condition is logged and the
 * function gives up.  The cpu list in @tb is modified in place.
 */
485static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
486{
487 int nack_busy_id, is_jbus, need_more;
488 u64 *mondo, pstate, ver, busy_mask;
489 u16 *cpu_list;
490
491 cpu_list = __va(tb->cpu_list_pa);
492 mondo = __va(tb->cpu_mondo_block_pa);
493
494 /* Unfortunately, someone at Sun had the brilliant idea to make the
495 * busy/nack fields hard-coded by ITID number for this Ultra-III
496 * derivative processor.
497 */
498 __asm__ ("rdpr %%ver, %0" : "=r" (ver));
499 is_jbus = ((ver >> 32) == __JALAPENO_ID ||
500 (ver >> 32) == __SERRANO_ID);
501
 /* Saved once; written back with "wrpr %0, 0x0" on every exit path. */
502 __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
503
504retry:
505 need_more = 0;
 /* Change %pstate using PSTATE_IE for the dispatch and poll phase
  * (the later restore is described below as re-enabling interrupts).
  */
506 __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
507 : : "r" (pstate), "i" (PSTATE_IE));
508
509 /* Setup the dispatch data registers. */
510 __asm__ __volatile__("stxa %0, [%3] %6\n\t"
511 "stxa %1, [%4] %6\n\t"
512 "stxa %2, [%5] %6\n\t"
513 "membar #Sync\n\t"
514 : /* no outputs */
515 : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
516 "r" (0x40), "r" (0x50), "r" (0x60),
517 "i" (ASI_INTR_W));
518
 /* nack_busy_id counts dispatches made in this pass; busy_mask
  * collects the status bit expected for each of them.
  */
519 nack_busy_id = 0;
520 busy_mask = 0;
521 {
522 int i;
523
524 for (i = 0; i < cnt; i++) {
525 u64 target, nr;
526
527 nr = cpu_list[i];
528 if (nr == 0xffff)
529 continue;
530
531 target = (nr << 14) | 0x70;
532 if (is_jbus) {
 /* Status bit position is derived from the cpu number. */
533 busy_mask |= (0x1UL << (nr * 2));
534 } else {
 /* Status bit position is derived from the dispatch slot,
  * which is also encoded into the target at bit 24.
  */
535 target |= (nack_busy_id << 24);
536 busy_mask |= (0x1UL <<
537 (nack_busy_id * 2));
538 }
539 __asm__ __volatile__(
540 "stxa %%g0, [%0] %1\n\t"
541 "membar #Sync\n\t"
542 : /* no outputs */
543 : "r" (target), "i" (ASI_INTR_W));
544 nack_busy_id++;
 /* At most 32 dispatches per pass; the rest wait for a retry. */
545 if (nack_busy_id == 32) {
546 need_more = 1;
547 break;
548 }
549 }
550 }
551
552 /* Now, poll for completion. */
553 {
554 u64 dispatch_stat, nack_mask;
555 long stuck;
556
 /* Poll budget scales with the number of dispatches made; each
  * nack bit sits one position above its busy bit.
  */
557 stuck = 100000 * nack_busy_id;
558 nack_mask = busy_mask << 1;
559 do {
560 __asm__ __volatile__("ldxa [%%g0] %1, %0"
561 : "=r" (dispatch_stat)
562 : "i" (ASI_INTR_DISPATCH_STAT));
563 if (!(dispatch_stat & (busy_mask | nack_mask))) {
 /* Neither busy nor nack set: this pass was fully delivered. */
564 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
565 : : "r" (pstate));
566 if (unlikely(need_more)) {
 /* Mark the first 32 pending entries as done, then
  * go around again for the remainder.
  */
567 int i, this_cnt = 0;
568 for (i = 0; i < cnt; i++) {
569 if (cpu_list[i] == 0xffff)
570 continue;
571 cpu_list[i] = 0xffff;
572 this_cnt++;
573 if (this_cnt == 32)
574 break;
575 }
576 goto retry;
577 }
578 return;
579 }
580 if (!--stuck)
581 break;
582 } while (dispatch_stat & busy_mask);
583
584 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
585 : : "r" (pstate));
586
587 if (dispatch_stat & busy_mask) {
588 /* Busy bits will not clear, continue instead
589 * of freezing up on this cpu.
590 */
591 printk("CPU[%d]: mondo stuckage result[%016lx]\n",
592 smp_processor_id(), dispatch_stat);
593 } else {
594 int i, this_busy_nack = 0;
595
596 /* Delay some random time with interrupts enabled
597 * to prevent deadlock.
598 */
599 udelay(2 * nack_busy_id);
600
601 /* Clear out the mask bits for cpus which did not
602 * NACK us.
603 */
604 for (i = 0; i < cnt; i++) {
605 u64 check_mask, nr;
606
607 nr = cpu_list[i];
608 if (nr == 0xffff)
609 continue;
610
611 if (is_jbus)
612 check_mask = (0x2UL << (2*nr));
613 else
614 check_mask = (0x2UL <<
615 this_busy_nack);
616 if ((dispatch_stat & check_mask) == 0)
617 cpu_list[i] = 0xffff;
618 this_busy_nack += 2;
 /* Only the 32 slots dispatched in this pass have status. */
619 if (this_busy_nack == 64)
620 break;
621 }
622
623 goto retry;
624 }
625 }
626}
627
628/* Multi-cpu list version. */
629static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
630{
631 int retries, this_cpu, prev_sent, i, saw_cpu_error;
632 unsigned long status;
633 u16 *cpu_list;
634
635 this_cpu = smp_processor_id();
636
637 cpu_list = __va(tb->cpu_list_pa);
638
639 saw_cpu_error = 0;
640 retries = 0;
641 prev_sent = 0;
642 do {
643 int forward_progress, n_sent;
644
645 status = sun4v_cpu_mondo_send(cnt,
646 tb->cpu_list_pa,
647 tb->cpu_mondo_block_pa);
648
649 /* HV_EOK means all cpus received the xcall, we're done. */
650 if (likely(status == HV_EOK))
651 break;
652
653 /* First, see if we made any forward progress.
654 *
655 * The hypervisor indicates successful sends by setting
656 * cpu list entries to the value 0xffff.
657 */
658 n_sent = 0;
659 for (i = 0; i < cnt; i++) {
660 if (likely(cpu_list[i] == 0xffff))
661 n_sent++;
662 }
663
664 forward_progress = 0;
665 if (n_sent > prev_sent)
666 forward_progress = 1;
667
668 prev_sent = n_sent;
669
670 /* If we get a HV_ECPUERROR, then one or more of the cpus
671 * in the list are in error state. Use the cpu_state()
672 * hypervisor call to find out which cpus are in error state.
673 */
674 if (unlikely(status == HV_ECPUERROR)) {
675 for (i = 0; i < cnt; i++) {
676 long err;
677 u16 cpu;
678
679 cpu = cpu_list[i];
680 if (cpu == 0xffff)
681 continue;
682
683 err = sun4v_cpu_state(cpu);
684 if (err == HV_CPU_STATE_ERROR) {
685 saw_cpu_error = (cpu + 1);
686 cpu_list[i] = 0xffff;
687 }
688 }
689 } else if (unlikely(status != HV_EWOULDBLOCK))
690 goto fatal_mondo_error;
691
692 /* Don't bother rewriting the CPU list, just leave the
693 * 0xffff and non-0xffff entries in there and the
694 * hypervisor will do the right thing.
695 *
696 * Only advance timeout state if we didn't make any
697 * forward progress.
698 */
699 if (unlikely(!forward_progress)) {
700 if (unlikely(++retries > 10000))
701 goto fatal_mondo_timeout;
702
703 /* Delay a little bit to let other cpus catch up
704 * on their cpu mondo queue work.
705 */
706 udelay(2 * cnt);
707 }
708 } while (1);
709
710 if (unlikely(saw_cpu_error))
711 goto fatal_mondo_cpu_error;
712
713 return;
714
715fatal_mondo_cpu_error:
716 printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
717 "(including %d) were in error state\n",
718 this_cpu, saw_cpu_error - 1);
719 return;
720
721fatal_mondo_timeout:
722 printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
723 " progress after %d retries.\n",
724 this_cpu, retries);
725 goto dump_cpu_list_and_out;
726
727fatal_mondo_error:
728 printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
729 this_cpu, status);
730 printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
731 "mondo_block_pa(%lx)\n",
732 this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
733
734dump_cpu_list_and_out:
735 printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
736 for (i = 0; i < cnt; i++)
737 printk("%u ", cpu_list[i]);
738 printk("]\n");
739}
740
741static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
742
743static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
744{
745 struct trap_per_cpu *tb;
746 int this_cpu, i, cnt;
747 unsigned long flags;
748 u16 *cpu_list;
749 u64 *mondo;
750
751 /* We have to do this whole thing with interrupts fully disabled.
752 * Otherwise if we send an xcall from interrupt context it will
753 * corrupt both our mondo block and cpu list state.
754 *
755 * One consequence of this is that we cannot use timeout mechanisms
756 * that depend upon interrupts being delivered locally. So, for
757 * example, we cannot sample jiffies and expect it to advance.
758 *
759 * Fortunately, udelay() uses %stick/%tick so we can use that.
760 */
761 local_irq_save(flags);
762
763 this_cpu = smp_processor_id();
764 tb = &trap_block[this_cpu];
765
766 mondo = __va(tb->cpu_mondo_block_pa);
767 mondo[0] = data0;
768 mondo[1] = data1;
769 mondo[2] = data2;
770 wmb();
771
772 cpu_list = __va(tb->cpu_list_pa);
773
774 /* Setup the initial cpu list. */
775 cnt = 0;
776 for_each_cpu_mask_nr(i, *mask) {
777 if (i == this_cpu || !cpu_online(i))
778 continue;
779 cpu_list[cnt++] = i;
780 }
781
782 if (cnt)
783 xcall_deliver_impl(tb, cnt);
784
785 local_irq_restore(flags);
786}
787
788/* Send cross call to all processors mentioned in MASK_P
789 * except self. Really, there are only two cases currently,
790 * "&cpu_online_map" and "&mm->cpu_vm_mask".
791 */
792static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
793{
794 u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
795
796 xcall_deliver(data0, data1, data2, mask);
797}
798
799/* Send cross call to all processors except self. */
800static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
801{
802 smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
803}
804
805extern unsigned long xcall_sync_tick;
806
807static void smp_start_sync_tick_client(int cpu)
808{
809 xcall_deliver((u64) &xcall_sync_tick, 0, 0,
810 &cpumask_of_cpu(cpu));
811}
812
813extern unsigned long xcall_call_function;
814
815void arch_send_call_function_ipi(cpumask_t mask)
816{
817 xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
818}
819
820extern unsigned long xcall_call_function_single;
821
822void arch_send_call_function_single_ipi(int cpu)
823{
824 xcall_deliver((u64) &xcall_call_function_single, 0, 0,
825 &cpumask_of_cpu(cpu));
826}
827
/* Clear the softint bit for @irq, then run the generic call-function
 * interrupt handler.  @regs is not used.
 */
void smp_call_function_client(int irq, struct pt_regs *regs)
{
	int softint_bit = 1 << irq;

	clear_softint(softint_bit);
	generic_smp_call_function_interrupt();
}
833
/* Clear the softint bit for @irq, then run the generic single-target
 * call-function interrupt handler.  @regs is not used.
 */
void smp_call_function_single_client(int irq, struct pt_regs *regs)
{
	int softint_bit = 1 << irq;

	clear_softint(softint_bit);
	generic_smp_call_function_single_interrupt();
}
839
840static void tsb_sync(void *info)
841{
842 struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
843 struct mm_struct *mm = info;
844
845 /* It is not valid to test "currrent->active_mm == mm" here.
846 *
847 * The value of "current" is not changed atomically with
848 * switch_mm(). But that's OK, we just need to check the
849 * current cpu's trap block PGD physical address.
850 */
851 if (tp->pgd_paddr == __pa(mm->pgd))
852 tsb_context_switch(mm);
853}
854
855void smp_tsb_sync(struct mm_struct *mm)
856{
857 smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1);
858}
859
860extern unsigned long xcall_flush_tlb_mm;
861extern unsigned long xcall_flush_tlb_pending;
862extern unsigned long xcall_flush_tlb_kernel_range;
863extern unsigned long xcall_fetch_glob_regs;
864extern unsigned long xcall_receive_signal;
865extern unsigned long xcall_new_mmu_context_version;
866#ifdef CONFIG_KGDB
867extern unsigned long xcall_kgdb_capture;
868#endif
869
870#ifdef DCACHE_ALIASING_POSSIBLE
871extern unsigned long xcall_flush_dcache_page_cheetah;
872#endif
873extern unsigned long xcall_flush_dcache_page_spitfire;
874
875#ifdef CONFIG_DEBUG_DCFLUSH
876extern atomic_t dcpage_flushes;
877extern atomic_t dcpage_flushes_xcall;
878#endif
879
880static inline void __local_flush_dcache_page(struct page *page)
881{
882#ifdef DCACHE_ALIASING_POSSIBLE
883 __flush_dcache_page(page_address(page),
884 ((tlb_type == spitfire) &&
885 page_mapping(page) != NULL));
886#else
887 if (page_mapping(page) != NULL &&
888 tlb_type == spitfire)
889 __flush_icache_page(__pa(page_address(page)));
890#endif
891}
892
893void smp_flush_dcache_page_impl(struct page *page, int cpu)
894{
895 int this_cpu;
896
897 if (tlb_type == hypervisor)
898 return;
899
900#ifdef CONFIG_DEBUG_DCFLUSH
901 atomic_inc(&dcpage_flushes);
902#endif
903
904 this_cpu = get_cpu();
905
906 if (cpu == this_cpu) {
907 __local_flush_dcache_page(page);
908 } else if (cpu_online(cpu)) {
909 void *pg_addr = page_address(page);
910 u64 data0 = 0;
911
912 if (tlb_type == spitfire) {
913 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
914 if (page_mapping(page) != NULL)
915 data0 |= ((u64)1 << 32);
916 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
917#ifdef DCACHE_ALIASING_POSSIBLE
918 data0 = ((u64)&xcall_flush_dcache_page_cheetah);
919#endif
920 }
921 if (data0) {
922 xcall_deliver(data0, __pa(pg_addr),
923 (u64) pg_addr, &cpumask_of_cpu(cpu));
924#ifdef CONFIG_DEBUG_DCFLUSH
925 atomic_inc(&dcpage_flushes_xcall);
926#endif
927 }
928 }
929
930 put_cpu();
931}
932
933void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
934{
935 void *pg_addr;
936 int this_cpu;
937 u64 data0;
938
939 if (tlb_type == hypervisor)
940 return;
941
942 this_cpu = get_cpu();
943
944#ifdef CONFIG_DEBUG_DCFLUSH
945 atomic_inc(&dcpage_flushes);
946#endif
947 data0 = 0;
948 pg_addr = page_address(page);
949 if (tlb_type == spitfire) {
950 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
951 if (page_mapping(page) != NULL)
952 data0 |= ((u64)1 << 32);
953 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
954#ifdef DCACHE_ALIASING_POSSIBLE
955 data0 = ((u64)&xcall_flush_dcache_page_cheetah);
956#endif
957 }
958 if (data0) {
959 xcall_deliver(data0, __pa(pg_addr),
960 (u64) pg_addr, &cpu_online_map);
961#ifdef CONFIG_DEBUG_DCFLUSH
962 atomic_inc(&dcpage_flushes_xcall);
963#endif
964 }
965 __local_flush_dcache_page(page);
966
967 put_cpu();
968}
969
970void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
971{
972 struct mm_struct *mm;
973 unsigned long flags;
974
975 clear_softint(1 << irq);
976
977 /* See if we need to allocate a new TLB context because
978 * the version of the one we are using is now out of date.
979 */
980 mm = current->active_mm;
981 if (unlikely(!mm || (mm == &init_mm)))
982 return;
983
984 spin_lock_irqsave(&mm->context.lock, flags);
985
986 if (unlikely(!CTX_VALID(mm->context)))
987 get_new_mmu_context(mm);
988
989 spin_unlock_irqrestore(&mm->context.lock, flags);
990
991 load_secondary_context(mm);
992 __flush_tlb_mm(CTX_HWBITS(mm->context),
993 SECONDARY_CONTEXT);
994}
995
996void smp_new_mmu_context_version(void)
997{
998 smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
999}
1000
#ifdef CONFIG_KGDB
/* Cross-call the other online cpus with the xcall_kgdb_capture handler.
 * @flags is not used.
 */
void kgdb_roundup_cpus(unsigned long flags)
{
	unsigned long *handler = &xcall_kgdb_capture;

	smp_cross_call(handler, 0, 0, 0);
}
#endif
1007
1008void smp_fetch_global_regs(void)
1009{
1010 smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
1011}
1012
1013/* We know that the window frames of the user have been flushed
1014 * to the stack before we get here because all callers of us
1015 * are flush_tlb_*() routines, and these run after flush_cache_*()
1016 * which performs the flushw.
1017 *
1018 * The SMP TLB coherency scheme we use works as follows:
1019 *
1020 * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
1021 * space has (potentially) executed on, this is the heuristic
1022 * we use to avoid doing cross calls.
1023 *
1024 * Also, for flushing from kswapd and also for clones, we
1025 * use cpu_vm_mask as the list of cpus on which to run the TLB flush.
1026 *
1027 * 2) TLB context numbers are shared globally across all processors
1028 * in the system, this allows us to play several games to avoid
1029 * cross calls.
1030 *
1031 * One invariant is that when a cpu switches to a process, and
1032 * that process's tsk->active_mm->cpu_vm_mask does not have the
1033 * current cpu's bit set, that tlb context is flushed locally.
1034 *
1035 * If the address space is non-shared (ie. mm->mm_users == 1) we avoid
1036 * cross calls when we want to flush the currently running process's
1037 * tlb state. This is done by clearing all cpu bits except the current
1038 * processor's in current->active_mm->cpu_vm_mask and performing the
1039 * flush locally only. This will force any subsequent cpus which run
1040 * this task to flush the context from the local tlb if the process
1041 * migrates to another cpu (again).
1042 *
1043 * 3) For shared address spaces (threads) and swapping we bite the
1044 * bullet for most cases and perform the cross call (but only to
1045 * the cpus listed in cpu_vm_mask).
1046 *
1047 * The performance gain from "optimizing" away the cross call for threads is
1048 * questionable (in theory the big win for threads is the massive sharing of
1049 * address space state across processors).
1050 */
1051
1052/* This currently is only used by the hugetlb arch pre-fault
1053 * hook on UltraSPARC-III+ and later when changing the pagesize
1054 * bits of the context register for an address space.
1055 */
1056void smp_flush_tlb_mm(struct mm_struct *mm)
1057{
1058 u32 ctx = CTX_HWBITS(mm->context);
1059 int cpu = get_cpu();
1060
1061 if (atomic_read(&mm->mm_users) == 1) {
1062 mm->cpu_vm_mask = cpumask_of_cpu(cpu);
1063 goto local_flush_and_out;
1064 }
1065
1066 smp_cross_call_masked(&xcall_flush_tlb_mm,
1067 ctx, 0, 0,
1068 &mm->cpu_vm_mask);
1069
1070local_flush_and_out:
1071 __flush_tlb_mm(ctx, SECONDARY_CONTEXT);
1072
1073 put_cpu();
1074}
1075
1076void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
1077{
1078 u32 ctx = CTX_HWBITS(mm->context);
1079 int cpu = get_cpu();
1080
1081 if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1)
1082 mm->cpu_vm_mask = cpumask_of_cpu(cpu);
1083 else
1084 smp_cross_call_masked(&xcall_flush_tlb_pending,
1085 ctx, nr, (unsigned long) vaddrs,
1086 &mm->cpu_vm_mask);
1087
1088 __flush_tlb_pending(ctx, nr, vaddrs);
1089
1090 put_cpu();
1091}
1092
1093void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
1094{
1095 start &= PAGE_MASK;
1096 end = PAGE_ALIGN(end);
1097 if (start != end) {
1098 smp_cross_call(&xcall_flush_tlb_kernel_range,
1099 0, start, end);
1100
1101 __flush_tlb_kernel_range(start, end);
1102 }
1103}
1104
1105/* CPU capture. */
1106/* #define CAPTURE_DEBUG */
1107extern unsigned long xcall_capture;
1108
1109static atomic_t smp_capture_depth = ATOMIC_INIT(0);
1110static atomic_t smp_capture_registry = ATOMIC_INIT(0);
1111static unsigned long penguins_are_doing_time;
1112
1113void smp_capture(void)
1114{
1115 int result = atomic_add_ret(1, &smp_capture_depth);
1116
1117 if (result == 1) {
1118 int ncpus = num_online_cpus();
1119
1120#ifdef CAPTURE_DEBUG
1121 printk("CPU[%d]: Sending penguins to jail...",
1122 smp_processor_id());
1123#endif
1124 penguins_are_doing_time = 1;
1125 atomic_inc(&smp_capture_registry);
1126 smp_cross_call(&xcall_capture, 0, 0, 0);
1127 while (atomic_read(&smp_capture_registry) != ncpus)
1128 rmb();
1129#ifdef CAPTURE_DEBUG
1130 printk("done\n");
1131#endif
1132 }
1133}
1134
1135void smp_release(void)
1136{
1137 if (atomic_dec_and_test(&smp_capture_depth)) {
1138#ifdef CAPTURE_DEBUG
1139 printk("CPU[%d]: Giving pardon to "
1140 "imprisoned penguins\n",
1141 smp_processor_id());
1142#endif
1143 penguins_are_doing_time = 0;
1144 membar_safe("#StoreLoad");
1145 atomic_dec(&smp_capture_registry);
1146 }
1147}
1148
1149/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
1150 * set, so they can service tlb flush xcalls...
1151 */
1152extern void prom_world(int);
1153
1154void smp_penguin_jailcell(int irq, struct pt_regs *regs)
1155{
1156 clear_softint(1 << irq);
1157
1158 preempt_disable();
1159
1160 __asm__ __volatile__("flushw");
1161 prom_world(1);
1162 atomic_inc(&smp_capture_registry);
1163 membar_safe("#StoreLoad");
1164 while (penguins_are_doing_time)
1165 rmb();
1166 atomic_dec(&smp_capture_registry);
1167 prom_world(0);
1168
1169 preempt_enable();
1170}
1171
/* Reject any request to change the profiling timer multiplier.
 *
 * Always returns -EINVAL, whatever @multiplier is.
 *
 * /proc/profile writes can call this, so it must not be marked __init.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	const int status = -EINVAL;

	return status;
}
1177
/* SMP preparation hook; this implementation does no work and ignores
 * @max_cpus.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
}
1181
/* Boot cpu preparation hook; this implementation does no work. */
void __devinit smp_prepare_boot_cpu(void)
{
}
1185
1186void __init smp_setup_processor_id(void)
1187{
1188 if (tlb_type == spitfire)
1189 xcall_deliver_impl = spitfire_xcall_deliver;
1190 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
1191 xcall_deliver_impl = cheetah_xcall_deliver;
1192 else
1193 xcall_deliver_impl = hypervisor_xcall_deliver;
1194}
1195
1196void __devinit smp_fill_in_sib_core_maps(void)
1197{
1198 unsigned int i;
1199
1200 for_each_present_cpu(i) {
1201 unsigned int j;
1202
1203 cpus_clear(cpu_core_map[i]);
1204 if (cpu_data(i).core_id == 0) {
1205 cpu_set(i, cpu_core_map[i]);
1206 continue;
1207 }
1208
1209 for_each_present_cpu(j) {
1210 if (cpu_data(i).core_id ==
1211 cpu_data(j).core_id)
1212 cpu_set(j, cpu_core_map[i]);
1213 }
1214 }
1215
1216 for_each_present_cpu(i) {
1217 unsigned int j;
1218
1219 cpus_clear(per_cpu(cpu_sibling_map, i));
1220 if (cpu_data(i).proc_id == -1) {
1221 cpu_set(i, per_cpu(cpu_sibling_map, i));
1222 continue;
1223 }
1224
1225 for_each_present_cpu(j) {
1226 if (cpu_data(i).proc_id ==
1227 cpu_data(j).proc_id)
1228 cpu_set(j, per_cpu(cpu_sibling_map, i));
1229 }
1230 }
1231}
1232
1233int __cpuinit __cpu_up(unsigned int cpu)
1234{
1235 int ret = smp_boot_one_cpu(cpu);
1236
1237 if (!ret) {
1238 cpu_set(cpu, smp_commenced_mask);
1239 while (!cpu_isset(cpu, cpu_online_map))
1240 mb();
1241 if (!cpu_isset(cpu, cpu_online_map)) {
1242 ret = -ENODEV;
1243 } else {
1244 /* On SUN4V, writes to %tick and %stick are
1245 * not allowed.
1246 */
1247 if (tlb_type != hypervisor)
1248 smp_synchronize_one_tick(cpu);
1249 }
1250 }
1251 return ret;
1252}
1253
1254#ifdef CONFIG_HOTPLUG_CPU
1255void cpu_play_dead(void)
1256{
1257 int cpu = smp_processor_id();
1258 unsigned long pstate;
1259
1260 idle_task_exit();
1261
1262 if (tlb_type == hypervisor) {
1263 struct trap_per_cpu *tb = &trap_block[cpu];
1264
1265 sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
1266 tb->cpu_mondo_pa, 0);
1267 sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
1268 tb->dev_mondo_pa, 0);
1269 sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
1270 tb->resum_mondo_pa, 0);
1271 sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
1272 tb->nonresum_mondo_pa, 0);
1273 }
1274
1275 cpu_clear(cpu, smp_commenced_mask);
1276 membar_safe("#Sync");
1277
1278 local_irq_disable();
1279
1280 __asm__ __volatile__(
1281 "rdpr %%pstate, %0\n\t"
1282 "wrpr %0, %1, %%pstate"
1283 : "=r" (pstate)
1284 : "i" (PSTATE_IE));
1285
1286 while (1)
1287 barrier();
1288}
1289
1290int __cpu_disable(void)
1291{
1292 int cpu = smp_processor_id();
1293 cpuinfo_sparc *c;
1294 int i;
1295
1296 for_each_cpu_mask(i, cpu_core_map[cpu])
1297 cpu_clear(cpu, cpu_core_map[i]);
1298 cpus_clear(cpu_core_map[cpu]);
1299
1300 for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
1301 cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
1302 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1303
1304 c = &cpu_data(cpu);
1305
1306 c->core_id = 0;
1307 c->proc_id = -1;
1308
1309 smp_wmb();
1310
1311 /* Make sure no interrupts point to this cpu. */
1312 fixup_irqs();
1313
1314 local_irq_enable();
1315 mdelay(1);
1316 local_irq_disable();
1317
1318 ipi_call_lock();
1319 cpu_clear(cpu, cpu_online_map);
1320 ipi_call_unlock();
1321
1322 return 0;
1323}
1324
1325void __cpu_die(unsigned int cpu)
1326{
1327 int i;
1328
1329 for (i = 0; i < 100; i++) {
1330 smp_rmb();
1331 if (!cpu_isset(cpu, smp_commenced_mask))
1332 break;
1333 msleep(100);
1334 }
1335 if (cpu_isset(cpu, smp_commenced_mask)) {
1336 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1337 } else {
1338#if defined(CONFIG_SUN_LDOMS)
1339 unsigned long hv_err;
1340 int limit = 100;
1341
1342 do {
1343 hv_err = sun4v_cpu_stop(cpu);
1344 if (hv_err == HV_EOK) {
1345 cpu_clear(cpu, cpu_present_map);
1346 break;
1347 }
1348 } while (--limit > 0);
1349 if (limit <= 0) {
1350 printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
1351 hv_err);
1352 }
1353#endif
1354 }
1355}
1356#endif
1357
/* SMP bring-up completion hook; this implementation does no work and
 * ignores @max_cpus.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
}
1361
1362void smp_send_reschedule(int cpu)
1363{
1364 xcall_deliver((u64) &xcall_receive_signal, 0, 0,
1365 &cpumask_of_cpu(cpu));
1366}
1367
/* Handler for the receive-signal softint: it only acknowledges the
 * softint bit for @irq.  @regs is unused.
 */
void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	int softint_bit = 1 << irq;

	clear_softint(softint_bit);
}
1372
/* Stop the other cpus.
 *
 * This is a nop because we capture all other cpus
 * anyways when making the PROM active.
 */
void smp_send_stop(void)
{
}
1379
/* Both values are computed once by real_setup_per_cpu_areas().
 *
 * __per_cpu_base is the virtual address of the allocated per-cpu
 * region minus __per_cpu_start; __per_cpu_shift is log2 of the size
 * of one cpu's slice of that region.
 */
unsigned long __per_cpu_base __read_mostly;
unsigned long __per_cpu_shift __read_mostly;

EXPORT_SYMBOL(__per_cpu_base);
EXPORT_SYMBOL(__per_cpu_shift);
1385
1386void __init real_setup_per_cpu_areas(void)
1387{
1388 unsigned long paddr, goal, size, i;
1389 char *ptr;
1390
1391 /* Copy section for each CPU (we discard the original) */
1392 goal = PERCPU_ENOUGH_ROOM;
1393
1394 __per_cpu_shift = PAGE_SHIFT;
1395 for (size = PAGE_SIZE; size < goal; size <<= 1UL)
1396 __per_cpu_shift++;
1397
1398 paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
1399 if (!paddr) {
1400 prom_printf("Cannot allocate per-cpu memory.\n");
1401 prom_halt();
1402 }
1403
1404 ptr = __va(paddr);
1405 __per_cpu_base = ptr - __per_cpu_start;
1406
1407 for (i = 0; i < NR_CPUS; i++, ptr += size)
1408 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
1409
1410 /* Setup %g5 for the boot cpu. */
1411 __local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
1412}