aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2009-04-01 04:47:10 -0400
committer David S. Miller <davem@davemloft.net> 2009-06-16 07:56:11 -0400
commit 5a5488d3bb9a23d9884572e5d85dfeefe8749d3d (patch)
tree afa8db75cdf771257cd5541ed80a606df60f9cf8
parent 19f0fa3fb3499d8c5fb861933959f546d05fc202 (diff)
sparc64: Store per-cpu offset in trap_block[]
Surprisingly this actually makes LOAD_PER_CPU_BASE() a little more efficient.

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/sparc/include/asm/percpu_64.h 6
-rw-r--r-- arch/sparc/include/asm/trap_block.h 14
-rw-r--r-- arch/sparc/kernel/head_64.S 22
-rw-r--r-- arch/sparc/kernel/smp_64.c 18
-rw-r--r-- arch/sparc/kernel/traps_64.c 5
5 files changed, 21 insertions, 44 deletions
diff --git a/arch/sparc/include/asm/percpu_64.h b/arch/sparc/include/asm/percpu_64.h
index bee64593023e..c0ab102d11f6 100644
--- a/arch/sparc/include/asm/percpu_64.h
+++ b/arch/sparc/include/asm/percpu_64.h
@@ -7,12 +7,12 @@ register unsigned long __local_per_cpu_offset asm("g5");
7 7
8#ifdef CONFIG_SMP 8#ifdef CONFIG_SMP
9 9
10#include <asm/trap_block.h>
11
10extern void real_setup_per_cpu_areas(void); 12extern void real_setup_per_cpu_areas(void);
11 13
12extern unsigned long __per_cpu_base;
13extern unsigned long __per_cpu_shift;
14#define __per_cpu_offset(__cpu) \ 14#define __per_cpu_offset(__cpu) \
15 (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) 15 (trap_block[(__cpu)].__per_cpu_base)
16#define per_cpu_offset(x) (__per_cpu_offset(x)) 16#define per_cpu_offset(x) (__per_cpu_offset(x))
17 17
18#define __my_cpu_offset __local_per_cpu_offset 18#define __my_cpu_offset __local_per_cpu_offset
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index 68fd9ee3e8ae..7e26b2db6211 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -48,7 +48,7 @@ struct trap_per_cpu {
48 unsigned int dev_mondo_qmask; 48 unsigned int dev_mondo_qmask;
49 unsigned int resum_qmask; 49 unsigned int resum_qmask;
50 unsigned int nonresum_qmask; 50 unsigned int nonresum_qmask;
51 unsigned long __unused; 51 unsigned long __per_cpu_base;
52} __attribute__((aligned(64))); 52} __attribute__((aligned(64)));
53extern struct trap_per_cpu trap_block[NR_CPUS]; 53extern struct trap_per_cpu trap_block[NR_CPUS];
54extern void init_cur_cpu_trap(struct thread_info *); 54extern void init_cur_cpu_trap(struct thread_info *);
@@ -101,6 +101,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
101#define TRAP_PER_CPU_DEV_MONDO_QMASK 0xec 101#define TRAP_PER_CPU_DEV_MONDO_QMASK 0xec
102#define TRAP_PER_CPU_RESUM_QMASK 0xf0 102#define TRAP_PER_CPU_RESUM_QMASK 0xf0
103#define TRAP_PER_CPU_NONRESUM_QMASK 0xf4 103#define TRAP_PER_CPU_NONRESUM_QMASK 0xf4
104#define TRAP_PER_CPU_PER_CPU_BASE 0xf8
104 105
105#define TRAP_BLOCK_SZ_SHIFT 8 106#define TRAP_BLOCK_SZ_SHIFT 8
106 107
@@ -172,12 +173,11 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
172 */ 173 */
173#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ 174#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \
174 lduh [THR + TI_CPU], REG1; \ 175 lduh [THR + TI_CPU], REG1; \
175 sethi %hi(__per_cpu_shift), REG3; \ 176 sethi %hi(trap_block), REG2; \
176 sethi %hi(__per_cpu_base), REG2; \ 177 sllx REG1, TRAP_BLOCK_SZ_SHIFT, REG1; \
177 ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ 178 or REG2, %lo(trap_block), REG2; \
178 ldx [REG2 + %lo(__per_cpu_base)], REG2; \ 179 add REG2, REG1, REG2; \
179 sllx REG1, REG3, REG3; \ 180 ldx [REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST;
180 add REG3, REG2, DEST;
181 181
182#else 182#else
183 183
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 91bf4c7f79b9..f8f21050448b 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -641,28 +641,6 @@ tlb_fixup_done:
641 /* Not reached... */ 641 /* Not reached... */
642 642
6431: 6431:
644 /* If we boot on a non-zero cpu, all of the per-cpu
645 * variable references we make before setting up the
646 * per-cpu areas will use a bogus offset. Put a
647 * compensating factor into __per_cpu_base to handle
648 * this cleanly.
649 *
650 * What the per-cpu code calculates is:
651 *
652 * __per_cpu_base + (cpu << __per_cpu_shift)
653 *
654 * These two variables are zero initially, so to
655 * make it all cancel out to zero we need to put
656 * "0 - (cpu << 0)" into __per_cpu_base so that the
657 * above formula evaluates to zero.
658 *
659 * We cannot even perform a printk() until this stuff
660 * is setup as that calls cpu_clock() which uses
661 * per-cpu variables.
662 */
663 sub %g0, %o0, %o1
664 sethi %hi(__per_cpu_base), %o2
665 stx %o1, [%o2 + %lo(__per_cpu_base)]
666#else 644#else
667 mov 0, %o0 645 mov 0, %o0
668#endif 646#endif
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 4226d0ebaea5..b20f253857b7 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1371,23 +1371,17 @@ void smp_send_stop(void)
1371{ 1371{
1372} 1372}
1373 1373
1374unsigned long __per_cpu_base __read_mostly;
1375unsigned long __per_cpu_shift __read_mostly;
1376
1377EXPORT_SYMBOL(__per_cpu_base);
1378EXPORT_SYMBOL(__per_cpu_shift);
1379
1380void __init real_setup_per_cpu_areas(void) 1374void __init real_setup_per_cpu_areas(void)
1381{ 1375{
1382 unsigned long paddr, goal, size, i; 1376 unsigned long base, shift, paddr, goal, size, i;
1383 char *ptr; 1377 char *ptr;
1384 1378
1385 /* Copy section for each CPU (we discard the original) */ 1379 /* Copy section for each CPU (we discard the original) */
1386 goal = PERCPU_ENOUGH_ROOM; 1380 goal = PERCPU_ENOUGH_ROOM;
1387 1381
1388 __per_cpu_shift = PAGE_SHIFT; 1382 shift = PAGE_SHIFT;
1389 for (size = PAGE_SIZE; size < goal; size <<= 1UL) 1383 for (size = PAGE_SIZE; size < goal; size <<= 1UL)
1390 __per_cpu_shift++; 1384 shift++;
1391 1385
1392 paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); 1386 paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
1393 if (!paddr) { 1387 if (!paddr) {
@@ -1396,10 +1390,12 @@ void __init real_setup_per_cpu_areas(void)
1396 } 1390 }
1397 1391
1398 ptr = __va(paddr); 1392 ptr = __va(paddr);
1399 __per_cpu_base = ptr - __per_cpu_start; 1393 base = ptr - __per_cpu_start;
1400 1394
1401 for (i = 0; i < NR_CPUS; i++, ptr += size) 1395 for (i = 0; i < NR_CPUS; i++, ptr += size) {
1396 __per_cpu_offset(i) = base + (i * size);
1402 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 1397 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
1398 }
1403 1399
1404 /* Setup %g5 for the boot cpu. */ 1400 /* Setup %g5 for the boot cpu. */
1405 __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); 1401 __local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index d809c4ebb48f..d073aabf65ed 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2509,6 +2509,7 @@ void do_getpsr(struct pt_regs *regs)
2509} 2509}
2510 2510
2511struct trap_per_cpu trap_block[NR_CPUS]; 2511struct trap_per_cpu trap_block[NR_CPUS];
2512EXPORT_SYMBOL(trap_block);
2512 2513
2513/* This can get invoked before sched_init() so play it super safe 2514/* This can get invoked before sched_init() so play it super safe
2514 * and use hard_smp_processor_id(). 2515 * and use hard_smp_processor_id().
@@ -2592,7 +2593,9 @@ void __init trap_init(void)
2592 (TRAP_PER_CPU_RESUM_QMASK != 2593 (TRAP_PER_CPU_RESUM_QMASK !=
2593 offsetof(struct trap_per_cpu, resum_qmask)) || 2594 offsetof(struct trap_per_cpu, resum_qmask)) ||
2594 (TRAP_PER_CPU_NONRESUM_QMASK != 2595 (TRAP_PER_CPU_NONRESUM_QMASK !=
2595 offsetof(struct trap_per_cpu, nonresum_qmask))) 2596 offsetof(struct trap_per_cpu, nonresum_qmask)) ||
2597 (TRAP_PER_CPU_PER_CPU_BASE !=
2598 offsetof(struct trap_per_cpu, __per_cpu_base)))
2596 trap_per_cpu_offsets_are_bolixed_dave(); 2599 trap_per_cpu_offsets_are_bolixed_dave();
2597 2600
2598 if ((TSB_CONFIG_TSB != 2601 if ((TSB_CONFIG_TSB !=