diff options
author | David S. Miller <davem@davemloft.net> | 2009-04-01 04:47:10 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-06-16 07:56:11 -0400 |
commit | 5a5488d3bb9a23d9884572e5d85dfeefe8749d3d (patch) | |
tree | afa8db75cdf771257cd5541ed80a606df60f9cf8 | |
parent | 19f0fa3fb3499d8c5fb861933959f546d05fc202 (diff) |
sparc64: Store per-cpu offset in trap_block[]
Surprisingly this actually makes LOAD_PER_CPU_BASE() a little
more efficient.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc/include/asm/percpu_64.h | 6 | ||||
-rw-r--r-- | arch/sparc/include/asm/trap_block.h | 14 | ||||
-rw-r--r-- | arch/sparc/kernel/head_64.S | 22 | ||||
-rw-r--r-- | arch/sparc/kernel/smp_64.c | 18 | ||||
-rw-r--r-- | arch/sparc/kernel/traps_64.c | 5 |
5 files changed, 21 insertions, 44 deletions
diff --git a/arch/sparc/include/asm/percpu_64.h b/arch/sparc/include/asm/percpu_64.h index bee64593023e..c0ab102d11f6 100644 --- a/arch/sparc/include/asm/percpu_64.h +++ b/arch/sparc/include/asm/percpu_64.h | |||
@@ -7,12 +7,12 @@ register unsigned long __local_per_cpu_offset asm("g5"); | |||
7 | 7 | ||
8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
9 | 9 | ||
10 | #include <asm/trap_block.h> | ||
11 | |||
10 | extern void real_setup_per_cpu_areas(void); | 12 | extern void real_setup_per_cpu_areas(void); |
11 | 13 | ||
12 | extern unsigned long __per_cpu_base; | ||
13 | extern unsigned long __per_cpu_shift; | ||
14 | #define __per_cpu_offset(__cpu) \ | 14 | #define __per_cpu_offset(__cpu) \ |
15 | (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) | 15 | (trap_block[(__cpu)].__per_cpu_base) |
16 | #define per_cpu_offset(x) (__per_cpu_offset(x)) | 16 | #define per_cpu_offset(x) (__per_cpu_offset(x)) |
17 | 17 | ||
18 | #define __my_cpu_offset __local_per_cpu_offset | 18 | #define __my_cpu_offset __local_per_cpu_offset |
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index 68fd9ee3e8ae..7e26b2db6211 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h | |||
@@ -48,7 +48,7 @@ struct trap_per_cpu { | |||
48 | unsigned int dev_mondo_qmask; | 48 | unsigned int dev_mondo_qmask; |
49 | unsigned int resum_qmask; | 49 | unsigned int resum_qmask; |
50 | unsigned int nonresum_qmask; | 50 | unsigned int nonresum_qmask; |
51 | unsigned long __unused; | 51 | unsigned long __per_cpu_base; |
52 | } __attribute__((aligned(64))); | 52 | } __attribute__((aligned(64))); |
53 | extern struct trap_per_cpu trap_block[NR_CPUS]; | 53 | extern struct trap_per_cpu trap_block[NR_CPUS]; |
54 | extern void init_cur_cpu_trap(struct thread_info *); | 54 | extern void init_cur_cpu_trap(struct thread_info *); |
@@ -101,6 +101,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, | |||
101 | #define TRAP_PER_CPU_DEV_MONDO_QMASK 0xec | 101 | #define TRAP_PER_CPU_DEV_MONDO_QMASK 0xec |
102 | #define TRAP_PER_CPU_RESUM_QMASK 0xf0 | 102 | #define TRAP_PER_CPU_RESUM_QMASK 0xf0 |
103 | #define TRAP_PER_CPU_NONRESUM_QMASK 0xf4 | 103 | #define TRAP_PER_CPU_NONRESUM_QMASK 0xf4 |
104 | #define TRAP_PER_CPU_PER_CPU_BASE 0xf8 | ||
104 | 105 | ||
105 | #define TRAP_BLOCK_SZ_SHIFT 8 | 106 | #define TRAP_BLOCK_SZ_SHIFT 8 |
106 | 107 | ||
@@ -172,12 +173,11 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, | |||
172 | */ | 173 | */ |
173 | #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ | 174 | #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ |
174 | lduh [THR + TI_CPU], REG1; \ | 175 | lduh [THR + TI_CPU], REG1; \ |
175 | sethi %hi(__per_cpu_shift), REG3; \ | 176 | sethi %hi(trap_block), REG2; \ |
176 | sethi %hi(__per_cpu_base), REG2; \ | 177 | sllx REG1, TRAP_BLOCK_SZ_SHIFT, REG1; \ |
177 | ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ | 178 | or REG2, %lo(trap_block), REG2; \ |
178 | ldx [REG2 + %lo(__per_cpu_base)], REG2; \ | 179 | add REG2, REG1, REG2; \ |
179 | sllx REG1, REG3, REG3; \ | 180 | ldx [REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST; |
180 | add REG3, REG2, DEST; | ||
181 | 181 | ||
182 | #else | 182 | #else |
183 | 183 | ||
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 91bf4c7f79b9..f8f21050448b 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S | |||
@@ -641,28 +641,6 @@ tlb_fixup_done: | |||
641 | /* Not reached... */ | 641 | /* Not reached... */ |
642 | 642 | ||
643 | 1: | 643 | 1: |
644 | /* If we boot on a non-zero cpu, all of the per-cpu | ||
645 | * variable references we make before setting up the | ||
646 | * per-cpu areas will use a bogus offset. Put a | ||
647 | * compensating factor into __per_cpu_base to handle | ||
648 | * this cleanly. | ||
649 | * | ||
650 | * What the per-cpu code calculates is: | ||
651 | * | ||
652 | * __per_cpu_base + (cpu << __per_cpu_shift) | ||
653 | * | ||
654 | * These two variables are zero initially, so to | ||
655 | * make it all cancel out to zero we need to put | ||
656 | * "0 - (cpu << 0)" into __per_cpu_base so that the | ||
657 | * above formula evaluates to zero. | ||
658 | * | ||
659 | * We cannot even perform a printk() until this stuff | ||
660 | * is setup as that calls cpu_clock() which uses | ||
661 | * per-cpu variables. | ||
662 | */ | ||
663 | sub %g0, %o0, %o1 | ||
664 | sethi %hi(__per_cpu_base), %o2 | ||
665 | stx %o1, [%o2 + %lo(__per_cpu_base)] | ||
666 | #else | 644 | #else |
667 | mov 0, %o0 | 645 | mov 0, %o0 |
668 | #endif | 646 | #endif |
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 4226d0ebaea5..b20f253857b7 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c | |||
@@ -1371,23 +1371,17 @@ void smp_send_stop(void) | |||
1371 | { | 1371 | { |
1372 | } | 1372 | } |
1373 | 1373 | ||
1374 | unsigned long __per_cpu_base __read_mostly; | ||
1375 | unsigned long __per_cpu_shift __read_mostly; | ||
1376 | |||
1377 | EXPORT_SYMBOL(__per_cpu_base); | ||
1378 | EXPORT_SYMBOL(__per_cpu_shift); | ||
1379 | |||
1380 | void __init real_setup_per_cpu_areas(void) | 1374 | void __init real_setup_per_cpu_areas(void) |
1381 | { | 1375 | { |
1382 | unsigned long paddr, goal, size, i; | 1376 | unsigned long base, shift, paddr, goal, size, i; |
1383 | char *ptr; | 1377 | char *ptr; |
1384 | 1378 | ||
1385 | /* Copy section for each CPU (we discard the original) */ | 1379 | /* Copy section for each CPU (we discard the original) */ |
1386 | goal = PERCPU_ENOUGH_ROOM; | 1380 | goal = PERCPU_ENOUGH_ROOM; |
1387 | 1381 | ||
1388 | __per_cpu_shift = PAGE_SHIFT; | 1382 | shift = PAGE_SHIFT; |
1389 | for (size = PAGE_SIZE; size < goal; size <<= 1UL) | 1383 | for (size = PAGE_SIZE; size < goal; size <<= 1UL) |
1390 | __per_cpu_shift++; | 1384 | shift++; |
1391 | 1385 | ||
1392 | paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); | 1386 | paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); |
1393 | if (!paddr) { | 1387 | if (!paddr) { |
@@ -1396,10 +1390,12 @@ void __init real_setup_per_cpu_areas(void) | |||
1396 | } | 1390 | } |
1397 | 1391 | ||
1398 | ptr = __va(paddr); | 1392 | ptr = __va(paddr); |
1399 | __per_cpu_base = ptr - __per_cpu_start; | 1393 | base = ptr - __per_cpu_start; |
1400 | 1394 | ||
1401 | for (i = 0; i < NR_CPUS; i++, ptr += size) | 1395 | for (i = 0; i < NR_CPUS; i++, ptr += size) { |
1396 | __per_cpu_offset(i) = base + (i * size); | ||
1402 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 1397 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
1398 | } | ||
1403 | 1399 | ||
1404 | /* Setup %g5 for the boot cpu. */ | 1400 | /* Setup %g5 for the boot cpu. */ |
1405 | __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); | 1401 | __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); |
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d809c4ebb48f..d073aabf65ed 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c | |||
@@ -2509,6 +2509,7 @@ void do_getpsr(struct pt_regs *regs) | |||
2509 | } | 2509 | } |
2510 | 2510 | ||
2511 | struct trap_per_cpu trap_block[NR_CPUS]; | 2511 | struct trap_per_cpu trap_block[NR_CPUS]; |
2512 | EXPORT_SYMBOL(trap_block); | ||
2512 | 2513 | ||
2513 | /* This can get invoked before sched_init() so play it super safe | 2514 | /* This can get invoked before sched_init() so play it super safe |
2514 | * and use hard_smp_processor_id(). | 2515 | * and use hard_smp_processor_id(). |
@@ -2592,7 +2593,9 @@ void __init trap_init(void) | |||
2592 | (TRAP_PER_CPU_RESUM_QMASK != | 2593 | (TRAP_PER_CPU_RESUM_QMASK != |
2593 | offsetof(struct trap_per_cpu, resum_qmask)) || | 2594 | offsetof(struct trap_per_cpu, resum_qmask)) || |
2594 | (TRAP_PER_CPU_NONRESUM_QMASK != | 2595 | (TRAP_PER_CPU_NONRESUM_QMASK != |
2595 | offsetof(struct trap_per_cpu, nonresum_qmask))) | 2596 | offsetof(struct trap_per_cpu, nonresum_qmask)) || |
2597 | (TRAP_PER_CPU_PER_CPU_BASE != | ||
2598 | offsetof(struct trap_per_cpu, __per_cpu_base))) | ||
2596 | trap_per_cpu_offsets_are_bolixed_dave(); | 2599 | trap_per_cpu_offsets_are_bolixed_dave(); |
2597 | 2600 | ||
2598 | if ((TSB_CONFIG_TSB != | 2601 | if ((TSB_CONFIG_TSB != |