aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/setup_64.c
diff options
context:
space:
mode:
author: Anton Blanchard <anton@samba.org> 2010-05-31 14:45:11 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2010-07-08 21:28:30 -0400
commit: ae01f84b93b274e2f215bdf6d0b46435679b5f9a (patch)
tree: 59457aa08f2a2ac53f6d00653a267964568cf427 /arch/powerpc/kernel/setup_64.c
parent: 51c7fdba40e741dfe18455b5e4240b70c422bf2e (diff)
powerpc: Optimise per cpu accesses on 64bit
Now that we dynamically allocate the paca array, it takes an extra load whenever we want to access another cpu's paca. One place we do that a lot is per cpu variables. A simple example:

    DEFINE_PER_CPU(unsigned long, vara);

    unsigned long test4(int cpu)
    {
            return per_cpu(vara, cpu);
    }

This takes 4 loads, 5 if you include the actual load of the per cpu variable:

    ld   r11,-32760(r30)  # load address of paca pointer
    ld   r9,-32768(r30)   # load link address of percpu variable
    sldi r3,r29,9         # get offset into paca (each entry is 512 bytes)
    ld   r0,0(r11)        # load paca pointer
    add  r3,r0,r3         # paca + offset
    ld   r11,64(r3)       # load paca[cpu].data_offset
    ldx  r3,r9,r11        # load per cpu variable

If we remove the ppc64 specific per_cpu_offset(), we get the generic one which indexes into a statically allocated array. This removes one load and one add:

    ld   r11,-32760(r30)  # load address of __per_cpu_offset
    ld   r9,-32768(r30)   # load link address of percpu variable
    sldi r3,r29,3         # get offset into __per_cpu_offset (each entry 8 bytes)
    ldx  r11,r11,r3       # load __per_cpu_offset[cpu]
    ldx  r3,r9,r11        # load per cpu variable

Having all the offsets in one array also helps when iterating over a per cpu variable across a number of cpus, such as in the scheduler. Before we would need to load one paca cacheline when calculating each per cpu offset. Now we have 16 (128 / sizeof(long)) per cpu offsets in each cacheline.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel/setup_64.c')
-rw-r--r-- arch/powerpc/kernel/setup_64.c | 9
1 files changed, 7 insertions, 2 deletions
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 643dcac40fcb..c352f322dbdd 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -600,6 +600,9 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
600 return REMOTE_DISTANCE; 600 return REMOTE_DISTANCE;
601} 601}
602 602
603unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
604EXPORT_SYMBOL(__per_cpu_offset);
605
603void __init setup_per_cpu_areas(void) 606void __init setup_per_cpu_areas(void)
604{ 607{
605 const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; 608 const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
@@ -624,8 +627,10 @@ void __init setup_per_cpu_areas(void)
624 panic("cannot initialize percpu area (err=%d)", rc); 627 panic("cannot initialize percpu area (err=%d)", rc);
625 628
626 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; 629 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
627 for_each_possible_cpu(cpu) 630 for_each_possible_cpu(cpu) {
628 paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; 631 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
632 paca[cpu].data_offset = __per_cpu_offset[cpu];
633 }
629} 634}
630#endif 635#endif
631 636