author    Rob Herring <rob.herring@calxeda.com>  2012-11-29 14:39:54 -0500
committer Russell King <rmk+kernel@arm.linux.org.uk>  2012-12-03 06:16:36 -0500
commit    14318efb322e2fe1a034c69463d725209eb9d548 (patch)
tree      cf99b8a06eca7abdb89ac87c7f35bebb6b3254f7
parent    3e99675af1b25a191c467700499b1cbe5585a778 (diff)
ARM: 7587/1: implement optimized percpu variable access
Use the previously unused TPIDRPRW register to store percpu offsets.
TPIDRPRW is only accessible in PL1, so it can only be used in the kernel.

This replaces 2 loads with an mrc instruction for each percpu variable
access. With hackbench, the performance improvement is 1.4% on Cortex-A9
(highbank). Taking an average of 30 runs of "hackbench -l 1000" yields:

Before: 6.2191
After:  6.1348

Will Deacon reported a similar delta on v6 with 11MPCore.

The asm "memory" clobbers are needed here to ensure the percpu offset
gets reloaded. Testing by Will found that this would not happen in
__schedule(), which is a bit of a special case, as preemption is
disabled but execution can move cores.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
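As an illustration (not part of the patch), a minimal sketch of what a
per-cpu access looks like once the offset lives in TPIDRPRW.
DEFINE_PER_CPU and this_cpu_read are existing kernel APIs; the
my_counter variable and read_counter helper are hypothetical:

    #include <linux/percpu.h>

    /* Hypothetical per-cpu variable, for illustration only. */
    DEFINE_PER_CPU(unsigned long, my_counter);

    static unsigned long read_counter(void)
    {
    	/*
    	 * Before this patch, resolving the per-cpu address cost two
    	 * dependent loads (__per_cpu_offset indexed by the cpu number).
    	 * With it, __my_cpu_offset expands to a single
    	 * "mrc p15, 0, %0, c13, c0, 4" read of TPIDRPRW.
    	 */
    	return this_cpu_read(my_counter);
    }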
-rw-r--r--  arch/arm/include/asm/Kbuild   |  1 -
-rw-r--r--  arch/arm/include/asm/percpu.h | 45 +++++++++++++++++++++++++++
-rw-r--r--  arch/arm/kernel/setup.c       |  6 ++++++
-rw-r--r--  arch/arm/kernel/smp.c         |  4 +++-
4 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f70ae175a3d6..2ffdaacd461c 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += local64.h
 generic-y += msgbuf.h
 generic-y += param.h
 generic-y += parport.h
-generic-y += percpu.h
 generic-y += poll.h
 generic-y += resource.h
 generic-y += sections.h
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
new file mode 100644
index 000000000000..968c0a14e0a3
--- /dev/null
+++ b/arch/arm/include/asm/percpu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2012 Calxeda, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_ARM_PERCPU_H_
+#define _ASM_ARM_PERCPU_H_
+
+/*
+ * Same as asm-generic/percpu.h, except that we store the per cpu offset
+ * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
+ */
+#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
+static inline void set_my_cpu_offset(unsigned long off)
+{
+	/* Set TPIDRPRW */
+	asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
+}
+
+static inline unsigned long __my_cpu_offset(void)
+{
+	unsigned long off;
+	/* Read TPIDRPRW */
+	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory");
+	return off;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+#define set_my_cpu_offset(x)	do {} while(0)
+
+#endif	/* CONFIG_SMP */
+
+#include <asm-generic/percpu.h>
+
+#endif /* _ASM_ARM_PERCPU_H_ */
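The "memory" clobber on the mrc above is the subtle part. A hedged
sketch of the hazard it prevents (simplified, not actual __schedule()
code; off is a hypothetical local):

    unsigned long off = __my_cpu_offset;	/* mrc executed on, say, CPU0 */

    /*
     * If the task migrates here (possible in __schedule() even though
     * preemption is disabled), a value cached from the first read would
     * be stale. The "memory" clobber keeps GCC from reusing it, so the
     * second read emits a fresh mrc and yields the new CPU's offset.
     */
    off = __my_cpu_offset;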
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f739fb1d217a..9a89bf4aefe1 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -383,6 +383,12 @@ void cpu_init(void)
 		BUG();
 	}
 
+	/*
+	 * This only works on resume and secondary cores. For booting on the
+	 * boot cpu, smp_prepare_boot_cpu is called after percpu area setup.
+	 */
+	set_my_cpu_offset(per_cpu_offset(cpu));
+
 	cpu_proc_init();
 
 	/*
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 7eacd84cdc9c..f3a2be5837aa 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -314,9 +314,10 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
+	cpu_init();
+
 	printk("CPU%u: Booted secondary processor\n", cpu);
 
-	cpu_init();
 	preempt_disable();
 	trace_hardirqs_off();
 
@@ -372,6 +373,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)