path: root/arch/x86/include/asm/percpu.h
author		Ingo Molnar <mingo@elte.hu>	2009-01-15 08:15:53 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-01-16 08:20:31 -0500
commit		6dbde3530850d4d8bfc1b6bd4006d92786a2787f (patch)
tree		08c6dd55e860827311b889e2ecfe3de9f51421a0 /arch/x86/include/asm/percpu.h
parent		004aa322f855a765741d9437a98dd8fe2e4f32a6 (diff)
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:

  percpu_read()
  percpu_write()
  percpu_add()
  percpu_sub()
  percpu_and()
  percpu_or()
  percpu_xor()

and implements support for them on x86. (other architectures will
fall back to a default implementation)

The advantage is that for example to read a local percpu variable,
instead of this sequence:

  return __get_cpu_var(var);

  ffffffff8102ca2b:	48 8b 14 fd 80 09 74 	mov    -0x7e8bf680(,%rdi,8),%rdx
  ffffffff8102ca32:	81
  ffffffff8102ca33:	48 c7 c0 d8 59 00 00 	mov    $0x59d8,%rax
  ffffffff8102ca3a:	48 8b 04 10          	mov    (%rax,%rdx,1),%rax

We can get a single instruction by using the optimized variants:

  return percpu_read(var);

  ffffffff8102ca3f:	65 48 8b 05 91 8f fd 	mov    %gs:0x7efd8f91(%rip),%rax

I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.

tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
    * added percpu_and() for completeness's sake
    * made generic percpu ops atomic against preemption

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
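[Editor's note: as a rough illustration only (not part of this commit; the per-cpu variable name "nr_events" is made up), this is how the new accessors are intended to be used on a DEFINE_PER_CPU variable. On x86 each access compiles down to a single %gs-relative instruction, while the generic fall-back protects the access against preemption:]

	/* Illustrative sketch; "nr_events" is a hypothetical per-cpu counter. */
	#include <linux/percpu.h>

	DEFINE_PER_CPU(unsigned long, nr_events);

	static void count_event(void)
	{
		percpu_add(nr_events, 1);	/* single "add %gs:..." on x86 */
	}

	static unsigned long local_event_count(void)
	{
		return percpu_read(nr_events);	/* single "mov %gs:..." on x86 */
	}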
Diffstat (limited to 'arch/x86/include/asm/percpu.h')
-rw-r--r--	arch/x86/include/asm/percpu.h	24
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 328b31a429d7..03aa4b00a1c3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -40,16 +40,11 @@
 
 #ifdef CONFIG_SMP
 #define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		x86_read_percpu(this_cpu_off)
+#define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
 #define __percpu_seg_str
 #endif
 
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
@@ -115,11 +110,13 @@ do { \
 	ret__;					\
 })
 
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
+#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var) \
@@ -131,6 +128,11 @@ do { \
 	old__;					\
 })
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 #ifdef CONFIG_X86_64
 extern void load_pda_offset(int cpu);
 #else
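[Editor's note: for context on the "made generic percpu ops atomic against preemption" item in the tj: note, an architecture without a single-instruction accessor has to bracket the read-modify-write with preemption protection. A minimal sketch of that idea (not the literal asm-generic/percpu.h code, whose details may differ) would be:]

	/* Sketch only: a preemption-safe generic percpu_add() for architectures
	 * that cannot do the update in one instruction. */
	#define percpu_add_generic(var, val)		\
	do {						\
		preempt_disable();			\
		__get_cpu_var(var) += (val);		\
		preempt_enable();			\
	} while (0)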