diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-01-15 08:15:53 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-16 08:20:31 -0500 |
commit | 6dbde3530850d4d8bfc1b6bd4006d92786a2787f (patch) | |
tree | 08c6dd55e860827311b889e2ecfe3de9f51421a0 /arch/x86/include/asm/percpu.h | |
parent | 004aa322f855a765741d9437a98dd8fe2e4f32a6 (diff) |
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:
percpu_read()
percpu_write()
percpu_add()
percpu_sub()
percpu_and()
percpu_or()
percpu_xor()
and implements support for them on x86. (other architectures will fall
back to a default implementation)
The advantage is that for example to read a local percpu variable,
instead of this sequence:
return __get_cpu_var(var);
ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx
ffffffff8102ca32: 81
ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax
ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax
We can get a single instruction by using the optimized variants:
return percpu_read(var);
ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax
I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.
tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
* added percpu_and() for completeness's sake
* made generic percpu ops atomic against preemption
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/x86/include/asm/percpu.h')
-rw-r--r-- | arch/x86/include/asm/percpu.h | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 328b31a429d7..03aa4b00a1c3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -40,16 +40,11 @@ | |||
40 | 40 | ||
41 | #ifdef CONFIG_SMP | 41 | #ifdef CONFIG_SMP |
42 | #define __percpu_seg_str "%%"__stringify(__percpu_seg)":" | 42 | #define __percpu_seg_str "%%"__stringify(__percpu_seg)":" |
43 | #define __my_cpu_offset x86_read_percpu(this_cpu_off) | 43 | #define __my_cpu_offset percpu_read(this_cpu_off) |
44 | #else | 44 | #else |
45 | #define __percpu_seg_str | 45 | #define __percpu_seg_str |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | #include <asm-generic/percpu.h> | ||
49 | |||
50 | /* We can use this directly for local CPU (faster). */ | ||
51 | DECLARE_PER_CPU(unsigned long, this_cpu_off); | ||
52 | |||
53 | /* For arch-specific code, we can use direct single-insn ops (they | 48 | /* For arch-specific code, we can use direct single-insn ops (they |
54 | * don't give an lvalue though). */ | 49 | * don't give an lvalue though). */ |
55 | extern void __bad_percpu_size(void); | 50 | extern void __bad_percpu_size(void); |
@@ -115,11 +110,13 @@ do { \ | |||
115 | ret__; \ | 110 | ret__; \ |
116 | }) | 111 | }) |
117 | 112 | ||
118 | #define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) | 113 | #define percpu_read(var) percpu_from_op("mov", per_cpu__##var) |
119 | #define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) | 114 | #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) |
120 | #define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) | 115 | #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) |
121 | #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) | 116 | #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) |
122 | #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) | 117 | #define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) |
118 | #define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) | ||
119 | #define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) | ||
123 | 120 | ||
124 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 121 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
125 | #define x86_test_and_clear_bit_percpu(bit, var) \ | 122 | #define x86_test_and_clear_bit_percpu(bit, var) \ |
@@ -131,6 +128,11 @@ do { \ | |||
131 | old__; \ | 128 | old__; \ |
132 | }) | 129 | }) |
133 | 130 | ||
131 | #include <asm-generic/percpu.h> | ||
132 | |||
133 | /* We can use this directly for local CPU (faster). */ | ||
134 | DECLARE_PER_CPU(unsigned long, this_cpu_off); | ||
135 | |||
134 | #ifdef CONFIG_X86_64 | 136 | #ifdef CONFIG_X86_64 |
135 | extern void load_pda_offset(int cpu); | 137 | extern void load_pda_offset(int cpu); |
136 | #else | 138 | #else |