author     Ingo Molnar <mingo@elte.hu>    2009-01-15 08:15:53 -0500
committer  Ingo Molnar <mingo@elte.hu>    2009-01-16 08:20:31 -0500
commit     6dbde3530850d4d8bfc1b6bd4006d92786a2787f
tree       08c6dd55e860827311b889e2ecfe3de9f51421a0 /arch/x86/include
parent     004aa322f855a765741d9437a98dd8fe2e4f32a6
percpu: add optimized generic percpu accessors
This is both an optimization and a cleanup; it adds the following new
generic percpu methods:
percpu_read()
percpu_write()
percpu_add()
percpu_sub()
percpu_and()
percpu_or()
percpu_xor()
and implements support for them on x86. (Other architectures will fall
back to a default implementation.)
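
For context, the default implementations live on the generic side
(include/asm-generic/percpu.h), which the diffstat below (limited to
arch/x86/include) does not show. A minimal sketch of what such a fallback
can look like, assuming the standard per_cpu_var() and
get_cpu_var()/put_cpu_var() helpers:

/* Sketch of a generic fallback: an architecture that provides an
 * optimized percpu_read() defines it before this point, so the
 * #ifndef guard skips this slower, but always-correct, version. */
#ifndef percpu_read
# define percpu_read(var)					\
  ({								\
	typeof(per_cpu_var(var)) __tmp_var__;			\
	__tmp_var__ = get_cpu_var(var);				\
	put_cpu_var(var);					\
	__tmp_var__;						\
  })
#endif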
The advantage is that, for example, reading a local percpu variable no
longer requires a sequence like this:
return __get_cpu_var(var);
ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx
ffffffff8102ca32: 81
ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax
ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax
We can get a single instruction by using the optimized variants:
return percpu_read(var);
ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax
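
The single instruction comes from the pre-existing percpu_from_op()
helper in arch/x86/include/asm/percpu.h, on which the new accessors are
built. A simplified sketch of its 8-byte case; the real macro switches
on sizeof(var) and handles 1-, 2-, 4- and 8-byte accesses:

/* Simplified sketch: one segment-prefixed mov straight from the percpu
 * area. __percpu_seg_str expands to "%%gs:" on 64-bit SMP builds
 * (and "%%fs:" on 32-bit). */
#define percpu_from_op(op, var)				\
({							\
	typeof(var) ret__;				\
	asm(op "q " __percpu_seg_str "%1,%0"		\
	    : "=r" (ret__)				\
	    : "m" (var));				\
	ret__;						\
})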
I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.
tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
    * added percpu_and() for completeness's sake
    * made generic percpu ops atomic against preemption (sketched below)
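
The preemption point matters because, without protection, a task could be
migrated to another CPU between reading and writing its percpu slot. A
sketch of how the generic read-modify-write fallbacks can stay safe,
assuming a helper built on get_cpu_var()/put_cpu_var() (which disable and
re-enable preemption around the access):

/* Sketch: get_cpu_var() disables preemption, so the task cannot
 * migrate between the read and the write of its percpu variable. */
#define __percpu_generic_to_op(var, val, op)		\
do {							\
	get_cpu_var(var) op val;			\
	put_cpu_var(var);				\
} while (0)

#ifndef percpu_add
# define percpu_add(var, val)	__percpu_generic_to_op(var, (val), +=)
#endif
#ifndef percpu_sub
# define percpu_sub(var, val)	__percpu_generic_to_op(var, (val), -=)
#endif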
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/x86/include')
 arch/x86/include/asm/current.h        |  2
 arch/x86/include/asm/irq_regs_32.h    |  4
 arch/x86/include/asm/mmu_context_32.h | 12
 arch/x86/include/asm/pda.h            | 10
 arch/x86/include/asm/percpu.h         | 24
 arch/x86/include/asm/smp.h            |  2
 6 files changed, 28 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..0728480f5c56 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -10,7 +10,7 @@ struct task_struct;
 DECLARE_PER_CPU(struct task_struct *, current_task);
 static __always_inline struct task_struct *get_current(void)
 {
-	return x86_read_percpu(current_task);
+	return percpu_read(current_task);
 }
 
 #else /* X86_32 */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index 86afd7473457..d7ed33ee94e9 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return x86_read_percpu(irq_regs);
+	return percpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 	struct pt_regs *old_regs;
 
 	old_regs = get_irq_regs();
-	x86_write_percpu(irq_regs, new_regs);
+	percpu_write(irq_regs, new_regs);
 
 	return old_regs;
 }
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 7e98ce1d2c0e..08b53454f831 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,8 +4,8 @@
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		x86_write_percpu(cpu_tlbstate.active_mm, next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
 		cpu_set(cpu, next->cpu_vm_mask);
 
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
 	}
 #ifdef CONFIG_SMP
 	else {
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index e3d3a081d798..47f274fe6953 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -45,11 +45,11 @@ extern void pda_init(int);
 
 #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
-#define read_pda(field)		x86_read_percpu(__pda.field)
-#define write_pda(field, val)	x86_write_percpu(__pda.field, val)
-#define add_pda(field, val)	x86_add_percpu(__pda.field, val)
-#define sub_pda(field, val)	x86_sub_percpu(__pda.field, val)
-#define or_pda(field, val)	x86_or_percpu(__pda.field, val)
+#define read_pda(field)		percpu_read(__pda.field)
+#define write_pda(field, val)	percpu_write(__pda.field, val)
+#define add_pda(field, val)	percpu_add(__pda.field, val)
+#define sub_pda(field, val)	percpu_sub(__pda.field, val)
+#define or_pda(field, val)	percpu_or(__pda.field, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define test_and_clear_bit_pda(bit, field)				\
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 328b31a429d7..03aa4b00a1c3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -40,16 +40,11 @@
 
 #ifdef CONFIG_SMP
 #define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		x86_read_percpu(this_cpu_off)
+#define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
 #define __percpu_seg_str
 #endif
 
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
@@ -115,11 +110,13 @@ do {							\
 	ret__;						\
 })
 
-#define x86_read_percpu(var)	percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val)	percpu_to_op("or", per_cpu__##var, val)
+#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
+#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)			\
@@ -131,6 +128,11 @@ do {							\
 	old__;							\
 })
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 #ifdef CONFIG_X86_64
 extern void load_pda_offset(int cpu);
 #else
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 127415402ea1..c7bbbbe65d3f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata;
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+#define raw_smp_processor_id() (percpu_read(cpu_number))
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)