author     Ingo Molnar <mingo@elte.hu>    2009-01-15 08:15:53 -0500
committer  Ingo Molnar <mingo@elte.hu>    2009-01-16 08:20:31 -0500
commit     6dbde3530850d4d8bfc1b6bd4006d92786a2787f (patch)
tree       08c6dd55e860827311b889e2ecfe3de9f51421a0 /arch/x86/include
parent     004aa322f855a765741d9437a98dd8fe2e4f32a6 (diff)
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:

  percpu_read()
  percpu_write()
  percpu_add()
  percpu_sub()
  percpu_and()
  percpu_or()
  percpu_xor()

and implements support for them on x86. (other architectures will
fall back to a default implementation)

The advantage is that for example to read a local percpu variable,
instead of this sequence:

  return __get_cpu_var(var);

  ffffffff8102ca2b:	48 8b 14 fd 80 09 74 	mov    -0x7e8bf680(,%rdi,8),%rdx
  ffffffff8102ca32:	81
  ffffffff8102ca33:	48 c7 c0 d8 59 00 00 	mov    $0x59d8,%rax
  ffffffff8102ca3a:	48 8b 04 10          	mov    (%rax,%rdx,1),%rax

We can get a single instruction by using the optimized variants:

  return percpu_read(var);

  ffffffff8102ca3f:	65 48 8b 05 91 8f fd 	mov    %gs:0x7efd8f91(%rip),%rax

I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.

tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
    * added percpu_and() for completeness's sake
    * made generic percpu ops atomic against preemption

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
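For context (not part of this commit), here is a minimal usage sketch of the new accessors from a caller's side; the per-CPU variable my_hits and the two helper functions are hypothetical, invented only for illustration:

	#include <linux/percpu.h>

	/* Hypothetical per-CPU counter, for illustration only. */
	static DEFINE_PER_CPU(unsigned long, my_hits);

	static void note_hit(void)
	{
		/* On x86 this compiles to a single %gs-relative add;
		 * other architectures use the generic fallback. */
		percpu_add(my_hits, 1);
	}

	static unsigned long read_local_hits(void)
	{
		/* Replaces the old __get_cpu_var(my_hits) read sequence. */
		return percpu_read(my_hits);
	}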
Diffstat (limited to 'arch/x86/include')
-rw-r--r--	arch/x86/include/asm/current.h		 2
-rw-r--r--	arch/x86/include/asm/irq_regs_32.h	 4
-rw-r--r--	arch/x86/include/asm/mmu_context_32.h	12
-rw-r--r--	arch/x86/include/asm/pda.h		10
-rw-r--r--	arch/x86/include/asm/percpu.h		24
-rw-r--r--	arch/x86/include/asm/smp.h		 2

6 files changed, 28 insertions(+), 26 deletions(-)
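As the commit message notes, architectures without an optimized implementation fall back to a generic default that stays atomic against preemption. A rough sketch of how such a fallback can be built on get_cpu_var()/put_cpu_var() is shown below; this is an illustration in the spirit of the commit, not the verbatim asm-generic/percpu.h code:

	/* Sketch: define the generic op only if the arch did not. */
	#ifndef percpu_read
	# define percpu_read(var)					\
	  ({								\
		typeof(per_cpu_var(var)) __tmp_var__;			\
		/* get_cpu_var() disables preemption ... */		\
		__tmp_var__ = get_cpu_var(var);				\
		/* ... put_cpu_var() re-enables it */			\
		put_cpu_var(var);					\
		__tmp_var__;						\
	  })
	#endif

	#ifndef percpu_add
	# define percpu_add(var, val)					\
	  do {								\
		get_cpu_var(var) += (val);				\
		put_cpu_var(var);					\
	  } while (0)
	#endif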
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..0728480f5c56 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -10,7 +10,7 @@ struct task_struct;
 DECLARE_PER_CPU(struct task_struct *, current_task);
 static __always_inline struct task_struct *get_current(void)
 {
-	return x86_read_percpu(current_task);
+	return percpu_read(current_task);
 }
 
 #else	/* X86_32 */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index 86afd7473457..d7ed33ee94e9 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return x86_read_percpu(irq_regs);
+	return percpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 	struct pt_regs *old_regs;
 
 	old_regs = get_irq_regs();
-	x86_write_percpu(irq_regs, new_regs);
+	percpu_write(irq_regs, new_regs);
 
 	return old_regs;
 }
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 7e98ce1d2c0e..08b53454f831 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,8 +4,8 @@
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		x86_write_percpu(cpu_tlbstate.active_mm, next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
 		cpu_set(cpu, next->cpu_vm_mask);
 
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
 	}
 #ifdef CONFIG_SMP
 	else {
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index e3d3a081d798..47f274fe6953 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -45,11 +45,11 @@ extern void pda_init(int);
 
 #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
-#define read_pda(field)		x86_read_percpu(__pda.field)
-#define write_pda(field, val)	x86_write_percpu(__pda.field, val)
-#define add_pda(field, val)	x86_add_percpu(__pda.field, val)
-#define sub_pda(field, val)	x86_sub_percpu(__pda.field, val)
-#define or_pda(field, val)	x86_or_percpu(__pda.field, val)
+#define read_pda(field)		percpu_read(__pda.field)
+#define write_pda(field, val)	percpu_write(__pda.field, val)
+#define add_pda(field, val)	percpu_add(__pda.field, val)
+#define sub_pda(field, val)	percpu_sub(__pda.field, val)
+#define or_pda(field, val)	percpu_or(__pda.field, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define test_and_clear_bit_pda(bit, field) \
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 328b31a429d7..03aa4b00a1c3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -40,16 +40,11 @@
 
 #ifdef CONFIG_SMP
 #define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		x86_read_percpu(this_cpu_off)
+#define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
 #define __percpu_seg_str
 #endif
 
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
@@ -115,11 +110,13 @@ do { \
 	ret__; \
 })
 
-#define x86_read_percpu(var)		percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val)	percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val)	percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val)	percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val)		percpu_to_op("or", per_cpu__##var, val)
+#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
+#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var) \
@@ -131,6 +128,11 @@ do { \
 	old__; \
 })
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 #ifdef CONFIG_X86_64
 extern void load_pda_offset(int cpu);
 #else
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 127415402ea1..c7bbbbe65d3f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata;
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+#define raw_smp_processor_id() (percpu_read(cpu_number))
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)