author		Ingo Molnar <mingo@elte.hu>	2009-01-15 08:15:53 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-01-16 08:20:31 -0500
commit		6dbde3530850d4d8bfc1b6bd4006d92786a2787f (patch)
tree		08c6dd55e860827311b889e2ecfe3de9f51421a0
parent		004aa322f855a765741d9437a98dd8fe2e4f32a6 (diff)
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:

  percpu_read()
  percpu_write()
  percpu_add()
  percpu_sub()
  percpu_and()
  percpu_or()
  percpu_xor()

and implements support for them on x86. (other architectures will
fall back to a default implementation)

The advantage is that for example to read a local percpu variable,
instead of this sequence:

  return __get_cpu_var(var);

  ffffffff8102ca2b:	48 8b 14 fd 80 09 74	mov    -0x7e8bf680(,%rdi,8),%rdx
  ffffffff8102ca32:	81
  ffffffff8102ca33:	48 c7 c0 d8 59 00 00	mov    $0x59d8,%rax
  ffffffff8102ca3a:	48 8b 04 10		mov    (%rax,%rdx,1),%rax

We can get a single instruction by using the optimized variants:

  return percpu_read(var);

  ffffffff8102ca3f:	65 48 8b 05 91 8f fd	mov    %gs:0x7efd8f91(%rip),%rax

I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.

tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
    * added percpu_and() for completeness's sake
    * made generic percpu ops atomic against preemption

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
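As a quick usage sketch (illustrative only, not part of the patch; demo_count is a made-up per-cpu variable), code can bump and read a local per-cpu counter through the new accessors like this:

  /* Illustrative only -- demo_count is a hypothetical per-cpu variable. */
  DEFINE_PER_CPU(unsigned long, demo_count);

  static void demo_tick(void)
  {
  	percpu_add(demo_count, 1);	/* single add instruction on x86 */
  }

  static unsigned long demo_value(void)
  {
  	return percpu_read(demo_count);	/* single mov on x86 */
  }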
-rw-r--r--	arch/x86/include/asm/current.h		2
-rw-r--r--	arch/x86/include/asm/irq_regs_32.h	4
-rw-r--r--	arch/x86/include/asm/mmu_context_32.h	12
-rw-r--r--	arch/x86/include/asm/pda.h		10
-rw-r--r--	arch/x86/include/asm/percpu.h		24
-rw-r--r--	arch/x86/include/asm/smp.h		2
-rw-r--r--	arch/x86/kernel/process_32.c		2
-rw-r--r--	arch/x86/kernel/tlb_32.c		10
-rw-r--r--	arch/x86/mach-voyager/voyager_smp.c	4
-rw-r--r--	arch/x86/xen/enlighten.c		14
-rw-r--r--	arch/x86/xen/irq.c			8
-rw-r--r--	arch/x86/xen/mmu.c			2
-rw-r--r--	arch/x86/xen/multicalls.h		2
-rw-r--r--	arch/x86/xen/smp.c			2
-rw-r--r--	include/asm-generic/percpu.h		52
15 files changed, 102 insertions, 48 deletions
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..0728480f5c56 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -10,7 +10,7 @@ struct task_struct;
 DECLARE_PER_CPU(struct task_struct *, current_task);
 static __always_inline struct task_struct *get_current(void)
 {
-	return x86_read_percpu(current_task);
+	return percpu_read(current_task);
 }
 
 #else /* X86_32 */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index 86afd7473457..d7ed33ee94e9 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return x86_read_percpu(irq_regs);
+	return percpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 	struct pt_regs *old_regs;
 
 	old_regs = get_irq_regs();
-	x86_write_percpu(irq_regs, new_regs);
+	percpu_write(irq_regs, new_regs);
 
 	return old_regs;
 }
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 7e98ce1d2c0e..08b53454f831 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,8 +4,8 @@
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
 	/* stop flush ipis for the previous mm */
 	cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-	x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-	x86_write_percpu(cpu_tlbstate.active_mm, next);
+	percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+	percpu_write(cpu_tlbstate.active_mm, next);
 #endif
 	cpu_set(cpu, next->cpu_vm_mask);
 
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
 	}
 #ifdef CONFIG_SMP
 	else {
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index e3d3a081d798..47f274fe6953 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -45,11 +45,11 @@ extern void pda_init(int);
 
 #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
-#define read_pda(field)		x86_read_percpu(__pda.field)
-#define write_pda(field, val)	x86_write_percpu(__pda.field, val)
-#define add_pda(field, val)	x86_add_percpu(__pda.field, val)
-#define sub_pda(field, val)	x86_sub_percpu(__pda.field, val)
-#define or_pda(field, val)	x86_or_percpu(__pda.field, val)
+#define read_pda(field)		percpu_read(__pda.field)
+#define write_pda(field, val)	percpu_write(__pda.field, val)
+#define add_pda(field, val)	percpu_add(__pda.field, val)
+#define sub_pda(field, val)	percpu_sub(__pda.field, val)
+#define or_pda(field, val)	percpu_or(__pda.field, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define test_and_clear_bit_pda(bit, field)				\
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 328b31a429d7..03aa4b00a1c3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -40,16 +40,11 @@
 
 #ifdef CONFIG_SMP
 #define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		x86_read_percpu(this_cpu_off)
+#define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
 #define __percpu_seg_str
 #endif
 
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
@@ -115,11 +110,13 @@ do { \
 	ret__;						\
 })
 
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
+#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)			\
@@ -131,6 +128,11 @@ do { \
 	old__;						\
 })
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 #ifdef CONFIG_X86_64
 extern void load_pda_offset(int cpu);
 #else
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 127415402ea1..c7bbbbe65d3f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata;
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+#define raw_smp_processor_id() (percpu_read(cpu_number))
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..77d546817d94 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -591,7 +591,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		loadsegment(gs, next->gs);
 
-	x86_write_percpu(current_task, next_p);
+	percpu_write(current_task, next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index ec53818f4e38..e65449d0f7d9 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  */
 void leave_mm(int cpu)
 {
-	BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
-	cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
+	BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
+	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	 * BUG();
 	 */
 
-	if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
-		if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
+	if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 			if (flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
@@ -222,7 +222,7 @@ static void do_flush_tlb_all(void *info)
 	unsigned long cpu = smp_processor_id();
 
 	__flush_tlb_all();
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
 
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 1a48368acb09..96f15b09a4c5 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -402,7 +402,7 @@ void __init find_smp_config(void)
 			VOYAGER_SUS_IN_CONTROL_PORT);
 
 	current_thread_info()->cpu = boot_cpu_id;
-	x86_write_percpu(cpu_number, boot_cpu_id);
+	percpu_write(cpu_number, boot_cpu_id);
 }
 
 /*
@@ -1782,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
 void __init smp_setup_processor_id(void)
 {
 	current_thread_info()->cpu = hard_smp_processor_id();
-	x86_write_percpu(cpu_number, hard_smp_processor_id());
+	percpu_write(cpu_number, hard_smp_processor_id());
 }
 
 static void voyager_send_call_func(cpumask_t callmask)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 312414ef9365..75b94139e1f2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0)
 
 static void xen_write_cr2(unsigned long cr2)
 {
-	x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
+	percpu_read(xen_vcpu)->arch.cr2 = cr2;
 }
 
 static unsigned long xen_read_cr2(void)
 {
-	return x86_read_percpu(xen_vcpu)->arch.cr2;
+	return percpu_read(xen_vcpu)->arch.cr2;
 }
 
 static unsigned long xen_read_cr2_direct(void)
 {
-	return x86_read_percpu(xen_vcpu_info.arch.cr2);
+	return percpu_read(xen_vcpu_info.arch.cr2);
 }
 
 static void xen_write_cr4(unsigned long cr4)
@@ -718,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4)
 
 static unsigned long xen_read_cr3(void)
 {
-	return x86_read_percpu(xen_cr3);
+	return percpu_read(xen_cr3);
 }
 
 static void set_current_cr3(void *v)
 {
-	x86_write_percpu(xen_current_cr3, (unsigned long)v);
+	percpu_write(xen_current_cr3, (unsigned long)v);
 }
 
 static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -748,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	if (kernel) {
-		x86_write_percpu(xen_cr3, cr3);
+		percpu_write(xen_cr3, cr3);
 
 		/* Update xen_current_cr3 once the batch has actually
 		   been submitted. */
@@ -764,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3)
 
 	/* Update while interrupts are disabled, so its atomic with
 	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	percpu_write(xen_cr3, cr3);
 
 	__xen_write_cr3(true, cr3);
 
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb042608c602..2e8271431e1a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void)
 	struct vcpu_info *vcpu;
 	unsigned long flags;
 
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 
 	/* flag has opposite sense of mask */
 	flags = !vcpu->evtchn_upcall_mask;
@@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags)
 	   make sure we're don't switch CPUs between getting the vcpu
 	   pointer and updating the mask. */
 	preempt_disable();
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = flags;
 	preempt_enable_no_resched();
 
@@ -83,7 +83,7 @@ static void xen_irq_disable(void)
 	   make sure we're don't switch CPUs between getting the vcpu
 	   pointer and updating the mask. */
 	preempt_disable();
-	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
+	percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
 	preempt_enable_no_resched();
 }
 
@@ -96,7 +96,7 @@ static void xen_irq_enable(void)
 	   the caller is confused and is trying to re-enable interrupts
 	   on an indeterminate processor. */
 
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
 
 	/* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240e26c7..7bc7852cc5c4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1074,7 +1074,7 @@ static void drop_other_mm_ref(void *info)
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
 		load_cr3(swapper_pg_dir);
 		arch_flush_lazy_cpu_mode();
 	}
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 858938241616..e786fa7f2615 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode)
 		xen_mc_flush();
 
 	/* restore flags saved in xen_mc_batch */
-	local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
+	local_irq_restore(percpu_read(xen_mc_irq_flags));
 }
 
 /* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 83fa4236477d..3bfd6dd0b47c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -78,7 +78,7 @@ static __cpuinit void cpu_bringup(void)
 	xen_setup_cpu_clockevents();
 
 	cpu_set(cpu, cpu_online_map);
-	x86_write_percpu(cpu_state, CPU_ONLINE);
+	percpu_write(cpu_state, CPU_ONLINE);
 	wmb();
 
 	/* We can take interrupts now: we're officially "up". */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b0e63c672ebd..00f45ff081a6 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void);
 #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
 	__typeof__(type) per_cpu_var(name)
 
+/*
+ * Optional methods for optimized non-lvalue per-cpu variable access.
+ *
+ * @var can be a percpu variable or a field of it and its size should
+ * equal char, int or long.  percpu_read() evaluates to a lvalue and
+ * all others to void.
+ *
+ * These operations are guaranteed to be atomic w.r.t. preemption.
+ * The generic versions use plain get/put_cpu_var().  Archs are
+ * encouraged to implement single-instruction alternatives which don't
+ * require preemption protection.
+ */
+#ifndef percpu_read
+# define percpu_read(var)					\
+  ({								\
+	typeof(per_cpu_var(var)) __tmp_var__;			\
+	__tmp_var__ = get_cpu_var(var);				\
+	put_cpu_var(var);					\
+	__tmp_var__;						\
+  })
+#endif
+
+#define __percpu_generic_to_op(var, val, op)			\
+do {								\
+	get_cpu_var(var) op val;				\
+	put_cpu_var(var);					\
+} while (0)
+
+#ifndef percpu_write
+# define percpu_write(var, val)		__percpu_generic_to_op(var, (val), =)
+#endif
+
+#ifndef percpu_add
+# define percpu_add(var, val)		__percpu_generic_to_op(var, (val), +=)
+#endif
+
+#ifndef percpu_sub
+# define percpu_sub(var, val)		__percpu_generic_to_op(var, (val), -=)
+#endif
+
+#ifndef percpu_and
+# define percpu_and(var, val)		__percpu_generic_to_op(var, (val), &=)
+#endif
+
+#ifndef percpu_or
+# define percpu_or(var, val)		__percpu_generic_to_op(var, (val), |=)
+#endif
+
+#ifndef percpu_xor
+# define percpu_xor(var, val)		__percpu_generic_to_op(var, (val), ^=)
+#endif
+
 #endif /* _ASM_GENERIC_PERCPU_H_ */
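For reference, a minimal sketch of what the generic fallback boils down to on an architecture that does not override these macros (illustrative expansion only, reusing the made-up demo_count variable from the sketch above):

  /* Roughly what percpu_add(demo_count, 1) does via __percpu_generic_to_op():
   * get_cpu_var() disables preemption and put_cpu_var() re-enables it, so the
   * read-modify-write cannot be split across a migration to another CPU.
   * It is still not atomic against interrupts or other CPUs. */
  static void demo_tick_generic(void)
  {
  	get_cpu_var(demo_count) += 1;
  	put_cpu_var(demo_count);
  }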