diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2009-03-17 14:16:54 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-03-18 04:36:14 -0400 |
commit | ce4e240c279a31096f74afa6584a62d64a1ba8c8 (patch) | |
tree | 2b5f7bdf6dc058c09257977929d8f622e6c09466 /arch | |
parent | fa4b57cc045d6134b9862b2873f9c8ba9ed53ffe (diff) |
x86: add x2apic_wrmsr_fence() to x2apic flush tlb paths
Impact: optimize APIC IPI related barriers
Uncached MMIO accesses for xapic are inherently serializing and hence
we don't need explicit barriers for xapic IPI paths.
x2apic MSR writes/reads don't have serializing semantics and hence need
a serializing instruction or mfence, to make all the previous memory
stores globally visible before the x2apic MSR write for IPI.
Remove the explicit barrier from the generic flush TLB path and instead add x2apic_wrmsr_fence() to the x2apic-specific IPI send paths.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "steiner@sgi.com" <steiner@sgi.com>
Cc: Nick Piggin <npiggin@suse.de>
LKML-Reference: <1237313814.27006.203.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/apic.h | 10 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_phys.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 5 |
4 files changed, 22 insertions, 5 deletions
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6d5b6f0900e1..00f5962d82d0 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -108,6 +108,16 @@ extern void native_apic_icr_write(u32 low, u32 id); | |||
108 | extern u64 native_apic_icr_read(void); | 108 | extern u64 native_apic_icr_read(void); |
109 | 109 | ||
110 | #ifdef CONFIG_X86_X2APIC | 110 | #ifdef CONFIG_X86_X2APIC |
111 | /* | ||
112 | * Make previous memory operations globally visible before | ||
113 | * sending the IPI through x2apic wrmsr. We need a serializing instruction or | ||
114 | * mfence for this. | ||
115 | */ | ||
116 | static inline void x2apic_wrmsr_fence(void) | ||
117 | { | ||
118 | asm volatile("mfence" : : : "memory"); | ||
119 | } | ||
120 | |||
111 | static inline void native_apic_msr_write(u32 reg, u32 v) | 121 | static inline void native_apic_msr_write(u32 reg, u32 v) |
112 | { | 122 | { |
113 | if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || | 123 | if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8fb87b6dd633..4a903e2f0d17 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | |||
57 | unsigned long query_cpu; | 57 | unsigned long query_cpu; |
58 | unsigned long flags; | 58 | unsigned long flags; |
59 | 59 | ||
60 | x2apic_wrmsr_fence(); | ||
61 | |||
60 | local_irq_save(flags); | 62 | local_irq_save(flags); |
61 | for_each_cpu(query_cpu, mask) { | 63 | for_each_cpu(query_cpu, mask) { |
62 | __x2apic_send_IPI_dest( | 64 | __x2apic_send_IPI_dest( |
@@ -73,6 +75,8 @@ static void | |||
73 | unsigned long query_cpu; | 75 | unsigned long query_cpu; |
74 | unsigned long flags; | 76 | unsigned long flags; |
75 | 77 | ||
78 | x2apic_wrmsr_fence(); | ||
79 | |||
76 | local_irq_save(flags); | 80 | local_irq_save(flags); |
77 | for_each_cpu(query_cpu, mask) { | 81 | for_each_cpu(query_cpu, mask) { |
78 | if (query_cpu == this_cpu) | 82 | if (query_cpu == this_cpu) |
@@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector) | |||
90 | unsigned long query_cpu; | 94 | unsigned long query_cpu; |
91 | unsigned long flags; | 95 | unsigned long flags; |
92 | 96 | ||
97 | x2apic_wrmsr_fence(); | ||
98 | |||
93 | local_irq_save(flags); | 99 | local_irq_save(flags); |
94 | for_each_online_cpu(query_cpu) { | 100 | for_each_online_cpu(query_cpu) { |
95 | if (query_cpu == this_cpu) | 101 | if (query_cpu == this_cpu) |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 23625b9f98b2..a284359627e7 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | |||
58 | unsigned long query_cpu; | 58 | unsigned long query_cpu; |
59 | unsigned long flags; | 59 | unsigned long flags; |
60 | 60 | ||
61 | x2apic_wrmsr_fence(); | ||
62 | |||
61 | local_irq_save(flags); | 63 | local_irq_save(flags); |
62 | for_each_cpu(query_cpu, mask) { | 64 | for_each_cpu(query_cpu, mask) { |
63 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | 65 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), |
@@ -73,6 +75,8 @@ static void | |||
73 | unsigned long query_cpu; | 75 | unsigned long query_cpu; |
74 | unsigned long flags; | 76 | unsigned long flags; |
75 | 77 | ||
78 | x2apic_wrmsr_fence(); | ||
79 | |||
76 | local_irq_save(flags); | 80 | local_irq_save(flags); |
77 | for_each_cpu(query_cpu, mask) { | 81 | for_each_cpu(query_cpu, mask) { |
78 | if (query_cpu != this_cpu) | 82 | if (query_cpu != this_cpu) |
@@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector) | |||
89 | unsigned long query_cpu; | 93 | unsigned long query_cpu; |
90 | unsigned long flags; | 94 | unsigned long flags; |
91 | 95 | ||
96 | x2apic_wrmsr_fence(); | ||
97 | |||
92 | local_irq_save(flags); | 98 | local_irq_save(flags); |
93 | for_each_online_cpu(query_cpu) { | 99 | for_each_online_cpu(query_cpu) { |
94 | if (query_cpu == this_cpu) | 100 | if (query_cpu == this_cpu) |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a654d59e4483..821e97017e95 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -187,11 +187,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
187 | cpumask, cpumask_of(smp_processor_id())); | 187 | cpumask, cpumask_of(smp_processor_id())); |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * Make the above memory operations globally visible before | ||
191 | * sending the IPI. | ||
192 | */ | ||
193 | smp_mb(); | ||
194 | /* | ||
195 | * We have to send the IPI only to | 190 | * We have to send the IPI only to |
196 | * CPUs affected. | 191 | * CPUs affected. |
197 | */ | 192 | */ |