author		Andy Lutomirski <luto@amacapital.net>	2014-10-24 18:58:08 -0400
committer	Ingo Molnar <mingo@kernel.org>	2015-02-04 06:10:42 -0500
commit		1e02ce4cccdcb9688386e5b8d2c9fa4660b45389 (patch)
tree		7d514286844acea505228590119ac1a886cf6995 /arch/x86/include
parent		375074cc736ab1d89a708c0a8d7baa4a70d5d476 (diff)
x86: Store a per-cpu shadow copy of CR4
Context switches and TLB flushes can change individual bits of CR4.
CR4 reads take several cycles, so store a shadow copy of CR4 in a
per-cpu variable.

To avoid wasting a cache line, I added the CR4 shadow to
cpu_tlbstate, which is already touched in switch_mm. The heaviest
users of the cr4 shadow will be switch_mm and __switch_to_xtra, and
__switch_to_xtra is called shortly after switch_mm during context
switch, so the cacheline is likely to be hot.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Vince Weaver <vince@deater.net>
Cc: "hillf.zj" <hillf.zj@alibaba-inc.com>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/3a54dd3353fffbf84804398e00dfdc5b7c1afd7d.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
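A minimal, self-contained sketch of the shadow-register pattern the message describes (illustration only: hw_cr4, shadow_cr4 and the *_model() helpers are hypothetical userspace stand-ins, not the kernel code in this patch). Reads are served from a cheap cached copy, and the set/clear helpers skip the expensive register write when no bit actually changes, mirroring the cr4_set_bits()/cr4_clear_bits() logic in the tlbflush.h hunks below.

#include <stdio.h>

static unsigned long hw_cr4;      /* stands in for the real CR4 register */
static unsigned long shadow_cr4;  /* stands in for cpu_tlbstate.cr4 */
static unsigned int  hw_writes;   /* counts "expensive" register writes */

static unsigned long slow_read_cr4(void) { return hw_cr4; }	/* multi-cycle on real HW */
static void slow_write_cr4(unsigned long v) { hw_cr4 = v; hw_writes++; }

static void cr4_init_shadow_model(void)
{
	shadow_cr4 = slow_read_cr4();	/* one slow read at init time */
}

static void cr4_set_bits_model(unsigned long mask)
{
	unsigned long cr4 = shadow_cr4;		/* cheap cached read */

	if ((cr4 | mask) != cr4) {		/* write only if something changes */
		cr4 |= mask;
		shadow_cr4 = cr4;
		slow_write_cr4(cr4);
	}
}

static void cr4_clear_bits_model(unsigned long mask)
{
	unsigned long cr4 = shadow_cr4;

	if ((cr4 & ~mask) != cr4) {
		cr4 &= ~mask;
		shadow_cr4 = cr4;
		slow_write_cr4(cr4);
	}
}

int main(void)
{
	cr4_init_shadow_model();

	cr4_set_bits_model(1UL << 7);	/* first set: one hardware write */
	cr4_set_bits_model(1UL << 7);	/* already set: no write */
	cr4_clear_bits_model(1UL << 7);	/* clear: one write */
	cr4_clear_bits_model(1UL << 7);	/* already clear: no write */

	printf("hardware writes: %u (expected 2)\n", hw_writes);
	return 0;
}

Built with any C compiler, this reports two hardware writes for four helper calls; that redundant-write elimination, plus the cheap shadow read, is the optimization the patch adds.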
Diffstat (limited to 'arch/x86/include')
-rw-r--r--	arch/x86/include/asm/paravirt.h	6
-rw-r--r--	arch/x86/include/asm/special_insns.h	6
-rw-r--r--	arch/x86/include/asm/tlbflush.h	52
-rw-r--r--	arch/x86/include/asm/virtext.h	2
4 files changed, 46 insertions, 20 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 32444ae939ca..965c47d254aa 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index e820c080a4e9..6a4b00fafb00 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
 	native_write_cr3(x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return native_read_cr4();
 }
 
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return native_read_cr4_safe();
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
 }
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fc0c4bc356ce..cd791948b286 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -15,14 +15,37 @@
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+struct tlb_state {
+#ifdef CONFIG_SMP
+	struct mm_struct *active_mm;
+	int state;
+#endif
+
+	/*
+	 * Access to this CR4 shadow and to H/W CR4 is protected by
+	 * disabling interrupts when modifying either one.
+	 */
+	unsigned long cr4;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+/* Initialize cr4 shadow for this CPU. */
+static inline void cr4_init_shadow(void)
+{
+	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
 	unsigned long cr4;
 
-	cr4 = read_cr4();
-	cr4 |= mask;
-	write_cr4(cr4);
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 | mask) != cr4) {
+		cr4 |= mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
 }
 
 /* Clear in this cpu's CR4. */
@@ -30,9 +53,18 @@ static inline void cr4_clear_bits(unsigned long mask)
 {
 	unsigned long cr4;
 
-	cr4 = read_cr4();
-	cr4 &= ~mask;
-	write_cr4(cr4);
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 & ~mask) != cr4) {
+		cr4 &= ~mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
+}
+
+/* Read the CR4 shadow. */
+static inline unsigned long cr4_read_shadow(void)
+{
+	return this_cpu_read(cpu_tlbstate.cr4);
 }
 
 /*
@@ -61,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
 {
 	unsigned long cr4;
 
-	cr4 = native_read_cr4();
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	/* clear PGE */
 	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
@@ -221,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-struct tlb_state {
-	struct mm_struct *active_mm;
-	int state;
-};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
-
 static inline void reset_lazy_tlbstate(void)
 {
 	this_cpu_write(cpu_tlbstate.state, 0);
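For context, a hedged usage sketch of the helpers this file now provides (the caller below is hypothetical and not part of this patch): cr4_init_shadow() runs once per CPU early in bring-up, readers use cr4_read_shadow() instead of a multi-cycle __read_cr4(), and writers go through cr4_set_bits()/cr4_clear_bits() so the shadow and the hardware register stay in sync.

#include <linux/types.h>
#include <asm/tlbflush.h>		/* cr4_* helpers from this patch */
#include <asm/processor-flags.h>	/* X86_CR4_* bit definitions */

/* Hypothetical caller, for illustration only. */
static void example_toggle_global_pages(bool enable)
{
	/*
	 * Callers must disable interrupts around CR4 modifications,
	 * per the locking comment on cpu_tlbstate.cr4 above.
	 */
	if (enable)
		cr4_set_bits(X86_CR4_PGE);	/* no-op if PGE is already set */
	else
		cr4_clear_bits(X86_CR4_PGE);	/* no-op if PGE is already clear */
}

static bool example_global_pages_enabled(void)
{
	/* Cheap per-cpu read; avoids a slow CR4 register access. */
	return cr4_read_shadow() & X86_CR4_PGE;
}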
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index f41e19ca717b..cce9ee68e335 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -46,7 +46,7 @@ static inline void cpu_vmxoff(void)
 
 static inline int cpu_vmx_enabled(void)
 {
-	return read_cr4() & X86_CR4_VMXE;
+	return __read_cr4() & X86_CR4_VMXE;
 }
 
 /** Disable VMX if it is enabled on the current CPU