author     Andy Lutomirski <luto@amacapital.net>   2014-10-24 18:58:08 -0400
committer  Ingo Molnar <mingo@kernel.org>          2015-02-04 06:10:42 -0500
commit     1e02ce4cccdcb9688386e5b8d2c9fa4660b45389
tree       7d514286844acea505228590119ac1a886cf6995 /arch/x86/include
parent     375074cc736ab1d89a708c0a8d7baa4a70d5d476
x86: Store a per-cpu shadow copy of CR4
Context switches and TLB flushes can change individual bits of CR4.
CR4 reads take several cycles, so store a shadow copy of CR4 in a
per-cpu variable.
To avoid wasting a cache line, I added the CR4 shadow to
cpu_tlbstate, which is already touched in switch_mm(). The heaviest
users of the CR4 shadow will be switch_mm() and __switch_to_xtra(),
and __switch_to_xtra() is called shortly after switch_mm() during
context switch, so the cache line is likely to be hot.
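Condensed, the fast path this buys (taken from the tlbflush.h hunk
below, with explanatory comments added) reads the per-cpu shadow
instead of the hardware register and skips the expensive CR4 write
entirely when no bits actually change:

	/* Set bits in this CPU's CR4 via the per-cpu shadow copy. */
	static inline void cr4_set_bits(unsigned long mask)
	{
		unsigned long cr4;

		cr4 = this_cpu_read(cpu_tlbstate.cr4);	/* cheap per-cpu load, no CR4 read */
		if ((cr4 | mask) != cr4) {		/* only touch hardware if bits change */
			cr4 |= mask;
			this_cpu_write(cpu_tlbstate.cr4, cr4);	/* keep the shadow in sync */
			__write_cr4(cr4);
		}
	}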
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Vince Weaver <vince@deater.net>
Cc: "hillf.zj" <hillf.zj@alibaba-inc.com>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/3a54dd3353fffbf84804398e00dfdc5b7c1afd7d.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/include')
-rw-r--r--  arch/x86/include/asm/paravirt.h      |  6
-rw-r--r--  arch/x86/include/asm/special_insns.h |  6
-rw-r--r--  arch/x86/include/asm/tlbflush.h      | 52
-rw-r--r--  arch/x86/include/asm/virtext.h       |  2
4 files changed, 46 insertions, 20 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 32444ae939ca..965c47d254aa 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index e820c080a4e9..6a4b00fafb00 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
 	native_write_cr3(x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return native_read_cr4();
 }
 
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return native_read_cr4_safe();
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
 }
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fc0c4bc356ce..cd791948b286 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -15,14 +15,37 @@
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+struct tlb_state {
+#ifdef CONFIG_SMP
+	struct mm_struct *active_mm;
+	int state;
+#endif
+
+	/*
+	 * Access to this CR4 shadow and to H/W CR4 is protected by
+	 * disabling interrupts when modifying either one.
+	 */
+	unsigned long cr4;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+/* Initialize cr4 shadow for this CPU. */
+static inline void cr4_init_shadow(void)
+{
+	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
 	unsigned long cr4;
 
-	cr4 = read_cr4();
-	cr4 |= mask;
-	write_cr4(cr4);
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 | mask) != cr4) {
+		cr4 |= mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
 }
 
 /* Clear in this cpu's CR4. */
@@ -30,9 +53,18 @@ static inline void cr4_clear_bits(unsigned long mask)
 {
 	unsigned long cr4;
 
-	cr4 = read_cr4();
-	cr4 &= ~mask;
-	write_cr4(cr4);
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 & ~mask) != cr4) {
+		cr4 &= ~mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
+}
+
+/* Read the CR4 shadow. */
+static inline unsigned long cr4_read_shadow(void)
+{
+	return this_cpu_read(cpu_tlbstate.cr4);
 }
 
 /*
@@ -61,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
 {
 	unsigned long cr4;
 
-	cr4 = native_read_cr4();
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	/* clear PGE */
 	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
@@ -221,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-struct tlb_state {
-	struct mm_struct *active_mm;
-	int state;
-};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
-
 static inline void reset_lazy_tlbstate(void)
 {
 	this_cpu_write(cpu_tlbstate.state, 0);
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index f41e19ca717b..cce9ee68e335 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -46,7 +46,7 @@ static inline void cpu_vmxoff(void)
 
 static inline int cpu_vmx_enabled(void)
 {
-	return read_cr4() & X86_CR4_VMXE;
+	return __read_cr4() & X86_CR4_VMXE;
 }
 
 /** Disable VMX if it is enabled on the current CPU
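For illustration, a hypothetical caller of the new helpers might look
like the sketch below. This is not part of the patch; the example_*
functions and the choice of X86_CR4_FSGSBASE are illustrative
assumptions, and per the comment in the patch, CR4 and its shadow must
only be modified with interrupts disabled.

	/* Hypothetical: enable a CR4 feature bit through the shadow helpers. */
	static void example_enable_fsgsbase(void)
	{
		cr4_set_bits(X86_CR4_FSGSBASE);	/* updates the shadow, then hardware CR4 */
	}

	/* Hypothetical: test a CR4 bit without a slow hardware read. */
	static inline int example_vmx_enabled(void)
	{
		return cr4_read_shadow() & X86_CR4_VMXE;
	}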