author	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-16 17:58:12 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-16 17:58:12 -0500
commit	37507717de51a8332a34ee07fd88700be88df5bf (patch)
tree	d6eb5d00a798a4b1ce40c8c4c8ca74b0d22fe1df
parent	a68fb48380bb993306dd62a58cbd946b4348222a (diff)
parent	a66734297f78707ce39d756b656bfae861d53f62 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 perf updates from Ingo Molnar:
 "This series tightens up RDPMC permissions: currently even highly
  sandboxed x86 execution environments (such as seccomp) have permission
  to execute RDPMC, which may leak various perf events / PMU state such
  as timing information and other CPU execution details.

  This 'all is allowed' RDPMC mode is still preserved as the
  (non-default) /sys/devices/cpu/rdpmc=2 setting.  The new default is
  that RDPMC access is only allowed if a perf event is mmap-ed (which is
  needed to correctly interpret RDPMC counter values in any case).

  As a side effect of these changes CR4 handling is cleaned up in the
  x86 code and a shadow copy of the CR4 value is added.

  The extra CR4 manipulation adds ~ <50ns to the context switch cost
  between rdpmc-capable and rdpmc-non-capable mms"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86: Add /sys/devices/cpu/rdpmc=2 to allow rdpmc for all tasks
  perf/x86: Only allow rdpmc if a perf_event is mapped
  perf: Pass the event to arch_perf_update_userpage()
  perf: Add pmu callbacks to track event mapping and unmapping
  x86: Add a comment clarifying LDT context switching
  x86: Store a per-cpu shadow copy of CR4
  x86: Clean up cr4 manipulation
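For illustration only (not part of this merge), a minimal userspace sketch of the usage model the new default enforces: the task mmap()s its own perf event, which is what grants its mm the CR4.PCE bit, and it then reads the counter named by the mmap-ed perf_event_mmap_page. The perf_event_open() syscall and the cap_user_rdpmc/index/offset fields are existing interfaces; the program itself is a simplified assumption (no error reporting, no pc->lock seqlock retry loop, no CPU pinning). Without the mapping (or with rdpmc set to 0), the RDPMC below would trap and the process would get SIGSEGV.

/* Hypothetical sketch: count this task's CPU cycles and read the PMC
 * directly with RDPMC.  Assumes x86 and a kernel with this series
 * applied (or /sys/devices/cpu/rdpmc=2). */
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>

static unsigned long long rdpmc(unsigned int counter)
{
	unsigned int lo, hi;

	/* ECX selects the PMC; the result comes back in EDX:EAX. */
	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
	return ((unsigned long long)hi << 32) | lo;
}

int main(void)
{
	struct perf_event_attr attr;
	struct perf_event_mmap_page *pc;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* Mapping the event page is what allows RDPMC in this mm. */
	pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
	if (pc == MAP_FAILED)
		return 1;

	/* index is 1-based; 0 means the event is not currently on a PMC. */
	if (pc->cap_user_rdpmc && pc->index)
		printf("cycles: %llu\n",
		       pc->offset + rdpmc(pc->index - 1));

	return 0;
}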
-rw-r--r--	arch/x86/include/asm/mmu.h	2
-rw-r--r--	arch/x86/include/asm/mmu_context.h	33
-rw-r--r--	arch/x86/include/asm/paravirt.h	6
-rw-r--r--	arch/x86/include/asm/processor.h	33
-rw-r--r--	arch/x86/include/asm/special_insns.h	6
-rw-r--r--	arch/x86/include/asm/tlbflush.h	77
-rw-r--r--	arch/x86/include/asm/virtext.h	5
-rw-r--r--	arch/x86/kernel/acpi/sleep.c	2
-rw-r--r--	arch/x86/kernel/cpu/common.c	17
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce.c	3
-rw-r--r--	arch/x86/kernel/cpu/mcheck/p5.c	3
-rw-r--r--	arch/x86/kernel/cpu/mcheck/winchip.c	3
-rw-r--r--	arch/x86/kernel/cpu/mtrr/cyrix.c	6
-rw-r--r--	arch/x86/kernel/cpu/mtrr/generic.c	6
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	76
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	2
-rw-r--r--	arch/x86/kernel/head32.c	1
-rw-r--r--	arch/x86/kernel/head64.c	2
-rw-r--r--	arch/x86/kernel/i387.c	3
-rw-r--r--	arch/x86/kernel/process.c	5
-rw-r--r--	arch/x86/kernel/process_32.c	2
-rw-r--r--	arch/x86/kernel/process_64.c	2
-rw-r--r--	arch/x86/kernel/setup.c	2
-rw-r--r--	arch/x86/kernel/xsave.c	3
-rw-r--r--	arch/x86/kvm/svm.c	2
-rw-r--r--	arch/x86/kvm/vmx.c	10
-rw-r--r--	arch/x86/mm/fault.c	2
-rw-r--r--	arch/x86/mm/init.c	13
-rw-r--r--	arch/x86/mm/tlb.c	3
-rw-r--r--	arch/x86/power/cpu.c	11
-rw-r--r--	arch/x86/realmode/init.c	2
-rw-r--r--	arch/x86/xen/enlighten.c	4
-rw-r--r--	drivers/lguest/x86/core.c	5
-rw-r--r--	include/linux/perf_event.h	7
-rw-r--r--	kernel/events/core.c	14
35 files changed, 253 insertions(+), 120 deletions(-)
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 876e74e8eec7..09b9620a73b4 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {
 
 	struct mutex lock;
 	void __user *vdso;
+
+	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4b75d591eb5e..883f6b933fa4 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -18,6 +18,21 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 }
 #endif /* !CONFIG_PARAVIRT */
 
+#ifdef CONFIG_PERF_EVENTS
+extern struct static_key rdpmc_always_available;
+
+static inline void load_mm_cr4(struct mm_struct *mm)
+{
+	if (static_key_true(&rdpmc_always_available) ||
+	    atomic_read(&mm->context.perf_rdpmc_allowed))
+		cr4_set_bits(X86_CR4_PCE);
+	else
+		cr4_clear_bits(X86_CR4_PCE);
+}
+#else
+static inline void load_mm_cr4(struct mm_struct *mm) {}
+#endif
+
 /*
  * Used for LDT copy/destruction.
  */
@@ -52,15 +67,20 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* Stop flush ipis for the previous mm */
 		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 
+		/* Load per-mm CR4 state */
+		load_mm_cr4(next);
+
 		/*
 		 * Load the LDT, if the LDT is different.
 		 *
-		 * It's possible leave_mm(prev) has been called. If so,
-		 * then prev->context.ldt could be out of sync with the
-		 * LDT descriptor or the LDT register. This can only happen
-		 * if prev->context.ldt is non-null, since we never free
-		 * an LDT. But LDTs can't be shared across mms, so
-		 * prev->context.ldt won't be equal to next->context.ldt.
+		 * It's possible that prev->context.ldt doesn't match
+		 * the LDT register. This can happen if leave_mm(prev)
+		 * was called and then modify_ldt changed
+		 * prev->context.ldt but suppressed an IPI to this CPU.
+		 * In this case, prev->context.ldt != NULL, because we
+		 * never free an LDT while the mm still exists. That
+		 * means that next->context.ldt != prev->context.ldt,
+		 * because mms never share an LDT.
 		 */
 		if (unlikely(prev->context.ldt != next->context.ldt))
 			load_LDT_nolock(&next->context);
@@ -85,6 +105,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 */
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+			load_mm_cr4(next);
 			load_LDT_nolock(&next->context);
 		}
 	}
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 32444ae939ca..965c47d254aa 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a092a0cce0b7..ec1c93588cef 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -579,39 +579,6 @@ static inline void load_sp0(struct tss_struct *tss,
 #define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-extern u32 *trampoline_cr4_features;
-
-static inline void set_in_cr4(unsigned long mask)
-{
-	unsigned long cr4;
-
-	mmu_cr4_features |= mask;
-	if (trampoline_cr4_features)
-		*trampoline_cr4_features = mmu_cr4_features;
-	cr4 = read_cr4();
-	cr4 |= mask;
-	write_cr4(cr4);
-}
-
-static inline void clear_in_cr4(unsigned long mask)
-{
-	unsigned long cr4;
-
-	mmu_cr4_features &= ~mask;
-	if (trampoline_cr4_features)
-		*trampoline_cr4_features = mmu_cr4_features;
-	cr4 = read_cr4();
-	cr4 &= ~mask;
-	write_cr4(cr4);
-}
-
 typedef struct {
 	unsigned long seg;
 } mm_segment_t;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index e820c080a4e9..6a4b00fafb00 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
 	native_write_cr3(x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return native_read_cr4();
 }
 
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return native_read_cr4_safe();
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
 }
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 04905bfc508b..cd791948b286 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -15,6 +15,75 @@
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+struct tlb_state {
+#ifdef CONFIG_SMP
+	struct mm_struct *active_mm;
+	int state;
+#endif
+
+	/*
+	 * Access to this CR4 shadow and to H/W CR4 is protected by
+	 * disabling interrupts when modifying either one.
+	 */
+	unsigned long cr4;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+/* Initialize cr4 shadow for this CPU. */
+static inline void cr4_init_shadow(void)
+{
+	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
+}
+
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
+{
+	unsigned long cr4;
+
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 | mask) != cr4) {
+		cr4 |= mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
+}
+
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits(unsigned long mask)
+{
+	unsigned long cr4;
+
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 & ~mask) != cr4) {
+		cr4 &= ~mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
+}
+
+/* Read the CR4 shadow. */
+static inline unsigned long cr4_read_shadow(void)
+{
+	return this_cpu_read(cpu_tlbstate.cr4);
+}
+
+/*
+ * Save some of cr4 feature set we're using (e.g. Pentium 4MB
+ * enable and PPro Global page enable), so that any CPU's that boot
+ * up after us can get the correct flags. This should only be used
+ * during boot on the boot cpu.
+ */
+extern unsigned long mmu_cr4_features;
+extern u32 *trampoline_cr4_features;
+
+static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+{
+	mmu_cr4_features |= mask;
+	if (trampoline_cr4_features)
+		*trampoline_cr4_features = mmu_cr4_features;
+	cr4_set_bits(mask);
+}
+
 static inline void __native_flush_tlb(void)
 {
 	native_write_cr3(native_read_cr3());
@@ -24,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
 {
 	unsigned long cr4;
 
-	cr4 = native_read_cr4();
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	/* clear PGE */
 	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
@@ -184,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-struct tlb_state {
-	struct mm_struct *active_mm;
-	int state;
-};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
-
 static inline void reset_lazy_tlbstate(void)
 {
 	this_cpu_write(cpu_tlbstate.state, 0);
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 5da71c27cc59..cce9ee68e335 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -19,6 +19,7 @@
 
 #include <asm/vmx.h>
 #include <asm/svm.h>
+#include <asm/tlbflush.h>
 
 /*
  * VMX functions:
@@ -40,12 +41,12 @@ static inline int cpu_has_vmx(void)
 static inline void cpu_vmxoff(void)
 {
 	asm volatile (ASM_VMX_VMXOFF : : : "cc");
-	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+	cr4_clear_bits(X86_CR4_VMXE);
 }
 
 static inline int cpu_vmx_enabled(void)
 {
-	return read_cr4() & X86_CR4_VMXE;
+	return __read_cr4() & X86_CR4_VMXE;
 }
 
 /** Disable VMX if it is enabled on the current CPU
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 31368207837c..d1daead5fcdd 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void)
 
 	header->pmode_cr0 = read_cr0();
 	if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
-		header->pmode_cr4 = read_cr4();
+		header->pmode_cr4 = __read_cr4();
 		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
 	}
 	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b15bffcaba6d..b5c8ff5e9dfc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -19,6 +19,7 @@
 #include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
+#include <asm/tlbflush.h>
 #include <asm/debugreg.h>
 #include <asm/sections.h>
 #include <asm/vsyscall.h>
@@ -278,7 +279,7 @@ __setup("nosmep", setup_disable_smep);
 static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_SMEP))
-		set_in_cr4(X86_CR4_SMEP);
+		cr4_set_bits(X86_CR4_SMEP);
 }
 
 static __init int setup_disable_smap(char *arg)
@@ -298,9 +299,9 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_SMAP)) {
 #ifdef CONFIG_X86_SMAP
-		set_in_cr4(X86_CR4_SMAP);
+		cr4_set_bits(X86_CR4_SMAP);
 #else
-		clear_in_cr4(X86_CR4_SMAP);
+		cr4_clear_bits(X86_CR4_SMAP);
 #endif
 	}
 }
@@ -1295,6 +1296,12 @@ void cpu_init(void)
 	wait_for_master_cpu(cpu);
 
 	/*
+	 * Initialize the CR4 shadow before doing anything that could
+	 * try to read it.
+	 */
+	cr4_init_shadow();
+
+	/*
 	 * Load microcode on this cpu if a valid microcode is available.
 	 * This is early microcode loading procedure.
 	 */
@@ -1313,7 +1320,7 @@ void cpu_init(void)
 
 	pr_debug("Initializing CPU#%d\n", cpu);
 
-	clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
@@ -1394,7 +1401,7 @@ void cpu_init(void)
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
 
 	if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de)
-		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	load_current_idt();
 	switch_to_new_gdt(cpu);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index cdfed7953963..3be9fa69f875 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,6 +44,7 @@
 
 #include <asm/processor.h>
 #include <asm/traps.h>
+#include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 
@@ -1452,7 +1453,7 @@ static void __mcheck_cpu_init_generic(void)
 	bitmap_fill(all_banks, MAX_NR_BANKS);
 	machine_check_poll(MCP_UC | m_fl, &all_banks);
 
-	set_in_cr4(X86_CR4_MCE);
+	cr4_set_bits(X86_CR4_MCE);
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (cap & MCG_CTL_P)
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index ec2663a708e4..737b0ad4e61a 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -9,6 +9,7 @@
 
 #include <asm/processor.h>
 #include <asm/traps.h>
+#include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 
@@ -65,7 +66,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 	       "Intel old style machine check architecture supported.\n");
 
 	/* Enable MCE: */
-	set_in_cr4(X86_CR4_MCE);
+	cr4_set_bits(X86_CR4_MCE);
 	printk(KERN_INFO
 	       "Intel old style machine check reporting enabled on CPU#%d.\n",
 	       smp_processor_id());
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index bd5d46a32210..44f138296fbe 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -8,6 +8,7 @@
 
 #include <asm/processor.h>
 #include <asm/traps.h>
+#include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 
@@ -36,7 +37,7 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
 	lo &= ~(1<<4);	/* Enable MCE */
 	wrmsr(MSR_IDT_FCR1, lo, hi);
 
-	set_in_cr4(X86_CR4_MCE);
+	cr4_set_bits(X86_CR4_MCE);
 
 	printk(KERN_INFO
 	       "Winchip machine check reporting enabled on CPU#0.\n");
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 9e451b0876b5..f8c81ba0b465 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -138,8 +138,8 @@ static void prepare_set(void)
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
 	if (cpu_has_pge) {
-		cr4 = read_cr4();
-		write_cr4(cr4 & ~X86_CR4_PGE);
+		cr4 = __read_cr4();
+		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
 
 	/*
@@ -171,7 +171,7 @@ static void post_set(void)
 
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
-		write_cr4(cr4);
+		__write_cr4(cr4);
 }
 
 static void cyrix_set_arr(unsigned int reg, unsigned long base,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0e25a1bc5ab5..7d74f7b3c6ba 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
 	if (cpu_has_pge) {
-		cr4 = read_cr4();
-		write_cr4(cr4 & ~X86_CR4_PGE);
+		cr4 = __read_cr4();
+		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
 
 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
-		write_cr4(cr4);
+		__write_cr4(cr4);
 	raw_spin_unlock(&set_atomicity_lock);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 143e5f5dc855..b71a7f86d68a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,8 @@
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include <asm/alternative.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
 #include <asm/timer.h>
 #include <asm/desc.h>
 #include <asm/ldt.h>
@@ -43,6 +45,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
+struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+
 u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1327,8 +1331,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;
 
 	case CPU_STARTING:
-		if (x86_pmu.attr_rdpmc)
-			set_in_cr4(X86_CR4_PCE);
 		if (x86_pmu.cpu_starting)
 			x86_pmu.cpu_starting(cpu);
 		break;
@@ -1804,14 +1806,44 @@ static int x86_pmu_event_init(struct perf_event *event)
 			event->destroy(event);
 	}
 
+	if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+		event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
 	return err;
 }
 
+static void refresh_pce(void *ignored)
+{
+	if (current->mm)
+		load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+		return;
+
+	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+	if (!current->mm)
+		return;
+
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+		return;
+
+	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
 static int x86_pmu_event_idx(struct perf_event *event)
 {
 	int idx = event->hw.idx;
 
-	if (!x86_pmu.attr_rdpmc)
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return 0;
 
 	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
@@ -1829,16 +1861,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
 	return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
 }
 
-static void change_rdpmc(void *info)
-{
-	bool enable = !!(unsigned long)info;
-
-	if (enable)
-		set_in_cr4(X86_CR4_PCE);
-	else
-		clear_in_cr4(X86_CR4_PCE);
-}
-
 static ssize_t set_attr_rdpmc(struct device *cdev,
 			      struct device_attribute *attr,
 			      const char *buf, size_t count)
@@ -1850,14 +1872,27 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 	if (ret)
 		return ret;
 
+	if (val > 2)
+		return -EINVAL;
+
 	if (x86_pmu.attr_rdpmc_broken)
 		return -ENOTSUPP;
 
-	if (!!val != !!x86_pmu.attr_rdpmc) {
-		x86_pmu.attr_rdpmc = !!val;
-		on_each_cpu(change_rdpmc, (void *)val, 1);
+	if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+		/*
+		 * Changing into or out of always available, aka
+		 * perf-event-bypassing mode. This path is extremely slow,
+		 * but only root can trigger it, so it's okay.
+		 */
+		if (val == 2)
+			static_key_slow_inc(&rdpmc_always_available);
+		else
+			static_key_slow_dec(&rdpmc_always_available);
+		on_each_cpu(refresh_pce, NULL, 1);
 	}
 
+	x86_pmu.attr_rdpmc = val;
+
 	return count;
 }
 
@@ -1900,6 +1935,9 @@ static struct pmu pmu = {
 
 	.event_init		= x86_pmu_event_init,
 
+	.event_mapped		= x86_pmu_event_mapped,
+	.event_unmapped		= x86_pmu_event_unmapped,
+
 	.add			= x86_pmu_add,
 	.del			= x86_pmu_del,
 	.start			= x86_pmu_start,
@@ -1914,13 +1952,15 @@ static struct pmu pmu = {
 	.flush_branch_stack	= x86_pmu_flush_branch_stack,
 };
 
-void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
+void arch_perf_update_userpage(struct perf_event *event,
+			       struct perf_event_mmap_page *userpg, u64 now)
 {
 	struct cyc2ns_data *data;
 
 	userpg->cap_user_time = 0;
 	userpg->cap_user_time_zero = 0;
-	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
+	userpg->cap_user_rdpmc =
+		!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
 	if (!sched_clock_stable())
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 4e6cdb0ddc70..df525d2be1e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,8 @@ struct event_constraint {
 #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
+
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index d6c1b9836995..2911ef3a9f1c 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,6 +31,7 @@ static void __init i386_default_early_setup(void)
 
 asmlinkage __visible void __init i386_start_kernel(void)
 {
+	cr4_init_shadow();
 	sanitize_boot_params(&boot_params);
 
 	/* Call the subarch specific early setup function */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index efcddfaf05f9..c4f8d4659070 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -156,6 +156,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 					(__START_KERNEL & PGDIR_MASK)));
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
 
+	cr4_init_shadow();
+
 	/* Kill off the identity-map trampoline */
 	reset_early_page_tables();
 
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 81049ffab2d6..d5651fce0b71 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -13,6 +13,7 @@
 #include <asm/sigcontext.h>
 #include <asm/processor.h>
 #include <asm/math_emu.h>
+#include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/i387.h>
@@ -193,7 +194,7 @@ void fpu_init(void)
 	if (cpu_has_xmm)
 		cr4_mask |= X86_CR4_OSXMMEXCPT;
 	if (cr4_mask)
-		set_in_cr4(cr4_mask);
+		cr4_set_bits(cr4_mask);
 
 	cr0 = read_cr0();
 	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e127ddaa2d5a..046e2d620bbe 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,6 +28,7 @@
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
+#include <asm/tlbflush.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -141,7 +142,7 @@ void flush_thread(void)
 
 static void hard_disable_TSC(void)
 {
-	write_cr4(read_cr4() | X86_CR4_TSD);
+	cr4_set_bits(X86_CR4_TSD);
 }
 
 void disable_TSC(void)
@@ -158,7 +159,7 @@ void disable_TSC(void)
 
 static void hard_enable_TSC(void)
 {
-	write_cr4(read_cr4() & ~X86_CR4_TSD);
+	cr4_clear_bits(X86_CR4_TSD);
 }
 
 static void enable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8f3ebfe710d0..603c4f99cb5a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -101,7 +101,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
-	cr4 = read_cr4_safe();
+	cr4 = __read_cr4_safe();
 	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
 			cr0, cr2, cr3, cr4);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5a2c02913af3..67fcc43577d2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
-	cr4 = read_cr4();
+	cr4 = __read_cr4();
 
 	printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
 	       fs, fsindex, gs, gsindex, shadowgs);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 27d200929864..0a2421cca01f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1179,7 +1179,7 @@ void __init setup_arch(char **cmdline_p)
 
 	if (boot_cpu_data.cpuid_level >= 0) {
 		/* A CPU has %cr4 if and only if it has CPUID */
-		mmu_cr4_features = read_cr4();
+		mmu_cr4_features = __read_cr4();
 		if (trampoline_cr4_features)
 			*trampoline_cr4_features = mmu_cr4_features;
 	}
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 0de1fae2bdf0..34f66e58a896 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -12,6 +12,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/sigframe.h>
+#include <asm/tlbflush.h>
 #include <asm/xcr.h>
 
 /*
@@ -453,7 +454,7 @@ static void prepare_fx_sw_frame(void)
  */
 static inline void xstate_enable(void)
 {
-	set_in_cr4(X86_CR4_OSXSAVE);
+	cr4_set_bits(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
 }
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a17d848c6d42..d319e0c24758 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1583,7 +1583,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
+	unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
 	unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
 
 	if (cr4 & X86_CR4_VMXE)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3f73bfad0349..14c1a18d206a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2871,7 +2871,7 @@ static int hardware_enable(void)
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	u64 old, test_bits;
 
-	if (read_cr4() & X86_CR4_VMXE)
+	if (cr4_read_shadow() & X86_CR4_VMXE)
 		return -EBUSY;
 
 	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
@@ -2898,7 +2898,7 @@ static int hardware_enable(void)
 		/* enable and lock */
 		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
 	}
-	write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
+	cr4_set_bits(X86_CR4_VMXE);
 
 	if (vmm_exclusive) {
 		kvm_cpu_vmxon(phys_addr);
@@ -2935,7 +2935,7 @@ static void hardware_disable(void)
 		vmclear_local_loaded_vmcss();
 		kvm_cpu_vmxoff();
 	}
-	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+	cr4_clear_bits(X86_CR4_VMXE);
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -4450,7 +4450,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
-	cr4 = read_cr4();
+	cr4 = cr4_read_shadow();
 	vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
 	vmx->host_state.vmcs_host_cr4 = cr4;
 
@@ -8146,7 +8146,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
-	cr4 = read_cr4();
+	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
 		vmx->host_state.vmcs_host_cr4 = cr4;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e3ff27a5b634..ede025fb46f1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -600,7 +600,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
 	if (pte && pte_present(*pte) && pte_exec(*pte) &&
 			(pgd_flags(*pgd) & _PAGE_USER) &&
-			(read_cr4() & X86_CR4_SMEP))
+			(__read_cr4() & X86_CR4_SMEP))
 		printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
 	}
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 649da47d3827..553c094b9cd7 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -173,11 +173,11 @@ static void __init probe_page_size_mask(void)
 
 	/* Enable PSE if available */
 	if (cpu_has_pse)
-		set_in_cr4(X86_CR4_PSE);
+		cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
 	/* Enable PGE if available */
 	if (cpu_has_pge) {
-		set_in_cr4(X86_CR4_PGE);
+		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
 }
@@ -713,6 +713,15 @@ void __init zone_sizes_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }
 
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+#ifdef CONFIG_SMP
+	.active_mm = &init_mm,
+	.state = 0,
+#endif
+	.cr4 = ~0UL,	/* fail hard if we screw up cr4 shadow initialization */
+};
+EXPORT_SYMBOL_GPL(cpu_tlbstate);
+
 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 {
 	/* entry 0 MUST be WB (hardwired to speed up translations) */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index ee61c36d64f8..3250f2371aea 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,9 +14,6 @@
 #include <asm/uv/uv.h>
 #include <linux/debugfs.h>
 
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
-			= { &init_mm, 0, };
-
 /*
  * Smarter SMP flushing macros.
  * c/o Linus Torvalds.
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 6ec7910f59bf..3e32ed5648a0 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,11 +105,8 @@ static void __save_processor_state(struct saved_context *ctxt)
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
 	ctxt->cr3 = read_cr3();
-#ifdef CONFIG_X86_32
-	ctxt->cr4 = read_cr4_safe();
-#else
-/* CONFIG_X86_64 */
-	ctxt->cr4 = read_cr4();
+	ctxt->cr4 = __read_cr4_safe();
+#ifdef CONFIG_X86_64
 	ctxt->cr8 = read_cr8();
 #endif
 	ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
@@ -175,12 +172,12 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	/* cr4 was introduced in the Pentium CPU */
 #ifdef CONFIG_X86_32
 	if (ctxt->cr4)
-		write_cr4(ctxt->cr4);
+		__write_cr4(ctxt->cr4);
 #else
 /* CONFIG X86_64 */
 	wrmsrl(MSR_EFER, ctxt->efer);
 	write_cr8(ctxt->cr8);
-	write_cr4(ctxt->cr4);
+	__write_cr4(ctxt->cr4);
 #endif
 	write_cr3(ctxt->cr3);
 	write_cr2(ctxt->cr2);
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index bad628a620c4..0b7a63d98440 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -81,7 +81,7 @@ void __init setup_real_mode(void)
 
 	trampoline_header->start = (u64) secondary_startup_64;
 	trampoline_cr4_features = &trampoline_header->cr4;
-	*trampoline_cr4_features = read_cr4();
+	*trampoline_cr4_features = __read_cr4();
 
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 	trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78a881b7fc41..bd8b8459c3d0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1494,10 +1494,10 @@ static void xen_pvh_set_cr_flags(int cpu)
 	 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init.
 	*/
 	if (cpu_has_pse)
-		set_in_cr4(X86_CR4_PSE);
+		cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
 	if (cpu_has_pge)
-		set_in_cr4(X86_CR4_PGE);
+		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 }
 
 /*
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 922a1acbf652..6adfd7ba4c97 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -47,6 +47,7 @@
 #include <asm/lguest.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/tlbflush.h>
 #include "../lg.h"
 
 static int cpu_had_pge;
@@ -452,9 +453,9 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 static void adjust_pge(void *on)
 {
 	if (on)
-		write_cr4(read_cr4() | X86_CR4_PGE);
+		cr4_set_bits(X86_CR4_PGE);
 	else
-		write_cr4(read_cr4() & ~X86_CR4_PGE);
+		cr4_clear_bits(X86_CR4_PGE);
 }
 
 /*H:020
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2cdc9d422bed..2b621982938d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -202,6 +202,13 @@ struct pmu {
 	 */
 	int (*event_init)		(struct perf_event *event);
 
+	/*
+	 * Notification that the event was mapped or unmapped. Called
+	 * in the context of the mapping task.
+	 */
+	void (*event_mapped)		(struct perf_event *event); /*optional*/
+	void (*event_unmapped)		(struct perf_event *event); /*optional*/
+
 #define PERF_EF_START	0x01		/* start the counter when adding */
 #define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
 #define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8812d8e35f5b..f04daabfd1cf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4101,7 +4101,8 @@ unlock:
 	rcu_read_unlock();
 }
 
-void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
+void __weak arch_perf_update_userpage(
+	struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
 {
 }
 
@@ -4151,7 +4152,7 @@ void perf_event_update_userpage(struct perf_event *event)
 	userpg->time_running = running +
 			atomic64_read(&event->child_total_time_running);
 
-	arch_perf_update_userpage(userpg, now);
+	arch_perf_update_userpage(event, userpg, now);
 
 	barrier();
 	++userpg->lock;
@@ -4293,6 +4294,9 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 
 	atomic_inc(&event->mmap_count);
 	atomic_inc(&event->rb->mmap_count);
+
+	if (event->pmu->event_mapped)
+		event->pmu->event_mapped(event);
 }
 
 /*
@@ -4312,6 +4316,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
 
+	if (event->pmu->event_unmapped)
+		event->pmu->event_unmapped(event);
+
 	atomic_dec(&rb->mmap_count);
 
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
@@ -4513,6 +4520,9 @@ unlock:
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;
 
+	if (event->pmu->event_mapped)
+		event->pmu->event_mapped(event);
+
 	return ret;
 }
 