author		Ingo Molnar <mingo@kernel.org>	2015-03-23 05:50:29 -0400
committer	Ingo Molnar <mingo@kernel.org>	2015-03-23 05:50:29 -0400
commit		e1b63dec2ddba654c7ca75996284e453f32d1af7 (patch)
tree		c48fbfdb84b4e1b6b416b0e2ce7e14cd1350c5f5 /arch/x86/include
parent		f8e617f4582995f7c25ef25b4167213120ad122b (diff)
parent		746db9443ea57fd9c059f62c4bfbf41cf224fe13 (diff)
Merge branch 'sched/urgent' into sched/core, to pick up fixes before applying new patches
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/include')
-rw-r--r--  arch/x86/include/asm/apic.h            |  8
-rw-r--r--  arch/x86/include/asm/fpu-internal.h    |  2
-rw-r--r--  arch/x86/include/asm/imr.h             | 60
-rw-r--r--  arch/x86/include/asm/intel-mid.h       |  3
-rw-r--r--  arch/x86/include/asm/kasan.h           | 31
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h     |  1
-rw-r--r--  arch/x86/include/asm/kvm_host.h        | 59
-rw-r--r--  arch/x86/include/asm/lguest_hcall.h    |  1
-rw-r--r--  arch/x86/include/asm/livepatch.h       | 46
-rw-r--r--  arch/x86/include/asm/mmu.h             |  2
-rw-r--r--  arch/x86/include/asm/mmu_context.h     | 33
-rw-r--r--  arch/x86/include/asm/page_64_types.h   | 12
-rw-r--r--  arch/x86/include/asm/paravirt.h        |  6
-rw-r--r--  arch/x86/include/asm/pgtable-2level.h  | 38
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h  | 12
-rw-r--r--  arch/x86/include/asm/pgtable.h         | 68
-rw-r--r--  arch/x86/include/asm/pgtable_64.h      | 11
-rw-r--r--  arch/x86/include/asm/pgtable_types.h   | 46
-rw-r--r--  arch/x86/include/asm/processor.h       | 33
-rw-r--r--  arch/x86/include/asm/special_insns.h   |  6
-rw-r--r--  arch/x86/include/asm/spinlock.h        | 94
-rw-r--r--  arch/x86/include/asm/string_64.h       | 18
-rw-r--r--  arch/x86/include/asm/thread_info.h     |  4
-rw-r--r--  arch/x86/include/asm/tlbflush.h        | 77
-rw-r--r--  arch/x86/include/asm/uaccess.h         |  2
-rw-r--r--  arch/x86/include/asm/virtext.h         |  5
-rw-r--r--  arch/x86/include/asm/vmx.h             |  4
-rw-r--r--  arch/x86/include/asm/xen/page.h        | 20
-rw-r--r--  arch/x86/include/asm/xsave.h           | 28
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h     | 11
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h  |  8
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h        |  6
32 files changed, 452 insertions, 303 deletions
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 92003f3c8a42..efc3b22d896e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -213,7 +213,15 @@ void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+	return -1;
+}
+#else
 extern int apic_force_enable(unsigned long addr);
+#endif
 
 extern int apic_bsp_setup(bool upmode);
 extern void apic_ap_setup(void);
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 0dbc08282291..72ba21a8b5fc 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -370,7 +370,7 @@ static inline void drop_fpu(struct task_struct *tsk)
 	preempt_disable();
 	tsk->thread.fpu_counter = 0;
 	__drop_fpu(tsk);
-	clear_used_math();
+	clear_stopped_child_used_math(tsk);
 	preempt_enable();
 }
 
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
new file mode 100644
index 000000000000..cd2ce4068441
--- /dev/null
+++ b/arch/x86/include/asm/imr.h
@@ -0,0 +1,60 @@
+/*
+ * imr.h: Isolated Memory Region API
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _IMR_H
+#define _IMR_H
+
+#include <linux/types.h>
+
+/*
+ * IMR agent access mask bits
+ * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register
+ * definitions.
+ */
+#define IMR_ESRAM_FLUSH	BIT(31)
+#define IMR_CPU_SNOOP	BIT(30)	/* Applicable only to write */
+#define IMR_RMU		BIT(29)
+#define IMR_VC1_SAI_ID3	BIT(15)
+#define IMR_VC1_SAI_ID2	BIT(14)
+#define IMR_VC1_SAI_ID1	BIT(13)
+#define IMR_VC1_SAI_ID0	BIT(12)
+#define IMR_VC0_SAI_ID3	BIT(11)
+#define IMR_VC0_SAI_ID2	BIT(10)
+#define IMR_VC0_SAI_ID1	BIT(9)
+#define IMR_VC0_SAI_ID0	BIT(8)
+#define IMR_CPU_0	BIT(1)	/* SMM mode */
+#define IMR_CPU		BIT(0)	/* Non SMM mode */
+#define IMR_ACCESS_NONE	0
+
+/*
+ * Read/Write access-all bits here include some reserved bits
+ * These are the values firmware uses and are accepted by hardware.
+ * The kernel defines read/write access-all in the same way as firmware
+ * in order to have a consistent and crisp definition across firmware,
+ * bootloader and kernel.
+ */
+#define IMR_READ_ACCESS_ALL	0xBFFFFFFF
+#define IMR_WRITE_ACCESS_ALL	0xFFFFFFFF
+
+/* Number of IMRs provided by Quark X1000 SoC */
+#define QUARK_X1000_IMR_MAX	0x08
+#define QUARK_X1000_IMR_REGBASE	0x40
+
+/* IMR alignment bits - only bits 31:10 are checked for IMR validity */
+#define IMR_ALIGN	0x400
+#define IMR_MASK	(IMR_ALIGN - 1)
+
+int imr_add_range(phys_addr_t base, size_t size,
+		  unsigned int rmask, unsigned int wmask, bool lock);
+
+int imr_remove_range(phys_addr_t base, size_t size);
+
+#endif /* _IMR_H */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index e34e097b6f9d..705d35708a50 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
 #define SFI_MTMR_MAX_NUM 8
 #define SFI_MRTC_MAX 8
 
-extern struct console early_mrst_console;
-extern void mrst_early_console_init(void);
-
 extern struct console early_hsu_console;
 extern void hsu_early_console_init(const char *);
 
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
new file mode 100644
index 000000000000..8b22422fbad8
--- /dev/null
+++ b/arch/x86/include/asm/kasan.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_X86_KASAN_H
+#define _ASM_X86_KASAN_H
+
+/*
+ * Compiler uses shadow offset assuming that addresses start
+ * from 0. Kernel addresses don't start from 0, so shadow
+ * for kernel really starts from compiler's shadow offset +
+ * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
+ */
+#define KASAN_SHADOW_START	(KASAN_SHADOW_OFFSET + \
+					(0xffff800000000000ULL >> 3))
+/* 47 bits for kernel address -> (47 - 3) bits for shadow */
+#define KASAN_SHADOW_END	(KASAN_SHADOW_START + (1ULL << (47 - 3)))
+
+#ifndef __ASSEMBLY__
+
+extern pte_t kasan_zero_pte[];
+extern pte_t kasan_zero_pmd[];
+extern pte_t kasan_zero_pud[];
+
+#ifdef CONFIG_KASAN
+void __init kasan_map_early_shadow(pgd_t *pgd);
+void __init kasan_init(void);
+#else
+static inline void kasan_map_early_shadow(pgd_t *pgd) { }
+static inline void kasan_init(void) { }
+#endif
+
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index eb181178fe0b..57a9d94fe160 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -208,6 +208,7 @@ struct x86_emulate_ops {
 
 	void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
 			  u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+	void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
 };
 
 typedef u32 __attribute__((vector_size(16))) sse128_t;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d89c6b828c96..a236e39cc385 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -38,8 +38,6 @@
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
-#define KVM_MMIO_SIZE 16
-
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
@@ -51,7 +49,7 @@
 	| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
 #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
-#define CR3_PCID_INVD (1UL << 63)
+#define CR3_PCID_INVD BIT_64(63)
 #define CR4_RESERVED_BITS \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 	| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
@@ -160,6 +158,18 @@ enum {
 #define DR7_FIXED_1 0x00000400
 #define DR7_VOLATILE 0xffff2bff
 
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC 0
 /*
@@ -615,6 +625,8 @@ struct kvm_arch {
 #ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
 #endif
+
+	bool boot_vcpu_runs_old_kvmclock;
 };
 
 struct kvm_vm_stat {
@@ -643,6 +655,7 @@ struct kvm_vcpu_stat {
 	u32 irq_window_exits;
 	u32 nmi_window_exits;
 	u32 halt_exits;
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
 	u32 irq_exits;
@@ -787,6 +800,31 @@ struct kvm_x86_ops {
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
 	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+
+	/*
+	 * Arch-specific dirty logging hooks. These hooks are only supposed to
+	 * be valid if the specific arch has hardware-accelerated dirty logging
+	 * mechanism. Currently only for PML on VMX.
+	 *
+	 * - slot_enable_log_dirty:
+	 *	called when enabling log dirty mode for the slot.
+	 * - slot_disable_log_dirty:
+	 *	called when disabling log dirty mode for the slot.
+	 *	also called when slot is created with log dirty disabled.
+	 * - flush_log_dirty:
+	 *	called before reporting dirty_bitmap to userspace.
+	 * - enable_log_dirty_pt_masked:
+	 *	called when reenabling log dirty for the GFNs in the mask after
+	 *	corresponding bits are cleared in slot->dirty_bitmap.
+	 */
+	void (*slot_enable_log_dirty)(struct kvm *kvm,
+				      struct kvm_memory_slot *slot);
+	void (*slot_disable_log_dirty)(struct kvm *kvm,
+				       struct kvm_memory_slot *slot);
+	void (*flush_log_dirty)(struct kvm *kvm);
+	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
+					   struct kvm_memory_slot *slot,
+					   gfn_t offset, unsigned long mask);
 };
 
 struct kvm_arch_async_pf {
@@ -819,10 +857,17 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
-void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
-				     struct kvm_memory_slot *slot,
-				     gfn_t gfn_offset, unsigned long mask);
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_set_dirty(struct kvm *kvm,
+			    struct kvm_memory_slot *memslot);
+void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+				   struct kvm_memory_slot *slot,
+				   gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index 879fd7d33877..ef01fef3eebc 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -16,7 +16,6 @@
 #define LHCALL_SET_PTE 14
 #define LHCALL_SET_PGD 15
 #define LHCALL_LOAD_TLS 16
-#define LHCALL_NOTIFY 17
 #define LHCALL_LOAD_GDT_ENTRY 18
 #define LHCALL_SEND_INTERRUPTS 19
 
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
new file mode 100644
index 000000000000..a455a53d789a
--- /dev/null
+++ b/arch/x86/include/asm/livepatch.h
@@ -0,0 +1,46 @@
+/*
+ * livepatch.h - x86-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+ * Copyright (C) 2014 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ASM_X86_LIVEPATCH_H
+#define _ASM_X86_LIVEPATCH_H
+
+#include <linux/module.h>
+#include <linux/ftrace.h>
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+#ifndef CC_USING_FENTRY
+	return 1;
+#endif
+	return 0;
+}
+extern int klp_write_module_reloc(struct module *mod, unsigned long type,
+				  unsigned long loc, unsigned long value);
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->ip = ip;
+}
+#else
+#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#endif
+
+#endif /* _ASM_X86_LIVEPATCH_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 876e74e8eec7..09b9620a73b4 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {
 
 	struct mutex lock;
 	void __user *vdso;
+
+	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4b75d591eb5e..883f6b933fa4 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -18,6 +18,21 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 }
 #endif /* !CONFIG_PARAVIRT */
 
+#ifdef CONFIG_PERF_EVENTS
+extern struct static_key rdpmc_always_available;
+
+static inline void load_mm_cr4(struct mm_struct *mm)
+{
+	if (static_key_true(&rdpmc_always_available) ||
+	    atomic_read(&mm->context.perf_rdpmc_allowed))
+		cr4_set_bits(X86_CR4_PCE);
+	else
+		cr4_clear_bits(X86_CR4_PCE);
+}
+#else
+static inline void load_mm_cr4(struct mm_struct *mm) {}
+#endif
+
 /*
  * Used for LDT copy/destruction.
  */
@@ -52,15 +67,20 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* Stop flush ipis for the previous mm */
 		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 
+		/* Load per-mm CR4 state */
+		load_mm_cr4(next);
+
 		/*
 		 * Load the LDT, if the LDT is different.
 		 *
-		 * It's possible leave_mm(prev) has been called. If so,
-		 * then prev->context.ldt could be out of sync with the
-		 * LDT descriptor or the LDT register. This can only happen
-		 * if prev->context.ldt is non-null, since we never free
-		 * an LDT. But LDTs can't be shared across mms, so
-		 * prev->context.ldt won't be equal to next->context.ldt.
+		 * It's possible that prev->context.ldt doesn't match
+		 * the LDT register. This can happen if leave_mm(prev)
+		 * was called and then modify_ldt changed
+		 * prev->context.ldt but suppressed an IPI to this CPU.
+		 * In this case, prev->context.ldt != NULL, because we
+		 * never free an LDT while the mm still exists. That
+		 * means that next->context.ldt != prev->context.ldt,
+		 * because mms never share an LDT.
 		 */
 		if (unlikely(prev->context.ldt != next->context.ldt))
 			load_LDT_nolock(&next->context);
@@ -85,6 +105,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		 */
 		load_cr3(next->pgd);
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+		load_mm_cr4(next);
 		load_LDT_nolock(&next->context);
 	}
 }
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 75450b2c7be4..4edd53b79a81 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,17 +1,23 @@
 #ifndef _ASM_X86_PAGE_64_DEFS_H
 #define _ASM_X86_PAGE_64_DEFS_H
 
-#define THREAD_SIZE_ORDER 2
+#ifdef CONFIG_KASAN
+#define KASAN_STACK_ORDER 1
+#else
+#define KASAN_STACK_ORDER 0
+#endif
+
+#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define CURRENT_MASK (~(THREAD_SIZE - 1))
 
-#define EXCEPTION_STACK_ORDER 0
+#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
 #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
 
 #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
 #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
 
-#define IRQ_STACK_ORDER 2
+#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
 #define DOUBLEFAULT_STACK 1
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 32444ae939ca..965c47d254aa 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 206a87fdd22d..fd74a11959de 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -62,44 +62,8 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
 	return ((value >> rightshift) & mask) << leftshift;
 }
 
-/*
- * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
- * split up the 29 bits of offset into this range.
- */
-#define PTE_FILE_MAX_BITS 29
-#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
-#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
-#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
-#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
-#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
-
-#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
-#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
-
-#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
-#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
-
-static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
-{
-	return (pgoff_t)
-		(pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) +
-		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) +
-		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3));
-}
-
-static __always_inline pte_t pgoff_to_pte(pgoff_t off)
-{
-	return (pte_t){
-		.pte_low =
-			pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) +
-			pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) +
-			pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) +
-			_PAGE_FILE,
-	};
-}
-
 /* Encode and de-code a swap entry */
-#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
+#define SWP_TYPE_BITS 5
 #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 81bb91b49a88..cdaa58c9b39e 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -176,18 +176,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
-/*
- * Bits 0, 6 and 7 are taken in the low part of the pte,
- * put the 32 bits of offset into the high part.
- *
- * For soft-dirty tracking 11 bit is taken from
- * the low part of pte as well.
- */
-#define pte_to_pgoff(pte) ((pte).pte_high)
-#define pgoff_to_pte(off) \
-	((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
-#define PTE_FILE_MAX_BITS 32
-
 /* Encode and de-code a swap entry */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
 #define __swp_type(x) (((x).val) & 0x1f)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e8a5454acc99..a0c35bf6cb92 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -115,11 +115,6 @@ static inline int pte_write(pte_t pte)
 	return pte_flags(pte) & _PAGE_RW;
 }
 
-static inline int pte_file(pte_t pte)
-{
-	return pte_flags(pte) & _PAGE_FILE;
-}
-
 static inline int pte_huge(pte_t pte)
 {
 	return pte_flags(pte) & _PAGE_PSE;
@@ -137,13 +132,7 @@ static inline int pte_exec(pte_t pte)
 
 static inline int pte_special(pte_t pte)
 {
-	/*
-	 * See CONFIG_NUMA_BALANCING pte_numa in include/asm-generic/pgtable.h.
-	 * On x86 we have _PAGE_BIT_NUMA == _PAGE_BIT_GLOBAL+1 ==
-	 * __PAGE_BIT_SOFTW1 == _PAGE_BIT_SPECIAL.
-	 */
-	return (pte_flags(pte) & _PAGE_SPECIAL) &&
-		(pte_flags(pte) & (_PAGE_PRESENT|_PAGE_PROTNONE));
+	return pte_flags(pte) & _PAGE_SPECIAL;
 }
 
 static inline unsigned long pte_pfn(pte_t pte)
@@ -305,7 +294,7 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
 
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 {
-	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+	return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -329,21 +318,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
-static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
-{
-	return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
-}
-
-static inline pte_t pte_file_mksoft_dirty(pte_t pte)
-{
-	return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
-}
-
-static inline int pte_file_soft_dirty(pte_t pte)
-{
-	return pte_flags(pte) & _PAGE_SOFT_DIRTY;
-}
-
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 /*
@@ -463,13 +437,6 @@ static inline int pte_same(pte_t a, pte_t b)
 
 static inline int pte_present(pte_t a)
 {
-	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
-			       _PAGE_NUMA);
-}
-
-#define pte_present_nonuma pte_present_nonuma
-static inline int pte_present_nonuma(pte_t a)
-{
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
@@ -479,7 +446,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 	if (pte_flags(a) & _PAGE_PRESENT)
 		return true;
 
-	if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
+	if ((pte_flags(a) & _PAGE_PROTNONE) &&
 		mm_tlb_flush_pending(mm))
 		return true;
 
@@ -499,10 +466,27 @@ static inline int pmd_present(pmd_t pmd)
 	 * the _PAGE_PSE flag will remain set at all times while the
 	 * _PAGE_PRESENT bit is clear).
 	 */
-	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
-				 _PAGE_NUMA);
+	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
+		== _PAGE_PROTNONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
+		== _PAGE_PROTNONE;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static inline int pmd_none(pmd_t pmd)
 {
 	/* Only check low word on 32-bit platforms, since it might be
@@ -559,11 +543,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
 
 static inline int pmd_bad(pmd_t pmd)
 {
-#ifdef CONFIG_NUMA_BALANCING
-	/* pmd_numa check */
-	if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
-		return 0;
-#endif
 	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
 }
 
@@ -882,19 +861,16 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
 
 static inline int pte_swp_soft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
 }
 
 static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
 #endif
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 4572b2f30237..2ee781114d34 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -133,10 +133,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 /* PUD - Level3 access */
 
 /* PMD - Level 2 access */
-#define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
-					_PAGE_FILE })
-#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
 
 /* PTE - Level 1 access. */
 
@@ -145,13 +141,8 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
 /* Encode and de-code a swap entry */
-#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
-#ifdef CONFIG_NUMA_BALANCING
-/* Automatic NUMA balancing needs to be distinguishable from swap entries */
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
-#else
+#define SWP_TYPE_BITS 5
 #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
-#endif
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 25bcd4a89517..8c7c10802e9c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -4,7 +4,7 @@
 #include <linux/const.h>
 #include <asm/page_types.h>
 
-#define FIRST_USER_ADDRESS 0
+#define FIRST_USER_ADDRESS 0UL
 
 #define _PAGE_BIT_PRESENT 0 /* is present */
 #define _PAGE_BIT_RW 1 /* writeable */
@@ -27,19 +27,9 @@
 #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
 
-/*
- * Swap offsets on configurations that allow automatic NUMA balancing use the
- * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from
- * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the
- * maximum possible swap space from 16TB to 8TB.
- */
-#define _PAGE_BIT_NUMA (_PAGE_BIT_GLOBAL+1)
-
 /* If _PAGE_BIT_PRESENT is clear, we use these: */
 /* - if the user mapped it with PROT_NONE; pte_present gives true */
 #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
-/* - set: nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
 
 #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
 #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
@@ -78,21 +68,6 @@
 #endif
 
 /*
- * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page
- * that is not present. The hinting fault gathers numa placement statistics
- * (see pte_numa()). The bit is always zero when the PTE is not present.
- *
- * The bit picked must be always zero when the pmd is present and not
- * present, so that we don't lose information when we set it while
- * atomically clearing the present bit.
- */
-#ifdef CONFIG_NUMA_BALANCING
-#define _PAGE_NUMA (_AT(pteval_t, 1) << _PAGE_BIT_NUMA)
-#else
-#define _PAGE_NUMA (_AT(pteval_t, 0))
-#endif
-
-/*
  * Tracking soft dirty bit when a page goes to a swap is tricky.
  * We need a bit which can be stored in pte _and_ not conflict
  * with swap entry format. On x86 bits 6 and 7 are *not* involved
@@ -114,7 +89,6 @@
 #define _PAGE_NX (_AT(pteval_t, 0))
 #endif
 
-#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
 #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
 #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
@@ -125,8 +99,8 @@
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
 	 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
-	 _PAGE_SOFT_DIRTY | _PAGE_NUMA)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)
+	 _PAGE_SOFT_DIRTY)
+#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 /*
  * The cache modes defined here are used to translate between pure SW usage
@@ -327,20 +301,6 @@ static inline pteval_t pte_flags(pte_t pte)
 	return native_pte_val(pte) & PTE_FLAGS_MASK;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-/* Set of bits that distinguishes present, prot_none and numa ptes */
-#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)
-static inline pteval_t ptenuma_flags(pte_t pte)
-{
-	return pte_flags(pte) & _PAGE_NUMA_MASK;
-}
-
-static inline pmdval_t pmdnuma_flags(pmd_t pmd)
-{
-	return pmd_flags(pmd) & _PAGE_NUMA_MASK;
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 #define pgprot_val(x) ((x).pgprot)
 #define __pgprot(x) ((pgprot_t) { (x) } )
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a092a0cce0b7..ec1c93588cef 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -579,39 +579,6 @@ static inline void load_sp0(struct tss_struct *tss,
 #define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-extern u32 *trampoline_cr4_features;
-
-static inline void set_in_cr4(unsigned long mask)
-{
-	unsigned long cr4;
-
-	mmu_cr4_features |= mask;
-	if (trampoline_cr4_features)
-		*trampoline_cr4_features = mmu_cr4_features;
-	cr4 = read_cr4();
-	cr4 |= mask;
-	write_cr4(cr4);
-}
-
-static inline void clear_in_cr4(unsigned long mask)
-{
-	unsigned long cr4;
-
-	mmu_cr4_features &= ~mask;
-	if (trampoline_cr4_features)
-		*trampoline_cr4_features = mmu_cr4_features;
-	cr4 = read_cr4();
-	cr4 &= ~mask;
-	write_cr4(cr4);
-}
-
 typedef struct {
 	unsigned long seg;
 } mm_segment_t;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index e820c080a4e9..6a4b00fafb00 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
 	native_write_cr3(x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return native_read_cr4();
 }
 
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return native_read_cr4_safe();
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
 }
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 625660f8a2fc..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+							__ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 	arch_spinlock_t old, new;
 
 	old.tickets = READ_ONCE(lock->tickets);
-	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
 		return 0;
 
 	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
-					    arch_spinlock_t old)
-{
-	arch_spinlock_t new;
-
-	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-	/* Perform the unlock on the "before" copy */
-	old.tickets.head += TICKET_LOCK_INC;
-
-	/* Clear the slowpath flag */
-	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
-	/*
-	 * If the lock is uncontended, clear the flag - use cmpxchg in
-	 * case it changes behind our back though.
-	 */
-	if (new.tickets.head != new.tickets.tail ||
-	    cmpxchg(&lock->head_tail, old.head_tail,
-		    new.head_tail) != old.head_tail) {
-		/*
-		 * Lock still has someone queued for it, so wake up an
-		 * appropriate waiter.
-		 */
-		__ticket_unlock_kick(lock, old.tickets.head);
-	}
-}
-
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	if (TICKET_SLOWPATH_FLAG &&
 	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		arch_spinlock_t prev;
+		__ticket_t head;
 
-		prev = *lock;
-		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
 
-		/* add_smp() is a full mb() */
+		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
 
-		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
-			__ticket_unlock_slowpath(lock, prev);
+		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+			head &= ~TICKET_SLOWPATH_FLAG;
+			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+		}
 	} else
 		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
 }
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return tmp.tail != tmp.head;
+	return !__tickets_equal(tmp.tail, tmp.head);
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	tmp.head &= ~TICKET_SLOWPATH_FLAG;
+	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended arch_spin_is_contended
 
@@ -183,16 +181,16 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	__ticket_t head = ACCESS_ONCE(lock->tickets.head);
+	__ticket_t head = READ_ONCE(lock->tickets.head);
 
 	for (;;) {
-		struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+		struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 		/*
 		 * We need to check "unlocked" in a loop, tmp.head == head
 		 * can be false positive because of overflow.
 		 */
-		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
-		    tmp.head != head)
+		if (__tickets_equal(tmp.head, tmp.tail) ||
+		    !__tickets_equal(tmp.head, head))
 			break;
 
 		cpu_relax();
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 19e2c468fc2c..e4661196994e 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,11 +27,12 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
    function. */
 
 #define __HAVE_ARCH_MEMCPY 1
+extern void *__memcpy(void *to, const void *from, size_t len);
+
 #ifndef CONFIG_KMEMCHECK
 #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
 extern void *memcpy(void *to, const void *from, size_t len);
 #else
-extern void *__memcpy(void *to, const void *from, size_t len);
 #define memcpy(dst, src, len) \
 ({ \
 	size_t __len = (len); \
@@ -53,9 +54,11 @@ extern void *__memcpy(void *to, const void *from, size_t len);
 
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
+void *__memset(void *s, int c, size_t n);
 
 #define __HAVE_ARCH_MEMMOVE
 void *memmove(void *dest, const void *src, size_t count);
+void *__memmove(void *dest, const void *src, size_t count);
 
 int memcmp(const void *cs, const void *ct, size_t count);
 size_t strlen(const char *s);
@@ -63,6 +66,19 @@ char *strcpy(char *dest, const char *src);
 char *strcat(char *dest, const char *src);
 int strcmp(const char *cs, const char *ct);
 
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#undef memcpy
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e82e95abc92b..1d4e4f279a32 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -31,7 +31,6 @@ struct thread_info {
 	__u32 cpu; /* current CPU */
 	int saved_preempt_count;
 	mm_segment_t addr_limit;
-	struct restart_block restart_block;
 	void __user *sysenter_return;
 	unsigned int sig_on_uaccess_error:1;
 	unsigned int uaccess_err:1; /* uaccess failed */
@@ -45,9 +44,6 @@ struct thread_info {
 	.cpu = 0, \
 	.saved_preempt_count = INIT_PREEMPT_COUNT, \
 	.addr_limit = KERNEL_DS, \
-	.restart_block = { \
-		.fn = do_no_restart_syscall, \
-	}, \
 }
 
 #define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 04905bfc508b..cd791948b286 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -15,6 +15,75 @@
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+struct tlb_state {
+#ifdef CONFIG_SMP
+	struct mm_struct *active_mm;
+	int state;
+#endif
+
+	/*
+	 * Access to this CR4 shadow and to H/W CR4 is protected by
+	 * disabling interrupts when modifying either one.
+	 */
+	unsigned long cr4;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+/* Initialize cr4 shadow for this CPU. */
+static inline void cr4_init_shadow(void)
+{
+	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
+}
+
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
+{
+	unsigned long cr4;
+
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 | mask) != cr4) {
+		cr4 |= mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
+}
+
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits(unsigned long mask)
+{
+	unsigned long cr4;
+
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 & ~mask) != cr4) {
+		cr4 &= ~mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
+}
+
+/* Read the CR4 shadow. */
+static inline unsigned long cr4_read_shadow(void)
+{
+	return this_cpu_read(cpu_tlbstate.cr4);
+}
+
+/*
+ * Save some of cr4 feature set we're using (e.g. Pentium 4MB
+ * enable and PPro Global page enable), so that any CPU's that boot
+ * up after us can get the correct flags. This should only be used
+ * during boot on the boot cpu.
+ */
+extern unsigned long mmu_cr4_features;
+extern u32 *trampoline_cr4_features;
+
+static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+{
+	mmu_cr4_features |= mask;
+	if (trampoline_cr4_features)
+		*trampoline_cr4_features = mmu_cr4_features;
+	cr4_set_bits(mask);
+}
+
 static inline void __native_flush_tlb(void)
 {
 	native_write_cr3(native_read_cr3());
@@ -24,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
 {
 	unsigned long cr4;
 
-	cr4 = native_read_cr4();
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	/* clear PGE */
 	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
@@ -184,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 #define TLBSTATE_OK 1
 #define TLBSTATE_LAZY 2
 
-struct tlb_state {
-	struct mm_struct *active_mm;
-	int state;
-};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
-
 static inline void reset_lazy_tlbstate(void)
 {
 	this_cpu_write(cpu_tlbstate.state, 0);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0d592e0a5b84..ace9dec050b1 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -179,7 +179,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 	asm volatile("call __get_user_%P3" \
 		     : "=a" (__ret_gu), "=r" (__val_gu) \
 		     : "0" (ptr), "i" (sizeof(*(ptr)))); \
-	(x) = (__typeof__(*(ptr))) __val_gu; \
+	(x) = (__force __typeof__(*(ptr))) __val_gu; \
 	__ret_gu; \
 })
 
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 5da71c27cc59..cce9ee68e335 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -19,6 +19,7 @@
 
 #include <asm/vmx.h>
 #include <asm/svm.h>
+#include <asm/tlbflush.h>
 
 /*
  * VMX functions:
@@ -40,12 +41,12 @@ static inline int cpu_has_vmx(void)
 static inline void cpu_vmxoff(void)
 {
 	asm volatile (ASM_VMX_VMXOFF : : : "cc");
-	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+	cr4_clear_bits(X86_CR4_VMXE);
 }
 
 static inline int cpu_vmx_enabled(void)
 {
-	return read_cr4() & X86_CR4_VMXE;
+	return __read_cr4() & X86_CR4_VMXE;
 }
 
 /** Disable VMX if it is enabled on the current CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 45afaee9555c..da772edd19ab 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
 #define SECONDARY_EXEC_XSAVES 0x00100000
 
 
@@ -121,6 +122,7 @@ enum vmcs_field {
 	GUEST_LDTR_SELECTOR = 0x0000080c,
 	GUEST_TR_SELECTOR = 0x0000080e,
 	GUEST_INTR_STATUS = 0x00000810,
+	GUEST_PML_INDEX = 0x00000812,
 	HOST_ES_SELECTOR = 0x00000c00,
 	HOST_CS_SELECTOR = 0x00000c02,
 	HOST_SS_SELECTOR = 0x00000c04,
@@ -140,6 +142,8 @@ enum vmcs_field {
 	VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
 	VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
 	VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+	PML_ADDRESS                     = 0x0000200e,
+	PML_ADDRESS_HIGH                = 0x0000200f,
 	TSC_OFFSET                      = 0x00002010,
 	TSC_OFFSET_HIGH                 = 0x00002011,
 	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
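
SECONDARY_EXEC_ENABLE_PML, PML_ADDRESS and GUEST_PML_INDEX back Page Modification Logging: the CPU appends the guest-physical address of each page it dirties to a 4-KiB log page and counts GUEST_PML_INDEX down, exiting with EXIT_REASON_PML_FULL (added further down in uapi/asm/vmx.h) when the log fills. A rough user-space model of draining such a log follows; the buffer handling and helper names are illustrative assumptions, not KVM code.

/*
 * Rough model of draining a PML buffer: 512 u64 GPA entries in one
 * 4-KiB page, filled from the end, with the index counting down from 511.
 */
#include <stdio.h>
#include <stdint.h>

#define PML_ENTITY_NUM 512                      /* entries per 4-KiB PML page */

static uint64_t pml_buf[PML_ENTITY_NUM];        /* would sit at PML_ADDRESS */
static uint16_t pml_index = PML_ENTITY_NUM - 1; /* would be GUEST_PML_INDEX */

static void mark_gpa_dirty(uint64_t gpa)
{
        printf("dirty GPA %#llx\n", (unsigned long long)gpa);
}

/* What a hypervisor might do on a PML-full exit, or before reusing the page. */
static void flush_pml_buffer(void)
{
        uint16_t i;

        if (pml_index == PML_ENTITY_NUM - 1)
                return;                         /* nothing logged yet */

        /* The CPU writes entries from the end of the page downwards. */
        for (i = pml_index + 1; i < PML_ENTITY_NUM; i++)
                mark_gpa_dirty(pml_buf[i] & ~0xfffULL);

        pml_index = PML_ENTITY_NUM - 1;         /* reset for the next round */
}

int main(void)
{
        /* Pretend the CPU logged two dirtied guest pages. */
        pml_buf[511] = 0x12345000;
        pml_buf[510] = 0x6789a000;
        pml_index = 509;

        flush_pml_buffer();
        return 0;
}
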
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 5eea09915a15..358dcd338915 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -55,9 +55,8 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);
 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
-				     struct gnttab_map_grant_ref *kmap_ops,
+				     struct gnttab_unmap_grant_ref *kunmap_ops,
 				     struct page **pages, unsigned int count);
-extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
 /*
  * Helper functions to write or read unsigned long values to/from
@@ -154,21 +153,12 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 		return mfn;
 
 	pfn = mfn_to_pfn_no_overrides(mfn);
-	if (__pfn_to_mfn(pfn) != mfn) {
-		/*
-		 * If this appears to be a foreign mfn (because the pfn
-		 * doesn't map back to the mfn), then check the local override
-		 * table to see if there's a better pfn to use.
-		 *
-		 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
-		 */
-		pfn = m2p_find_override_pfn(mfn, ~0);
-	}
+	if (__pfn_to_mfn(pfn) != mfn)
+		pfn = ~0;
 
 	/*
-	 * pfn is ~0 if there are no entries in the m2p for mfn or if the
-	 * entry doesn't map back to the mfn and m2p_override doesn't have a
-	 * valid entry for it.
+	 * pfn is ~0 if there are no entries in the m2p for mfn or the
+	 * entry doesn't map back to the mfn.
 	 */
 	if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
 		pfn = mfn;
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 5fa9770035dc..c9a6d68b8d62 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -82,18 +82,15 @@ static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		asm volatile("1:"XSAVES"\n\t"
 			"2:\n\t"
-			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			     xstate_fault
+			: "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
 			:   "memory");
 	else
 		asm volatile("1:"XSAVE"\n\t"
 			"2:\n\t"
-			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			     xstate_fault
+			: "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
 			:   "memory");
-
-	asm volatile(xstate_fault
-		     : "0" (0)
-		     : "memory");
-
 	return err;
 }
 
@@ -112,18 +109,15 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		asm volatile("1:"XRSTORS"\n\t"
 			"2:\n\t"
-			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			     xstate_fault
+			: "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
 			:   "memory");
 	else
 		asm volatile("1:"XRSTOR"\n\t"
 			"2:\n\t"
-			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			     xstate_fault
+			: "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
 			:   "memory");
-
-	asm volatile(xstate_fault
-		     : "0" (0)
-		     : "memory");
-
 	return err;
 }
 
@@ -149,9 +143,9 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask)
 	 */
 	alternative_input_2(
 		"1:"XSAVE,
-		"1:"XSAVEOPT,
+		XSAVEOPT,
 		X86_FEATURE_XSAVEOPT,
-		"1:"XSAVES,
+		XSAVES,
 		X86_FEATURE_XSAVES,
 		[fx] "D" (fx), "a" (lmask), "d" (hmask) :
 		"memory");
@@ -178,7 +172,7 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
 	 */
 	alternative_input(
 		"1: " XRSTOR,
-		"1: " XRSTORS,
+		XRSTORS,
 		X86_FEATURE_XSAVES,
 		"D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
 		: "memory");
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 462efe746d77..90c458e66e13 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -187,6 +187,17 @@
 #define HV_X64_MSR_SINT14			0x4000009E
 #define HV_X64_MSR_SINT15			0x4000009F
 
+/*
+ * Synthetic Timer MSRs. Four timers per vcpu.
+ */
+#define HV_X64_MSR_STIMER0_CONFIG		0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT		0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG		0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT		0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG		0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT		0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG		0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT		0x400000B7
 
 #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
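
Each of the four synthetic timers exposes a CONFIG/COUNT MSR pair at consecutive addresses, so timer N's registers can be derived from the timer-0 values. A small sketch of that indexing; the helper names are assumptions, not part of the header.

/*
 * Derive the per-timer MSR numbers from the timer-0 base; the CONFIG/COUNT
 * pairs are laid out back to back.
 */
#include <stdio.h>

#define HV_X64_MSR_STIMER0_CONFIG	0x400000B0
#define HV_X64_MSR_STIMER0_COUNT	0x400000B1

static unsigned int stimer_config_msr(unsigned int timer)
{
        return HV_X64_MSR_STIMER0_CONFIG + 2 * timer;
}

static unsigned int stimer_count_msr(unsigned int timer)
{
        return HV_X64_MSR_STIMER0_COUNT + 2 * timer;
}

int main(void)
{
        unsigned int t;

        for (t = 0; t < 4; t++)         /* four timers per vcpu */
                printf("timer %u: config %#x, count %#x\n",
                       t, stimer_config_msr(t), stimer_count_msr(t));
        return 0;
}
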
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index d979e5abae55..3ce079136c11 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -152,6 +152,10 @@
 #define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
 #define MSR_MC6_DEMOTION_POLICY_CONFIG	0x00000669
 
+#define MSR_CORE_PERF_LIMIT_REASONS	0x00000690
+#define MSR_GFX_PERF_LIMIT_REASONS	0x000006B0
+#define MSR_RING_PERF_LIMIT_REASONS	0x000006B1
+
 /* Hardware P state interface */
 #define MSR_PPERF			0x0000064e
 #define MSR_PERF_LIMIT_REASONS		0x0000064f
@@ -360,8 +364,12 @@
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 
+#define MSR_IA32_SMM_MONITOR_CTL	0x0000009b
+#define MSR_IA32_SMBASE			0x0000009e
+
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_CTL		0x00000199
+#define INTEL_PERF_CTL_MASK		0xffff
 #define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
 #define MSR_AMD_PERF_STATUS		0xc0010063
 #define MSR_AMD_PERF_CTL		0xc0010062
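
INTEL_PERF_CTL_MASK covers the low 16 bits of MSR_IA32_PERF_CTL, where a performance-state request is placed. A plausible use, sketched here under that assumption (the rdmsr/wrmsr stand-ins and the MSR contents are made up, not taken from any driver), is to splice a new request into the register without touching its other bits.

/*
 * Sketch of merging a new P-state request into IA32_PERF_CTL using the
 * 16-bit request mask; the simulated MSR accessors are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define MSR_IA32_PERF_CTL	0x00000199
#define INTEL_PERF_CTL_MASK	0xffff

static uint64_t fake_msr_perf_ctl = 0x0000000100001d00ULL;	/* made-up value */

static uint64_t rdmsr_sim(uint32_t msr)
{
        (void)msr;
        return fake_msr_perf_ctl;
}

static void wrmsr_sim(uint32_t msr, uint64_t val)
{
        (void)msr;
        fake_msr_perf_ctl = val;
}

int main(void)
{
        uint64_t val = rdmsr_sim(MSR_IA32_PERF_CTL);
        uint64_t request = 0x1f00;      /* example request bits */

        val &= ~(uint64_t)INTEL_PERF_CTL_MASK;  /* clear the request field */
        val |= request & INTEL_PERF_CTL_MASK;   /* install the new request */
        wrmsr_sim(MSR_IA32_PERF_CTL, val);

        printf("IA32_PERF_CTL now %#llx\n",
               (unsigned long long)fake_msr_perf_ctl);
        return 0;
}
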
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index b813bf9da1e2..c5f1a1deb91a 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -56,6 +56,7 @@
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MSR_LOAD_FAIL       34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
@@ -72,6 +73,7 @@
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
 
@@ -116,10 +118,14 @@
 	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
+	{ EXIT_REASON_MSR_LOAD_FAIL,         "MSR_LOAD_FAIL" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
 	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
+#define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
+#define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
+
 #endif /* _UAPIVMX_H */
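
The VMX_EXIT_REASONS string table pairs each exit-reason number with a name for tracing; the kernel hands it to __print_symbolic() in its KVM tracepoints. A self-contained sketch of consuming such a table follows; the trimmed-down table and the lookup helper are illustrative, not the kernel's.

/*
 * Look up the human-readable name for a VMX exit reason from a
 * code-to-string table, as a tracepoint formatter would.
 */
#include <stdio.h>

#define EXIT_REASON_MSR_LOAD_FAIL	34
#define EXIT_REASON_PML_FULL		62
#define EXIT_REASON_XSAVES		63

static const struct { int code; const char *name; } vmx_exit_reasons[] = {
        { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" },
        { EXIT_REASON_PML_FULL,      "PML_FULL" },
        { EXIT_REASON_XSAVES,        "XSAVES" },
};

static const char *exit_reason_name(int code)
{
        unsigned int i;

        for (i = 0; i < sizeof(vmx_exit_reasons) / sizeof(vmx_exit_reasons[0]); i++)
                if (vmx_exit_reasons[i].code == code)
                        return vmx_exit_reasons[i].name;
        return "UNKNOWN";
}

int main(void)
{
        printf("exit 62 -> %s\n", exit_reason_name(62));
        printf("exit 34 -> %s\n", exit_reason_name(34));
        return 0;
}
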