Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                              23
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl         1
-rw-r--r--  arch/x86/entry/syscalls/syscall_64.tbl         1
-rw-r--r--  arch/x86/include/asm/cpufeature.h              1
-rw-r--r--  arch/x86/include/asm/efi.h                    10
-rw-r--r--  arch/x86/include/asm/kvm_host.h                2
-rw-r--r--  arch/x86/include/asm/msr-index.h               1
-rw-r--r--  arch/x86/include/asm/paravirt_types.h          1
-rw-r--r--  arch/x86/include/asm/qspinlock.h              19
-rw-r--r--  arch/x86/kernel/alternative.c                  5
-rw-r--r--  arch/x86/kernel/apic/apic.c                    7
-rw-r--r--  arch/x86/kernel/apic/io_apic.c                 5
-rw-r--r--  arch/x86/kernel/apic/vector.c                  4
-rw-r--r--  arch/x86/kernel/cpu/common.c                   4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         5
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_bts.c     1
-rw-r--r--  arch/x86/kernel/irq_32.c                      19
-rw-r--r--  arch/x86/kernel/irq_64.c                       2
-rw-r--r--  arch/x86/kernel/ldt.c                          4
-rw-r--r--  arch/x86/kernel/pci-dma.c                      2
-rw-r--r--  arch/x86/kernel/tsc.c                         17
-rw-r--r--  arch/x86/kernel/vm86_32.c                     27
-rw-r--r--  arch/x86/kvm/mmu.c                            25
-rw-r--r--  arch/x86/kvm/svm.c                             4
-rw-r--r--  arch/x86/kvm/vmx.c                             2
-rw-r--r--  arch/x86/kvm/x86.c                             3
-rw-r--r--  arch/x86/lguest/boot.c                         2
-rw-r--r--  arch/x86/mm/srat.c                             5
-rw-r--r--  arch/x86/pci/common.c                          1
29 files changed, 144 insertions, 59 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7aef2d52daa0..328c8352480c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1006,7 +1006,7 @@ config X86_THERMAL_VECTOR
 	depends on X86_MCE_INTEL
 
 config X86_LEGACY_VM86
-	bool "Legacy VM86 support (obsolete)"
+	bool "Legacy VM86 support"
 	default n
 	depends on X86_32
 	---help---
@@ -1018,19 +1018,20 @@ config X86_LEGACY_VM86
 	  available to accelerate real mode DOS programs. However, any
 	  recent version of DOSEMU, X, or vbetool should be fully
 	  functional even without kernel VM86 support, as they will all
-	  fall back to (pretty well performing) software emulation.
+	  fall back to software emulation. Nevertheless, if you are using
+	  a 16-bit DOS program where 16-bit performance matters, vm86
+	  mode might be faster than emulation and you might want to
+	  enable this option.
 
-	  Anything that works on a 64-bit kernel is unlikely to need
-	  this option, as 64-bit kernels don't, and can't, support V8086
-	  mode. This option is also unrelated to 16-bit protected mode
-	  and is not needed to run most 16-bit programs under Wine.
+	  Note that any app that works on a 64-bit kernel is unlikely to
+	  need this option, as 64-bit kernels don't, and can't, support
+	  V8086 mode. This option is also unrelated to 16-bit protected
+	  mode and is not needed to run most 16-bit programs under Wine.
 
-	  Enabling this option adds considerable attack surface to the
-	  kernel and slows down system calls and exception handling.
+	  Enabling this option increases the complexity of the kernel
+	  and slows down exception handling a tiny bit.
 
-	  Unless you use very old userspace or need the last drop of
-	  performance in your real mode DOS games and can't use KVM,
-	  say N here.
+	  If unsure, say N here.
 
 config VM86
 	bool
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 477bfa6db370..7663c455b9f6 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -381,3 +381,4 @@
 372	i386	recvmsg			sys_recvmsg			compat_sys_recvmsg
 373	i386	shutdown		sys_shutdown
 374	i386	userfaultfd		sys_userfaultfd
+375	i386	membarrier		sys_membarrier
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 81c490634db9..278842fdf1f6 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -330,6 +330,7 @@
 321	common	bpf			sys_bpf
 322	64	execveat		stub_execveat
 323	common	userfaultfd		sys_userfaultfd
+324	common	membarrier		sys_membarrier
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
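
For illustration only (not part of the patch): the new syscall can be exercised from userspace with a raw syscall(2) call. This sketch assumes the 4.3 uapi values MEMBARRIER_CMD_QUERY = 0 and MEMBARRIER_CMD_SHARED = 1 and the x86-64 number 324 added above; glibc provided no wrapper at the time.

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_membarrier
#define __NR_membarrier 324		/* x86-64 number added by this patch */
#endif

int main(void)
{
	/* MEMBARRIER_CMD_QUERY (0) returns a bitmask of supported commands. */
	long cmds = syscall(__NR_membarrier, 0, 0);

	if (cmds < 0) {
		perror("membarrier");
		return 1;
	}

	/* MEMBARRIER_CMD_SHARED (1): memory barrier across all running threads. */
	if (cmds & 1)
		syscall(__NR_membarrier, 1, 0);

	printf("membarrier commands supported: 0x%lx\n", cmds);
	return 0;
}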
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 477fc28050e4..e6cf2ad350d1 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -241,6 +241,7 @@
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
 
 /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
 #define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
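
For illustration only (not part of the patch): the bit behind the new X86_FEATURE_SHA_NI flag is CPUID leaf 7, subleaf 0, EBX bit 29, so it can also be probed from userspace. A minimal sketch using GCC's <cpuid.h>:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Make sure CPUID leaf 7 exists before querying it. */
	if (__get_cpuid_max(0, NULL) < 7)
		return 1;

	/* CPUID.(EAX=7,ECX=0):EBX[29] is the SHA extensions bit. */
	__cpuid_count(7, 0, eax, ebx, ecx, edx);
	printf("SHA-NI: %s\n", (ebx & (1u << 29)) ? "yes" : "no");
	return 0;
}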
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 155162ea0e00..ab5f1d447ef9 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -86,6 +86,16 @@ extern u64 asmlinkage efi_call(void *fp, ...);
 extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
					u32 type, u64 attribute);
 
+/*
+ * CONFIG_KASAN may redefine memset to __memset. __memset function is present
+ * only in kernel binary. Since the EFI stub linked into a separate binary it
+ * doesn't have __memset(). So we should use standard memset from
+ * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove.
+ */
+#undef memcpy
+#undef memset
+#undef memmove
+
 #endif /* CONFIG_X86_32 */
 
 extern struct efi_scratch efi_scratch;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c12e845f59e6..2beee0382088 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -40,6 +40,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_HALT_POLL_NS_DEFAULT 500000
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
@@ -711,6 +712,7 @@ struct kvm_vcpu_stat {
 	u32 nmi_window_exits;
 	u32 halt_exits;
 	u32 halt_successful_poll;
+	u32 halt_attempted_poll;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
 	u32 irq_exits;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c1c0a1c14344..b98b471a3b7e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -331,6 +331,7 @@
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
 #define MSR_K8_TSEG_ADDR		0xc0010112
+#define MSR_K8_TSEG_MASK		0xc0010113
 #define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
 #define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
 #define K8_MTRR_RDMEM_WRMEM_MASK	0x18181818 /* Mask: RdMem|WrMem    */
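
For illustration only (not part of the patch): with the msr driver loaded, MSRs such as the MSR_K8_TSEG_ADDR/MSR_K8_TSEG_MASK pair above can be read from userspace by pread()ing /dev/cpu/N/msr at the MSR number (root required, AMD CPU assumed; a sketch, not a robust tool):

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

static int rdmsr(int fd, uint32_t msr, uint64_t *val)
{
	/* The msr char device returns the 8-byte MSR at offset == MSR number. */
	return pread(fd, val, sizeof(*val), msr) == sizeof(*val) ? 0 : -1;
}

int main(void)
{
	uint64_t tseg_addr, tseg_mask;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || rdmsr(fd, 0xc0010112, &tseg_addr) ||	/* MSR_K8_TSEG_ADDR */
	    rdmsr(fd, 0xc0010113, &tseg_mask)) {		/* MSR_K8_TSEG_MASK */
		perror("rdmsr");
		return 1;
	}
	printf("TSEG base 0x%llx mask 0x%llx\n",
	       (unsigned long long)tseg_addr, (unsigned long long)tseg_mask);
	return 0;
}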
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index ce029e4fa7c6..31247b5bff7c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -97,7 +97,6 @@ struct pv_lazy_ops {
 struct pv_time_ops {
 	unsigned long long (*sched_clock)(void);
 	unsigned long long (*steal_clock)(int cpu);
-	unsigned long (*get_tsc_khz)(void);
 };
 
 struct pv_cpu_ops {
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 9d51fae1cba3..eaba08076030 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -39,18 +39,27 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
 }
 #endif
 
-#define virt_queued_spin_lock virt_queued_spin_lock
-
-static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+#ifdef CONFIG_PARAVIRT
+#define virt_spin_lock virt_spin_lock
+static inline bool virt_spin_lock(struct qspinlock *lock)
 {
 	if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
 		return false;
 
-	while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
-		cpu_relax();
+	/*
+	 * On hypervisors without PARAVIRT_SPINLOCKS support we fall
+	 * back to a Test-and-Set spinlock, because fair locks have
+	 * horrible lock 'holder' preemption issues.
+	 */
+
+	do {
+		while (atomic_read(&lock->val) != 0)
+			cpu_relax();
+	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
 
 	return true;
 }
+#endif /* CONFIG_PARAVIRT */
 
 #include <asm-generic/qspinlock.h>
 
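
For illustration only (not part of the patch): the fallback above is a plain test-and-test-and-set lock, i.e. spin on a cheap read until the lock looks free, then try to claim it with a compare-and-swap. A minimal userspace C11 sketch of the same shape (the kernel's atomic_read()/atomic_cmpxchg() play these roles):

#include <stdatomic.h>

static atomic_int lock_val;		/* 0 = unlocked, 1 = locked */

static void tas_lock(void)
{
	int expected;

	do {
		while (atomic_load(&lock_val) != 0)
			;		/* read-only spin, no cacheline write traffic */
		expected = 0;
	} while (!atomic_compare_exchange_strong(&lock_val, &expected, 1));
}

static void tas_unlock(void)
{
	atomic_store(&lock_val, 0);
}

Unlike a queued (fair) lock, any waiter can take the lock the moment it is released, so a preempted virtual CPU that happens to be next in line cannot stall every other waiter.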
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c42827eb86cf..25f909362b7a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -338,10 +338,15 @@ done:
 
 static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
 {
+	unsigned long flags;
+
 	if (instr[0] != 0x90)
 		return;
 
+	local_irq_save(flags);
 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+	sync_core();
+	local_irq_restore(flags);
 
 	DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
 		   instr, a->instrlen - a->padlen, a->padlen);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3ca3e46aa405..24e94ce454e2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 	apic_write(APIC_LVTT, lvtt_value);
 
 	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+		/*
+		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
+		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
+		 * According to Intel, MFENCE can do the serialization here.
+		 */
+		asm volatile("mfence" : : : "memory");
+
 		printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
 		return;
 	}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 38a76f826530..5c60bb162622 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void)
 	int pin, ioapic, irq, irq_entry;
 	const struct cpumask *mask;
 	struct irq_data *idata;
+	struct irq_chip *chip;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void)
 		else
 			mask = apic->target_cpus();
 
-		irq_set_affinity(irq, mask);
+		chip = irq_data_get_irq_chip(idata);
+		chip->irq_set_affinity(idata, mask, false);
 	}
-
 }
 #endif
 
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1bbd0fe2c806..836d11b92811 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -489,10 +489,8 @@ static int apic_set_affinity(struct irq_data *irq_data,
 
 	err = assign_irq_vector(irq, data, dest);
 	if (err) {
-		struct irq_data *top = irq_get_irq_data(irq);
-
 		if (assign_irq_vector(irq, data,
-				      irq_data_get_affinity_mask(top)))
+				      irq_data_get_affinity_mask(irq_data)))
 			pr_err("Failed to recover vector for irq %d\n", irq);
 		return err;
 	}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 07ce52c22ec8..de22ea7ff82f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c)
 	else
 		printk(KERN_CONT "%d86", c->x86);
 
-	printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+	printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
 	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
+		printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
 	else
 		printk(KERN_CONT ")\n");
 
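
For illustration only (not part of the patch): the family/model/stepping values now printed in hex come from CPUID leaf 1 EAX. A userspace sketch that decodes them roughly the way the kernel's cpu_detect() does:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	unsigned int family   = (eax >> 8) & 0xf;
	unsigned int model    = (eax >> 4) & 0xf;
	unsigned int stepping = eax & 0xf;

	if (family == 0xf)
		family += (eax >> 20) & 0xff;		/* extended family */
	if (family >= 0x6)
		model += ((eax >> 16) & 0xf) << 4;	/* extended model */

	printf("family: 0x%x, model: 0x%x, stepping: 0x%x\n",
	       family, model, stepping);
	return 0;
}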
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cd9b6d0b10bf..3fefebfbdf4b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2316,9 +2316,12 @@ static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			    struct perf_event *event)
 {
-	struct event_constraint *c1 = cpuc->event_constraint[idx];
+	struct event_constraint *c1 = NULL;
 	struct event_constraint *c2;
 
+	if (idx >= 0) /* fake does < 0 */
+		c1 = cpuc->event_constraint[idx];
+
 	/*
 	 * first time only
 	 * - static constraint: no change across incremental scheduling calls
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 54690e885759..d1c0f254afbe 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -222,6 +222,7 @@ static void __bts_event_start(struct perf_event *event)
 	if (!buf || bts_buffer_is_full(buf, bts))
 		return;
 
+	event->hw.itrace_started = 1;
 	event->hw.state = 0;
 
 	if (!buf->snapshot)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index c80cf6699678..38da8f29a9c8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -68,11 +68,10 @@ static inline void *current_stack(void)
 	return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
 }
 
-static inline int
-execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
+static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 {
 	struct irq_stack *curstk, *irqstk;
-	u32 *isp, *prev_esp, arg1, arg2;
+	u32 *isp, *prev_esp, arg1;
 
 	curstk = (struct irq_stack *) current_stack();
 	irqstk = __this_cpu_read(hardirq_stack);
@@ -98,8 +97,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 	asm volatile("xchgl	%%ebx,%%esp	\n"
 		     "call	*%%edi		\n"
 		     "movl	%%ebx,%%esp	\n"
-		     : "=a" (arg1), "=d" (arg2), "=b" (isp)
-		     : "0" (irq),   "1" (desc),  "2" (isp),
+		     : "=a" (arg1), "=b" (isp)
+		     : "0" (desc),  "1" (isp),
 		       "D" (desc->handle_irq)
 		     : "memory", "cc", "ecx");
 	return 1;
@@ -150,19 +149,15 @@ void do_softirq_own_stack(void)
 
 bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
 {
-	unsigned int irq;
-	int overflow;
-
-	overflow = check_stack_overflow();
+	int overflow = check_stack_overflow();
 
 	if (IS_ERR_OR_NULL(desc))
 		return false;
 
-	irq = irq_desc_get_irq(desc);
-	if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+	if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
-		generic_handle_irq_desc(irq, desc);
+		generic_handle_irq_desc(desc);
 	}
 
 	return true;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index ff16ccb918f2..c767cf2bc80a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -75,6 +75,6 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
 	if (unlikely(IS_ERR_OR_NULL(desc)))
 		return false;
 
-	generic_handle_irq_desc(irq_desc_get_irq(desc), desc);
+	generic_handle_irq_desc(desc);
 	return true;
 }
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 2bcc0525f1c1..6acc9dd91f36 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size)
 	if (alloc_size > PAGE_SIZE)
 		new_ldt->entries = vzalloc(alloc_size);
 	else
-		new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
+		new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
 
 	if (!new_ldt->entries) {
 		kfree(new_ldt);
@@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
 		vfree(ldt->entries);
 	else
-		kfree(ldt->entries);
+		free_page((unsigned long)ldt->entries);
 	kfree(ldt);
 }
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 84b8ef82a159..1b55de1267cf 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -131,8 +131,8 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
 
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
 {
-	*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
 	*gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+	*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
 
 	if (!*dev)
 		*dev = &x86_dma_fallback_dev;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c8d52cb4cb6e..c3f7602cd038 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -21,6 +21,7 @@
 #include <asm/hypervisor.h>
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
+#include <asm/geode.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -1013,15 +1014,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
 
 static void __init check_system_tsc_reliable(void)
 {
-#ifdef CONFIG_MGEODE_LX
-	/* RTSC counts during suspend */
+#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
+	if (is_geode_lx()) {
+		/* RTSC counts during suspend */
 #define RTSC_SUSP 0x100
 	unsigned long res_low, res_high;
 
 	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
 	/* Geode_LX - the OLPC CPU has a very reliable TSC */
 	if (res_low & RTSC_SUSP)
 		tsc_clocksource_reliable = 1;
+	}
 #endif
 	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
 		tsc_clocksource_reliable = 1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index abd8b856bd2b..524619351961 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -45,6 +45,7 @@
 #include <linux/audit.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	struct pt_regs *regs = current_pt_regs();
 	unsigned long err = 0;
 
+	err = security_mmap_addr(0);
+	if (err) {
+		/*
+		 * vm86 cannot virtualize the address space, so vm86 users
+		 * need to manage the low 1MB themselves using mmap. Given
+		 * that BIOS places important data in the first page, vm86
+		 * is essentially useless if mmap_min_addr != 0. DOSEMU,
+		 * for example, won't even bother trying to use vm86 if it
+		 * can't map a page at virtual address 0.
+		 *
+		 * To reduce the available kernel attack surface, simply
+		 * disallow vm86(old) for users who cannot mmap at va 0.
+		 *
+		 * The implementation of security_mmap_addr will allow
+		 * suitably privileged users to map va 0 even if
+		 * vm.mmap_min_addr is set above 0, and we want this
+		 * behavior for vm86 as well, as it ensures that legacy
+		 * tools like vbetool will not fail just because of
+		 * vm.mmap_min_addr.
+		 */
+		pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
+			     current->comm, task_pid_nr(current),
+			     from_kuid_munged(&init_user_ns, current_uid()));
+		return -EPERM;
+	}
+
 	if (!vm86) {
 		if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
 			return -ENOMEM;
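
For illustration only (not part of the patch): the precondition the comment above describes can be probed from userspace by attempting to map the zero page, which is essentially what DOSEMU does before using vm86; it fails unless vm.mmap_min_addr (and any LSM mmap_min_addr policy) permits a mapping at virtual address 0.

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *p = mmap((void *)0, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap at va 0");	/* the new vm86(old) check would likely refuse too */
		return 1;
	}
	printf("mapped the zero page at %p; vm86(old) remains usable\n", p);
	munmap(p, 4096);
	return 0;
}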
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 69088a1ba509..ff606f507913 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3322,7 +3322,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 			break;
 
 		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
-						    leaf);
+						    iterator.level);
 	}
 
 	walk_shadow_page_lockless_end(vcpu);
@@ -3614,7 +3614,7 @@ static void
 __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 			struct rsvd_bits_validate *rsvd_check,
 			int maxphyaddr, int level, bool nx, bool gbpages,
-			bool pse)
+			bool pse, bool amd)
 {
 	u64 exb_bit_rsvd = 0;
 	u64 gbpages_bit_rsvd = 0;
@@ -3631,7 +3631,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	 * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
 	 * leaf entries) on AMD CPUs only.
 	 */
-	if (guest_cpuid_is_amd(vcpu))
+	if (amd)
 		nonleaf_bit8_rsvd = rsvd_bits(8, 8);
 
 	switch (level) {
@@ -3699,7 +3699,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
 				cpuid_maxphyaddr(vcpu), context->root_level,
 				context->nx, guest_cpuid_has_gbpages(vcpu),
-				is_pse(vcpu));
+				is_pse(vcpu), guest_cpuid_is_amd(vcpu));
 }
 
 static void
@@ -3749,13 +3749,24 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
+	/*
+	 * Passing "true" to the last argument is okay; it adds a check
+	 * on bit 8 of the SPTEs which KVM doesn't use anyway.
+	 */
 	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 				boot_cpu_data.x86_phys_bits,
 				context->shadow_root_level, context->nx,
-				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
+				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
+				true);
 }
 EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
 
+static inline bool boot_cpu_is_amd(void)
+{
+	WARN_ON_ONCE(!tdp_enabled);
+	return shadow_x_mask == 0;
+}
+
 /*
  * the direct page table on host, use as much mmu features as
  * possible, however, kvm currently does not do execution-protection.
@@ -3764,11 +3775,11 @@ static void
 reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 				struct kvm_mmu *context)
 {
-	if (guest_cpuid_is_amd(vcpu))
+	if (boot_cpu_is_amd())
 		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 					boot_cpu_data.x86_phys_bits,
 					context->shadow_root_level, false,
-					cpu_has_gbpages, true);
+					cpu_has_gbpages, true, true);
 	else
 		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
 					    boot_cpu_data.x86_phys_bits,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fdb8cb63a6c0..94b7d15db3fc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -202,6 +202,7 @@ module_param(npt, int, S_IRUGO);
 static int nested = true;
 module_param(nested, int, S_IRUGO);
 
+static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
@@ -1263,7 +1264,8 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
 	 * It also updates the guest-visible cr0 value.
 	 */
-	(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+	svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+	kvm_mmu_reset_context(&svm->vcpu);
 
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d01986832afc..64076740251e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6064,6 +6064,8 @@ static __init int hardware_setup(void)
 	memcpy(vmx_msr_bitmap_longmode_x2apic,
 			vmx_msr_bitmap_longmode, PAGE_SIZE);
 
+	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+
 	if (enable_apicv) {
 		for (msr = 0x800; msr <= 0x8ff; msr++)
 			vmx_disable_intercept_msr_read_x2apic(msr);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a60bdbccff51..991466bf8dee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -149,6 +149,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
 	{ "halt_exits", VCPU_STAT(halt_exits) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
@@ -2189,6 +2190,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
 	case MSR_K8_SYSCFG:
+	case MSR_K8_TSEG_ADDR:
+	case MSR_K8_TSEG_MASK:
 	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
 	case MSR_K8_INT_PENDING_MSG:
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 161804de124a..a0d09f6c6533 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1015,7 +1015,7 @@ static struct clock_event_device lguest_clockevent = {
  * This is the Guest timer interrupt handler (hardware interrupt 0). We just
  * call the clockevent infrastructure and it does whatever needs doing.
  */
-static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
+static void lguest_time_irq(struct irq_desc *desc)
 {
 	unsigned long flags;
 
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 66338a60aa6e..c2aea63bee20 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
 	node_set(node, numa_nodes_parsed);
 
-	pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n",
+	pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
 		node, pxm,
 		(unsigned long long) start, (unsigned long long) end - 1,
-		hotpluggable ? " hotplug" : "");
+		hotpluggable ? " hotplug" : "",
+		ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
 
 	/* Mark hotplug range in memblock. */
 	if (hotpluggable && memblock_mark_hotplug(start, ma->length))
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 09d3afc0a181..dc78a4a9a466 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -166,6 +166,7 @@ void pcibios_fixup_bus(struct pci_bus *b)
 {
 	struct pci_dev *dev;
 
+	pci_read_bridge_bases(b);
 	list_for_each_entry(dev, &b->devices, bus_list)
 		pcibios_fixup_device_resources(dev);
 }