-rw-r--r--  .mailmap | 1
-rw-r--r--  Documentation/x86/orc-unwinder.txt | 2
-rw-r--r--  Documentation/x86/x86_64/mm.txt | 2
-rw-r--r--  Makefile | 6
-rw-r--r--  arch/arm64/include/asm/fixmap.h | 7
-rw-r--r--  arch/mips/ar7/platform.c | 5
-rw-r--r--  arch/mips/ar7/prom.c | 2
-rw-r--r--  arch/mips/kernel/smp-bmips.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 10
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 29
-rw-r--r--  arch/x86/Kconfig | 3
-rw-r--r--  arch/x86/Kconfig.debug | 39
-rw-r--r--  arch/x86/configs/tiny.config | 4
-rw-r--r--  arch/x86/configs/x86_64_defconfig | 1
-rw-r--r--  arch/x86/entry/calling.h | 69
-rw-r--r--  arch/x86/entry/entry_64.S | 141
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 3
-rw-r--r--  arch/x86/entry/syscalls/Makefile | 4
-rw-r--r--  arch/x86/include/asm/archrandom.h | 8
-rw-r--r--  arch/x86/include/asm/bitops.h | 10
-rw-r--r--  arch/x86/include/asm/compat.h | 1
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 9
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 536
-rw-r--r--  arch/x86/include/asm/elf.h | 2
-rw-r--r--  arch/x86/include/asm/fixmap.h | 6
-rw-r--r--  arch/x86/include/asm/module.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 5
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 2
-rw-r--r--  arch/x86/include/asm/percpu.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 3
-rw-r--r--  arch/x86/include/asm/processor.h | 52
-rw-r--r--  arch/x86/include/asm/ptrace.h | 6
-rw-r--r--  arch/x86/include/asm/rmwcc.h | 2
-rw-r--r--  arch/x86/include/asm/switch_to.h | 24
-rw-r--r--  arch/x86/include/asm/syscalls.h | 2
-rw-r--r--  arch/x86/include/asm/trace/fpu.h | 10
-rw-r--r--  arch/x86/include/asm/traps.h | 20
-rw-r--r--  arch/x86/include/asm/unwind.h | 8
-rw-r--r--  arch/x86/include/uapi/asm/processor-flags.h | 3
-rw-r--r--  arch/x86/kernel/Makefile | 10
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 3
-rw-r--r--  arch/x86/kernel/cpu/aperfmperf.c | 11
-rw-r--r--  arch/x86/kernel/cpu/common.c | 29
-rw-r--r--  arch/x86/kernel/cpu/cpuid-deps.c | 121
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 4
-rw-r--r--  arch/x86/kernel/fpu/init.c | 11
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 43
-rw-r--r--  arch/x86/kernel/head_32.S | 5
-rw-r--r--  arch/x86/kernel/head_64.S | 45
-rw-r--r--  arch/x86/kernel/idt.c | 2
-rw-r--r--  arch/x86/kernel/ldt.c | 16
-rw-r--r--  arch/x86/kernel/process.c | 8
-rw-r--r--  arch/x86/kernel/process_32.c | 6
-rw-r--r--  arch/x86/kernel/process_64.c | 5
-rw-r--r--  arch/x86/kernel/smpboot.c | 14
-rw-r--r--  arch/x86/kernel/traps.c | 13
-rw-r--r--  arch/x86/kernel/tsc.c | 8
-rw-r--r--  arch/x86/kernel/unwind_orc.c | 2
-rw-r--r--  arch/x86/kernel/verify_cpu.S | 3
-rw-r--r--  arch/x86/kernel/vm86_32.c | 20
-rw-r--r--  arch/x86/mm/fault.c | 88
-rw-r--r--  arch/x86/mm/init_64.c | 10
-rw-r--r--  arch/x86/mm/kasan_init_64.c | 101
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c | 4
-rw-r--r--  arch/x86/um/ldt.c | 7
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 9
-rw-r--r--  arch/x86/xen/mmu_pv.c | 159
-rw-r--r--  arch/x86/xen/smp_pv.c | 17
-rw-r--r--  arch/x86/xen/xen-asm_64.S | 2
-rw-r--r--  arch/x86/xen/xen-head.S | 11
-rw-r--r--  drivers/acpi/apei/ghes.c | 78
-rw-r--r--  drivers/block/rbd.c | 4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c | 25
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2
-rw-r--r--  drivers/input/mouse/elan_i2c_core.c | 1
-rw-r--r--  drivers/input/rmi4/rmi_smbus.c | 4
-rw-r--r--  drivers/input/touchscreen/tsc200x-core.c | 1
-rw-r--r--  drivers/misc/pti.c | 2
-rw-r--r--  drivers/net/can/c_can/c_can_pci.c | 1
-rw-r--r--  drivers/net/can/c_can/c_can_platform.c | 1
-rw-r--r--  drivers/net/can/ifi_canfd/ifi_canfd.c | 6
-rw-r--r--  drivers/net/can/peak_canfd/peak_pciefd_main.c | 14
-rw-r--r--  drivers/net/can/sun4i_can.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/dev.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 7
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 2
-rw-r--r--  include/linux/intel-pti.h (renamed from include/linux/pti.h) | 6
-rw-r--r--  include/linux/mm.h | 2
-rw-r--r--  include/linux/mmzone.h | 6
-rw-r--r--  include/uapi/drm/i915_drm.h | 1
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  mm/gup.c | 97
-rw-r--r--  mm/page_alloc.c | 10
-rw-r--r--  mm/sparse.c | 17
-rw-r--r--  net/8021q/vlan.c | 6
-rw-r--r--  net/dsa/switch.c | 4
-rw-r--r--  net/ipv4/tcp_input.c | 3
-rw-r--r--  net/ipv4/tcp_offload.c | 12
-rw-r--r--  net/rds/ib_recv.c | 10
-rw-r--r--  scripts/Makefile.build | 2
-rw-r--r--  tools/include/uapi/drm/i915_drm.h | 1
-rw-r--r--  tools/objtool/check.c | 7
-rw-r--r--  tools/objtool/objtool.c | 6
-rw-r--r--  tools/perf/builtin-trace.c | 10
-rw-r--r--  tools/perf/util/parse-events.l | 5
-rw-r--r--  tools/testing/selftests/x86/ldt_gdt.c | 88
-rw-r--r--  tools/testing/selftests/x86/protection_keys.c | 24

112 files changed, 1350 insertions, 997 deletions
diff --git a/.mailmap b/.mailmap
index 4757d361fd33..c021f29779a7 100644
--- a/.mailmap
+++ b/.mailmap
@@ -102,6 +102,7 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
+Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Mark Brown <broonie@sirena.org.uk>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
index af0c9a4c65a6..cd4b29be29af 100644
--- a/Documentation/x86/orc-unwinder.txt
+++ b/Documentation/x86/orc-unwinder.txt
@@ -4,7 +4,7 @@ ORC unwinder
 Overview
 --------
 
-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
 similar in concept to a DWARF unwinder.  The difference is that the
 format of the ORC data is much simpler than DWARF, which in turn allows
 the ORC unwinder to be much simpler and faster.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b0798e281aa6..3448e675b462 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
diff --git a/Makefile b/Makefile
index bee2033e7d1d..9a43f19aad08 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -934,8 +934,8 @@ ifdef CONFIG_STACK_VALIDATION
   ifeq ($(has_libelf),1)
     objtool_target := tools/objtool FORCE
   else
-    ifdef CONFIG_ORC_UNWINDER
-      $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+    ifdef CONFIG_UNWINDER_ORC
+      $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
     else
       $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
     endif
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index caf86be815ba..4052ec39e8db 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -51,6 +51,13 @@ enum fixed_addresses {
 
         FIX_EARLYCON_MEM_BASE,
         FIX_TEXT_POKE0,
+
+#ifdef CONFIG_ACPI_APEI_GHES
+        /* Used for GHES mapping from assorted contexts */
+        FIX_APEI_GHES_IRQ,
+        FIX_APEI_GHES_NMI,
+#endif /* CONFIG_ACPI_APEI_GHES */
+
         __end_of_permanent_fixed_addresses,
 
         /*
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index df7acea3747a..4674f1efbe7a 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -575,6 +575,7 @@ static int __init ar7_register_uarts(void)
         uart_port.type          = PORT_AR7;
         uart_port.uartclk       = clk_get_rate(bus_clk) / 2;
         uart_port.iotype        = UPIO_MEM32;
+        uart_port.flags         = UPF_FIXED_TYPE;
         uart_port.regshift      = 2;
 
         uart_port.line          = 0;
@@ -653,6 +654,10 @@ static int __init ar7_register_devices(void)
         u32 val;
         int res;
 
+        res = ar7_gpio_init();
+        if (res)
+                pr_warn("unable to register gpios: %d\n", res);
+
         res = ar7_register_uarts();
         if (res)
                 pr_err("unable to setup uart(s): %d\n", res);
diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c
index 4fd83336131a..dd53987a690f 100644
--- a/arch/mips/ar7/prom.c
+++ b/arch/mips/ar7/prom.c
@@ -246,8 +246,6 @@ void __init prom_init(void)
         ar7_init_cmdline(fw_arg0, (char **)fw_arg1);
         ar7_init_env((struct env_var *)fw_arg2);
         console_config();
-
-        ar7_gpio_init();
 }
 
 #define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4)))
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 406072e26752..87dcac2447c8 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -591,11 +591,11 @@ void __init bmips_cpu_setup(void)
 
         /* Flush and enable RAC */
         cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
-        __raw_writel(cfg | 0x100, BMIPS_RAC_CONFIG);
+        __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
         __raw_readl(cbr + BMIPS_RAC_CONFIG);
 
         cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
-        __raw_writel(cfg | 0xf, BMIPS_RAC_CONFIG);
+        __raw_writel(cfg | 0xf, cbr + BMIPS_RAC_CONFIG);
         __raw_readl(cbr + BMIPS_RAC_CONFIG);
 
         cfg = __raw_readl(cbr + BMIPS_RAC_ADDRESS_RANGE);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7c62967d672c..59247af5fd45 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -646,6 +646,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
                 hnow_r = hpte_new_to_old_r(hnow_r);
         }
+
+        /*
+         * If the HPT is being resized, don't update the HPTE,
+         * instead let the guest retry after the resize operation is complete.
+         * The synchronization for hpte_setup_done test vs. set is provided
+         * by the HPTE lock.
+         */
+        if (!kvm->arch.hpte_setup_done)
+                goto out_unlock;
+
         if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
             rev->guest_rpte != hpte[2])
                 /* HPTE has been changed under us; let the guest retry */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 73bf1ebfa78f..8d43cf205d34 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2705,11 +2705,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
          * Hard-disable interrupts, and check resched flag and signals.
          * If we need to reschedule or deliver a signal, clean up
          * and return without going into the guest(s).
+         * If the hpte_setup_done flag has been cleared, don't go into the
+         * guest because that means a HPT resize operation is in progress.
          */
         local_irq_disable();
         hard_irq_disable();
         if (lazy_irq_pending() || need_resched() ||
-            recheck_signals(&core_info)) {
+            recheck_signals(&core_info) ||
+            (!kvm_is_radix(vc->kvm) && !vc->kvm->arch.hpte_setup_done)) {
                 local_irq_enable();
                 vc->vcore_state = VCORE_INACTIVE;
                 /* Unlock all except the primary vcore */
@@ -3078,7 +3081,7 @@ out:
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-        int n_ceded, i;
+        int n_ceded, i, r;
         struct kvmppc_vcore *vc;
         struct kvm_vcpu *v;
 
@@ -3132,6 +3135,20 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
         while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
                !signal_pending(current)) {
+                /* See if the HPT and VRMA are ready to go */
+                if (!kvm_is_radix(vcpu->kvm) &&
+                    !vcpu->kvm->arch.hpte_setup_done) {
+                        spin_unlock(&vc->lock);
+                        r = kvmppc_hv_setup_htab_rma(vcpu);
+                        spin_lock(&vc->lock);
+                        if (r) {
+                                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+                                kvm_run->fail_entry.hardware_entry_failure_reason = 0;
+                                vcpu->arch.ret = r;
+                                break;
+                        }
+                }
+
                 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
                         kvmppc_vcore_end_preempt(vc);
 
@@ -3249,13 +3266,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
         /* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
         smp_mb();
 
-        /* On the first time here, set up HTAB and VRMA */
-        if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) {
-                r = kvmppc_hv_setup_htab_rma(vcpu);
-                if (r)
-                        goto out;
-        }
-
         flush_all_to_thread(current);
 
         /* Save userspace EBB and other register values */
@@ -3303,7 +3313,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
         }
         mtspr(SPRN_VRSAVE, user_vrsave);
 
- out:
         vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
         atomic_dec(&vcpu->kvm->arch.vcpus_running);
         return r;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fdb23313dd5..4ae940a0ed3b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -171,7 +171,7 @@ config X86
         select HAVE_PERF_USER_STACK_DUMP
         select HAVE_RCU_TABLE_FREE
         select HAVE_REGS_AND_STACK_ACCESS_API
-        select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+        select HAVE_RELIABLE_STACKTRACE         if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
         select HAVE_STACK_VALIDATION            if X86_64
         select HAVE_SYSCALL_TRACEPOINTS
         select HAVE_UNSTABLE_SCHED_CLOCK
@@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 config KASAN_SHADOW_OFFSET
         hex
         depends on KASAN
-        default 0xdff8000000000000 if X86_5LEVEL
         default 0xdffffc0000000000
 
 config HAVE_INTEL_TXT
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 90b123056f4b..6293a8768a91 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG
 
 choice
         prompt "Choose kernel unwinder"
-        default FRAME_POINTER_UNWINDER
+        default UNWINDER_ORC if X86_64
+        default UNWINDER_FRAME_POINTER if X86_32
         ---help---
           This determines which method will be used for unwinding kernel stack
           traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
           livepatch, lockdep, and more.
 
-config FRAME_POINTER_UNWINDER
-        bool "Frame pointer unwinder"
-        select FRAME_POINTER
-        ---help---
-          This option enables the frame pointer unwinder for unwinding kernel
-          stack traces.
-
-          The unwinder itself is fast and it uses less RAM than the ORC
-          unwinder, but the kernel text size will grow by ~3% and the kernel's
-          overall performance will degrade by roughly 5-10%.
-
-          This option is recommended if you want to use the livepatch
-          consistency model, as this is currently the only way to get a
-          reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
-config ORC_UNWINDER
+config UNWINDER_ORC
         bool "ORC unwinder"
         depends on X86_64
         select STACK_VALIDATION
@@ -396,7 +382,22 @@ config ORC_UNWINDER
           Enabling this option will increase the kernel's runtime memory usage
           by roughly 2-4MB, depending on your kernel config.
 
-config GUESS_UNWINDER
+config UNWINDER_FRAME_POINTER
+        bool "Frame pointer unwinder"
+        select FRAME_POINTER
+        ---help---
+          This option enables the frame pointer unwinder for unwinding kernel
+          stack traces.
+
+          The unwinder itself is fast and it uses less RAM than the ORC
+          unwinder, but the kernel text size will grow by ~3% and the kernel's
+          overall performance will degrade by roughly 5-10%.
+
+          This option is recommended if you want to use the livepatch
+          consistency model, as this is currently the only way to get a
+          reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
+config UNWINDER_GUESS
         bool "Guess unwinder"
         depends on EXPERT
         ---help---
@@ -411,7 +412,7 @@ config GUESS_UNWINDER
 endchoice
 
 config FRAME_POINTER
-        depends on !ORC_UNWINDER && !GUESS_UNWINDER
+        depends on !UNWINDER_ORC && !UNWINDER_GUESS
         bool
 
 endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 550cd5012b73..66c9e2aab16c 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,5 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
-CONFIG_GUESS_UNWINDER=y
-# CONFIG_FRAME_POINTER_UNWINDER is not set
+CONFIG_UNWINDER_GUESS=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4a4b16e56d35..e32fc1f274d8 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
+CONFIG_UNWINDER_ORC=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 6e160031cfea..3fd8bc560fae 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
         UNWIND_HINT_REGS offset=\offset
 .endm
 
-.macro RESTORE_EXTRA_REGS offset=0
-        movq 0*8+\offset(%rsp), %r15
-        movq 1*8+\offset(%rsp), %r14
-        movq 2*8+\offset(%rsp), %r13
-        movq 3*8+\offset(%rsp), %r12
-        movq 4*8+\offset(%rsp), %rbp
-        movq 5*8+\offset(%rsp), %rbx
-        UNWIND_HINT_REGS offset=\offset extra=0
-.endm
-
-.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
-        .if \rstor_r11
-        movq 6*8(%rsp), %r11
-        .endif
-        .if \rstor_r8910
-        movq 7*8(%rsp), %r10
-        movq 8*8(%rsp), %r9
-        movq 9*8(%rsp), %r8
-        .endif
-        .if \rstor_rax
-        movq 10*8(%rsp), %rax
-        .endif
-        .if \rstor_rcx
-        movq 11*8(%rsp), %rcx
-        .endif
-        .if \rstor_rdx
-        movq 12*8(%rsp), %rdx
-        .endif
-        movq 13*8(%rsp), %rsi
-        movq 14*8(%rsp), %rdi
-        UNWIND_HINT_IRET_REGS offset=16*8
-.endm
-.macro RESTORE_C_REGS
-        RESTORE_C_REGS_HELPER 1,1,1,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_RAX
-        RESTORE_C_REGS_HELPER 0,1,1,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_RCX
-        RESTORE_C_REGS_HELPER 1,0,1,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_R11
-        RESTORE_C_REGS_HELPER 1,1,0,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_RCX_R11
-        RESTORE_C_REGS_HELPER 1,0,0,1,1
-.endm
-
-.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
-        subq $-(15*8+\addskip), %rsp
+.macro POP_EXTRA_REGS
+        popq %r15
+        popq %r14
+        popq %r13
+        popq %r12
+        popq %rbp
+        popq %rbx
+.endm
+
+.macro POP_C_REGS
+        popq %r11
+        popq %r10
+        popq %r9
+        popq %r8
+        popq %rax
+        popq %rcx
+        popq %rdx
+        popq %rsi
+        popq %rdi
 .endm
 
 .macro icebp
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index bcfc5668dcb2..a2b30ec69497 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath:
         TRACE_IRQS_ON           /* user mode is traced as IRQs on */
         movq    RIP(%rsp), %rcx
         movq    EFLAGS(%rsp), %r11
-        RESTORE_C_REGS_EXCEPT_RCX_R11
-        movq    RSP(%rsp), %rsp
+        addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
         UNWIND_HINT_EMPTY
-        USERGS_SYSRET64
+        jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
 
 1:
         /*
@@ -246,17 +245,18 @@ entry_SYSCALL64_slow_path:
         call    do_syscall_64           /* returns with IRQs disabled */
 
 return_from_SYSCALL_64:
-        RESTORE_EXTRA_REGS
         TRACE_IRQS_IRETQ                /* we're about to change IF */
 
         /*
          * Try to use SYSRET instead of IRET if we're returning to
-         * a completely clean 64-bit userspace context.
+         * a completely clean 64-bit userspace context.  If we're not,
+         * go to the slow exit path.
          */
         movq    RCX(%rsp), %rcx
         movq    RIP(%rsp), %r11
-        cmpq    %rcx, %r11      /* RCX == RIP */
-        jne     opportunistic_sysret_failed
+
+        cmpq    %rcx, %r11      /* SYSRET requires RCX == RIP */
+        jne     swapgs_restore_regs_and_return_to_usermode
 
         /*
          * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -274,14 +274,14 @@ return_from_SYSCALL_64:
 
         /* If this changed %rcx, it was not canonical */
         cmpq    %rcx, %r11
-        jne     opportunistic_sysret_failed
+        jne     swapgs_restore_regs_and_return_to_usermode
 
         cmpq    $__USER_CS, CS(%rsp)            /* CS must match SYSRET */
-        jne     opportunistic_sysret_failed
+        jne     swapgs_restore_regs_and_return_to_usermode
 
         movq    R11(%rsp), %r11
         cmpq    %r11, EFLAGS(%rsp)              /* R11 == RFLAGS */
-        jne     opportunistic_sysret_failed
+        jne     swapgs_restore_regs_and_return_to_usermode
 
         /*
          * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -302,12 +302,12 @@ return_from_SYSCALL_64:
          * would never get past 'stuck_here'.
          */
         testq   $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-        jnz     opportunistic_sysret_failed
+        jnz     swapgs_restore_regs_and_return_to_usermode
 
         /* nothing to check for RSP */
 
         cmpq    $__USER_DS, SS(%rsp)            /* SS must match SYSRET */
-        jne     opportunistic_sysret_failed
+        jne     swapgs_restore_regs_and_return_to_usermode
 
         /*
          * We win! This label is here just for ease of understanding
@@ -315,14 +315,20 @@ return_from_SYSCALL_64:
          */
 syscall_return_via_sysret:
         /* rcx and r11 are already restored (see code above) */
-        RESTORE_C_REGS_EXCEPT_RCX_R11
-        movq    RSP(%rsp), %rsp
         UNWIND_HINT_EMPTY
+        POP_EXTRA_REGS
+.Lpop_c_regs_except_rcx_r11_and_sysret:
+        popq    %rsi    /* skip r11 */
+        popq    %r10
+        popq    %r9
+        popq    %r8
+        popq    %rax
+        popq    %rsi    /* skip rcx */
+        popq    %rdx
+        popq    %rsi
+        popq    %rdi
+        movq    RSP-ORIG_RAX(%rsp), %rsp
         USERGS_SYSRET64
-
-opportunistic_sysret_failed:
-        SWAPGS
-        jmp     restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
 ENTRY(stub_ptregs_64)
@@ -423,8 +429,7 @@ ENTRY(ret_from_fork)
         movq    %rsp, %rdi
         call    syscall_return_slowpath /* returns with IRQs disabled */
         TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
-        SWAPGS
-        jmp     restore_regs_and_iret
+        jmp     swapgs_restore_regs_and_return_to_usermode
 
 1:
         /* kernel thread */
@@ -612,8 +617,21 @@ GLOBAL(retint_user)
         mov     %rsp,%rdi
         call    prepare_exit_to_usermode
         TRACE_IRQS_IRETQ
+
+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+#ifdef CONFIG_DEBUG_ENTRY
+        /* Assert that pt_regs indicates user mode. */
+        testb   $3, CS(%rsp)
+        jnz     1f
+        ud2
+1:
+#endif
         SWAPGS
-        jmp     restore_regs_and_iret
+        POP_EXTRA_REGS
+        POP_C_REGS
+        addq    $8, %rsp        /* skip regs->orig_ax */
+        INTERRUPT_RETURN
+
 
 /* Returning to kernel space */
 retint_kernel:
@@ -633,15 +651,17 @@ retint_kernel:
          */
         TRACE_IRQS_IRETQ
 
-/*
- * At this label, code paths which return to kernel and to user,
- * which come from interrupts/exception and from syscalls, merge.
- */
-GLOBAL(restore_regs_and_iret)
-        RESTORE_EXTRA_REGS
-restore_c_regs_and_iret:
-        RESTORE_C_REGS
-        REMOVE_PT_GPREGS_FROM_STACK 8
+GLOBAL(restore_regs_and_return_to_kernel)
+#ifdef CONFIG_DEBUG_ENTRY
+        /* Assert that pt_regs indicates kernel mode. */
+        testb   $3, CS(%rsp)
+        jz      1f
+        ud2
+1:
+#endif
+        POP_EXTRA_REGS
+        POP_C_REGS
+        addq    $8, %rsp        /* skip regs->orig_ax */
         INTERRUPT_RETURN
 
 ENTRY(native_iret)
@@ -818,7 +838,7 @@ ENTRY(\sym)
 
         ASM_CLAC
 
-        .ifeq \has_error_code
+        .if \has_error_code == 0
         pushq   $-1                             /* ORIG_RAX: no syscall to restart */
         .endif
 
@@ -1059,6 +1079,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry stack_segment          do_stack_segment        has_error_code=1
 
 #ifdef CONFIG_XEN
+idtentry xennmi                 do_nmi                  has_error_code=0
 idtentry xendebug               do_debug                has_error_code=0
 idtentry xenint3                do_int3                 has_error_code=0
 #endif
@@ -1112,17 +1133,14 @@ ENTRY(paranoid_exit)
         DISABLE_INTERRUPTS(CLBR_ANY)
         TRACE_IRQS_OFF_DEBUG
         testl   %ebx, %ebx                      /* swapgs needed? */
-        jnz     paranoid_exit_no_swapgs
+        jnz     .Lparanoid_exit_no_swapgs
         TRACE_IRQS_IRETQ
         SWAPGS_UNSAFE_STACK
-        jmp     paranoid_exit_restore
-paranoid_exit_no_swapgs:
+        jmp     .Lparanoid_exit_restore
+.Lparanoid_exit_no_swapgs:
         TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
-        RESTORE_EXTRA_REGS
-        RESTORE_C_REGS
-        REMOVE_PT_GPREGS_FROM_STACK 8
-        INTERRUPT_RETURN
+.Lparanoid_exit_restore:
+        jmp restore_regs_and_return_to_kernel
 END(paranoid_exit)
 
 /*
@@ -1223,10 +1241,13 @@ ENTRY(error_exit)
         jmp     retint_user
 END(error_exit)
 
-/* Runs on exception stack */
-/* XXX: broken on Xen PV */
+/*
+ * Runs on exception stack.  Xen PV does not go through this path at all,
+ * so we can use real assembly here.
+ */
 ENTRY(nmi)
         UNWIND_HINT_IRET_REGS
+
         /*
          * We allow breakpoints in NMIs. If a breakpoint occurs, then
          * the iretq it performs will take us out of NMI context.
@@ -1284,7 +1305,7 @@ ENTRY(nmi)
          * stacks lest we corrupt the "NMI executing" variable.
          */
 
-        SWAPGS_UNSAFE_STACK
+        swapgs
         cld
         movq    %rsp, %rdx
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1328,8 +1349,7 @@ ENTRY(nmi)
          * Return back to user mode. We must *not* do the normal exit
          * work, because we don't want to enable interrupts.
          */
-        SWAPGS
-        jmp     restore_regs_and_iret
+        jmp     swapgs_restore_regs_and_return_to_usermode
 
 .Lnmi_from_kernel:
         /*
@@ -1450,7 +1470,7 @@ nested_nmi_out:
         popq    %rdx
 
         /* We are returning to kernel mode, so this cannot result in a fault. */
-        INTERRUPT_RETURN
+        iretq
 
 first_nmi:
         /* Restore rdx. */
@@ -1481,7 +1501,7 @@ first_nmi:
         pushfq                  /* RFLAGS */
         pushq   $__KERNEL_CS    /* CS */
         pushq   $1f             /* RIP */
-        INTERRUPT_RETURN        /* continues at repeat_nmi below */
+        iretq                   /* continues at repeat_nmi below */
         UNWIND_HINT_IRET_REGS
 1:
 #endif
@@ -1544,29 +1564,34 @@ end_repeat_nmi:
 nmi_swapgs:
         SWAPGS_UNSAFE_STACK
 nmi_restore:
-        RESTORE_EXTRA_REGS
-        RESTORE_C_REGS
+        POP_EXTRA_REGS
+        POP_C_REGS
 
-        /* Point RSP at the "iret" frame. */
-        REMOVE_PT_GPREGS_FROM_STACK 6*8
+        /*
+         * Skip orig_ax and the "outermost" frame to point RSP
+         * at the "iret" frame.
+         */
+        addq    $6*8, %rsp
 
         /*
          * Clear "NMI executing".  Set DF first so that we can easily
          * distinguish the remaining code between here and IRET from
-         * the SYSCALL entry and exit paths.  On a native kernel, we
-         * could just inspect RIP, but, on paravirt kernels,
-         * INTERRUPT_RETURN can translate into a jump into a
-         * hypercall page.
+         * the SYSCALL entry and exit paths.
+         *
+         * We arguably should just inspect RIP instead, but I (Andy) wrote
+         * this code when I had the misapprehension that Xen PV supported
+         * NMIs, and Xen PV would break that approach.
          */
         std
         movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
 
         /*
-         * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
-         * stack in a single instruction.  We are returning to kernel
-         * mode, so this cannot result in a fault.
+         * iretq reads the "iret" frame and exits the NMI stack in a
+         * single instruction.  We are returning to kernel mode, so this
+         * cannot result in a fault.  Similarly, we don't need to worry
+         * about espfix64 on the way back to kernel mode.
          */
-        INTERRUPT_RETURN
+        iretq
 END(nmi)
 
 ENTRY(ignore_sysret)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index b5c7a56ed256..568e130d932c 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat)
 
         /* Go back to user mode. */
         TRACE_IRQS_ON
-        SWAPGS
-        jmp     restore_regs_and_iret
+        jmp     swapgs_restore_regs_and_return_to_usermode
 END(entry_INT80_compat)
 
 ENTRY(stub32_clone)
diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 331f1dca5085..6fb9b57ed5ba 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-out := $(obj)/../../include/generated/asm
-uapi := $(obj)/../../include/generated/uapi/asm
+out := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 5b0579abb398..3ac991d81e74 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
         bool ok;
         unsigned int retry = RDRAND_RETRY_LOOPS;
         do {
-                asm volatile(RDRAND_LONG "\n\t"
+                asm volatile(RDRAND_LONG
                              CC_SET(c)
                              : CC_OUT(c) (ok), "=a" (*v));
                 if (ok)
@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
         bool ok;
         unsigned int retry = RDRAND_RETRY_LOOPS;
         do {
-                asm volatile(RDRAND_INT "\n\t"
+                asm volatile(RDRAND_INT
                              CC_SET(c)
                              : CC_OUT(c) (ok), "=a" (*v));
                 if (ok)
@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
 static inline bool rdseed_long(unsigned long *v)
 {
         bool ok;
-        asm volatile(RDSEED_LONG "\n\t"
+        asm volatile(RDSEED_LONG
                      CC_SET(c)
                      : CC_OUT(c) (ok), "=a" (*v));
         return ok;
@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
 static inline bool rdseed_int(unsigned int *v)
 {
         bool ok;
-        asm volatile(RDSEED_INT "\n\t"
+        asm volatile(RDSEED_INT
                      CC_SET(c)
                      : CC_OUT(c) (ok), "=a" (*v));
         return ok;
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2bcf47314959..3fa039855b8f 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
 {
         bool negative;
-        asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+        asm volatile(LOCK_PREFIX "andb %2,%1"
                 CC_SET(s)
                 : CC_OUT(s) (negative), ADDR
                 : "ir" ((char) ~(1 << nr)) : "memory");
@@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 {
         bool oldbit;
 
-        asm("bts %2,%1\n\t"
+        asm("bts %2,%1"
             CC_SET(c)
             : CC_OUT(c) (oldbit), ADDR
             : "Ir" (nr));
@@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 {
         bool oldbit;
 
-        asm volatile("btr %2,%1\n\t"
+        asm volatile("btr %2,%1"
                      CC_SET(c)
                      : CC_OUT(c) (oldbit), ADDR
                      : "Ir" (nr));
@@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 {
         bool oldbit;
 
-        asm volatile("btc %2,%1\n\t"
+        asm volatile("btc %2,%1"
                      CC_SET(c)
                      : CC_OUT(c) (oldbit), ADDR
                      : "Ir" (nr) : "memory");
@@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 {
         bool oldbit;
 
-        asm volatile("bt %2,%1\n\t"
+        asm volatile("bt %2,%1"
                      CC_SET(c)
                      : CC_OUT(c) (oldbit)
                      : "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 9eef9cc64c68..a600a6cda9ec 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -7,6 +7,7 @@
  */
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 #include <asm/processor.h>
 #include <asm/user32.h>
 #include <asm/unistd.h>
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0dfa68438e80..bf6a76202a77 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -126,11 +126,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define boot_cpu_has(bit)       cpu_has(&boot_cpu_data, bit)
 
 #define set_cpu_cap(c, bit)     set_bit(bit, (unsigned long *)((c)->x86_capability))
-#define clear_cpu_cap(c, bit)   clear_bit(bit, (unsigned long *)((c)->x86_capability))
-#define setup_clear_cpu_cap(bit) do { \
-        clear_cpu_cap(&boot_cpu_data, bit);     \
-        set_bit(bit, (unsigned long *)cpu_caps_cleared); \
-} while (0)
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+
 #define setup_force_cpu_cap(bit) do { \
         set_cpu_cap(&boot_cpu_data, bit);       \
         set_bit(bit, (unsigned long *)cpu_caps_set); \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 793690fbda36..cdf5be866863 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,173 +13,176 @@
 /*
  * Defines x86 CPU feature bits
  */
 #define NCAPINTS        18      /* N 32-bit words worth of info */
 #define NBUGINTS        1       /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
  * in /proc/cpuinfo instead of the macro name.  If the string is "",
  * this feature bit is not displayed in /proc/cpuinfo at all.
+ *
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c as well.
  */
 
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
 #define X86_FEATURE_FPU         ( 0*32+ 0) /* Onboard FPU */
 #define X86_FEATURE_VME         ( 0*32+ 1) /* Virtual Mode Extensions */
 #define X86_FEATURE_DE          ( 0*32+ 2) /* Debugging Extensions */
 #define X86_FEATURE_PSE         ( 0*32+ 3) /* Page Size Extensions */
 #define X86_FEATURE_TSC         ( 0*32+ 4) /* Time Stamp Counter */
 #define X86_FEATURE_MSR         ( 0*32+ 5) /* Model-Specific Registers */
 #define X86_FEATURE_PAE         ( 0*32+ 6) /* Physical Address Extensions */
 #define X86_FEATURE_MCE         ( 0*32+ 7) /* Machine Check Exception */
 #define X86_FEATURE_CX8         ( 0*32+ 8) /* CMPXCHG8 instruction */
 #define X86_FEATURE_APIC        ( 0*32+ 9) /* Onboard APIC */
 #define X86_FEATURE_SEP         ( 0*32+11) /* SYSENTER/SYSEXIT */
 #define X86_FEATURE_MTRR        ( 0*32+12) /* Memory Type Range Registers */
 #define X86_FEATURE_PGE         ( 0*32+13) /* Page Global Enable */
 #define X86_FEATURE_MCA         ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV        ( 0*32+15) /* CMOV instructions */
-                                           /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_CMOV        ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
 #define X86_FEATURE_PAT         ( 0*32+16) /* Page Attribute Table */
 #define X86_FEATURE_PSE36       ( 0*32+17) /* 36-bit PSEs */
 #define X86_FEATURE_PN          ( 0*32+18) /* Processor serial number */
 #define X86_FEATURE_CLFLUSH     ( 0*32+19) /* CLFLUSH instruction */
 #define X86_FEATURE_DS          ( 0*32+21) /* "dts" Debug Store */
 #define X86_FEATURE_ACPI        ( 0*32+22) /* ACPI via MSR */
 #define X86_FEATURE_MMX         ( 0*32+23) /* Multimedia Extensions */
 #define X86_FEATURE_FXSR        ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
 #define X86_FEATURE_XMM         ( 0*32+25) /* "sse" */
 #define X86_FEATURE_XMM2        ( 0*32+26) /* "sse2" */
 #define X86_FEATURE_SELFSNOOP   ( 0*32+27) /* "ss" CPU self snoop */
 #define X86_FEATURE_HT          ( 0*32+28) /* Hyper-Threading */
 #define X86_FEATURE_ACC         ( 0*32+29) /* "tm" Automatic clock control */
 #define X86_FEATURE_IA64        ( 0*32+30) /* IA-64 processor */
 #define X86_FEATURE_PBE         ( 0*32+31) /* Pending Break Enable */
 
 /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
 /* Don't duplicate feature flags which are redundant with Intel! */
 #define X86_FEATURE_SYSCALL     ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP          ( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_MP          ( 1*32+19) /* MP Capable */
 #define X86_FEATURE_NX          ( 1*32+20) /* Execute Disable */
 #define X86_FEATURE_MMXEXT      ( 1*32+22) /* AMD MMX extensions */
 #define X86_FEATURE_FXSR_OPT    ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
 #define X86_FEATURE_GBPAGES     ( 1*32+26) /* "pdpe1gb" GB pages */
 #define X86_FEATURE_RDTSCP      ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM          ( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT    ( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW       ( 1*32+31) /* 3DNow! */
+#define X86_FEATURE_LM          ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT    ( 1*32+30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW       ( 1*32+31) /* 3DNow */
 
 /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
 #define X86_FEATURE_RECOVERY    ( 2*32+ 0) /* CPU in recovery mode */
 #define X86_FEATURE_LONGRUN     ( 2*32+ 1) /* Longrun power control */
 #define X86_FEATURE_LRTI        ( 2*32+ 3) /* LongRun table interface */
 
 /* Other features, Linux-defined mapping, word 3 */
 /* This range is used for feature bits which conflict or are synthesized */
 #define X86_FEATURE_CXMMX       ( 3*32+ 0) /* Cyrix MMX extensions */
 #define X86_FEATURE_K6_MTRR     ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
 #define X86_FEATURE_CYRIX_ARR   ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
 #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
+
+/* CPU types for specific tunings: */
 #define X86_FEATURE_K8          ( 3*32+ 4) /* "" Opteron, Athlon64 */
 #define X86_FEATURE_K7          ( 3*32+ 5) /* "" Athlon */
 #define X86_FEATURE_P3          ( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4          ( 3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP          ( 3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_ART         ( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_UP          ( 3*32+ 9) /* SMP kernel running on UP */
+#define X86_FEATURE_ART         ( 3*32+10) /* Always running timer (ART) */
 #define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
 #define X86_FEATURE_PEBS        ( 3*32+12) /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS         ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32   ( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32  ( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD    ( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+#define X86_FEATURE_SYSCALL32   ( 3*32+14) /* "" syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32  ( 3*32+15) /* "" sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD    ( 3*32+16) /* REP microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER   ( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL        ( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_ALWAYS      ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY   ( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_XTOPOLOGY   ( 3*32+22) /* CPU topology enum extensions */
 #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
 #define X86_FEATURE_CPUID       ( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF  ( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF  ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
 
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
 #define X86_FEATURE_XMM3        ( 4*32+ 0) /* "pni" SSE-3 */
 #define X86_FEATURE_PCLMULQDQ   ( 4*32+ 1) /* PCLMULQDQ instruction */
 #define X86_FEATURE_DTES64      ( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT       ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL       ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_MWAIT       ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
+#define X86_FEATURE_DSCPL       ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
 #define X86_FEATURE_VMX         ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX         ( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_SMX         ( 4*32+ 6) /* Safer Mode eXtensions */
 #define X86_FEATURE_EST         ( 4*32+ 7) /* Enhanced SpeedStep */
 #define X86_FEATURE_TM2         ( 4*32+ 8) /* Thermal Monitor 2 */
 #define X86_FEATURE_SSSE3       ( 4*32+ 9) /* Supplemental SSE-3 */
 #define X86_FEATURE_CID         ( 4*32+10) /* Context ID */
 #define X86_FEATURE_SDBG        ( 4*32+11) /* Silicon Debug */
 #define X86_FEATURE_FMA         ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16        ( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_CX16        ( 4*32+13) /* CMPXCHG16B instruction */
 #define X86_FEATURE_XTPR        ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM        ( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PDCM        ( 4*32+15) /* Perf/Debug Capabilities MSR */
 #define X86_FEATURE_PCID        ( 4*32+17) /* Process Context Identifiers */
 #define X86_FEATURE_DCA         ( 4*32+18) /* Direct Cache Access */
 #define X86_FEATURE_XMM4_1      ( 4*32+19) /* "sse4_1" SSE-4.1 */
 #define X86_FEATURE_XMM4_2      ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC      ( 4*32+21) /* x2APIC */
+#define X86_FEATURE_X2APIC      ( 4*32+21) /* X2APIC */
 #define X86_FEATURE_MOVBE       ( 4*32+22) /* MOVBE instruction */
 #define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
 #define X86_FEATURE_AES         ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE       ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE     ( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_XSAVE       ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE     ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
 #define X86_FEATURE_AVX         ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C        ( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND      ( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_F16C        ( 4*32+29) /* 16-bit FP conversions */
+#define X86_FEATURE_RDRAND      ( 4*32+30) /* RDRAND instruction */
 #define X86_FEATURE_HYPERVISOR  ( 4*32+31) /* Running on a hypervisor */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
 #define X86_FEATURE_XSTORE      ( 5*32+ 2) /* "rng" RNG present (xstore) */
 #define X86_FEATURE_XSTORE_EN   ( 5*32+ 3) /* "rng_en" RNG enabled */
 #define X86_FEATURE_XCRYPT      ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
 #define X86_FEATURE_XCRYPT_EN   ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
 #define X86_FEATURE_ACE2        ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
 #define X86_FEATURE_ACE2_EN     ( 5*32+ 9) /* ACE v2 enabled */
 #define X86_FEATURE_PHE         ( 5*32+10) /* PadLock Hash Engine */
 #define X86_FEATURE_PHE_EN      ( 5*32+11) /* PHE enabled */
153#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ 156#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
154#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ 157#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
155 158
156/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ 159/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
157#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ 160#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
158#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If set, HyperThreading is not valid */ 161#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If set, HyperThreading is not valid */
159#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ 162#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
160#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ 163#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
161#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ 164#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
162#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ 165#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
163#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ 166#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
164#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ 167#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
165#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ 168#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
166#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ 169#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
167#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ 170#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
168#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ 171#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
169#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ 172#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
170#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ 173#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
171#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ 174#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
172#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ 175#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
173#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ 176#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
174#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ 177#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
175#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ 178#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
176#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ 179#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
177#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ 180#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
178#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ 181#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
179#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ 182#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
180#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ 183#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
181#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ 184#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
182#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ 185#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
183 186
184/* 187/*
185 * Auxiliary flags: Linux defined - For features scattered in various 188 * Auxiliary flags: Linux defined - For features scattered in various
@@ -187,146 +190,153 @@
187 * 190 *
188 * Reuse free bits when adding new feature flags! 191 * Reuse free bits when adding new feature flags!
189 */ 192 */
190#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ 193#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
191#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ 194#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
192#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ 195#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
193#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ 196#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
194#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
195#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
196#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
197 200
198#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 201#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
199#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 202#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
200#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 203#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
201 204
202#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 205#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
203#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ 206#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
204#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ 207#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
205#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ 208#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
206 209
207#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ 210#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
208 211
209/* Virtualization flags: Linux defined, word 8 */ 212/* Virtualization flags: Linux defined, word 8 */
210#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ 213#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
211#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ 214#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
212#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ 215#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
213#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ 216#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
214#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ 217#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
215 218
216#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ 219#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
217#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ 220#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
218 221
219 222
220/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 223/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
221#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions */ 224#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions */
222#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ 225#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
223#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ 226#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
224#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ 227#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
225#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ 228#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
226#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ 229#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
227#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ 230#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
228#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ 231#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
229#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ 232#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
230#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ 233#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
231#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ 234#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
232#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ 235#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
233#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ 236#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
234#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ 237#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
235#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ 238#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
236#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ 239#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
237#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ 240#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
238#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ 241#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
239#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ 242#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
240#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ 243#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
241#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ 244#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
242#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ 245#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
243#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ 246#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
244#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ 247#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
245#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ 248#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
246#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ 249#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
247#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ 250#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
248 251
249/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ 252/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
250#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ 253#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
251#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ 254#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
252#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ 255#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
253#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ 256#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
254 257
255/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ 258/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
256#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ 259#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
257 260
258/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ 261/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
259#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ 262#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
260#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ 263#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
261#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ 264#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
262 265
263/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ 266/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
264#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ 267#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
265#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ 268#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
266 269
267/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ 270/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
268#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ 271#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
269#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ 272#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
270#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ 273#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
271#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ 274#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
272#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ 275#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
273#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ 276#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
274#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ 277#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
275#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ 278#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
276#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ 279#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
277#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ 280#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
278 281
279/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ 282/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
280#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ 283#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
281#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ 284#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
282#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ 285#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
283#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ 286#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
284#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ 287#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
285#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ 288#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
286#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ 289#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
287#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ 290#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
288#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ 291#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
289#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ 292#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
290#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ 293#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
291#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ 294#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
292#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ 295#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
293 296
294/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ 297/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
295#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions */ 298#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions */
296#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ 299#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
297#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ 300#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
298#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ 301#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
299#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ 302#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
300#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ 303#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
304#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
305#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
306#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
307#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
308#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
309#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
301 310
302/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ 311/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
303#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ 312#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
304#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ 313#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
305#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ 314#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
306 315
307/* 316/*
308 * BUG word(s) 317 * BUG word(s)
309 */ 318 */
310#define X86_BUG(x) (NCAPINTS*32 + (x)) 319#define X86_BUG(x) (NCAPINTS*32 + (x))
311 320
312#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ 321#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
313#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ 322#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
314#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ 323#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
315#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ 324#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
316#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ 325#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
317#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ 326#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
318#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FDP */ 327#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FDP */
319#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ 328#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
320#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ 329#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
321#ifdef CONFIG_X86_32 330#ifdef CONFIG_X86_32
322/* 331/*
323 * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional 332 * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
324 * to avoid confusion. 333 * to avoid confusion.
325 */ 334 */
326#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ 335#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
327#endif 336#endif
328#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ 337#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
329#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ 338#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
330#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ 339#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
331#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among those affected by Erratum 400 */ 340#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among those affected by Erratum 400 */
341
332#endif /* _ASM_X86_CPUFEATURES_H */ 342#endif /* _ASM_X86_CPUFEATURES_H */
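Every X86_FEATURE_* value in this header encodes a (word, bit) pair: 32 times the capability word plus the bit inside it, so word 9 bit 5 is 9*32+5. A minimal user-space sketch of that decomposition; the caps[] layout and helper below are illustrative stand-ins, not the kernel's real accessors:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NCAPINTS	18		/* illustrative: capability words after word 17 above */
#define FEATURE_AVX2	(9 * 32 + 5)	/* same encoding as X86_FEATURE_AVX2 */

static bool test_feature(const uint32_t caps[NCAPINTS], unsigned int feature)
{
	/* feature / 32 picks the CPUID output word, feature % 32 the bit inside it */
	return caps[feature / 32] & (1u << (feature % 32));
}

int main(void)
{
	uint32_t caps[NCAPINTS] = { 0 };

	caps[9] |= 1u << 5;	/* pretend CPUID.(EAX=7):EBX reported AVX2 */
	printf("avx2: %d\n", test_feature(caps, FEATURE_AVX2));
	return 0;
}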
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index c1a125e47ff3..3a091cea36c5 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -253,7 +253,7 @@ extern int force_personality32;
253 * space open for things that want to use the area for 32-bit pointers. 253 * space open for things that want to use the area for 32-bit pointers.
254 */ 254 */
255#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ 255#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \
256 (TASK_SIZE / 3 * 2)) 256 (DEFAULT_MAP_WINDOW / 3 * 2))
257 257
258/* This yields a mask that user programs can use to figure out what 258/* This yields a mask that user programs can use to figure out what
259 instruction set this CPU supports. This could be done in user space, 259 instruction set this CPU supports. This could be done in user space,
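The switch from TASK_SIZE to DEFAULT_MAP_WINDOW keeps PIE executables below the classic 47-bit boundary even when 5-level paging enlarges TASK_SIZE. A quick check of the resulting 64-bit base address; the DEFAULT_MAP_WINDOW value below is the assumed x86-64 4-level-paging figure:

#include <stdio.h>

#define PAGE_SIZE	   4096ul
#define DEFAULT_MAP_WINDOW ((1ul << 47) - PAGE_SIZE)	/* assumed x86-64 value */

int main(void)
{
	/* same expression as the new ELF_ET_DYN_BASE, 64-bit case */
	printf("%#lx\n", DEFAULT_MAP_WINDOW / 3 * 2);	/* ~0x555555554aaa */
	return 0;
}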
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index dcd9fb55e679..b0c505fe9a95 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -104,6 +104,12 @@ enum fixed_addresses {
104 FIX_GDT_REMAP_BEGIN, 104 FIX_GDT_REMAP_BEGIN,
105 FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, 105 FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
106 106
107#ifdef CONFIG_ACPI_APEI_GHES
108 /* Used for GHES mapping from assorted contexts */
109 FIX_APEI_GHES_IRQ,
110 FIX_APEI_GHES_NMI,
111#endif
112
107 __end_of_permanent_fixed_addresses, 113 __end_of_permanent_fixed_addresses,
108 114
109 /* 115 /*
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 8546fafa21a9..7948a17febb4 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -6,7 +6,7 @@
6#include <asm/orc_types.h> 6#include <asm/orc_types.h>
7 7
8struct mod_arch_specific { 8struct mod_arch_specific {
9#ifdef CONFIG_ORC_UNWINDER 9#ifdef CONFIG_UNWINDER_ORC
10 unsigned int num_orcs; 10 unsigned int num_orcs;
11 int *orc_unwind_ip; 11 int *orc_unwind_ip;
12 struct orc_entry *orc_unwind; 12 struct orc_entry *orc_unwind;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index fd81228e8037..283efcaac8af 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -16,10 +16,9 @@
16#include <linux/cpumask.h> 16#include <linux/cpumask.h>
17#include <asm/frame.h> 17#include <asm/frame.h>
18 18
19static inline void load_sp0(struct tss_struct *tss, 19static inline void load_sp0(unsigned long sp0)
20 struct thread_struct *thread)
21{ 20{
22 PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); 21 PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
23} 22}
24 23
25/* The paravirtualized CPUID instruction. */ 24/* The paravirtualized CPUID instruction. */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 10cc3b9709fe..6ec54d01972d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -134,7 +134,7 @@ struct pv_cpu_ops {
134 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); 134 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
135 void (*free_ldt)(struct desc_struct *ldt, unsigned entries); 135 void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
136 136
137 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); 137 void (*load_sp0)(unsigned long sp0);
138 138
139 void (*set_iopl_mask)(unsigned mask); 139 void (*set_iopl_mask)(unsigned mask);
140 140
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 377f1ffd18be..ba3c523aaf16 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
526{ 526{
527 bool oldbit; 527 bool oldbit;
528 528
529 asm volatile("bt "__percpu_arg(2)",%1\n\t" 529 asm volatile("bt "__percpu_arg(2)",%1"
530 CC_SET(c) 530 CC_SET(c)
531 : CC_OUT(c) (oldbit) 531 : CC_OUT(c) (oldbit)
532 : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); 532 : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 59df7b47a434..9e9b05fc4860 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -200,10 +200,9 @@ enum page_cache_mode {
200 200
201#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) 201#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
202 202
203#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
204 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
205#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ 203#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
206 _PAGE_DIRTY | _PAGE_ENC) 204 _PAGE_DIRTY | _PAGE_ENC)
205#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
207 206
208#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) 207#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
209#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) 208#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
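The rewritten _PAGE_TABLE is now derived from _KERNPG_TABLE plus the user bit. A standalone sketch verifying the two forms produce the same value; the bit values are the standard x86 PTE bits, with _PAGE_ENC omitted since it is zero unless SME is active:

#include <stdio.h>

/* Standard x86 PTE bits (illustrative subset) */
#define _PAGE_PRESENT	0x001
#define _PAGE_RW	0x002
#define _PAGE_USER	0x004
#define _PAGE_ACCESSED	0x020
#define _PAGE_DIRTY	0x040

#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define _PAGE_TABLE	(_KERNPG_TABLE | _PAGE_USER)

int main(void)
{
	/* prints 0x63 0x67: the new form equals the old explicit OR of all five bits */
	printf("%#x %#x\n", _KERNPG_TABLE, _PAGE_TABLE);
	return 0;
}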
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bdac19ab2488..2db7cf720b04 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -431,7 +431,9 @@ typedef struct {
431struct thread_struct { 431struct thread_struct {
432 /* Cached TLS descriptors: */ 432 /* Cached TLS descriptors: */
433 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; 433 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
434#ifdef CONFIG_X86_32
434 unsigned long sp0; 435 unsigned long sp0;
436#endif
435 unsigned long sp; 437 unsigned long sp;
436#ifdef CONFIG_X86_32 438#ifdef CONFIG_X86_32
437 unsigned long sysenter_cs; 439 unsigned long sysenter_cs;
@@ -518,16 +520,9 @@ static inline void native_set_iopl_mask(unsigned mask)
518} 520}
519 521
520static inline void 522static inline void
521native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) 523native_load_sp0(unsigned long sp0)
522{ 524{
523 tss->x86_tss.sp0 = thread->sp0; 525 this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
524#ifdef CONFIG_X86_32
525 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
526 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
527 tss->x86_tss.ss1 = thread->sysenter_cs;
528 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
529 }
530#endif
531} 526}
532 527
533static inline void native_swapgs(void) 528static inline void native_swapgs(void)
@@ -547,15 +542,20 @@ static inline unsigned long current_top_of_stack(void)
547#endif 542#endif
548} 543}
549 544
545static inline bool on_thread_stack(void)
546{
547 return (unsigned long)(current_top_of_stack() -
548 current_stack_pointer) < THREAD_SIZE;
549}
550
550#ifdef CONFIG_PARAVIRT 551#ifdef CONFIG_PARAVIRT
551#include <asm/paravirt.h> 552#include <asm/paravirt.h>
552#else 553#else
553#define __cpuid native_cpuid 554#define __cpuid native_cpuid
554 555
555static inline void load_sp0(struct tss_struct *tss, 556static inline void load_sp0(unsigned long sp0)
556 struct thread_struct *thread)
557{ 557{
558 native_load_sp0(tss, thread); 558 native_load_sp0(sp0);
559} 559}
560 560
561#define set_iopl_mask native_set_iopl_mask 561#define set_iopl_mask native_set_iopl_mask
@@ -804,6 +804,15 @@ static inline void spin_lock_prefetch(const void *x)
804#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ 804#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
805 TOP_OF_KERNEL_STACK_PADDING) 805 TOP_OF_KERNEL_STACK_PADDING)
806 806
807#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
808
809#define task_pt_regs(task) \
810({ \
811 unsigned long __ptr = (unsigned long)task_stack_page(task); \
812 __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
813 ((struct pt_regs *)__ptr) - 1; \
814})
815
807#ifdef CONFIG_X86_32 816#ifdef CONFIG_X86_32
808/* 817/*
809 * User space process size: 3GB (default). 818 * User space process size: 3GB (default).
@@ -823,23 +832,6 @@ static inline void spin_lock_prefetch(const void *x)
823 .addr_limit = KERNEL_DS, \ 832 .addr_limit = KERNEL_DS, \
824} 833}
825 834
826/*
827 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
828 * This is necessary to guarantee that the entire "struct pt_regs"
829 * is accessible even if the CPU hasn't stored the SS/ESP registers
830 * on the stack (interrupt gate does not save these registers
831 * when switching to the same priv ring).
832 * Therefore beware: accessing the ss/esp fields of the
833 * "struct pt_regs" is possible, but they may contain the
834 * completely wrong values.
835 */
836#define task_pt_regs(task) \
837({ \
838 unsigned long __ptr = (unsigned long)task_stack_page(task); \
839 __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
840 ((struct pt_regs *)__ptr) - 1; \
841})
842
843#define KSTK_ESP(task) (task_pt_regs(task)->sp) 835#define KSTK_ESP(task) (task_pt_regs(task)->sp)
844 836
845#else 837#else
@@ -873,11 +865,9 @@ static inline void spin_lock_prefetch(const void *x)
873#define STACK_TOP_MAX TASK_SIZE_MAX 865#define STACK_TOP_MAX TASK_SIZE_MAX
874 866
875#define INIT_THREAD { \ 867#define INIT_THREAD { \
876 .sp0 = TOP_OF_INIT_STACK, \
877 .addr_limit = KERNEL_DS, \ 868 .addr_limit = KERNEL_DS, \
878} 869}
879 870
880#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
881extern unsigned long KSTK_ESP(struct task_struct *task); 871extern unsigned long KSTK_ESP(struct task_struct *task);
882 872
883#endif /* CONFIG_X86_64 */ 873#endif /* CONFIG_X86_64 */
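task_pt_regs() is now defined the same way for 32- and 64-bit: take the top of the task's stack page, drop the padding, and step back one struct pt_regs. The arithmetic as a user-space sketch; THREAD_SIZE, the padding value, and the struct size are placeholders, not the kernel's actual figures:

#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE                 (4 * 4096)	/* assumption: 16 KiB kernel stack */
#define TOP_OF_KERNEL_STACK_PADDING 0		/* 0 on 64-bit, nonzero on 32-bit */

struct fake_pt_regs { unsigned long r[21]; };	/* stand-in for struct pt_regs */

static struct fake_pt_regs *demo_task_pt_regs(void *stack_page)
{
	uintptr_t top = (uintptr_t)stack_page + THREAD_SIZE
			- TOP_OF_KERNEL_STACK_PADDING;
	/* the register frame sits at the very top, just below the padding */
	return (struct fake_pt_regs *)top - 1;
}

int main(void)
{
	static unsigned char stack[THREAD_SIZE];

	printf("pt_regs at offset %td\n",
	       (unsigned char *)demo_task_pt_regs(stack) - stack);
	return 0;
}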
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index c0e3c45cf6ab..14131dd06b29 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
136#endif 136#endif
137} 137}
138 138
139#ifdef CONFIG_X86_64
140static inline bool user_64bit_mode(struct pt_regs *regs) 139static inline bool user_64bit_mode(struct pt_regs *regs)
141{ 140{
141#ifdef CONFIG_X86_64
142#ifndef CONFIG_PARAVIRT 142#ifndef CONFIG_PARAVIRT
143 /* 143 /*
144 * On non-paravirt systems, this is the only long mode CPL 3 144 * On non-paravirt systems, this is the only long mode CPL 3
@@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
149 /* Headers are too twisted for this to go in paravirt.h. */ 149 /* Headers are too twisted for this to go in paravirt.h. */
150 return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; 150 return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
151#endif 151#endif
152#else /* !CONFIG_X86_64 */
153 return false;
154#endif
152} 155}
153 156
157#ifdef CONFIG_X86_64
154#define current_user_stack_pointer() current_pt_regs()->sp 158#define current_user_stack_pointer() current_pt_regs()->sp
155#define compat_user_stack_pointer() current_pt_regs()->sp 159#define compat_user_stack_pointer() current_pt_regs()->sp
156#endif 160#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index d8f3a6ae9f6c..f91c365e57c3 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -29,7 +29,7 @@ cc_label: \
29#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ 29#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
30do { \ 30do { \
31 bool c; \ 31 bool c; \
32 asm volatile (fullop ";" CC_SET(cc) \ 32 asm volatile (fullop CC_SET(cc) \
33 : [counter] "+m" (var), CC_OUT(cc) (c) \ 33 : [counter] "+m" (var), CC_OUT(cc) (c) \
34 : __VA_ARGS__ : clobbers); \ 34 : __VA_ARGS__ : clobbers); \
35 return c; \ 35 return c; \
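Both this hunk and the percpu.h one above drop the explicit separator before CC_SET(), since CC_SET() now supplies its own. The underlying idiom is GCC's flag-output asm operands; a self-contained sketch of the same pattern, assuming GCC 6+ on x86, with illustrative names:

#include <stdbool.h>
#include <stdio.h>

/* Equivalent of the CC_SET(c)/CC_OUT(c) pair when flag outputs are
 * available: the carry flag produced by BT is returned directly,
 * with no explicit SETC instruction in the asm template. */
static bool demo_test_bit(const unsigned long *addr, int nr)
{
	bool oldbit;

	asm volatile ("bt %2, %1"
		      : "=@ccc" (oldbit)		/* carry flag as output */
		      : "m" (*addr), "Ir" (nr));
	return oldbit;
}

int main(void)
{
	unsigned long word = 1ul << 3;

	printf("bit 3: %d, bit 4: %d\n",
	       demo_test_bit(&word, 3), demo_test_bit(&word, 4));
	return 0;
}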
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 899084b70412..8c6bd6863db9 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,6 +2,8 @@
2#ifndef _ASM_X86_SWITCH_TO_H 2#ifndef _ASM_X86_SWITCH_TO_H
3#define _ASM_X86_SWITCH_TO_H 3#define _ASM_X86_SWITCH_TO_H
4 4
5#include <linux/sched/task_stack.h>
6
5struct task_struct; /* one of the stranger aspects of C forward declarations */ 7struct task_struct; /* one of the stranger aspects of C forward declarations */
6 8
7struct task_struct *__switch_to_asm(struct task_struct *prev, 9struct task_struct *__switch_to_asm(struct task_struct *prev,
@@ -73,4 +75,26 @@ do { \
73 ((last) = __switch_to_asm((prev), (next))); \ 75 ((last) = __switch_to_asm((prev), (next))); \
74} while (0) 76} while (0)
75 77
78#ifdef CONFIG_X86_32
79static inline void refresh_sysenter_cs(struct thread_struct *thread)
80{
81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
82 if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
83 return;
84
85 this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
87}
88#endif
89
90/* This is used when switching tasks or entering/exiting vm86 mode. */
91static inline void update_sp0(struct task_struct *task)
92{
93#ifdef CONFIG_X86_32
94 load_sp0(task->thread.sp0);
95#else
96 load_sp0(task_top_of_stack(task));
97#endif
98}
99
76#endif /* _ASM_X86_SWITCH_TO_H */ 100#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcafe27a6..bad25bb80679 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
21asmlinkage long sys_iopl(unsigned int); 21asmlinkage long sys_iopl(unsigned int);
22 22
23/* kernel/ldt.c */ 23/* kernel/ldt.c */
24asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); 24asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
25 25
26/* kernel/signal.c */ 26/* kernel/signal.c */
27asmlinkage long sys_rt_sigreturn(void); 27asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index fa60398bbc3a..069c04be1507 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
34 ) 34 )
35); 35);
36 36
37DEFINE_EVENT(x86_fpu, x86_fpu_state,
38 TP_PROTO(struct fpu *fpu),
39 TP_ARGS(fpu)
40);
41
42DEFINE_EVENT(x86_fpu, x86_fpu_before_save, 37DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
43 TP_PROTO(struct fpu *fpu), 38 TP_PROTO(struct fpu *fpu),
44 TP_ARGS(fpu) 39 TP_ARGS(fpu)
@@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
74 TP_ARGS(fpu) 69 TP_ARGS(fpu)
75); 70);
76 71
77DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
78 TP_PROTO(struct fpu *fpu),
79 TP_ARGS(fpu)
80);
81
82DEFINE_EVENT(x86_fpu, x86_fpu_init_state, 72DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
83 TP_PROTO(struct fpu *fpu), 73 TP_PROTO(struct fpu *fpu),
84 TP_ARGS(fpu) 74 TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b0cced97a6ce..1fadd310ff68 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void);
38 38
39#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) 39#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
40asmlinkage void xen_divide_error(void); 40asmlinkage void xen_divide_error(void);
41asmlinkage void xen_xennmi(void);
41asmlinkage void xen_xendebug(void); 42asmlinkage void xen_xendebug(void);
42asmlinkage void xen_xenint3(void); 43asmlinkage void xen_xenint3(void);
43asmlinkage void xen_nmi(void);
44asmlinkage void xen_overflow(void); 44asmlinkage void xen_overflow(void);
45asmlinkage void xen_bounds(void); 45asmlinkage void xen_bounds(void);
46asmlinkage void xen_invalid_op(void); 46asmlinkage void xen_invalid_op(void);
@@ -145,4 +145,22 @@ enum {
145 X86_TRAP_IRET = 32, /* 32, IRET Exception */ 145 X86_TRAP_IRET = 32, /* 32, IRET Exception */
146}; 146};
147 147
148/*
149 * Page fault error code bits:
150 *
151 * bit 0 == 0: no page found 1: protection fault
152 * bit 1 == 0: read access 1: write access
153 * bit 2 == 0: kernel-mode access 1: user-mode access
154 * bit 3 == 1: use of reserved bit detected
155 * bit 4 == 1: fault was an instruction fetch
156 * bit 5 == 1: protection keys block access
157 */
158enum x86_pf_error_code {
159 X86_PF_PROT = 1 << 0,
160 X86_PF_WRITE = 1 << 1,
161 X86_PF_USER = 1 << 2,
162 X86_PF_RSVD = 1 << 3,
163 X86_PF_INSTR = 1 << 4,
164 X86_PF_PK = 1 << 5,
165};
148#endif /* _ASM_X86_TRAPS_H */ 166#endif /* _ASM_X86_TRAPS_H */
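A worked example of the new X86_PF_* bits: an error code of 0x7 decodes as a user-mode write that hit a present page. A small standalone decoder sketch (the printing helper is illustrative, not kernel code):

#include <stdio.h>

enum x86_pf_error_code {
	X86_PF_PROT	= 1 << 0,
	X86_PF_WRITE	= 1 << 1,
	X86_PF_USER	= 1 << 2,
	X86_PF_RSVD	= 1 << 3,
	X86_PF_INSTR	= 1 << 4,
	X86_PF_PK	= 1 << 5,
};

static void decode_pf(unsigned long ec)
{
	printf("%s-mode %s, %s%s%s\n",
	       (ec & X86_PF_USER)  ? "user"  : "kernel",
	       (ec & X86_PF_INSTR) ? "instruction fetch"
				   : (ec & X86_PF_WRITE) ? "write" : "read",
	       (ec & X86_PF_PROT)  ? "protection fault" : "page not present",
	       (ec & X86_PF_RSVD)  ? ", reserved bit set" : "",
	       (ec & X86_PF_PK)    ? ", blocked by protection key" : "");
}

int main(void)
{
	decode_pf(0x7);	/* user-mode write to a present page */
	return 0;
}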
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 87adc0d38c4a..e9cc6fe1fc6f 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,11 +13,11 @@ struct unwind_state {
13 struct task_struct *task; 13 struct task_struct *task;
14 int graph_idx; 14 int graph_idx;
15 bool error; 15 bool error;
16#if defined(CONFIG_ORC_UNWINDER) 16#if defined(CONFIG_UNWINDER_ORC)
17 bool signal, full_regs; 17 bool signal, full_regs;
18 unsigned long sp, bp, ip; 18 unsigned long sp, bp, ip;
19 struct pt_regs *regs; 19 struct pt_regs *regs;
20#elif defined(CONFIG_FRAME_POINTER_UNWINDER) 20#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
21 bool got_irq; 21 bool got_irq;
22 unsigned long *bp, *orig_sp, ip; 22 unsigned long *bp, *orig_sp, ip;
23 struct pt_regs *regs; 23 struct pt_regs *regs;
@@ -51,7 +51,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
51 __unwind_start(state, task, regs, first_frame); 51 __unwind_start(state, task, regs, first_frame);
52} 52}
53 53
54#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER) 54#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
56{ 56{
57 if (unwind_done(state)) 57 if (unwind_done(state))
@@ -66,7 +66,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
66} 66}
67#endif 67#endif
68 68
69#ifdef CONFIG_ORC_UNWINDER 69#ifdef CONFIG_UNWINDER_ORC
70void unwind_init(void); 70void unwind_init(void);
71void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, 71void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
72 void *orc, size_t orc_size); 72 void *orc, size_t orc_size);
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 6f3355399665..53b4ca55ebb6 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -152,5 +152,8 @@
152#define CX86_ARR_BASE 0xc4 152#define CX86_ARR_BASE 0xc4
153#define CX86_RCR_BASE 0xdc 153#define CX86_RCR_BASE 0xdc
154 154
155#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
156 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
157 X86_CR0_PG)
155 158
156#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */ 159#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5f70044340ff..295abaa58add 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,9 +25,9 @@ endif
25KASAN_SANITIZE_head$(BITS).o := n 25KASAN_SANITIZE_head$(BITS).o := n
26KASAN_SANITIZE_dumpstack.o := n 26KASAN_SANITIZE_dumpstack.o := n
27KASAN_SANITIZE_dumpstack_$(BITS).o := n 27KASAN_SANITIZE_dumpstack_$(BITS).o := n
28KASAN_SANITIZE_stacktrace.o := n 28KASAN_SANITIZE_stacktrace.o := n
29KASAN_SANITIZE_paravirt.o := n
29 30
30OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
31OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y 31OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
32OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y 32OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
33OBJECT_FILES_NON_STANDARD_test_nx.o := y 33OBJECT_FILES_NON_STANDARD_test_nx.o := y
@@ -128,9 +128,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
128obj-$(CONFIG_TRACING) += tracepoint.o 128obj-$(CONFIG_TRACING) += tracepoint.o
129obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o 129obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
130 130
131obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o 131obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
132obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o 132obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
133obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o 133obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
134 134
135### 135###
136# 64 bit specific files 136# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 236999c54edc..90cb82dbba57 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -22,7 +22,8 @@ obj-y += common.o
22obj-y += rdrand.o 22obj-y += rdrand.o
23obj-y += match.o 23obj-y += match.o
24obj-y += bugs.o 24obj-y += bugs.o
25obj-y += aperfmperf.o 25obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
26obj-y += cpuid-deps.o
26 27
27obj-$(CONFIG_PROC_FS) += proc.o 28obj-$(CONFIG_PROC_FS) += proc.o
28obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o 29obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 957813e0180d..0ee83321a313 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -42,6 +42,10 @@ static void aperfmperf_snapshot_khz(void *dummy)
42 s64 time_delta = ktime_ms_delta(now, s->time); 42 s64 time_delta = ktime_ms_delta(now, s->time);
43 unsigned long flags; 43 unsigned long flags;
44 44
45 /* Don't bother re-computing within the cache threshold time. */
46 if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
47 return;
48
45 local_irq_save(flags); 49 local_irq_save(flags);
46 rdmsrl(MSR_IA32_APERF, aperf); 50 rdmsrl(MSR_IA32_APERF, aperf);
47 rdmsrl(MSR_IA32_MPERF, mperf); 51 rdmsrl(MSR_IA32_MPERF, mperf);
@@ -70,7 +74,6 @@ static void aperfmperf_snapshot_khz(void *dummy)
70 74
71unsigned int arch_freq_get_on_cpu(int cpu) 75unsigned int arch_freq_get_on_cpu(int cpu)
72{ 76{
73 s64 time_delta;
74 unsigned int khz; 77 unsigned int khz;
75 78
76 if (!cpu_khz) 79 if (!cpu_khz)
@@ -79,12 +82,6 @@ unsigned int arch_freq_get_on_cpu(int cpu)
79 if (!static_cpu_has(X86_FEATURE_APERFMPERF)) 82 if (!static_cpu_has(X86_FEATURE_APERFMPERF))
80 return 0; 83 return 0;
81 84
82 /* Don't bother re-computing within the cache threshold time. */
83 time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu));
84 khz = per_cpu(samples.khz, cpu);
85 if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
86 return khz;
87
88 smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); 85 smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
89 khz = per_cpu(samples.khz, cpu); 86 khz = per_cpu(samples.khz, cpu);
90 if (khz) 87 if (khz)
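The staleness check now lives inside aperfmperf_snapshot_khz() itself, so every snapshot path honors the cache rather than only arch_freq_get_on_cpu(). The shape of the resulting pattern, sketched with illustrative names and an assumed threshold mirroring APERFMPERF_CACHE_THRESHOLD_MS:

#include <stdio.h>

#define CACHE_THRESHOLD_MS 20	/* assumed value, for illustration only */

struct sample { long long time_ms; unsigned int khz; };

/* After the patch, the early return lives inside the snapshot itself */
static void snapshot_khz(struct sample *s, long long now_ms, unsigned int fresh)
{
	if (now_ms - s->time_ms < CACHE_THRESHOLD_MS)
		return;		/* cached value still fresh, skip the MSR reads */
	s->time_ms = now_ms;
	s->khz = fresh;
}

int main(void)
{
	struct sample s = { .time_ms = 0, .khz = 2400000 };

	snapshot_khz(&s, 10, 2600000);	/* within threshold: keeps 2400000 */
	snapshot_khz(&s, 30, 2600000);	/* stale: updates to 2600000 */
	printf("khz = %u\n", s.khz);
	return 0;
}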
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176bae7fd8..cdf79ab628c2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1301,18 +1301,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
1301 pr_cont(")\n"); 1301 pr_cont(")\n");
1302} 1302}
1303 1303
1304static __init int setup_disablecpuid(char *arg) 1304/*
1305 * clearcpuid= was already parsed in fpu__init_parse_early_param.
 1306 * But we need to keep a dummy __setup around, otherwise it would
1307 * show up as an environment variable for init.
1308 */
1309static __init int setup_clearcpuid(char *arg)
1305{ 1310{
1306 int bit;
1307
1308 if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
1309 setup_clear_cpu_cap(bit);
1310 else
1311 return 0;
1312
1313 return 1; 1311 return 1;
1314} 1312}
1315__setup("clearcpuid=", setup_disablecpuid); 1313__setup("clearcpuid=", setup_clearcpuid);
1316 1314
1317#ifdef CONFIG_X86_64 1315#ifdef CONFIG_X86_64
1318DEFINE_PER_CPU_FIRST(union irq_stack_union, 1316DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1572,9 +1570,13 @@ void cpu_init(void)
1572 initialize_tlbstate_and_flush(); 1570 initialize_tlbstate_and_flush();
1573 enter_lazy_tlb(&init_mm, me); 1571 enter_lazy_tlb(&init_mm, me);
1574 1572
1575 load_sp0(t, &current->thread); 1573 /*
1574 * Initialize the TSS. Don't bother initializing sp0, as the initial
1575 * task never enters user mode.
1576 */
1576 set_tss_desc(cpu, t); 1577 set_tss_desc(cpu, t);
1577 load_TR_desc(); 1578 load_TR_desc();
1579
1578 load_mm_ldt(&init_mm); 1580 load_mm_ldt(&init_mm);
1579 1581
1580 clear_all_debug_regs(); 1582 clear_all_debug_regs();
@@ -1596,7 +1598,6 @@ void cpu_init(void)
1596 int cpu = smp_processor_id(); 1598 int cpu = smp_processor_id();
1597 struct task_struct *curr = current; 1599 struct task_struct *curr = current;
1598 struct tss_struct *t = &per_cpu(cpu_tss, cpu); 1600 struct tss_struct *t = &per_cpu(cpu_tss, cpu);
1599 struct thread_struct *thread = &curr->thread;
1600 1601
1601 wait_for_master_cpu(cpu); 1602 wait_for_master_cpu(cpu);
1602 1603
@@ -1627,9 +1628,13 @@ void cpu_init(void)
1627 initialize_tlbstate_and_flush(); 1628 initialize_tlbstate_and_flush();
1628 enter_lazy_tlb(&init_mm, curr); 1629 enter_lazy_tlb(&init_mm, curr);
1629 1630
1630 load_sp0(t, thread); 1631 /*
1632 * Initialize the TSS. Don't bother initializing sp0, as the initial
1633 * task never enters user mode.
1634 */
1631 set_tss_desc(cpu, t); 1635 set_tss_desc(cpu, t);
1632 load_TR_desc(); 1636 load_TR_desc();
1637
1633 load_mm_ldt(&init_mm); 1638 load_mm_ldt(&init_mm);
1634 1639
1635 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1640 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 000000000000..904b0a3c4e53
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,121 @@
1/* Declare dependencies between CPUID features */
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <asm/cpufeature.h>
6
7struct cpuid_dep {
8 unsigned int feature;
9 unsigned int depends;
10};
11
12/*
13 * Table of CPUID features that depend on others.
14 *
15 * This only includes dependencies that can be usefully disabled, not
16 * features part of the base set (like FPU).
17 *
18 * Note this all is not __init / __initdata because it can be
19 * called from cpu hotplug. It shouldn't do anything in this case,
20 * but it's difficult to tell that to the init reference checker.
21 */
22static const struct cpuid_dep cpuid_deps[] = {
23 { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
24 { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
25 { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
26 { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
27 { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
28 { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
29 { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
30 { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
31 { X86_FEATURE_XMM, X86_FEATURE_FXSR },
32 { X86_FEATURE_XMM2, X86_FEATURE_XMM },
33 { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
34 { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
35 { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
37 { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
38 { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
39 { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
40 { X86_FEATURE_AES, X86_FEATURE_XMM2 },
41 { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
42 { X86_FEATURE_FMA, X86_FEATURE_AVX },
43 { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
44 { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
45 { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
46 { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
47 { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
48 { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
49 { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
50 { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
51 { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
52 { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
53 { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
54 { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
55 { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
56 { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
57 { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
58 { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
59 { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
60 { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
61 { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
62 {}
63};
64
65static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
66{
67 /*
 68 * Note: This could use the non-atomic __*_bit() variants, but the
 69 * rest of the cpufeature code uses atomics as well, so keep it for
 70 * consistency. Clean up all of it separately.
71 */
72 if (!c) {
73 clear_cpu_cap(&boot_cpu_data, feature);
74 set_bit(feature, (unsigned long *)cpu_caps_cleared);
75 } else {
76 clear_bit(feature, (unsigned long *)c->x86_capability);
77 }
78}
79
80/* Take the capabilities and the BUG bits into account */
81#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
82
83static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
84{
85 DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
86 const struct cpuid_dep *d;
87 bool changed;
88
89 if (WARN_ON(feature >= MAX_FEATURE_BITS))
90 return;
91
92 clear_feature(c, feature);
93
94 /* Collect all features to disable, handling dependencies */
95 memset(disable, 0, sizeof(disable));
96 __set_bit(feature, disable);
97
98 /* Loop until we get a stable state. */
99 do {
100 changed = false;
101 for (d = cpuid_deps; d->feature; d++) {
102 if (!test_bit(d->depends, disable))
103 continue;
104 if (__test_and_set_bit(d->feature, disable))
105 continue;
106
107 changed = true;
108 clear_feature(c, d->feature);
109 }
110 } while (changed);
111}
112
113void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
114{
115 do_clear_cpu_cap(c, feature);
116}
117
118void setup_clear_cpu_cap(unsigned int feature)
119{
120 do_clear_cpu_cap(NULL, feature);
121}
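do_clear_cpu_cap() computes a transitive closure: it keeps sweeping the dependency table until no further feature gets disabled, so clearing e.g. XSAVE also drops AVX, and through AVX the AVX2 and AVX-512 entries. A toy standalone version of the same fixed-point loop, with an invented feature set for illustration:

#include <stdbool.h>
#include <stdio.h>

enum { F_XSAVE, F_AVX, F_AVX2, F_AVX512F, F_NFEATURES };

struct dep { int feature, depends; };

static const struct dep deps[] = {
	{ F_AVX,     F_XSAVE },
	{ F_AVX2,    F_AVX },
	{ F_AVX512F, F_AVX },
	{ -1, -1 },
};

static void clear_with_deps(bool caps[F_NFEATURES], int feature)
{
	bool changed;

	caps[feature] = false;
	do {	/* loop until a stable state, exactly like do_clear_cpu_cap() */
		changed = false;
		for (const struct dep *d = deps; d->feature >= 0; d++) {
			if (!caps[d->feature] || caps[d->depends])
				continue;	/* already off, or dependency intact */
			caps[d->feature] = false;
			changed = true;
		}
	} while (changed);
}

int main(void)
{
	bool caps[F_NFEATURES] = { true, true, true, true };

	clear_with_deps(caps, F_XSAVE);
	printf("avx2 after clearing xsave: %d\n", caps[F_AVX2]);	/* 0 */
	return 0;
}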
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 4378a729b933..6b7e17bf0b71 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -78,11 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
78 seq_printf(m, "microcode\t: 0x%x\n", c->microcode); 78 seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
79 79
80 if (cpu_has(c, X86_FEATURE_TSC)) { 80 if (cpu_has(c, X86_FEATURE_TSC)) {
81 unsigned int freq = arch_freq_get_on_cpu(cpu); 81 unsigned int freq = cpufreq_quick_get(cpu);
82 82
83 if (!freq) 83 if (!freq)
84 freq = cpufreq_quick_get(cpu);
85 if (!freq)
86 freq = cpu_khz; 84 freq = cpu_khz;
87 seq_printf(m, "cpu MHz\t\t: %u.%03u\n", 85 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
88 freq / 1000, (freq % 1000)); 86 freq / 1000, (freq % 1000));
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e3d9a5..6abd83572b01 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
249 */ 249 */
250static void __init fpu__init_parse_early_param(void) 250static void __init fpu__init_parse_early_param(void)
251{ 251{
252 char arg[32];
253 char *argptr = arg;
254 int bit;
255
252 if (cmdline_find_option_bool(boot_command_line, "no387")) 256 if (cmdline_find_option_bool(boot_command_line, "no387"))
253 setup_clear_cpu_cap(X86_FEATURE_FPU); 257 setup_clear_cpu_cap(X86_FEATURE_FPU);
254 258
@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
266 270
267 if (cmdline_find_option_bool(boot_command_line, "noxsaves")) 271 if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
268 setup_clear_cpu_cap(X86_FEATURE_XSAVES); 272 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
273
274 if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
275 sizeof(arg)) &&
276 get_option(&argptr, &bit) &&
277 bit >= 0 &&
278 bit < NCAPINTS * 32)
279 setup_clear_cpu_cap(bit);
269} 280}
270 281
271/* 282/*
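
The clearcpuid= hunk above looks the option up, parses one integer and range-checks it against NCAPINTS * 32 before handing it to setup_clear_cpu_cap(). A user-space sketch of that parse-and-validate step, with strtol() standing in for the kernel's get_option() and an illustrative NCAPINTS value:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCAPINTS 18   /* illustrative; pick the value of the kernel at hand */

/* Return the feature bit from "clearcpuid=<bit>", or -1 if absent/invalid. */
static int parse_clearcpuid(const char *cmdline)
{
        const char *arg, *p = strstr(cmdline, "clearcpuid=");
        char *end;
        long bit;

        if (!p)
                return -1;
        arg = p + strlen("clearcpuid=");
        bit = strtol(arg, &end, 0);
        if (end == arg || bit < 0 || bit >= NCAPINTS * 32)
                return -1;
        return (int)bit;
}

int main(void)
{
        printf("%d\n", parse_clearcpuid("quiet clearcpuid=154 ro")); /* 154 */
        printf("%d\n", parse_clearcpuid("clearcpuid=9999"));         /* -1  */
        return 0;
}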
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476c9022..87a57b7642d3 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
15#include <asm/fpu/xstate.h> 15#include <asm/fpu/xstate.h>
16 16
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm/cpufeature.h>
18 19
19/* 20/*
20 * Although we spell it out in here, the Processor Trace 21 * Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
36 "unknown xstate feature" , 37 "unknown xstate feature" ,
37}; 38};
38 39
40static short xsave_cpuid_features[] __initdata = {
41 X86_FEATURE_FPU,
42 X86_FEATURE_XMM,
43 X86_FEATURE_AVX,
44 X86_FEATURE_MPX,
45 X86_FEATURE_MPX,
46 X86_FEATURE_AVX512F,
47 X86_FEATURE_AVX512F,
48 X86_FEATURE_AVX512F,
49 X86_FEATURE_INTEL_PT,
50 X86_FEATURE_PKU,
51};
52
39/* 53/*
40 * Mask of xstate features supported by the CPU and the kernel: 54 * Mask of xstate features supported by the CPU and the kernel:
41 */ 55 */
@@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
59void fpu__xstate_clear_all_cpu_caps(void) 73void fpu__xstate_clear_all_cpu_caps(void)
60{ 74{
61 setup_clear_cpu_cap(X86_FEATURE_XSAVE); 75 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
62 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
63 setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
64 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
65 setup_clear_cpu_cap(X86_FEATURE_AVX);
66 setup_clear_cpu_cap(X86_FEATURE_AVX2);
67 setup_clear_cpu_cap(X86_FEATURE_AVX512F);
68 setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
69 setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
70 setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
71 setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
72 setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
73 setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
74 setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
75 setup_clear_cpu_cap(X86_FEATURE_MPX);
76 setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
77 setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
78 setup_clear_cpu_cap(X86_FEATURE_PKU);
79 setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
80 setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
81 setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
82} 76}
83 77
84/* 78/*
@@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
726 unsigned int eax, ebx, ecx, edx; 720 unsigned int eax, ebx, ecx, edx;
727 static int on_boot_cpu __initdata = 1; 721 static int on_boot_cpu __initdata = 1;
728 int err; 722 int err;
723 int i;
729 724
730 WARN_ON_FPU(!on_boot_cpu); 725 WARN_ON_FPU(!on_boot_cpu);
731 on_boot_cpu = 0; 726 on_boot_cpu = 0;
@@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
759 goto out_disable; 754 goto out_disable;
760 } 755 }
761 756
757 /*
758 * Clear XSAVE features that are disabled in the normal CPUID.
759 */
760 for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
761 if (!boot_cpu_has(xsave_cpuid_features[i]))
762 xfeatures_mask &= ~BIT(i);
763 }
764
762 xfeatures_mask &= fpu__get_supported_xfeatures_mask(); 765 xfeatures_mask &= fpu__get_supported_xfeatures_mask();
763 766
764 /* Enable xstate instructions to be able to continue with initialization: */ 767 /* Enable xstate instructions to be able to continue with initialization: */
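
The loop added to fpu__init_system_xstate() keys each xstate component to the CPUID feature that owns it: xsave_cpuid_features[i] names the feature guarding component bit i (bits 3 and 4 are both MPX, bits 5 through 7 are all AVX512F), and a missing feature masks the component out. A small sketch of that indexing, assuming an illustrative ten-bit mask:

#include <stdint.h>
#include <stdio.h>

enum { F_FPU, F_XMM, F_AVX, F_MPX, F_AVX512F, F_PT, F_PKU };

/* xstate component i is usable only if feature_for[i] is present */
static const int feature_for[10] = {
        F_FPU, F_XMM, F_AVX,
        F_MPX, F_MPX,                    /* BNDREGS, BNDCSR              */
        F_AVX512F, F_AVX512F, F_AVX512F, /* opmask, ZMM_Hi256, Hi16_ZMM  */
        F_PT, F_PKU,
};

static uint64_t prune_xfeatures(uint64_t mask, const int has[])
{
        for (int i = 0; i < 10; i++)
                if (!has[feature_for[i]])
                        mask &= ~(1ULL << i);
        return mask;
}

int main(void)
{
        /* a CPU without MPX and without AVX-512 */
        int has[] = { [F_FPU] = 1, [F_XMM] = 1, [F_AVX] = 1,
                      [F_MPX] = 0, [F_AVX512F] = 0, [F_PT] = 1, [F_PKU] = 1 };

        printf("%#llx\n", (unsigned long long)prune_xfeatures(0x3ff, has));
        /* prints 0x307: component bits 3-7 cleared */
        return 0;
}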
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f1d528bb66a6..c29020907886 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -212,9 +212,6 @@ ENTRY(startup_32_smp)
212#endif 212#endif
213 213
214.Ldefault_entry: 214.Ldefault_entry:
215#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
216 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
217 X86_CR0_PG)
218 movl $(CR0_STATE & ~X86_CR0_PG),%eax 215 movl $(CR0_STATE & ~X86_CR0_PG),%eax
219 movl %eax,%cr0 216 movl %eax,%cr0
220 217
@@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array)
402 # 24(%rsp) error code 399 # 24(%rsp) error code
403 i = 0 400 i = 0
404 .rept NUM_EXCEPTION_VECTORS 401 .rept NUM_EXCEPTION_VECTORS
405 .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 402 .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
406 pushl $0 # Dummy error code, to make stack frame uniform 403 pushl $0 # Dummy error code, to make stack frame uniform
407 .endif 404 .endif
408 pushl $i # 20(%esp) Vector number 405 pushl $i # 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6dde3f3fc1f8..7dca675fe78d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,11 +38,12 @@
38 * 38 *
39 */ 39 */
40 40
41#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
42#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) 41#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
43 42
43#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
44PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) 44PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
45PGD_START_KERNEL = pgd_index(__START_KERNEL_map) 45PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
46#endif
46L3_START_KERNEL = pud_index(__START_KERNEL_map) 47L3_START_KERNEL = pud_index(__START_KERNEL_map)
47 48
48 .text 49 .text
@@ -50,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
50 .code64 51 .code64
51 .globl startup_64 52 .globl startup_64
52startup_64: 53startup_64:
54 UNWIND_HINT_EMPTY
53 /* 55 /*
54 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, 56 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
55 * and someone has loaded an identity mapped page table 57 * and someone has loaded an identity mapped page table
@@ -89,6 +91,7 @@ startup_64:
89 addq $(early_top_pgt - __START_KERNEL_map), %rax 91 addq $(early_top_pgt - __START_KERNEL_map), %rax
90 jmp 1f 92 jmp 1f
91ENTRY(secondary_startup_64) 93ENTRY(secondary_startup_64)
94 UNWIND_HINT_EMPTY
92 /* 95 /*
93 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, 96 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
94 * and someone has loaded a mapped page table. 97 * and someone has loaded a mapped page table.
@@ -133,6 +136,7 @@ ENTRY(secondary_startup_64)
133 movq $1f, %rax 136 movq $1f, %rax
134 jmp *%rax 137 jmp *%rax
1351: 1381:
139 UNWIND_HINT_EMPTY
136 140
137 /* Check if nx is implemented */ 141 /* Check if nx is implemented */
138 movl $0x80000001, %eax 142 movl $0x80000001, %eax
@@ -150,9 +154,6 @@ ENTRY(secondary_startup_64)
1501: wrmsr /* Make changes effective */ 1541: wrmsr /* Make changes effective */
151 155
152 /* Setup cr0 */ 156 /* Setup cr0 */
153#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
154 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
155 X86_CR0_PG)
156 movl $CR0_STATE, %eax 157 movl $CR0_STATE, %eax
157 /* Make changes effective */ 158 /* Make changes effective */
158 movq %rax, %cr0 159 movq %rax, %cr0
@@ -235,7 +236,7 @@ ENTRY(secondary_startup_64)
235 pushq %rax # target address in negative space 236 pushq %rax # target address in negative space
236 lretq 237 lretq
237.Lafter_lret: 238.Lafter_lret:
238ENDPROC(secondary_startup_64) 239END(secondary_startup_64)
239 240
240#include "verify_cpu.S" 241#include "verify_cpu.S"
241 242
@@ -247,6 +248,7 @@ ENDPROC(secondary_startup_64)
247 */ 248 */
248ENTRY(start_cpu0) 249ENTRY(start_cpu0)
249 movq initial_stack(%rip), %rsp 250 movq initial_stack(%rip), %rsp
251 UNWIND_HINT_EMPTY
250 jmp .Ljump_to_C_code 252 jmp .Ljump_to_C_code
251ENDPROC(start_cpu0) 253ENDPROC(start_cpu0)
252#endif 254#endif
@@ -266,26 +268,24 @@ ENDPROC(start_cpu0)
266 .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS 268 .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
267 __FINITDATA 269 __FINITDATA
268 270
269bad_address:
270 jmp bad_address
271
272 __INIT 271 __INIT
273ENTRY(early_idt_handler_array) 272ENTRY(early_idt_handler_array)
274 # 104(%rsp) %rflags
275 # 96(%rsp) %cs
276 # 88(%rsp) %rip
277 # 80(%rsp) error code
278 i = 0 273 i = 0
279 .rept NUM_EXCEPTION_VECTORS 274 .rept NUM_EXCEPTION_VECTORS
280 .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 275 .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
281 pushq $0 # Dummy error code, to make stack frame uniform 276 UNWIND_HINT_IRET_REGS
277 pushq $0 # Dummy error code, to make stack frame uniform
278 .else
279 UNWIND_HINT_IRET_REGS offset=8
282 .endif 280 .endif
283 pushq $i # 72(%rsp) Vector number 281 pushq $i # 72(%rsp) Vector number
284 jmp early_idt_handler_common 282 jmp early_idt_handler_common
283 UNWIND_HINT_IRET_REGS
285 i = i + 1 284 i = i + 1
286 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc 285 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
287 .endr 286 .endr
288ENDPROC(early_idt_handler_array) 287 UNWIND_HINT_IRET_REGS offset=16
288END(early_idt_handler_array)
289 289
290early_idt_handler_common: 290early_idt_handler_common:
291 /* 291 /*
@@ -313,6 +313,7 @@ early_idt_handler_common:
313 pushq %r13 /* pt_regs->r13 */ 313 pushq %r13 /* pt_regs->r13 */
314 pushq %r14 /* pt_regs->r14 */ 314 pushq %r14 /* pt_regs->r14 */
315 pushq %r15 /* pt_regs->r15 */ 315 pushq %r15 /* pt_regs->r15 */
316 UNWIND_HINT_REGS
316 317
317 cmpq $14,%rsi /* Page fault? */ 318 cmpq $14,%rsi /* Page fault? */
318 jnz 10f 319 jnz 10f
@@ -327,8 +328,8 @@ early_idt_handler_common:
327 328
32820: 32920:
329 decl early_recursion_flag(%rip) 330 decl early_recursion_flag(%rip)
330 jmp restore_regs_and_iret 331 jmp restore_regs_and_return_to_kernel
331ENDPROC(early_idt_handler_common) 332END(early_idt_handler_common)
332 333
333 __INITDATA 334 __INITDATA
334 335
@@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts)
362 363
363 .data 364 .data
364 365
365#ifndef CONFIG_XEN 366#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
366NEXT_PAGE(init_top_pgt)
367 .fill 512,8,0
368#else
369NEXT_PAGE(init_top_pgt) 367NEXT_PAGE(init_top_pgt)
370 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 368 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
371 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 369 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt)
382 * Don't set NX because code runs from these pages. 380 * Don't set NX because code runs from these pages.
383 */ 381 */
384 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) 382 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
383#else
384NEXT_PAGE(init_top_pgt)
385 .fill 512,8,0
385#endif 386#endif
386 387
387#ifdef CONFIG_X86_5LEVEL 388#ifdef CONFIG_X86_5LEVEL
@@ -435,7 +436,7 @@ ENTRY(phys_base)
435EXPORT_SYMBOL(phys_base) 436EXPORT_SYMBOL(phys_base)
436 437
437#include "../../x86/xen/xen-head.S" 438#include "../../x86/xen/xen-head.S"
438 439
439 __PAGE_ALIGNED_BSS 440 __PAGE_ALIGNED_BSS
440NEXT_PAGE(empty_zero_page) 441NEXT_PAGE(empty_zero_page)
441 .skip PAGE_SIZE 442 .skip PAGE_SIZE
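
Both the head_32.S and head_64.S hunks above replace the ".ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1" test with an explicit "== 0" comparison; the mask records which early vectors get a hardware error code pushed by the CPU, and only the remaining vectors get a dummy push, keeping every stack frame the same shape. A C sketch of that per-vector decision (the 0x00027d00 value, covering vectors 8, 10 to 14 and 17, matches the kernel definition of this era but should be treated as illustrative):

#include <stdio.h>

#define NUM_EXCEPTION_VECTORS 32
/* DF=8, TS=10, NP=11, SS=12, GP=13, PF=14, AC=17 push an error code */
#define EXCEPTION_ERRCODE_MASK 0x00027d00u

int main(void)
{
        for (int i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
                if (((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0)
                        printf("vector %2d: push dummy error code\n", i);
                else
                        printf("vector %2d: CPU pushed error code\n", i);
        }
        return 0;
}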
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 6107ee1cb8d5..014cb2fc47ff 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -92,8 +92,6 @@ static const __initdata struct idt_data def_idts[] = {
92 INTG(X86_TRAP_DF, double_fault), 92 INTG(X86_TRAP_DF, double_fault),
93#endif 93#endif
94 INTG(X86_TRAP_DB, debug), 94 INTG(X86_TRAP_DB, debug),
95 INTG(X86_TRAP_NMI, nmi),
96 INTG(X86_TRAP_BP, int3),
97 95
98#ifdef CONFIG_X86_MCE 96#ifdef CONFIG_X86_MCE
99 INTG(X86_TRAP_MC, &machine_check), 97 INTG(X86_TRAP_MC, &machine_check),
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 4d17bacf4030..ae5615b03def 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -13,6 +13,7 @@
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/smp.h> 15#include <linux/smp.h>
16#include <linux/syscalls.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
18#include <linux/uaccess.h> 19#include <linux/uaccess.h>
@@ -295,8 +296,8 @@ out:
295 return error; 296 return error;
296} 297}
297 298
298asmlinkage int sys_modify_ldt(int func, void __user *ptr, 299SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
299 unsigned long bytecount) 300 unsigned long , bytecount)
300{ 301{
301 int ret = -ENOSYS; 302 int ret = -ENOSYS;
302 303
@@ -314,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
314 ret = write_ldt(ptr, bytecount, 0); 315 ret = write_ldt(ptr, bytecount, 0);
315 break; 316 break;
316 } 317 }
317 return ret; 318 /*
319 * The SYSCALL_DEFINE() macros give us an 'unsigned long'
320 * return type, but the ABI for sys_modify_ldt() expects
321 * 'int'. This cast gives us an int-sized value in %rax
322 * for the return code. The 'unsigned' is necessary so
323 * the compiler does not try to sign-extend the negative
324 * return codes into the high half of the register when
325 * taking the value from int->long.
326 */
327 return (unsigned int)ret;
318} 328}
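
The comment added above is the whole story: the syscall table returns a register-sized long, but the modify_ldt() ABI is a 32-bit int, and an implicit int-to-long conversion would sign-extend negative error codes into the high half of %rax. A user-space sketch of the two conversions (values purely illustrative):

#include <stdio.h>

int main(void)
{
        int ret = -22;  /* -EINVAL */

        /* implicit int -> long: sign-extends into the high half */
        long signed_ext = ret;
        /* via unsigned int: high half stays zero, low 32 bits keep the code */
        long zero_ext = (unsigned int)ret;

        printf("%#lx\n", signed_ext); /* 0xffffffffffffffea */
        printf("%#lx\n", zero_ext);   /* 0xffffffea          */
        return 0;
}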
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c67685337c5a..97fb3e5737f5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -49,7 +49,13 @@
49 */ 49 */
50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { 50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
51 .x86_tss = { 51 .x86_tss = {
52 .sp0 = TOP_OF_INIT_STACK, 52 /*
53 * .sp0 is only used when entering ring 0 from a lower
54 * privilege level. Since the init task never runs anything
55 * but ring 0 code, there is no need for a valid value here.
56 * Poison it.
57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
53#ifdef CONFIG_X86_32 59#ifdef CONFIG_X86_32
54 .ss0 = __KERNEL_DS, 60 .ss0 = __KERNEL_DS,
55 .ss1 = __KERNEL_CS, 61 .ss1 = __KERNEL_CS,
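
The poison chosen above, (1UL << (BITS_PER_LONG-1)) + 1, is deliberately unusable: on x86-64 it evaluates to 0x8000000000000001, a non-canonical and misaligned address, so any stray ring transition through the init task's sp0 faults immediately instead of silently running on a bogus stack. A quick check of the value, assuming 64-bit longs:

#include <stdio.h>

#define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

int main(void)
{
        unsigned long sp0 = (1UL << (BITS_PER_LONG - 1)) + 1;

        printf("%#lx\n", sp0);  /* 0x8000000000000001 on LP64 */
        printf("16-byte aligned: %s\n", (sp0 & 0xf) ? "no" : "yes");
        return 0;
}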
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 11966251cd42..45bf0c5f93e1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
284 284
285 /* 285 /*
286 * Reload esp0 and cpu_current_top_of_stack. This changes 286 * Reload esp0 and cpu_current_top_of_stack. This changes
287 * current_thread_info(). 287 * current_thread_info(). Refresh the SYSENTER configuration in
288 * case prev or next is vm86.
288 */ 289 */
289 load_sp0(tss, next); 290 update_sp0(next_p);
291 refresh_sysenter_cs(next);
290 this_cpu_write(cpu_current_top_of_stack, 292 this_cpu_write(cpu_current_top_of_stack,
291 (unsigned long)task_stack_page(next_p) + 293 (unsigned long)task_stack_page(next_p) +
292 THREAD_SIZE); 294 THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 302e7b2572d1..eeeb34f85c25 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
274 struct inactive_task_frame *frame; 274 struct inactive_task_frame *frame;
275 struct task_struct *me = current; 275 struct task_struct *me = current;
276 276
277 p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
278 childregs = task_pt_regs(p); 277 childregs = task_pt_regs(p);
279 fork_frame = container_of(childregs, struct fork_frame, regs); 278 fork_frame = container_of(childregs, struct fork_frame, regs);
280 frame = &fork_frame->frame; 279 frame = &fork_frame->frame;
@@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
464 */ 463 */
465 this_cpu_write(current_task, next_p); 464 this_cpu_write(current_task, next_p);
466 465
467 /* Reload esp0 and ss1. This changes current_thread_info(). */ 466 /* Reload sp0. */
468 load_sp0(tss, next); 467 update_sp0(next_p);
469 468
470 /* 469 /*
471 * Now maybe reload the debug registers and handle I/O bitmaps 470 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ad59edd84de7..d56c1d209283 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -194,6 +194,12 @@ static void smp_callin(void)
194 smp_store_cpu_info(cpuid); 194 smp_store_cpu_info(cpuid);
195 195
196 /* 196 /*
197 * The topology information must be up to date before
198 * calibrate_delay() and notify_cpu_starting().
199 */
200 set_cpu_sibling_map(raw_smp_processor_id());
201
202 /*
197 * Get our bogomips. 203 * Get our bogomips.
198 * Update loops_per_jiffy in cpu_data. Previous call to 204 * Update loops_per_jiffy in cpu_data. Previous call to
199 * smp_store_cpu_info() stored a value that is close but not as 205 * smp_store_cpu_info() stored a value that is close but not as
@@ -203,11 +209,6 @@ static void smp_callin(void)
203 cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; 209 cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
204 pr_debug("Stack at about %p\n", &cpuid); 210 pr_debug("Stack at about %p\n", &cpuid);
205 211
206 /*
207 * This must be done before setting cpu_online_mask
208 * or calling notify_cpu_starting.
209 */
210 set_cpu_sibling_map(raw_smp_processor_id());
211 wmb(); 212 wmb();
212 213
213 notify_cpu_starting(cpuid); 214 notify_cpu_starting(cpuid);
@@ -961,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
961#ifdef CONFIG_X86_32 962#ifdef CONFIG_X86_32
962 /* Stack for startup_32 can be just as for start_secondary onwards */ 963 /* Stack for startup_32 can be just as for start_secondary onwards */
963 irq_ctx_init(cpu); 964 irq_ctx_init(cpu);
964 per_cpu(cpu_current_top_of_stack, cpu) = 965 per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
965 (unsigned long)task_stack_page(idle) + THREAD_SIZE;
966#else 966#else
967 initial_gs = per_cpu_offset(cpu); 967 initial_gs = per_cpu_offset(cpu);
968#endif 968#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 67db4f43309e..d366adfc61da 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
141 * will catch asm bugs and any attempt to use ist_preempt_enable 141 * will catch asm bugs and any attempt to use ist_preempt_enable
142 * from double_fault. 142 * from double_fault.
143 */ 143 */
144 BUG_ON((unsigned long)(current_top_of_stack() - 144 BUG_ON(!on_thread_stack());
145 current_stack_pointer) >= THREAD_SIZE);
146 145
147 preempt_enable_no_resched(); 146 preempt_enable_no_resched();
148} 147}
@@ -209,9 +208,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
209 if (fixup_exception(regs, trapnr)) 208 if (fixup_exception(regs, trapnr))
210 return 0; 209 return 0;
211 210
212 if (fixup_bug(regs, trapnr))
213 return 0;
214
215 tsk->thread.error_code = error_code; 211 tsk->thread.error_code = error_code;
216 tsk->thread.trap_nr = trapnr; 212 tsk->thread.trap_nr = trapnr;
217 die(str, regs, error_code); 213 die(str, regs, error_code);
@@ -292,6 +288,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
292 288
293 RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); 289 RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
294 290
291 /*
292 * WARN*()s end up here; fix them up before we call the
293 * notifier chain.
294 */
295 if (!user_mode(regs) && fixup_bug(regs, trapnr))
296 return;
297
295 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != 298 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
296 NOTIFY_STOP) { 299 NOTIFY_STOP) {
297 cond_local_irq_enable(regs); 300 cond_local_irq_enable(regs);
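
The do_error_trap() hunk above moves the fixup_bug() probe in front of the die-notifier chain and restricts it to kernel mode: WARN*()s raise #UD from kernel text, and the fixup advances the saved IP past the trapping instruction so execution resumes. A rough sketch of that ordering, where handle_warn() is a stand-in for the real fixup_bug():

#include <stdbool.h>
#include <stdio.h>

struct regs { unsigned long ip; bool user; };

/* stand-in for fixup_bug(): recognize a known WARN site and skip it */
static bool handle_warn(struct regs *r)
{
        if (r->ip != 0x1000)    /* pretend 0x1000 is a recorded WARN ud2 */
                return false;
        r->ip += 2;             /* length of the ud2 instruction */
        return true;
}

static void do_error_trap(struct regs *r)
{
        /* WARN*()s end up here; fix them up before notifying anyone */
        if (!r->user && handle_warn(r)) {
                printf("warning handled, resuming at %#lx\n", r->ip);
                return;
        }
        printf("real trap: signal the task or die\n");
}

int main(void)
{
        struct regs r = { .ip = 0x1000, .user = false };

        do_error_trap(&r);
        return 0;
}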
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 796d96bb0821..ad2b925a808e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1346,12 +1346,10 @@ void __init tsc_init(void)
1346unsigned long calibrate_delay_is_known(void) 1346unsigned long calibrate_delay_is_known(void)
1347{ 1347{
1348 int sibling, cpu = smp_processor_id(); 1348 int sibling, cpu = smp_processor_id();
1349 struct cpumask *mask = topology_core_cpumask(cpu); 1349 int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
1350 const struct cpumask *mask = topology_core_cpumask(cpu);
1350 1351
1351 if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) 1352 if (tsc_disabled || !constant_tsc || !mask)
1352 return 0;
1353
1354 if (!mask)
1355 return 0; 1353 return 0;
1356 1354
1357 sibling = cpumask_any_but(mask, cpu); 1355 sibling = cpumask_any_but(mask, cpu);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index b95007e7c1b3..a3f973b2c97a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -279,7 +279,7 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
279 if (!stack_access_ok(state, addr, sizeof(long))) 279 if (!stack_access_ok(state, addr, sizeof(long)))
280 return false; 280 return false;
281 281
282 *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr); 282 *val = READ_ONCE_NOCHECK(*(unsigned long *)addr);
283 return true; 283 return true;
284} 284}
285 285
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59aa153..3d3c2f71f617 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
33#include <asm/cpufeatures.h> 33#include <asm/cpufeatures.h>
34#include <asm/msr-index.h> 34#include <asm/msr-index.h>
35 35
36verify_cpu: 36ENTRY(verify_cpu)
37 pushf # Save caller passed flags 37 pushf # Save caller passed flags
38 push $0 # Kill any dangerous flags 38 push $0 # Kill any dangerous flags
39 popf 39 popf
@@ -139,3 +139,4 @@ verify_cpu:
139 popf # Restore caller passed flags 139 popf # Restore caller passed flags
140 xorl %eax, %eax 140 xorl %eax, %eax
141 ret 141 ret
142ENDPROC(verify_cpu)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 68244742ecb0..5edb27f1a2c4 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -55,6 +55,7 @@
55#include <asm/irq.h> 55#include <asm/irq.h>
56#include <asm/traps.h> 56#include <asm/traps.h>
57#include <asm/vm86.h> 57#include <asm/vm86.h>
58#include <asm/switch_to.h>
58 59
59/* 60/*
60 * Known problems: 61 * Known problems:
@@ -94,7 +95,6 @@
94 95
95void save_v86_state(struct kernel_vm86_regs *regs, int retval) 96void save_v86_state(struct kernel_vm86_regs *regs, int retval)
96{ 97{
97 struct tss_struct *tss;
98 struct task_struct *tsk = current; 98 struct task_struct *tsk = current;
99 struct vm86plus_struct __user *user; 99 struct vm86plus_struct __user *user;
100 struct vm86 *vm86 = current->thread.vm86; 100 struct vm86 *vm86 = current->thread.vm86;
@@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
146 do_exit(SIGSEGV); 146 do_exit(SIGSEGV);
147 } 147 }
148 148
149 tss = &per_cpu(cpu_tss, get_cpu()); 149 preempt_disable();
150 tsk->thread.sp0 = vm86->saved_sp0; 150 tsk->thread.sp0 = vm86->saved_sp0;
151 tsk->thread.sysenter_cs = __KERNEL_CS; 151 tsk->thread.sysenter_cs = __KERNEL_CS;
152 load_sp0(tss, &tsk->thread); 152 update_sp0(tsk);
153 refresh_sysenter_cs(&tsk->thread);
153 vm86->saved_sp0 = 0; 154 vm86->saved_sp0 = 0;
154 put_cpu(); 155 preempt_enable();
155 156
156 memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs)); 157 memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
157 158
@@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
237 238
238static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) 239static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
239{ 240{
240 struct tss_struct *tss;
241 struct task_struct *tsk = current; 241 struct task_struct *tsk = current;
242 struct vm86 *vm86 = tsk->thread.vm86; 242 struct vm86 *vm86 = tsk->thread.vm86;
243 struct kernel_vm86_regs vm86regs; 243 struct kernel_vm86_regs vm86regs;
@@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
365 vm86->saved_sp0 = tsk->thread.sp0; 365 vm86->saved_sp0 = tsk->thread.sp0;
366 lazy_save_gs(vm86->regs32.gs); 366 lazy_save_gs(vm86->regs32.gs);
367 367
368 tss = &per_cpu(cpu_tss, get_cpu());
369 /* make room for real-mode segments */ 368 /* make room for real-mode segments */
369 preempt_disable();
370 tsk->thread.sp0 += 16; 370 tsk->thread.sp0 += 16;
371 371
372 if (static_cpu_has(X86_FEATURE_SEP)) 372 if (static_cpu_has(X86_FEATURE_SEP)) {
373 tsk->thread.sysenter_cs = 0; 373 tsk->thread.sysenter_cs = 0;
374 refresh_sysenter_cs(&tsk->thread);
375 }
374 376
375 load_sp0(tss, &tsk->thread); 377 update_sp0(tsk);
376 put_cpu(); 378 preempt_enable();
377 379
378 if (vm86->flags & VM86_SCREEN_BITMAP) 380 if (vm86->flags & VM86_SCREEN_BITMAP)
379 mark_screen_rdonly(tsk->mm); 381 mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b0ff378650a9..3109ba6c6ede 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -30,26 +30,6 @@
30#include <asm/trace/exceptions.h> 30#include <asm/trace/exceptions.h>
31 31
32/* 32/*
33 * Page fault error code bits:
34 *
35 * bit 0 == 0: no page found 1: protection fault
36 * bit 1 == 0: read access 1: write access
37 * bit 2 == 0: kernel-mode access 1: user-mode access
38 * bit 3 == 1: use of reserved bit detected
39 * bit 4 == 1: fault was an instruction fetch
40 * bit 5 == 1: protection keys block access
41 */
42enum x86_pf_error_code {
43
44 PF_PROT = 1 << 0,
45 PF_WRITE = 1 << 1,
46 PF_USER = 1 << 2,
47 PF_RSVD = 1 << 3,
48 PF_INSTR = 1 << 4,
49 PF_PK = 1 << 5,
50};
51
52/*
53 * Returns 0 if mmiotrace is disabled, or if the fault is not 33 * Returns 0 if mmiotrace is disabled, or if the fault is not
54 * handled by mmiotrace: 34 * handled by mmiotrace:
55 */ 35 */
@@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
150 * If it was a exec (instruction fetch) fault on NX page, then 130 * If it was a exec (instruction fetch) fault on NX page, then
151 * do not ignore the fault: 131 * do not ignore the fault:
152 */ 132 */
153 if (error_code & PF_INSTR) 133 if (error_code & X86_PF_INSTR)
154 return 0; 134 return 0;
155 135
156 instr = (void *)convert_ip_to_linear(current, regs); 136 instr = (void *)convert_ip_to_linear(current, regs);
@@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
180 * siginfo so userspace can discover which protection key was set 160 * siginfo so userspace can discover which protection key was set
181 * on the PTE. 161 * on the PTE.
182 * 162 *
183 * If we get here, we know that the hardware signaled a PF_PK 163 * If we get here, we know that the hardware signaled a X86_PF_PK
184 * fault and that there was a VMA once we got in the fault 164 * fault and that there was a VMA once we got in the fault
185 * handler. It does *not* guarantee that the VMA we find here 165 * handler. It does *not* guarantee that the VMA we find here
186 * was the one that we faulted on. 166 * was the one that we faulted on.
@@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
205 /* 185 /*
206 * force_sig_info_fault() is called from a number of 186 * force_sig_info_fault() is called from a number of
207 * contexts, some of which have a VMA and some of which 187 * contexts, some of which have a VMA and some of which
208 * do not. The PF_PK handling happens after we have a 188 * do not. The X86_PF_PK handling happens after we have a
209 * valid VMA, so we should never reach this without a 189 * valid VMA, so we should never reach this without a
210 * valid VMA. 190 * valid VMA.
211 */ 191 */
@@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
698 if (!oops_may_print()) 678 if (!oops_may_print())
699 return; 679 return;
700 680
701 if (error_code & PF_INSTR) { 681 if (error_code & X86_PF_INSTR) {
702 unsigned int level; 682 unsigned int level;
703 pgd_t *pgd; 683 pgd_t *pgd;
704 pte_t *pte; 684 pte_t *pte;
@@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
780 */ 760 */
781 if (current->thread.sig_on_uaccess_err && signal) { 761 if (current->thread.sig_on_uaccess_err && signal) {
782 tsk->thread.trap_nr = X86_TRAP_PF; 762 tsk->thread.trap_nr = X86_TRAP_PF;
783 tsk->thread.error_code = error_code | PF_USER; 763 tsk->thread.error_code = error_code | X86_PF_USER;
784 tsk->thread.cr2 = address; 764 tsk->thread.cr2 = address;
785 765
786 /* XXX: hwpoison faults will set the wrong code. */ 766 /* XXX: hwpoison faults will set the wrong code. */
@@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
898 struct task_struct *tsk = current; 878 struct task_struct *tsk = current;
899 879
900 /* User mode accesses just cause a SIGSEGV */ 880 /* User mode accesses just cause a SIGSEGV */
901 if (error_code & PF_USER) { 881 if (error_code & X86_PF_USER) {
902 /* 882 /*
903 * It's possible to have interrupts off here: 883 * It's possible to have interrupts off here:
904 */ 884 */
@@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
919 * Instruction fetch faults in the vsyscall page might need 899 * Instruction fetch faults in the vsyscall page might need
920 * emulation. 900 * emulation.
921 */ 901 */
922 if (unlikely((error_code & PF_INSTR) && 902 if (unlikely((error_code & X86_PF_INSTR) &&
923 ((address & ~0xfff) == VSYSCALL_ADDR))) { 903 ((address & ~0xfff) == VSYSCALL_ADDR))) {
924 if (emulate_vsyscall(regs, address)) 904 if (emulate_vsyscall(regs, address))
925 return; 905 return;
@@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
932 * are always protection faults. 912 * are always protection faults.
933 */ 913 */
934 if (address >= TASK_SIZE_MAX) 914 if (address >= TASK_SIZE_MAX)
935 error_code |= PF_PROT; 915 error_code |= X86_PF_PROT;
936 916
937 if (likely(show_unhandled_signals)) 917 if (likely(show_unhandled_signals))
938 show_signal_msg(regs, error_code, address, tsk); 918 show_signal_msg(regs, error_code, address, tsk);
@@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
993 973
994 if (!boot_cpu_has(X86_FEATURE_OSPKE)) 974 if (!boot_cpu_has(X86_FEATURE_OSPKE))
995 return false; 975 return false;
996 if (error_code & PF_PK) 976 if (error_code & X86_PF_PK)
997 return true; 977 return true;
998 /* this checks permission keys on the VMA: */ 978 /* this checks permission keys on the VMA: */
999 if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), 979 if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
1000 (error_code & PF_INSTR), foreign)) 980 (error_code & X86_PF_INSTR), foreign))
1001 return true; 981 return true;
1002 return false; 982 return false;
1003} 983}
@@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
1025 int code = BUS_ADRERR; 1005 int code = BUS_ADRERR;
1026 1006
1027 /* Kernel mode? Handle exceptions or die: */ 1007 /* Kernel mode? Handle exceptions or die: */
1028 if (!(error_code & PF_USER)) { 1008 if (!(error_code & X86_PF_USER)) {
1029 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); 1009 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
1030 return; 1010 return;
1031 } 1011 }
@@ -1053,14 +1033,14 @@ static noinline void
1053mm_fault_error(struct pt_regs *regs, unsigned long error_code, 1033mm_fault_error(struct pt_regs *regs, unsigned long error_code,
1054 unsigned long address, u32 *pkey, unsigned int fault) 1034 unsigned long address, u32 *pkey, unsigned int fault)
1055{ 1035{
1056 if (fatal_signal_pending(current) && !(error_code & PF_USER)) { 1036 if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
1057 no_context(regs, error_code, address, 0, 0); 1037 no_context(regs, error_code, address, 0, 0);
1058 return; 1038 return;
1059 } 1039 }
1060 1040
1061 if (fault & VM_FAULT_OOM) { 1041 if (fault & VM_FAULT_OOM) {
1062 /* Kernel mode? Handle exceptions or die: */ 1042 /* Kernel mode? Handle exceptions or die: */
1063 if (!(error_code & PF_USER)) { 1043 if (!(error_code & X86_PF_USER)) {
1064 no_context(regs, error_code, address, 1044 no_context(regs, error_code, address,
1065 SIGSEGV, SEGV_MAPERR); 1045 SIGSEGV, SEGV_MAPERR);
1066 return; 1046 return;
@@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
1085 1065
1086static int spurious_fault_check(unsigned long error_code, pte_t *pte) 1066static int spurious_fault_check(unsigned long error_code, pte_t *pte)
1087{ 1067{
1088 if ((error_code & PF_WRITE) && !pte_write(*pte)) 1068 if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
1089 return 0; 1069 return 0;
1090 1070
1091 if ((error_code & PF_INSTR) && !pte_exec(*pte)) 1071 if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
1092 return 0; 1072 return 0;
1093 /* 1073 /*
1094 * Note: We do not do lazy flushing on protection key 1074 * Note: We do not do lazy flushing on protection key
1095 * changes, so no spurious fault will ever set PF_PK. 1075 * changes, so no spurious fault will ever set X86_PF_PK.
1096 */ 1076 */
1097 if ((error_code & PF_PK)) 1077 if ((error_code & X86_PF_PK))
1098 return 1; 1078 return 1;
1099 1079
1100 return 1; 1080 return 1;
@@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
1140 * change, so user accesses are not expected to cause spurious 1120 * change, so user accesses are not expected to cause spurious
1141 * faults. 1121 * faults.
1142 */ 1122 */
1143 if (error_code != (PF_WRITE | PF_PROT) 1123 if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
1144 && error_code != (PF_INSTR | PF_PROT)) 1124 error_code != (X86_PF_INSTR | X86_PF_PROT))
1145 return 0; 1125 return 0;
1146 1126
1147 pgd = init_mm.pgd + pgd_index(address); 1127 pgd = init_mm.pgd + pgd_index(address);
@@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
1201 * always an unconditional error and can never result in 1181 * always an unconditional error and can never result in
1202 * a follow-up action to resolve the fault, like a COW. 1182 * a follow-up action to resolve the fault, like a COW.
1203 */ 1183 */
1204 if (error_code & PF_PK) 1184 if (error_code & X86_PF_PK)
1205 return 1; 1185 return 1;
1206 1186
1207 /* 1187 /*
1208 * Make sure to check the VMA so that we do not perform 1188 * Make sure to check the VMA so that we do not perform
1209 * faults just to hit a PF_PK as soon as we fill in a 1189 * faults just to hit a X86_PF_PK as soon as we fill in a
1210 * page. 1190 * page.
1211 */ 1191 */
1212 if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), 1192 if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
1213 (error_code & PF_INSTR), foreign)) 1193 (error_code & X86_PF_INSTR), foreign))
1214 return 1; 1194 return 1;
1215 1195
1216 if (error_code & PF_WRITE) { 1196 if (error_code & X86_PF_WRITE) {
1217 /* write, present and write, not present: */ 1197 /* write, present and write, not present: */
1218 if (unlikely(!(vma->vm_flags & VM_WRITE))) 1198 if (unlikely(!(vma->vm_flags & VM_WRITE)))
1219 return 1; 1199 return 1;
@@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
1221 } 1201 }
1222 1202
1223 /* read, present: */ 1203 /* read, present: */
1224 if (unlikely(error_code & PF_PROT)) 1204 if (unlikely(error_code & X86_PF_PROT))
1225 return 1; 1205 return 1;
1226 1206
1227 /* read, not present: */ 1207 /* read, not present: */
@@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
1244 if (!static_cpu_has(X86_FEATURE_SMAP)) 1224 if (!static_cpu_has(X86_FEATURE_SMAP))
1245 return false; 1225 return false;
1246 1226
1247 if (error_code & PF_USER) 1227 if (error_code & X86_PF_USER)
1248 return false; 1228 return false;
1249 1229
1250 if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) 1230 if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1297 * protection error (error_code & 9) == 0. 1277 * protection error (error_code & 9) == 0.
1298 */ 1278 */
1299 if (unlikely(fault_in_kernel_space(address))) { 1279 if (unlikely(fault_in_kernel_space(address))) {
1300 if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { 1280 if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
1301 if (vmalloc_fault(address) >= 0) 1281 if (vmalloc_fault(address) >= 0)
1302 return; 1282 return;
1303 1283
@@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1325 if (unlikely(kprobes_fault(regs))) 1305 if (unlikely(kprobes_fault(regs)))
1326 return; 1306 return;
1327 1307
1328 if (unlikely(error_code & PF_RSVD)) 1308 if (unlikely(error_code & X86_PF_RSVD))
1329 pgtable_bad(regs, error_code, address); 1309 pgtable_bad(regs, error_code, address);
1330 1310
1331 if (unlikely(smap_violation(error_code, regs))) { 1311 if (unlikely(smap_violation(error_code, regs))) {
@@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1351 */ 1331 */
1352 if (user_mode(regs)) { 1332 if (user_mode(regs)) {
1353 local_irq_enable(); 1333 local_irq_enable();
1354 error_code |= PF_USER; 1334 error_code |= X86_PF_USER;
1355 flags |= FAULT_FLAG_USER; 1335 flags |= FAULT_FLAG_USER;
1356 } else { 1336 } else {
1357 if (regs->flags & X86_EFLAGS_IF) 1337 if (regs->flags & X86_EFLAGS_IF)
@@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1360 1340
1361 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 1341 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1362 1342
1363 if (error_code & PF_WRITE) 1343 if (error_code & X86_PF_WRITE)
1364 flags |= FAULT_FLAG_WRITE; 1344 flags |= FAULT_FLAG_WRITE;
1365 if (error_code & PF_INSTR) 1345 if (error_code & X86_PF_INSTR)
1366 flags |= FAULT_FLAG_INSTRUCTION; 1346 flags |= FAULT_FLAG_INSTRUCTION;
1367 1347
1368 /* 1348 /*
@@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1382 * space check, thus avoiding the deadlock: 1362 * space check, thus avoiding the deadlock:
1383 */ 1363 */
1384 if (unlikely(!down_read_trylock(&mm->mmap_sem))) { 1364 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
1385 if ((error_code & PF_USER) == 0 && 1365 if (!(error_code & X86_PF_USER) &&
1386 !search_exception_tables(regs->ip)) { 1366 !search_exception_tables(regs->ip)) {
1387 bad_area_nosemaphore(regs, error_code, address, NULL); 1367 bad_area_nosemaphore(regs, error_code, address, NULL);
1388 return; 1368 return;
@@ -1409,7 +1389,7 @@ retry:
1409 bad_area(regs, error_code, address); 1389 bad_area(regs, error_code, address);
1410 return; 1390 return;
1411 } 1391 }
1412 if (error_code & PF_USER) { 1392 if (error_code & X86_PF_USER) {
1413 /* 1393 /*
1414 * Accessing the stack below %sp is always a bug. 1394 * Accessing the stack below %sp is always a bug.
1415 * The large cushion allows instructions like enter 1395 * The large cushion allows instructions like enter
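
The fault.c conversion is mechanical (the private PF_* enum becomes the X86_PF_* defines), but the bit meanings documented by the deleted comment are what make the converted tests readable. A small decoder, with the bit values copied from the removed enum:

#include <stdio.h>

#define X86_PF_PROT  (1u << 0) /* 0: no page found, 1: protection fault */
#define X86_PF_WRITE (1u << 1) /* 0: read access,   1: write access     */
#define X86_PF_USER  (1u << 2) /* 0: kernel mode,   1: user mode        */
#define X86_PF_RSVD  (1u << 3) /* reserved bit set in a page table      */
#define X86_PF_INSTR (1u << 4) /* instruction fetch                     */
#define X86_PF_PK    (1u << 5) /* protection-keys violation             */

static void decode(unsigned long ec)
{
        printf("%#lx: %s %s from %s mode%s%s\n", ec,
               ec & X86_PF_PROT  ? "protection fault on" : "miss on",
               ec & X86_PF_INSTR ? "fetch" :
               ec & X86_PF_WRITE ? "write" : "read",
               ec & X86_PF_USER  ? "user" : "kernel",
               ec & X86_PF_RSVD  ? ", reserved bit" : "",
               ec & X86_PF_PK    ? ", pkey" : "");
}

int main(void)
{
        decode(0x7);  /* user write to a present, protected page    */
        decode(0x10); /* kernel instruction fetch, page not present */
        return 0;
}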
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 048fbe8fc274..adcea90a2046 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
1426 1426
1427#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) 1427#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
1428void register_page_bootmem_memmap(unsigned long section_nr, 1428void register_page_bootmem_memmap(unsigned long section_nr,
1429 struct page *start_page, unsigned long size) 1429 struct page *start_page, unsigned long nr_pages)
1430{ 1430{
1431 unsigned long addr = (unsigned long)start_page; 1431 unsigned long addr = (unsigned long)start_page;
1432 unsigned long end = (unsigned long)(start_page + size); 1432 unsigned long end = (unsigned long)(start_page + nr_pages);
1433 unsigned long next; 1433 unsigned long next;
1434 pgd_t *pgd; 1434 pgd_t *pgd;
1435 p4d_t *p4d; 1435 p4d_t *p4d;
1436 pud_t *pud; 1436 pud_t *pud;
1437 pmd_t *pmd; 1437 pmd_t *pmd;
1438 unsigned int nr_pages; 1438 unsigned int nr_pmd_pages;
1439 struct page *page; 1439 struct page *page;
1440 1440
1441 for (; addr < end; addr = next) { 1441 for (; addr < end; addr = next) {
@@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
1482 if (pmd_none(*pmd)) 1482 if (pmd_none(*pmd))
1483 continue; 1483 continue;
1484 1484
1485 nr_pages = 1 << (get_order(PMD_SIZE)); 1485 nr_pmd_pages = 1 << get_order(PMD_SIZE);
1486 page = pmd_page(*pmd); 1486 page = pmd_page(*pmd);
1487 while (nr_pages--) 1487 while (nr_pmd_pages--)
1488 get_page_bootmem(section_nr, page++, 1488 get_page_bootmem(section_nr, page++,
1489 SECTION_INFO); 1489 SECTION_INFO);
1490 } 1490 }
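
The init_64.c rename untangles a shadowing hazard: the parameter called size actually counted pages, and the inner counter reused the name nr_pages. The inner count is simply pages per PMD mapping; with 4 KiB pages and 2 MiB PMDs that is 512, as a sketch of the get_order() arithmetic shows (get_order() reimplemented here for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PMD_SIZE   (1UL << 21)   /* 2 MiB */

/* minimal get_order(): log2 of the page count, rounded up */
static int get_order(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        unsigned int nr_pmd_pages = 1u << get_order(PMD_SIZE);

        printf("%u\n", nr_pmd_pages);   /* 512 struct pages per PMD mapping */
        return 0;
}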
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8f5be3eb40dd..2b60dc6e64b1 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -16,6 +16,8 @@
16 16
17extern struct range pfn_mapped[E820_MAX_ENTRIES]; 17extern struct range pfn_mapped[E820_MAX_ENTRIES];
18 18
19static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
20
19static int __init map_range(struct range *range) 21static int __init map_range(struct range *range)
20{ 22{
21 unsigned long start; 23 unsigned long start;
@@ -31,8 +33,10 @@ static void __init clear_pgds(unsigned long start,
31 unsigned long end) 33 unsigned long end)
32{ 34{
33 pgd_t *pgd; 35 pgd_t *pgd;
36 /* See comment in kasan_init() */
37 unsigned long pgd_end = end & PGDIR_MASK;
34 38
35 for (; start < end; start += PGDIR_SIZE) { 39 for (; start < pgd_end; start += PGDIR_SIZE) {
36 pgd = pgd_offset_k(start); 40 pgd = pgd_offset_k(start);
37 /* 41 /*
38 * With folded p4d, pgd_clear() is nop, use p4d_clear() 42 * With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -43,29 +47,61 @@ static void __init clear_pgds(unsigned long start,
43 else 47 else
44 pgd_clear(pgd); 48 pgd_clear(pgd);
45 } 49 }
50
51 pgd = pgd_offset_k(start);
52 for (; start < end; start += P4D_SIZE)
53 p4d_clear(p4d_offset(pgd, start));
54}
55
56static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
57{
58 unsigned long p4d;
59
60 if (!IS_ENABLED(CONFIG_X86_5LEVEL))
61 return (p4d_t *)pgd;
62
63 p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
64 p4d += __START_KERNEL_map - phys_base;
65 return (p4d_t *)p4d + p4d_index(addr);
66}
67
68static void __init kasan_early_p4d_populate(pgd_t *pgd,
69 unsigned long addr,
70 unsigned long end)
71{
72 pgd_t pgd_entry;
73 p4d_t *p4d, p4d_entry;
74 unsigned long next;
75
76 if (pgd_none(*pgd)) {
77 pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
78 set_pgd(pgd, pgd_entry);
79 }
80
81 p4d = early_p4d_offset(pgd, addr);
82 do {
83 next = p4d_addr_end(addr, end);
84
85 if (!p4d_none(*p4d))
86 continue;
87
88 p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
89 set_p4d(p4d, p4d_entry);
90 } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
46} 91}
47 92
48static void __init kasan_map_early_shadow(pgd_t *pgd) 93static void __init kasan_map_early_shadow(pgd_t *pgd)
49{ 94{
50 int i; 95 /* See comment in kasan_init() */
51 unsigned long start = KASAN_SHADOW_START; 96 unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
52 unsigned long end = KASAN_SHADOW_END; 97 unsigned long end = KASAN_SHADOW_END;
98 unsigned long next;
53 99
54 for (i = pgd_index(start); start < end; i++) { 100 pgd += pgd_index(addr);
55 switch (CONFIG_PGTABLE_LEVELS) { 101 do {
56 case 4: 102 next = pgd_addr_end(addr, end);
57 pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) | 103 kasan_early_p4d_populate(pgd, addr, next);
58 _KERNPG_TABLE); 104 } while (pgd++, addr = next, addr != end);
59 break;
60 case 5:
61 pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
62 _KERNPG_TABLE);
63 break;
64 default:
65 BUILD_BUG();
66 }
67 start += PGDIR_SIZE;
68 }
69} 105}
70 106
71#ifdef CONFIG_KASAN_INLINE 107#ifdef CONFIG_KASAN_INLINE
@@ -102,7 +138,7 @@ void __init kasan_early_init(void)
102 for (i = 0; i < PTRS_PER_PUD; i++) 138 for (i = 0; i < PTRS_PER_PUD; i++)
103 kasan_zero_pud[i] = __pud(pud_val); 139 kasan_zero_pud[i] = __pud(pud_val);
104 140
105 for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++) 141 for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
106 kasan_zero_p4d[i] = __p4d(p4d_val); 142 kasan_zero_p4d[i] = __p4d(p4d_val);
107 143
108 kasan_map_early_shadow(early_top_pgt); 144 kasan_map_early_shadow(early_top_pgt);
@@ -118,12 +154,35 @@ void __init kasan_init(void)
118#endif 154#endif
119 155
120 memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); 156 memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
157
158 /*
159 * We use the same shadow offset for 4- and 5-level paging to
160 * facilitate boot-time switching between paging modes.
161 * As a result, in 5-level paging mode KASAN_SHADOW_START and
162 * KASAN_SHADOW_END are not aligned to PGD boundary.
163 *
164 * KASAN_SHADOW_START doesn't share PGD with anything else.
165 * We claim whole PGD entry to make things easier.
166 *
167 * KASAN_SHADOW_END lands in the last PGD entry and it collides with
168 * bunch of things like kernel code, modules, EFI mapping, etc.
169 * We need to take extra steps to not overwrite them.
170 */
171 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
172 void *ptr;
173
174 ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
175 memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
176 set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
177 __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
178 }
179
121 load_cr3(early_top_pgt); 180 load_cr3(early_top_pgt);
122 __flush_tlb_all(); 181 __flush_tlb_all();
123 182
124 clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); 183 clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
125 184
126 kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, 185 kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
127 kasan_mem_to_shadow((void *)PAGE_OFFSET)); 186 kasan_mem_to_shadow((void *)PAGE_OFFSET));
128 187
129 for (i = 0; i < E820_MAX_ENTRIES; i++) { 188 for (i = 0; i < E820_MAX_ENTRIES; i++) {
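
The extra alignment steps above exist because the shadow mapping formula, shadow = (addr >> 3) + KASAN_SHADOW_OFFSET, stays the same for 4- and 5-level paging, so KASAN_SHADOW_START and KASAN_SHADOW_END stop being PGD-aligned in 5-level mode. A sketch of the address arithmetic (the offset constant is the usual 4-level x86-64 value; treat the numbers as illustrative):

#include <stdio.h>

#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_OFFSET 0xdffffc0000000000UL

/* each byte of shadow covers 8 bytes of kernel address space */
static unsigned long mem_to_shadow(unsigned long addr)
{
        return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

int main(void)
{
        unsigned long page_offset = 0xffff880000000000UL; /* 4-level direct map */

        printf("shadow(%#lx) = %#lx\n", page_offset,
               mem_to_shadow(page_offset));
        return 0;
}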
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 350f7096baac..7913b6921959 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -212,8 +212,8 @@ static void arch_perfmon_setup_counters(void)
212 eax.full = cpuid_eax(0xa); 212 eax.full = cpuid_eax(0xa);
213 213
214 /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ 214 /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
215 if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 && 215 if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 &&
216 __this_cpu_read(cpu_info.x86_model) == 15) { 216 boot_cpu_data.x86_model == 15) {
217 eax.split.version_id = 2; 217 eax.split.version_id = 2;
218 eax.split.num_counters = 2; 218 eax.split.num_counters = 2;
219 eax.split.bit_width = 40; 219 eax.split.bit_width = 40;
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb5df43..3ee234b6234d 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/syscalls.h>
9#include <linux/uaccess.h> 10#include <linux/uaccess.h>
10#include <asm/unistd.h> 11#include <asm/unistd.h>
11#include <os.h> 12#include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
369 mm->arch.ldt.entry_count = 0; 370 mm->arch.ldt.entry_count = 0;
370} 371}
371 372
372int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) 373SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
374 unsigned long , bytecount)
373{ 375{
374 return do_modify_ldt_skas(func, ptr, bytecount); 376 /* See non-um modify_ldt() for why we do this cast */
377 return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
375} 378}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 168efb2534c0..fbd054d6ac97 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = {
601#ifdef CONFIG_X86_MCE 601#ifdef CONFIG_X86_MCE
602 { machine_check, xen_machine_check, true }, 602 { machine_check, xen_machine_check, true },
603#endif 603#endif
604 { nmi, xen_nmi, true }, 604 { nmi, xen_xennmi, true },
605 { overflow, xen_overflow, false }, 605 { overflow, xen_overflow, false },
606#ifdef CONFIG_IA32_EMULATION 606#ifdef CONFIG_IA32_EMULATION
607 { entry_INT80_compat, xen_entry_INT80_compat, false }, 607 { entry_INT80_compat, xen_entry_INT80_compat, false },
@@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
811 } 811 }
812} 812}
813 813
814static void xen_load_sp0(struct tss_struct *tss, 814static void xen_load_sp0(unsigned long sp0)
815 struct thread_struct *thread)
816{ 815{
817 struct multicall_space mcs; 816 struct multicall_space mcs;
818 817
819 mcs = xen_mc_entry(0); 818 mcs = xen_mc_entry(0);
820 MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); 819 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
821 xen_mc_issue(PARAVIRT_LAZY_CPU); 820 xen_mc_issue(PARAVIRT_LAZY_CPU);
822 tss->x86_tss.sp0 = thread->sp0; 821 this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
823} 822}
824 823
825void xen_set_iopl_mask(unsigned mask) 824void xen_set_iopl_mask(unsigned mask)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 71495f1a86d7..2ccdaba31a07 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
449} 449}
450PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); 450PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
451 451
452#if CONFIG_PGTABLE_LEVELS == 4 452#ifdef CONFIG_X86_64
453__visible pudval_t xen_pud_val(pud_t pud) 453__visible pudval_t xen_pud_val(pud_t pud)
454{ 454{
455 return pte_mfn_to_pfn(pud.pud); 455 return pte_mfn_to_pfn(pud.pud);
@@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
538 538
539 xen_mc_issue(PARAVIRT_LAZY_MMU); 539 xen_mc_issue(PARAVIRT_LAZY_MMU);
540} 540}
541#endif /* CONFIG_PGTABLE_LEVELS == 4 */ 541#endif /* CONFIG_X86_64 */
542 542
543static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, 543static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
544 int (*func)(struct mm_struct *mm, struct page *, enum pt_level), 544 int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
580 int (*func)(struct mm_struct *mm, struct page *, enum pt_level), 580 int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
581 bool last, unsigned long limit) 581 bool last, unsigned long limit)
582{ 582{
583 int i, nr, flush = 0; 583 int flush = 0;
584 pud_t *pud;
584 585
585 nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
586 for (i = 0; i < nr; i++) {
587 pud_t *pud;
588 586
589 if (p4d_none(p4d[i])) 587 if (p4d_none(*p4d))
590 continue; 588 return flush;
591 589
592 pud = pud_offset(&p4d[i], 0); 590 pud = pud_offset(p4d, 0);
593 if (PTRS_PER_PUD > 1) 591 if (PTRS_PER_PUD > 1)
594 flush |= (*func)(mm, virt_to_page(pud), PT_PUD); 592 flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
595 flush |= xen_pud_walk(mm, pud, func, 593 flush |= xen_pud_walk(mm, pud, func, last, limit);
596 last && i == nr - 1, limit);
597 }
598 return flush; 594 return flush;
599} 595}
600 596
@@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
644 continue; 640 continue;
645 641
646 p4d = p4d_offset(&pgd[i], 0); 642 p4d = p4d_offset(&pgd[i], 0);
647 if (PTRS_PER_P4D > 1)
648 flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
649 flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); 643 flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
650 } 644 }
651 645
@@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
1176{ 1170{
1177 pgd_t *pgd; 1171 pgd_t *pgd;
1178 p4d_t *p4d; 1172 p4d_t *p4d;
1179 unsigned int i;
1180 bool unpin; 1173 bool unpin;
1181 1174
1182 unpin = (vaddr == 2 * PGDIR_SIZE); 1175 unpin = (vaddr == 2 * PGDIR_SIZE);
1183 vaddr &= PMD_MASK; 1176 vaddr &= PMD_MASK;
1184 pgd = pgd_offset_k(vaddr); 1177 pgd = pgd_offset_k(vaddr);
1185 p4d = p4d_offset(pgd, 0); 1178 p4d = p4d_offset(pgd, 0);
1186 for (i = 0; i < PTRS_PER_P4D; i++) { 1179 if (!p4d_none(*p4d))
1187 if (p4d_none(p4d[i])) 1180 xen_cleanmfnmap_p4d(p4d, unpin);
1188 continue;
1189 xen_cleanmfnmap_p4d(p4d + i, unpin);
1190 }
1191 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
1192 set_pgd(pgd, __pgd(0));
1193 xen_cleanmfnmap_free_pgtbl(p4d, unpin);
1194 }
1195} 1181}
1196 1182
1197static void __init xen_pagetable_p2m_free(void) 1183static void __init xen_pagetable_p2m_free(void)
@@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn)
1692 xen_release_ptpage(pfn, PT_PMD); 1678 xen_release_ptpage(pfn, PT_PMD);
1693} 1679}
1694 1680
1695#if CONFIG_PGTABLE_LEVELS >= 4 1681#ifdef CONFIG_X86_64
1696static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) 1682static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1697{ 1683{
1698 xen_alloc_ptpage(mm, pfn, PT_PUD); 1684 xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
2029 */ 2015 */
2030void __init xen_relocate_p2m(void) 2016void __init xen_relocate_p2m(void)
2031{ 2017{
2032 phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys; 2018 phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
2033 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end; 2019 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
2034 int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d; 2020 int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
2035 pte_t *pt; 2021 pte_t *pt;
2036 pmd_t *pmd; 2022 pmd_t *pmd;
2037 pud_t *pud; 2023 pud_t *pud;
2038 p4d_t *p4d = NULL;
2039 pgd_t *pgd; 2024 pgd_t *pgd;
2040 unsigned long *new_p2m; 2025 unsigned long *new_p2m;
2041 int save_pud; 2026 int save_pud;
@@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void)
2045 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT; 2030 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
2046 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT; 2031 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
2047 n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT; 2032 n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
2048 if (PTRS_PER_P4D > 1) 2033 n_frames = n_pte + n_pt + n_pmd + n_pud;
2049 n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
2050 else
2051 n_p4d = 0;
2052 n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
2053 2034
2054 new_area = xen_find_free_area(PFN_PHYS(n_frames)); 2035 new_area = xen_find_free_area(PFN_PHYS(n_frames));
2055 if (!new_area) { 2036 if (!new_area) {
@@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void)
2065 * To avoid any possible virtual address collision, just use 2046 * To avoid any possible virtual address collision, just use
2066 * 2 * PUD_SIZE for the new area. 2047 * 2 * PUD_SIZE for the new area.
2067 */ 2048 */
2068 p4d_phys = new_area; 2049 pud_phys = new_area;
2069 pud_phys = p4d_phys + PFN_PHYS(n_p4d);
2070 pmd_phys = pud_phys + PFN_PHYS(n_pud); 2050 pmd_phys = pud_phys + PFN_PHYS(n_pud);
2071 pt_phys = pmd_phys + PFN_PHYS(n_pmd); 2051 pt_phys = pmd_phys + PFN_PHYS(n_pmd);
2072 p2m_pfn = PFN_DOWN(pt_phys) + n_pt; 2052 p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
2073 2053
2074 pgd = __va(read_cr3_pa()); 2054 pgd = __va(read_cr3_pa());
2075 new_p2m = (unsigned long *)(2 * PGDIR_SIZE); 2055 new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
2076 idx_p4d = 0;
2077 save_pud = n_pud; 2056 save_pud = n_pud;
2078 do { 2057 for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
2079 if (n_p4d > 0) { 2058 pud = early_memremap(pud_phys, PAGE_SIZE);
2080 p4d = early_memremap(p4d_phys, PAGE_SIZE); 2059 clear_page(pud);
2081 clear_page(p4d); 2060 for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
2082 n_pud = min(save_pud, PTRS_PER_P4D); 2061 idx_pmd++) {
2083 } 2062 pmd = early_memremap(pmd_phys, PAGE_SIZE);
2084 for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { 2063 clear_page(pmd);
2085 pud = early_memremap(pud_phys, PAGE_SIZE); 2064 for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
2086 clear_page(pud); 2065 idx_pt++) {
2087 for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); 2066 pt = early_memremap(pt_phys, PAGE_SIZE);
2088 idx_pmd++) { 2067 clear_page(pt);
2089 pmd = early_memremap(pmd_phys, PAGE_SIZE); 2068 for (idx_pte = 0;
2090 clear_page(pmd); 2069 idx_pte < min(n_pte, PTRS_PER_PTE);
2091 for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); 2070 idx_pte++) {
2092 idx_pt++) { 2071 set_pte(pt + idx_pte,
2093 pt = early_memremap(pt_phys, PAGE_SIZE); 2072 pfn_pte(p2m_pfn, PAGE_KERNEL));
2094 clear_page(pt); 2073 p2m_pfn++;
2095 for (idx_pte = 0;
2096 idx_pte < min(n_pte, PTRS_PER_PTE);
2097 idx_pte++) {
2098 set_pte(pt + idx_pte,
2099 pfn_pte(p2m_pfn, PAGE_KERNEL));
2100 p2m_pfn++;
2101 }
2102 n_pte -= PTRS_PER_PTE;
2103 early_memunmap(pt, PAGE_SIZE);
2104 make_lowmem_page_readonly(__va(pt_phys));
2105 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
2106 PFN_DOWN(pt_phys));
2107 set_pmd(pmd + idx_pt,
2108 __pmd(_PAGE_TABLE | pt_phys));
2109 pt_phys += PAGE_SIZE;
2110 } 2074 }
2111 n_pt -= PTRS_PER_PMD; 2075 n_pte -= PTRS_PER_PTE;
2112 early_memunmap(pmd, PAGE_SIZE); 2076 early_memunmap(pt, PAGE_SIZE);
2113 make_lowmem_page_readonly(__va(pmd_phys)); 2077 make_lowmem_page_readonly(__va(pt_phys));
2114 pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, 2078 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
2115 PFN_DOWN(pmd_phys)); 2079 PFN_DOWN(pt_phys));
2116 set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); 2080 set_pmd(pmd + idx_pt,
2117 pmd_phys += PAGE_SIZE; 2081 __pmd(_PAGE_TABLE | pt_phys));
2082 pt_phys += PAGE_SIZE;
2118 } 2083 }
2119 n_pmd -= PTRS_PER_PUD; 2084 n_pt -= PTRS_PER_PMD;
2120 early_memunmap(pud, PAGE_SIZE); 2085 early_memunmap(pmd, PAGE_SIZE);
2121 make_lowmem_page_readonly(__va(pud_phys)); 2086 make_lowmem_page_readonly(__va(pmd_phys));
2122 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); 2087 pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
2123 if (n_p4d > 0) 2088 PFN_DOWN(pmd_phys));
2124 set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys)); 2089 set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
2125 else 2090 pmd_phys += PAGE_SIZE;
2126 set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
2127 pud_phys += PAGE_SIZE;
2128 }
2129 if (n_p4d > 0) {
2130 save_pud -= PTRS_PER_P4D;
2131 early_memunmap(p4d, PAGE_SIZE);
2132 make_lowmem_page_readonly(__va(p4d_phys));
2133 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
2134 set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
2135 p4d_phys += PAGE_SIZE;
2136 } 2091 }
2137 } while (++idx_p4d < n_p4d); 2092 n_pmd -= PTRS_PER_PUD;
2093 early_memunmap(pud, PAGE_SIZE);
2094 make_lowmem_page_readonly(__va(pud_phys));
2095 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
2096 set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
2097 pud_phys += PAGE_SIZE;
2098 }
2138 2099
2139 /* Now copy the old p2m info to the new area. */ 2100 /* Now copy the old p2m info to the new area. */
2140 memcpy(new_p2m, xen_p2m_addr, size); 2101 memcpy(new_p2m, xen_p2m_addr, size);
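Note: at every level of the loop above, a new page-table frame is made read-only and pinned before it is linked into its parent; a PV guest must not hold a writable mapping of any frame that Xen is asked to treat as a page table. The per-level pattern, extracted as a sketch (link_one_l1 is a hypothetical helper name):

        /* Sketch: the order below matters - drop writable mappings and
         * validate the frame as an L1 table before referencing it.
         */
        static void __init link_one_l1(pmd_t *pmd, int slot, phys_addr_t pt_phys)
        {
                make_lowmem_page_readonly(__va(pt_phys));
                pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, PFN_DOWN(pt_phys));
                set_pmd(pmd + slot, __pmd(_PAGE_TABLE | pt_phys));
        }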
@@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void)
2361 pv_mmu_ops.set_pte = xen_set_pte; 2322 pv_mmu_ops.set_pte = xen_set_pte;
2362 pv_mmu_ops.set_pmd = xen_set_pmd; 2323 pv_mmu_ops.set_pmd = xen_set_pmd;
2363 pv_mmu_ops.set_pud = xen_set_pud; 2324 pv_mmu_ops.set_pud = xen_set_pud;
2364#if CONFIG_PGTABLE_LEVELS >= 4 2325#ifdef CONFIG_X86_64
2365 pv_mmu_ops.set_p4d = xen_set_p4d; 2326 pv_mmu_ops.set_p4d = xen_set_p4d;
2366#endif 2327#endif
2367 2328
@@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void)
2371 pv_mmu_ops.alloc_pmd = xen_alloc_pmd; 2332 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
2372 pv_mmu_ops.release_pte = xen_release_pte; 2333 pv_mmu_ops.release_pte = xen_release_pte;
2373 pv_mmu_ops.release_pmd = xen_release_pmd; 2334 pv_mmu_ops.release_pmd = xen_release_pmd;
2374#if CONFIG_PGTABLE_LEVELS >= 4 2335#ifdef CONFIG_X86_64
2375 pv_mmu_ops.alloc_pud = xen_alloc_pud; 2336 pv_mmu_ops.alloc_pud = xen_alloc_pud;
2376 pv_mmu_ops.release_pud = xen_release_pud; 2337 pv_mmu_ops.release_pud = xen_release_pud;
2377#endif 2338#endif
@@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2435 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), 2396 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2436 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), 2397 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2437 2398
2438#if CONFIG_PGTABLE_LEVELS >= 4 2399#ifdef CONFIG_X86_64
2439 .pud_val = PV_CALLEE_SAVE(xen_pud_val), 2400 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2440 .make_pud = PV_CALLEE_SAVE(xen_make_pud), 2401 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
2441 .set_p4d = xen_set_p4d_hyper, 2402 .set_p4d = xen_set_p4d_hyper,
2442 2403
2443 .alloc_pud = xen_alloc_pmd_init, 2404 .alloc_pud = xen_alloc_pmd_init,
2444 .release_pud = xen_release_pmd_init, 2405 .release_pud = xen_release_pmd_init,
2445#endif /* CONFIG_PGTABLE_LEVELS == 4 */ 2406#endif /* CONFIG_X86_64 */
2446 2407
2447 .activate_mm = xen_activate_mm, 2408 .activate_mm = xen_activate_mm,
2448 .dup_mmap = xen_dup_mmap, 2409 .dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 05f91ce9b55e..c0c756c76afe 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -14,6 +14,7 @@
14 * single-threaded. 14 * single-threaded.
15 */ 15 */
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/sched/task_stack.h>
17#include <linux/err.h> 18#include <linux/err.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <linux/smp.h> 20#include <linux/smp.h>
@@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
294#endif 295#endif
295 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); 296 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
296 297
298 /*
299 * Bring up the CPU in cpu_bringup_and_idle() with the stack
300 * pointing just below where pt_regs would be if it were a normal
301 * kernel entry.
302 */
297 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; 303 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
298 ctxt->flags = VGCF_IN_KERNEL; 304 ctxt->flags = VGCF_IN_KERNEL;
299 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ 305 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
300 ctxt->user_regs.ds = __USER_DS; 306 ctxt->user_regs.ds = __USER_DS;
301 ctxt->user_regs.es = __USER_DS; 307 ctxt->user_regs.es = __USER_DS;
302 ctxt->user_regs.ss = __KERNEL_DS; 308 ctxt->user_regs.ss = __KERNEL_DS;
309 ctxt->user_regs.cs = __KERNEL_CS;
310 ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
303 311
304 xen_copy_trap_info(ctxt->trap_ctxt); 312 xen_copy_trap_info(ctxt->trap_ctxt);
305 313
@@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
314 ctxt->gdt_frames[0] = gdt_mfn; 322 ctxt->gdt_frames[0] = gdt_mfn;
315 ctxt->gdt_ents = GDT_ENTRIES; 323 ctxt->gdt_ents = GDT_ENTRIES;
316 324
325 /*
326 * Set SS:SP that Xen will use when entering guest kernel mode
327 * from guest user mode. Subsequent calls to load_sp0() can
328 * change this value.
329 */
317 ctxt->kernel_ss = __KERNEL_DS; 330 ctxt->kernel_ss = __KERNEL_DS;
318 ctxt->kernel_sp = idle->thread.sp0; 331 ctxt->kernel_sp = task_top_of_stack(idle);
319 332
320#ifdef CONFIG_X86_32 333#ifdef CONFIG_X86_32
321 ctxt->event_callback_cs = __KERNEL_CS; 334 ctxt->event_callback_cs = __KERNEL_CS;
@@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
327 (unsigned long)xen_hypervisor_callback; 340 (unsigned long)xen_hypervisor_callback;
328 ctxt->failsafe_callback_eip = 341 ctxt->failsafe_callback_eip =
329 (unsigned long)xen_failsafe_callback; 342 (unsigned long)xen_failsafe_callback;
330 ctxt->user_regs.cs = __KERNEL_CS;
331 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); 343 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
332 344
333 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
334 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); 345 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
335 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) 346 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
336 BUG(); 347 BUG();
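Note: the two hunks above replace open-coded stack arithmetic with helpers. A hedged sketch of the relationship they rely on (simplified fragment, not the exact x86 definitions):

        /* Sketch: on x86-64, pt_regs sits at the very top of the task
         * stack, so the two initial values are adjacent by construction.
         */
        struct pt_regs *regs = task_pt_regs(idle);      /* initial user_regs.esp */
        unsigned long top = task_top_of_stack(idle);    /* initial kernel_sp */

        BUG_ON((unsigned long)(regs + 1) != top);       /* illustrative check */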
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c98a48c861fd..8a10c9a9e2b5 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -30,7 +30,7 @@ xen_pv_trap debug
30xen_pv_trap xendebug 30xen_pv_trap xendebug
31xen_pv_trap int3 31xen_pv_trap int3
32xen_pv_trap xenint3 32xen_pv_trap xenint3
33xen_pv_trap nmi 33xen_pv_trap xennmi
34xen_pv_trap overflow 34xen_pv_trap overflow
35xen_pv_trap bounds 35xen_pv_trap bounds
36xen_pv_trap invalid_op 36xen_pv_trap invalid_op
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index b5b8d7f43557..497cc55a0c16 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -10,6 +10,7 @@
10#include <asm/boot.h> 10#include <asm/boot.h>
11#include <asm/asm.h> 11#include <asm/asm.h>
12#include <asm/page_types.h> 12#include <asm/page_types.h>
13#include <asm/unwind_hints.h>
13 14
14#include <xen/interface/elfnote.h> 15#include <xen/interface/elfnote.h>
15#include <xen/interface/features.h> 16#include <xen/interface/features.h>
@@ -20,6 +21,7 @@
20#ifdef CONFIG_XEN_PV 21#ifdef CONFIG_XEN_PV
21 __INIT 22 __INIT
22ENTRY(startup_xen) 23ENTRY(startup_xen)
24 UNWIND_HINT_EMPTY
23 cld 25 cld
24 26
25 /* Clear .bss */ 27 /* Clear .bss */
@@ -34,21 +36,24 @@ ENTRY(startup_xen)
34 mov $init_thread_union+THREAD_SIZE, %_ASM_SP 36 mov $init_thread_union+THREAD_SIZE, %_ASM_SP
35 37
36 jmp xen_start_kernel 38 jmp xen_start_kernel
37 39END(startup_xen)
38 __FINIT 40 __FINIT
39#endif 41#endif
40 42
41.pushsection .text 43.pushsection .text
42 .balign PAGE_SIZE 44 .balign PAGE_SIZE
43ENTRY(hypercall_page) 45ENTRY(hypercall_page)
44 .skip PAGE_SIZE 46 .rept (PAGE_SIZE / 32)
47 UNWIND_HINT_EMPTY
48 .skip 32
49 .endr
45 50
46#define HYPERCALL(n) \ 51#define HYPERCALL(n) \
47 .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ 52 .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
48 .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 53 .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
49#include <asm/xen-hypercalls.h> 54#include <asm/xen-hypercalls.h>
50#undef HYPERCALL 55#undef HYPERCALL
51 56END(hypercall_page)
52.popsection 57.popsection
53 58
54 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") 59 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
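Note: the .rept/.skip change keeps the page layout identical - one 32-byte stub per hypercall - while attaching an unwind hint at each slot boundary. Resolving a stub address by hand, as the HYPERCALL() macro does (illustrative C; the extern declaration is assumed):

        extern char hypercall_page[];   /* PAGE_SIZE, filled in by Xen */

        void *stub = &hypercall_page[__HYPERVISOR_mmu_update * 32];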
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3c3a37b8503b..572b6c7303ed 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -51,6 +51,7 @@
51#include <acpi/actbl1.h> 51#include <acpi/actbl1.h>
52#include <acpi/ghes.h> 52#include <acpi/ghes.h>
53#include <acpi/apei.h> 53#include <acpi/apei.h>
54#include <asm/fixmap.h>
54#include <asm/tlbflush.h> 55#include <asm/tlbflush.h>
55#include <ras/ras_event.h> 56#include <ras/ras_event.h>
56 57
@@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
112 * Because the memory area used to transfer hardware error information 113 * Because the memory area used to transfer hardware error information
113 * from BIOS to Linux can be determined only in NMI, IRQ or timer 114 * from BIOS to Linux can be determined only in NMI, IRQ or timer
114 * handler, but general ioremap can not be used in atomic context, so 115 * handler, but general ioremap can not be used in atomic context, so
115 * a special version of atomic ioremap is implemented for that. 116 * the fixmap is used instead.
116 */ 117 */
117 118
118/* 119/*
@@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex);
126/* virtual memory area for atomic ioremap */ 127/* virtual memory area for atomic ioremap */
127static struct vm_struct *ghes_ioremap_area; 128static struct vm_struct *ghes_ioremap_area;
128/* 129/*
129 * These 2 spinlock is used to prevent atomic ioremap virtual memory 130 * These 2 spinlocks are used to prevent the fixmap entries from being used
130 * area from being mapped simultaneously. 131 * simultaneously.
131 */ 132 */
132static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 133static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
133static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 134static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
@@ -159,52 +160,36 @@ static void ghes_ioremap_exit(void)
159 160
160static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) 161static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
161{ 162{
162 unsigned long vaddr;
163 phys_addr_t paddr; 163 phys_addr_t paddr;
164 pgprot_t prot; 164 pgprot_t prot;
165 165
166 vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
167
168 paddr = pfn << PAGE_SHIFT; 166 paddr = pfn << PAGE_SHIFT;
169 prot = arch_apei_get_mem_attribute(paddr); 167 prot = arch_apei_get_mem_attribute(paddr);
170 ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); 168 __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
171 169
172 return (void __iomem *)vaddr; 170 return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
173} 171}
174 172
175static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) 173static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
176{ 174{
177 unsigned long vaddr, paddr; 175 phys_addr_t paddr;
178 pgprot_t prot; 176 pgprot_t prot;
179 177
180 vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
181
182 paddr = pfn << PAGE_SHIFT; 178 paddr = pfn << PAGE_SHIFT;
183 prot = arch_apei_get_mem_attribute(paddr); 179 prot = arch_apei_get_mem_attribute(paddr);
180 __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
184 181
185 ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); 182 return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
186
187 return (void __iomem *)vaddr;
188} 183}
189 184
190static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) 185static void ghes_iounmap_nmi(void)
191{ 186{
192 unsigned long vaddr = (unsigned long __force)vaddr_ptr; 187 clear_fixmap(FIX_APEI_GHES_NMI);
193 void *base = ghes_ioremap_area->addr;
194
195 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
196 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
197 arch_apei_flush_tlb_one(vaddr);
198} 188}
199 189
200static void ghes_iounmap_irq(void __iomem *vaddr_ptr) 190static void ghes_iounmap_irq(void)
201{ 191{
202 unsigned long vaddr = (unsigned long __force)vaddr_ptr; 192 clear_fixmap(FIX_APEI_GHES_IRQ);
203 void *base = ghes_ioremap_area->addr;
204
205 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
206 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
207 arch_apei_flush_tlb_one(vaddr);
208} 193}
209 194
210static int ghes_estatus_pool_init(void) 195static int ghes_estatus_pool_init(void)
@@ -360,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
360 paddr += trunk; 345 paddr += trunk;
361 buffer += trunk; 346 buffer += trunk;
362 if (in_nmi) { 347 if (in_nmi) {
363 ghes_iounmap_nmi(vaddr); 348 ghes_iounmap_nmi();
364 raw_spin_unlock(&ghes_ioremap_lock_nmi); 349 raw_spin_unlock(&ghes_ioremap_lock_nmi);
365 } else { 350 } else {
366 ghes_iounmap_irq(vaddr); 351 ghes_iounmap_irq();
367 spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); 352 spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
368 } 353 }
369 } 354 }
@@ -851,17 +836,8 @@ static void ghes_sea_remove(struct ghes *ghes)
851 synchronize_rcu(); 836 synchronize_rcu();
852} 837}
853#else /* CONFIG_ACPI_APEI_SEA */ 838#else /* CONFIG_ACPI_APEI_SEA */
854static inline void ghes_sea_add(struct ghes *ghes) 839static inline void ghes_sea_add(struct ghes *ghes) { }
855{ 840static inline void ghes_sea_remove(struct ghes *ghes) { }
856 pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
857 ghes->generic->header.source_id);
858}
859
860static inline void ghes_sea_remove(struct ghes *ghes)
861{
862 pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
863 ghes->generic->header.source_id);
864}
865#endif /* CONFIG_ACPI_APEI_SEA */ 841#endif /* CONFIG_ACPI_APEI_SEA */
866 842
867#ifdef CONFIG_HAVE_ACPI_APEI_NMI 843#ifdef CONFIG_HAVE_ACPI_APEI_NMI
@@ -1063,23 +1039,9 @@ static void ghes_nmi_init_cxt(void)
1063 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); 1039 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1064} 1040}
1065#else /* CONFIG_HAVE_ACPI_APEI_NMI */ 1041#else /* CONFIG_HAVE_ACPI_APEI_NMI */
1066static inline void ghes_nmi_add(struct ghes *ghes) 1042static inline void ghes_nmi_add(struct ghes *ghes) { }
1067{ 1043static inline void ghes_nmi_remove(struct ghes *ghes) { }
1068 pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n", 1044static inline void ghes_nmi_init_cxt(void) { }
1069 ghes->generic->header.source_id);
1070 BUG();
1071}
1072
1073static inline void ghes_nmi_remove(struct ghes *ghes)
1074{
1075 pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
1076 ghes->generic->header.source_id);
1077 BUG();
1078}
1079
1080static inline void ghes_nmi_init_cxt(void)
1081{
1082}
1083#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ 1045#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
1084 1046
1085static int ghes_probe(struct platform_device *ghes_dev) 1047static int ghes_probe(struct platform_device *ghes_dev)
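Note: for context, the caller in ghes_copy_tofrom_phys() brackets the new helpers with the existing locks, since each fixmap slot is a single shared page. A condensed sketch of the NMI path (illustrative; error handling and the IRQ variant omitted):

        raw_spin_lock(&ghes_ioremap_lock_nmi);
        vaddr = ghes_ioremap_pfn_nmi(PHYS_PFN(paddr));  /* maps FIX_APEI_GHES_NMI */
        memcpy_fromio(buffer, vaddr + offset, trunk);   /* or memcpy_toio() */
        ghes_iounmap_nmi();                             /* clear_fixmap() */
        raw_spin_unlock(&ghes_ioremap_lock_nmi);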
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b640ad8a6d20..adc877dfef5c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2692,7 +2692,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
2692 * from the parent. 2692 * from the parent.
2693 */ 2693 */
2694 page_count = (u32)calc_pages_for(0, length); 2694 page_count = (u32)calc_pages_for(0, length);
2695 pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); 2695 pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
2696 if (IS_ERR(pages)) { 2696 if (IS_ERR(pages)) {
2697 result = PTR_ERR(pages); 2697 result = PTR_ERR(pages);
2698 pages = NULL; 2698 pages = NULL;
@@ -2827,7 +2827,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
2827 */ 2827 */
2828 size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32); 2828 size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
2829 page_count = (u32)calc_pages_for(0, size); 2829 page_count = (u32)calc_pages_for(0, size);
2830 pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); 2830 pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
2831 if (IS_ERR(pages)) { 2831 if (IS_ERR(pages)) {
2832 ret = PTR_ERR(pages); 2832 ret = PTR_ERR(pages);
2833 goto fail_stat_request; 2833 goto fail_stat_request;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4ac454ae54d7..83876a1c8d98 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2094,6 +2094,11 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2094 goto err; 2094 goto err;
2095 } 2095 }
2096 2096
2097 if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
2098 err = -EINVAL;
2099 goto err;
2100 }
2101
2097 syncobj = drm_syncobj_find(file, fence.handle); 2102 syncobj = drm_syncobj_find(file, fence.handle);
2098 if (!syncobj) { 2103 if (!syncobj) {
2099 DRM_DEBUG("Invalid syncobj handle provided\n"); 2104 DRM_DEBUG("Invalid syncobj handle provided\n");
@@ -2101,6 +2106,9 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2101 goto err; 2106 goto err;
2102 } 2107 }
2103 2108
2109 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2110 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2111
2104 fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); 2112 fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
2105 } 2113 }
2106 2114
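Note: __I915_EXEC_FENCE_UNKNOWN_FLAGS (defined in the i915_drm.h hunks further down) uses a negation trick: in two's complement, -(1 << n) equals ~((1 << n) - 1), i.e. every bit at or above bit n. A standalone check of the arithmetic (userspace C, independent of kernel headers):

        #include <assert.h>
        #include <stdint.h>

        #define I915_EXEC_FENCE_WAIT   (1u << 0)
        #define I915_EXEC_FENCE_SIGNAL (1u << 1)
        #define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))

        int main(void)
        {
                uint32_t unknown = __I915_EXEC_FENCE_UNKNOWN_FLAGS;

                assert(unknown == 0xfffffffcu); /* every bit above the known two */
                assert((I915_EXEC_FENCE_WAIT & unknown) == 0);
                assert((I915_EXEC_FENCE_SIGNAL & unknown) == 0);
                return 0;
        }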
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e2410eb5d96e..ad524cb0f6fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -832,10 +832,14 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
832 } 832 }
833} 833}
834 834
835struct sgt_dma { 835static inline struct sgt_dma {
836 struct scatterlist *sg; 836 struct scatterlist *sg;
837 dma_addr_t dma, max; 837 dma_addr_t dma, max;
838}; 838} sgt_dma(struct i915_vma *vma) {
839 struct scatterlist *sg = vma->pages->sgl;
840 dma_addr_t addr = sg_dma_address(sg);
841 return (struct sgt_dma) { sg, addr, addr + sg->length };
842}
839 843
840struct gen8_insert_pte { 844struct gen8_insert_pte {
841 u16 pml4e; 845 u16 pml4e;
@@ -916,11 +920,7 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
916 u32 unused) 920 u32 unused)
917{ 921{
918 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 922 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
919 struct sgt_dma iter = { 923 struct sgt_dma iter = sgt_dma(vma);
920 .sg = vma->pages->sgl,
921 .dma = sg_dma_address(iter.sg),
922 .max = iter.dma + iter.sg->length,
923 };
924 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); 924 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
925 925
926 gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, 926 gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
@@ -933,11 +933,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
933 u32 unused) 933 u32 unused)
934{ 934{
935 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 935 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
936 struct sgt_dma iter = { 936 struct sgt_dma iter = sgt_dma(vma);
937 .sg = vma->pages->sgl,
938 .dma = sg_dma_address(iter.sg),
939 .max = iter.dma + iter.sg->length,
940 };
941 struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; 937 struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
942 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); 938 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
943 939
@@ -1632,13 +1628,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1632 unsigned act_pt = first_entry / GEN6_PTES; 1628 unsigned act_pt = first_entry / GEN6_PTES;
1633 unsigned act_pte = first_entry % GEN6_PTES; 1629 unsigned act_pte = first_entry % GEN6_PTES;
1634 const u32 pte_encode = vm->pte_encode(0, cache_level, flags); 1630 const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1635 struct sgt_dma iter; 1631 struct sgt_dma iter = sgt_dma(vma);
1636 gen6_pte_t *vaddr; 1632 gen6_pte_t *vaddr;
1637 1633
1638 vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); 1634 vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
1639 iter.sg = vma->pages->sgl;
1640 iter.dma = sg_dma_address(iter.sg);
1641 iter.max = iter.dma + iter.sg->length;
1642 do { 1635 do {
1643 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); 1636 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1644 1637
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index e84fee3ec4f3..184340d486c3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -721,7 +721,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
721 * allocation taken by fbdev 721 * allocation taken by fbdev
722 */ 722 */
723 if (!(dev_priv->capabilities & SVGA_CAP_3D)) 723 if (!(dev_priv->capabilities & SVGA_CAP_3D))
724 mem_size *= 2; 724 mem_size *= 3;
725 725
726 dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; 726 dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE;
727 dev_priv->prim_bb_mem = 727 dev_priv->prim_bb_mem =
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 3bbad22b3748..d6b1c509ae01 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -224,7 +224,7 @@ out:
224 return ret; 224 return ret;
225} 225}
226 226
227static struct dma_fence_ops vmw_fence_ops = { 227static const struct dma_fence_ops vmw_fence_ops = {
228 .get_driver_name = vmw_fence_get_driver_name, 228 .get_driver_name = vmw_fence_get_driver_name,
229 .get_timeline_name = vmw_fence_get_timeline_name, 229 .get_timeline_name = vmw_fence_get_timeline_name,
230 .enable_signaling = vmw_fence_enable_signaling, 230 .enable_signaling = vmw_fence_enable_signaling,
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 6d6b092e2da9..d6135900da64 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1258,6 +1258,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
1258 { "ELAN0605", 0 }, 1258 { "ELAN0605", 0 },
1259 { "ELAN0609", 0 }, 1259 { "ELAN0609", 0 },
1260 { "ELAN060B", 0 }, 1260 { "ELAN060B", 0 },
1261 { "ELAN060C", 0 },
1261 { "ELAN0611", 0 }, 1262 { "ELAN0611", 0 },
1262 { "ELAN1000", 0 }, 1263 { "ELAN1000", 0 },
1263 { } 1264 { }
diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c
index 225025a0940c..b6ccf39c6a7b 100644
--- a/drivers/input/rmi4/rmi_smbus.c
+++ b/drivers/input/rmi4/rmi_smbus.c
@@ -312,7 +312,7 @@ static int rmi_smb_probe(struct i2c_client *client,
312 rmi_smb->xport.dev = &client->dev; 312 rmi_smb->xport.dev = &client->dev;
313 rmi_smb->xport.pdata = *pdata; 313 rmi_smb->xport.pdata = *pdata;
314 rmi_smb->xport.pdata.irq = client->irq; 314 rmi_smb->xport.pdata.irq = client->irq;
315 rmi_smb->xport.proto_name = "smb2"; 315 rmi_smb->xport.proto_name = "smb";
316 rmi_smb->xport.ops = &rmi_smb_ops; 316 rmi_smb->xport.ops = &rmi_smb_ops;
317 317
318 smbus_version = rmi_smb_get_version(rmi_smb); 318 smbus_version = rmi_smb_get_version(rmi_smb);
@@ -322,7 +322,7 @@ static int rmi_smb_probe(struct i2c_client *client,
322 rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d", 322 rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d",
323 smbus_version); 323 smbus_version);
324 324
325 if (smbus_version != 2) { 325 if (smbus_version != 2 && smbus_version != 3) {
326 dev_err(&client->dev, "Unrecognized SMB version %d\n", 326 dev_err(&client->dev, "Unrecognized SMB version %d\n",
327 smbus_version); 327 smbus_version);
328 return -ENODEV; 328 return -ENODEV;
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index 88ea5e1b72ae..abf27578beb1 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -531,6 +531,7 @@ int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
531 531
532 input_set_drvdata(input_dev, ts); 532 input_set_drvdata(input_dev, ts);
533 533
534 __set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
534 input_set_capability(input_dev, EV_KEY, BTN_TOUCH); 535 input_set_capability(input_dev, EV_KEY, BTN_TOUCH);
535 536
536 input_set_abs_params(input_dev, ABS_X, 537 input_set_abs_params(input_dev, ABS_X,
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
index eda38cbe8530..41f2a9f6851d 100644
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -32,7 +32,7 @@
32#include <linux/pci.h> 32#include <linux/pci.h>
33#include <linux/mutex.h> 33#include <linux/mutex.h>
34#include <linux/miscdevice.h> 34#include <linux/miscdevice.h>
35#include <linux/pti.h> 35#include <linux/intel-pti.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38 38
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index cf7c18947189..d065c0e2d18e 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -178,7 +178,6 @@ static int c_can_pci_probe(struct pci_dev *pdev,
178 break; 178 break;
179 case BOSCH_D_CAN: 179 case BOSCH_D_CAN:
180 priv->regs = reg_map_d_can; 180 priv->regs = reg_map_d_can;
181 priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
182 break; 181 break;
183 default: 182 default:
184 ret = -EINVAL; 183 ret = -EINVAL;
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 46a746ee80bb..b5145a7f874c 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -320,7 +320,6 @@ static int c_can_plat_probe(struct platform_device *pdev)
320 break; 320 break;
321 case BOSCH_D_CAN: 321 case BOSCH_D_CAN:
322 priv->regs = reg_map_d_can; 322 priv->regs = reg_map_d_can;
323 priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
324 priv->read_reg = c_can_plat_read_reg_aligned_to_16bit; 323 priv->read_reg = c_can_plat_read_reg_aligned_to_16bit;
325 priv->write_reg = c_can_plat_write_reg_aligned_to_16bit; 324 priv->write_reg = c_can_plat_write_reg_aligned_to_16bit;
326 priv->read_reg32 = d_can_plat_read_reg32; 325 priv->read_reg32 = d_can_plat_read_reg32;
diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
index 4d1fe8d95042..2772d05ff11c 100644
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -670,9 +670,9 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev)
670 priv->base + IFI_CANFD_FTIME); 670 priv->base + IFI_CANFD_FTIME);
671 671
672 /* Configure transmitter delay */ 672 /* Configure transmitter delay */
673 tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK; 673 tdc = dbt->brp * (dbt->prop_seg + dbt->phase_seg1);
674 writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc, 674 tdc &= IFI_CANFD_TDELAY_MASK;
675 priv->base + IFI_CANFD_TDELAY); 675 writel(IFI_CANFD_TDELAY_EN | tdc, priv->base + IFI_CANFD_TDELAY);
676} 676}
677 677
678static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id, 678static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id,
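Note: the corrected formula measures the transmitter delay window up to the data-phase sample point, which includes the propagation segment. A worked example with made-up bittiming values (ifi_tdc_example is a hypothetical helper):

        /* Sketch: for brp = 2, prop_seg = 3, phase_seg1 = 4 (hypothetical):
         *   old: tdc = brp * (phase_seg1 + 1)        = 2 * 5 = 10
         *   new: tdc = brp * (prop_seg + phase_seg1) = 2 * 7 = 14
         * i.e. the propagation segment was wrongly left out of the window.
         */
        static u32 ifi_tdc_example(const struct can_bittiming *dbt)
        {
                return (dbt->brp * (dbt->prop_seg + dbt->phase_seg1)) &
                       IFI_CANFD_TDELAY_MASK;
        }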
diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index 51c2d182a33a..b4efd711f824 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -29,14 +29,19 @@
29#include "peak_canfd_user.h" 29#include "peak_canfd_user.h"
30 30
31MODULE_AUTHOR("Stephane Grosjean <s.grosjean@peak-system.com>"); 31MODULE_AUTHOR("Stephane Grosjean <s.grosjean@peak-system.com>");
32MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe FD family cards"); 32MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe/M.2 FD family cards");
33MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe FD CAN cards"); 33MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe/M.2 FD CAN cards");
34MODULE_LICENSE("GPL v2"); 34MODULE_LICENSE("GPL v2");
35 35
36#define PCIEFD_DRV_NAME "peak_pciefd" 36#define PCIEFD_DRV_NAME "peak_pciefd"
37 37
38#define PEAK_PCI_VENDOR_ID 0x001c /* The PCI device and vendor IDs */ 38#define PEAK_PCI_VENDOR_ID 0x001c /* The PCI device and vendor IDs */
39#define PEAK_PCIEFD_ID 0x0013 /* for PCIe slot cards */ 39#define PEAK_PCIEFD_ID 0x0013 /* for PCIe slot cards */
40#define PCAN_CPCIEFD_ID 0x0014 /* for Compact-PCI Serial slot cards */
41#define PCAN_PCIE104FD_ID 0x0017 /* for PCIe-104 Express slot cards */
42#define PCAN_MINIPCIEFD_ID 0x0018 /* for mini-PCIe slot cards */
43#define PCAN_PCIEFD_OEM_ID 0x0019 /* for PCIe slot OEM cards */
44#define PCAN_M2_ID 0x001a /* for M2 slot cards */
40 45
41/* PEAK PCIe board access description */ 46/* PEAK PCIe board access description */
42#define PCIEFD_BAR0_SIZE (64 * 1024) 47#define PCIEFD_BAR0_SIZE (64 * 1024)
@@ -203,6 +208,11 @@ struct pciefd_board {
203/* supported device ids. */ 208/* supported device ids. */
204static const struct pci_device_id peak_pciefd_tbl[] = { 209static const struct pci_device_id peak_pciefd_tbl[] = {
205 {PEAK_PCI_VENDOR_ID, PEAK_PCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,}, 210 {PEAK_PCI_VENDOR_ID, PEAK_PCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,},
211 {PEAK_PCI_VENDOR_ID, PCAN_CPCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,},
212 {PEAK_PCI_VENDOR_ID, PCAN_PCIE104FD_ID, PCI_ANY_ID, PCI_ANY_ID,},
213 {PEAK_PCI_VENDOR_ID, PCAN_MINIPCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,},
214 {PEAK_PCI_VENDOR_ID, PCAN_PCIEFD_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,},
215 {PEAK_PCI_VENDOR_ID, PCAN_M2_ID, PCI_ANY_ID, PCI_ANY_ID,},
206 {0,} 216 {0,}
207}; 217};
208 218
diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index b0c80859f746..1ac2090a1721 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -539,6 +539,13 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status)
539 } 539 }
540 stats->rx_over_errors++; 540 stats->rx_over_errors++;
541 stats->rx_errors++; 541 stats->rx_errors++;
542
543 /* reset the CAN IP by entering reset mode
544 * ignoring timeout error
545 */
546 set_reset_mode(dev);
547 set_normal_mode(dev);
548
542 /* clear bit */ 549 /* clear bit */
543 sun4i_can_write_cmdreg(priv, SUN4I_CMD_CLEAR_OR_FLAG); 550 sun4i_can_write_cmdreg(priv, SUN4I_CMD_CLEAR_OR_FLAG);
544 } 551 }
@@ -653,8 +660,9 @@ static irqreturn_t sun4i_can_interrupt(int irq, void *dev_id)
653 netif_wake_queue(dev); 660 netif_wake_queue(dev);
654 can_led_event(dev, CAN_LED_EVENT_TX); 661 can_led_event(dev, CAN_LED_EVENT_TX);
655 } 662 }
656 if (isrc & SUN4I_INT_RBUF_VLD) { 663 if ((isrc & SUN4I_INT_RBUF_VLD) &&
657 /* receive interrupt */ 664 !(isrc & SUN4I_INT_DATA_OR)) {
665 /* receive interrupt - don't read if overrun occurred */
658 while (status & SUN4I_STA_RBUF_RDY) { 666 while (status & SUN4I_STA_RBUF_RDY) {
659 /* RX buffer is not empty */ 667 /* RX buffer is not empty */
660 sun4i_can_rx(dev); 668 sun4i_can_rx(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index fc281712869b..17b723218b0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -93,7 +93,7 @@ static void delayed_event_release(struct mlx5_device_context *dev_ctx,
93 list_splice_init(&priv->waiting_events_list, &temp); 93 list_splice_init(&priv->waiting_events_list, &temp);
94 if (!dev_ctx->context) 94 if (!dev_ctx->context)
95 goto out; 95 goto out;
96 list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) 96 list_for_each_entry_safe(de, n, &temp, list)
97 dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); 97 dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
98 98
99out: 99out:
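Note: the one-liner above fixes a classic splice-then-iterate slip: after list_splice_init() the entries live on the local list and the source head is reinitialized to empty, so the old loop over the source iterated nothing. The intended shape, condensed:

        LIST_HEAD(temp);

        list_splice_init(&priv->waiting_events_list, &temp); /* source now empty */
        list_for_each_entry_safe(de, n, &temp, list)         /* so walk temp */
                dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);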
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index cc13d3dbd366..13b5ef9d8703 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -67,7 +67,7 @@
67#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa 67#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
68#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd 68#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd
69 69
70#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 70#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
71#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 71#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3
72#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 72#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6
73 73
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 15a1687483cc..91b1b0938931 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -215,22 +215,20 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
215static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, 215static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
216 struct mlx5e_dma_info *dma_info) 216 struct mlx5e_dma_info *dma_info)
217{ 217{
218 struct page *page;
219
220 if (mlx5e_rx_cache_get(rq, dma_info)) 218 if (mlx5e_rx_cache_get(rq, dma_info))
221 return 0; 219 return 0;
222 220
223 page = dev_alloc_pages(rq->buff.page_order); 221 dma_info->page = dev_alloc_pages(rq->buff.page_order);
224 if (unlikely(!page)) 222 if (unlikely(!dma_info->page))
225 return -ENOMEM; 223 return -ENOMEM;
226 224
227 dma_info->addr = dma_map_page(rq->pdev, page, 0, 225 dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
228 RQ_PAGE_SIZE(rq), rq->buff.map_dir); 226 RQ_PAGE_SIZE(rq), rq->buff.map_dir);
229 if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { 227 if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
230 put_page(page); 228 put_page(dma_info->page);
229 dma_info->page = NULL;
231 return -ENOMEM; 230 return -ENOMEM;
232 } 231 }
233 dma_info->page = page;
234 232
235 return 0; 233 return 0;
236} 234}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index e906b754415c..ab92298eafc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -49,7 +49,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
49 struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, 49 struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
50 napi); 50 napi);
51 bool busy = false; 51 bool busy = false;
52 int work_done; 52 int work_done = 0;
53 int i; 53 int i;
54 54
55 for (i = 0; i < c->num_tc; i++) 55 for (i = 0; i < c->num_tc; i++)
@@ -58,15 +58,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
58 if (c->xdp) 58 if (c->xdp)
59 busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); 59 busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
60 60
61 work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); 61 if (likely(budget)) { /* budget=0 means: don't poll rx rings */
62 busy |= work_done == budget; 62 work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
63 busy |= work_done == budget;
64 }
63 65
64 busy |= c->rq.post_wqes(&c->rq); 66 busy |= c->rq.post_wqes(&c->rq);
65 67
66 if (busy) { 68 if (busy) {
67 if (likely(mlx5e_channel_no_affinity_change(c))) 69 if (likely(mlx5e_channel_no_affinity_change(c)))
68 return budget; 70 return budget;
69 if (work_done == budget) 71 if (budget && work_done == budget)
70 work_done--; 72 work_done--;
71 } 73 }
72 74
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0d2c8dcd6eae..06562c9a6b9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1482,9 +1482,16 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
1482 return -EAGAIN; 1482 return -EAGAIN;
1483 } 1483 }
1484 1484
1485 /* Panic tear down fw command will stop the PCI bus communication
1486 * with the HCA, so the health poll is no longer needed.
1487 */
1488 mlx5_drain_health_wq(dev);
1489 mlx5_stop_health_poll(dev);
1490
1485 ret = mlx5_cmd_force_teardown_hca(dev); 1491 ret = mlx5_cmd_force_teardown_hca(dev);
1486 if (ret) { 1492 if (ret) {
1487 mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); 1493 mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
1494 mlx5_start_health_poll(dev);
1488 return ret; 1495 return ret;
1489 } 1496 }
1490 1497
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8acfc1e099e1..63e56f6c1877 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -687,7 +687,7 @@
687#define BUG_TABLE 687#define BUG_TABLE
688#endif 688#endif
689 689
690#ifdef CONFIG_ORC_UNWINDER 690#ifdef CONFIG_UNWINDER_ORC
691#define ORC_UNWIND_TABLE \ 691#define ORC_UNWIND_TABLE \
692 . = ALIGN(4); \ 692 . = ALIGN(4); \
693 .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ 693 .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
diff --git a/include/linux/pti.h b/include/linux/intel-pti.h
index b3ea01a3197e..2710d72de3c9 100644
--- a/include/linux/pti.h
+++ b/include/linux/intel-pti.h
@@ -22,8 +22,8 @@
22 * interface to write out its contents for debugging a mobile system. 22 * interface to write out its contents for debugging a mobile system.
23 */ 23 */
24 24
25#ifndef PTI_H_ 25#ifndef LINUX_INTEL_PTI_H_
26#define PTI_H_ 26#define LINUX_INTEL_PTI_H_
27 27
28/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ 28/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
29#define PTI_LASTDWORD_DTS 0x30 29#define PTI_LASTDWORD_DTS 0x30
@@ -40,4 +40,4 @@ struct pti_masterchannel *pti_request_masterchannel(u8 type,
40 const char *thread_name); 40 const char *thread_name);
41void pti_release_masterchannel(struct pti_masterchannel *mc); 41void pti_release_masterchannel(struct pti_masterchannel *mc);
42 42
43#endif /*PTI_H_*/ 43#endif /* LINUX_INTEL_PTI_H_ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 43edf659453b..91b46f99b4d2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2496,7 +2496,7 @@ void vmemmap_populate_print_last(void);
2496void vmemmap_free(unsigned long start, unsigned long end); 2496void vmemmap_free(unsigned long start, unsigned long end);
2497#endif 2497#endif
2498void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, 2498void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
2499 unsigned long size); 2499 unsigned long nr_pages);
2500 2500
2501enum mf_flags { 2501enum mf_flags {
2502 MF_COUNT_INCREASED = 1 << 0, 2502 MF_COUNT_INCREASED = 1 << 0,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c9c4a81b9767..a507f43ad221 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1151,13 +1151,17 @@ struct mem_section {
1151#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) 1151#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
1152 1152
1153#ifdef CONFIG_SPARSEMEM_EXTREME 1153#ifdef CONFIG_SPARSEMEM_EXTREME
1154extern struct mem_section *mem_section[NR_SECTION_ROOTS]; 1154extern struct mem_section **mem_section;
1155#else 1155#else
1156extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; 1156extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
1157#endif 1157#endif
1158 1158
1159static inline struct mem_section *__nr_to_section(unsigned long nr) 1159static inline struct mem_section *__nr_to_section(unsigned long nr)
1160{ 1160{
1161#ifdef CONFIG_SPARSEMEM_EXTREME
1162 if (!mem_section)
1163 return NULL;
1164#endif
1161 if (!mem_section[SECTION_NR_TO_ROOT(nr)]) 1165 if (!mem_section[SECTION_NR_TO_ROOT(nr)])
1162 return NULL; 1166 return NULL;
1163 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; 1167 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
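Note: background for the new NULL check - mem_section is a two-level table, and with SPARSEMEM_EXTREME the root array itself is now allocated at runtime (see the mm/page_alloc.c hunk below), so early callers can observe it unset. The lookup it guards, spelled out:

        /* Sketch: a section number splits into a root index and an offset */
        unsigned long nr = pfn_to_section_nr(pfn);
        struct mem_section *ms =
                &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];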
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6598fb76d2c2..9816590d3ad2 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -829,6 +829,7 @@ struct drm_i915_gem_exec_fence {
829 829
830#define I915_EXEC_FENCE_WAIT (1<<0) 830#define I915_EXEC_FENCE_WAIT (1<<0)
831#define I915_EXEC_FENCE_SIGNAL (1<<1) 831#define I915_EXEC_FENCE_SIGNAL (1<<1)
832#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
832 __u32 flags; 833 __u32 flags;
833}; 834};
834 835
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dfdad67d8f6c..ff21b4dbb392 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -376,7 +376,7 @@ config STACK_VALIDATION
376 that runtime stack traces are more reliable. 376 that runtime stack traces are more reliable.
377 377
378 This is also a prerequisite for generation of ORC unwind data, which 378 This is also a prerequisite for generation of ORC unwind data, which
379 is needed for CONFIG_ORC_UNWINDER. 379 is needed for CONFIG_UNWINDER_ORC.
380 380
381 For more information, see 381 For more information, see
382 tools/objtool/Documentation/stack-validation.txt. 382 tools/objtool/Documentation/stack-validation.txt.
diff --git a/mm/gup.c b/mm/gup.c
index b2b4d4263768..dfcde13f289a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1643,6 +1643,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
1643 return 1; 1643 return 1;
1644} 1644}
1645 1645
1646static void gup_pgd_range(unsigned long addr, unsigned long end,
1647 int write, struct page **pages, int *nr)
1648{
1649 unsigned long next;
1650 pgd_t *pgdp;
1651
1652 pgdp = pgd_offset(current->mm, addr);
1653 do {
1654 pgd_t pgd = READ_ONCE(*pgdp);
1655
1656 next = pgd_addr_end(addr, end);
1657 if (pgd_none(pgd))
1658 return;
1659 if (unlikely(pgd_huge(pgd))) {
1660 if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
1661 pages, nr))
1662 return;
1663 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
1664 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
1665 PGDIR_SHIFT, next, write, pages, nr))
1666 return;
1667 } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
1668 return;
1669 } while (pgdp++, addr = next, addr != end);
1670}
1671
1672#ifndef gup_fast_permitted
1673/*
1674 * Check if it's allowed to use __get_user_pages_fast() for the range, or
1675 * we need to fall back to the slow version:
1676 */
1677bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
1678{
1679 unsigned long len, end;
1680
1681 len = (unsigned long) nr_pages << PAGE_SHIFT;
1682 end = start + len;
1683 return end >= start;
1684}
1685#endif
1686
1646/* 1687/*
1647 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to 1688 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
1648 * the regular GUP. It will only return non-negative values. 1689 * the regular GUP. It will only return non-negative values.
@@ -1650,10 +1691,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
1650int __get_user_pages_fast(unsigned long start, int nr_pages, int write, 1691int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
1651 struct page **pages) 1692 struct page **pages)
1652{ 1693{
1653 struct mm_struct *mm = current->mm;
1654 unsigned long addr, len, end; 1694 unsigned long addr, len, end;
1655 unsigned long next, flags; 1695 unsigned long flags;
1656 pgd_t *pgdp;
1657 int nr = 0; 1696 int nr = 0;
1658 1697
1659 start &= PAGE_MASK; 1698 start &= PAGE_MASK;
@@ -1677,45 +1716,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
1677 * block IPIs that come from THPs splitting. 1716 * block IPIs that come from THPs splitting.
1678 */ 1717 */
1679 1718
1680 local_irq_save(flags); 1719 if (gup_fast_permitted(start, nr_pages, write)) {
1681 pgdp = pgd_offset(mm, addr); 1720 local_irq_save(flags);
1682 do { 1721 gup_pgd_range(addr, end, write, pages, &nr);
1683 pgd_t pgd = READ_ONCE(*pgdp); 1722 local_irq_restore(flags);
1684 1723 }
1685 next = pgd_addr_end(addr, end);
1686 if (pgd_none(pgd))
1687 break;
1688 if (unlikely(pgd_huge(pgd))) {
1689 if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
1690 pages, &nr))
1691 break;
1692 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
1693 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
1694 PGDIR_SHIFT, next, write, pages, &nr))
1695 break;
1696 } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
1697 break;
1698 } while (pgdp++, addr = next, addr != end);
1699 local_irq_restore(flags);
1700 1724
1701 return nr; 1725 return nr;
1702} 1726}
1703 1727
1704#ifndef gup_fast_permitted
1705/*
1706 * Check if it's allowed to use __get_user_pages_fast() for the range, or
1707 * we need to fall back to the slow version:
1708 */
1709bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
1710{
1711 unsigned long len, end;
1712
1713 len = (unsigned long) nr_pages << PAGE_SHIFT;
1714 end = start + len;
1715 return end >= start;
1716}
1717#endif
1718
1719/** 1728/**
1720 * get_user_pages_fast() - pin user pages in memory 1729 * get_user_pages_fast() - pin user pages in memory
1721 * @start: starting user address 1730 * @start: starting user address
@@ -1735,12 +1744,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
1735int get_user_pages_fast(unsigned long start, int nr_pages, int write, 1744int get_user_pages_fast(unsigned long start, int nr_pages, int write,
1736 struct page **pages) 1745 struct page **pages)
1737{ 1746{
1747 unsigned long addr, len, end;
1738 int nr = 0, ret = 0; 1748 int nr = 0, ret = 0;
1739 1749
1740 start &= PAGE_MASK; 1750 start &= PAGE_MASK;
1751 addr = start;
1752 len = (unsigned long) nr_pages << PAGE_SHIFT;
1753 end = start + len;
1754
1755 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
1756 (void __user *)start, len)))
1757 return 0;
1741 1758
1742 if (gup_fast_permitted(start, nr_pages, write)) { 1759 if (gup_fast_permitted(start, nr_pages, write)) {
1743 nr = __get_user_pages_fast(start, nr_pages, write, pages); 1760 local_irq_disable();
1761 gup_pgd_range(addr, end, write, pages, &nr);
1762 local_irq_enable();
1744 ret = nr; 1763 ret = nr;
1745 } 1764 }
1746 1765
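Note: the #ifndef default shown earlier is an override hook - an architecture can supply a stricter gup_fast_permitted() from its headers and define the macro so the generic fallback compiles out. A hypothetical arch-side sketch (not any real architecture's implementation):

        #define gup_fast_permitted gup_fast_permitted
        static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
                                              int write)
        {
                unsigned long len = (unsigned long)nr_pages << PAGE_SHIFT;
                unsigned long end = start + len;

                /* reject wraparound and anything outside the user range */
                return end >= start && end <= TASK_SIZE_MAX;
        }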
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 77e4d3c5c57b..8dfd13f724d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5646,6 +5646,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
5646 unsigned long start_pfn, end_pfn; 5646 unsigned long start_pfn, end_pfn;
5647 int i, this_nid; 5647 int i, this_nid;
5648 5648
5649#ifdef CONFIG_SPARSEMEM_EXTREME
5650 if (!mem_section) {
5651 unsigned long size, align;
5652
5653 size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
5654 align = 1 << (INTERNODE_CACHE_SHIFT);
5655 mem_section = memblock_virt_alloc(size, align);
5656 }
5657#endif
5658
5649 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) 5659 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
5650 memory_present(this_nid, start_pfn, end_pfn); 5660 memory_present(this_nid, start_pfn, end_pfn);
5651} 5661}
diff --git a/mm/sparse.c b/mm/sparse.c
index 4900707ae146..044138852baf 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -23,8 +23,7 @@
23 * 1) mem_section - memory sections, mem_map's for valid memory 23 * 1) mem_section - memory sections, mem_map's for valid memory
24 */ 24 */
25#ifdef CONFIG_SPARSEMEM_EXTREME 25#ifdef CONFIG_SPARSEMEM_EXTREME
26struct mem_section *mem_section[NR_SECTION_ROOTS] 26struct mem_section **mem_section;
27 ____cacheline_internodealigned_in_smp;
28#else 27#else
29struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] 28struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
30 ____cacheline_internodealigned_in_smp; 29 ____cacheline_internodealigned_in_smp;
@@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
101int __section_nr(struct mem_section* ms) 100int __section_nr(struct mem_section* ms)
102{ 101{
103 unsigned long root_nr; 102 unsigned long root_nr;
104 struct mem_section* root; 103 struct mem_section *root = NULL;
105 104
106 for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) { 105 for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
107 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT); 106 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
112 break; 111 break;
113 } 112 }
114 113
115 VM_BUG_ON(root_nr == NR_SECTION_ROOTS); 114 VM_BUG_ON(!root);
116 115
117 return (root_nr * SECTIONS_PER_ROOT) + (ms - root); 116 return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
118} 117}
@@ -330,11 +329,17 @@ again:
330static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 329static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
331{ 330{
332 unsigned long usemap_snr, pgdat_snr; 331 unsigned long usemap_snr, pgdat_snr;
333 static unsigned long old_usemap_snr = NR_MEM_SECTIONS; 332 static unsigned long old_usemap_snr;
334 static unsigned long old_pgdat_snr = NR_MEM_SECTIONS; 333 static unsigned long old_pgdat_snr;
335 struct pglist_data *pgdat = NODE_DATA(nid); 334 struct pglist_data *pgdat = NODE_DATA(nid);
336 int usemap_nid; 335 int usemap_nid;
337 336
337 /* First call */
338 if (!old_usemap_snr) {
339 old_usemap_snr = NR_MEM_SECTIONS;
340 old_pgdat_snr = NR_MEM_SECTIONS;
341 }
342
338 usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); 343 usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
339 pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); 344 pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
340 if (usemap_snr == pgdat_snr) 345 if (usemap_snr == pgdat_snr)
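Note: the "First call" hunk above swaps static initializers for lazy seeding, apparently so the initial value no longer has to be a compile-time use of NR_MEM_SECTIONS. The idiom isolated as a sketch (old_snr is an illustrative name; 0 doubles as "not yet seeded"):

        static unsigned long old_snr;

        if (!old_snr)
                old_snr = NR_MEM_SECTIONS;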
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579b5b9f..4a72ee4e2ae9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -376,6 +376,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
376 dev->name); 376 dev->name);
377 vlan_vid_add(dev, htons(ETH_P_8021Q), 0); 377 vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
378 } 378 }
379 if (event == NETDEV_DOWN &&
380 (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
381 vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
379 382
380 vlan_info = rtnl_dereference(dev->vlan_info); 383 vlan_info = rtnl_dereference(dev->vlan_info);
381 if (!vlan_info) 384 if (!vlan_info)
@@ -423,9 +426,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
423 struct net_device *tmp; 426 struct net_device *tmp;
424 LIST_HEAD(close_list); 427 LIST_HEAD(close_list);
425 428
426 if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
427 vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
428
429 /* Put all VLANs for this dev in the down state too. */ 429 /* Put all VLANs for this dev in the down state too. */
430 vlan_group_for_each_dev(grp, i, vlandev) { 430 vlan_group_for_each_dev(grp, i, vlandev) {
431 flgs = vlandev->flags; 431 flgs = vlandev->flags;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e6c06aa349a6..1e2929f4290a 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -133,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
133 if (err) 133 if (err)
134 return err; 134 return err;
135 } 135 }
136
137 return 0;
136 } 138 }
137 139
138 for_each_set_bit(port, group, ds->num_ports) 140 for_each_set_bit(port, group, ds->num_ports)
@@ -180,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
180 if (err) 182 if (err)
181 return err; 183 return err;
182 } 184 }
185
186 return 0;
183 } 187 }
184 188
185 for_each_set_bit(port, members, ds->num_ports) 189 for_each_set_bit(port, members, ds->num_ports)
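Note: both hunks plug the same hole - switchdev object adds run in two phases, and without the explicit return the prepare pass fell straight through into the commit pass. The intended shape, condensed (VLAN case shown):

        if (switchdev_trans_ph_prepare(trans)) {
                for_each_set_bit(port, members, ds->num_ports) {
                        err = ds->ops->port_vlan_prepare(ds, port, vlan, trans);
                        if (err)
                                return err;
                }
                return 0;       /* prepare done - do not fall into commit */
        }

        for_each_set_bit(port, members, ds->num_ports)
                ds->ops->port_vlan_add(ds, port, vlan, trans);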
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b2fc7163bd40..b6bb3cdfad09 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2615,7 +2615,6 @@ void tcp_simple_retransmit(struct sock *sk)
2615 struct tcp_sock *tp = tcp_sk(sk); 2615 struct tcp_sock *tp = tcp_sk(sk);
2616 struct sk_buff *skb; 2616 struct sk_buff *skb;
2617 unsigned int mss = tcp_current_mss(sk); 2617 unsigned int mss = tcp_current_mss(sk);
2618 u32 prior_lost = tp->lost_out;
2619 2618
2620 tcp_for_write_queue(skb, sk) { 2619 tcp_for_write_queue(skb, sk) {
2621 if (skb == tcp_send_head(sk)) 2620 if (skb == tcp_send_head(sk))
@@ -2632,7 +2631,7 @@ void tcp_simple_retransmit(struct sock *sk)
2632 2631
2633 tcp_clear_retrans_hints_partial(tp); 2632 tcp_clear_retrans_hints_partial(tp);
2634 2633
2635 if (prior_lost == tp->lost_out) 2634 if (!tp->lost_out)
2636 return; 2635 return;
2637 2636
2638 if (tcp_is_reno(tp)) 2637 if (tcp_is_reno(tp))
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 11f69bbf9307..b6a2aa1dcf56 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 	 * is freed by GSO engine
 	 */
 	if (copy_destructor) {
+		int delta;
+
 		swap(gso_skb->sk, skb->sk);
 		swap(gso_skb->destructor, skb->destructor);
 		sum_truesize += skb->truesize;
-		refcount_add(sum_truesize - gso_skb->truesize,
-			   &skb->sk->sk_wmem_alloc);
+		delta = sum_truesize - gso_skb->truesize;
+		/* In some pathological cases, delta can be negative.
+		 * We need to either use refcount_add() or refcount_sub_and_test()
+		 */
+		if (likely(delta >= 0))
+			refcount_add(delta, &skb->sk->sk_wmem_alloc);
+		else
+			WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
 	}
 
 	delta = htonl(oldlen + (skb_tail_pointer(skb) -
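
The comment added in the hunk carries the key constraint: refcount_add() must never see a negative count, so the signed truesize delta is split between an add and a sub-and-test, with WARN_ON_ONCE() firing if the subtraction would hit zero. A userspace sketch of the same split, using a C11 atomic in place of refcount_t (the refcount_* names in comments are the kernel's; the rest is illustrative):

    #include <assert.h>
    #include <stdatomic.h>

    static void account_delta(atomic_int *wmem, int delta)
    {
        if (delta >= 0) {
            atomic_fetch_add(wmem, delta);          /* refcount_add() */
        } else {
            /* refcount_sub_and_test() returns true when the count hits
             * zero, which would be a bug on this path: WARN_ON_ONCE(). */
            int old = atomic_fetch_sub(wmem, -delta);
            assert(old + delta != 0);
        }
    }

    int main(void)
    {
        atomic_int wmem = 1000;

        account_delta(&wmem, 64);   /* common case: truesize grows */
        account_delta(&wmem, -32);  /* pathological case: it shrinks */
        return atomic_load(&wmem) == 1032 ? 0 : 1;
    }
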
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9722bf839d9d..b4e421aa9727 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 			break;
 		}
 
-		/* XXX when can this fail? */
-		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
-		rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
+		rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
 			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
 			 (long) ib_sg_dma_address(
 				ic->i_cm_id->device,
-				&recv->r_frag->f_sg),
-			ret);
+				&recv->r_frag->f_sg));
+
+		/* XXX when can this fail? */
+		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
 		if (ret) {
 			rds_ib_conn_error(conn, "recv post on "
 			       "%pI4 returned %d, disconnecting and "
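
The reordering here follows the ownership rule for posted receive buffers: once ib_post_recv() succeeds, the completion handler may already be running and reclaiming the fragment, so the rdsdebug() that dereferences it must happen first. A userspace sketch of that rule with illustrative names:

    #include <stdio.h>
    #include <stdlib.h>

    struct recv_buf { int id; };

    /* Models the hand-off: on success the consumer (here standing in for
     * the IB completion path) owns the buffer and may free it at any time. */
    static int post_to_consumer(struct recv_buf *buf)
    {
        free(buf);
        return 0;
    }

    int main(void)
    {
        struct recv_buf *buf = malloc(sizeof(*buf));

        if (!buf)
            return 1;
        buf->id = 42;

        printf("posting recv %d\n", buf->id);   /* log while we still own it */
        if (post_to_consumer(buf))
            fprintf(stderr, "post failed\n");
        /* dereferencing buf->id here would be the race the hunk removes */
        return 0;
    }
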
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index bb831d49bcfd..e63af4e19382 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
 
 __objtool_obj := $(objtree)/tools/objtool/objtool
 
-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
 
 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 6598fb76d2c2..9816590d3ad2 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -829,6 +829,7 @@ struct drm_i915_gem_exec_fence {
 
 #define I915_EXEC_FENCE_WAIT            (1<<0)
 #define I915_EXEC_FENCE_SIGNAL          (1<<1)
+#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
 	__u32 flags;
 };
 
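
__I915_EXEC_FENCE_UNKNOWN_FLAGS uses a common UAPI idiom: negating the highest known flag shifted left by one yields, in two's complement, a mask with every bit above that flag set, so any future/unknown flag bit can be rejected. A quick check of the arithmetic (the flag values mirror the header above):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t wait_flag   = 1u << 0;             /* I915_EXEC_FENCE_WAIT */
        uint32_t signal_flag = 1u << 1;             /* I915_EXEC_FENCE_SIGNAL */
        uint32_t unknown     = -(signal_flag << 1); /* bits 2..31 all set */

        assert(unknown == 0xFFFFFFFCu);
        assert(((wait_flag | signal_flag) & unknown) == 0); /* known flags pass */
        assert(((1u << 7) & unknown) != 0);                 /* future flag rejected */
        return 0;
    }
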
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c0e26ad1fa7e..9b341584eb1b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 		if (insn->dead_end)
 			return 0;
 
-		insn = next_insn;
-		if (!insn) {
+		if (!next_insn) {
+			if (state.cfa.base == CFI_UNDEFINED)
+				return 0;
 			WARN("%s: unexpected end of section", sec->name);
 			return 1;
 		}
+
+		insn = next_insn;
 	}
 
 	return 0;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 31e0f9143840..07f329919828 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -70,7 +70,7 @@ static void cmd_usage(void)
 
 	printf("\n");
 
-	exit(1);
+	exit(129);
 }
 
 static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
 			break;
 		} else {
 			fprintf(stderr, "Unknown option: %s\n", cmd);
-			fprintf(stderr, "\n Usage: %s\n",
-				objtool_usage_string);
-			exit(1);
+			cmd_usage();
 		}
 
 		(*argv)++;
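
Folding the unknown-option path into cmd_usage() removes the duplicated usage printing and changes the exit status from 1 to 129, which appears to follow the git/perf convention this subcommand scaffolding derives from: 1 reports findings, 129 reports a bad invocation. A sketch of that split for a hypothetical tool (not objtool's actual main()):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static void usage(const char *prog)
    {
        fprintf(stderr, "usage: %s check|orc <file>\n", prog);
        exit(129);  /* usage error: distinguishable from "problems found" */
    }

    int main(int argc, char **argv)
    {
        if (argc < 2)
            usage(argv[0]);
        if (strcmp(argv[1], "check") && strcmp(argv[1], "orc"))
            usage(argv[0]);
        /* ... run the subcommand; return 1 if it finds problems ... */
        return 0;
    }
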
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 771ddab94bb0..d5d7fff1c211 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1138,6 +1138,14 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	return err;
 }
 
+static void trace__symbols__exit(struct trace *trace)
+{
+	machine__exit(trace->host);
+	trace->host = NULL;
+
+	symbol__exit();
+}
+
 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
 {
 	int idx;
@@ -2481,6 +2489,8 @@ out_disable:
 	}
 
 out_delete_evlist:
+	trace__symbols__exit(trace);
+
 	perf_evlist__delete(evlist);
 	trace->evlist = NULL;
 	trace->live = false;
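
The new helper gives trace__symbols__init() a symmetric teardown on the shared exit path, so the host machine and symbol state no longer leak when the run ends. A minimal sketch of the paired init/exit shape with illustrative names (machine__exit() and symbol__exit() are the real perf calls the helper wraps):

    #include <stdlib.h>

    struct host_state { int loaded; };

    static struct host_state *symbols_init(void)
    {
        return calloc(1, sizeof(struct host_state));
    }

    static void symbols_exit(struct host_state **host)
    {
        free(*host);
        *host = NULL;   /* mirrors trace->host = NULL: safe against reuse */
    }

    int main(void)
    {
        struct host_state *host = symbols_init();

        if (!host)
            return 1;
        /* ... trace ... */
        symbols_exit(&host);    /* previously missing on out_delete_evlist */
        return 0;
    }
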
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 6680e4fb7967..025729510525 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -5,6 +5,7 @@
 %option stack
 %option bison-locations
 %option yylineno
+%option reject
 
 %{
 #include <errno.h>
@@ -339,8 +340,8 @@ r{num_raw_hex} { return raw(yyscanner); }
 {num_hex}		{ return value(yyscanner, 16); }
 
 {modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); }
-{bpf_object}		{ if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_OBJECT); }
-{bpf_source}		{ if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_SOURCE); }
+{bpf_object}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
+{bpf_source}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
 {name}			{ return pmu_str_check(yyscanner); }
 "/"			{ BEGIN(config); return '/'; }
 -			{ return '-'; }
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 961e3ee26c27..66e5ce5b91f0 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -115,7 +115,15 @@ static void check_valid_segment(uint16_t index, int ldt,
 		return;
 	}
 
-	if (ar != expected_ar) {
+	/* The SDM says "bits 19:16 are undefined".  Thanks. */
+	ar &= ~0xF0000;
+
+	/*
+	 * NB: Different Linux versions do different things with the
+	 * accessed bit in set_thread_area().
+	 */
+	if (ar != expected_ar &&
+	    (ldt || ar != (expected_ar | AR_ACCESSED))) {
 		printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
 		       (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
 		nerrs++;
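
The comparison is loosened in two deliberate ways: LAR's bits 19:16 are masked because the SDM leaves them undefined, and on the GDT path the accessed bit is tolerated because different kernels handle it differently in set_thread_area(). A standalone sketch of the adjusted check (AR_ACCESSED mirrors the selftest's 1 << 8):

    #include <assert.h>
    #include <stdint.h>

    #define AR_ACCESSED (1u << 8)   /* same bit the selftest defines */

    /* Mask the undefined bits, then accept either the exact AR or, for
     * GDT entries, the AR with the accessed bit set. */
    static int ar_matches(uint32_t ar, uint32_t expected_ar, int ldt)
    {
        ar &= ~0xF0000u;    /* SDM: bits 19:16 are undefined */
        if (ar == expected_ar)
            return 1;
        return !ldt && ar == (expected_ar | AR_ACCESSED);
    }

    int main(void)
    {
        assert(ar_matches(0x1234 | 0x50000, 0x1234, 1));        /* junk in 19:16 */
        assert(ar_matches(0x1234 | AR_ACCESSED, 0x1234, 0));    /* GDT accessed */
        assert(!ar_matches(0x1234 | AR_ACCESSED, 0x1234, 1));   /* LDT stays strict */
        return 0;
    }
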
@@ -129,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt,
 	}
 }
 
-static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
-			       bool oldmode)
+static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
+			       bool oldmode, bool ldt)
 {
-	int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
-			  desc, sizeof(*desc));
-	if (ret < -1)
-		errno = -ret;
+	struct user_desc desc = *d;
+	int ret;
+
+	if (!ldt) {
+#ifndef __i386__
+		/* No point testing set_thread_area in a 64-bit build */
+		return false;
+#endif
+		if (!gdt_entry_num)
+			return false;
+		desc.entry_number = gdt_entry_num;
+
+		ret = syscall(SYS_set_thread_area, &desc);
+	} else {
+		ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+			      &desc, sizeof(desc));
+
+		if (ret < -1)
+			errno = -ret;
+
+		if (ret != 0 && errno == ENOSYS) {
+			printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+			return false;
+		}
+	}
+
 	if (ret == 0) {
-		uint32_t limit = desc->limit;
-		if (desc->limit_in_pages)
+		uint32_t limit = desc.limit;
+		if (desc.limit_in_pages)
 			limit = (limit << 12) + 4095;
-		check_valid_segment(desc->entry_number, 1, ar, limit, true);
+		check_valid_segment(desc.entry_number, ldt, ar, limit, true);
 		return true;
-	} else if (errno == ENOSYS) {
-		printf("[OK]\tmodify_ldt returned -ENOSYS\n");
-		return false;
 	} else {
-		if (desc->seg_32bit) {
-			printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+		if (desc.seg_32bit) {
+			printf("[FAIL]\tUnexpected %s failure %d\n",
+			       ldt ? "modify_ldt" : "set_thread_area",
 			       errno);
 			nerrs++;
 			return false;
 		} else {
-			printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+			printf("[OK]\t%s rejected 16 bit segment\n",
+			       ldt ? "modify_ldt" : "set_thread_area");
 			return false;
 		}
 	}
@@ -160,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
 
 static bool install_valid(const struct user_desc *desc, uint32_t ar)
 {
-	return install_valid_mode(desc, ar, false);
+	bool ret = install_valid_mode(desc, ar, false, true);
+
+	if (desc->contents <= 1 && desc->seg_32bit &&
+	    !desc->seg_not_present) {
+		/* Should work in the GDT, too. */
+		install_valid_mode(desc, ar, false, false);
+	}
+
+	return ret;
 }
 
 static void install_invalid(const struct user_desc *desc, bool oldmode)
166static void install_invalid(const struct user_desc *desc, bool oldmode) 203static void install_invalid(const struct user_desc *desc, bool oldmode)
@@ -367,9 +404,24 @@ static void do_simple_tests(void)
 	install_invalid(&desc, false);
 
 	desc.seg_not_present = 0;
-	desc.read_exec_only = 0;
 	desc.seg_32bit = 1;
+	desc.read_exec_only = 0;
+	desc.limit = 0xfffff;
+
 	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+
+	desc.limit_in_pages = 1;
+
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+	desc.contents = 1;
+	desc.read_exec_only = 0;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+	desc.limit = 0;
 	install_invalid(&desc, true);
 }
 
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 555e43ca846b..7a1cc0e56d2d 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -189,17 +189,29 @@ void lots_o_noops_around_write(int *write_to_me)
 #define u64 uint64_t
 
 #ifdef __i386__
-#define SYS_mprotect_key 380
-#define SYS_pkey_alloc	 381
-#define SYS_pkey_free	 382
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc	 381
+# define SYS_pkey_free	 382
+#endif
 #define REG_IP_IDX REG_EIP
 #define si_pkey_offset 0x14
+
 #else
-#define SYS_mprotect_key 329
-#define SYS_pkey_alloc	 330
-#define SYS_pkey_free	 331
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc	 330
+# define SYS_pkey_free	 331
+#endif
 #define REG_IP_IDX REG_RIP
 #define si_pkey_offset 0x20
+
 #endif
 
 void dump_mem(void *dumpme, int len_bytes)
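
The #ifndef guards address redefinition warnings: newer glibc releases export these syscall numbers from <sys/syscall.h>, so the hard-coded fallbacks are supplied only when the header does not already define them. A minimal sketch of the same pattern, assuming an x86-64 build (330 is the value from the hunk above):

    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef SYS_pkey_alloc
    # define SYS_pkey_alloc 330 /* fallback only when libc doesn't define it */
    #endif

    int main(void)
    {
        /* pkey_alloc(flags, access_rights) */
        long ret = syscall(SYS_pkey_alloc, 0, 0);

        if (ret < 0) {
            perror("pkey_alloc");   /* e.g. ENOSYS/EINVAL without pkeys */
            return 1;
        }
        printf("allocated pkey %ld\n", ret);
        return 0;
    }
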