author		Linus Torvalds <torvalds@linux-foundation.org>	2018-05-21 14:23:26 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-05-21 14:23:26 -0400
commit		3b78ce4a34b761c7fe13520de822984019ff1a8f
tree		63b93664a184c2d561a70c7f8d16a388750739f7
parent		6741c4bb389da103c0d79ad1961884628900bfe6
parent		af86ca4e3088fe5eacf2f7e58c01fa68ca067672
Merge branch 'speck-v20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Merge speculative store buffer bypass fixes from Thomas Gleixner:

 - rework of the SPEC_CTRL MSR management to accommodate the new fancy
   SSBD (Speculative Store Bypass Disable) bit handling.

 - the CPU bug and sysfs infrastructure for the exciting new
   Speculative Store Bypass 'feature'.

 - support for disabling SSB via LS_CFG MSR on AMD CPUs including
   Hyperthread synchronization on ZEN.

 - PRCTL support for dynamic runtime control of SSB

 - SECCOMP integration to automatically disable SSB for sandboxed
   processes with a filter flag for opt-out.

 - KVM integration to allow guests fiddling with SSBD including the new
   software MSR VIRT_SPEC_CTRL to handle the LS_CFG based oddities on
   AMD.

 - BPF protection against SSB

 .. this is just the core and x86 side, other architecture support will
 come separately.

* 'speck-v20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (49 commits)
  bpf: Prevent memory disambiguation attack
  x86/bugs: Rename SSBD_NO to SSB_NO
  KVM: SVM: Implement VIRT_SPEC_CTRL support for SSBD
  x86/speculation, KVM: Implement support for VIRT_SPEC_CTRL/LS_CFG
  x86/bugs: Rework spec_ctrl base and mask logic
  x86/bugs: Remove x86_spec_ctrl_set()
  x86/bugs: Expose x86_spec_ctrl_base directly
  x86/bugs: Unify x86_spec_ctrl_{set_guest,restore_host}
  x86/speculation: Rework speculative_store_bypass_update()
  x86/speculation: Add virtualized speculative store bypass disable support
  x86/bugs, KVM: Extend speculation control for VIRT_SPEC_CTRL
  x86/speculation: Handle HT correctly on AMD
  x86/cpufeatures: Add FEATURE_ZEN
  x86/cpufeatures: Disentangle SSBD enumeration
  x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration from IBRS
  x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP
  KVM: SVM: Move spec control call after restore of GS
  x86/cpu: Make alternative_msr_write work for 32-bit code
  x86/bugs: Fix the parameters alignment and missing void
  x86/bugs: Make cpu_show_common() static
  ...
-rw-r--r--	Documentation/ABI/testing/sysfs-devices-system-cpu	1
-rw-r--r--	Documentation/admin-guide/kernel-parameters.txt		45
-rw-r--r--	Documentation/userspace-api/index.rst			1
-rw-r--r--	Documentation/userspace-api/spec_ctrl.rst		94
-rw-r--r--	arch/x86/include/asm/cpufeatures.h			20
-rw-r--r--	arch/x86/include/asm/kvm_host.h				2
-rw-r--r--	arch/x86/include/asm/msr-index.h			9
-rw-r--r--	arch/x86/include/asm/nospec-branch.h			43
-rw-r--r--	arch/x86/include/asm/spec-ctrl.h			80
-rw-r--r--	arch/x86/include/asm/thread_info.h			4
-rw-r--r--	arch/x86/kernel/cpu/amd.c				22
-rw-r--r--	arch/x86/kernel/cpu/bugs.c				397
-rw-r--r--	arch/x86/kernel/cpu/common.c				77
-rw-r--r--	arch/x86/kernel/cpu/cpu.h				2
-rw-r--r--	arch/x86/kernel/cpu/intel.c				3
-rw-r--r--	arch/x86/kernel/process.c				146
-rw-r--r--	arch/x86/kernel/smpboot.c				5
-rw-r--r--	arch/x86/kvm/cpuid.c					21
-rw-r--r--	arch/x86/kvm/svm.c					66
-rw-r--r--	arch/x86/kvm/vmx.c					31
-rw-r--r--	arch/x86/kvm/x86.c					13
-rw-r--r--	drivers/base/cpu.c					8
-rw-r--r--	fs/proc/array.c						25
-rw-r--r--	include/linux/bpf_verifier.h				1
-rw-r--r--	include/linux/cpu.h					2
-rw-r--r--	include/linux/nospec.h					10
-rw-r--r--	include/linux/sched.h					10
-rw-r--r--	include/linux/seccomp.h					5
-rw-r--r--	include/uapi/linux/prctl.h				12
-rw-r--r--	include/uapi/linux/seccomp.h				5
-rw-r--r--	kernel/bpf/verifier.c					59
-rw-r--r--	kernel/seccomp.c					21
-rw-r--r--	kernel/sys.c						23
-rw-r--r--	tools/testing/selftests/seccomp/seccomp_bpf.c		22
34 files changed, 1166 insertions(+), 119 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 025b7cf3768d..bd4975e132d3 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/meltdown
 		/sys/devices/system/cpu/vulnerabilities/spectre_v1
 		/sys/devices/system/cpu/vulnerabilities/spectre_v2
+		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
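
An illustrative aside, not part of this merge: a minimal userspace sketch that
reads the new sysfs file and prints the mitigation string produced by
cpu_show_spec_store_bypass()/cpu_show_common() further down in this diff. The
file name check_ssb.c is hypothetical.

/* check_ssb.c - print the SSB vulnerability status of this machine */
#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spec_store_bypass", "r");

	if (!f) {
		perror("fopen");	/* kernels without this commit lack the file */
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "Mitigation: Speculative Store Bypass disabled via prctl" */
	fclose(f);
	return 0;
}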
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 11fc28ecdb6d..f2040d46f095 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2680,6 +2680,9 @@
 			allow data leaks with this option, which is equivalent
 			to spectre_v2=off.
 
+	nospec_store_bypass_disable
+			[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
@@ -4025,6 +4028,48 @@
 			Not specifying this option is equivalent to
 			spectre_v2=auto.
 
+	spec_store_bypass_disable=
+			[HW] Control Speculative Store Bypass (SSB) Disable mitigation
+			(Speculative Store Bypass vulnerability)
+
+			Certain CPUs are vulnerable to an exploit against a
+			common industry-wide performance optimization known
+			as "Speculative Store Bypass" in which recent stores
+			to the same memory location may not be observed by
+			later loads during speculative execution. The idea
+			is that such stores are unlikely and that they can
+			be detected prior to instruction retirement at the
+			end of a particular speculation execution window.
+
+			In vulnerable processors, the speculatively forwarded
+			store can be used in a cache side channel attack, for
+			example to read memory to which the attacker does not
+			directly have access (e.g. inside sandboxed code).
+
+			This parameter controls whether the Speculative Store
+			Bypass optimization is used.
+
+			on      - Unconditionally disable Speculative Store Bypass
+			off     - Unconditionally enable Speculative Store Bypass
+			auto    - Kernel detects whether the CPU model contains an
+				  implementation of Speculative Store Bypass and
+				  picks the most appropriate mitigation. If the
+				  CPU is not vulnerable, "off" is selected. If the
+				  CPU is vulnerable, the default mitigation is
+				  architecture and Kconfig dependent. See below.
+			prctl   - Control Speculative Store Bypass per thread
+				  via prctl. Speculative Store Bypass is enabled
+				  for a process by default. The state of the control
+				  is inherited on fork.
+			seccomp - Same as "prctl" above, but all seccomp threads
+				  will disable SSB unless they explicitly opt out.
+
+			Not specifying this option is equivalent to
+			spec_store_bypass_disable=auto.
+
+			Default mitigations:
+			X86:	If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+
 	spia_io_base=	[HW,MTD]
 	spia_fio_base=
 	spia_pedr=
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 7b2eb1b7d4ca..a3233da7fa88 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -19,6 +19,7 @@ place where this information is gathered.
    no_new_privs
    seccomp_filter
    unshare
+   spec_ctrl
 
 .. only:: subproject and html
 
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
new file mode 100644
index 000000000000..32f3d55c54b7
--- /dev/null
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -0,0 +1,94 @@
+===================
+Speculation Control
+===================
+
+Quite a few CPUs have speculation-related misfeatures which are in
+fact vulnerabilities causing data leaks in various forms, even across
+privilege domains.
+
+The kernel provides mitigations for such vulnerabilities in various
+forms. Some of these mitigations are compile-time configurable and some
+can be supplied on the kernel command line.
+
+There is also a class of mitigations which are very expensive, but they can
+be restricted to a certain set of processes or tasks in controlled
+environments. The mechanism to control these mitigations is via
+:manpage:`prctl(2)`.
+
+There are two prctl options which are related to this:
+
+ * PR_GET_SPECULATION_CTRL
+
+ * PR_SET_SPECULATION_CTRL
+
+PR_GET_SPECULATION_CTRL
+-----------------------
+
+PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
+the following meaning:
+
+==== ===================== ===================================================
+Bit  Define                Description
+==== ===================== ===================================================
+0    PR_SPEC_PRCTL         Mitigation can be controlled per task by
+                           PR_SET_SPECULATION_CTRL.
+1    PR_SPEC_ENABLE        The speculation feature is enabled, mitigation is
+                           disabled.
+2    PR_SPEC_DISABLE       The speculation feature is disabled, mitigation is
+                           enabled.
+3    PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
+                           subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+==== ===================== ===================================================
+
+If all bits are 0, the CPU is not affected by the speculation misfeature.
+
+If PR_SPEC_PRCTL is set, then per-task control of the mitigation is
+available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
+misfeature will fail.
+
+PR_SET_SPECULATION_CTRL
+-----------------------
+
+PR_SET_SPECULATION_CTRL allows controlling the speculation misfeature, which
+is selected by arg2 of :manpage:`prctl(2)`, per task. arg3 is used to hand
+in the control value, i.e. either PR_SPEC_ENABLE, PR_SPEC_DISABLE or
+PR_SPEC_FORCE_DISABLE.
+
+Common error codes
+------------------
+======= =================================================================
+Value   Meaning
+======= =================================================================
+EINVAL  The prctl is not implemented by the architecture or unused
+        prctl(2) arguments are not 0.
+
+ENODEV  arg2 selects a speculation misfeature which is not supported.
+======= =================================================================
+
+PR_SET_SPECULATION_CTRL error codes
+-----------------------------------
+======= =================================================================
+Value   Meaning
+======= =================================================================
+0       Success
+
+ERANGE  arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
+        PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE.
+
+ENXIO   Control of the selected speculation misfeature is not possible.
+        See PR_GET_SPECULATION_CTRL.
+
+EPERM   Speculation was disabled with PR_SPEC_FORCE_DISABLE and the caller
+        tried to enable it again.
+======= =================================================================
+
+Speculation misfeature controls
+-------------------------------
+- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
+
+  Invocations:
+   * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
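
A minimal userspace sketch of the interface documented above (an illustrative
aside, not part of the patch; it assumes a libc where <sys/prctl.h> pulls in
the PR_SPEC_* constants from <linux/prctl.h>):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>
#include <linux/prctl.h>	/* PR_{GET,SET}_SPECULATION_CTRL, PR_SPEC_* */

int main(void)
{
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

	if (state < 0) {
		/* ENODEV: the kernel does not know this misfeature */
		fprintf(stderr, "PR_GET_SPECULATION_CTRL: %s\n", strerror(errno));
		return 1;
	}
	if (state == 0) {
		puts("CPU not affected by Speculative Store Bypass");
		return 0;
	}
	if (!(state & PR_SPEC_PRCTL)) {
		fputs("per-task SSB control not available (see PR_SPEC_PRCTL)\n", stderr);
		return 1;
	}

	/* Disable SSB for this task; FORCE_DISABLE makes it irrevocable. */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		  PR_SPEC_FORCE_DISABLE, 0, 0)) {
		perror("PR_SET_SPECULATION_CTRL");
		return 1;
	}
	puts("speculative store bypass force-disabled");
	return 0;
}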
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 578793e97431..fb00a2fca990 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
 #define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
 #define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL	( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD		( 7*32+17) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD		( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS		( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB		(13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP		(13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD		(13*32+25) /* Virtualized Speculative Store Bypass Disable */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
@@ -334,6 +340,7 @@
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
 /*
  * BUG word(s)
@@ -363,5 +370,6 @@
 #define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 #define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS	X86_BUG(17) /* CPU is affected by speculative store bypass attack */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c25775fad4ed..f4b2588865e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -924,7 +924,7 @@ struct kvm_x86_ops {
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
 	bool (*cpu_has_accelerated_tpr)(void);
-	bool (*cpu_has_high_real_mode_segbase)(void);
+	bool (*has_emulated_msr)(int index);
 	void (*cpuid_update)(struct kvm_vcpu *vcpu);
 
 	struct kvm *(*vm_alloc)(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 53d5b1b9255e..fda2114197b3 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -42,6 +42,8 @@
 #define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
 #define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
 #define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_SSBD_SHIFT		2	   /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD			(1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
 
 #define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
@@ -68,6 +70,11 @@
 #define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
 #define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
 #define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
+#define ARCH_CAP_SSB_NO			(1 << 4)   /*
+						    * Not susceptible to Speculative Store Bypass
+						    * attack, so no Speculative Store Bypass
+						    * control required.
+						    */
 
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
@@ -340,6 +347,8 @@
 #define MSR_AMD64_SEV_ENABLED_BIT	0
 #define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
 
+#define MSR_AMD64_VIRT_SPEC_CTRL	0xc001011f
+
 /* Fam 17h MSRs */
 #define MSR_F17H_IRPERF			0xc00000e9
 
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f928ad9b143f..8b38df98548e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -217,6 +217,14 @@ enum spectre_v2_mitigation {
217 SPECTRE_V2_IBRS, 217 SPECTRE_V2_IBRS,
218}; 218};
219 219
220/* The Speculative Store Bypass disable variants */
221enum ssb_mitigation {
222 SPEC_STORE_BYPASS_NONE,
223 SPEC_STORE_BYPASS_DISABLE,
224 SPEC_STORE_BYPASS_PRCTL,
225 SPEC_STORE_BYPASS_SECCOMP,
226};
227
220extern char __indirect_thunk_start[]; 228extern char __indirect_thunk_start[];
221extern char __indirect_thunk_end[]; 229extern char __indirect_thunk_end[];
222 230
@@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void)
 #endif
 }
 
-#define alternative_msr_write(_msr, _val, _feature)		\
-	asm volatile(ALTERNATIVE("",				\
-				 "movl %[msr], %%ecx\n\t"	\
-				 "movl %[val], %%eax\n\t"	\
-				 "movl $0, %%edx\n\t"		\
-				 "wrmsr",			\
-				 _feature)			\
-		     : : [msr] "i" (_msr), [val] "i" (_val)	\
-		     : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+		: : "c" (msr),
+		    "a" ((u32)val),
+		    "d" ((u32)(val >> 32)),
+		    [feature] "i" (feature)
+		: "memory");
+}
 
 static inline void indirect_branch_prediction_barrier(void)
 {
-	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
-			      X86_FEATURE_USE_IBPB);
+	u64 val = PRED_CMD_IBPB;
+
+	alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
 }
 
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
 /*
  * With retpoline, we must use IBRS to restrict branch prediction
  * before calling into firmware.
@@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void)
  */
 #define firmware_restrict_branch_speculation_start()			\
 do {									\
+	u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS;			\
+									\
 	preempt_disable();						\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,	\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
 			      X86_FEATURE_USE_IBRS_FW);			\
 } while (0)
 
 #define firmware_restrict_branch_speculation_end()			\
 do {									\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,			\
+	u64 val = x86_spec_ctrl_base;					\
+									\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
 			      X86_FEATURE_USE_IBRS_FW);			\
 	preempt_enable();						\
 } while (0)
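
An aside on the rework above: WRMSR takes the MSR index in ECX and the 64-bit
value split across EDX:EAX, which is why the new helper passes (u32)val and
(u32)(val >> 32) as separate operands instead of 32-bit immediates. A trivial
userspace sketch of that split (illustration only, not part of the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t val = 0x123456789abcdef0ULL;
	uint32_t eax = (uint32_t)val;		/* low half, goes in EAX */
	uint32_t edx = (uint32_t)(val >> 32);	/* high half, goes in EDX */

	/* Recombining the halves yields the original 64-bit value. */
	assert(((uint64_t)edx << 32 | eax) == val);
	return 0;
}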
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
new file mode 100644
index 000000000000..ae7c2c5cd7f0
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <linux/thread_info.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter and also
+ * the guest's version of VIRT_SPEC_CTRL, if emulated.
+ */
+extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+
+/**
+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
+ * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *				(may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+}
+
+/**
+ * x86_spec_ctrl_restore_host - Restore host speculation control registers
+ * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *				(may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+}
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
+
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+{
+	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+	return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+	return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+{
+	return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+}
+
+#ifdef CONFIG_SMP
+extern void speculative_store_bypass_ht_init(void);
+#else
+static inline void speculative_store_bypass_ht_init(void) { }
+#endif
+
+extern void speculative_store_bypass_update(unsigned long tif);
+
+static inline void speculative_store_bypass_update_current(void)
+{
+	speculative_store_bypass_update(current_thread_info()->flags);
+}
+
+#endif
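
The two translation helpers above shift the same SSBD intent between
thread-flag bit 5 (TIF_SSBD, defined in the thread_info.h hunk below) and MSR
bit 2 (SPEC_CTRL_SSBD). A standalone sketch of that arithmetic with the
constants inlined (illustration only, not kernel code):

#include <assert.h>
#include <stdint.h>

#define TIF_SSBD		5
#define _TIF_SSBD		(1UL << TIF_SSBD)
#define SPEC_CTRL_SSBD_SHIFT	2
#define SPEC_CTRL_SSBD		(1ULL << SPEC_CTRL_SSBD_SHIFT)

int main(void)
{
	uint64_t tifn = _TIF_SSBD;	/* task has SSB disabled */

	/* tif -> spec_ctrl: bit 5 shifted down by 3 becomes bit 2 */
	uint64_t msr_bits = (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
	assert(msr_bits == SPEC_CTRL_SSBD);

	/* spec_ctrl -> tif: bit 2 shifted up by 3 becomes bit 5 */
	uint64_t tif_bits = (msr_bits & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
	assert(tif_bits == _TIF_SSBD);
	return 0;
}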
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a5d9521bb2cb..2ff2a30a264f 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,6 +79,7 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
+#define TIF_SSBD		5	/* Reduced data speculation */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
@@ -105,6 +106,7 @@ struct thread_info {
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_SSBD		(1 << TIF_SSBD)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
@@ -144,7 +146,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
+	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 12bc0a1139da..1b18be3f35a8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -10,6 +10,7 @@
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
+#include <asm/spec-ctrl.h>
 #include <asm/smp.h>
 #include <asm/pci-direct.h>
 #include <asm/delay.h>
@@ -554,6 +555,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
 		nodes_per_socket = ((value >> 3) & 7) + 1;
 	}
+
+	if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+		unsigned int bit;
+
+		switch (c->x86) {
+		case 0x15: bit = 54; break;
+		case 0x16: bit = 33; break;
+		case 0x17: bit = 10; break;
+		default: return;
+		}
+		/*
+		 * Try to cache the base value so further operations can
+		 * avoid RMW. If that faults, do not enable SSBD.
+		 */
+		if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+			setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
+			setup_force_cpu_cap(X86_FEATURE_SSBD);
+			x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
+		}
+	}
 }
 
 static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -791,6 +812,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 
 static void init_amd_zn(struct cpuinfo_x86 *c)
 {
+	set_cpu_cap(c, X86_FEATURE_ZEN);
 	/*
 	 * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
 	 * all up to and including B1.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bfca937bdcc3..7416fc206b4a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,8 +12,10 @@
 #include <linux/utsname.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
 
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
 #include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
@@ -27,6 +29,27 @@
 #include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
+
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+u64 __ro_after_init x86_spec_ctrl_base;
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+u64 __ro_after_init x86_amd_ls_cfg_base;
+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
 
 void __init check_bugs(void)
 {
@@ -37,9 +60,27 @@ void __init check_bugs(void)
 		print_cpu_info(&boot_cpu_data);
 	}
 
+	/*
+	 * Read the SPEC_CTRL MSR to account for reserved bits which may
+	 * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+	 * init code as it is not enumerated and depends on the family.
+	 */
+	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+	/* Allow STIBP in MSR_SPEC_CTRL if supported */
+	if (boot_cpu_has(X86_FEATURE_STIBP))
+		x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+
 	/* Select the proper spectre mitigation before patching alternatives */
 	spectre_v2_select_mitigation();
 
+	/*
+	 * Select proper mitigation for any exposure to the Speculative Store
+	 * Bypass vulnerability.
+	 */
+	ssb_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -93,7 +134,76 @@ static const char *spectre_v2_strings[] = {
 #undef pr_fmt
 #define pr_fmt(fmt)	"Spectre V2 : " fmt
 
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+	SPECTRE_V2_NONE;
+
+void
+x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+{
+	u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+	struct thread_info *ti = current_thread_info();
+
+	/* Is MSR_SPEC_CTRL implemented ? */
+	if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+		/*
+		 * Restrict guest_spec_ctrl to supported values. Clear the
+		 * modifiable bits in the host base value and or the
+		 * modifiable bits from the guest value.
+		 */
+		guestval = hostval & ~x86_spec_ctrl_mask;
+		guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
+		/* SSBD controlled in MSR_SPEC_CTRL */
+		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+
+		if (hostval != guestval) {
+			msrval = setguest ? guestval : hostval;
+			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+		}
+	}
+
+	/*
+	 * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
+	 * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
+	 */
+	if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+	    !static_cpu_has(X86_FEATURE_VIRT_SSBD))
+		return;
+
+	/*
+	 * If the host has SSBD mitigation enabled, force it in the host's
+	 * virtual MSR value. If it's not permanently enabled, evaluate
+	 * current's TIF_SSBD thread flag.
+	 */
+	if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
+		hostval = SPEC_CTRL_SSBD;
+	else
+		hostval = ssbd_tif_to_spec_ctrl(ti->flags);
+
+	/* Sanitize the guest value */
+	guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
+
+	if (hostval != guestval) {
+		unsigned long tif;
+
+		tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+				 ssbd_spec_ctrl_to_tif(hostval);
+
+		speculative_store_bypass_update(tif);
+	}
+}
+EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
+
+static void x86_amd_ssb_disable(void)
+{
+	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
+
+	if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+		wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
+	else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+		wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
 
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
@@ -312,32 +422,289 @@ retpoline_auto:
 }
 
 #undef pr_fmt
+#define pr_fmt(fmt)	"Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+	SPEC_STORE_BYPASS_CMD_NONE,
+	SPEC_STORE_BYPASS_CMD_AUTO,
+	SPEC_STORE_BYPASS_CMD_ON,
+	SPEC_STORE_BYPASS_CMD_PRCTL,
+	SPEC_STORE_BYPASS_CMD_SECCOMP,
+};
+
+static const char *ssb_strings[] = {
+	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
+	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
+	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl",
+	[SPEC_STORE_BYPASS_SECCOMP]	= "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
+};
+
+static const struct {
+	const char *option;
+	enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
+	{ "on",		SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
+	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
+	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL },   /* Disable Speculative Store Bypass via prctl */
+	{ "seccomp",	SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+	enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+	char arg[20];
+	int ret, i;
+
+	if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+		return SPEC_STORE_BYPASS_CMD_NONE;
+	} else {
+		ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+					  arg, sizeof(arg));
+		if (ret < 0)
+			return SPEC_STORE_BYPASS_CMD_AUTO;
+
+		for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+			if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+				continue;
+
+			cmd = ssb_mitigation_options[i].cmd;
+			break;
+		}
+
+		if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+			pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+			return SPEC_STORE_BYPASS_CMD_AUTO;
+		}
+	}
+
+	return cmd;
+}
+
+static enum ssb_mitigation __init __ssb_select_mitigation(void)
+{
+	enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+	enum ssb_mitigation_cmd cmd;
+
+	if (!boot_cpu_has(X86_FEATURE_SSBD))
+		return mode;
+
+	cmd = ssb_parse_cmdline();
+	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+	    (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+	     cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+		return mode;
+
+	switch (cmd) {
+	case SPEC_STORE_BYPASS_CMD_AUTO:
+	case SPEC_STORE_BYPASS_CMD_SECCOMP:
+		/*
+		 * Choose prctl+seccomp as the default mode if seccomp is
+		 * enabled.
+		 */
+		if (IS_ENABLED(CONFIG_SECCOMP))
+			mode = SPEC_STORE_BYPASS_SECCOMP;
+		else
+			mode = SPEC_STORE_BYPASS_PRCTL;
+		break;
+	case SPEC_STORE_BYPASS_CMD_ON:
+		mode = SPEC_STORE_BYPASS_DISABLE;
+		break;
+	case SPEC_STORE_BYPASS_CMD_PRCTL:
+		mode = SPEC_STORE_BYPASS_PRCTL;
+		break;
+	case SPEC_STORE_BYPASS_CMD_NONE:
+		break;
+	}
+
+	/*
+	 * We have three CPU feature flags that are in play here:
+	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+	 *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+	 *  - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+	 */
+	if (mode == SPEC_STORE_BYPASS_DISABLE) {
+		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+		/*
+		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+		 * a completely different MSR and bit dependent on family.
+		 */
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+			break;
+		case X86_VENDOR_AMD:
+			x86_amd_ssb_disable();
+			break;
+		}
+	}
+
+	return mode;
+}
+
+static void ssb_select_mitigation(void)
+{
+	ssb_mode = __ssb_select_mitigation();
+
+	if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+		pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Speculation prctl: " fmt
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+	bool update;
+
+	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+	    ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+		return -ENXIO;
+
+	switch (ctrl) {
+	case PR_SPEC_ENABLE:
+		/* If speculation is force disabled, enable is not allowed */
+		if (task_spec_ssb_force_disable(task))
+			return -EPERM;
+		task_clear_spec_ssb_disable(task);
+		update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+		break;
+	case PR_SPEC_DISABLE:
+		task_set_spec_ssb_disable(task);
+		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+		break;
+	case PR_SPEC_FORCE_DISABLE:
+		task_set_spec_ssb_disable(task);
+		task_set_spec_ssb_force_disable(task);
+		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/*
+	 * If being set on non-current task, delay setting the CPU
+	 * mitigation until it is next scheduled.
+	 */
+	if (task == current && update)
+		speculative_store_bypass_update_current();
+
+	return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+			     unsigned long ctrl)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssb_prctl_set(task, ctrl);
+	default:
+		return -ENODEV;
+	}
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+	if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+		ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+	switch (ssb_mode) {
+	case SPEC_STORE_BYPASS_DISABLE:
+		return PR_SPEC_DISABLE;
+	case SPEC_STORE_BYPASS_SECCOMP:
+	case SPEC_STORE_BYPASS_PRCTL:
+		if (task_spec_ssb_force_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+		if (task_spec_ssb_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+	default:
+		if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+			return PR_SPEC_ENABLE;
+		return PR_SPEC_NOT_AFFECTED;
+	}
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssb_prctl_get(task);
+	default:
+		return -ENODEV;
+	}
+}
+
+void x86_spec_ctrl_setup_ap(void)
+{
+	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+	if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+		x86_amd_ssb_disable();
+}
 
 #ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+
+static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+			       char *buf, unsigned int bug)
 {
-	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+	if (!boot_cpu_has_bug(bug))
 		return sprintf(buf, "Not affected\n");
-	if (boot_cpu_has(X86_FEATURE_PTI))
-		return sprintf(buf, "Mitigation: PTI\n");
+
+	switch (bug) {
+	case X86_BUG_CPU_MELTDOWN:
+		if (boot_cpu_has(X86_FEATURE_PTI))
+			return sprintf(buf, "Mitigation: PTI\n");
+
+		break;
+
+	case X86_BUG_SPECTRE_V1:
+		return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+	case X86_BUG_SPECTRE_V2:
+		return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+			       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+			       spectre_v2_module_string());
+
+	case X86_BUG_SPEC_STORE_BYPASS:
+		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
+	default:
+		break;
+	}
+
 	return sprintf(buf, "Vulnerable\n");
 }
 
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+}
+
 ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
-		return sprintf(buf, "Not affected\n");
-	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
 }
 
 ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-		return sprintf(buf, "Not affected\n");
-
-	return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
-		       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
-		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
-		       spectre_v2_module_string());
+	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
 }
 #endif
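
The arch_seccomp_spec_mitigate() hook above force-disables SSB for seccomp
tasks in the default "seccomp" mode. From the user side, the opt-out mentioned
in the merge message is a seccomp filter flag (SECCOMP_FILTER_FLAG_SPEC_ALLOW,
added to include/uapi/linux/seccomp.h by this series). A hedged sketch of
installing a filter without the automatic force-disable; the helper name
install_filter_keep_ssb() is hypothetical:

#include <stddef.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

int install_filter_keep_ssb(struct sock_fprog *prog)
{
	/* Required before installing a filter without CAP_SYS_ADMIN. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;
	/* Without the flag, the task would get PR_SPEC_FORCE_DISABLE. */
	return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
		       SECCOMP_FILTER_FLAG_SPEC_ALLOW, prog);
}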
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ce243f7d2d4e..78decc3e3067 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -757,17 +757,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
 	 * and they also have a different bit for STIBP support. Also,
 	 * a hypervisor might have set the individual AMD bits even on
 	 * Intel CPUs, for finer-grained selection of what's available.
-	 *
-	 * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
-	 * features, which are visible in /proc/cpuinfo and used by the
-	 * kernel. So set those accordingly from the Intel bits.
 	 */
 	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
 		set_cpu_cap(c, X86_FEATURE_IBRS);
 		set_cpu_cap(c, X86_FEATURE_IBPB);
+		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
 	}
+
 	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
 		set_cpu_cap(c, X86_FEATURE_STIBP);
+
+	if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) ||
+	    cpu_has(c, X86_FEATURE_VIRT_SSBD))
+		set_cpu_cap(c, X86_FEATURE_SSBD);
+
+	if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
+		set_cpu_cap(c, X86_FEATURE_IBRS);
+		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+	}
+
+	if (cpu_has(c, X86_FEATURE_AMD_IBPB))
+		set_cpu_cap(c, X86_FEATURE_IBPB);
+
+	if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+		set_cpu_cap(c, X86_FEATURE_STIBP);
+		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+	}
 }
 
 void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -927,21 +942,55 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
 	{}
 };
 
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_PINEVIEW	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_LINCROFT	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_PENWELL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_CLOVERVIEW	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_CEDARVIEW	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT1	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT2	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MERRIFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_CORE_YONAH		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
+	{ X86_VENDOR_CENTAUR,	5,					},
+	{ X86_VENDOR_INTEL,	5,					},
+	{ X86_VENDOR_NSC,	5,					},
+	{ X86_VENDOR_AMD,	0x12,					},
+	{ X86_VENDOR_AMD,	0x11,					},
+	{ X86_VENDOR_AMD,	0x10,					},
+	{ X86_VENDOR_AMD,	0xf,					},
+	{ X86_VENDOR_ANY,	4,					},
+	{}
+};
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
 
-	if (x86_match_cpu(cpu_no_meltdown))
-		return false;
-
 	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
 
+	if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+	    !(ia32_cap & ARCH_CAP_SSB_NO))
+		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+	if (x86_match_cpu(cpu_no_speculation))
+		return;
+
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+	if (x86_match_cpu(cpu_no_meltdown))
+		return;
+
 	/* Rogue Data Cache Load? No! */
 	if (ia32_cap & ARCH_CAP_RDCL_NO)
-		return false;
+		return;
 
-	return true;
+	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
 }
 
 /*
@@ -992,12 +1041,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
-	if (!x86_match_cpu(cpu_no_speculation)) {
-		if (cpu_vulnerable_to_meltdown(c))
-			setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
-		setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
-	}
+	cpu_set_bug_bits(c);
 
 	fpu__init_system(c);
 
@@ -1359,6 +1403,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
 #endif
 	mtrr_ap_init();
 	validate_apic_and_package_id(c);
+	x86_spec_ctrl_setup_ap();
 }
 
 static __init int setup_noclflush(char *arg)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index e806b11a99af..37672d299e35 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -50,4 +50,6 @@ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
 
 unsigned int aperfmperf_get_khz(int cpu);
 
+extern void x86_spec_ctrl_setup_ap(void);
+
 #endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 60d1897041da..577e7f7ae273 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -188,7 +188,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_IBPB);
 		setup_clear_cpu_cap(X86_FEATURE_STIBP);
 		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+		setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
 		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+		setup_clear_cpu_cap(X86_FEATURE_SSBD);
+		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
 	}
 
 	/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 03408b942adb..30ca2d1a9231 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -38,6 +38,7 @@
 #include <asm/switch_to.h>
 #include <asm/desc.h>
 #include <asm/prctl.h>
+#include <asm/spec-ctrl.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -278,6 +279,148 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
 	}
 }
 
+#ifdef CONFIG_SMP
+
+struct ssb_state {
+	struct ssb_state	*shared_state;
+	raw_spinlock_t		lock;
+	unsigned int		disable_state;
+	unsigned long		local_state;
+};
+
+#define LSTATE_SSB	0
+
+static DEFINE_PER_CPU(struct ssb_state, ssb_state);
+
+void speculative_store_bypass_ht_init(void)
+{
+	struct ssb_state *st = this_cpu_ptr(&ssb_state);
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	st->local_state = 0;
+
+	/*
+	 * Shared state setup happens once on the first bringup
+	 * of the CPU. It's not destroyed on CPU hotunplug.
+	 */
+	if (st->shared_state)
+		return;
+
+	raw_spin_lock_init(&st->lock);
+
+	/*
+	 * Go over HT siblings and check whether one of them has set up the
+	 * shared state pointer already.
+	 */
+	for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
+		if (cpu == this_cpu)
+			continue;
+
+		if (!per_cpu(ssb_state, cpu).shared_state)
+			continue;
+
+		/* Link it to the state of the sibling: */
+		st->shared_state = per_cpu(ssb_state, cpu).shared_state;
+		return;
+	}
+
+	/*
+	 * First HT sibling to come up on the core. Link shared state of
+	 * the first HT sibling to itself. The siblings on the same core
+	 * which come up later will see the shared state pointer and link
+	 * themselves to the state of this CPU.
+	 */
+	st->shared_state = st;
+}
+
+/*
+ * Logic is: the first HT sibling enables SSBD for both siblings in the core,
+ * and the last sibling to disable it disables it for the whole core. This is
+ * how MSR_SPEC_CTRL works in "hardware":
+ *
+ *  CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
+ */
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+	struct ssb_state *st = this_cpu_ptr(&ssb_state);
+	u64 msr = x86_amd_ls_cfg_base;
+
+	if (!static_cpu_has(X86_FEATURE_ZEN)) {
+		msr |= ssbd_tif_to_amd_ls_cfg(tifn);
+		wrmsrl(MSR_AMD64_LS_CFG, msr);
+		return;
+	}
+
+	if (tifn & _TIF_SSBD) {
+		/*
+		 * Since this can race with prctl(), block reentry on the
+		 * same CPU.
+		 */
+		if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
+			return;
+
+		msr |= x86_amd_ls_cfg_ssbd_mask;
+
+		raw_spin_lock(&st->shared_state->lock);
+		/* First sibling enables SSBD: */
+		if (!st->shared_state->disable_state)
+			wrmsrl(MSR_AMD64_LS_CFG, msr);
+		st->shared_state->disable_state++;
+		raw_spin_unlock(&st->shared_state->lock);
+	} else {
+		if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
+			return;
+
+		raw_spin_lock(&st->shared_state->lock);
+		st->shared_state->disable_state--;
+		if (!st->shared_state->disable_state)
+			wrmsrl(MSR_AMD64_LS_CFG, msr);
+		raw_spin_unlock(&st->shared_state->lock);
+	}
+}
+#else
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+	u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+
+	wrmsrl(MSR_AMD64_LS_CFG, msr);
+}
+#endif
+
+static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
+{
+	/*
+	 * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
+	 * so ssbd_tif_to_spec_ctrl() just works.
+	 */
+	wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+}
+
+static __always_inline void intel_set_ssb_state(unsigned long tifn)
+{
+	u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+
+	wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+}
+
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+	if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
+		amd_set_ssb_virt_state(tifn);
+	else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+		amd_set_core_ssb_state(tifn);
+	else
+		intel_set_ssb_state(tifn);
+}
+
+void speculative_store_bypass_update(unsigned long tif)
+{
+	preempt_disable();
+	__speculative_store_bypass_update(tif);
+	preempt_enable();
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss)
 {
@@ -309,6 +452,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
309 452
310 if ((tifp ^ tifn) & _TIF_NOCPUID) 453 if ((tifp ^ tifn) & _TIF_NOCPUID)
311 set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); 454 set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
455
456 if ((tifp ^ tifn) & _TIF_SSBD)
457 __speculative_store_bypass_update(tifn);
312} 458}
313 459
314/* 460/*
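
The CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL rule above reduces to a first-on/last-off refcount across HT siblings. A minimal userspace sketch of that protocol (all names illustrative, not the kernel's; the real code additionally takes shared_state->lock, blocks reentry via local_state, and writes MSR_AMD64_LS_CFG):

    #include <stdio.h>

    static unsigned int disable_state;  /* shared per-core count */
    static int core_ssbd;               /* stands in for the LS_CFG SSBD bit */

    static void sibling_ssbd_enable(void)
    {
            if (!disable_state++)       /* first sibling on: set core-wide bit */
                    core_ssbd = 1;
    }

    static void sibling_ssbd_disable(void)
    {
            if (!--disable_state)       /* last sibling off: clear it */
                    core_ssbd = 0;
    }

    int main(void)
    {
            sibling_ssbd_enable();                  /* thread 0 */
            sibling_ssbd_enable();                  /* thread 1 */
            sibling_ssbd_disable();                 /* thread 0 */
            printf("core_ssbd=%d\n", core_ssbd);    /* 1: sibling still needs it */
            sibling_ssbd_disable();                 /* thread 1 */
            printf("core_ssbd=%d\n", core_ssbd);    /* 0: whole core off */
            return 0;
    }
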
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0f1cbb042f49..9dd324ae4832 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -79,6 +79,7 @@
 #include <asm/qspinlock.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
+#include <asm/spec-ctrl.h>
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -244,6 +245,8 @@ static void notrace start_secondary(void *unused)
 	 */
 	check_tsc_sync_target();
 
+	speculative_store_bypass_ht_init();
+
 	/*
 	 * Lock vector_lock, set CPU online and bring the vector
 	 * allocator online. Online must be set with vector_lock held
@@ -1292,6 +1295,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	set_mtrr_aps_delayed_init();
 
 	smp_quirk_init_udelay();
+
+	speculative_store_bypass_ht_init();
 }
 
 void arch_enable_nonboot_cpus_begin(void)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 82055b90a8b3..ced851169730 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 0x80000008.ebx */
 	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
-		F(IBPB) | F(IBRS);
+		F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
 
 	/* cpuid 0xC0000001.edx */
 	const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -407,7 +407,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
-		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SSBD) |
 		F(ARCH_CAPABILITIES);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
@@ -647,13 +647,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			g_phys_as = phys_as;
 		entry->eax = g_phys_as | (virt_as << 8);
 		entry->edx = 0;
-		/* IBRS and IBPB aren't necessarily present in hardware cpuid */
-		if (boot_cpu_has(X86_FEATURE_IBPB))
-			entry->ebx |= F(IBPB);
-		if (boot_cpu_has(X86_FEATURE_IBRS))
-			entry->ebx |= F(IBRS);
+		/*
+		 * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
+		 * hardware cpuid
+		 */
+		if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
+			entry->ebx |= F(AMD_IBPB);
+		if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
+			entry->ebx |= F(AMD_IBRS);
+		if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+			entry->ebx |= F(VIRT_SSBD);
 		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
 		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+		if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+			entry->ebx |= F(VIRT_SSBD);
 		break;
 	}
 	case 0x80000019:
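
The second F(VIRT_SSBD) OR after cpuid_mask() is deliberate: cpuid_mask() drops any bit the host's hardware CPUID does not advertise, and on LS_CFG-only hosts VIRT_SSBD is purely synthetic. A standalone sketch of that order of operations (bit positions and values illustrative):

    #include <stdio.h>

    #define BIT(n)          (1u << (n))
    #define F_AMD_IBPB      BIT(12)     /* illustrative positions */
    #define F_VIRT_SSBD     BIT(25)

    int main(void)
    {
            unsigned int host_cpuid = F_AMD_IBPB;   /* no VIRT_SSBD in hw */
            unsigned int kvm_features = F_AMD_IBPB | F_VIRT_SSBD;
            unsigned int ebx = kvm_features;

            ebx &= kvm_features;    /* kvm_cpuid_8000_0008_ebx_x86_features */
            ebx &= host_cpuid;      /* cpuid_mask(): clears the synthetic bit */
            ebx |= F_VIRT_SSBD;     /* re-added for LS_CFG-capable hosts */

            printf("ebx=%#x\n", ebx);   /* both bits set again */
            return 0;
    }
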
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1fc05e428aba..26110c202b19 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,7 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -213,6 +213,12 @@ struct vcpu_svm {
 	} host;
 
 	u64 spec_ctrl;
+	/*
+	 * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
+	 * translated into the appropriate LS_CFG bits on the host to
+	 * perform speculative control.
+	 */
+	u64 virt_spec_ctrl;
 
 	u32 *msrpm;
 
@@ -2060,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vcpu->arch.microcode_version = 0x01000065;
 	svm->spec_ctrl = 0;
+	svm->virt_spec_ctrl = 0;
 
 	if (!init_event) {
 		svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
@@ -4108,11 +4115,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_SPEC_CTRL:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
 			return 1;
 
 		msr_info->data = svm->spec_ctrl;
 		break;
+	case MSR_AMD64_VIRT_SPEC_CTRL:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+			return 1;
+
+		msr_info->data = svm->virt_spec_ctrl;
+		break;
 	case MSR_F15H_IC_CFG: {
 
 		int family, model;
@@ -4203,7 +4217,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		break;
 	case MSR_IA32_SPEC_CTRL:
 		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
 			return 1;
 
 		/* The STIBP bit doesn't fault even if it's not advertised */
@@ -4230,7 +4244,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		break;
 	case MSR_IA32_PRED_CMD:
 		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
 			return 1;
 
 		if (data & ~PRED_CMD_IBPB)
@@ -4244,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 			break;
 		set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
 		break;
+	case MSR_AMD64_VIRT_SPEC_CTRL:
+		if (!msr->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+			return 1;
+
+		if (data & ~SPEC_CTRL_SSBD)
+			return 1;
+
+		svm->virt_spec_ctrl = data;
+		break;
 	case MSR_STAR:
 		svm->vmcb->save.star = data;
 		break;
@@ -5557,8 +5581,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	 * is no need to worry about the conditional branch over the wrmsr
 	 * being speculatively taken.
 	 */
-	if (svm->spec_ctrl)
-		native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+	x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
 
 	asm volatile (
 		"push %%" _ASM_BP "; \n\t"
@@ -5652,6 +5675,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 		);
 
+	/* Eliminate branch target predictions from guest mode */
+	vmexit_fill_RSB();
+
+#ifdef CONFIG_X86_64
+	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+#else
+	loadsegment(fs, svm->host.fs);
+#ifndef CONFIG_X86_32_LAZY_GS
+	loadsegment(gs, svm->host.gs);
+#endif
+#endif
+
 	/*
 	 * We do not use IBRS in the kernel. If this vCPU has used the
 	 * SPEC_CTRL MSR it may have left it on; save the value and
@@ -5670,20 +5705,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
 		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
-	if (svm->spec_ctrl)
-		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-
-	/* Eliminate branch target predictions from guest mode */
-	vmexit_fill_RSB();
-
-#ifdef CONFIG_X86_64
-	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
-#else
-	loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
-	loadsegment(gs, svm->host.gs);
-#endif
-#endif
+	x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
 
 	reload_tss(vcpu);
 
@@ -5786,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
 	return false;
 }
 
-static bool svm_has_high_real_mode_segbase(void)
+static bool svm_has_emulated_msr(int index)
 {
 	return true;
 }
@@ -7012,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.hardware_enable = svm_hardware_enable,
 	.hardware_disable = svm_hardware_disable,
 	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
-	.cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
+	.has_emulated_msr = svm_has_emulated_msr,
 
 	.vcpu_create = svm_create_vcpu,
 	.vcpu_free = svm_free_vcpu,
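
The MSR_AMD64_VIRT_SPEC_CTRL write side above follows the usual reserved-bit pattern: any bit other than SSBD makes svm_set_msr() return 1, which KVM turns into a #GP for the guest. Distilled into a checkable sketch (SPEC_CTRL_SSBD is bit 2, matching msr-index.h):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SPEC_CTRL_SSBD  (1ULL << 2)

    /* true if the write would be refused (KVM returns 1 -> #GP in the guest) */
    static bool virt_spec_ctrl_write_faults(uint64_t data)
    {
            return (data & ~SPEC_CTRL_SSBD) != 0;
    }

    int main(void)
    {
            printf("%d\n", virt_spec_ctrl_write_faults(0));              /* 0 */
            printf("%d\n", virt_spec_ctrl_write_faults(SPEC_CTRL_SSBD)); /* 0 */
            printf("%d\n", virt_spec_ctrl_write_faults(1ULL << 0));      /* 1 */
            return 0;
    }
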
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3f1696570b41..40aa29204baf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,7 +51,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
 #include <asm/mshyperv.h>
 
 #include "trace.h"
@@ -3529,7 +3529,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		return kvm_get_msr_common(vcpu, msr_info);
 	case MSR_IA32_SPEC_CTRL:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
 			return 1;
 
@@ -3648,12 +3647,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_SPEC_CTRL:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
 			return 1;
 
 		/* The STIBP bit doesn't fault even if it's not advertised */
-		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
 			return 1;
 
 		vmx->spec_ctrl = data;
@@ -3679,7 +3677,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_PRED_CMD:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
 			return 1;
 
@@ -9488,9 +9485,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
 
-static bool vmx_has_high_real_mode_segbase(void)
+static bool vmx_has_emulated_msr(int index)
 {
-	return enable_unrestricted_guest || emulate_invalid_guest_state;
+	switch (index) {
+	case MSR_IA32_SMBASE:
+		/*
+		 * We cannot do SMM unless we can run the guest in big
+		 * real mode.
+		 */
+		return enable_unrestricted_guest || emulate_invalid_guest_state;
+	case MSR_AMD64_VIRT_SPEC_CTRL:
+		/* This is AMD only. */
+		return false;
+	default:
+		return true;
+	}
 }
 
 static bool vmx_mpx_supported(void)
@@ -9722,8 +9731,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * is no need to worry about the conditional branch over the wrmsr
 	 * being speculatively taken.
 	 */
-	if (vmx->spec_ctrl)
-		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 
@@ -9871,8 +9879,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
 		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
-	if (vmx->spec_ctrl)
-		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+	x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
 
 	/* Eliminate branch target predictions from guest mode */
 	vmexit_fill_RSB();
@@ -12632,7 +12639,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.hardware_enable = hardware_enable,
 	.hardware_disable = hardware_disable,
 	.cpu_has_accelerated_tpr = report_flexpriority,
-	.cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+	.has_emulated_msr = vmx_has_emulated_msr,
 
 	.vm_init = vmx_vm_init,
 	.vm_alloc = vmx_vm_alloc,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59371de5d722..22a183aac1c6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1061,6 +1061,7 @@ static u32 emulated_msrs[] = {
 	MSR_SMI_COUNT,
 	MSR_PLATFORM_INFO,
 	MSR_MISC_FEATURES_ENABLES,
+	MSR_AMD64_VIRT_SPEC_CTRL,
 };
 
 static unsigned num_emulated_msrs;
@@ -2906,7 +2907,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		 * fringe case that is not enabled except via specific settings
 		 * of the module parameters.
 		 */
-		r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+		r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
 		break;
 	case KVM_CAP_VAPIC:
 		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
@@ -4606,14 +4607,8 @@ static void kvm_init_msr_list(void)
 	num_msrs_to_save = j;
 
 	for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
-		switch (emulated_msrs[i]) {
-		case MSR_IA32_SMBASE:
-			if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
-				continue;
-			break;
-		default:
-			break;
-		}
+		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+			continue;
 
 		if (j < i)
 			emulated_msrs[j] = emulated_msrs[i];
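
The rewritten loop above is the classic in-place array filter: entries failing the predicate are skipped, survivors are compacted toward the front (the trailing j++ and the final assignment of j to num_emulated_msrs fall outside the hunk shown, so this is an assumption about the surrounding code). The same shape with a stand-in predicate:

    #include <stdio.h>

    #define ARRAY_SIZE(a)   (sizeof(a) / sizeof((a)[0]))

    static int has_emulated(int msr)
    {
            return msr % 2 == 0;    /* stand-in for ->has_emulated_msr() */
    }

    int main(void)
    {
            int msrs[] = { 1, 2, 3, 4, 5, 6 };
            unsigned int i, j;

            for (i = j = 0; i < ARRAY_SIZE(msrs); i++) {
                    if (!has_emulated(msrs[i]))
                            continue;
                    if (j < i)
                            msrs[j] = msrs[i];  /* compact in place */
                    j++;
            }

            for (i = 0; i < j; i++)
                    printf("%d ", msrs[i]);     /* prints: 2 4 6 */
            printf("\n");
            return 0;
    }
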
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 2da998baa75c..30cc9c877ebb 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -534,14 +534,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
+					  struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
 	&dev_attr_spectre_v1.attr,
 	&dev_attr_spectre_v2.attr,
+	&dev_attr_spec_store_bypass.attr,
 	NULL
 };
 
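
With the attribute registered, the mitigation state becomes readable from userspace. A small reader, assuming the standard vulnerabilities directory documented in sysfs-devices-system-cpu (see the ABI file in this series):

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spec_store_bypass", "r");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            if (fgets(line, sizeof(line), f))
                    fputs(line, stdout);    /* e.g. "Not affected" or a "Mitigation: ..." line */
            fclose(f);
            return 0;
    }
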
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..72391b3f6927 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -85,6 +85,7 @@
 #include <linux/delayacct.h>
 #include <linux/seq_file.h>
 #include <linux/pid_namespace.h>
+#include <linux/prctl.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
 #include <linux/string_helpers.h>
@@ -335,6 +336,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 #ifdef CONFIG_SECCOMP
 	seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
 #endif
+	seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+	switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+	case -EINVAL:
+		seq_printf(m, "unknown");
+		break;
+	case PR_SPEC_NOT_AFFECTED:
+		seq_printf(m, "not vulnerable");
+		break;
+	case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+		seq_printf(m, "thread force mitigated");
+		break;
+	case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+		seq_printf(m, "thread mitigated");
+		break;
+	case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+		seq_printf(m, "thread vulnerable");
+		break;
+	case PR_SPEC_DISABLE:
+		seq_printf(m, "globally mitigated");
+		break;
+	default:
+		seq_printf(m, "vulnerable");
+		break;
+	}
 	seq_putc(m, '\n');
 }
 
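
The new field is exposed per task, so a process can read its own state back from /proc/self/status:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/self/status", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    if (!strncmp(line, "Speculation_Store_Bypass:", 25))
                            fputs(line, stdout);    /* e.g. "...: thread vulnerable" */
            fclose(f);
            return 0;
    }
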
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7e61c395fddf..65cfc2f59db9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -146,6 +146,7 @@ struct bpf_insn_aux_data {
 		s32 call_imm;		/* saved imm field of call insn */
 	};
 	int ctx_field_size;	/* the ctx field size for load insn, maybe 0 */
+	int sanitize_stack_off;	/* stack slot to be cleared */
 	bool seen;		/* this insn was processed by the verifier */
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7b01bc11c692..a97a63eef59f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -53,6 +53,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
 				   struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_spectre_v2(struct device *dev,
 				   struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+					  struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index e791ebc65c9c..0c5ef54fd416 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -7,6 +7,8 @@
 #define _LINUX_NOSPEC_H
 #include <asm/barrier.h>
 
+struct task_struct;
+
 /**
  * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
  * @index: array element index
@@ -55,4 +57,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 									\
 	(typeof(_i)) (_i & _mask);					\
 })
+
+/* Speculation control prctl */
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+			     unsigned long ctrl);
+/* Speculation control for seccomp enforced mitigation */
+void arch_seccomp_spec_mitigate(struct task_struct *task);
+
 #endif /* _LINUX_NOSPEC_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c2413703f45d..ca3f3eae8980 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1433,7 +1433,8 @@ static inline bool is_percpu_thread(void)
 #define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
 #define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
 #define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
-
+#define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled */
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1458,6 +1459,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
 TASK_PFA_SET(SPREAD_SLAB, spread_slab)
 TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
 static inline void
 current_restore_flags(unsigned long orig_flags, unsigned long flags)
 {
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index c723a5c4e3ff..e5320f6c8654 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -4,8 +4,9 @@
 
 #include <uapi/linux/seccomp.h>
 
 #define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC | \
-					 SECCOMP_FILTER_FLAG_LOG)
+					 SECCOMP_FILTER_FLAG_LOG | \
+					 SECCOMP_FILTER_FLAG_SPEC_ALLOW)
 
 #ifdef CONFIG_SECCOMP
 
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index af5f8c2df87a..db9f15f5db04 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -207,4 +207,16 @@ struct prctl_mm_map {
 # define PR_SVE_VL_LEN_MASK		0xffff
 # define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
 
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL		52
+#define PR_SET_SPECULATION_CTRL		53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS		0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED		0
+# define PR_SPEC_PRCTL			(1UL << 0)
+# define PR_SPEC_ENABLE			(1UL << 1)
+# define PR_SPEC_DISABLE		(1UL << 2)
+# define PR_SPEC_FORCE_DISABLE		(1UL << 3)
+
 #endif /* _LINUX_PRCTL_H */
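
Together with the kernel/sys.c wiring further below, these constants form the userspace API described in Documentation/userspace-api/spec_ctrl.rst. A minimal consumer (fallback defines included in case the toolchain headers predate this series; the expected GET result assumes an affected CPU with the prctl mitigation mode):

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_SPECULATION_CTRL
    #define PR_GET_SPECULATION_CTRL 52
    #define PR_SET_SPECULATION_CTRL 53
    #define PR_SPEC_STORE_BYPASS    0
    #define PR_SPEC_PRCTL           (1UL << 0)
    #define PR_SPEC_DISABLE         (1UL << 2)
    #endif

    int main(void)
    {
            int state;

            /* Disable speculative store bypass for this task. */
            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                      PR_SPEC_DISABLE, 0, 0))
                    perror("PR_SET_SPECULATION_CTRL");

            state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
            if (state < 0)
                    perror("PR_GET_SPECULATION_CTRL");
            else
                    printf("state=%#x (PR_SPEC_PRCTL|PR_SPEC_DISABLE expected)\n",
                           state);
            return 0;
    }
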
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 2a0bd9dd104d..9efc0e73d50b 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -17,8 +17,9 @@
 #define SECCOMP_GET_ACTION_AVAIL	2
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
-#define SECCOMP_FILTER_FLAG_TSYNC	1
-#define SECCOMP_FILTER_FLAG_LOG		2
+#define SECCOMP_FILTER_FLAG_TSYNC	(1UL << 0)
+#define SECCOMP_FILTER_FLAG_LOG		(1UL << 1)
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW	(1UL << 2)
 
 /*
  * All BPF programs must return a 32-bit value.
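
And the matching opt-out on the filter side: passing the new flag to SECCOMP_SET_MODE_FILTER skips the implicit SSB mitigation that seccomp would otherwise apply. A minimal allow-all filter for illustration (the flag define is a fallback for older headers, and SYS_seccomp is assumed to be provided by <sys/syscall.h>):

    #include <linux/filter.h>
    #include <linux/seccomp.h>
    #include <stdio.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
    #define SECCOMP_FILTER_FLAG_SPEC_ALLOW  (1UL << 2)
    #endif

    int main(void)
    {
            struct sock_filter insns[] = {
                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
            };
            struct sock_fprog prog = {
                    .len = sizeof(insns) / sizeof(insns[0]),
                    .filter = insns,
            };

            if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
                    perror("PR_SET_NO_NEW_PRIVS");

            /* Opt out of the implicit SSB mitigation while installing the filter. */
            if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
                        SECCOMP_FILTER_FLAG_SPEC_ALLOW, &prog))
                    perror("seccomp");
            return 0;
    }
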
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5dd1dcb902bf..2ce967a63ede 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -978,7 +978,7 @@ static bool register_is_null(struct bpf_reg_state *reg)
  */
 static int check_stack_write(struct bpf_verifier_env *env,
 			     struct bpf_func_state *state, /* func where register points to */
-			     int off, int size, int value_regno)
+			     int off, int size, int value_regno, int insn_idx)
 {
 	struct bpf_func_state *cur; /* state of the current function */
 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
@@ -1017,8 +1017,33 @@ static int check_stack_write(struct bpf_verifier_env *env,
 		state->stack[spi].spilled_ptr = cur->regs[value_regno];
 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 
-		for (i = 0; i < BPF_REG_SIZE; i++)
+		for (i = 0; i < BPF_REG_SIZE; i++) {
+			if (state->stack[spi].slot_type[i] == STACK_MISC &&
+			    !env->allow_ptr_leaks) {
+				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
+				int soff = (-spi - 1) * BPF_REG_SIZE;
+
+				/* Detected reuse of an integer stack slot with a
+				 * pointer, which means either llvm is reusing the
+				 * stack slot or an attacker is trying to exploit
+				 * CVE-2018-3639 (speculative store bypass). Have
+				 * to sanitize that slot with a preemptive store
+				 * of zero.
+				 */
+				if (*poff && *poff != soff) {
+					/* disallow programs where a single insn
+					 * stores into two different stack slots,
+					 * since the verifier cannot sanitize them
+					 */
+					verbose(env,
+						"insn %d cannot access two stack slots fp%d and fp%d",
+						insn_idx, *poff, soff);
+					return -EINVAL;
+				}
+				*poff = soff;
+			}
 			state->stack[spi].slot_type[i] = STACK_SPILL;
+		}
 	} else {
 		u8 type = STACK_MISC;
 
@@ -1694,7 +1719,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 
 	if (t == BPF_WRITE)
 		err = check_stack_write(env, state, off, size,
					value_regno, insn_idx);
 	else
 		err = check_stack_read(env, state, off, size,
 				       value_regno);
@@ -5169,6 +5194,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		else
 			continue;
 
+		if (type == BPF_WRITE &&
+		    env->insn_aux_data[i + delta].sanitize_stack_off) {
+			struct bpf_insn patch[] = {
+				/* Sanitize the suspicious stack slot with zero.
+				 * There are no memory dependencies for this store,
+				 * since it only uses the frame pointer and an
+				 * immediate constant of zero.
+				 */
+				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+					   env->insn_aux_data[i + delta].sanitize_stack_off,
+					   0),
+				/* the original STX instruction will immediately
+				 * overwrite the same stack slot with the appropriate
+				 * value
+				 */
+				*insn,
+			};
+
+			cnt = ARRAY_SIZE(patch);
+			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			continue;
+		}
+
 		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
 			continue;
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index dc77548167ef..e691d9a6c58d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -19,6 +19,8 @@
 #include <linux/compat.h>
 #include <linux/coredump.h>
 #include <linux/kmemleak.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <linux/seccomp.h>
@@ -227,8 +229,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 	return true;
 }
 
+void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
+
 static inline void seccomp_assign_mode(struct task_struct *task,
-				       unsigned long seccomp_mode)
+				       unsigned long seccomp_mode,
+				       unsigned long flags)
 {
 	assert_spin_locked(&task->sighand->siglock);
 
@@ -238,6 +243,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
 	 * filter) is set.
 	 */
 	smp_mb__before_atomic();
+	/* Assume default seccomp processes want spec flaw mitigation. */
+	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
+		arch_seccomp_spec_mitigate(task);
 	set_tsk_thread_flag(task, TIF_SECCOMP);
 }
 
@@ -305,7 +313,7 @@ static inline pid_t seccomp_can_sync_threads(void)
 * without dropping the locks.
 *
 */
-static inline void seccomp_sync_threads(void)
+static inline void seccomp_sync_threads(unsigned long flags)
 {
 	struct task_struct *thread, *caller;
 
@@ -346,7 +354,8 @@ static inline void seccomp_sync_threads(void)
 		 * allow one thread to transition the other.
 		 */
 		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
-			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
+					    flags);
 	}
 }
 
@@ -469,7 +478,7 @@ static long seccomp_attach_filter(unsigned int flags,
 
 	/* Now that the new filter is in place, synchronize to all threads. */
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-		seccomp_sync_threads();
+		seccomp_sync_threads(flags);
 
 	return 0;
 }
@@ -818,7 +827,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef TIF_NOTSC
 	disable_TSC();
 #endif
-	seccomp_assign_mode(current, seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode, 0);
 	ret = 0;
 
 out:
@@ -876,7 +885,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	/* Do not free the successfully attached filter. */
 	prepared = NULL;
 
-	seccomp_assign_mode(current, seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode, flags);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
diff --git a/kernel/sys.c b/kernel/sys.c
index ad692183dfe9..b0eee418ee0d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -61,6 +61,8 @@
 #include <linux/uidgid.h>
 #include <linux/cred.h>
 
+#include <linux/nospec.h>
+
 #include <linux/kmsg_dump.h>
 /* Move somewhere else to avoid recompiling? */
 #include <generated/utsrelease.h>
@@ -2242,6 +2244,17 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
 	return 1;
 }
 
+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
+{
+	return -EINVAL;
+}
+
+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+				    unsigned long ctrl)
+{
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2450,6 +2463,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SVE_GET_VL:
 		error = SVE_GET_VL();
 		break;
+	case PR_GET_SPECULATION_CTRL:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_spec_ctrl_get(me, arg2);
+		break;
+	case PR_SET_SPECULATION_CTRL:
+		if (arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
+		break;
 	default:
 		error = -EINVAL;
 		break;
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 168c66d74fc5..e1473234968d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -134,11 +134,15 @@ struct seccomp_data {
 #endif
 
 #ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC	1
+#define SECCOMP_FILTER_FLAG_TSYNC	(1UL << 0)
 #endif
 
 #ifndef SECCOMP_FILTER_FLAG_LOG
-#define SECCOMP_FILTER_FLAG_LOG		2
+#define SECCOMP_FILTER_FLAG_LOG		(1UL << 1)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW	(1UL << 2)
 #endif
 
 #ifndef PTRACE_SECCOMP_GET_METADATA
@@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock)
 TEST(detect_seccomp_filter_flags)
 {
 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
-				 SECCOMP_FILTER_FLAG_LOG };
+				 SECCOMP_FILTER_FLAG_LOG,
+				 SECCOMP_FILTER_FLAG_SPEC_ALLOW };
 	unsigned int flag, all_flags;
 	int i;
 	long ret;
 
 	/* Test detection of known-good filter flags */
 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		int bits = 0;
+
 		flag = flags[i];
+		/* Make sure the flag is a single bit! */
+		while (flag) {
+			if (flag & 0x1)
+				bits++;
+			flag >>= 1;
+		}
+		ASSERT_EQ(1, bits);
+		flag = flags[i];
+
 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
 		ASSERT_NE(ENOSYS, errno) {
 			TH_LOG("Kernel does not support seccomp syscall!");