author    Linus Torvalds <torvalds@linux-foundation.org>    2019-05-14 10:57:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2019-05-14 10:57:29 -0400
commit    fa4bff165070dc40a3de35b78e4f8da8e8d85ec5 (patch)
tree      1430bdefedcf00030b4152baf12f530a04bd25f3
parent    63863ee8e2f6f6ae47be3dff4af2f2806f5ca2dd (diff)
parent    95310e348a321b45fb746c176961d4da72344282 (diff)
Merge branch 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 MDS mitigations from Thomas Gleixner:
 "Microarchitectural Data Sampling (MDS) is a hardware vulnerability
  which allows unprivileged speculative access to data which is
  available in various CPU internal buffers. This new set of misfeatures
  has the following CVEs assigned:

     CVE-2018-12126  MSBDS  Microarchitectural Store Buffer Data Sampling
     CVE-2018-12130  MFBDS  Microarchitectural Fill Buffer Data Sampling
     CVE-2018-12127  MLPDS  Microarchitectural Load Port Data Sampling
     CVE-2019-11091  MDSUM  Microarchitectural Data Sampling Uncacheable Memory

  MDS attacks target microarchitectural buffers which speculatively
  forward data under certain conditions. Disclosure gadgets can expose
  this data via cache side channels.

  Contrary to other speculation based vulnerabilities the MDS
  vulnerability does not allow the attacker to control the memory
  target address. As a consequence the attacks are purely sampling
  based, but as demonstrated with the TLBleed attack samples can be
  postprocessed successfully.

  The mitigation is to flush the microarchitectural buffers on return
  to user space and before entering a VM. It's bolted on the VERW
  instruction and requires a microcode update. As some of the attacks
  exploit data structures shared between hyperthreads, full protection
  requires to disable hyperthreading. The kernel does not do that by
  default to avoid breaking unattended updates.

  The mitigation set comes with documentation for administrators and a
  deeper technical view"

* 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/speculation/mds: Fix documentation typo
  Documentation: Correct the possible MDS sysfs values
  x86/mds: Add MDSUM variant to the MDS documentation
  x86/speculation/mds: Add 'mitigations=' support for MDS
  x86/speculation/mds: Print SMT vulnerable on MSBDS with mitigations off
  x86/speculation/mds: Fix comment
  x86/speculation/mds: Add SMT warning message
  x86/speculation: Move arch_smt_update() call to after mitigation decisions
  x86/speculation/mds: Add mds=full,nosmt cmdline option
  Documentation: Add MDS vulnerability documentation
  Documentation: Move L1TF to separate directory
  x86/speculation/mds: Add mitigation mode VMWERV
  x86/speculation/mds: Add sysfs reporting for MDS
  x86/speculation/mds: Add mitigation control for MDS
  x86/speculation/mds: Conditionally clear CPU buffers on idle entry
  x86/kvm/vmx: Add MDS protection when L1D Flush is not active
  x86/speculation/mds: Clear CPU buffers on exit to user
  x86/speculation/mds: Add mds_clear_cpu_buffers()
  x86/kvm: Expose X86_FEATURE_MD_CLEAR to guests
  x86/speculation/mds: Add BUG_MSBDS_ONLY
  ...
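The administrator-visible result of this series is the new sysfs vulnerability file documented below. As a quick stand-alone illustration (not part of the patch set), a minimal userspace C program can read it; the path comes from the ABI documentation in this merge:

/* Illustration only: read the MDS status that this series exposes via sysfs. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/vulnerabilities/mds";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		/* Kernels without this series (or non-x86) lack the file. */
		perror(path);
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("mds status: %s", line); /* e.g. "Mitigation: Clear CPU buffers; SMT vulnerable" */
	fclose(f);
	return 0;
}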
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-system-cpu  |    4
-rw-r--r--  Documentation/admin-guide/hw-vuln/index.rst         |   13
-rw-r--r--  Documentation/admin-guide/hw-vuln/l1tf.rst (renamed from Documentation/admin-guide/l1tf.rst) | 1
-rw-r--r--  Documentation/admin-guide/hw-vuln/mds.rst           |  308
-rw-r--r--  Documentation/admin-guide/index.rst                 |    6
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt     |   30
-rw-r--r--  Documentation/index.rst                             |    1
-rw-r--r--  Documentation/x86/conf.py                           |   10
-rw-r--r--  Documentation/x86/index.rst                         |    1
-rw-r--r--  Documentation/x86/mds.rst                           |  225
-rw-r--r--  arch/x86/entry/common.c                             |    3
-rw-r--r--  arch/x86/include/asm/cpufeatures.h                  |    3
-rw-r--r--  arch/x86/include/asm/irqflags.h                     |    4
-rw-r--r--  arch/x86/include/asm/msr-index.h                    |   39
-rw-r--r--  arch/x86/include/asm/mwait.h                        |    7
-rw-r--r--  arch/x86/include/asm/nospec-branch.h                |   50
-rw-r--r--  arch/x86/include/asm/processor.h                    |    6
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                          |  135
-rw-r--r--  arch/x86/kernel/cpu/common.c                        |  121
-rw-r--r--  arch/x86/kernel/nmi.c                               |    4
-rw-r--r--  arch/x86/kernel/traps.c                             |    8
-rw-r--r--  arch/x86/kvm/cpuid.c                                |    3
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                              |    7
-rw-r--r--  drivers/base/cpu.c                                  |    8
-rw-r--r--  include/linux/cpu.h                                 |    2
-rw-r--r--  tools/power/x86/turbostat/Makefile                  |    2
-rw-r--r--  tools/power/x86/x86_energy_perf_policy/Makefile     |    2
27 files changed, 921 insertions(+), 82 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 4fb76c0e8d30..1528239f69b2 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -484,6 +484,7 @@ What: /sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/spectre_v2
 		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
 		/sys/devices/system/cpu/vulnerabilities/l1tf
+		/sys/devices/system/cpu/vulnerabilities/mds
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
@@ -496,8 +497,7 @@ Description:	Information about CPU vulnerabilities
 		"Vulnerable"	  CPU is affected and no mitigation in effect
 		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
 
-		Details about the l1tf file can be found in
-		Documentation/admin-guide/l1tf.rst
+		See also: Documentation/admin-guide/hw-vuln/index.rst
 
 What:		/sys/devices/system/cpu/smt
 		/sys/devices/system/cpu/smt/active
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
new file mode 100644
index 000000000000..ffc064c1ec68
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -0,0 +1,13 @@
+========================
+Hardware vulnerabilities
+========================
+
+This section describes CPU vulnerabilities and provides an overview of the
+possible mitigations along with guidance for selecting mitigations if they
+are configurable at compile, boot or run time.
+
+.. toctree::
+   :maxdepth: 1
+
+   l1tf
+   mds
diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index 9af977384168..31653a9f0e1b 100644
--- a/Documentation/admin-guide/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -445,6 +445,7 @@ The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
 line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
 module parameter is ignored and writes to the sysfs file are rejected.
 
+.. _mitigation_selection:
 
 Mitigation selection guide
 --------------------------
diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
new file mode 100644
index 000000000000..e3a796c0d3a2
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/mds.rst
@@ -0,0 +1,308 @@
+MDS - Microarchitectural Data Sampling
+======================================
+
+Microarchitectural Data Sampling is a hardware vulnerability which allows
+unprivileged speculative access to data which is available in various CPU
+internal buffers.
+
+Affected processors
+-------------------
+
+This vulnerability affects a wide range of Intel processors. The
+vulnerability is not present on:
+
+   - Processors from AMD, Centaur and other non-Intel vendors
+
+   - Older processor models, where the CPU family is < 6
+
+   - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus)
+
+   - Intel processors which have the ARCH_CAP_MDS_NO bit set in the
+     IA32_ARCH_CAPABILITIES MSR.
+
+Whether a processor is affected or not can be read out from the MDS
+vulnerability file in sysfs. See :ref:`mds_sys_info`.
+
+Not all processors are affected by all variants of MDS, but the mitigation
+is identical for all of them so the kernel treats them as a single
+vulnerability.
+
+Related CVEs
+------------
+
+The following CVE entries are related to the MDS vulnerability:
+
+   ==============  =====  ===================================================
+   CVE-2018-12126  MSBDS  Microarchitectural Store Buffer Data Sampling
+   CVE-2018-12130  MFBDS  Microarchitectural Fill Buffer Data Sampling
+   CVE-2018-12127  MLPDS  Microarchitectural Load Port Data Sampling
+   CVE-2019-11091  MDSUM  Microarchitectural Data Sampling Uncacheable Memory
+   ==============  =====  ===================================================
+
+Problem
+-------
+
+When performing store, load or L1 refill operations, processors write data
+into temporary microarchitectural structures (buffers). The data in the
+buffer can be forwarded to load operations as an optimization.
+
+Under certain conditions, usually a fault/assist caused by a load
+operation, data unrelated to the load memory address can be speculatively
+forwarded from the buffers. Because the load operation causes a fault or
+assist and its result will be discarded, the forwarded data will not cause
+incorrect program execution or state changes. But a malicious operation
+may be able to forward this speculative data to a disclosure gadget which
+in turn allows inferring the value via a cache side channel attack.
+
+Because the buffers are potentially shared between Hyper-Threads, cross
+Hyper-Thread attacks are possible.
+
+Deeper technical information is available in the MDS specific x86
+architecture section: :ref:`Documentation/x86/mds.rst <mds>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the MDS vulnerabilities can be mounted from malicious
+non-privileged user space applications running on hosts or guests.
+Malicious guest OSes can obviously mount attacks as well.
+
+Contrary to other speculation based vulnerabilities the MDS vulnerability
+does not allow the attacker to control the memory target address. As a
+consequence the attacks are purely sampling based, but as demonstrated with
+the TLBleed attack samples can be postprocessed successfully.
+
+Web-Browsers
+^^^^^^^^^^^^
+
+  It's unclear whether attacks through Web-Browsers are possible at
+  all. The exploitation through Java-Script is considered very unlikely,
+  but other widely used web technologies like Webassembly could possibly be
+  abused.
+
+
+.. _mds_sys_info:
+
+MDS system information
+----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current MDS
+status of the system: whether the system is vulnerable, and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/mds
+
+The possible values in this file are:
+
+  .. list-table::
+
+     * - 'Not affected'
+       - The processor is not vulnerable
+     * - 'Vulnerable'
+       - The processor is vulnerable, but no mitigation enabled
+     * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+       - The processor is vulnerable but microcode is not updated.
+
+         The mitigation is enabled on a best effort basis. See :ref:`vmwerv`
+     * - 'Mitigation: Clear CPU buffers'
+       - The processor is vulnerable and the CPU buffer clearing mitigation is
+         enabled.
+
+If the processor is vulnerable then the following information is appended
+to the above information:
+
+    ========================  ============================================
+    'SMT vulnerable'          SMT is enabled
+    'SMT mitigated'           SMT is enabled and mitigated
+    'SMT disabled'            SMT is disabled
+    'SMT Host state unknown'  Kernel runs in a VM, Host SMT state unknown
+    ========================  ============================================
+
+.. _vmwerv:
+
+Best effort mitigation mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+  If the processor is vulnerable, but the availability of the microcode based
+  mitigation mechanism is not advertised via CPUID, the kernel selects a best
+  effort mitigation mode. This mode invokes the mitigation instructions
+  without a guarantee that they clear the CPU buffers.
+
+  This is done to address virtualization scenarios where the host has the
+  microcode update applied, but the hypervisor is not yet updated to expose
+  the CPUID to the guest. If the host has updated microcode the protection
+  takes effect; otherwise a few CPU cycles are wasted pointlessly.
+
+  The state in the mds sysfs file reflects this situation accordingly.
+
+
+Mitigation mechanism
+--------------------
+
+The kernel detects the affected CPUs and the presence of the microcode
+which is required.
+
+If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default. The mitigation can be controlled at boot
+time via a kernel command line option. See
+:ref:`mds_mitigation_control_command_line`.
+
+.. _cpu_buffer_clear:
+
+CPU buffer clearing
+^^^^^^^^^^^^^^^^^^^
+
+  The mitigation for MDS clears the affected CPU buffers on return to user
+  space and when entering a guest.
+
+  If SMT is enabled it also clears the buffers on idle entry when the CPU
+  is only affected by MSBDS and not any other MDS variant, because the
+  other variants cannot be protected against cross Hyper-Thread attacks.
+
+  For CPUs which are only affected by MSBDS the user space, guest and idle
+  transition mitigations are sufficient and SMT is not affected.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+  The protection for host to guest transition depends on the L1TF
+  vulnerability of the CPU:
+
+  - CPU is affected by L1TF:
+
+    If the L1D flush mitigation is enabled and up-to-date microcode is
+    available, the L1D flush mitigation automatically protects the
+    guest transition.
+
+    If the L1D flush mitigation is disabled then the MDS mitigation is
+    invoked explicitly when the host MDS mitigation is enabled.
+
+    For details on L1TF and virtualization see:
+    :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <mitigation_control_kvm>`.
+
+  - CPU is not affected by L1TF:
+
+    CPU buffers are flushed before entering the guest when the host MDS
+    mitigation is enabled.
+
+  The resulting MDS protection matrix for the host to guest transition:
+
+  ============ ===== ============= ============ =================
+   L1TF         MDS   VMX-L1FLUSH   Host MDS     MDS-State
+
+   Don't care   No    Don't care    N/A          Not affected
+
+   Yes          Yes   Disabled      Off          Vulnerable
+
+   Yes          Yes   Disabled      Full         Mitigated
+
+   Yes          Yes   Enabled       Don't care   Mitigated
+
+   No           Yes   N/A           Off          Vulnerable
+
+   No           Yes   N/A           Full         Mitigated
+  ============ ===== ============= ============ =================
+
+  This only covers the host to guest transition, i.e. it prevents leakage
+  from host to guest, but does not protect the guest internally. Guests
+  need to have their own protections.
+
+.. _xeon_phi:
+
+XEON PHI specific considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+  The XEON PHI processor family is affected by MSBDS which can be exploited
+  cross Hyper-Threads when entering idle states. Some XEON PHI variants allow
+  to use MWAIT in user space (Ring 3) which opens a potential attack vector
+  for malicious user space. The exposure can be disabled on the kernel
+  command line with the 'ring3mwait=disable' command line option.
+
+  XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
+  before the CPU enters an idle state. As XEON PHI is not affected by L1TF
+  either, disabling SMT is not required for full protection.
+
+.. _mds_smt_control:
+
+SMT control
+^^^^^^^^^^^
+
+  All MDS variants except MSBDS can be attacked cross Hyper-Threads. That
+  means on CPUs which are affected by MFBDS or MLPDS it is necessary to
+  disable SMT for full protection. These are most of the affected CPUs; the
+  exception is XEON PHI, see :ref:`xeon_phi`.
+
+  Disabling SMT can have a significant performance impact, but the impact
+  depends on the type of workloads.
+
+  See the relevant chapter in the L1TF mitigation documentation for details:
+  :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
+
+
+.. _mds_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the MDS mitigations at boot
+time with the option "mds=". The valid arguments for this option are:
+
+  ============  =============================================================
+  full          If the CPU is vulnerable, enable all available mitigations
+                for the MDS vulnerability: CPU buffer clearing on exit to
+                userspace and when entering a VM. Idle transitions are
+                protected as well if SMT is enabled.
+
+                It does not automatically disable SMT.
+
+  full,nosmt    The same as mds=full, with SMT disabled on vulnerable
+                CPUs. This is the complete mitigation.
+
+  off           Disables MDS mitigations completely.
+
+  ============  =============================================================
+
+Not specifying this option is equivalent to "mds=full".
+
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace
+^^^^^^^^^^^^^^^^^^^^
+
+   If all userspace applications are from a trusted source and do not
+   execute untrusted code which is supplied externally, then the mitigation
+   can be disabled.
+
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The same considerations as above versus trusted user space apply.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The protection depends on the state of the L1TF mitigations.
+   See :ref:`virt_mechanism`.
+
+   If the MDS mitigation is enabled and SMT is disabled, guest to host and
+   guest to guest attacks are prevented.
+
+.. _mds_default_mitigations:
+
+Default mitigations
+-------------------
+
+  The kernel default mitigations for vulnerable processors are:
+
+  - Enable CPU buffer clearing
+
+  The kernel does not by default enforce the disabling of SMT, which leaves
+  SMT systems vulnerable when running untrusted code. The same rationale as
+  for L1TF applies.
+  See :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <default_mitigations>`.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 5b8286fdd91b..8001917ee012 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -17,14 +17,12 @@ etc.
    kernel-parameters
    devices
 
-This section describes CPU vulnerabilities and provides an overview of the
-possible mitigations along with guidance for selecting mitigations if they
-are configurable at compile, boot or run time.
+This section describes CPU vulnerabilities and their mitigations.
 
 .. toctree::
    :maxdepth: 1
 
-   l1tf
+   hw-vuln/index
 
 Here is a set of documents aimed at users who are trying to track down
 problems and bugs in particular.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 08df58805703..43176340c73d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2143,7 +2143,7 @@
 
 			Default is 'flush'.
 
-			For details see: Documentation/admin-guide/l1tf.rst
+			For details see: Documentation/admin-guide/hw-vuln/l1tf.rst
 
 	l2cr=		[PPC]
 
@@ -2389,6 +2389,32 @@
 			Format: <first>,<last>
 			Specifies range of consoles to be captured by the MDA.
 
+	mds=		[X86,INTEL]
+			Control mitigation for the Micro-architectural Data
+			Sampling (MDS) vulnerability.
+
+			Certain CPUs are vulnerable to an exploit against CPU
+			internal buffers which can forward information to a
+			disclosure gadget under certain conditions.
+
+			In vulnerable processors, the speculatively
+			forwarded data can be used in a cache side channel
+			attack, to access data to which the attacker does
+			not have direct access.
+
+			This parameter controls the MDS mitigation. The
+			options are:
+
+			full       - Enable MDS mitigation on vulnerable CPUs
+			full,nosmt - Enable MDS mitigation and disable
+				     SMT on vulnerable CPUs
+			off        - Unconditionally disable MDS mitigation
+
+			Not specifying this option is equivalent to
+			mds=full.
+
+			For details see: Documentation/admin-guide/hw-vuln/mds.rst
+
 	mem=nn[KMG]	[KNL,BOOT] Force usage of a specific amount of memory
 			Amount of memory to be used when the kernel is not able
 			to see the whole system memory or for test.
@@ -2565,6 +2591,7 @@
 				spec_store_bypass_disable=off [X86,PPC]
 				ssbd=force-off [ARM64]
 				l1tf=off [X86]
+				mds=off [X86]
 
 			auto (default)
 				Mitigate all CPU vulnerabilities, but leave SMT
@@ -2579,6 +2606,7 @@
 			if needed. This is for users who always want to
 			be fully mitigated, even if it means losing SMT.
 			Equivalent to: l1tf=flush,nosmt [X86]
+				       mds=full,nosmt [X86]
 
 	mminit_loglevel=
 			[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 9e01aace4f48..a7566ef62411 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -114,6 +114,7 @@ implementation.
 
    x86/index
    sh/index
+   x86/index
 
 Filesystem Documentation
 ------------------------
diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py
new file mode 100644
index 000000000000..33c5c3142e20
--- /dev/null
+++ b/Documentation/x86/conf.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = "X86 architecture specific documentation"
+
+tags.add("subproject")
+
+latex_documents = [
+    ('index', 'x86.tex', project,
+     'The kernel development community', 'manual'),
+]
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 73a487957fd4..ae36fc5fc649 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -23,6 +23,7 @@ x86-specific Documentation
    intel_mpx
    amd-memory-encryption
    pti
+   mds
    microcode
    resctrl_ui
    usb-legacy-support
diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
new file mode 100644
index 000000000000..534e9baa4e1d
--- /dev/null
+++ b/Documentation/x86/mds.rst
@@ -0,0 +1,225 @@
+Microarchitectural Data Sampling (MDS) mitigation
+=================================================
+
+.. _mds:
+
+Overview
+--------
+
+Microarchitectural Data Sampling (MDS) is a family of side channel attacks
+on internal buffers in Intel CPUs. The variants are:
+
+ - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126)
+ - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130)
+ - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127)
+ - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091)
+
+MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a
+dependent load (store-to-load forwarding) as an optimization. The forward
+can also happen to a faulting or assisting load operation for a different
+memory address, which can be exploited under certain conditions. Store
+buffers are partitioned between Hyper-Threads so cross thread forwarding is
+not possible. But if a thread enters or exits a sleep state the store
+buffer is repartitioned which can expose data from one thread to the other.
+
+MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage
+L1 miss situations and to hold data which is returned or sent in response
+to a memory or I/O operation. Fill buffers can forward data to a load
+operation and also write data to the cache. When the fill buffer is
+deallocated it can retain the stale data of the preceding operations which
+can then be forwarded to a faulting or assisting load operation, which can
+be exploited under certain conditions. Fill buffers are shared between
+Hyper-Threads so cross thread leakage is possible.
+
+MLPDS leaks Load Port Data. Load ports are used to perform load operations
+from memory or I/O. The received data is then forwarded to the register
+file or a subsequent operation. In some implementations the Load Port can
+contain stale data from a previous operation which can be forwarded to
+faulting or assisting loads under certain conditions, which again can be
+exploited eventually. Load ports are shared between Hyper-Threads so cross
+thread leakage is possible.
+
+MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from
+memory that takes a fault or assist can leave data in a microarchitectural
+structure that may later be observed using one of the same methods used by
+MSBDS, MFBDS or MLPDS.
+
+Exposure assumptions
+--------------------
+
+It is assumed that attack code resides in user space or in a guest, with
+one exception. The rationale behind this assumption is that the code
+construct needed for exploiting MDS requires:
+
+ - to control the load to trigger a fault or assist
+
+ - to have a disclosure gadget which exposes the speculatively accessed
+   data for consumption through a side channel.
+
+ - to control the pointer through which the disclosure gadget exposes the
+   data
+
+The existence of such a construct in the kernel cannot be excluded with
+100% certainty, but the complexity involved makes it extremely unlikely.
+
+There is one exception, which is untrusted BPF. The functionality of
+untrusted BPF is limited, but it needs to be thoroughly investigated
+whether it can be used to create such a construct.
+
+
+Mitigation strategy
+-------------------
+
+All variants have the same mitigation strategy, at least for the single CPU
+thread case (SMT off): force the CPU to clear the affected buffers.
+
+This is achieved by using the otherwise unused and obsolete VERW
+instruction in combination with a microcode update. The microcode clears
+the affected CPU buffers when the VERW instruction is executed.
+
+For virtualization there are two ways to achieve CPU buffer clearing:
+either via the modified VERW instruction or via the L1D Flush command. The
+latter is issued when L1TF mitigation is enabled so the extra VERW can be
+avoided. If the CPU is not affected by L1TF then VERW needs to be issued.
+
+If the VERW instruction with the supplied segment selector argument is
+executed on a CPU without the microcode update there is no side effect
+other than a small number of pointlessly wasted CPU cycles.
+
+This does not protect against cross Hyper-Thread attacks except for MSBDS
+which is only exploitable cross Hyper-thread when one of the Hyper-Threads
+enters a C-state.
+
+The kernel provides a function to invoke the buffer clearing:
+
+    mds_clear_cpu_buffers()
+
+The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
+(idle) transitions.
+
+As a special quirk to address virtualization scenarios where the host has
+the microcode updated, but the hypervisor does not (yet) expose the
+MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the
+hope that it might actually clear the buffers. The state is reflected
+accordingly.
+
+According to current knowledge additional mitigations inside the kernel
+itself are not required because the necessary gadgets to expose the leaked
+data cannot be controlled in a way which allows exploitation from malicious
+user space or VM guests.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ ======= ============================================================
+ off     Mitigation is disabled. Either the CPU is not affected or
+         mds=off is supplied on the kernel command line
+
+ full    Mitigation is enabled. CPU is affected and MD_CLEAR is
+         advertised in CPUID.
+
+ vmwerv  Mitigation is enabled. CPU is affected and MD_CLEAR is not
+         advertised in CPUID. That is mainly for virtualization
+         scenarios where the host has the updated microcode but the
+         hypervisor does not expose MD_CLEAR in CPUID. It's a best
+         effort approach without guarantee.
+ ======= ============================================================
+
+If the CPU is affected and mds=off is not supplied on the kernel command
+line then the kernel selects the appropriate mitigation mode depending on
+the availability of the MD_CLEAR CPUID bit.
+
+Mitigation points
+-----------------
+
+1. Return to user space
+^^^^^^^^^^^^^^^^^^^^^^^
+
+   When transitioning from kernel to user space the CPU buffers are flushed
+   on affected CPUs when the mitigation is not disabled on the kernel
+   command line. The mitigation is enabled through the static key
+   mds_user_clear.
+
+   The mitigation is invoked in prepare_exit_to_usermode() which covers
+   most of the kernel to user space transitions. There are a few exceptions
+   which are not invoking prepare_exit_to_usermode() on return to user
+   space. These exceptions use the paranoid exit code.
+
+   - Non Maskable Interrupt (NMI):
+
+     Access to sensitive data like keys or credentials in the NMI context
+     is mostly theoretical: the CPU can do prefetching or execute a
+     misspeculated code path and thereby fetch data which might end up
+     leaking through a buffer.
+
+     But for mounting other attacks the kernel stack address of the task is
+     already valuable information. So in full mitigation mode, the NMI is
+     mitigated on the return from do_nmi() to provide almost complete
+     coverage.
+
+   - Double fault (#DF):
+
+     A double fault is usually fatal, but the ESPFIX workaround, which can
+     be triggered from user space through modify_ldt(2), is a recoverable
+     double fault. #DF uses the paranoid exit path, so explicit mitigation
+     in the double fault handler is required.
+
+   - Machine Check Exception (#MC):
+
+     Another corner case is a #MC which hits between the CPU buffer clear
+     invocation and the actual return to user. As this still is in kernel
+     space it takes the paranoid exit path which does not clear the CPU
+     buffers. So the #MC handler repopulates the buffers to some
+     extent. Machine checks are not reliably controllable and the window is
+     extremely small so mitigation would just tick a checkbox that this
+     theoretical corner case is covered. To keep the amount of special
+     cases small, ignore #MC.
+
+   - Debug Exception (#DB):
+
+     This takes the paranoid exit path only when the INT1 breakpoint is in
+     kernel space. #DB on a user space address takes the regular exit path,
+     so no extra mitigation is required.
+
+
+2. C-State transition
+^^^^^^^^^^^^^^^^^^^^^
+
+   When a CPU goes idle and enters a C-State the CPU buffers need to be
+   cleared on affected CPUs when SMT is active. This addresses the
+   repartitioning of the store buffer when one of the Hyper-Threads enters
+   a C-State.
+
+   When SMT is inactive, i.e. either the CPU does not support it or all
+   sibling threads are offline, CPU buffer clearing is not required.
+
+   The idle clearing is enabled on CPUs which are only affected by MSBDS
+   and not by any other MDS variant. The other MDS variants cannot be
+   protected against cross Hyper-Thread attacks because the Fill Buffer and
+   the Load Ports are shared. So on CPUs affected by other variants, the
+   idle clearing would be a window dressing exercise and is therefore not
+   activated.
+
+   The invocation is controlled by the static key mds_idle_clear which is
+   switched depending on the chosen mitigation mode and the SMT state of
+   the system.
+
+   The buffer clear is only invoked before entering the C-State to prevent
+   stale data from the idling CPU from spilling to the Hyper-Thread
+   sibling after the store buffer got repartitioned and all entries are
+   available to the non-idle sibling.
+
+   When coming out of idle the store buffer is partitioned again so each
+   sibling has half of it available. The CPU coming back from idle could
+   then be speculatively exposed to contents of the sibling. The buffers
+   are flushed either on exit to user space or on VMENTER so malicious
+   code in user space or the guest cannot speculatively access them.
+
+   The mitigation is hooked into all variants of halt()/mwait(), but does
+   not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver
+   was superseded by the intel_idle driver around 2010, and intel_idle is
+   preferred on all affected CPUs which are expected to gain the MD_CLEAR
+   functionality in microcode. Aside from that, the IO-Port mechanism is a
+   legacy interface which is only used on older systems which are either
+   not affected or do not receive microcode updates anymore.
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 51beb8d29123..a986b3c8294c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -32,6 +32,7 @@
 #include <asm/vdso.h>
 #include <asm/cpufeature.h>
 #include <asm/fpu/api.h>
+#include <asm/nospec-branch.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -220,6 +221,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 #endif
 
 	user_enter_irqoff();
+
+	mds_user_clear_cpu_buffers();
 }
 
 #define SYSCALL_EXIT_WORK_FLAGS \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 981ff9479648..75f27ee2c263 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -344,6 +344,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
@@ -382,5 +383,7 @@
 #define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 #define X86_BUG_SPEC_STORE_BYPASS	X86_BUG(17) /* CPU is affected by speculative store bypass attack */
 #define X86_BUG_L1TF			X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
+#define X86_BUG_MDS			X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
+#define X86_BUG_MSBDS_ONLY		X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 058e40fed167..8a0e56e1dcc9 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -6,6 +6,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/nospec-branch.h>
+
 /* Provide __cpuidle; we can't safely include <linux/cpu.h> */
 #define __cpuidle __attribute__((__section__(".cpuidle.text")))
 
@@ -54,11 +56,13 @@ static inline void native_irq_enable(void)
 
 static inline __cpuidle void native_safe_halt(void)
 {
+	mds_idle_clear_cpu_buffers();
 	asm volatile("sti; hlt": : :"memory");
 }
 
 static inline __cpuidle void native_halt(void)
 {
+	mds_idle_clear_cpu_buffers();
 	asm volatile("hlt": : :"memory");
 }
 
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1378518cf63f..88dd202c8b00 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_MSR_INDEX_H
 #define _ASM_X86_MSR_INDEX_H
 
+#include <linux/bits.h>
+
 /*
  * CPU model specific register (MSR) numbers.
  *
@@ -40,14 +42,14 @@
 /* Intel MSRs. Some also available on other CPUs */
 
 #define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
-#define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_IBRS			BIT(0)	   /* Indirect Branch Restricted Speculation */
 #define SPEC_CTRL_STIBP_SHIFT		1	   /* Single Thread Indirect Branch Predictor (STIBP) bit */
-#define SPEC_CTRL_STIBP			(1 << SPEC_CTRL_STIBP_SHIFT)	/* STIBP mask */
+#define SPEC_CTRL_STIBP			BIT(SPEC_CTRL_STIBP_SHIFT)	/* STIBP mask */
 #define SPEC_CTRL_SSBD_SHIFT		2	   /* Speculative Store Bypass Disable bit */
-#define SPEC_CTRL_SSBD			(1 << SPEC_CTRL_SSBD_SHIFT)	/* Speculative Store Bypass Disable */
+#define SPEC_CTRL_SSBD			BIT(SPEC_CTRL_SSBD_SHIFT)	/* Speculative Store Bypass Disable */
 
 #define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
-#define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_IBPB			BIT(0)	   /* Indirect Branch Prediction Barrier */
 
 #define MSR_PPIN_CTL			0x0000004e
 #define MSR_PPIN			0x0000004f
@@ -69,20 +71,25 @@
 #define MSR_MTRRcap			0x000000fe
 
 #define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
-#define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
-#define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
-#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH	(1 << 3)   /* Skip L1D flush on vmentry */
-#define ARCH_CAP_SSB_NO			(1 << 4)   /*
+#define ARCH_CAP_RDCL_NO		BIT(0)	   /* Not susceptible to Meltdown */
+#define ARCH_CAP_IBRS_ALL		BIT(1)	   /* Enhanced IBRS support */
+#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH	BIT(3)	   /* Skip L1D flush on vmentry */
+#define ARCH_CAP_SSB_NO			BIT(4)	   /*
 						    * Not susceptible to Speculative Store Bypass
 						    * attack, so no Speculative Store Bypass
 						    * control required.
 						    */
+#define ARCH_CAP_MDS_NO			BIT(5)	   /*
+						    * Not susceptible to
+						    * Microarchitectural Data
+						    * Sampling (MDS) vulnerabilities.
+						    */
 
 #define MSR_IA32_FLUSH_CMD		0x0000010b
-#define L1D_FLUSH			(1 << 0)   /*
+#define L1D_FLUSH			BIT(0)	   /*
 						    * Writeback and invalidate the
 						    * L1 data cache.
 						    */
 
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
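The new ARCH_CAP_MDS_NO bit (bit 5 of IA32_ARCH_CAPABILITIES, MSR 0x10a) can be inspected from user space through the msr driver. A minimal sketch, assuming the "msr" kernel module is loaded and the program runs as root:

/* Sketch: read IA32_ARCH_CAPABILITIES and test ARCH_CAP_MDS_NO.
 * The msr driver maps the MSR number to the pread() offset. CPUs
 * without this MSR make the pread() fail. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), 0x10a) != sizeof(val)) {
		perror("IA32_ARCH_CAPABILITIES");
		return 1;
	}
	printf("ARCH_CAP_MDS_NO: %s\n",
	       (val & (1ULL << 5)) ? "set (not affected)" : "clear");
	close(fd);
	return 0;
}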
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 39a2fb29378a..eb0f80ce8524 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -6,6 +6,7 @@
 #include <linux/sched/idle.h>
 
 #include <asm/cpufeature.h>
+#include <asm/nospec-branch.h>
 
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
@@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx,
 
 static inline void __mwait(unsigned long eax, unsigned long ecx)
 {
+	mds_idle_clear_cpu_buffers();
+
 	/* "mwait %eax, %ecx;" */
 	asm volatile(".byte 0x0f, 0x01, 0xc9;"
 		     :: "a" (eax), "c" (ecx));
@@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
 static inline void __mwaitx(unsigned long eax, unsigned long ebx,
 			    unsigned long ecx)
 {
+	/* No MDS buffer clear as this is AMD/HYGON only */
+
 	/* "mwaitx %eax, %ebx, %ecx;" */
 	asm volatile(".byte 0x0f, 0x01, 0xfb;"
 		     :: "a" (eax), "b" (ebx), "c" (ecx));
@@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
+	mds_idle_clear_cpu_buffers();
+
 	trace_hardirqs_on();
 	/* "mwait %eax, %ecx;" */
 	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index daf25b60c9e3..109f974f9835 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -308,6 +308,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
+DECLARE_STATIC_KEY_FALSE(mds_user_clear);
+DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+
+#include <asm/segment.h>
+
+/**
+ * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
+ *
+ * This uses the otherwise unused and obsolete VERW instruction in
+ * combination with microcode which triggers a CPU buffer flush when the
+ * instruction is executed.
+ */
+static inline void mds_clear_cpu_buffers(void)
+{
+	static const u16 ds = __KERNEL_DS;
+
+	/*
+	 * Has to be the memory-operand variant because only that
+	 * guarantees the CPU buffer flush functionality according to
+	 * documentation. The register-operand variant does not.
+	 * Works with any segment selector, but a valid writable
+	 * data segment is the fastest variant.
+	 *
+	 * "cc" clobber is required because VERW modifies ZF.
+	 */
+	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
+}
+
+/**
+ * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ *
+ * Clear CPU buffers if the corresponding static key is enabled
+ */
+static inline void mds_user_clear_cpu_buffers(void)
+{
+	if (static_branch_likely(&mds_user_clear))
+		mds_clear_cpu_buffers();
+}
+
+/**
+ * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ *
+ * Clear CPU buffers if the corresponding static key is enabled
+ */
+static inline void mds_idle_clear_cpu_buffers(void)
+{
+	if (static_branch_likely(&mds_idle_clear))
+		mds_clear_cpu_buffers();
+}
+
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7e99ef67bff0..c34a35c78618 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -978,4 +978,10 @@ enum l1tf_mitigations {
 
 extern enum l1tf_mitigations l1tf_mitigation;
 
+enum mds_mitigations {
+	MDS_MITIGATION_OFF,
+	MDS_MITIGATION_FULL,
+	MDS_MITIGATION_VMWERV,
+};
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 29630393f300..03b4cc0ec3a7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -37,6 +37,7 @@
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
 static void __init l1tf_select_mitigation(void);
+static void __init mds_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
 u64 x86_spec_ctrl_base;
@@ -63,6 +64,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 /* Control unconditional IBPB in switch_mm() */
 DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
+/* Control MDS CPU buffer clear before returning to user space */
+DEFINE_STATIC_KEY_FALSE(mds_user_clear);
+EXPORT_SYMBOL_GPL(mds_user_clear);
+/* Control MDS CPU buffer clear before idling (halt, mwait) */
+DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
+EXPORT_SYMBOL_GPL(mds_idle_clear);
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -101,6 +109,10 @@ void __init check_bugs(void)
 
 	l1tf_select_mitigation();
 
+	mds_select_mitigation();
+
+	arch_smt_update();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -207,6 +219,61 @@ static void x86_amd_ssb_disable(void)
 }
 
 #undef pr_fmt
+#define pr_fmt(fmt)	"MDS: " fmt
+
+/* Default mitigation for MDS-affected CPUs */
+static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL;
+static bool mds_nosmt __ro_after_init = false;
+
+static const char * const mds_strings[] = {
+	[MDS_MITIGATION_OFF]	= "Vulnerable",
+	[MDS_MITIGATION_FULL]	= "Mitigation: Clear CPU buffers",
+	[MDS_MITIGATION_VMWERV]	= "Vulnerable: Clear CPU buffers attempted, no microcode",
+};
+
+static void __init mds_select_mitigation(void)
+{
+	if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) {
+		mds_mitigation = MDS_MITIGATION_OFF;
+		return;
+	}
+
+	if (mds_mitigation == MDS_MITIGATION_FULL) {
+		if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
+			mds_mitigation = MDS_MITIGATION_VMWERV;
+
+		static_branch_enable(&mds_user_clear);
+
+		if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
+		    (mds_nosmt || cpu_mitigations_auto_nosmt()))
+			cpu_smt_disable(false);
+	}
+
+	pr_info("%s\n", mds_strings[mds_mitigation]);
+}
+
+static int __init mds_cmdline(char *str)
+{
+	if (!boot_cpu_has_bug(X86_BUG_MDS))
+		return 0;
+
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "off"))
+		mds_mitigation = MDS_MITIGATION_OFF;
+	else if (!strcmp(str, "full"))
+		mds_mitigation = MDS_MITIGATION_FULL;
+	else if (!strcmp(str, "full,nosmt")) {
+		mds_mitigation = MDS_MITIGATION_FULL;
+		mds_nosmt = true;
+	}
+
+	return 0;
+}
+early_param("mds", mds_cmdline);
+
+#undef pr_fmt
 #define pr_fmt(fmt)     "Spectre V2 : " fmt
 
 static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
@@ -575,9 +642,6 @@ specv2_set_mode:
 
 	/* Set up IBPB and STIBP depending on the general spectre V2 command */
 	spectre_v2_user_select_mitigation(cmd);
-
-	/* Enable STIBP if appropriate */
-	arch_smt_update();
 }
 
 static void update_stibp_msr(void * __unused)
@@ -611,6 +675,31 @@ static void update_indir_branch_cond(void)
 		static_branch_disable(&switch_to_cond_stibp);
 }
 
+#undef pr_fmt
+#define pr_fmt(fmt) fmt
+
+/* Update the static key controlling the MDS CPU buffer clear in idle */
+static void update_mds_branch_idle(void)
+{
+	/*
+	 * Enable the idle clearing if SMT is active on CPUs which are
+	 * affected only by MSBDS and not any other MDS variant.
+	 *
+	 * The other variants cannot be mitigated when SMT is enabled, so
+	 * clearing the buffers on idle just to prevent the Store Buffer
+	 * repartitioning leak would be a window dressing exercise.
+	 */
+	if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
+		return;
+
+	if (sched_smt_active())
+		static_branch_enable(&mds_idle_clear);
+	else
+		static_branch_disable(&mds_idle_clear);
+}
+
+#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+
 void arch_smt_update(void)
 {
 	/* Enhanced IBRS implies STIBP. No update required. */
@@ -632,6 +721,17 @@ void arch_smt_update(void)
 		break;
 	}
 
+	switch (mds_mitigation) {
+	case MDS_MITIGATION_FULL:
+	case MDS_MITIGATION_VMWERV:
+		if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
+			pr_warn_once(MDS_MSG_SMT);
+		update_mds_branch_idle();
+		break;
+	case MDS_MITIGATION_OFF:
+		break;
+	}
+
 	mutex_unlock(&spec_ctrl_mutex);
 }
 
@@ -1043,7 +1143,7 @@ static void __init l1tf_select_mitigation(void)
 		pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
 				half_pa);
 		pr_info("However, doing so will make a part of your RAM unusable.\n");
-		pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
+		pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n");
 		return;
 	}
 
@@ -1076,6 +1176,7 @@ static int __init l1tf_cmdline(char *str)
1076early_param("l1tf", l1tf_cmdline); 1176early_param("l1tf", l1tf_cmdline);
1077 1177
1078#undef pr_fmt 1178#undef pr_fmt
1179#define pr_fmt(fmt) fmt
1079 1180
1080#ifdef CONFIG_SYSFS 1181#ifdef CONFIG_SYSFS
1081 1182
@@ -1114,6 +1215,23 @@ static ssize_t l1tf_show_state(char *buf)
 }
 #endif
 
+static ssize_t mds_show_state(char *buf)
+{
+	if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
+		return sprintf(buf, "%s; SMT Host state unknown\n",
+			       mds_strings[mds_mitigation]);
+	}
+
+	if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) {
+		return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
+			       (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
+			        sched_smt_active() ? "mitigated" : "disabled"));
+	}
+
+	return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
+		       sched_smt_active() ? "vulnerable" : "disabled");
+}
+
 static char *stibp_state(void)
 {
 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
@@ -1180,6 +1298,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
 			return l1tf_show_state(buf);
 		break;
+
+	case X86_BUG_MDS:
+		return mds_show_state(buf);
+
 	default:
 		break;
 	}
@@ -1211,4 +1333,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
 }
+
+ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
+}
 #endif
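mds_show_state() above fixes the sysfs line format as "<mitigation>; SMT <state>", so a consumer can split on the "; SMT " separator. A small illustrative sketch (not kernel code):

/* Sketch of a userspace consumer of the "<mitigation>; SMT <state>"
 * format emitted by mds_show_state() above. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[256];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/mds", "r");

	if (!f || !fgets(buf, sizeof(buf), f))
		return 1;
	buf[strcspn(buf, "\n")] = '\0';

	char *smt = strstr(buf, "; SMT ");
	if (smt) {
		*smt = '\0';			/* split mitigation part ... */
		smt += strlen("; SMT ");	/* ... from the SMT state */
	}
	printf("mitigation: %s\n", buf);
	printf("smt state : %s\n", smt ? smt : "n/a (not affected)");
	fclose(f);
	return 0;
}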
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8739bdfe9bdf..d7f55ad2dfb1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -940,61 +940,77 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #endif
 }
 
-static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
-	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_SALTWELL,	X86_FEATURE_ANY },
-	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_SALTWELL_TABLET,	X86_FEATURE_ANY },
-	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_BONNELL_MID,	X86_FEATURE_ANY },
-	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_SALTWELL_MID,	X86_FEATURE_ANY },
-	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_BONNELL,	X86_FEATURE_ANY },
-	{ X86_VENDOR_CENTAUR,	5 },
-	{ X86_VENDOR_INTEL,	5 },
-	{ X86_VENDOR_NSC,	5 },
-	{ X86_VENDOR_ANY,	4 },
+#define NO_SPECULATION	BIT(0)
+#define NO_MELTDOWN	BIT(1)
+#define NO_SSB		BIT(2)
+#define NO_L1TF		BIT(3)
+#define NO_MDS		BIT(4)
+#define MSBDS_ONLY	BIT(5)
+
+#define VULNWL(_vendor, _family, _model, _whitelist)	\
+	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+
+#define VULNWL_INTEL(model, whitelist)		\
+	VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
+
+#define VULNWL_AMD(family, whitelist)		\
+	VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
+
+#define VULNWL_HYGON(family, whitelist)		\
+	VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
+
+static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+	VULNWL(ANY,	4, X86_MODEL_ANY,	NO_SPECULATION),
+	VULNWL(CENTAUR,	5, X86_MODEL_ANY,	NO_SPECULATION),
+	VULNWL(INTEL,	5, X86_MODEL_ANY,	NO_SPECULATION),
+	VULNWL(NSC,	5, X86_MODEL_ANY,	NO_SPECULATION),
+
+	/* Intel Family 6 */
+	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION),
+	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION),
+	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION),
+	VULNWL_INTEL(ATOM_BONNELL,		NO_SPECULATION),
+	VULNWL_INTEL(ATOM_BONNELL_MID,		NO_SPECULATION),
+
+	VULNWL_INTEL(ATOM_SILVERMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY),
+	VULNWL_INTEL(ATOM_SILVERMONT_X,		NO_SSB | NO_L1TF | MSBDS_ONLY),
+	VULNWL_INTEL(ATOM_SILVERMONT_MID,	NO_SSB | NO_L1TF | MSBDS_ONLY),
+	VULNWL_INTEL(ATOM_AIRMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY),
+	VULNWL_INTEL(XEON_PHI_KNL,		NO_SSB | NO_L1TF | MSBDS_ONLY),
+	VULNWL_INTEL(XEON_PHI_KNM,		NO_SSB | NO_L1TF | MSBDS_ONLY),
+
+	VULNWL_INTEL(CORE_YONAH,		NO_SSB),
+
+	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY),
+
+	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF),
+	VULNWL_INTEL(ATOM_GOLDMONT_X,		NO_MDS | NO_L1TF),
+	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF),
+
+	/* AMD Family 0xf - 0x12 */
+	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+
+	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS),
+	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS),
 	{}
 };
 
-static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
-	{ X86_VENDOR_AMD },
-	{ X86_VENDOR_HYGON },
-	{}
-};
-
-/* Only list CPUs which speculate but are non susceptible to SSB */
-static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT_X	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT_MID	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_CORE_YONAH		},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
-	{ X86_VENDOR_AMD,	0x12,					},
-	{ X86_VENDOR_AMD,	0x11,					},
-	{ X86_VENDOR_AMD,	0x10,					},
-	{ X86_VENDOR_AMD,	0xf,					},
-	{}
-};
+static bool __init cpu_matches(unsigned long which)
+{
+	const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
 
-static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
-	/* in addition to cpu_no_speculation */
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT_X	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT_MID	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT_MID	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT_X	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT_PLUS	},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
-	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
-	{}
-};
+	return m && !!(m->driver_data & which);
+}
 
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
 
-	if (x86_match_cpu(cpu_no_speculation))
+	if (cpu_matches(NO_SPECULATION))
 		return;
 
 	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
@@ -1003,15 +1019,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
 
-	if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
-	    !(ia32_cap & ARCH_CAP_SSB_NO) &&
+	if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
 	    !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
 		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
 
 	if (ia32_cap & ARCH_CAP_IBRS_ALL)
 		setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
 
-	if (x86_match_cpu(cpu_no_meltdown))
+	if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) {
+		setup_force_cpu_bug(X86_BUG_MDS);
+		if (cpu_matches(MSBDS_ONLY))
+			setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
+	}
+
+	if (cpu_matches(NO_MELTDOWN))
 		return;
 
 	/* Rogue Data Cache Load? No! */
@@ -1020,7 +1041,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
 
-	if (x86_match_cpu(cpu_no_l1tf))
+	if (cpu_matches(NO_L1TF))
 		return;
 
 	setup_force_cpu_bug(X86_BUG_L1TF);
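Note: the common.c rewrite above collapses four per-vulnerability match
tables into a single whitelist whose driver_data field carries exemption
bits, so one x86_match_cpu() lookup answers every "is this CPU exempt
from X?" question. A minimal self-contained sketch of that pattern
(hypothetical flag names and table entries; the kernel's table is
cpu_vuln_whitelist and the lookup is x86_match_cpu()):

	#include <stdbool.h>
	#include <stdio.h>

	#define NO_FOO (1UL << 0)		/* hypothetical exemption bits */
	#define NO_BAR (1UL << 1)

	struct demo_id {
		int family, model;
		unsigned long driver_data;	/* whitelist bits for this CPU */
	};

	static const struct demo_id demo_whitelist[] = {
		{ 6, 0x37, NO_FOO },		/* exempt from FOO only */
		{ 6, 0x5c, NO_FOO | NO_BAR },	/* exempt from both */
		{ 0, 0, 0 }			/* terminator */
	};

	/* One table walk serves all per-vulnerability queries. */
	static bool demo_matches(int family, int model, unsigned long which)
	{
		const struct demo_id *m;

		for (m = demo_whitelist; m->family; m++)
			if (m->family == family && m->model == model)
				return m->driver_data & which;
		return false;	/* not whitelisted: assume affected */
	}

	int main(void)
	{
		printf("6/0x37 exempt from BAR? %d\n", demo_matches(6, 0x37, NO_BAR));
		printf("6/0x5c exempt from BAR? %d\n", demo_matches(6, 0x5c, NO_BAR));
		return 0;
	}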
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 3755d0310026..05b09896cfaf 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -35,6 +35,7 @@
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
 #include <asm/cache.h>
+#include <asm/nospec-branch.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -551,6 +552,9 @@ nmi_restart:
 	write_cr2(this_cpu_read(nmi_cr2));
 	if (this_cpu_dec_return(nmi_state))
 		goto nmi_restart;
+
+	if (user_mode(regs))
+		mds_user_clear_cpu_buffers();
 }
 NOKPROBE_SYMBOL(do_nmi);
 
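Note: both the NMI exit above and the #DF path in traps.c below call
mds_user_clear_cpu_buffers(), which this series adds to
arch/x86/include/asm/nospec-branch.h. A sketch of the helper pair (a
close paraphrase, not a verbatim quote of the patch): the clear is keyed
off the mds_user_clear static branch set up in bugs.c, and uses the
memory-operand form of VERW, which MD_CLEAR microcode overloads to flush
the affected buffers:

	static inline void mds_clear_cpu_buffers(void)
	{
		static const u16 ds = __KERNEL_DS;

		/*
		 * Only the memory-operand form of VERW is documented to
		 * trigger the buffer clearing on MD_CLEAR microcode; the
		 * register form carries no such guarantee.
		 */
		asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
	}

	static inline void mds_user_clear_cpu_buffers(void)
	{
		if (static_branch_likely(&mds_user_clear))
			mds_clear_cpu_buffers();
	}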
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8b6d03e55d2f..7de466eb960b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -58,6 +58,7 @@
 #include <asm/alternative.h>
 #include <asm/fpu/xstate.h>
 #include <asm/trace/mpx.h>
+#include <asm/nospec-branch.h>
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
@@ -367,6 +368,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 		regs->ip = (unsigned long)general_protection;
 		regs->sp = (unsigned long)&gpregs->orig_ax;
 
+		/*
+		 * This situation can be triggered by userspace via
+		 * modify_ldt(2) and the return does not take the regular
+		 * user space exit, so a CPU buffer clear is required when
+		 * MDS mitigation is enabled.
+		 */
+		mds_user_clear_cpu_buffers();
 		return;
 	}
 #endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index fd3951638ae4..bbbe611f0c49 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -410,7 +410,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
 		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
-		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP);
+		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
+		F(MD_CLEAR);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
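Note: with F(MD_CLEAR) added above, a guest can see the buffer-clearing
capability in CPUID leaf 7, subleaf 0, EDX bit 10. A small userspace
check, as a sketch using GCC's cpuid.h:

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* CPUID.(EAX=7,ECX=0):EDX bit 10 enumerates MD_CLEAR */
		if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
			return 1;

		printf("MD_CLEAR %senumerated\n",
		       (edx & (1u << 10)) ? "" : "not ");
		return 0;
	}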
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9663d41cc2bc..e1fa935a545f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6431,8 +6431,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
 
+	/* L1D Flush includes CPU buffer clear to mitigate MDS */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
+	else if (static_branch_unlikely(&mds_user_clear))
+		mds_clear_cpu_buffers();
 
 	if (vcpu->arch.cr2 != read_cr2())
 		write_cr2(vcpu->arch.cr2);
@@ -6668,8 +6671,8 @@ free_partial_vcpu:
 	return ERR_PTR(err);
 }
 
-#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
-#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
+#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
+#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
 
 static int vmx_vm_init(struct kvm *kvm)
 {
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 668139cfa664..cc37511de866 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -548,11 +548,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_mds(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
 static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
@@ -560,6 +567,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_spectre_v2.attr,
 	&dev_attr_spec_store_bypass.attr,
 	&dev_attr_l1tf.attr,
+	&dev_attr_mds.attr,
 	NULL
 };
 
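Note: the net effect of the drivers/base/cpu.c change is a new
world-readable file, /sys/devices/system/cpu/vulnerabilities/mds, which
reports "Not affected" via the __weak stub unless the x86 code above
overrides it. A minimal reader, as a sketch:

	#include <stdio.h>

	int main(void)
	{
		char line[128];
		FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/mds", "r");

		if (!f) {
			perror("mds");	/* absent on pre-MDS kernels */
			return 1;
		}
		if (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* e.g. "Mitigation: Clear CPU buffers; SMT vulnerable" */
		fclose(f);
		return 0;
	}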
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 732745f865b7..3813fe45effd 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -57,6 +57,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
 					  struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_l1tf(struct device *dev,
 			     struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_mds(struct device *dev,
+			    struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 1598b4fa0b11..045f5f7d68ab 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
 endif
 
 turbostat : turbostat.c
-override CFLAGS +=	-Wall
+override CFLAGS +=	-Wall -I../../../include
 override CFLAGS +=	-DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
 override CFLAGS +=	-DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
 
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index ae7a0e09b722..1fdeef864e7c 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
 endif
 
 x86_energy_perf_policy : x86_energy_perf_policy.c
-override CFLAGS +=	-Wall
+override CFLAGS +=	-Wall -I../../../include
 override CFLAGS +=	-DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
 
 %: %.c