46 files changed, 2231 insertions, 276 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 06d0931119cc..fc20cde63d1e 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu | |||
@@ -486,6 +486,8 @@ What: /sys/devices/system/cpu/vulnerabilities | |||
486 | /sys/devices/system/cpu/vulnerabilities/spec_store_bypass | 486 | /sys/devices/system/cpu/vulnerabilities/spec_store_bypass |
487 | /sys/devices/system/cpu/vulnerabilities/l1tf | 487 | /sys/devices/system/cpu/vulnerabilities/l1tf |
488 | /sys/devices/system/cpu/vulnerabilities/mds | 488 | /sys/devices/system/cpu/vulnerabilities/mds |
489 | /sys/devices/system/cpu/vulnerabilities/tsx_async_abort | ||
490 | /sys/devices/system/cpu/vulnerabilities/itlb_multihit | ||
489 | Date: January 2018 | 491 | Date: January 2018 |
490 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | 492 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> |
491 | Description: Information about CPU vulnerabilities | 493 | Description: Information about CPU vulnerabilities |
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 49311f3da6f2..0795e3c2643f 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst | |||
@@ -12,3 +12,5 @@ are configurable at compile, boot or run time. | |||
12 | spectre | 12 | spectre |
13 | l1tf | 13 | l1tf |
14 | mds | 14 | mds |
15 | tsx_async_abort | ||
16 | multihit | ||
diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst new file mode 100644 index 000000000000..ba9988d8bce5 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/multihit.rst | |||
@@ -0,0 +1,163 @@ | |||
1 | iTLB multihit | ||
2 | ============= | ||
3 | |||
4 | iTLB multihit is an erratum where some processors may incur a machine check | ||
5 | error, possibly resulting in an unrecoverable CPU lockup, when an | ||
6 | instruction fetch hits multiple entries in the instruction TLB. This can | ||
7 | occur when the page size is changed along with either the physical address | ||
8 | or cache type. A malicious guest running on a virtualized system can | ||
9 | exploit this erratum to perform a denial of service attack. | ||
10 | |||
11 | |||
12 | Affected processors | ||
13 | ------------------- | ||
14 | |||
15 | Variations of this erratum are present on most Intel Core and Xeon processor | ||
16 | models. The erratum is not present on: | ||
17 | |||
18 | - non-Intel processors | ||
19 | |||
20 | - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont) | ||
21 | |||
22 | - Intel processors that have the PSCHANGE_MC_NO bit set in the | ||
23 | IA32_ARCH_CAPABILITIES MSR. | ||
24 | |||
25 | |||
26 | Related CVEs | ||
27 | ------------ | ||
28 | |||
29 | The following CVE entry is related to this issue: | ||
30 | |||
31 | ============== ================================================= | ||
32 | CVE-2018-12207 Machine Check Error Avoidance on Page Size Change | ||
33 | ============== ================================================= | ||
34 | |||
35 | |||
36 | Problem | ||
37 | ------- | ||
38 | |||
39 | Privileged software, including the OS and virtual machine managers (VMM), is in | ||
40 | charge of memory management. A key component in memory management is the control | ||
41 | of the page tables. Modern processors use virtual memory, a technique that creates | ||
42 | the illusion of a very large memory for processors. This virtual space is split | ||
43 | into pages of a given size. Page tables translate virtual addresses to physical | ||
44 | addresses. | ||
45 | |||
46 | To reduce latency when performing a virtual to physical address translation, | ||
47 | processors include a structure, called TLB, that caches recent translations. | ||
48 | There are separate TLBs for instruction (iTLB) and data (dTLB). | ||
49 | |||
50 | Under this erratum, instructions are fetched from a linear address translated | ||
51 | using a 4 KB translation cached in the iTLB. Privileged software modifies the | ||
52 | paging structure so that the same linear address is mapped using a large page | ||
53 | size (2 MB, 4 MB, 1 GB) with a different physical address or memory type. After the page | ||
54 | structure modification but before the software invalidates any iTLB entries for | ||
55 | the linear address, a code fetch that happens on the same linear address may | ||
56 | cause a machine-check error which can result in a system hang or shutdown. | ||
57 | |||
58 | |||
59 | Attack scenarios | ||
60 | ---------------- | ||
61 | |||
62 | Attacks against the iTLB multihit erratum can be mounted from malicious | ||
63 | guests in a virtualized system. | ||
64 | |||
65 | |||
66 | iTLB multihit system information | ||
67 | -------------------------------- | ||
68 | |||
69 | The Linux kernel provides a sysfs interface to enumerate the current iTLB | ||
70 | multihit status of the system: whether the system is vulnerable and which | ||
71 | mitigations are active. The relevant sysfs file is: | ||
72 | |||
73 | /sys/devices/system/cpu/vulnerabilities/itlb_multihit | ||
74 | |||
75 | The possible values in this file are: | ||
76 | |||
77 | .. list-table:: | ||
78 | |||
79 | * - Not affected | ||
80 | - The processor is not vulnerable. | ||
81 | * - KVM: Mitigation: Split huge pages | ||
82 | - Software changes mitigate this issue. | ||
83 | * - KVM: Vulnerable | ||
84 | - The processor is vulnerable, but no mitigation is enabled. | ||
85 | |||
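As an illustration only (not part of this patch), the file can be read from user
space like any other sysfs attribute; a minimal C sketch, assuming the file is
present on the running kernel::

   #include <stdio.h>

   int main(void)
   {
           char buf[128];
           FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/itlb_multihit", "r");

           if (!f)
                   return 1;       /* file absent if the kernel lacks this reporting */
           if (fgets(buf, sizeof(buf), f))
                   printf("itlb_multihit: %s", buf);
           fclose(f);
           return 0;
   }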
86 | |||
87 | Enumeration of the erratum | ||
88 | -------------------------------- | ||
89 | |||
90 | A new bit, PSCHANGE_MC_NO, has been allocated in the IA32_ARCH_CAPABILITIES MSR | ||
91 | and will be set on CPUs which are mitigated against this issue. | ||
92 | |||
93 | ======================================= =========== =============================== | ||
94 | IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable, check model | ||
95 | IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable, check model | ||
96 | IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable | ||
97 | ======================================= =========== =============================== | ||
98 | |||
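For illustration only (not part of this patch), the bit can also be inspected
from user space through the msr driver; a rough C sketch, assuming root
privileges, a loaded "msr" module and a CPU that implements
IA32_ARCH_CAPABILITIES (MSR address 0x10a)::

   #include <fcntl.h>
   #include <stdint.h>
   #include <stdio.h>
   #include <unistd.h>

   int main(void)
   {
           uint64_t cap = 0;
           int fd = open("/dev/cpu/0/msr", O_RDONLY);

           /* pread() at the MSR address returns its 64-bit value. */
           if (fd < 0 || pread(fd, &cap, sizeof(cap), 0x10a) != (ssize_t)sizeof(cap)) {
                   perror("IA32_ARCH_CAPABILITIES");
                   return 1;
           }
           /* PSCHANGE_MC_NO is bit 6, as defined later in this patch. */
           printf("PSCHANGE_MC_NO: %llu\n", (unsigned long long)((cap >> 6) & 1));
           close(fd);
           return 0;
   }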
99 | |||
100 | Mitigation mechanism | ||
101 | ------------------------- | ||
102 | |||
103 | This erratum can be mitigated by restricting the use of large page sizes to | ||
104 | non-executable pages. This forces all iTLB entries to be 4K, and removes | ||
105 | the possibility of multiple hits. | ||
106 | |||
107 | In order to mitigate the vulnerability, KVM initially marks all huge pages | ||
108 | as non-executable. If the guest attempts to execute in one of those pages, | ||
109 | the page is broken down into 4K pages, which are then marked executable. | ||
110 | |||
111 | If EPT is disabled or not available on the host, KVM is in control of TLB | ||
112 | flushes and the problematic situation cannot happen. However, the shadow | ||
113 | EPT paging mechanism used by nested virtualization is vulnerable, because | ||
114 | the nested guest can trigger multiple iTLB hits by modifying its own | ||
115 | (non-nested) page tables. For simplicity, KVM will make large pages | ||
116 | non-executable in all shadow paging modes. | ||
117 | |||
118 | Mitigation control on the kernel command line and KVM module parameter | ||
119 | ------------------------------------------------------------------------ | ||
120 | |||
121 | The KVM hypervisor mitigation mechanism for marking huge pages as | ||
122 | non-executable can be controlled with a module parameter "nx_huge_pages=". | ||
123 | The kernel command line allows control of the iTLB multihit mitigations at | ||
124 | boot time with the option "kvm.nx_huge_pages=". | ||
125 | |||
126 | The valid arguments for these options are: | ||
127 | |||
128 | ========== ================================================================ | ||
129 | force Mitigation is enabled. In this case, the mitigation implements | ||
130 | non-executable huge pages in the Linux kernel KVM module. All huge | ||
131 | pages in the EPT are marked as non-executable. | ||
132 | If a guest attempts to execute in one of those pages, the page is | ||
133 | broken down into 4K pages, which are then marked executable. | ||
134 | |||
135 | off Mitigation is disabled. | ||
136 | |||
137 | auto Enable mitigation only if the platform is affected and the kernel | ||
138 | was not booted with the "mitigations=off" command line parameter. | ||
139 | This is the default option. | ||
140 | ========== ================================================================ | ||
141 | |||
142 | |||
143 | Mitigation selection guide | ||
144 | -------------------------- | ||
145 | |||
146 | 1. No virtualization in use | ||
147 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
148 | |||
149 | The system is protected by the kernel unconditionally and no further | ||
150 | action is required. | ||
151 | |||
152 | 2. Virtualization with trusted guests | ||
153 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
154 | |||
155 | If the guest comes from a trusted source, you may assume that the guest will | ||
156 | not attempt to maliciously exploit this erratum and no further action is | ||
157 | required. | ||
158 | |||
159 | 3. Virtualization with untrusted guests | ||
160 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
161 | If the guest comes from an untrusted source, the host kernel will need | ||
162 | to apply iTLB multihit mitigation via the kernel command line or kvm | ||
163 | module parameter. | ||
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst new file mode 100644 index 000000000000..fddbd7579c53 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst | |||
@@ -0,0 +1,276 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | TAA - TSX Asynchronous Abort | ||
4 | ====================================== | ||
5 | |||
6 | TAA is a hardware vulnerability that allows unprivileged speculative access to | ||
7 | data which is available in various CPU internal buffers by using asynchronous | ||
8 | aborts within an Intel TSX transactional region. | ||
9 | |||
10 | Affected processors | ||
11 | ------------------- | ||
12 | |||
13 | This vulnerability only affects Intel processors that support Intel | ||
14 | Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8) | ||
15 | is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit | ||
16 | (bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations | ||
17 | also mitigate against TAA. | ||
18 | |||
19 | Whether a processor is affected or not can be read out from the TAA | ||
20 | vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`. | ||
21 | |||
22 | Related CVEs | ||
23 | ------------ | ||
24 | |||
25 | The following CVE entry is related to this TAA issue: | ||
26 | |||
27 | ============== ===== =================================================== | ||
28 | CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some | ||
29 | microprocessors utilizing speculative execution may | ||
30 | allow an authenticated user to potentially enable | ||
31 | information disclosure via a side channel with | ||
32 | local access. | ||
33 | ============== ===== =================================================== | ||
34 | |||
35 | Problem | ||
36 | ------- | ||
37 | |||
38 | When performing store, load or L1 refill operations, processors write | ||
39 | data into temporary microarchitectural structures (buffers). The data in | ||
40 | those buffers can be forwarded to load operations as an optimization. | ||
41 | |||
42 | Intel TSX is an extension to the x86 instruction set architecture that adds | ||
43 | hardware transactional memory support to improve performance of multi-threaded | ||
44 | software. TSX lets the processor expose and exploit concurrency hidden in an | ||
45 | application by dynamically avoiding unnecessary synchronization. | ||
46 | |||
47 | TSX supports atomic memory transactions that are either committed (success) or | ||
48 | aborted. During an abort, operations that happened within the transactional region | ||
49 | are rolled back. An asynchronous abort takes place, among other causes, when a | ||
50 | different thread accesses a cache line that is also used within the transactional | ||
51 | region and that access might lead to a data race. | ||
52 | |||
53 | Immediately after an uncompleted asynchronous abort, certain speculatively | ||
54 | executed loads may read data from those internal buffers and pass it to dependent | ||
55 | operations. This can then be used to infer the value via a cache side channel | ||
56 | attack. | ||
57 | |||
58 | Because the buffers are potentially shared between Hyper-Threads, cross | ||
59 | Hyper-Thread attacks are possible. | ||
60 | |||
61 | The victim of a malicious actor does not need to make use of TSX. Only the | ||
62 | attacker needs to begin a TSX transaction and raise an asynchronous abort | ||
63 | which in turn potentially leaks data stored in the buffers. | ||
64 | |||
65 | More detailed technical information is available in the TAA specific x86 | ||
66 | architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`. | ||
67 | |||
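For readers unfamiliar with TSX, the sketch below (illustration only, not part
of this patch) shows a minimal RTM transactional region using the compiler
intrinsics; it assumes a TSX-capable CPU with TSX enabled and a compiler flag
such as gcc -mrtm::

   #include <immintrin.h>
   #include <stdio.h>

   int main(void)
   {
           int counter = 0;
           unsigned int status = _xbegin();        /* open a transactional region */

           if (status == _XBEGIN_STARTED) {
                   counter++;                      /* tracked transactionally */
                   _xend();                        /* commit */
           } else {
                   /* Any abort, including an asynchronous one, resumes here and
                      the transactional update to 'counter' is rolled back. */
                   counter = 1;                    /* non-transactional fallback */
           }
           printf("counter = %d\n", counter);
           return 0;
   }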
68 | |||
69 | Attack scenarios | ||
70 | ---------------- | ||
71 | |||
72 | Attacks against the TAA vulnerability can be implemented from unprivileged | ||
73 | applications running on hosts or guests. | ||
74 | |||
75 | As for MDS, the attacker has no control over the memory addresses that can | ||
76 | be leaked. Only the victim is responsible for bringing data to the CPU. As | ||
77 | a result, the malicious actor has to sample as much data as possible and | ||
78 | then postprocess it to try to infer any useful information from it. | ||
79 | |||
80 | A potential attacker only has read access to the data. Also, there is no direct | ||
81 | privilege escalation by using this technique. | ||
82 | |||
83 | |||
84 | .. _tsx_async_abort_sys_info: | ||
85 | |||
86 | TAA system information | ||
87 | ----------------------- | ||
88 | |||
89 | The Linux kernel provides a sysfs interface to enumerate the current TAA status | ||
90 | of the system. The relevant sysfs file is: | ||
91 | |||
92 | /sys/devices/system/cpu/vulnerabilities/tsx_async_abort | ||
93 | |||
94 | The possible values in this file are: | ||
95 | |||
96 | .. list-table:: | ||
97 | |||
98 | * - 'Vulnerable' | ||
99 | - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied. | ||
100 | * - 'Vulnerable: Clear CPU buffers attempted, no microcode' | ||
101 | - The system tries to clear the buffers but the microcode might not support the operation. | ||
102 | * - 'Mitigation: Clear CPU buffers' | ||
103 | - The microcode has been updated to clear the buffers. TSX is still enabled. | ||
104 | * - 'Mitigation: TSX disabled' | ||
105 | - TSX is disabled. | ||
106 | * - 'Not affected' | ||
107 | - The CPU is not affected by this issue. | ||
108 | |||
109 | .. _ucode_needed: | ||
110 | |||
111 | Best effort mitigation mode | ||
112 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
113 | |||
114 | If the processor is vulnerable, but the availability of the microcode-based | ||
115 | mitigation mechanism is not advertised via CPUID, the kernel selects a best | ||
116 | effort mitigation mode. This mode invokes the mitigation instructions | ||
117 | without a guarantee that they clear the CPU buffers. | ||
118 | |||
119 | This is done to address virtualization scenarios where the host has the | ||
120 | microcode update applied, but the hypervisor is not yet updated to expose the | ||
121 | CPUID to the guest. If the host has updated microcode the protection takes | ||
122 | effect; otherwise a few CPU cycles are wasted pointlessly. | ||
123 | |||
124 | The state in the tsx_async_abort sysfs file reflects this situation | ||
125 | accordingly. | ||
126 | |||
127 | |||
128 | Mitigation mechanism | ||
129 | -------------------- | ||
130 | |||
131 | The kernel detects the affected CPUs and the presence of the microcode which is | ||
132 | required. If a CPU is affected and the microcode is available, then the kernel | ||
133 | enables the mitigation by default. | ||
134 | |||
135 | |||
136 | The mitigation can be controlled at boot time via a kernel command line option. | ||
137 | See :ref:`taa_mitigation_control_command_line`. | ||
138 | |||
139 | .. _virt_mechanism: | ||
140 | |||
141 | Virtualization mitigation | ||
142 | ^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
143 | |||
144 | Affected systems where the host has TAA microcode and TAA is mitigated by | ||
145 | having disabled TSX previously, are not vulnerable regardless of the status | ||
146 | of the VMs. | ||
147 | |||
148 | In all other cases, if the host either does not have the TAA microcode or | ||
149 | the kernel is not mitigated, the system might be vulnerable. | ||
150 | |||
151 | |||
152 | .. _taa_mitigation_control_command_line: | ||
153 | |||
154 | Mitigation control on the kernel command line | ||
155 | --------------------------------------------- | ||
156 | |||
157 | The kernel command line allows control of the TAA mitigations at boot time with | ||
158 | the option "tsx_async_abort=". The valid arguments for this option are: | ||
159 | |||
160 | ============ ============================================================= | ||
161 | off This option disables the TAA mitigation on affected platforms. | ||
162 | If the system has TSX enabled (see next parameter) and the CPU | ||
163 | is affected, the system is vulnerable. | ||
164 | |||
165 | full TAA mitigation is enabled. If TSX is enabled, on an affected | ||
166 | system it will clear CPU buffers on ring transitions. On | ||
167 | systems which are MDS-affected and deploy MDS mitigation, | ||
168 | TAA is also mitigated. Specifying this option on those | ||
169 | systems will have no effect. | ||
170 | |||
171 | full,nosmt The same as tsx_async_abort=full, with SMT disabled on | ||
172 | vulnerable CPUs that have TSX enabled. This is the complete | ||
173 | mitigation. When TSX is disabled, SMT is not disabled because | ||
174 | the CPU is not vulnerable to cross-thread TAA attacks. | ||
175 | ============ ============================================================= | ||
176 | |||
177 | Not specifying this option is equivalent to "tsx_async_abort=full". | ||
178 | |||
179 | The kernel command line also allows control of the TSX feature using the | ||
180 | parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used | ||
181 | to control the TSX feature and the enumeration of the TSX feature bits (RTM | ||
182 | and HLE) in CPUID. | ||
183 | |||
184 | The valid options are: | ||
185 | |||
186 | ============ ============================================================= | ||
187 | off Disables TSX on the system. | ||
188 | |||
189 | Note that this option takes effect only on newer CPUs which are | ||
190 | not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 | ||
191 | and which get the new IA32_TSX_CTRL MSR through a microcode | ||
192 | update. This new MSR allows for the reliable deactivation of | ||
193 | the TSX functionality. | ||
194 | |||
195 | on Enables TSX. | ||
196 | |||
197 | Although there are mitigations for all known security | ||
198 | vulnerabilities, TSX has been known to be an accelerator for | ||
199 | several previous speculation-related CVEs, and so there may be | ||
200 | unknown security risks associated with leaving it enabled. | ||
201 | |||
202 | auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX | ||
203 | on the system. | ||
204 | ============ ============================================================= | ||
205 | |||
206 | Not specifying this option is equivalent to "tsx=off". | ||
207 | |||
208 | The following combinations of the "tsx_async_abort" and "tsx" options are possible. | ||
209 | For affected platforms tsx=auto is equivalent to tsx=off and the result will be: | ||
210 | |||
211 | ========= ========================== ========================================= | ||
212 | tsx=on tsx_async_abort=full The system will use VERW to clear CPU | ||
213 | buffers. Cross-thread attacks are still | ||
214 | possible on SMT machines. | ||
215 | tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT | ||
216 | mitigated. | ||
217 | tsx=on tsx_async_abort=off The system is vulnerable. | ||
218 | tsx=off tsx_async_abort=full TSX might be disabled if microcode | ||
219 | provides a TSX control MSR. If so, | ||
220 | the system is not vulnerable. | ||
221 | tsx=off tsx_async_abort=full,nosmt Ditto | ||
222 | tsx=off tsx_async_abort=off Ditto | ||
223 | ========= ========================== ========================================= | ||
224 | |||
225 | |||
226 | For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear CPU | ||
227 | buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0) | ||
228 | "tsx" command line argument has no effect. | ||
229 | |||
230 | For the affected platforms, the table below indicates the mitigation status for the | ||
231 | combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO | ||
232 | and TSX_CTRL_MSR. An illustrative helper encoding the same table follows it. | ||
233 | |||
234 | ======= ========= ============= ======================================== | ||
235 | MDS_NO MD_CLEAR TSX_CTRL_MSR Status | ||
236 | ======= ========= ============= ======================================== | ||
237 | 0 0 0 Vulnerable (needs microcode) | ||
238 | 0 1 0 MDS and TAA mitigated via VERW | ||
239 | 1 1 0 MDS fixed, TAA vulnerable if TSX enabled | ||
240 | because MD_CLEAR has no meaning and | ||
241 | VERW is not guaranteed to clear buffers | ||
242 | 1 X 1 MDS fixed, TAA can be mitigated by | ||
243 | VERW or TSX_CTRL_MSR | ||
244 | ======= ========= ============= ======================================== | ||
245 | |||
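The table can be condensed into a tiny self-contained C helper (illustration
only, not part of this patch; the in-kernel decision is made by
taa_select_mitigation() later in this series). The bit positions follow the
msr-index.h additions in this patch; the md_clear and tsx_on flags stand in for
the CPUID bit and the current TSX state::

   #include <stdbool.h>
   #include <stdio.h>

   #define MDS_NO        (1ULL << 5)      /* IA32_ARCH_CAPABILITIES bit 5 */
   #define TSX_CTRL_MSR  (1ULL << 7)      /* IA32_ARCH_CAPABILITIES bit 7 */

   static const char *taa_status(unsigned long long cap, bool md_clear, bool tsx_on)
   {
           if (!(cap & MDS_NO))
                   return md_clear ? "MDS and TAA mitigated via VERW"
                                   : "Vulnerable (needs microcode)";
           if (!(cap & TSX_CTRL_MSR))
                   return tsx_on ? "TAA vulnerable, VERW not guaranteed (needs microcode)"
                                 : "TAA not exploitable, TSX already off";
           return "TAA can be mitigated by VERW or by disabling TSX";
   }

   int main(void)
   {
           /* Example: MDS-fixed CPU that also has the TSX control MSR. */
           printf("%s\n", taa_status(MDS_NO | TSX_CTRL_MSR, true, true));
           return 0;
   }
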
246 | Mitigation selection guide | ||
247 | -------------------------- | ||
248 | |||
249 | 1. Trusted userspace and guests | ||
250 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
251 | |||
252 | If all user space applications are from a trusted source and do not execute | ||
253 | untrusted code which is supplied externally, then the mitigation can be | ||
254 | disabled. The same applies to virtualized environments with trusted guests. | ||
255 | |||
256 | |||
257 | 2. Untrusted userspace and guests | ||
258 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
259 | |||
260 | If there are untrusted applications or guests on the system, enabling TSX | ||
261 | might allow a malicious actor to leak data from the host or from other | ||
262 | processes running on the same physical core. | ||
263 | |||
264 | If the microcode is available and TSX is disabled on the host, attacks | ||
265 | are prevented in a virtualized environment as well, even if the VMs do not | ||
266 | explicitly enable the mitigation. | ||
267 | |||
268 | |||
269 | .. _taa_default_mitigations: | ||
270 | |||
271 | Default mitigations | ||
272 | ------------------- | ||
273 | |||
274 | The kernel's default action for vulnerable processors is: | ||
275 | |||
276 | - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off). | ||
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a84a83f8881e..8dee8f68fe15 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
@@ -2055,6 +2055,25 @@ | |||
2055 | KVM MMU at runtime. | 2055 | KVM MMU at runtime. |
2056 | Default is 0 (off) | 2056 | Default is 0 (off) |
2057 | 2057 | ||
2058 | kvm.nx_huge_pages= | ||
2059 | [KVM] Controls the software workaround for the | ||
2060 | X86_BUG_ITLB_MULTIHIT bug. | ||
2061 | force : Always deploy workaround. | ||
2062 | off : Never deploy workaround. | ||
2063 | auto : Deploy workaround based on the presence of | ||
2064 | X86_BUG_ITLB_MULTIHIT. | ||
2065 | |||
2066 | Default is 'auto'. | ||
2067 | |||
2068 | If the software workaround is enabled for the host, | ||
2069 | guests do not need to enable it for nested guests. | ||
2070 | |||
2071 | kvm.nx_huge_pages_recovery_ratio= | ||
2072 | [KVM] Controls how many 4KiB pages are periodically zapped | ||
2073 | back to huge pages. 0 disables the recovery, otherwise if | ||
2074 | the value is N KVM will zap 1/Nth of the 4KiB pages every | ||
2075 | minute. The default is 60. | ||
2076 | |||
2058 | kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. | 2077 | kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. |
2059 | Default is 1 (enabled) | 2078 | Default is 1 (enabled) |
2060 | 2079 | ||
@@ -2636,6 +2655,13 @@ | |||
2636 | ssbd=force-off [ARM64] | 2655 | ssbd=force-off [ARM64] |
2637 | l1tf=off [X86] | 2656 | l1tf=off [X86] |
2638 | mds=off [X86] | 2657 | mds=off [X86] |
2658 | tsx_async_abort=off [X86] | ||
2659 | kvm.nx_huge_pages=off [X86] | ||
2660 | |||
2661 | Exceptions: | ||
2662 | This does not have any effect on | ||
2663 | kvm.nx_huge_pages when | ||
2664 | kvm.nx_huge_pages=force. | ||
2639 | 2665 | ||
2640 | auto (default) | 2666 | auto (default) |
2641 | Mitigate all CPU vulnerabilities, but leave SMT | 2667 | Mitigate all CPU vulnerabilities, but leave SMT |
@@ -2651,6 +2677,7 @@ | |||
2651 | be fully mitigated, even if it means losing SMT. | 2677 | be fully mitigated, even if it means losing SMT. |
2652 | Equivalent to: l1tf=flush,nosmt [X86] | 2678 | Equivalent to: l1tf=flush,nosmt [X86] |
2653 | mds=full,nosmt [X86] | 2679 | mds=full,nosmt [X86] |
2680 | tsx_async_abort=full,nosmt [X86] | ||
2654 | 2681 | ||
2655 | mminit_loglevel= | 2682 | mminit_loglevel= |
2656 | [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this | 2683 | [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this |
@@ -4848,6 +4875,71 @@ | |||
4848 | interruptions from clocksource watchdog are not | 4875 | interruptions from clocksource watchdog are not |
4849 | acceptable). | 4876 | acceptable). |
4850 | 4877 | ||
4878 | tsx= [X86] Control Transactional Synchronization | ||
4879 | Extensions (TSX) feature in Intel processors that | ||
4880 | support TSX control. | ||
4881 | |||
4882 | This parameter controls the TSX feature. The options are: | ||
4883 | |||
4884 | on - Enable TSX on the system. Although there are | ||
4885 | mitigations for all known security vulnerabilities, | ||
4886 | TSX has been known to be an accelerator for | ||
4887 | several previous speculation-related CVEs, and | ||
4888 | so there may be unknown security risks associated | ||
4889 | with leaving it enabled. | ||
4890 | |||
4891 | off - Disable TSX on the system. (Note that this | ||
4892 | option takes effect only on newer CPUs which are | ||
4893 | not vulnerable to MDS, i.e., have | ||
4894 | MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get | ||
4895 | the new IA32_TSX_CTRL MSR through a microcode | ||
4896 | update. This new MSR allows for the reliable | ||
4897 | deactivation of the TSX functionality.) | ||
4898 | |||
4899 | auto - Disable TSX if X86_BUG_TAA is present, | ||
4900 | otherwise enable TSX on the system. | ||
4901 | |||
4902 | Not specifying this option is equivalent to tsx=off. | ||
4903 | |||
4904 | See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst | ||
4905 | for more details. | ||
4906 | |||
4907 | tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async | ||
4908 | Abort (TAA) vulnerability. | ||
4909 | |||
4910 | Similar to Micro-architectural Data Sampling (MDS), | ||
4911 | certain CPUs that support Transactional | ||
4912 | Synchronization Extensions (TSX) are vulnerable to an | ||
4913 | exploit against CPU internal buffers which can forward | ||
4914 | information to a disclosure gadget under certain | ||
4915 | conditions. | ||
4916 | |||
4917 | In vulnerable processors, the speculatively forwarded | ||
4918 | data can be used in a cache side channel attack, to | ||
4919 | access data to which the attacker does not have direct | ||
4920 | access. | ||
4921 | |||
4922 | This parameter controls the TAA mitigation. The | ||
4923 | options are: | ||
4924 | |||
4925 | full - Enable TAA mitigation on vulnerable CPUs | ||
4926 | if TSX is enabled. | ||
4927 | |||
4928 | full,nosmt - Enable TAA mitigation and disable SMT on | ||
4929 | vulnerable CPUs. If TSX is disabled, SMT | ||
4930 | is not disabled because CPU is not | ||
4931 | vulnerable to cross-thread TAA attacks. | ||
4932 | off - Unconditionally disable TAA mitigation | ||
4933 | |||
4934 | Not specifying this option is equivalent to | ||
4935 | tsx_async_abort=full. On CPUs which are MDS affected | ||
4936 | and deploy MDS mitigation, TAA mitigation is not | ||
4937 | required and doesn't provide any additional | ||
4938 | mitigation. | ||
4939 | |||
4940 | For details see: | ||
4941 | Documentation/admin-guide/hw-vuln/tsx_async_abort.rst | ||
4942 | |||
4851 | turbografx.map[2|3]= [HW,JOY] | 4943 | turbografx.map[2|3]= [HW,JOY] |
4852 | TurboGraFX parallel port interface | 4944 | TurboGraFX parallel port interface |
4853 | Format: | 4945 | Format: |
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index af64c4bb4447..a8de2fbc1caa 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst | |||
@@ -27,6 +27,7 @@ x86-specific Documentation | |||
27 | mds | 27 | mds |
28 | microcode | 28 | microcode |
29 | resctrl_ui | 29 | resctrl_ui |
30 | tsx_async_abort | ||
30 | usb-legacy-support | 31 | usb-legacy-support |
31 | i386/index | 32 | i386/index |
32 | x86_64/index | 33 | x86_64/index |
diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/x86/tsx_async_abort.rst new file mode 100644 index 000000000000..583ddc185ba2 --- /dev/null +++ b/Documentation/x86/tsx_async_abort.rst | |||
@@ -0,0 +1,117 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | TSX Async Abort (TAA) mitigation | ||
4 | ================================ | ||
5 | |||
6 | .. _tsx_async_abort: | ||
7 | |||
8 | Overview | ||
9 | -------- | ||
10 | |||
11 | TSX Async Abort (TAA) is a side channel attack on internal buffers in some | ||
12 | Intel processors, similar to Microarchitectural Data Sampling (MDS). In this | ||
13 | case certain loads may speculatively pass invalid data to dependent operations | ||
14 | when an asynchronous abort condition is pending in a Transactional | ||
15 | Synchronization Extensions (TSX) transaction. This includes loads with no | ||
16 | fault or assist condition. Such loads may speculatively expose stale data from | ||
17 | the same uarch data structures as in MDS, with the same scope of exposure, i.e. | ||
18 | same-thread and cross-thread. This issue affects all current processors that | ||
19 | support TSX. | ||
20 | |||
21 | Mitigation strategy | ||
22 | ------------------- | ||
23 | |||
24 | a) TSX disable - one of the mitigations is to disable TSX. A new MSR | ||
25 | IA32_TSX_CTRL will be available in future and current processors after a | ||
26 | microcode update, and can be used to disable TSX. In addition, it | ||
27 | controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID. | ||
28 | |||
29 | b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this | ||
30 | vulnerability. More details on this approach can be found in | ||
31 | :ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`. | ||
32 | |||
33 | Kernel internal mitigation modes | ||
34 | -------------------------------- | ||
35 | |||
36 | ============= ============================================================ | ||
37 | off Mitigation is disabled. Either the CPU is not affected or | ||
38 | tsx_async_abort=off is supplied on the kernel command line. | ||
39 | |||
40 | tsx disabled Mitigation is enabled. TSX feature is disabled by default at | ||
41 | bootup on processors that support TSX control. | ||
42 | |||
43 | verw Mitigation is enabled. CPU is affected and MD_CLEAR is | ||
44 | advertised in CPUID. | ||
45 | |||
46 | ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not | ||
47 | advertised in CPUID. That is mainly for virtualization | ||
48 | scenarios where the host has the updated microcode but the | ||
49 | hypervisor does not expose MD_CLEAR in CPUID. It's a best | ||
50 | effort approach without guarantee. | ||
51 | ============= ============================================================ | ||
52 | |||
53 | If the CPU is affected and the "tsx_async_abort" kernel command line parameter is | ||
54 | not provided, then the kernel selects an appropriate mitigation depending on the | ||
55 | status of RTM and MD_CLEAR CPUID bits. | ||
56 | |||
57 | The tables below indicate the impact of the tsx=on|off|auto cmdline options on the | ||
58 | state of TAA mitigation, VERW behavior and the TSX feature for various combinations of | ||
59 | MSR_IA32_ARCH_CAPABILITIES bits. | ||
60 | |||
61 | 1. "tsx=off" | ||
62 | |||
63 | ========= ========= ============ ============ ============== =================== ====================== | ||
64 | MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off | ||
65 | ---------------------------------- ------------------------------------------------------------------------- | ||
66 | TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation | ||
67 | after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full | ||
68 | ========= ========= ============ ============ ============== =================== ====================== | ||
69 | 0 0 0 HW default Yes Same as MDS Same as MDS | ||
70 | 0 0 1 Invalid case Invalid case Invalid case Invalid case | ||
71 | 0 1 0 HW default No Need ucode update Need ucode update | ||
72 | 0 1 1 Disabled Yes TSX disabled TSX disabled | ||
73 | 1 X 1 Disabled X None needed None needed | ||
74 | ========= ========= ============ ============ ============== =================== ====================== | ||
75 | |||
76 | 2. "tsx=on" | ||
77 | |||
78 | ========= ========= ============ ============ ============== =================== ====================== | ||
79 | MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on | ||
80 | ---------------------------------- ------------------------------------------------------------------------- | ||
81 | TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation | ||
82 | after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full | ||
83 | ========= ========= ============ ============ ============== =================== ====================== | ||
84 | 0 0 0 HW default Yes Same as MDS Same as MDS | ||
85 | 0 0 1 Invalid case Invalid case Invalid case Invalid case | ||
86 | 0 1 0 HW default No Need ucode update Need ucode update | ||
87 | 0 1 1 Enabled Yes None Same as MDS | ||
88 | 1 X 1 Enabled X None needed None needed | ||
89 | ========= ========= ============ ============ ============== =================== ====================== | ||
90 | |||
91 | 3. "tsx=auto" | ||
92 | |||
93 | ========= ========= ============ ============ ============== =================== ====================== | ||
94 | MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto | ||
95 | ---------------------------------- ------------------------------------------------------------------------- | ||
96 | TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation | ||
97 | after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full | ||
98 | ========= ========= ============ ============ ============== =================== ====================== | ||
99 | 0 0 0 HW default Yes Same as MDS Same as MDS | ||
100 | 0 0 1 Invalid case Invalid case Invalid case Invalid case | ||
101 | 0 1 0 HW default No Need ucode update Need ucode update | ||
102 | 0 1 1 Disabled Yes TSX disabled TSX disabled | ||
103 | 1 X 1 Enabled X None needed None needed | ||
104 | ========= ========= ============ ============ ============== =================== ====================== | ||
105 | |||
106 | In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that | ||
107 | indicates whether MSR_IA32_TSX_CTRL is supported. | ||
108 | |||
109 | There are two control bits in IA32_TSX_CTRL MSR: | ||
110 | |||
111 | Bit 0: When set, it disables the Restricted Transactional Memory (RTM) | ||
112 | sub-feature of TSX (will force all transactions to abort on the | ||
113 | XBEGIN instruction). | ||
114 | |||
115 | Bit 1: When set, it disables the enumeration of the RTM and HLE features | ||
116 | (i.e. it will make CPUID(EAX=7).EBX{bit4} and | ||
117 | CPUID(EAX=7).EBX{bit11} read as 0). | ||
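The tsx.c file added by this series (only its Makefile hook is visible in this
excerpt) uses these two bits to turn TSX off. A minimal sketch of that
operation, using the MSR and bit names added to msr-index.h in this patch,
might look like the following kernel-context code (run once per CPU)::

   #include <asm/msr.h>            /* rdmsrl()/wrmsrl() */
   #include <asm/msr-index.h>      /* MSR_IA32_TSX_CTRL, TSX_CTRL_* */

   static void tsx_disable(void)
   {
           u64 tsx;

           rdmsrl(MSR_IA32_TSX_CTRL, tsx);
           /* Bit 0: force all RTM transactions to abort at XBEGIN. */
           tsx |= TSX_CTRL_RTM_DISABLE;
           /* Bit 1: stop enumerating RTM and HLE in CPUID. */
           tsx |= TSX_CTRL_CPUID_CLEAR;
           wrmsrl(MSR_IA32_TSX_CTRL, tsx);
   }
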
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6e1faa28c58..8ef85139553f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1940,6 +1940,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS | |||
1940 | 1940 | ||
1941 | If unsure, say y. | 1941 | If unsure, say y. |
1942 | 1942 | ||
1943 | choice | ||
1944 | prompt "TSX enable mode" | ||
1945 | depends on CPU_SUP_INTEL | ||
1946 | default X86_INTEL_TSX_MODE_OFF | ||
1947 | help | ||
1948 | Intel's TSX (Transactional Synchronization Extensions) feature | ||
1949 | allows optimizing locking protocols through lock elision, which | ||
1950 | can lead to a noticeable performance boost. | ||
1951 | |||
1952 | On the other hand it has been shown that TSX can be exploited | ||
1953 | to form side channel attacks (e.g. TAA) and chances are there | ||
1954 | will be more of those attacks discovered in the future. | ||
1955 | |||
1956 | Therefore TSX is not enabled by default (aka tsx=off). An admin | ||
1957 | might override this decision with the tsx=on command line parameter. | ||
1958 | Even with TSX enabled, the kernel will attempt to enable the best | ||
1959 | possible TAA mitigation setting depending on the microcode available | ||
1960 | for the particular machine. | ||
1961 | |||
1962 | This option allows choosing the default tsx mode among tsx=on, =off | ||
1963 | and =auto. See Documentation/admin-guide/kernel-parameters.txt for more | ||
1964 | details. | ||
1965 | |||
1966 | Say off if not sure, auto if TSX is in use but should only be enabled on | ||
1967 | platforms believed to be safe, or on if TSX is in use and the security | ||
1968 | aspect of tsx is not relevant. | ||
1969 | |||
1970 | config X86_INTEL_TSX_MODE_OFF | ||
1971 | bool "off" | ||
1972 | help | ||
1973 | TSX is disabled if possible - equals the tsx=off command line parameter. | ||
1974 | |||
1975 | config X86_INTEL_TSX_MODE_ON | ||
1976 | bool "on" | ||
1977 | help | ||
1978 | TSX is always enabled on TSX capable HW - equals the tsx=on command | ||
1979 | line parameter. | ||
1980 | |||
1981 | config X86_INTEL_TSX_MODE_AUTO | ||
1982 | bool "auto" | ||
1983 | help | ||
1984 | TSX is enabled on TSX capable HW that is believed to be safe against | ||
1985 | side channel attacks - equals the tsx=auto command line parameter. | ||
1986 | endchoice | ||
1987 | |||
1943 | config EFI | 1988 | config EFI |
1944 | bool "EFI runtime service support" | 1989 | bool "EFI runtime service support" |
1945 | depends on ACPI | 1990 | depends on ACPI |
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0652d3eed9bd..c4fbe379cc0b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h | |||
@@ -399,5 +399,7 @@ | |||
399 | #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ | 399 | #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ |
400 | #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ | 400 | #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ |
401 | #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ | 401 | #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ |
402 | #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ | ||
403 | #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ | ||
402 | 404 | ||
403 | #endif /* _ASM_X86_CPUFEATURES_H */ | 405 | #endif /* _ASM_X86_CPUFEATURES_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24d6598dea29..4fc61483919a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -312,9 +312,12 @@ struct kvm_rmap_head { | |||
312 | struct kvm_mmu_page { | 312 | struct kvm_mmu_page { |
313 | struct list_head link; | 313 | struct list_head link; |
314 | struct hlist_node hash_link; | 314 | struct hlist_node hash_link; |
315 | struct list_head lpage_disallowed_link; | ||
316 | |||
315 | bool unsync; | 317 | bool unsync; |
316 | u8 mmu_valid_gen; | 318 | u8 mmu_valid_gen; |
317 | bool mmio_cached; | 319 | bool mmio_cached; |
320 | bool lpage_disallowed; /* Can't be replaced by an equiv large page */ | ||
318 | 321 | ||
319 | /* | 322 | /* |
320 | * The following two entries are used to key the shadow page in the | 323 | * The following two entries are used to key the shadow page in the |
@@ -859,6 +862,7 @@ struct kvm_arch { | |||
859 | */ | 862 | */ |
860 | struct list_head active_mmu_pages; | 863 | struct list_head active_mmu_pages; |
861 | struct list_head zapped_obsolete_pages; | 864 | struct list_head zapped_obsolete_pages; |
865 | struct list_head lpage_disallowed_mmu_pages; | ||
862 | struct kvm_page_track_notifier_node mmu_sp_tracker; | 866 | struct kvm_page_track_notifier_node mmu_sp_tracker; |
863 | struct kvm_page_track_notifier_head track_notifier_head; | 867 | struct kvm_page_track_notifier_head track_notifier_head; |
864 | 868 | ||
@@ -933,6 +937,7 @@ struct kvm_arch { | |||
933 | bool exception_payload_enabled; | 937 | bool exception_payload_enabled; |
934 | 938 | ||
935 | struct kvm_pmu_event_filter *pmu_event_filter; | 939 | struct kvm_pmu_event_filter *pmu_event_filter; |
940 | struct task_struct *nx_lpage_recovery_thread; | ||
936 | }; | 941 | }; |
937 | 942 | ||
938 | struct kvm_vm_stat { | 943 | struct kvm_vm_stat { |
@@ -946,6 +951,7 @@ struct kvm_vm_stat { | |||
946 | ulong mmu_unsync; | 951 | ulong mmu_unsync; |
947 | ulong remote_tlb_flush; | 952 | ulong remote_tlb_flush; |
948 | ulong lpages; | 953 | ulong lpages; |
954 | ulong nx_lpage_splits; | ||
949 | ulong max_mmu_page_hash_collisions; | 955 | ulong max_mmu_page_hash_collisions; |
950 | }; | 956 | }; |
951 | 957 | ||
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 20ce682a2540..6a3124664289 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -93,6 +93,18 @@ | |||
93 | * Microarchitectural Data | 93 | * Microarchitectural Data |
94 | * Sampling (MDS) vulnerabilities. | 94 | * Sampling (MDS) vulnerabilities. |
95 | */ | 95 | */ |
96 | #define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* | ||
97 | * The processor is not susceptible to a | ||
98 | * machine check error due to modifying the | ||
99 | * code page size along with either the | ||
100 | * physical address or cache type | ||
101 | * without TLB invalidation. | ||
102 | */ | ||
103 | #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ | ||
104 | #define ARCH_CAP_TAA_NO BIT(8) /* | ||
105 | * Not susceptible to | ||
106 | * TSX Async Abort (TAA) vulnerabilities. | ||
107 | */ | ||
96 | 108 | ||
97 | #define MSR_IA32_FLUSH_CMD 0x0000010b | 109 | #define MSR_IA32_FLUSH_CMD 0x0000010b |
98 | #define L1D_FLUSH BIT(0) /* | 110 | #define L1D_FLUSH BIT(0) /* |
@@ -103,6 +115,10 @@ | |||
103 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | 115 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
104 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | 116 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e |
105 | 117 | ||
118 | #define MSR_IA32_TSX_CTRL 0x00000122 | ||
119 | #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ | ||
120 | #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ | ||
121 | |||
106 | #define MSR_IA32_SYSENTER_CS 0x00000174 | 122 | #define MSR_IA32_SYSENTER_CS 0x00000174 |
107 | #define MSR_IA32_SYSENTER_ESP 0x00000175 | 123 | #define MSR_IA32_SYSENTER_ESP 0x00000175 |
108 | #define MSR_IA32_SYSENTER_EIP 0x00000176 | 124 | #define MSR_IA32_SYSENTER_EIP 0x00000176 |
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 80bc209c0708..5c24a7b35166 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h | |||
@@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); | |||
314 | #include <asm/segment.h> | 314 | #include <asm/segment.h> |
315 | 315 | ||
316 | /** | 316 | /** |
317 | * mds_clear_cpu_buffers - Mitigation for MDS vulnerability | 317 | * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability |
318 | * | 318 | * |
319 | * This uses the otherwise unused and obsolete VERW instruction in | 319 | * This uses the otherwise unused and obsolete VERW instruction in |
320 | * combination with microcode which triggers a CPU buffer flush when the | 320 | * combination with microcode which triggers a CPU buffer flush when the |
@@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void) | |||
337 | } | 337 | } |
338 | 338 | ||
339 | /** | 339 | /** |
340 | * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability | 340 | * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability |
341 | * | 341 | * |
342 | * Clear CPU buffers if the corresponding static key is enabled | 342 | * Clear CPU buffers if the corresponding static key is enabled |
343 | */ | 343 | */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6e0a3b43d027..54f5d54280f6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -988,4 +988,11 @@ enum mds_mitigations { | |||
988 | MDS_MITIGATION_VMWERV, | 988 | MDS_MITIGATION_VMWERV, |
989 | }; | 989 | }; |
990 | 990 | ||
991 | enum taa_mitigations { | ||
992 | TAA_MITIGATION_OFF, | ||
993 | TAA_MITIGATION_UCODE_NEEDED, | ||
994 | TAA_MITIGATION_VERW, | ||
995 | TAA_MITIGATION_TSX_DISABLED, | ||
996 | }; | ||
997 | |||
991 | #endif /* _ASM_X86_PROCESSOR_H */ | 998 | #endif /* _ASM_X86_PROCESSOR_H */ |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d7a1e5a9331c..890f60083eca 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -30,7 +30,7 @@ obj-$(CONFIG_PROC_FS) += proc.o | |||
30 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o | 30 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o |
31 | 31 | ||
32 | ifdef CONFIG_CPU_SUP_INTEL | 32 | ifdef CONFIG_CPU_SUP_INTEL |
33 | obj-y += intel.o intel_pconfig.o | 33 | obj-y += intel.o intel_pconfig.o tsx.o |
34 | obj-$(CONFIG_PM) += intel_epb.o | 34 | obj-$(CONFIG_PM) += intel_epb.o |
35 | endif | 35 | endif |
36 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o | 36 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 91c2561b905f..4c7b0fa15a19 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void); | |||
39 | static void __init ssb_select_mitigation(void); | 39 | static void __init ssb_select_mitigation(void); |
40 | static void __init l1tf_select_mitigation(void); | 40 | static void __init l1tf_select_mitigation(void); |
41 | static void __init mds_select_mitigation(void); | 41 | static void __init mds_select_mitigation(void); |
42 | static void __init taa_select_mitigation(void); | ||
42 | 43 | ||
43 | /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ | 44 | /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ |
44 | u64 x86_spec_ctrl_base; | 45 | u64 x86_spec_ctrl_base; |
@@ -105,6 +106,7 @@ void __init check_bugs(void) | |||
105 | ssb_select_mitigation(); | 106 | ssb_select_mitigation(); |
106 | l1tf_select_mitigation(); | 107 | l1tf_select_mitigation(); |
107 | mds_select_mitigation(); | 108 | mds_select_mitigation(); |
109 | taa_select_mitigation(); | ||
108 | 110 | ||
109 | arch_smt_update(); | 111 | arch_smt_update(); |
110 | 112 | ||
@@ -269,6 +271,100 @@ static int __init mds_cmdline(char *str) | |||
269 | early_param("mds", mds_cmdline); | 271 | early_param("mds", mds_cmdline); |
270 | 272 | ||
271 | #undef pr_fmt | 273 | #undef pr_fmt |
274 | #define pr_fmt(fmt) "TAA: " fmt | ||
275 | |||
276 | /* Default mitigation for TAA-affected CPUs */ | ||
277 | static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; | ||
278 | static bool taa_nosmt __ro_after_init; | ||
279 | |||
280 | static const char * const taa_strings[] = { | ||
281 | [TAA_MITIGATION_OFF] = "Vulnerable", | ||
282 | [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", | ||
283 | [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", | ||
284 | [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled", | ||
285 | }; | ||
286 | |||
287 | static void __init taa_select_mitigation(void) | ||
288 | { | ||
289 | u64 ia32_cap; | ||
290 | |||
291 | if (!boot_cpu_has_bug(X86_BUG_TAA)) { | ||
292 | taa_mitigation = TAA_MITIGATION_OFF; | ||
293 | return; | ||
294 | } | ||
295 | |||
296 | /* TSX previously disabled by tsx=off */ | ||
297 | if (!boot_cpu_has(X86_FEATURE_RTM)) { | ||
298 | taa_mitigation = TAA_MITIGATION_TSX_DISABLED; | ||
299 | goto out; | ||
300 | } | ||
301 | |||
302 | if (cpu_mitigations_off()) { | ||
303 | taa_mitigation = TAA_MITIGATION_OFF; | ||
304 | return; | ||
305 | } | ||
306 | |||
307 | /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ | ||
308 | if (taa_mitigation == TAA_MITIGATION_OFF) | ||
309 | goto out; | ||
310 | |||
311 | if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) | ||
312 | taa_mitigation = TAA_MITIGATION_VERW; | ||
313 | else | ||
314 | taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; | ||
315 | |||
316 | /* | ||
317 | * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. | ||
318 | * A microcode update fixes this behavior to clear CPU buffers. It also | ||
319 | * adds support for MSR_IA32_TSX_CTRL which is enumerated by the | ||
320 | * ARCH_CAP_TSX_CTRL_MSR bit. | ||
321 | * | ||
322 | * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode | ||
323 | * update is required. | ||
324 | */ | ||
325 | ia32_cap = x86_read_arch_cap_msr(); | ||
326 | if ( (ia32_cap & ARCH_CAP_MDS_NO) && | ||
327 | !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) | ||
328 | taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; | ||
329 | |||
330 | /* | ||
331 | * TSX is enabled, select alternate mitigation for TAA which is | ||
332 | * the same as MDS. Enable MDS static branch to clear CPU buffers. | ||
333 | * | ||
334 | * For guests that can't determine whether the correct microcode is | ||
335 | * present on host, enable the mitigation for UCODE_NEEDED as well. | ||
336 | */ | ||
337 | static_branch_enable(&mds_user_clear); | ||
338 | |||
339 | if (taa_nosmt || cpu_mitigations_auto_nosmt()) | ||
340 | cpu_smt_disable(false); | ||
341 | |||
342 | out: | ||
343 | pr_info("%s\n", taa_strings[taa_mitigation]); | ||
344 | } | ||
345 | |||
346 | static int __init tsx_async_abort_parse_cmdline(char *str) | ||
347 | { | ||
348 | if (!boot_cpu_has_bug(X86_BUG_TAA)) | ||
349 | return 0; | ||
350 | |||
351 | if (!str) | ||
352 | return -EINVAL; | ||
353 | |||
354 | if (!strcmp(str, "off")) { | ||
355 | taa_mitigation = TAA_MITIGATION_OFF; | ||
356 | } else if (!strcmp(str, "full")) { | ||
357 | taa_mitigation = TAA_MITIGATION_VERW; | ||
358 | } else if (!strcmp(str, "full,nosmt")) { | ||
359 | taa_mitigation = TAA_MITIGATION_VERW; | ||
360 | taa_nosmt = true; | ||
361 | } | ||
362 | |||
363 | return 0; | ||
364 | } | ||
365 | early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); | ||
366 | |||
367 | #undef pr_fmt | ||
272 | #define pr_fmt(fmt) "Spectre V1 : " fmt | 368 | #define pr_fmt(fmt) "Spectre V1 : " fmt |
273 | 369 | ||
274 | enum spectre_v1_mitigation { | 370 | enum spectre_v1_mitigation { |
@@ -786,13 +882,10 @@ static void update_mds_branch_idle(void) | |||
786 | } | 882 | } |
787 | 883 | ||
788 | #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" | 884 | #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" |
885 | #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" | ||
789 | 886 | ||
790 | void cpu_bugs_smt_update(void) | 887 | void cpu_bugs_smt_update(void) |
791 | { | 888 | { |
792 | /* Enhanced IBRS implies STIBP. No update required. */ | ||
793 | if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) | ||
794 | return; | ||
795 | |||
796 | mutex_lock(&spec_ctrl_mutex); | 889 | mutex_lock(&spec_ctrl_mutex); |
797 | 890 | ||
798 | switch (spectre_v2_user) { | 891 | switch (spectre_v2_user) { |
@@ -819,6 +912,17 @@ void cpu_bugs_smt_update(void) | |||
819 | break; | 912 | break; |
820 | } | 913 | } |
821 | 914 | ||
915 | switch (taa_mitigation) { | ||
916 | case TAA_MITIGATION_VERW: | ||
917 | case TAA_MITIGATION_UCODE_NEEDED: | ||
918 | if (sched_smt_active()) | ||
919 | pr_warn_once(TAA_MSG_SMT); | ||
920 | break; | ||
921 | case TAA_MITIGATION_TSX_DISABLED: | ||
922 | case TAA_MITIGATION_OFF: | ||
923 | break; | ||
924 | } | ||
925 | |||
822 | mutex_unlock(&spec_ctrl_mutex); | 926 | mutex_unlock(&spec_ctrl_mutex); |
823 | } | 927 | } |
824 | 928 | ||
@@ -1149,6 +1253,9 @@ void x86_spec_ctrl_setup_ap(void) | |||
1149 | x86_amd_ssb_disable(); | 1253 | x86_amd_ssb_disable(); |
1150 | } | 1254 | } |
1151 | 1255 | ||
1256 | bool itlb_multihit_kvm_mitigation; | ||
1257 | EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); | ||
1258 | |||
1152 | #undef pr_fmt | 1259 | #undef pr_fmt |
1153 | #define pr_fmt(fmt) "L1TF: " fmt | 1260 | #define pr_fmt(fmt) "L1TF: " fmt |
1154 | 1261 | ||
@@ -1304,11 +1411,24 @@ static ssize_t l1tf_show_state(char *buf) | |||
1304 | l1tf_vmx_states[l1tf_vmx_mitigation], | 1411 | l1tf_vmx_states[l1tf_vmx_mitigation], |
1305 | sched_smt_active() ? "vulnerable" : "disabled"); | 1412 | sched_smt_active() ? "vulnerable" : "disabled"); |
1306 | } | 1413 | } |
1414 | |||
1415 | static ssize_t itlb_multihit_show_state(char *buf) | ||
1416 | { | ||
1417 | if (itlb_multihit_kvm_mitigation) | ||
1418 | return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); | ||
1419 | else | ||
1420 | return sprintf(buf, "KVM: Vulnerable\n"); | ||
1421 | } | ||
1307 | #else | 1422 | #else |
1308 | static ssize_t l1tf_show_state(char *buf) | 1423 | static ssize_t l1tf_show_state(char *buf) |
1309 | { | 1424 | { |
1310 | return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); | 1425 | return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); |
1311 | } | 1426 | } |
1427 | |||
1428 | static ssize_t itlb_multihit_show_state(char *buf) | ||
1429 | { | ||
1430 | return sprintf(buf, "Processor vulnerable\n"); | ||
1431 | } | ||
1312 | #endif | 1432 | #endif |
1313 | 1433 | ||
1314 | static ssize_t mds_show_state(char *buf) | 1434 | static ssize_t mds_show_state(char *buf) |
@@ -1328,6 +1448,21 @@ static ssize_t mds_show_state(char *buf) | |||
1328 | sched_smt_active() ? "vulnerable" : "disabled"); | 1448 | sched_smt_active() ? "vulnerable" : "disabled"); |
1329 | } | 1449 | } |
1330 | 1450 | ||
1451 | static ssize_t tsx_async_abort_show_state(char *buf) | ||
1452 | { | ||
1453 | if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || | ||
1454 | (taa_mitigation == TAA_MITIGATION_OFF)) | ||
1455 | return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); | ||
1456 | |||
1457 | if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { | ||
1458 | return sprintf(buf, "%s; SMT Host state unknown\n", | ||
1459 | taa_strings[taa_mitigation]); | ||
1460 | } | ||
1461 | |||
1462 | return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], | ||
1463 | sched_smt_active() ? "vulnerable" : "disabled"); | ||
1464 | } | ||
1465 | |||
1331 | static char *stibp_state(void) | 1466 | static char *stibp_state(void) |
1332 | { | 1467 | { |
1333 | if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) | 1468 | if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) |
@@ -1398,6 +1533,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr | |||
1398 | case X86_BUG_MDS: | 1533 | case X86_BUG_MDS: |
1399 | return mds_show_state(buf); | 1534 | return mds_show_state(buf); |
1400 | 1535 | ||
1536 | case X86_BUG_TAA: | ||
1537 | return tsx_async_abort_show_state(buf); | ||
1538 | |||
1539 | case X86_BUG_ITLB_MULTIHIT: | ||
1540 | return itlb_multihit_show_state(buf); | ||
1541 | |||
1401 | default: | 1542 | default: |
1402 | break; | 1543 | break; |
1403 | } | 1544 | } |
@@ -1434,4 +1575,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu | |||
1434 | { | 1575 | { |
1435 | return cpu_show_common(dev, attr, buf, X86_BUG_MDS); | 1576 | return cpu_show_common(dev, attr, buf, X86_BUG_MDS); |
1436 | } | 1577 | } |
1578 | |||
1579 | ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) | ||
1580 | { | ||
1581 | return cpu_show_common(dev, attr, buf, X86_BUG_TAA); | ||
1582 | } | ||
1583 | |||
1584 | ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) | ||
1585 | { | ||
1586 | return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); | ||
1587 | } | ||
1437 | #endif | 1588 | #endif |
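
Editor's note: the two cpu_show_* handlers above back the new sysfs files listed in the ABI documentation at the top of this series. As a quick way to exercise them, here is a minimal user-space sketch (not part of the patch) that reads both files and prints the reported mitigation state; the paths are taken from the ABI entry, everything else is illustrative.

#include <stdio.h>

/* Print the mitigation string exposed by one vulnerabilities file. */
static void show(const char *path)
{
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(line, sizeof(line), f))
		printf("%s: %s", path, line);	/* line keeps its trailing '\n' */
	fclose(f);
}

int main(void)
{
	show("/sys/devices/system/cpu/vulnerabilities/tsx_async_abort");
	show("/sys/devices/system/cpu/vulnerabilities/itlb_multihit");
	return 0;
}

On an unaffected host both files simply report "Not affected", matching the weak defaults added to drivers/base/cpu.c further down in this series.
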
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9ae7d1bcd4f4..fffe21945374 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | |||
1016 | #endif | 1016 | #endif |
1017 | } | 1017 | } |
1018 | 1018 | ||
1019 | #define NO_SPECULATION BIT(0) | 1019 | #define NO_SPECULATION BIT(0) |
1020 | #define NO_MELTDOWN BIT(1) | 1020 | #define NO_MELTDOWN BIT(1) |
1021 | #define NO_SSB BIT(2) | 1021 | #define NO_SSB BIT(2) |
1022 | #define NO_L1TF BIT(3) | 1022 | #define NO_L1TF BIT(3) |
1023 | #define NO_MDS BIT(4) | 1023 | #define NO_MDS BIT(4) |
1024 | #define MSBDS_ONLY BIT(5) | 1024 | #define MSBDS_ONLY BIT(5) |
1025 | #define NO_SWAPGS BIT(6) | 1025 | #define NO_SWAPGS BIT(6) |
1026 | #define NO_ITLB_MULTIHIT BIT(7) | ||
1026 | 1027 | ||
1027 | #define VULNWL(_vendor, _family, _model, _whitelist) \ | 1028 | #define VULNWL(_vendor, _family, _model, _whitelist) \ |
1028 | { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } | 1029 | { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } |
@@ -1043,27 +1044,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { | |||
1043 | VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), | 1044 | VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), |
1044 | 1045 | ||
1045 | /* Intel Family 6 */ | 1046 | /* Intel Family 6 */ |
1046 | VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), | 1047 | VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1047 | VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), | 1048 | VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1048 | VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), | 1049 | VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1049 | VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), | 1050 | VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1050 | VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), | 1051 | VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1051 | 1052 | ||
1052 | VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), | 1053 | VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1053 | VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), | 1054 | VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1054 | VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), | 1055 | VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1055 | VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), | 1056 | VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1056 | VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), | 1057 | VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1057 | VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), | 1058 | VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1058 | 1059 | ||
1059 | VULNWL_INTEL(CORE_YONAH, NO_SSB), | 1060 | VULNWL_INTEL(CORE_YONAH, NO_SSB), |
1060 | 1061 | ||
1061 | VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), | 1062 | VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1062 | VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS), | 1063 | VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1063 | 1064 | ||
1064 | VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), | 1065 | VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1065 | VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS), | 1066 | VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1066 | VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), | 1067 | VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1067 | 1068 | ||
1068 | /* | 1069 | /* |
1069 | * Technically, swapgs isn't serializing on AMD (despite it previously | 1070 | * Technically, swapgs isn't serializing on AMD (despite it previously |
@@ -1073,15 +1074,17 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { | |||
1073 | * good enough for our purposes. | 1074 | * good enough for our purposes. |
1074 | */ | 1075 | */ |
1075 | 1076 | ||
1077 | VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), | ||
1078 | |||
1076 | /* AMD Family 0xf - 0x12 */ | 1079 | /* AMD Family 0xf - 0x12 */ |
1077 | VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), | 1080 | VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1078 | VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), | 1081 | VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1079 | VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), | 1082 | VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1080 | VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), | 1083 | VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1081 | 1084 | ||
1082 | /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ | 1085 | /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ |
1083 | VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), | 1086 | VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1084 | VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), | 1087 | VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1085 | {} | 1088 | {} |
1086 | }; | 1089 | }; |
1087 | 1090 | ||
@@ -1092,19 +1095,30 @@ static bool __init cpu_matches(unsigned long which) | |||
1092 | return m && !!(m->driver_data & which); | 1095 | return m && !!(m->driver_data & which); |
1093 | } | 1096 | } |
1094 | 1097 | ||
1095 | static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) | 1098 | u64 x86_read_arch_cap_msr(void) |
1096 | { | 1099 | { |
1097 | u64 ia32_cap = 0; | 1100 | u64 ia32_cap = 0; |
1098 | 1101 | ||
1102 | if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) | ||
1103 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); | ||
1104 | |||
1105 | return ia32_cap; | ||
1106 | } | ||
1107 | |||
1108 | static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) | ||
1109 | { | ||
1110 | u64 ia32_cap = x86_read_arch_cap_msr(); | ||
1111 | |||
1112 | /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ | ||
1113 | if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) | ||
1114 | setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); | ||
1115 | |||
1099 | if (cpu_matches(NO_SPECULATION)) | 1116 | if (cpu_matches(NO_SPECULATION)) |
1100 | return; | 1117 | return; |
1101 | 1118 | ||
1102 | setup_force_cpu_bug(X86_BUG_SPECTRE_V1); | 1119 | setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1103 | setup_force_cpu_bug(X86_BUG_SPECTRE_V2); | 1120 | setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
1104 | 1121 | ||
1105 | if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) | ||
1106 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); | ||
1107 | |||
1108 | if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && | 1122 | if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && |
1109 | !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) | 1123 | !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) |
1110 | setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); | 1124 | setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); |
@@ -1121,6 +1135,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) | |||
1121 | if (!cpu_matches(NO_SWAPGS)) | 1135 | if (!cpu_matches(NO_SWAPGS)) |
1122 | setup_force_cpu_bug(X86_BUG_SWAPGS); | 1136 | setup_force_cpu_bug(X86_BUG_SWAPGS); |
1123 | 1137 | ||
1138 | /* | ||
1139 | * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: | ||
1140 | * - TSX is supported or | ||
1141 | * - TSX_CTRL is present | ||
1142 | * | ||
1143 | * TSX_CTRL check is needed for cases when TSX could be disabled before | ||
1144 | * the kernel boot, e.g. kexec. | ||
1145 | * The TSX_CTRL check alone is not sufficient for cases when the microcode | ||
1146 | * update is not present or when running as a guest that doesn't get TSX_CTRL. | ||
1147 | */ | ||
1148 | if (!(ia32_cap & ARCH_CAP_TAA_NO) && | ||
1149 | (cpu_has(c, X86_FEATURE_RTM) || | ||
1150 | (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) | ||
1151 | setup_force_cpu_bug(X86_BUG_TAA); | ||
1152 | |||
1124 | if (cpu_matches(NO_MELTDOWN)) | 1153 | if (cpu_matches(NO_MELTDOWN)) |
1125 | return; | 1154 | return; |
1126 | 1155 | ||
@@ -1554,6 +1583,8 @@ void __init identify_boot_cpu(void) | |||
1554 | #endif | 1583 | #endif |
1555 | cpu_detect_tlb(&boot_cpu_data); | 1584 | cpu_detect_tlb(&boot_cpu_data); |
1556 | setup_cr_pinning(); | 1585 | setup_cr_pinning(); |
1586 | |||
1587 | tsx_init(); | ||
1557 | } | 1588 | } |
1558 | 1589 | ||
1559 | void identify_secondary_cpu(struct cpuinfo_x86 *c) | 1590 | void identify_secondary_cpu(struct cpuinfo_x86 *c) |
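
Editor's note: the comment block in cpu_set_bug_bits() above is the heart of the TAA detection logic. The following stand-alone sketch restates it as a pure predicate so the three inputs are easy to see; it is user-space illustration only, the boolean parameters merely mirror ARCH_CAP_TAA_NO, X86_FEATURE_RTM and ARCH_CAP_TSX_CTRL_MSR, and no real MSR bit values are used.

#include <stdbool.h>
#include <stdio.h>

/*
 * Simplified model of the X86_BUG_TAA decision: a CPU is marked affected
 * unless the hardware sets TAA_NO, and only if TSX is usable (RTM) or the
 * TSX_CTRL MSR exists (e.g. kexec with TSX already turned off).
 */
static bool taa_bug(bool taa_no, bool has_rtm, bool has_tsx_ctrl)
{
	if (taa_no)
		return false;
	return has_rtm || has_tsx_ctrl;
}

int main(void)
{
	printf("TAA_NO set            -> affected=%d\n", taa_bug(true,  true,  true));
	printf("RTM, no TSX_CTRL      -> affected=%d\n", taa_bug(false, true,  false));
	printf("TSX_CTRL, RTM cleared -> affected=%d\n", taa_bug(false, false, true));
	printf("neither               -> affected=%d\n", taa_bug(false, false, false));
	return 0;
}

The last case is why a CPU that exposes neither RTM nor the TSX_CTRL MSR is not marked with X86_BUG_TAA, even when TAA_NO is clear.
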
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index c0e2407abdd6..38ab6e115eac 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -44,6 +44,22 @@ struct _tlb_table { | |||
44 | extern const struct cpu_dev *const __x86_cpu_dev_start[], | 44 | extern const struct cpu_dev *const __x86_cpu_dev_start[], |
45 | *const __x86_cpu_dev_end[]; | 45 | *const __x86_cpu_dev_end[]; |
46 | 46 | ||
47 | #ifdef CONFIG_CPU_SUP_INTEL | ||
48 | enum tsx_ctrl_states { | ||
49 | TSX_CTRL_ENABLE, | ||
50 | TSX_CTRL_DISABLE, | ||
51 | TSX_CTRL_NOT_SUPPORTED, | ||
52 | }; | ||
53 | |||
54 | extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; | ||
55 | |||
56 | extern void __init tsx_init(void); | ||
57 | extern void tsx_enable(void); | ||
58 | extern void tsx_disable(void); | ||
59 | #else | ||
60 | static inline void tsx_init(void) { } | ||
61 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
62 | |||
47 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | 63 | extern void get_cpu_cap(struct cpuinfo_x86 *c); |
48 | extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); | 64 | extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); |
49 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); | 65 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
@@ -62,4 +78,6 @@ unsigned int aperfmperf_get_khz(int cpu); | |||
62 | 78 | ||
63 | extern void x86_spec_ctrl_setup_ap(void); | 79 | extern void x86_spec_ctrl_setup_ap(void); |
64 | 80 | ||
81 | extern u64 x86_read_arch_cap_msr(void); | ||
82 | |||
65 | #endif /* ARCH_X86_CPU_H */ | 83 | #endif /* ARCH_X86_CPU_H */ |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c2fdc00df163..11d5c5950e2d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -762,6 +762,11 @@ static void init_intel(struct cpuinfo_x86 *c) | |||
762 | detect_tme(c); | 762 | detect_tme(c); |
763 | 763 | ||
764 | init_intel_misc_features(c); | 764 | init_intel_misc_features(c); |
765 | |||
766 | if (tsx_ctrl_state == TSX_CTRL_ENABLE) | ||
767 | tsx_enable(); | ||
768 | if (tsx_ctrl_state == TSX_CTRL_DISABLE) | ||
769 | tsx_disable(); | ||
765 | } | 770 | } |
766 | 771 | ||
767 | #ifdef CONFIG_X86_32 | 772 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c new file mode 100644 index 000000000000..3e20d322bc98 --- /dev/null +++ b/arch/x86/kernel/cpu/tsx.c | |||
@@ -0,0 +1,140 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Intel Transactional Synchronization Extensions (TSX) control. | ||
4 | * | ||
5 | * Copyright (C) 2019 Intel Corporation | ||
6 | * | ||
7 | * Author: | ||
8 | * Pawan Gupta <pawan.kumar.gupta@linux.intel.com> | ||
9 | */ | ||
10 | |||
11 | #include <linux/cpufeature.h> | ||
12 | |||
13 | #include <asm/cmdline.h> | ||
14 | |||
15 | #include "cpu.h" | ||
16 | |||
17 | enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; | ||
18 | |||
19 | void tsx_disable(void) | ||
20 | { | ||
21 | u64 tsx; | ||
22 | |||
23 | rdmsrl(MSR_IA32_TSX_CTRL, tsx); | ||
24 | |||
25 | /* Force all transactions to immediately abort */ | ||
26 | tsx |= TSX_CTRL_RTM_DISABLE; | ||
27 | |||
28 | /* | ||
29 | * Ensure TSX support is not enumerated in CPUID. | ||
30 | * This is visible to userspace and will ensure they | ||
31 | * do not waste resources trying TSX transactions that | ||
32 | * will always abort. | ||
33 | */ | ||
34 | tsx |= TSX_CTRL_CPUID_CLEAR; | ||
35 | |||
36 | wrmsrl(MSR_IA32_TSX_CTRL, tsx); | ||
37 | } | ||
38 | |||
39 | void tsx_enable(void) | ||
40 | { | ||
41 | u64 tsx; | ||
42 | |||
43 | rdmsrl(MSR_IA32_TSX_CTRL, tsx); | ||
44 | |||
45 | /* Enable the RTM feature in the cpu */ | ||
46 | tsx &= ~TSX_CTRL_RTM_DISABLE; | ||
47 | |||
48 | /* | ||
49 | * Ensure TSX support is enumerated in CPUID. | ||
50 | * This is visible to userspace and will ensure they | ||
51 | * can enumerate and use the TSX feature. | ||
52 | */ | ||
53 | tsx &= ~TSX_CTRL_CPUID_CLEAR; | ||
54 | |||
55 | wrmsrl(MSR_IA32_TSX_CTRL, tsx); | ||
56 | } | ||
57 | |||
58 | static bool __init tsx_ctrl_is_supported(void) | ||
59 | { | ||
60 | u64 ia32_cap = x86_read_arch_cap_msr(); | ||
61 | |||
62 | /* | ||
63 | * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this | ||
64 | * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. | ||
65 | * | ||
66 | * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a | ||
67 | * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES | ||
68 | * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get | ||
69 | * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, | ||
70 | * tsx= cmdline requests will do nothing on CPUs without | ||
71 | * MSR_IA32_TSX_CTRL support. | ||
72 | */ | ||
73 | return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); | ||
74 | } | ||
75 | |||
76 | static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) | ||
77 | { | ||
78 | if (boot_cpu_has_bug(X86_BUG_TAA)) | ||
79 | return TSX_CTRL_DISABLE; | ||
80 | |||
81 | return TSX_CTRL_ENABLE; | ||
82 | } | ||
83 | |||
84 | void __init tsx_init(void) | ||
85 | { | ||
86 | char arg[5] = {}; | ||
87 | int ret; | ||
88 | |||
89 | if (!tsx_ctrl_is_supported()) | ||
90 | return; | ||
91 | |||
92 | ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); | ||
93 | if (ret >= 0) { | ||
94 | if (!strcmp(arg, "on")) { | ||
95 | tsx_ctrl_state = TSX_CTRL_ENABLE; | ||
96 | } else if (!strcmp(arg, "off")) { | ||
97 | tsx_ctrl_state = TSX_CTRL_DISABLE; | ||
98 | } else if (!strcmp(arg, "auto")) { | ||
99 | tsx_ctrl_state = x86_get_tsx_auto_mode(); | ||
100 | } else { | ||
101 | tsx_ctrl_state = TSX_CTRL_DISABLE; | ||
102 | pr_err("tsx: invalid option, defaulting to off\n"); | ||
103 | } | ||
104 | } else { | ||
105 | /* tsx= not provided */ | ||
106 | if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) | ||
107 | tsx_ctrl_state = x86_get_tsx_auto_mode(); | ||
108 | else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) | ||
109 | tsx_ctrl_state = TSX_CTRL_DISABLE; | ||
110 | else | ||
111 | tsx_ctrl_state = TSX_CTRL_ENABLE; | ||
112 | } | ||
113 | |||
114 | if (tsx_ctrl_state == TSX_CTRL_DISABLE) { | ||
115 | tsx_disable(); | ||
116 | |||
117 | /* | ||
118 | * tsx_disable() will change the state of the | ||
119 | * RTM CPUID bit. Clear it here since it is now | ||
120 | * expected to be not set. | ||
121 | */ | ||
122 | setup_clear_cpu_cap(X86_FEATURE_RTM); | ||
123 | } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { | ||
124 | |||
125 | /* | ||
126 | * HW defaults TSX to be enabled at bootup. | ||
127 | * We may still need the TSX enable support | ||
128 | * during init for special cases like | ||
129 | * kexec after TSX is disabled. | ||
130 | */ | ||
131 | tsx_enable(); | ||
132 | |||
133 | /* | ||
134 | * tsx_enable() will change the state of the | ||
135 | * RTM CPUID bit. Force it here since it is now | ||
136 | * expected to be set. | ||
137 | */ | ||
138 | setup_force_cpu_cap(X86_FEATURE_RTM); | ||
139 | } | ||
140 | } | ||
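
Editor's note: tsx_init() above resolves the effective TSX state from three sources: the tsx= command line option, the X86_INTEL_TSX_MODE_* Kconfig default, and (for "auto") whether the CPU is TAA-affected. The sketch below is a user-space restatement of that precedence; the CONFIG names and the "on"/"off"/"auto" strings come from the patch, while the function name and enum are made up for illustration.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum tsx_state { TSX_ON, TSX_OFF };

/* cmdline_arg is NULL when tsx= was not given on the command line. */
static enum tsx_state resolve_tsx(const char *cmdline_arg,
				  bool config_auto, bool config_off,
				  bool cpu_has_taa_bug)
{
	/* "auto" keeps TSX on unless the CPU is TAA-affected. */
	enum tsx_state auto_mode = cpu_has_taa_bug ? TSX_OFF : TSX_ON;

	if (cmdline_arg) {
		if (!strcmp(cmdline_arg, "on"))
			return TSX_ON;
		if (!strcmp(cmdline_arg, "auto"))
			return auto_mode;
		/* "off" and unrecognised values both disable TSX. */
		return TSX_OFF;
	}
	if (config_auto)
		return auto_mode;
	return config_off ? TSX_OFF : TSX_ON;
}

int main(void)
{
	printf("tsx=auto, TAA-affected CPU -> %s\n",
	       resolve_tsx("auto", false, false, true) == TSX_OFF ? "off" : "on");
	printf("no tsx=, MODE_OFF kernel   -> %s\n",
	       resolve_tsx(NULL, false, true, false) == TSX_OFF ? "off" : "on");
	return 0;
}

None of this applies when ARCH_CAP_TSX_CTRL_MSR is absent, since tsx_init() returns early in that case and leaves tsx_ctrl_state at TSX_CTRL_NOT_SUPPORTED.
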
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bf82b1f2e834..fd6012eef9c9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
38 | #include <linux/hash.h> | 38 | #include <linux/hash.h> |
39 | #include <linux/kern_levels.h> | 39 | #include <linux/kern_levels.h> |
40 | #include <linux/kthread.h> | ||
40 | 41 | ||
41 | #include <asm/page.h> | 42 | #include <asm/page.h> |
42 | #include <asm/pat.h> | 43 | #include <asm/pat.h> |
@@ -47,6 +48,30 @@ | |||
47 | #include <asm/kvm_page_track.h> | 48 | #include <asm/kvm_page_track.h> |
48 | #include "trace.h" | 49 | #include "trace.h" |
49 | 50 | ||
51 | extern bool itlb_multihit_kvm_mitigation; | ||
52 | |||
53 | static int __read_mostly nx_huge_pages = -1; | ||
54 | static uint __read_mostly nx_huge_pages_recovery_ratio = 60; | ||
55 | |||
56 | static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); | ||
57 | static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); | ||
58 | |||
59 | static struct kernel_param_ops nx_huge_pages_ops = { | ||
60 | .set = set_nx_huge_pages, | ||
61 | .get = param_get_bool, | ||
62 | }; | ||
63 | |||
64 | static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { | ||
65 | .set = set_nx_huge_pages_recovery_ratio, | ||
66 | .get = param_get_uint, | ||
67 | }; | ||
68 | |||
69 | module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); | ||
70 | __MODULE_PARM_TYPE(nx_huge_pages, "bool"); | ||
71 | module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, | ||
72 | &nx_huge_pages_recovery_ratio, 0644); | ||
73 | __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); | ||
74 | |||
50 | /* | 75 | /* |
51 | * When setting this variable to true it enables Two-Dimensional-Paging | 76 | * When setting this variable to true it enables Two-Dimensional-Paging |
52 | * where the hardware walks 2 page tables: | 77 | * where the hardware walks 2 page tables: |
@@ -352,6 +377,11 @@ static inline bool spte_ad_need_write_protect(u64 spte) | |||
352 | return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; | 377 | return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; |
353 | } | 378 | } |
354 | 379 | ||
380 | static bool is_nx_huge_page_enabled(void) | ||
381 | { | ||
382 | return READ_ONCE(nx_huge_pages); | ||
383 | } | ||
384 | |||
355 | static inline u64 spte_shadow_accessed_mask(u64 spte) | 385 | static inline u64 spte_shadow_accessed_mask(u64 spte) |
356 | { | 386 | { |
357 | MMU_WARN_ON(is_mmio_spte(spte)); | 387 | MMU_WARN_ON(is_mmio_spte(spte)); |
@@ -1190,6 +1220,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1190 | kvm_mmu_gfn_disallow_lpage(slot, gfn); | 1220 | kvm_mmu_gfn_disallow_lpage(slot, gfn); |
1191 | } | 1221 | } |
1192 | 1222 | ||
1223 | static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
1224 | { | ||
1225 | if (sp->lpage_disallowed) | ||
1226 | return; | ||
1227 | |||
1228 | ++kvm->stat.nx_lpage_splits; | ||
1229 | list_add_tail(&sp->lpage_disallowed_link, | ||
1230 | &kvm->arch.lpage_disallowed_mmu_pages); | ||
1231 | sp->lpage_disallowed = true; | ||
1232 | } | ||
1233 | |||
1193 | static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) | 1234 | static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) |
1194 | { | 1235 | { |
1195 | struct kvm_memslots *slots; | 1236 | struct kvm_memslots *slots; |
@@ -1207,6 +1248,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1207 | kvm_mmu_gfn_allow_lpage(slot, gfn); | 1248 | kvm_mmu_gfn_allow_lpage(slot, gfn); |
1208 | } | 1249 | } |
1209 | 1250 | ||
1251 | static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
1252 | { | ||
1253 | --kvm->stat.nx_lpage_splits; | ||
1254 | sp->lpage_disallowed = false; | ||
1255 | list_del(&sp->lpage_disallowed_link); | ||
1256 | } | ||
1257 | |||
1210 | static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, | 1258 | static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, |
1211 | struct kvm_memory_slot *slot) | 1259 | struct kvm_memory_slot *slot) |
1212 | { | 1260 | { |
@@ -2792,6 +2840,9 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, | |||
2792 | kvm_reload_remote_mmus(kvm); | 2840 | kvm_reload_remote_mmus(kvm); |
2793 | } | 2841 | } |
2794 | 2842 | ||
2843 | if (sp->lpage_disallowed) | ||
2844 | unaccount_huge_nx_page(kvm, sp); | ||
2845 | |||
2795 | sp->role.invalid = 1; | 2846 | sp->role.invalid = 1; |
2796 | return list_unstable; | 2847 | return list_unstable; |
2797 | } | 2848 | } |
@@ -3013,6 +3064,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
3013 | if (!speculative) | 3064 | if (!speculative) |
3014 | spte |= spte_shadow_accessed_mask(spte); | 3065 | spte |= spte_shadow_accessed_mask(spte); |
3015 | 3066 | ||
3067 | if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && | ||
3068 | is_nx_huge_page_enabled()) { | ||
3069 | pte_access &= ~ACC_EXEC_MASK; | ||
3070 | } | ||
3071 | |||
3016 | if (pte_access & ACC_EXEC_MASK) | 3072 | if (pte_access & ACC_EXEC_MASK) |
3017 | spte |= shadow_x_mask; | 3073 | spte |= shadow_x_mask; |
3018 | else | 3074 | else |
@@ -3233,9 +3289,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) | |||
3233 | __direct_pte_prefetch(vcpu, sp, sptep); | 3289 | __direct_pte_prefetch(vcpu, sp, sptep); |
3234 | } | 3290 | } |
3235 | 3291 | ||
3292 | static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, | ||
3293 | gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) | ||
3294 | { | ||
3295 | int level = *levelp; | ||
3296 | u64 spte = *it.sptep; | ||
3297 | |||
3298 | if (it.level == level && level > PT_PAGE_TABLE_LEVEL && | ||
3299 | is_nx_huge_page_enabled() && | ||
3300 | is_shadow_present_pte(spte) && | ||
3301 | !is_large_pte(spte)) { | ||
3302 | /* | ||
3303 | * A small SPTE exists for this pfn, but FNAME(fetch) | ||
3304 | * and __direct_map would like to create a large PTE | ||
3305 | * instead: just force them to go down another level, | ||
3306 | * folding the next 9 bits of the address back into | ||
3307 | * pfn for them. | ||
3308 | */ | ||
3309 | u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); | ||
3310 | *pfnp |= gfn & page_mask; | ||
3311 | (*levelp)--; | ||
3312 | } | ||
3313 | } | ||
3314 | |||
3236 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, | 3315 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, |
3237 | int map_writable, int level, kvm_pfn_t pfn, | 3316 | int map_writable, int level, kvm_pfn_t pfn, |
3238 | bool prefault) | 3317 | bool prefault, bool lpage_disallowed) |
3239 | { | 3318 | { |
3240 | struct kvm_shadow_walk_iterator it; | 3319 | struct kvm_shadow_walk_iterator it; |
3241 | struct kvm_mmu_page *sp; | 3320 | struct kvm_mmu_page *sp; |
@@ -3248,6 +3327,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, | |||
3248 | 3327 | ||
3249 | trace_kvm_mmu_spte_requested(gpa, level, pfn); | 3328 | trace_kvm_mmu_spte_requested(gpa, level, pfn); |
3250 | for_each_shadow_entry(vcpu, gpa, it) { | 3329 | for_each_shadow_entry(vcpu, gpa, it) { |
3330 | /* | ||
3331 | * We cannot overwrite existing page tables with an NX | ||
3332 | * large page, as the leaf could be executable. | ||
3333 | */ | ||
3334 | disallowed_hugepage_adjust(it, gfn, &pfn, &level); | ||
3335 | |||
3251 | base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | 3336 | base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); |
3252 | if (it.level == level) | 3337 | if (it.level == level) |
3253 | break; | 3338 | break; |
@@ -3258,6 +3343,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, | |||
3258 | it.level - 1, true, ACC_ALL); | 3343 | it.level - 1, true, ACC_ALL); |
3259 | 3344 | ||
3260 | link_shadow_page(vcpu, it.sptep, sp); | 3345 | link_shadow_page(vcpu, it.sptep, sp); |
3346 | if (lpage_disallowed) | ||
3347 | account_huge_nx_page(vcpu->kvm, sp); | ||
3261 | } | 3348 | } |
3262 | } | 3349 | } |
3263 | 3350 | ||
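
Editor's note: the pfn arithmetic in disallowed_hugepage_adjust() above ("the next 9 bits of the address") is easier to see with concrete numbers. The following user-space sketch reproduces it for a 2M mapping being forced down to 4K; KVM_PAGES_PER_HPAGE is re-derived locally under the standard x86 assumption of 512 entries per table level, so treat the macro as illustrative rather than the kernel definition.

#include <stdint.h>
#include <stdio.h>

/* 1, 512, 262144 pages for levels 1 (4K), 2 (2M), 3 (1G) on x86-64. */
#define PAGES_PER_HPAGE(level)	(1ULL << (((level) - 1) * 9))

int main(void)
{
	uint64_t gfn = 0x12345;		/* faulting guest frame, inside a 2M region */
	uint64_t pfn = 0x98000;		/* head frame of the 2M backing page */
	int level = 2;			/* would-be huge mapping level */

	/* Same expression as in disallowed_hugepage_adjust(). */
	uint64_t page_mask = PAGES_PER_HPAGE(level) - PAGES_PER_HPAGE(level - 1);

	pfn |= gfn & page_mask;		/* fold the low 9 bits of the gfn back in */
	level--;			/* and map at the next smaller page size */

	printf("page_mask = 0x%llx, adjusted pfn = 0x%llx, level = %d\n",
	       (unsigned long long)page_mask, (unsigned long long)pfn, level);
	return 0;
}

FNAME(fetch) in paging_tmpl.h, further down in this series, performs the mirror-image step: it restores the low bits of gw->gfn from the virtual address before applying the same adjustment.
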
@@ -3550,11 +3637,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
3550 | { | 3637 | { |
3551 | int r; | 3638 | int r; |
3552 | int level; | 3639 | int level; |
3553 | bool force_pt_level = false; | 3640 | bool force_pt_level; |
3554 | kvm_pfn_t pfn; | 3641 | kvm_pfn_t pfn; |
3555 | unsigned long mmu_seq; | 3642 | unsigned long mmu_seq; |
3556 | bool map_writable, write = error_code & PFERR_WRITE_MASK; | 3643 | bool map_writable, write = error_code & PFERR_WRITE_MASK; |
3644 | bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && | ||
3645 | is_nx_huge_page_enabled(); | ||
3557 | 3646 | ||
3647 | force_pt_level = lpage_disallowed; | ||
3558 | level = mapping_level(vcpu, gfn, &force_pt_level); | 3648 | level = mapping_level(vcpu, gfn, &force_pt_level); |
3559 | if (likely(!force_pt_level)) { | 3649 | if (likely(!force_pt_level)) { |
3560 | /* | 3650 | /* |
@@ -3588,7 +3678,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
3588 | goto out_unlock; | 3678 | goto out_unlock; |
3589 | if (likely(!force_pt_level)) | 3679 | if (likely(!force_pt_level)) |
3590 | transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); | 3680 | transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); |
3591 | r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); | 3681 | r = __direct_map(vcpu, v, write, map_writable, level, pfn, |
3682 | prefault, false); | ||
3592 | out_unlock: | 3683 | out_unlock: |
3593 | spin_unlock(&vcpu->kvm->mmu_lock); | 3684 | spin_unlock(&vcpu->kvm->mmu_lock); |
3594 | kvm_release_pfn_clean(pfn); | 3685 | kvm_release_pfn_clean(pfn); |
@@ -4174,6 +4265,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
4174 | unsigned long mmu_seq; | 4265 | unsigned long mmu_seq; |
4175 | int write = error_code & PFERR_WRITE_MASK; | 4266 | int write = error_code & PFERR_WRITE_MASK; |
4176 | bool map_writable; | 4267 | bool map_writable; |
4268 | bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && | ||
4269 | is_nx_huge_page_enabled(); | ||
4177 | 4270 | ||
4178 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); | 4271 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); |
4179 | 4272 | ||
@@ -4184,8 +4277,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
4184 | if (r) | 4277 | if (r) |
4185 | return r; | 4278 | return r; |
4186 | 4279 | ||
4187 | force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, | 4280 | force_pt_level = |
4188 | PT_DIRECTORY_LEVEL); | 4281 | lpage_disallowed || |
4282 | !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); | ||
4189 | level = mapping_level(vcpu, gfn, &force_pt_level); | 4283 | level = mapping_level(vcpu, gfn, &force_pt_level); |
4190 | if (likely(!force_pt_level)) { | 4284 | if (likely(!force_pt_level)) { |
4191 | if (level > PT_DIRECTORY_LEVEL && | 4285 | if (level > PT_DIRECTORY_LEVEL && |
@@ -4214,7 +4308,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
4214 | goto out_unlock; | 4308 | goto out_unlock; |
4215 | if (likely(!force_pt_level)) | 4309 | if (likely(!force_pt_level)) |
4216 | transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); | 4310 | transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); |
4217 | r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); | 4311 | r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, |
4312 | prefault, lpage_disallowed); | ||
4218 | out_unlock: | 4313 | out_unlock: |
4219 | spin_unlock(&vcpu->kvm->mmu_lock); | 4314 | spin_unlock(&vcpu->kvm->mmu_lock); |
4220 | kvm_release_pfn_clean(pfn); | 4315 | kvm_release_pfn_clean(pfn); |
@@ -6155,10 +6250,60 @@ static void kvm_set_mmio_spte_mask(void) | |||
6155 | kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); | 6250 | kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); |
6156 | } | 6251 | } |
6157 | 6252 | ||
6253 | static bool get_nx_auto_mode(void) | ||
6254 | { | ||
6255 | /* Return true when CPU has the bug, and mitigations are ON */ | ||
6256 | return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); | ||
6257 | } | ||
6258 | |||
6259 | static void __set_nx_huge_pages(bool val) | ||
6260 | { | ||
6261 | nx_huge_pages = itlb_multihit_kvm_mitigation = val; | ||
6262 | } | ||
6263 | |||
6264 | static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) | ||
6265 | { | ||
6266 | bool old_val = nx_huge_pages; | ||
6267 | bool new_val; | ||
6268 | |||
6269 | /* In "auto" mode deploy workaround only if CPU has the bug. */ | ||
6270 | if (sysfs_streq(val, "off")) | ||
6271 | new_val = 0; | ||
6272 | else if (sysfs_streq(val, "force")) | ||
6273 | new_val = 1; | ||
6274 | else if (sysfs_streq(val, "auto")) | ||
6275 | new_val = get_nx_auto_mode(); | ||
6276 | else if (strtobool(val, &new_val) < 0) | ||
6277 | return -EINVAL; | ||
6278 | |||
6279 | __set_nx_huge_pages(new_val); | ||
6280 | |||
6281 | if (new_val != old_val) { | ||
6282 | struct kvm *kvm; | ||
6283 | int idx; | ||
6284 | |||
6285 | mutex_lock(&kvm_lock); | ||
6286 | |||
6287 | list_for_each_entry(kvm, &vm_list, vm_list) { | ||
6288 | idx = srcu_read_lock(&kvm->srcu); | ||
6289 | kvm_mmu_zap_all_fast(kvm); | ||
6290 | srcu_read_unlock(&kvm->srcu, idx); | ||
6291 | |||
6292 | wake_up_process(kvm->arch.nx_lpage_recovery_thread); | ||
6293 | } | ||
6294 | mutex_unlock(&kvm_lock); | ||
6295 | } | ||
6296 | |||
6297 | return 0; | ||
6298 | } | ||
6299 | |||
6158 | int kvm_mmu_module_init(void) | 6300 | int kvm_mmu_module_init(void) |
6159 | { | 6301 | { |
6160 | int ret = -ENOMEM; | 6302 | int ret = -ENOMEM; |
6161 | 6303 | ||
6304 | if (nx_huge_pages == -1) | ||
6305 | __set_nx_huge_pages(get_nx_auto_mode()); | ||
6306 | |||
6162 | /* | 6307 | /* |
6163 | * MMU roles use union aliasing which is, generally speaking, an | 6308 | * MMU roles use union aliasing which is, generally speaking, an |
6164 | * undefined behavior. However, we supposedly know how compilers behave | 6309 | * undefined behavior. However, we supposedly know how compilers behave |
@@ -6238,3 +6383,116 @@ void kvm_mmu_module_exit(void) | |||
6238 | unregister_shrinker(&mmu_shrinker); | 6383 | unregister_shrinker(&mmu_shrinker); |
6239 | mmu_audit_disable(); | 6384 | mmu_audit_disable(); |
6240 | } | 6385 | } |
6386 | |||
6387 | static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) | ||
6388 | { | ||
6389 | unsigned int old_val; | ||
6390 | int err; | ||
6391 | |||
6392 | old_val = nx_huge_pages_recovery_ratio; | ||
6393 | err = param_set_uint(val, kp); | ||
6394 | if (err) | ||
6395 | return err; | ||
6396 | |||
6397 | if (READ_ONCE(nx_huge_pages) && | ||
6398 | !old_val && nx_huge_pages_recovery_ratio) { | ||
6399 | struct kvm *kvm; | ||
6400 | |||
6401 | mutex_lock(&kvm_lock); | ||
6402 | |||
6403 | list_for_each_entry(kvm, &vm_list, vm_list) | ||
6404 | wake_up_process(kvm->arch.nx_lpage_recovery_thread); | ||
6405 | |||
6406 | mutex_unlock(&kvm_lock); | ||
6407 | } | ||
6408 | |||
6409 | return err; | ||
6410 | } | ||
6411 | |||
6412 | static void kvm_recover_nx_lpages(struct kvm *kvm) | ||
6413 | { | ||
6414 | int rcu_idx; | ||
6415 | struct kvm_mmu_page *sp; | ||
6416 | unsigned int ratio; | ||
6417 | LIST_HEAD(invalid_list); | ||
6418 | ulong to_zap; | ||
6419 | |||
6420 | rcu_idx = srcu_read_lock(&kvm->srcu); | ||
6421 | spin_lock(&kvm->mmu_lock); | ||
6422 | |||
6423 | ratio = READ_ONCE(nx_huge_pages_recovery_ratio); | ||
6424 | to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; | ||
6425 | while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { | ||
6426 | /* | ||
6427 | * We use a separate list instead of just using active_mmu_pages | ||
6428 | * because the number of lpage_disallowed pages is expected to | ||
6429 | * be relatively small compared to the total. | ||
6430 | */ | ||
6431 | sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, | ||
6432 | struct kvm_mmu_page, | ||
6433 | lpage_disallowed_link); | ||
6434 | WARN_ON_ONCE(!sp->lpage_disallowed); | ||
6435 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | ||
6436 | WARN_ON_ONCE(sp->lpage_disallowed); | ||
6437 | |||
6438 | if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { | ||
6439 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
6440 | if (to_zap) | ||
6441 | cond_resched_lock(&kvm->mmu_lock); | ||
6442 | } | ||
6443 | } | ||
6444 | |||
6445 | spin_unlock(&kvm->mmu_lock); | ||
6446 | srcu_read_unlock(&kvm->srcu, rcu_idx); | ||
6447 | } | ||
6448 | |||
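
Editor's note: the recovery thread zaps 1/nx_huge_pages_recovery_ratio of the disallowed huge pages per 60-second period (see get_nx_lpage_recovery_timeout() just below). A short user-space sketch of that sizing, with the DIV_ROUND_UP expression copied from kvm_recover_nx_lpages() and everything else illustrative:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* to_zap as computed at the top of kvm_recover_nx_lpages(). */
static unsigned long to_zap(unsigned long nx_lpage_splits, unsigned int ratio)
{
	return ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
}

int main(void)
{
	/* Default ratio is 60: with 6000 split huge pages, zap 100 per pass. */
	printf("%lu\n", to_zap(6000, 60));
	/* ratio == 0 disables recovery entirely. */
	printf("%lu\n", to_zap(6000, 0));
	return 0;
}
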
6449 | static long get_nx_lpage_recovery_timeout(u64 start_time) | ||
6450 | { | ||
6451 | return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) | ||
6452 | ? start_time + 60 * HZ - get_jiffies_64() | ||
6453 | : MAX_SCHEDULE_TIMEOUT; | ||
6454 | } | ||
6455 | |||
6456 | static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) | ||
6457 | { | ||
6458 | u64 start_time; | ||
6459 | long remaining_time; | ||
6460 | |||
6461 | while (true) { | ||
6462 | start_time = get_jiffies_64(); | ||
6463 | remaining_time = get_nx_lpage_recovery_timeout(start_time); | ||
6464 | |||
6465 | set_current_state(TASK_INTERRUPTIBLE); | ||
6466 | while (!kthread_should_stop() && remaining_time > 0) { | ||
6467 | schedule_timeout(remaining_time); | ||
6468 | remaining_time = get_nx_lpage_recovery_timeout(start_time); | ||
6469 | set_current_state(TASK_INTERRUPTIBLE); | ||
6470 | } | ||
6471 | |||
6472 | set_current_state(TASK_RUNNING); | ||
6473 | |||
6474 | if (kthread_should_stop()) | ||
6475 | return 0; | ||
6476 | |||
6477 | kvm_recover_nx_lpages(kvm); | ||
6478 | } | ||
6479 | } | ||
6480 | |||
6481 | int kvm_mmu_post_init_vm(struct kvm *kvm) | ||
6482 | { | ||
6483 | int err; | ||
6484 | |||
6485 | err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, | ||
6486 | "kvm-nx-lpage-recovery", | ||
6487 | &kvm->arch.nx_lpage_recovery_thread); | ||
6488 | if (!err) | ||
6489 | kthread_unpark(kvm->arch.nx_lpage_recovery_thread); | ||
6490 | |||
6491 | return err; | ||
6492 | } | ||
6493 | |||
6494 | void kvm_mmu_pre_destroy_vm(struct kvm *kvm) | ||
6495 | { | ||
6496 | if (kvm->arch.nx_lpage_recovery_thread) | ||
6497 | kthread_stop(kvm->arch.nx_lpage_recovery_thread); | ||
6498 | } | ||
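
Editor's note: because nx_huge_pages and nx_huge_pages_recovery_ratio are registered with module_param_cb() and mode 0644, the mitigation can be retuned at run time without reloading kvm. Below is a small user-space sketch (root required) that reads the current value and switches it back to "auto"; the /sys/module path is the standard location for module parameters, and the rest is illustrative.

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/module/kvm/parameters/nx_huge_pages";
	char cur[16] = "";
	FILE *f;

	f = fopen(path, "r");
	if (f) {
		if (fgets(cur, sizeof(cur), f))
			printf("nx_huge_pages is currently: %s", cur);
		fclose(f);
	}

	/* set_nx_huge_pages() accepts "off", "force", "auto" or a plain bool. */
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fputs("auto\n", f);
	fclose(f);
	return 0;
}

Note from set_nx_huge_pages() above that changing the value zaps all shadow pages in every VM and wakes each VM's recovery thread, so toggling it is not free.
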
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 11f8ec89433b..d55674f44a18 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); | |||
210 | bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, | 210 | bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, |
211 | struct kvm_memory_slot *slot, u64 gfn); | 211 | struct kvm_memory_slot *slot, u64 gfn); |
212 | int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); | 212 | int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); |
213 | |||
214 | int kvm_mmu_post_init_vm(struct kvm *kvm); | ||
215 | void kvm_mmu_pre_destroy_vm(struct kvm *kvm); | ||
216 | |||
213 | #endif | 217 | #endif |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7d5cdb3af594..97b21e7fd013 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
614 | static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 614 | static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
615 | struct guest_walker *gw, | 615 | struct guest_walker *gw, |
616 | int write_fault, int hlevel, | 616 | int write_fault, int hlevel, |
617 | kvm_pfn_t pfn, bool map_writable, bool prefault) | 617 | kvm_pfn_t pfn, bool map_writable, bool prefault, |
618 | bool lpage_disallowed) | ||
618 | { | 619 | { |
619 | struct kvm_mmu_page *sp = NULL; | 620 | struct kvm_mmu_page *sp = NULL; |
620 | struct kvm_shadow_walk_iterator it; | 621 | struct kvm_shadow_walk_iterator it; |
621 | unsigned direct_access, access = gw->pt_access; | 622 | unsigned direct_access, access = gw->pt_access; |
622 | int top_level, ret; | 623 | int top_level, ret; |
623 | gfn_t base_gfn; | 624 | gfn_t gfn, base_gfn; |
624 | 625 | ||
625 | direct_access = gw->pte_access; | 626 | direct_access = gw->pte_access; |
626 | 627 | ||
@@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
665 | link_shadow_page(vcpu, it.sptep, sp); | 666 | link_shadow_page(vcpu, it.sptep, sp); |
666 | } | 667 | } |
667 | 668 | ||
668 | base_gfn = gw->gfn; | 669 | /* |
670 | * FNAME(page_fault) might have clobbered the bottom bits of | ||
671 | * gw->gfn, restore them from the virtual address. | ||
672 | */ | ||
673 | gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); | ||
674 | base_gfn = gfn; | ||
669 | 675 | ||
670 | trace_kvm_mmu_spte_requested(addr, gw->level, pfn); | 676 | trace_kvm_mmu_spte_requested(addr, gw->level, pfn); |
671 | 677 | ||
672 | for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { | 678 | for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { |
673 | clear_sp_write_flooding_count(it.sptep); | 679 | clear_sp_write_flooding_count(it.sptep); |
674 | base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | 680 | |
681 | /* | ||
682 | * We cannot overwrite existing page tables with an NX | ||
683 | * large page, as the leaf could be executable. | ||
684 | */ | ||
685 | disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); | ||
686 | |||
687 | base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | ||
675 | if (it.level == hlevel) | 688 | if (it.level == hlevel) |
676 | break; | 689 | break; |
677 | 690 | ||
@@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
683 | sp = kvm_mmu_get_page(vcpu, base_gfn, addr, | 696 | sp = kvm_mmu_get_page(vcpu, base_gfn, addr, |
684 | it.level - 1, true, direct_access); | 697 | it.level - 1, true, direct_access); |
685 | link_shadow_page(vcpu, it.sptep, sp); | 698 | link_shadow_page(vcpu, it.sptep, sp); |
699 | if (lpage_disallowed) | ||
700 | account_huge_nx_page(vcpu->kvm, sp); | ||
686 | } | 701 | } |
687 | } | 702 | } |
688 | 703 | ||
@@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
759 | int r; | 774 | int r; |
760 | kvm_pfn_t pfn; | 775 | kvm_pfn_t pfn; |
761 | int level = PT_PAGE_TABLE_LEVEL; | 776 | int level = PT_PAGE_TABLE_LEVEL; |
762 | bool force_pt_level = false; | ||
763 | unsigned long mmu_seq; | 777 | unsigned long mmu_seq; |
764 | bool map_writable, is_self_change_mapping; | 778 | bool map_writable, is_self_change_mapping; |
779 | bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && | ||
780 | is_nx_huge_page_enabled(); | ||
781 | bool force_pt_level = lpage_disallowed; | ||
765 | 782 | ||
766 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 783 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
767 | 784 | ||
@@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
851 | if (!force_pt_level) | 868 | if (!force_pt_level) |
852 | transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); | 869 | transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); |
853 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, | 870 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
854 | level, pfn, map_writable, prefault); | 871 | level, pfn, map_writable, prefault, lpage_disallowed); |
855 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 872 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
856 | 873 | ||
857 | out_unlock: | 874 | out_unlock: |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c8a5e20ea06..7db5c8ef35dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -213,6 +213,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
213 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | 213 | { "mmu_unsync", VM_STAT(mmu_unsync) }, |
214 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 214 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
215 | { "largepages", VM_STAT(lpages, .mode = 0444) }, | 215 | { "largepages", VM_STAT(lpages, .mode = 0444) }, |
216 | { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, | ||
216 | { "max_mmu_page_hash_collisions", | 217 | { "max_mmu_page_hash_collisions", |
217 | VM_STAT(max_mmu_page_hash_collisions) }, | 218 | VM_STAT(max_mmu_page_hash_collisions) }, |
218 | { NULL } | 219 | { NULL } |
@@ -1285,6 +1286,14 @@ static u64 kvm_get_arch_capabilities(void) | |||
1285 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); | 1286 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); |
1286 | 1287 | ||
1287 | /* | 1288 | /* |
1289 | * If nx_huge_pages is enabled, KVM's shadow paging will ensure that | ||
1290 | * the nested hypervisor runs with NX huge pages. If it is not, | ||
1291 | * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other | ||
1292 | * L1 guests, so it need not worry about its own (L2) guests. | ||
1293 | */ | ||
1294 | data |= ARCH_CAP_PSCHANGE_MC_NO; | ||
1295 | |||
1296 | /* | ||
1288 | * If we're doing cache flushes (either "always" or "cond") | 1297 | * If we're doing cache flushes (either "always" or "cond") |
1289 | * we will do one whenever the guest does a vmlaunch/vmresume. | 1298 | * we will do one whenever the guest does a vmlaunch/vmresume. |
1290 | * If an outer hypervisor is doing the cache flush for us | 1299 | * If an outer hypervisor is doing the cache flush for us |
@@ -1303,6 +1312,25 @@ static u64 kvm_get_arch_capabilities(void) | |||
1303 | if (!boot_cpu_has_bug(X86_BUG_MDS)) | 1312 | if (!boot_cpu_has_bug(X86_BUG_MDS)) |
1304 | data |= ARCH_CAP_MDS_NO; | 1313 | data |= ARCH_CAP_MDS_NO; |
1305 | 1314 | ||
1315 | /* | ||
1316 | * On TAA affected systems, export MDS_NO=0 when: | ||
1317 | * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. | ||
1318 | * - Updated microcode is present. This is detected by | ||
1319 | * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures | ||
1320 | * that VERW clears CPU buffers. | ||
1321 | * | ||
1322 | * When MDS_NO=0 is exported, guests deploy clear CPU buffer | ||
1323 | * mitigation and don't complain: | ||
1324 | * | ||
1325 | * "Vulnerable: Clear CPU buffers attempted, no microcode" | ||
1326 | * | ||
1327 | * If TSX is disabled on the system, guests are also mitigated against | ||
1328 | * TAA and clear CPU buffer mitigation is not required for guests. | ||
1329 | */ | ||
1330 | if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && | ||
1331 | (data & ARCH_CAP_TSX_CTRL_MSR)) | ||
1332 | data &= ~ARCH_CAP_MDS_NO; | ||
1333 | |||
1306 | return data; | 1334 | return data; |
1307 | } | 1335 | } |
1308 | 1336 | ||
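
Editor's note: the comment in kvm_get_arch_capabilities() describes when KVM deliberately hides MDS_NO from guests so that they keep using VERW. The predicate below is a user-space restatement of that single condition; the boolean parameters stand in for X86_BUG_TAA, X86_FEATURE_RTM and ARCH_CAP_TSX_CTRL_MSR and are not kernel definitions.

#include <stdbool.h>
#include <stdio.h>

/*
 * Clear MDS_NO in the guest-visible ARCH_CAPABILITIES only when the host
 * is TAA-affected, TSX is enabled (RTM present) and the TSX_CTRL MSR
 * exists, i.e. when updated microcode guarantees that VERW clears buffers.
 */
static bool hide_mds_no(bool host_taa_bug, bool host_rtm, bool tsx_ctrl_msr)
{
	return host_taa_bug && host_rtm && tsx_ctrl_msr;
}

int main(void)
{
	printf("TAA host, TSX on, new ucode -> hide MDS_NO: %d\n",
	       hide_mds_no(true, true, true));
	printf("TAA host, TSX disabled      -> hide MDS_NO: %d\n",
	       hide_mds_no(true, false, true));
	return 0;
}
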
@@ -9424,6 +9452,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
9424 | INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); | 9452 | INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); |
9425 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 9453 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
9426 | INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); | 9454 | INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); |
9455 | INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); | ||
9427 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 9456 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
9428 | atomic_set(&kvm->arch.noncoherent_dma_count, 0); | 9457 | atomic_set(&kvm->arch.noncoherent_dma_count, 0); |
9429 | 9458 | ||
@@ -9452,6 +9481,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
9452 | return kvm_x86_ops->vm_init(kvm); | 9481 | return kvm_x86_ops->vm_init(kvm); |
9453 | } | 9482 | } |
9454 | 9483 | ||
9484 | int kvm_arch_post_init_vm(struct kvm *kvm) | ||
9485 | { | ||
9486 | return kvm_mmu_post_init_vm(kvm); | ||
9487 | } | ||
9488 | |||
9455 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 9489 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
9456 | { | 9490 | { |
9457 | vcpu_load(vcpu); | 9491 | vcpu_load(vcpu); |
@@ -9553,6 +9587,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) | |||
9553 | } | 9587 | } |
9554 | EXPORT_SYMBOL_GPL(x86_set_memory_region); | 9588 | EXPORT_SYMBOL_GPL(x86_set_memory_region); |
9555 | 9589 | ||
9590 | void kvm_arch_pre_destroy_vm(struct kvm *kvm) | ||
9591 | { | ||
9592 | kvm_mmu_pre_destroy_vm(kvm); | ||
9593 | } | ||
9594 | |||
9556 | void kvm_arch_destroy_vm(struct kvm *kvm) | 9595 | void kvm_arch_destroy_vm(struct kvm *kvm) |
9557 | { | 9596 | { |
9558 | if (current->mm == kvm->mm) { | 9597 | if (current->mm == kvm->mm) { |
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index cc37511de866..6265871a4af2 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c | |||
@@ -554,12 +554,27 @@ ssize_t __weak cpu_show_mds(struct device *dev, | |||
554 | return sprintf(buf, "Not affected\n"); | 554 | return sprintf(buf, "Not affected\n"); |
555 | } | 555 | } |
556 | 556 | ||
557 | ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, | ||
558 | struct device_attribute *attr, | ||
559 | char *buf) | ||
560 | { | ||
561 | return sprintf(buf, "Not affected\n"); | ||
562 | } | ||
563 | |||
564 | ssize_t __weak cpu_show_itlb_multihit(struct device *dev, | ||
565 | struct device_attribute *attr, char *buf) | ||
566 | { | ||
567 | return sprintf(buf, "Not affected\n"); | ||
568 | } | ||
569 | |||
557 | static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); | 570 | static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); |
558 | static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); | 571 | static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); |
559 | static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); | 572 | static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); |
560 | static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); | 573 | static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); |
561 | static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); | 574 | static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); |
562 | static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); | 575 | static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); |
576 | static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); | ||
577 | static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); | ||
563 | 578 | ||
564 | static struct attribute *cpu_root_vulnerabilities_attrs[] = { | 579 | static struct attribute *cpu_root_vulnerabilities_attrs[] = { |
565 | &dev_attr_meltdown.attr, | 580 | &dev_attr_meltdown.attr, |
@@ -568,6 +583,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { | |||
568 | &dev_attr_spec_store_bypass.attr, | 583 | &dev_attr_spec_store_bypass.attr, |
569 | &dev_attr_l1tf.attr, | 584 | &dev_attr_l1tf.attr, |
570 | &dev_attr_mds.attr, | 585 | &dev_attr_mds.attr, |
586 | &dev_attr_tsx_async_abort.attr, | ||
587 | &dev_attr_itlb_multihit.attr, | ||
571 | NULL | 588 | NULL |
572 | }; | 589 | }; |
573 | 590 | ||
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1cdfe05514c3..e41fd94ae5a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c | |||
@@ -319,6 +319,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) | |||
319 | free_engines(rcu_access_pointer(ctx->engines)); | 319 | free_engines(rcu_access_pointer(ctx->engines)); |
320 | mutex_destroy(&ctx->engines_mutex); | 320 | mutex_destroy(&ctx->engines_mutex); |
321 | 321 | ||
322 | kfree(ctx->jump_whitelist); | ||
323 | |||
322 | if (ctx->timeline) | 324 | if (ctx->timeline) |
323 | intel_timeline_put(ctx->timeline); | 325 | intel_timeline_put(ctx->timeline); |
324 | 326 | ||
@@ -441,6 +443,9 @@ __create_context(struct drm_i915_private *i915) | |||
441 | for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) | 443 | for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) |
442 | ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; | 444 | ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; |
443 | 445 | ||
446 | ctx->jump_whitelist = NULL; | ||
447 | ctx->jump_whitelist_cmds = 0; | ||
448 | |||
444 | return ctx; | 449 | return ctx; |
445 | 450 | ||
446 | err_free: | 451 | err_free: |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 260d59cc3de8..00537b9d7006 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h | |||
@@ -192,6 +192,13 @@ struct i915_gem_context { | |||
192 | * per vm, which may be one per context or shared with the global GTT) | 192 | * per vm, which may be one per context or shared with the global GTT) |
193 | */ | 193 | */ |
194 | struct radix_tree_root handles_vma; | 194 | struct radix_tree_root handles_vma; |
195 | |||
196 | /** jump_whitelist: Bit array for tracking cmds during cmdparsing | ||
197 | * Guarded by struct_mutex | ||
198 | */ | ||
199 | unsigned long *jump_whitelist; | ||
200 | /** jump_whitelist_cmds: No of cmd slots available */ | ||
201 | u32 jump_whitelist_cmds; | ||
195 | }; | 202 | }; |
196 | 203 | ||
197 | #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ | 204 | #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b5f6937369ea..e635e1e5f4d3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | |||
@@ -296,7 +296,9 @@ static inline u64 gen8_noncanonical_addr(u64 address) | |||
296 | 296 | ||
297 | static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) | 297 | static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) |
298 | { | 298 | { |
299 | return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; | 299 | return intel_engine_requires_cmd_parser(eb->engine) || |
300 | (intel_engine_using_cmd_parser(eb->engine) && | ||
301 | eb->args->batch_len); | ||
300 | } | 302 | } |
301 | 303 | ||
302 | static int eb_create(struct i915_execbuffer *eb) | 304 | static int eb_create(struct i915_execbuffer *eb) |
@@ -1955,40 +1957,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) | |||
1955 | return 0; | 1957 | return 0; |
1956 | } | 1958 | } |
1957 | 1959 | ||
1958 | static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) | 1960 | static struct i915_vma * |
1961 | shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) | ||
1962 | { | ||
1963 | struct drm_i915_private *dev_priv = eb->i915; | ||
1964 | struct i915_vma * const vma = *eb->vma; | ||
1965 | struct i915_address_space *vm; | ||
1966 | u64 flags; | ||
1967 | |||
1968 | /* | ||
1969 | * PPGTT backed shadow buffers must be mapped RO, to prevent | ||
1970 | * post-scan tampering | ||
1971 | */ | ||
1972 | if (CMDPARSER_USES_GGTT(dev_priv)) { | ||
1973 | flags = PIN_GLOBAL; | ||
1974 | vm = &dev_priv->ggtt.vm; | ||
1975 | } else if (vma->vm->has_read_only) { | ||
1976 | flags = PIN_USER; | ||
1977 | vm = vma->vm; | ||
1978 | i915_gem_object_set_readonly(obj); | ||
1979 | } else { | ||
1980 | DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); | ||
1981 | return ERR_PTR(-EINVAL); | ||
1982 | } | ||
1983 | |||
1984 | return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); | ||
1985 | } | ||
1986 | |||
1987 | static struct i915_vma *eb_parse(struct i915_execbuffer *eb) | ||
1959 | { | 1988 | { |
1960 | struct intel_engine_pool_node *pool; | 1989 | struct intel_engine_pool_node *pool; |
1961 | struct i915_vma *vma; | 1990 | struct i915_vma *vma; |
1991 | u64 batch_start; | ||
1992 | u64 shadow_batch_start; | ||
1962 | int err; | 1993 | int err; |
1963 | 1994 | ||
1964 | pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len); | 1995 | pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len); |
1965 | if (IS_ERR(pool)) | 1996 | if (IS_ERR(pool)) |
1966 | return ERR_CAST(pool); | 1997 | return ERR_CAST(pool); |
1967 | 1998 | ||
1968 | err = intel_engine_cmd_parser(eb->engine, | 1999 | vma = shadow_batch_pin(eb, pool->obj); |
2000 | if (IS_ERR(vma)) | ||
2001 | goto err; | ||
2002 | |||
2003 | batch_start = gen8_canonical_addr(eb->batch->node.start) + | ||
2004 | eb->batch_start_offset; | ||
2005 | |||
2006 | shadow_batch_start = gen8_canonical_addr(vma->node.start); | ||
2007 | |||
2008 | err = intel_engine_cmd_parser(eb->gem_context, | ||
2009 | eb->engine, | ||
1969 | eb->batch->obj, | 2010 | eb->batch->obj, |
1970 | pool->obj, | 2011 | batch_start, |
1971 | eb->batch_start_offset, | 2012 | eb->batch_start_offset, |
1972 | eb->batch_len, | 2013 | eb->batch_len, |
1973 | is_master); | 2014 | pool->obj, |
2015 | shadow_batch_start); | ||
2016 | |||
1974 | if (err) { | 2017 | if (err) { |
1975 | if (err == -EACCES) /* unhandled chained batch */ | 2018 | i915_vma_unpin(vma); |
2019 | |||
2020 | /* | ||
2021 | * Unsafe GGTT-backed buffers can still be submitted safely | ||
2022 | * as non-secure. | ||
2023 | * For PPGTT backing however, we have no choice but to forcibly | ||
2024 | * reject unsafe buffers | ||
2025 | */ | ||
2026 | if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) | ||
2027 | /* Execute original buffer non-secure */ | ||
1976 | vma = NULL; | 2028 | vma = NULL; |
1977 | else | 2029 | else |
1978 | vma = ERR_PTR(err); | 2030 | vma = ERR_PTR(err); |
1979 | goto err; | 2031 | goto err; |
1980 | } | 2032 | } |
1981 | 2033 | ||
1982 | vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0); | ||
1983 | if (IS_ERR(vma)) | ||
1984 | goto err; | ||
1985 | |||
1986 | eb->vma[eb->buffer_count] = i915_vma_get(vma); | 2034 | eb->vma[eb->buffer_count] = i915_vma_get(vma); |
1987 | eb->flags[eb->buffer_count] = | 2035 | eb->flags[eb->buffer_count] = |
1988 | __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; | 2036 | __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; |
1989 | vma->exec_flags = &eb->flags[eb->buffer_count]; | 2037 | vma->exec_flags = &eb->flags[eb->buffer_count]; |
1990 | eb->buffer_count++; | 2038 | eb->buffer_count++; |
1991 | 2039 | ||
2040 | eb->batch_start_offset = 0; | ||
2041 | eb->batch = vma; | ||
2042 | |||
2043 | if (CMDPARSER_USES_GGTT(eb->i915)) | ||
2044 | eb->batch_flags |= I915_DISPATCH_SECURE; | ||
2045 | |||
2046 | /* eb->batch_len unchanged */ | ||
2047 | |||
1992 | vma->private = pool; | 2048 | vma->private = pool; |
1993 | return vma; | 2049 | return vma; |
1994 | 2050 | ||
@@ -2421,6 +2477,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
2421 | struct drm_i915_gem_exec_object2 *exec, | 2477 | struct drm_i915_gem_exec_object2 *exec, |
2422 | struct drm_syncobj **fences) | 2478 | struct drm_syncobj **fences) |
2423 | { | 2479 | { |
2480 | struct drm_i915_private *i915 = to_i915(dev); | ||
2424 | struct i915_execbuffer eb; | 2481 | struct i915_execbuffer eb; |
2425 | struct dma_fence *in_fence = NULL; | 2482 | struct dma_fence *in_fence = NULL; |
2426 | struct dma_fence *exec_fence = NULL; | 2483 | struct dma_fence *exec_fence = NULL; |
@@ -2432,7 +2489,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
2432 | BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & | 2489 | BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & |
2433 | ~__EXEC_OBJECT_UNKNOWN_FLAGS); | 2490 | ~__EXEC_OBJECT_UNKNOWN_FLAGS); |
2434 | 2491 | ||
2435 | eb.i915 = to_i915(dev); | 2492 | eb.i915 = i915; |
2436 | eb.file = file; | 2493 | eb.file = file; |
2437 | eb.args = args; | 2494 | eb.args = args; |
2438 | if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) | 2495 | if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) |
@@ -2452,8 +2509,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
2452 | 2509 | ||
2453 | eb.batch_flags = 0; | 2510 | eb.batch_flags = 0; |
2454 | if (args->flags & I915_EXEC_SECURE) { | 2511 | if (args->flags & I915_EXEC_SECURE) { |
2512 | if (INTEL_GEN(i915) >= 11) | ||
2513 | return -ENODEV; | ||
2514 | |||
2515 | /* Return -EPERM to trigger fallback code on old binaries. */ | ||
2516 | if (!HAS_SECURE_BATCHES(i915)) | ||
2517 | return -EPERM; | ||
2518 | |||
2455 | if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) | 2519 | if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) |
2456 | return -EPERM; | 2520 | return -EPERM; |
2457 | 2521 | ||
2458 | eb.batch_flags |= I915_DISPATCH_SECURE; | 2522 | eb.batch_flags |= I915_DISPATCH_SECURE; |
2459 | } | 2523 | } |
@@ -2530,34 +2594,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
2530 | goto err_vma; | 2594 | goto err_vma; |
2531 | } | 2595 | } |
2532 | 2596 | ||
2597 | if (eb.batch_len == 0) | ||
2598 | eb.batch_len = eb.batch->size - eb.batch_start_offset; | ||
2599 | |||
2533 | if (eb_use_cmdparser(&eb)) { | 2600 | if (eb_use_cmdparser(&eb)) { |
2534 | struct i915_vma *vma; | 2601 | struct i915_vma *vma; |
2535 | 2602 | ||
2536 | vma = eb_parse(&eb, drm_is_current_master(file)); | 2603 | vma = eb_parse(&eb); |
2537 | if (IS_ERR(vma)) { | 2604 | if (IS_ERR(vma)) { |
2538 | err = PTR_ERR(vma); | 2605 | err = PTR_ERR(vma); |
2539 | goto err_vma; | 2606 | goto err_vma; |
2540 | } | 2607 | } |
2541 | |||
2542 | if (vma) { | ||
2543 | /* | ||
2544 | * Batch parsed and accepted: | ||
2545 | * | ||
2546 | * Set the DISPATCH_SECURE bit to remove the NON_SECURE | ||
2547 | * bit from MI_BATCH_BUFFER_START commands issued in | ||
2548 | * the dispatch_execbuffer implementations. We | ||
2549 | * specifically don't want that set on batches the | ||
2550 | * command parser has accepted. | ||
2551 | */ | ||
2552 | eb.batch_flags |= I915_DISPATCH_SECURE; | ||
2553 | eb.batch_start_offset = 0; | ||
2554 | eb.batch = vma; | ||
2555 | } | ||
2556 | } | 2608 | } |
2557 | 2609 | ||
2558 | if (eb.batch_len == 0) | ||
2559 | eb.batch_len = eb.batch->size - eb.batch_start_offset; | ||
2560 | |||
2561 | /* | 2610 | /* |
2562 | * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure | 2611 | * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure |
2563 | * batch" bit. Hence we need to pin secure batches into the global gtt. | 2612 | * batch" bit. Hence we need to pin secure batches into the global gtt. |
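The I915_EXEC_SECURE changes above rely on existing userspace treating -EPERM as "secure batches unavailable" and resubmitting without the bit. An illustrative userspace fallback (not part of this patch; submit_maybe_secure() is a hypothetical helper, the ioctl and flag are the real uAPI) could look like:

```c
#include <errno.h>
#include <xf86drm.h>
#include <i915_drm.h>	/* from libdrm */

/*
 * Illustrative only: retry a secure submission as a normal one when the
 * kernel reports -EPERM, the fallback path the kernel comment relies on.
 * (Gen11+ returns -ENODEV instead, for which there is no fallback.)
 */
static int submit_maybe_secure(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
{
	execbuf->flags |= I915_EXEC_SECURE;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf) == 0)
		return 0;

	if (errno != EPERM)
		return -errno;

	/* Secure batches unsupported here: drop the bit and resubmit. */
	execbuf->flags &= ~(__u64)I915_EXEC_SECURE;
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf) ? -errno : 0;
}
```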
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a82cea95c2f2..9dd8c299cb2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h | |||
@@ -475,12 +475,13 @@ struct intel_engine_cs { | |||
475 | 475 | ||
476 | struct intel_engine_hangcheck hangcheck; | 476 | struct intel_engine_hangcheck hangcheck; |
477 | 477 | ||
478 | #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) | 478 | #define I915_ENGINE_USING_CMD_PARSER BIT(0) |
479 | #define I915_ENGINE_SUPPORTS_STATS BIT(1) | 479 | #define I915_ENGINE_SUPPORTS_STATS BIT(1) |
480 | #define I915_ENGINE_HAS_PREEMPTION BIT(2) | 480 | #define I915_ENGINE_HAS_PREEMPTION BIT(2) |
481 | #define I915_ENGINE_HAS_SEMAPHORES BIT(3) | 481 | #define I915_ENGINE_HAS_SEMAPHORES BIT(3) |
482 | #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) | 482 | #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) |
483 | #define I915_ENGINE_IS_VIRTUAL BIT(5) | 483 | #define I915_ENGINE_IS_VIRTUAL BIT(5) |
484 | #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) | ||
484 | unsigned int flags; | 485 | unsigned int flags; |
485 | 486 | ||
486 | /* | 487 | /* |
@@ -541,9 +542,15 @@ struct intel_engine_cs { | |||
541 | }; | 542 | }; |
542 | 543 | ||
543 | static inline bool | 544 | static inline bool |
544 | intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) | 545 | intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) |
545 | { | 546 | { |
546 | return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; | 547 | return engine->flags & I915_ENGINE_USING_CMD_PARSER; |
548 | } | ||
549 | |||
550 | static inline bool | ||
551 | intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) | ||
552 | { | ||
553 | return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; | ||
547 | } | 554 | } |
548 | 555 | ||
549 | static inline bool | 556 | static inline bool |
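The split into USING and REQUIRES matters at submission time: a Gen7 engine that merely uses the parser may still fall back to an unparsed, non-secure submission, while the Gen9 blitter configured later in this series must never skip the scan. A sketch of how a caller such as eb_use_cmdparser() would be expected to combine the two helpers (that call-site hunk is not part of this excerpt, so treat the details as assumptions):

```c
/* Sketch of the expected call-site logic, not the exact in-tree function. */
static bool eb_use_cmdparser_sketch(const struct i915_execbuffer *eb)
{
	/* A REQUIRES engine must always be scanned... */
	if (intel_engine_requires_cmd_parser(eb->engine))
		return true;

	/*
	 * ...while a USING engine (Gen7) keeps the old behaviour of only
	 * scanning when userspace supplied an explicit batch length.
	 */
	return intel_engine_using_cmd_parser(eb->engine) &&
	       eb->args->batch_len;
}
```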
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 1363e069ec83..fac75afed35b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c | |||
@@ -38,6 +38,9 @@ static int __gt_unpark(struct intel_wakeref *wf) | |||
38 | gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); | 38 | gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); |
39 | GEM_BUG_ON(!gt->awake); | 39 | GEM_BUG_ON(!gt->awake); |
40 | 40 | ||
41 | if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) | ||
42 | intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); | ||
43 | |||
41 | intel_enable_gt_powersave(i915); | 44 | intel_enable_gt_powersave(i915); |
42 | 45 | ||
43 | i915_update_gfx_val(i915); | 46 | i915_update_gfx_val(i915); |
@@ -67,6 +70,11 @@ static int __gt_park(struct intel_wakeref *wf) | |||
67 | if (INTEL_GEN(i915) >= 6) | 70 | if (INTEL_GEN(i915) >= 6) |
68 | gen6_rps_idle(i915); | 71 | gen6_rps_idle(i915); |
69 | 72 | ||
73 | if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { | ||
74 | i915_rc6_ctx_wa_check(i915); | ||
75 | intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); | ||
76 | } | ||
77 | |||
70 | /* Everything switched off, flush any residual interrupt just in case */ | 78 | /* Everything switched off, flush any residual interrupt just in case */ |
71 | intel_synchronize_irq(i915); | 79 | intel_synchronize_irq(i915); |
72 | 80 | ||
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 24555102e198..f24096e27bef 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c | |||
@@ -53,13 +53,11 @@ | |||
53 | * granting userspace undue privileges. There are three categories of privilege. | 53 | * granting userspace undue privileges. There are three categories of privilege. |
54 | * | 54 | * |
55 | * First, commands which are explicitly defined as privileged or which should | 55 | * First, commands which are explicitly defined as privileged or which should |
56 | * only be used by the kernel driver. The parser generally rejects such | 56 | * only be used by the kernel driver. The parser rejects such commands |
57 | * commands, though it may allow some from the drm master process. | ||
58 | * | 57 | * |
59 | * Second, commands which access registers. To support correct/enhanced | 58 | * Second, commands which access registers. To support correct/enhanced |
60 | * userspace functionality, particularly certain OpenGL extensions, the parser | 59 | * userspace functionality, particularly certain OpenGL extensions, the parser |
61 | * provides a whitelist of registers which userspace may safely access (for both | 60 | * provides a whitelist of registers which userspace may safely access |
62 | * normal and drm master processes). | ||
63 | * | 61 | * |
64 | * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). | 62 | * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). |
65 | * The parser always rejects such commands. | 63 | * The parser always rejects such commands. |
@@ -84,9 +82,9 @@ | |||
84 | * in the per-engine command tables. | 82 | * in the per-engine command tables. |
85 | * | 83 | * |
86 | * Other command table entries map fairly directly to high level categories | 84 | * Other command table entries map fairly directly to high level categories |
87 | * mentioned above: rejected, master-only, register whitelist. The parser | 85 | * mentioned above: rejected, register whitelist. The parser implements a number |
88 | * implements a number of checks, including the privileged memory checks, via a | 86 | * of checks, including the privileged memory checks, via a general bitmasking |
89 | * general bitmasking mechanism. | 87 | * mechanism. |
90 | */ | 88 | */ |
91 | 89 | ||
92 | /* | 90 | /* |
@@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor { | |||
104 | * CMD_DESC_REJECT: The command is never allowed | 102 | * CMD_DESC_REJECT: The command is never allowed |
105 | * CMD_DESC_REGISTER: The command should be checked against the | 103 | * CMD_DESC_REGISTER: The command should be checked against the |
106 | * register whitelist for the appropriate ring | 104 | * register whitelist for the appropriate ring |
107 | * CMD_DESC_MASTER: The command is allowed if the submitting process | ||
108 | * is the DRM master | ||
109 | */ | 105 | */ |
110 | u32 flags; | 106 | u32 flags; |
111 | #define CMD_DESC_FIXED (1<<0) | 107 | #define CMD_DESC_FIXED (1<<0) |
@@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor { | |||
113 | #define CMD_DESC_REJECT (1<<2) | 109 | #define CMD_DESC_REJECT (1<<2) |
114 | #define CMD_DESC_REGISTER (1<<3) | 110 | #define CMD_DESC_REGISTER (1<<3) |
115 | #define CMD_DESC_BITMASK (1<<4) | 111 | #define CMD_DESC_BITMASK (1<<4) |
116 | #define CMD_DESC_MASTER (1<<5) | ||
117 | 112 | ||
118 | /* | 113 | /* |
119 | * The command's unique identification bits and the bitmask to get them. | 114 | * The command's unique identification bits and the bitmask to get them. |
@@ -194,7 +189,7 @@ struct drm_i915_cmd_table { | |||
194 | #define CMD(op, opm, f, lm, fl, ...) \ | 189 | #define CMD(op, opm, f, lm, fl, ...) \ |
195 | { \ | 190 | { \ |
196 | .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ | 191 | .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ |
197 | .cmd = { (op), ~0u << (opm) }, \ | 192 | .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ |
198 | .length = { (lm) }, \ | 193 | .length = { (lm) }, \ |
199 | __VA_ARGS__ \ | 194 | __VA_ARGS__ \ |
200 | } | 195 | } |
@@ -209,14 +204,13 @@ struct drm_i915_cmd_table { | |||
209 | #define R CMD_DESC_REJECT | 204 | #define R CMD_DESC_REJECT |
210 | #define W CMD_DESC_REGISTER | 205 | #define W CMD_DESC_REGISTER |
211 | #define B CMD_DESC_BITMASK | 206 | #define B CMD_DESC_BITMASK |
212 | #define M CMD_DESC_MASTER | ||
213 | 207 | ||
214 | /* Command Mask Fixed Len Action | 208 | /* Command Mask Fixed Len Action |
215 | ---------------------------------------------------------- */ | 209 | ---------------------------------------------------------- */ |
216 | static const struct drm_i915_cmd_descriptor common_cmds[] = { | 210 | static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { |
217 | CMD( MI_NOOP, SMI, F, 1, S ), | 211 | CMD( MI_NOOP, SMI, F, 1, S ), |
218 | CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), | 212 | CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), |
219 | CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), | 213 | CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), |
220 | CMD( MI_ARB_CHECK, SMI, F, 1, S ), | 214 | CMD( MI_ARB_CHECK, SMI, F, 1, S ), |
221 | CMD( MI_REPORT_HEAD, SMI, F, 1, S ), | 215 | CMD( MI_REPORT_HEAD, SMI, F, 1, S ), |
222 | CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), | 216 | CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), |
@@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { | |||
246 | CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), | 240 | CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), |
247 | }; | 241 | }; |
248 | 242 | ||
249 | static const struct drm_i915_cmd_descriptor render_cmds[] = { | 243 | static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { |
250 | CMD( MI_FLUSH, SMI, F, 1, S ), | 244 | CMD( MI_FLUSH, SMI, F, 1, S ), |
251 | CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), | 245 | CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), |
252 | CMD( MI_PREDICATE, SMI, F, 1, S ), | 246 | CMD( MI_PREDICATE, SMI, F, 1, S ), |
@@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { | |||
313 | CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), | 307 | CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), |
314 | CMD( MI_SET_APPID, SMI, F, 1, S ), | 308 | CMD( MI_SET_APPID, SMI, F, 1, S ), |
315 | CMD( MI_RS_CONTEXT, SMI, F, 1, S ), | 309 | CMD( MI_RS_CONTEXT, SMI, F, 1, S ), |
316 | CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), | 310 | CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), |
317 | CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), | 311 | CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), |
318 | CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, | 312 | CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, |
319 | .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), | 313 | .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), |
@@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { | |||
330 | CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), | 324 | CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), |
331 | }; | 325 | }; |
332 | 326 | ||
333 | static const struct drm_i915_cmd_descriptor video_cmds[] = { | 327 | static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { |
334 | CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), | 328 | CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), |
335 | CMD( MI_SET_APPID, SMI, F, 1, S ), | 329 | CMD( MI_SET_APPID, SMI, F, 1, S ), |
336 | CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, | 330 | CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, |
@@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { | |||
374 | CMD( MFX_WAIT, SMFX, F, 1, S ), | 368 | CMD( MFX_WAIT, SMFX, F, 1, S ), |
375 | }; | 369 | }; |
376 | 370 | ||
377 | static const struct drm_i915_cmd_descriptor vecs_cmds[] = { | 371 | static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { |
378 | CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), | 372 | CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), |
379 | CMD( MI_SET_APPID, SMI, F, 1, S ), | 373 | CMD( MI_SET_APPID, SMI, F, 1, S ), |
380 | CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, | 374 | CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, |
@@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { | |||
412 | }}, ), | 406 | }}, ), |
413 | }; | 407 | }; |
414 | 408 | ||
415 | static const struct drm_i915_cmd_descriptor blt_cmds[] = { | 409 | static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { |
416 | CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), | 410 | CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), |
417 | CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, | 411 | CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, |
418 | .bits = {{ | 412 | .bits = {{ |
@@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { | |||
446 | }; | 440 | }; |
447 | 441 | ||
448 | static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { | 442 | static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { |
449 | CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), | 443 | CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), |
450 | CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), | 444 | CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), |
451 | }; | 445 | }; |
452 | 446 | ||
447 | /* | ||
448 | * For Gen9 we can still rely on the h/w to enforce cmd security, and only | ||
449 | * need to re-enforce the register access checks. We therefore only need to | ||
450 | * teach the cmdparser how to find the end of each command, and identify | ||
451 | * register accesses. The table doesn't need to reject any commands, and so | ||
452 | * the only commands listed here are: | ||
453 | * 1) Those that touch registers | ||
454 | * 2) Those that do not have the default 8-bit length | ||
455 | * | ||
456 | * Note that the default MI length mask chosen for this table is 0xFF, not | ||
457 | * the 0x3F used on older devices. This is because the vast majority of MI | ||
458 | * cmds on Gen9 use a standard 8-bit Length field. | ||
459 | * All the Gen9 blitter instructions are standard 0xFF length mask, and | ||
460 | * none allow access to non-general registers, so in fact no BLT cmds are | ||
461 | * included in the table at all. | ||
462 | * | ||
463 | */ | ||
464 | static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { | ||
465 | CMD( MI_NOOP, SMI, F, 1, S ), | ||
466 | CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), | ||
467 | CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), | ||
468 | CMD( MI_FLUSH, SMI, F, 1, S ), | ||
469 | CMD( MI_ARB_CHECK, SMI, F, 1, S ), | ||
470 | CMD( MI_REPORT_HEAD, SMI, F, 1, S ), | ||
471 | CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), | ||
472 | CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), | ||
473 | CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), | ||
474 | CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), | ||
475 | CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), | ||
476 | CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, | ||
477 | .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), | ||
478 | CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), | ||
479 | CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, | ||
480 | .reg = { .offset = 1, .mask = 0x007FFFFC } ), | ||
481 | CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), | ||
482 | CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, | ||
483 | .reg = { .offset = 1, .mask = 0x007FFFFC } ), | ||
484 | CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, | ||
485 | .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), | ||
486 | |||
487 | /* | ||
488 | * We allow BB_START but apply further checks. We just sanitize the | ||
489 | * basic fields here. | ||
490 | */ | ||
491 | #define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) | ||
492 | #define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) | ||
493 | CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, | ||
494 | .bits = {{ | ||
495 | .offset = 0, | ||
496 | .mask = MI_BB_START_OPERAND_MASK, | ||
497 | .expected = MI_BB_START_OPERAND_EXPECT, | ||
498 | }}, ), | ||
499 | }; | ||
500 | |||
453 | static const struct drm_i915_cmd_descriptor noop_desc = | 501 | static const struct drm_i915_cmd_descriptor noop_desc = |
454 | CMD(MI_NOOP, SMI, F, 1, S); | 502 | CMD(MI_NOOP, SMI, F, 1, S); |
455 | 503 | ||
@@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc = | |||
463 | #undef R | 511 | #undef R |
464 | #undef W | 512 | #undef W |
465 | #undef B | 513 | #undef B |
466 | #undef M | ||
467 | 514 | ||
468 | static const struct drm_i915_cmd_table gen7_render_cmds[] = { | 515 | static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { |
469 | { common_cmds, ARRAY_SIZE(common_cmds) }, | 516 | { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, |
470 | { render_cmds, ARRAY_SIZE(render_cmds) }, | 517 | { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, |
471 | }; | 518 | }; |
472 | 519 | ||
473 | static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { | 520 | static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { |
474 | { common_cmds, ARRAY_SIZE(common_cmds) }, | 521 | { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, |
475 | { render_cmds, ARRAY_SIZE(render_cmds) }, | 522 | { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, |
476 | { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, | 523 | { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, |
477 | }; | 524 | }; |
478 | 525 | ||
479 | static const struct drm_i915_cmd_table gen7_video_cmds[] = { | 526 | static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { |
480 | { common_cmds, ARRAY_SIZE(common_cmds) }, | 527 | { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, |
481 | { video_cmds, ARRAY_SIZE(video_cmds) }, | 528 | { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, |
482 | }; | 529 | }; |
483 | 530 | ||
484 | static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { | 531 | static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { |
485 | { common_cmds, ARRAY_SIZE(common_cmds) }, | 532 | { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, |
486 | { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, | 533 | { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, |
487 | }; | 534 | }; |
488 | 535 | ||
489 | static const struct drm_i915_cmd_table gen7_blt_cmds[] = { | 536 | static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { |
490 | { common_cmds, ARRAY_SIZE(common_cmds) }, | 537 | { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, |
491 | { blt_cmds, ARRAY_SIZE(blt_cmds) }, | 538 | { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, |
492 | }; | 539 | }; |
493 | 540 | ||
494 | static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { | 541 | static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { |
495 | { common_cmds, ARRAY_SIZE(common_cmds) }, | 542 | { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, |
496 | { blt_cmds, ARRAY_SIZE(blt_cmds) }, | 543 | { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, |
497 | { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, | 544 | { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, |
498 | }; | 545 | }; |
499 | 546 | ||
547 | static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { | ||
548 | { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, | ||
549 | }; | ||
550 | |||
551 | |||
500 | /* | 552 | /* |
501 | * Register whitelists, sorted by increasing register offset. | 553 | * Register whitelists, sorted by increasing register offset. |
502 | */ | 554 | */ |
@@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { | |||
612 | REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), | 664 | REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), |
613 | }; | 665 | }; |
614 | 666 | ||
615 | static const struct drm_i915_reg_descriptor ivb_master_regs[] = { | 667 | static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { |
616 | REG32(FORCEWAKE_MT), | 668 | REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), |
617 | REG32(DERRMR), | 669 | REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), |
618 | REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), | 670 | REG32(BCS_SWCTRL), |
619 | REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), | 671 | REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), |
620 | REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), | 672 | REG64_IDX(BCS_GPR, 0), |
621 | }; | 673 | REG64_IDX(BCS_GPR, 1), |
622 | 674 | REG64_IDX(BCS_GPR, 2), | |
623 | static const struct drm_i915_reg_descriptor hsw_master_regs[] = { | 675 | REG64_IDX(BCS_GPR, 3), |
624 | REG32(FORCEWAKE_MT), | 676 | REG64_IDX(BCS_GPR, 4), |
625 | REG32(DERRMR), | 677 | REG64_IDX(BCS_GPR, 5), |
678 | REG64_IDX(BCS_GPR, 6), | ||
679 | REG64_IDX(BCS_GPR, 7), | ||
680 | REG64_IDX(BCS_GPR, 8), | ||
681 | REG64_IDX(BCS_GPR, 9), | ||
682 | REG64_IDX(BCS_GPR, 10), | ||
683 | REG64_IDX(BCS_GPR, 11), | ||
684 | REG64_IDX(BCS_GPR, 12), | ||
685 | REG64_IDX(BCS_GPR, 13), | ||
686 | REG64_IDX(BCS_GPR, 14), | ||
687 | REG64_IDX(BCS_GPR, 15), | ||
626 | }; | 688 | }; |
627 | 689 | ||
628 | #undef REG64 | 690 | #undef REG64 |
@@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = { | |||
631 | struct drm_i915_reg_table { | 693 | struct drm_i915_reg_table { |
632 | const struct drm_i915_reg_descriptor *regs; | 694 | const struct drm_i915_reg_descriptor *regs; |
633 | int num_regs; | 695 | int num_regs; |
634 | bool master; | ||
635 | }; | 696 | }; |
636 | 697 | ||
637 | static const struct drm_i915_reg_table ivb_render_reg_tables[] = { | 698 | static const struct drm_i915_reg_table ivb_render_reg_tables[] = { |
638 | { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, | 699 | { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, |
639 | { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, | ||
640 | }; | 700 | }; |
641 | 701 | ||
642 | static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { | 702 | static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { |
643 | { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, | 703 | { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, |
644 | { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, | ||
645 | }; | 704 | }; |
646 | 705 | ||
647 | static const struct drm_i915_reg_table hsw_render_reg_tables[] = { | 706 | static const struct drm_i915_reg_table hsw_render_reg_tables[] = { |
648 | { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, | 707 | { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, |
649 | { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, | 708 | { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, |
650 | { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, | ||
651 | }; | 709 | }; |
652 | 710 | ||
653 | static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { | 711 | static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { |
654 | { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, | 712 | { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, |
655 | { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, | 713 | }; |
714 | |||
715 | static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { | ||
716 | { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, | ||
656 | }; | 717 | }; |
657 | 718 | ||
658 | static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) | 719 | static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) |
@@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) | |||
710 | return 0; | 771 | return 0; |
711 | } | 772 | } |
712 | 773 | ||
774 | static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) | ||
775 | { | ||
776 | u32 client = cmd_header >> INSTR_CLIENT_SHIFT; | ||
777 | |||
778 | if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) | ||
779 | return 0xFF; | ||
780 | |||
781 | DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); | ||
782 | return 0; | ||
783 | } | ||
784 | |||
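Together with the per-engine length-mask callback above, each descriptor's length information determines how far the parser advances per command. Fixed-length entries (such as the "F, 4" MI_STORE_REGISTER_MEM_GEN8 descriptor above) advance by their fixed dword count; everything else takes the low header bits selected by the length mask plus the LENGTH_BIAS of 2 defined further down in this file, and for headers not present in any table find_cmd() builds a default descriptor whose mask is the 0xFF returned here. A short sketch of that step:

```c
/* Sketch of how the parser computes the dword length of one command. */
static u32 cmd_length_sketch(const struct drm_i915_cmd_descriptor *desc,
			     u32 cmd_header)
{
	if (desc->flags & CMD_DESC_FIXED)
		return desc->length.fixed;

	return (cmd_header & desc->length.mask) + LENGTH_BIAS;
}
```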
713 | static bool validate_cmds_sorted(const struct intel_engine_cs *engine, | 785 | static bool validate_cmds_sorted(const struct intel_engine_cs *engine, |
714 | const struct drm_i915_cmd_table *cmd_tables, | 786 | const struct drm_i915_cmd_table *cmd_tables, |
715 | int cmd_table_count) | 787 | int cmd_table_count) |
@@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) | |||
867 | int cmd_table_count; | 939 | int cmd_table_count; |
868 | int ret; | 940 | int ret; |
869 | 941 | ||
870 | if (!IS_GEN(engine->i915, 7)) | 942 | if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && |
943 | engine->class == COPY_ENGINE_CLASS)) | ||
871 | return; | 944 | return; |
872 | 945 | ||
873 | switch (engine->class) { | 946 | switch (engine->class) { |
874 | case RENDER_CLASS: | 947 | case RENDER_CLASS: |
875 | if (IS_HASWELL(engine->i915)) { | 948 | if (IS_HASWELL(engine->i915)) { |
876 | cmd_tables = hsw_render_ring_cmds; | 949 | cmd_tables = hsw_render_ring_cmd_table; |
877 | cmd_table_count = | 950 | cmd_table_count = |
878 | ARRAY_SIZE(hsw_render_ring_cmds); | 951 | ARRAY_SIZE(hsw_render_ring_cmd_table); |
879 | } else { | 952 | } else { |
880 | cmd_tables = gen7_render_cmds; | 953 | cmd_tables = gen7_render_cmd_table; |
881 | cmd_table_count = ARRAY_SIZE(gen7_render_cmds); | 954 | cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); |
882 | } | 955 | } |
883 | 956 | ||
884 | if (IS_HASWELL(engine->i915)) { | 957 | if (IS_HASWELL(engine->i915)) { |
@@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) | |||
888 | engine->reg_tables = ivb_render_reg_tables; | 961 | engine->reg_tables = ivb_render_reg_tables; |
889 | engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); | 962 | engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); |
890 | } | 963 | } |
891 | |||
892 | engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; | 964 | engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; |
893 | break; | 965 | break; |
894 | case VIDEO_DECODE_CLASS: | 966 | case VIDEO_DECODE_CLASS: |
895 | cmd_tables = gen7_video_cmds; | 967 | cmd_tables = gen7_video_cmd_table; |
896 | cmd_table_count = ARRAY_SIZE(gen7_video_cmds); | 968 | cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); |
897 | engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; | 969 | engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; |
898 | break; | 970 | break; |
899 | case COPY_ENGINE_CLASS: | 971 | case COPY_ENGINE_CLASS: |
900 | if (IS_HASWELL(engine->i915)) { | 972 | engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; |
901 | cmd_tables = hsw_blt_ring_cmds; | 973 | if (IS_GEN(engine->i915, 9)) { |
902 | cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); | 974 | cmd_tables = gen9_blt_cmd_table; |
975 | cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); | ||
976 | engine->get_cmd_length_mask = | ||
977 | gen9_blt_get_cmd_length_mask; | ||
978 | |||
979 | /* BCS Engine unsafe without parser */ | ||
980 | engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; | ||
981 | } else if (IS_HASWELL(engine->i915)) { | ||
982 | cmd_tables = hsw_blt_ring_cmd_table; | ||
983 | cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); | ||
903 | } else { | 984 | } else { |
904 | cmd_tables = gen7_blt_cmds; | 985 | cmd_tables = gen7_blt_cmd_table; |
905 | cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); | 986 | cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); |
906 | } | 987 | } |
907 | 988 | ||
908 | if (IS_HASWELL(engine->i915)) { | 989 | if (IS_GEN(engine->i915, 9)) { |
990 | engine->reg_tables = gen9_blt_reg_tables; | ||
991 | engine->reg_table_count = | ||
992 | ARRAY_SIZE(gen9_blt_reg_tables); | ||
993 | } else if (IS_HASWELL(engine->i915)) { | ||
909 | engine->reg_tables = hsw_blt_reg_tables; | 994 | engine->reg_tables = hsw_blt_reg_tables; |
910 | engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); | 995 | engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); |
911 | } else { | 996 | } else { |
912 | engine->reg_tables = ivb_blt_reg_tables; | 997 | engine->reg_tables = ivb_blt_reg_tables; |
913 | engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); | 998 | engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); |
914 | } | 999 | } |
915 | |||
916 | engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; | ||
917 | break; | 1000 | break; |
918 | case VIDEO_ENHANCEMENT_CLASS: | 1001 | case VIDEO_ENHANCEMENT_CLASS: |
919 | cmd_tables = hsw_vebox_cmds; | 1002 | cmd_tables = hsw_vebox_cmd_table; |
920 | cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); | 1003 | cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); |
921 | /* VECS can use the same length_mask function as VCS */ | 1004 | /* VECS can use the same length_mask function as VCS */ |
922 | engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; | 1005 | engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; |
923 | break; | 1006 | break; |
@@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) | |||
943 | return; | 1026 | return; |
944 | } | 1027 | } |
945 | 1028 | ||
946 | engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; | 1029 | engine->flags |= I915_ENGINE_USING_CMD_PARSER; |
947 | } | 1030 | } |
948 | 1031 | ||
949 | /** | 1032 | /** |
@@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) | |||
955 | */ | 1038 | */ |
956 | void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) | 1039 | void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) |
957 | { | 1040 | { |
958 | if (!intel_engine_needs_cmd_parser(engine)) | 1041 | if (!intel_engine_using_cmd_parser(engine)) |
959 | return; | 1042 | return; |
960 | 1043 | ||
961 | fini_hash_table(engine); | 1044 | fini_hash_table(engine); |
@@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) | |||
1029 | } | 1112 | } |
1030 | 1113 | ||
1031 | static const struct drm_i915_reg_descriptor * | 1114 | static const struct drm_i915_reg_descriptor * |
1032 | find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) | 1115 | find_reg(const struct intel_engine_cs *engine, u32 addr) |
1033 | { | 1116 | { |
1034 | const struct drm_i915_reg_table *table = engine->reg_tables; | 1117 | const struct drm_i915_reg_table *table = engine->reg_tables; |
1118 | const struct drm_i915_reg_descriptor *reg = NULL; | ||
1035 | int count = engine->reg_table_count; | 1119 | int count = engine->reg_table_count; |
1036 | 1120 | ||
1037 | for (; count > 0; ++table, --count) { | 1121 | for (; !reg && (count > 0); ++table, --count) |
1038 | if (!table->master || is_master) { | 1122 | reg = __find_reg(table->regs, table->num_regs, addr); |
1039 | const struct drm_i915_reg_descriptor *reg; | ||
1040 | 1123 | ||
1041 | reg = __find_reg(table->regs, table->num_regs, addr); | 1124 | return reg; |
1042 | if (reg != NULL) | ||
1043 | return reg; | ||
1044 | } | ||
1045 | } | ||
1046 | |||
1047 | return NULL; | ||
1048 | } | 1125 | } |
1049 | 1126 | ||
1050 | /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ | 1127 | /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ |
@@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, | |||
1128 | 1205 | ||
1129 | static bool check_cmd(const struct intel_engine_cs *engine, | 1206 | static bool check_cmd(const struct intel_engine_cs *engine, |
1130 | const struct drm_i915_cmd_descriptor *desc, | 1207 | const struct drm_i915_cmd_descriptor *desc, |
1131 | const u32 *cmd, u32 length, | 1208 | const u32 *cmd, u32 length) |
1132 | const bool is_master) | ||
1133 | { | 1209 | { |
1134 | if (desc->flags & CMD_DESC_SKIP) | 1210 | if (desc->flags & CMD_DESC_SKIP) |
1135 | return true; | 1211 | return true; |
@@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine, | |||
1139 | return false; | 1215 | return false; |
1140 | } | 1216 | } |
1141 | 1217 | ||
1142 | if ((desc->flags & CMD_DESC_MASTER) && !is_master) { | ||
1143 | DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", | ||
1144 | *cmd); | ||
1145 | return false; | ||
1146 | } | ||
1147 | |||
1148 | if (desc->flags & CMD_DESC_REGISTER) { | 1218 | if (desc->flags & CMD_DESC_REGISTER) { |
1149 | /* | 1219 | /* |
1150 | * Get the distance between individual register offset | 1220 | * Get the distance between individual register offset |
@@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, | |||
1158 | offset += step) { | 1228 | offset += step) { |
1159 | const u32 reg_addr = cmd[offset] & desc->reg.mask; | 1229 | const u32 reg_addr = cmd[offset] & desc->reg.mask; |
1160 | const struct drm_i915_reg_descriptor *reg = | 1230 | const struct drm_i915_reg_descriptor *reg = |
1161 | find_reg(engine, is_master, reg_addr); | 1231 | find_reg(engine, reg_addr); |
1162 | 1232 | ||
1163 | if (!reg) { | 1233 | if (!reg) { |
1164 | DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", | 1234 | DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", |
@@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine, | |||
1236 | return true; | 1306 | return true; |
1237 | } | 1307 | } |
1238 | 1308 | ||
1309 | static int check_bbstart(const struct i915_gem_context *ctx, | ||
1310 | u32 *cmd, u32 offset, u32 length, | ||
1311 | u32 batch_len, | ||
1312 | u64 batch_start, | ||
1313 | u64 shadow_batch_start) | ||
1314 | { | ||
1315 | u64 jump_offset, jump_target; | ||
1316 | u32 target_cmd_offset, target_cmd_index; | ||
1317 | |||
1318 | /* For igt compatibility on older platforms */ | ||
1319 | if (CMDPARSER_USES_GGTT(ctx->i915)) { | ||
1320 | DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); | ||
1321 | return -EACCES; | ||
1322 | } | ||
1323 | |||
1324 | if (length != 3) { | ||
1325 | DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", | ||
1326 | length); | ||
1327 | return -EINVAL; | ||
1328 | } | ||
1329 | |||
1330 | jump_target = *(u64*)(cmd+1); | ||
1331 | jump_offset = jump_target - batch_start; | ||
1332 | |||
1333 | /* | ||
1334 | * Any underflow of jump_target is guaranteed to be outside the range | ||
1335 | * of a u32, so >= test catches both too large and too small | ||
1336 | */ | ||
1337 | if (jump_offset >= batch_len) { | ||
1338 | DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", | ||
1339 | jump_target); | ||
1340 | return -EINVAL; | ||
1341 | } | ||
1342 | |||
1343 | /* | ||
1344 | * This cannot overflow a u32 because we already checked jump_offset | ||
1345 | * is within the BB, and the batch_len is a u32 | ||
1346 | */ | ||
1347 | target_cmd_offset = lower_32_bits(jump_offset); | ||
1348 | target_cmd_index = target_cmd_offset / sizeof(u32); | ||
1349 | |||
1350 | *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; | ||
1351 | |||
1352 | if (target_cmd_index == offset) | ||
1353 | return 0; | ||
1354 | |||
1355 | if (ctx->jump_whitelist_cmds <= target_cmd_index) { | ||
1356 | DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); | ||
1357 | return -EINVAL; | ||
1358 | } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { | ||
1359 | DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", | ||
1360 | jump_target); | ||
1361 | return -EINVAL; | ||
1362 | } | ||
1363 | |||
1364 | return 0; | ||
1365 | } | ||
1366 | |||
1367 | static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) | ||
1368 | { | ||
1369 | const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); | ||
1370 | const u32 exact_size = BITS_TO_LONGS(batch_cmds); | ||
1371 | u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); | ||
1372 | unsigned long *next_whitelist; | ||
1373 | |||
1374 | if (CMDPARSER_USES_GGTT(ctx->i915)) | ||
1375 | return; | ||
1376 | |||
1377 | if (batch_cmds <= ctx->jump_whitelist_cmds) { | ||
1378 | bitmap_zero(ctx->jump_whitelist, batch_cmds); | ||
1379 | return; | ||
1380 | } | ||
1381 | |||
1382 | again: | ||
1383 | next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); | ||
1384 | if (next_whitelist) { | ||
1385 | kfree(ctx->jump_whitelist); | ||
1386 | ctx->jump_whitelist = next_whitelist; | ||
1387 | ctx->jump_whitelist_cmds = | ||
1388 | next_size * BITS_PER_BYTE * sizeof(long); | ||
1389 | return; | ||
1390 | } | ||
1391 | |||
1392 | if (next_size > exact_size) { | ||
1393 | next_size = exact_size; | ||
1394 | goto again; | ||
1395 | } | ||
1396 | |||
1397 | DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); | ||
1398 | bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); | ||
1399 | |||
1400 | return; | ||
1401 | } | ||
1402 | |||
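For a feel of the sizes involved, the sketch below walks the same arithmetic as init_whitelist() for a 4 KiB batch, assuming a 64-bit long:

```c
/* Worked example of init_whitelist() sizing for a 4 KiB batch (64-bit long). */
static void whitelist_sizing_example(void)
{
	const u32 batch_len  = 4096;
	const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));          /* 1024 */
	const u32 exact_size = BITS_TO_LONGS(batch_cmds);                     /* 16   */
	const u32 next_size  = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); /* 16   */

	/*
	 * jump_whitelist_cmds = next_size * BITS_PER_BYTE * sizeof(long)
	 *                     = 16 * 8 * 8 = 1024 trackable dword offsets,
	 * i.e. one whitelist bit per possible command start in the batch.
	 */
	(void)exact_size;
	(void)next_size;
}
```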
1239 | #define LENGTH_BIAS 2 | 1403 | #define LENGTH_BIAS 2 |
1240 | 1404 | ||
1241 | /** | 1405 | /** |
1242 | * i915_parse_cmds() - parse a submitted batch buffer for privilege violations | 1406 | * i915_parse_cmds() - parse a submitted batch buffer for privilege violations |
1407 | * @ctx: the context in which the batch is to execute | ||
1243 | * @engine: the engine on which the batch is to execute | 1408 | * @engine: the engine on which the batch is to execute |
1244 | * @batch_obj: the batch buffer in question | 1409 | * @batch_obj: the batch buffer in question |
1245 | * @shadow_batch_obj: copy of the batch buffer in question | 1410 | * @batch_start: Canonical base address of batch |
1246 | * @batch_start_offset: byte offset in the batch at which execution starts | 1411 | * @batch_start_offset: byte offset in the batch at which execution starts |
1247 | * @batch_len: length of the commands in batch_obj | 1412 | * @batch_len: length of the commands in batch_obj |
1248 | * @is_master: is the submitting process the drm master? | 1413 | * @shadow_batch_obj: copy of the batch buffer in question |
1414 | * @shadow_batch_start: Canonical base address of shadow_batch_obj | ||
1249 | * | 1415 | * |
1250 | * Parses the specified batch buffer looking for privilege violations as | 1416 | * Parses the specified batch buffer looking for privilege violations as |
1251 | * described in the overview. | 1417 | * described in the overview. |
@@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine, | |||
1253 | * Return: non-zero if the parser finds violations or otherwise fails; -EACCES | 1419 | * Return: non-zero if the parser finds violations or otherwise fails; -EACCES |
1254 | * if the batch appears legal but should use hardware parsing | 1420 | * if the batch appears legal but should use hardware parsing |
1255 | */ | 1421 | */ |
1256 | int intel_engine_cmd_parser(struct intel_engine_cs *engine, | 1422 | |
1423 | int intel_engine_cmd_parser(struct i915_gem_context *ctx, | ||
1424 | struct intel_engine_cs *engine, | ||
1257 | struct drm_i915_gem_object *batch_obj, | 1425 | struct drm_i915_gem_object *batch_obj, |
1258 | struct drm_i915_gem_object *shadow_batch_obj, | 1426 | u64 batch_start, |
1259 | u32 batch_start_offset, | 1427 | u32 batch_start_offset, |
1260 | u32 batch_len, | 1428 | u32 batch_len, |
1261 | bool is_master) | 1429 | struct drm_i915_gem_object *shadow_batch_obj, |
1430 | u64 shadow_batch_start) | ||
1262 | { | 1431 | { |
1263 | u32 *cmd, *batch_end; | 1432 | u32 *cmd, *batch_end, offset = 0; |
1264 | struct drm_i915_cmd_descriptor default_desc = noop_desc; | 1433 | struct drm_i915_cmd_descriptor default_desc = noop_desc; |
1265 | const struct drm_i915_cmd_descriptor *desc = &default_desc; | 1434 | const struct drm_i915_cmd_descriptor *desc = &default_desc; |
1266 | bool needs_clflush_after = false; | 1435 | bool needs_clflush_after = false; |
@@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, | |||
1274 | return PTR_ERR(cmd); | 1443 | return PTR_ERR(cmd); |
1275 | } | 1444 | } |
1276 | 1445 | ||
1446 | init_whitelist(ctx, batch_len); | ||
1447 | |||
1277 | /* | 1448 | /* |
1278 | * We use the batch length as size because the shadow object is as | 1449 | * We use the batch length as size because the shadow object is as |
1279 | * large or larger and copy_batch() will write MI_NOPs to the extra | 1450 | * large or larger and copy_batch() will write MI_NOPs to the extra |
@@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, | |||
1283 | do { | 1454 | do { |
1284 | u32 length; | 1455 | u32 length; |
1285 | 1456 | ||
1286 | if (*cmd == MI_BATCH_BUFFER_END) { | 1457 | if (*cmd == MI_BATCH_BUFFER_END) |
1287 | if (needs_clflush_after) { | ||
1288 | void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); | ||
1289 | drm_clflush_virt_range(ptr, | ||
1290 | (void *)(cmd + 1) - ptr); | ||
1291 | } | ||
1292 | break; | 1458 | break; |
1293 | } | ||
1294 | 1459 | ||
1295 | desc = find_cmd(engine, *cmd, desc, &default_desc); | 1460 | desc = find_cmd(engine, *cmd, desc, &default_desc); |
1296 | if (!desc) { | 1461 | if (!desc) { |
1297 | DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", | 1462 | DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", |
1298 | *cmd); | 1463 | *cmd); |
1299 | ret = -EINVAL; | 1464 | ret = -EINVAL; |
1300 | break; | 1465 | goto err; |
1301 | } | ||
1302 | |||
1303 | /* | ||
1304 | * If the batch buffer contains a chained batch, return an | ||
1305 | * error that tells the caller to abort and dispatch the | ||
1306 | * workload as a non-secure batch. | ||
1307 | */ | ||
1308 | if (desc->cmd.value == MI_BATCH_BUFFER_START) { | ||
1309 | ret = -EACCES; | ||
1310 | break; | ||
1311 | } | 1466 | } |
1312 | 1467 | ||
1313 | if (desc->flags & CMD_DESC_FIXED) | 1468 | if (desc->flags & CMD_DESC_FIXED) |
@@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, | |||
1321 | length, | 1476 | length, |
1322 | batch_end - cmd); | 1477 | batch_end - cmd); |
1323 | ret = -EINVAL; | 1478 | ret = -EINVAL; |
1324 | break; | 1479 | goto err; |
1325 | } | 1480 | } |
1326 | 1481 | ||
1327 | if (!check_cmd(engine, desc, cmd, length, is_master)) { | 1482 | if (!check_cmd(engine, desc, cmd, length)) { |
1328 | ret = -EACCES; | 1483 | ret = -EACCES; |
1484 | goto err; | ||
1485 | } | ||
1486 | |||
1487 | if (desc->cmd.value == MI_BATCH_BUFFER_START) { | ||
1488 | ret = check_bbstart(ctx, cmd, offset, length, | ||
1489 | batch_len, batch_start, | ||
1490 | shadow_batch_start); | ||
1491 | |||
1492 | if (ret) | ||
1493 | goto err; | ||
1329 | break; | 1494 | break; |
1330 | } | 1495 | } |
1331 | 1496 | ||
1497 | if (ctx->jump_whitelist_cmds > offset) | ||
1498 | set_bit(offset, ctx->jump_whitelist); | ||
1499 | |||
1332 | cmd += length; | 1500 | cmd += length; |
1501 | offset += length; | ||
1333 | if (cmd >= batch_end) { | 1502 | if (cmd >= batch_end) { |
1334 | DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); | 1503 | DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); |
1335 | ret = -EINVAL; | 1504 | ret = -EINVAL; |
1336 | break; | 1505 | goto err; |
1337 | } | 1506 | } |
1338 | } while (1); | 1507 | } while (1); |
1339 | 1508 | ||
1509 | if (needs_clflush_after) { | ||
1510 | void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); | ||
1511 | |||
1512 | drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); | ||
1513 | } | ||
1514 | |||
1515 | err: | ||
1340 | i915_gem_object_unpin_map(shadow_batch_obj); | 1516 | i915_gem_object_unpin_map(shadow_batch_obj); |
1341 | return ret; | 1517 | return ret; |
1342 | } | 1518 | } |
@@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) | |||
1357 | 1533 | ||
1358 | /* If the command parser is not enabled, report 0 - unsupported */ | 1534 | /* If the command parser is not enabled, report 0 - unsupported */ |
1359 | for_each_uabi_engine(engine, dev_priv) { | 1535 | for_each_uabi_engine(engine, dev_priv) { |
1360 | if (intel_engine_needs_cmd_parser(engine)) { | 1536 | if (intel_engine_using_cmd_parser(engine)) { |
1361 | active = true; | 1537 | active = true; |
1362 | break; | 1538 | break; |
1363 | } | 1539 | } |
@@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) | |||
1382 | * the parser enabled. | 1558 | * the parser enabled. |
1383 | * 9. Don't whitelist or handle oacontrol specially, as ownership | 1559 | * 9. Don't whitelist or handle oacontrol specially, as ownership |
1384 | * for oacontrol state is moving to i915-perf. | 1560 | * for oacontrol state is moving to i915-perf. |
1561 | * 10. Support for Gen9 BCS Parsing | ||
1385 | */ | 1562 | */ |
1386 | return 9; | 1563 | return 10; |
1387 | } | 1564 | } |
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index bb6f86c7067a..fe4d7cabfdf1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c | |||
@@ -1850,6 +1850,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) | |||
1850 | 1850 | ||
1851 | i915_gem_suspend_late(dev_priv); | 1851 | i915_gem_suspend_late(dev_priv); |
1852 | 1852 | ||
1853 | i915_rc6_ctx_wa_suspend(dev_priv); | ||
1854 | |||
1853 | intel_uncore_suspend(&dev_priv->uncore); | 1855 | intel_uncore_suspend(&dev_priv->uncore); |
1854 | 1856 | ||
1855 | intel_power_domains_suspend(dev_priv, | 1857 | intel_power_domains_suspend(dev_priv, |
@@ -2053,6 +2055,8 @@ static int i915_drm_resume_early(struct drm_device *dev) | |||
2053 | 2055 | ||
2054 | intel_power_domains_resume(dev_priv); | 2056 | intel_power_domains_resume(dev_priv); |
2055 | 2057 | ||
2058 | i915_rc6_ctx_wa_resume(dev_priv); | ||
2059 | |||
2056 | intel_gt_sanitize(&dev_priv->gt, true); | 2060 | intel_gt_sanitize(&dev_priv->gt, true); |
2057 | 2061 | ||
2058 | enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); | 2062 | enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); |
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 953e1d12c23c..89b6112bd66b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h | |||
@@ -593,6 +593,8 @@ struct intel_rps { | |||
593 | 593 | ||
594 | struct intel_rc6 { | 594 | struct intel_rc6 { |
595 | bool enabled; | 595 | bool enabled; |
596 | bool ctx_corrupted; | ||
597 | intel_wakeref_t ctx_corrupted_wakeref; | ||
596 | u64 prev_hw_residency[4]; | 598 | u64 prev_hw_residency[4]; |
597 | u64 cur_residency[4]; | 599 | u64 cur_residency[4]; |
598 | }; | 600 | }; |
@@ -2075,9 +2077,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, | |||
2075 | #define VEBOX_MASK(dev_priv) \ | 2077 | #define VEBOX_MASK(dev_priv) \ |
2076 | ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS) | 2078 | ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS) |
2077 | 2079 | ||
2080 | /* | ||
2081 | * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution | ||
2082 | * All later gens can run the final buffer from the ppgtt | ||
2083 | */ | ||
2084 | #define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7) | ||
2085 | |||
2078 | #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) | 2086 | #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) |
2079 | #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) | 2087 | #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) |
2080 | #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) | 2088 | #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) |
2089 | #define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) | ||
2081 | #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ | 2090 | #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ |
2082 | IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) | 2091 | IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) |
2083 | 2092 | ||
@@ -2110,10 +2119,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, | |||
2110 | /* Early gen2 have a totally busted CS tlb and require pinned batches. */ | 2119 | /* Early gen2 have a totally busted CS tlb and require pinned batches. */ |
2111 | #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) | 2120 | #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) |
2112 | 2121 | ||
2122 | #define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ | ||
2123 | (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9)) | ||
2124 | |||
2113 | /* WaRsDisableCoarsePowerGating:skl,cnl */ | 2125 | /* WaRsDisableCoarsePowerGating:skl,cnl */ |
2114 | #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ | 2126 | #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ |
2115 | (IS_CANNONLAKE(dev_priv) || \ | 2127 | (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9)) |
2116 | IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) | ||
2117 | 2128 | ||
2118 | #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) | 2129 | #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) |
2119 | #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ | 2130 | #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ |
@@ -2284,6 +2295,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, | |||
2284 | unsigned long flags); | 2295 | unsigned long flags); |
2285 | #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0) | 2296 | #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0) |
2286 | 2297 | ||
2298 | struct i915_vma * __must_check | ||
2299 | i915_gem_object_pin(struct drm_i915_gem_object *obj, | ||
2300 | struct i915_address_space *vm, | ||
2301 | const struct i915_ggtt_view *view, | ||
2302 | u64 size, | ||
2303 | u64 alignment, | ||
2304 | u64 flags); | ||
2305 | |||
2287 | void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); | 2306 | void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); |
2288 | 2307 | ||
2289 | static inline int __must_check | 2308 | static inline int __must_check |
@@ -2393,12 +2412,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); | |||
2393 | int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); | 2412 | int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); |
2394 | void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); | 2413 | void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); |
2395 | void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); | 2414 | void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); |
2396 | int intel_engine_cmd_parser(struct intel_engine_cs *engine, | 2415 | int intel_engine_cmd_parser(struct i915_gem_context *cxt, |
2416 | struct intel_engine_cs *engine, | ||
2397 | struct drm_i915_gem_object *batch_obj, | 2417 | struct drm_i915_gem_object *batch_obj, |
2398 | struct drm_i915_gem_object *shadow_batch_obj, | 2418 | u64 user_batch_start, |
2399 | u32 batch_start_offset, | 2419 | u32 batch_start_offset, |
2400 | u32 batch_len, | 2420 | u32 batch_len, |
2401 | bool is_master); | 2421 | struct drm_i915_gem_object *shadow_batch_obj, |
2422 | u64 shadow_batch_start); | ||
2402 | 2423 | ||
2403 | /* intel_device_info.c */ | 2424 | /* intel_device_info.c */ |
2404 | static inline struct intel_device_info * | 2425 | static inline struct intel_device_info * |
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d0f94f239919..98305d987ac1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c | |||
@@ -964,6 +964,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, | |||
964 | { | 964 | { |
965 | struct drm_i915_private *dev_priv = to_i915(obj->base.dev); | 965 | struct drm_i915_private *dev_priv = to_i915(obj->base.dev); |
966 | struct i915_address_space *vm = &dev_priv->ggtt.vm; | 966 | struct i915_address_space *vm = &dev_priv->ggtt.vm; |
967 | |||
968 | return i915_gem_object_pin(obj, vm, view, size, alignment, | ||
969 | flags | PIN_GLOBAL); | ||
970 | } | ||
971 | |||
972 | struct i915_vma * | ||
973 | i915_gem_object_pin(struct drm_i915_gem_object *obj, | ||
974 | struct i915_address_space *vm, | ||
975 | const struct i915_ggtt_view *view, | ||
976 | u64 size, | ||
977 | u64 alignment, | ||
978 | u64 flags) | ||
979 | { | ||
980 | struct drm_i915_private *dev_priv = to_i915(obj->base.dev); | ||
967 | struct i915_vma *vma; | 981 | struct i915_vma *vma; |
968 | int ret; | 982 | int ret; |
969 | 983 | ||
@@ -1038,7 +1052,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, | |||
1038 | return ERR_PTR(ret); | 1052 | return ERR_PTR(ret); |
1039 | } | 1053 | } |
1040 | 1054 | ||
1041 | ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); | 1055 | ret = i915_vma_pin(vma, size, alignment, flags); |
1042 | if (ret) | 1056 | if (ret) |
1043 | return ERR_PTR(ret); | 1057 | return ERR_PTR(ret); |
1044 | 1058 | ||
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 5d9101376a3d..9f1517af5b7f 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c | |||
@@ -62,7 +62,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, | |||
62 | value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); | 62 | value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); |
63 | break; | 63 | break; |
64 | case I915_PARAM_HAS_SECURE_BATCHES: | 64 | case I915_PARAM_HAS_SECURE_BATCHES: |
65 | value = capable(CAP_SYS_ADMIN); | 65 | value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN); |
66 | break; | 66 | break; |
67 | case I915_PARAM_CMD_PARSER_VERSION: | 67 | case I915_PARAM_CMD_PARSER_VERSION: |
68 | value = i915_cmd_parser_get_version(i915); | 68 | value = i915_cmd_parser_get_version(i915); |
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2abd199093c5..f8ee9aba3955 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h | |||
@@ -471,6 +471,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) | |||
471 | #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) | 471 | #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) |
472 | #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) | 472 | #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) |
473 | 473 | ||
474 | #define GEN8_RC6_CTX_INFO _MMIO(0x8504) | ||
475 | |||
474 | #define GAC_ECO_BITS _MMIO(0x14090) | 476 | #define GAC_ECO_BITS _MMIO(0x14090) |
475 | #define ECOBITS_SNB_BIT (1 << 13) | 477 | #define ECOBITS_SNB_BIT (1 << 13) |
476 | #define ECOBITS_PPGTT_CACHE64B (3 << 8) | 478 | #define ECOBITS_PPGTT_CACHE64B (3 << 8) |
@@ -555,6 +557,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) | |||
555 | */ | 557 | */ |
556 | #define BCS_SWCTRL _MMIO(0x22200) | 558 | #define BCS_SWCTRL _MMIO(0x22200) |
557 | 559 | ||
560 | /* There are 16 GPR registers */ | ||
561 | #define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) | ||
562 | #define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) | ||
563 | |||
558 | #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) | 564 | #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) |
559 | #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) | 565 | #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) |
560 | #define HS_INVOCATION_COUNT _MMIO(0x2300) | 566 | #define HS_INVOCATION_COUNT _MMIO(0x2300) |
@@ -7211,6 +7217,10 @@ enum { | |||
7211 | #define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084) | 7217 | #define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084) |
7212 | #define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088) | 7218 | #define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088) |
7213 | 7219 | ||
7220 | /* Display Internal Timeout Register */ | ||
7221 | #define RM_TIMEOUT _MMIO(0x42060) | ||
7222 | #define MMIO_TIMEOUT_US(us) ((us) << 0) | ||
7223 | |||
7214 | /* interrupts */ | 7224 | /* interrupts */ |
7215 | #define DE_MASTER_IRQ_CONTROL (1 << 31) | 7225 | #define DE_MASTER_IRQ_CONTROL (1 << 31) |
7216 | #define DE_SPRITEB_FLIP_DONE (1 << 29) | 7226 | #define DE_SPRITEB_FLIP_DONE (1 << 29) |
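Three register additions above feed later parts of the series: GEN8_RC6_CTX_INFO is read by the RC6 context-corruption check in intel_pm.c, BCS_GPR(n)/BCS_GPR_UDW(n) describe the 16 blitter general-purpose registers laid out at an 8-byte stride from 0x22600, and RM_TIMEOUT is the display internal timeout register programmed in bxt_init_clock_gating(). The quick check below verifies the GPR address arithmetic, with _MMIO() reduced to its raw offset:

#include <stdio.h>

/* Same arithmetic as the macros above, minus the _MMIO() wrapper. */
#define BCS_GPR(n)      (0x22600 + (n) * 8)
#define BCS_GPR_UDW(n)  (0x22600 + (n) * 8 + 4)

int main(void)
{
	/* 16 64-bit GPRs: low dword every 8 bytes, upper dword 4 bytes on. */
	for (int n = 0; n < 16; n++)
		printf("GPR%-2d low %#x  high %#x\n",
		       n, BCS_GPR(n), BCS_GPR_UDW(n));
	return 0;
}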
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 75ee027abb80..2efe1d12d5a9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c | |||
@@ -126,6 +126,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) | |||
126 | */ | 126 | */ |
127 | I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | | 127 | I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | |
128 | PWM1_GATING_DIS | PWM2_GATING_DIS); | 128 | PWM1_GATING_DIS | PWM2_GATING_DIS); |
129 | |||
130 | /* | ||
131 | * Lower the display internal timeout. | ||
132 | * This is needed to avoid any hard hangs when DSI port PLL | ||
133 | * is off and an MMIO access is attempted by any privileged | ||
134 | * application, using batch buffers or any other means. | ||
135 | */ | ||
136 | I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); | ||
129 | } | 137 | } |
130 | 138 | ||
131 | static void glk_init_clock_gating(struct drm_i915_private *dev_priv) | 139 | static void glk_init_clock_gating(struct drm_i915_private *dev_priv) |
@@ -8544,6 +8552,100 @@ static void intel_init_emon(struct drm_i915_private *dev_priv) | |||
8544 | dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); | 8552 | dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); |
8545 | } | 8553 | } |
8546 | 8554 | ||
8555 | static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv) | ||
8556 | { | ||
8557 | return !I915_READ(GEN8_RC6_CTX_INFO); | ||
8558 | } | ||
8559 | |||
8560 | static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915) | ||
8561 | { | ||
8562 | if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) | ||
8563 | return; | ||
8564 | |||
8565 | if (i915_rc6_ctx_corrupted(i915)) { | ||
8566 | DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); | ||
8567 | i915->gt_pm.rc6.ctx_corrupted = true; | ||
8568 | i915->gt_pm.rc6.ctx_corrupted_wakeref = | ||
8569 | intel_runtime_pm_get(&i915->runtime_pm); | ||
8570 | } | ||
8571 | } | ||
8572 | |||
8573 | static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915) | ||
8574 | { | ||
8575 | if (i915->gt_pm.rc6.ctx_corrupted) { | ||
8576 | intel_runtime_pm_put(&i915->runtime_pm, | ||
8577 | i915->gt_pm.rc6.ctx_corrupted_wakeref); | ||
8578 | i915->gt_pm.rc6.ctx_corrupted = false; | ||
8579 | } | ||
8580 | } | ||
8581 | |||
8582 | /** | ||
8583 | * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA | ||
8584 | * @i915: i915 device | ||
8585 | * | ||
8586 | * Perform any steps needed to clean up the RC6 CTX WA before system suspend. | ||
8587 | */ | ||
8588 | void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915) | ||
8589 | { | ||
8590 | if (i915->gt_pm.rc6.ctx_corrupted) | ||
8591 | intel_runtime_pm_put(&i915->runtime_pm, | ||
8592 | i915->gt_pm.rc6.ctx_corrupted_wakeref); | ||
8593 | } | ||
8594 | |||
8595 | /** | ||
8596 | * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA | ||
8597 | * @i915: i915 device | ||
8598 | * | ||
8599 | * Perform any steps needed to re-init the RC6 CTX WA after system resume. | ||
8600 | */ | ||
8601 | void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915) | ||
8602 | { | ||
8603 | if (!i915->gt_pm.rc6.ctx_corrupted) | ||
8604 | return; | ||
8605 | |||
8606 | if (i915_rc6_ctx_corrupted(i915)) { | ||
8607 | i915->gt_pm.rc6.ctx_corrupted_wakeref = | ||
8608 | intel_runtime_pm_get(&i915->runtime_pm); | ||
8609 | return; | ||
8610 | } | ||
8611 | |||
8612 | DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); | ||
8613 | i915->gt_pm.rc6.ctx_corrupted = false; | ||
8614 | } | ||
8615 | |||
8616 | static void intel_disable_rc6(struct drm_i915_private *dev_priv); | ||
8617 | |||
8618 | /** | ||
8619 | * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption | ||
8620 | * @i915: i915 device | ||
8621 | * | ||
8622 | * Check if an RC6 CTX corruption has happened since the last check and if so | ||
8623 | * disable RC6 and runtime power management. | ||
8624 | * | ||
8625 | * Return false if no context corruption has happened since the last call of | ||
8626 | * this function, true otherwise. | ||
8627 | */ | ||
8628 | bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915) | ||
8629 | { | ||
8630 | if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) | ||
8631 | return false; | ||
8632 | |||
8633 | if (i915->gt_pm.rc6.ctx_corrupted) | ||
8634 | return false; | ||
8635 | |||
8636 | if (!i915_rc6_ctx_corrupted(i915)) | ||
8637 | return false; | ||
8638 | |||
8639 | DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); | ||
8640 | |||
8641 | intel_disable_rc6(i915); | ||
8642 | i915->gt_pm.rc6.ctx_corrupted = true; | ||
8643 | i915->gt_pm.rc6.ctx_corrupted_wakeref = | ||
8644 | intel_runtime_pm_get_noresume(&i915->runtime_pm); | ||
8645 | |||
8646 | return true; | ||
8647 | } | ||
8648 | |||
8547 | void intel_init_gt_powersave(struct drm_i915_private *dev_priv) | 8649 | void intel_init_gt_powersave(struct drm_i915_private *dev_priv) |
8548 | { | 8650 | { |
8549 | struct intel_rps *rps = &dev_priv->gt_pm.rps; | 8651 | struct intel_rps *rps = &dev_priv->gt_pm.rps; |
@@ -8557,6 +8659,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) | |||
8557 | pm_runtime_get(&dev_priv->drm.pdev->dev); | 8659 | pm_runtime_get(&dev_priv->drm.pdev->dev); |
8558 | } | 8660 | } |
8559 | 8661 | ||
8662 | i915_rc6_ctx_wa_init(dev_priv); | ||
8663 | |||
8560 | /* Initialize RPS limits (for userspace) */ | 8664 | /* Initialize RPS limits (for userspace) */ |
8561 | if (IS_CHERRYVIEW(dev_priv)) | 8665 | if (IS_CHERRYVIEW(dev_priv)) |
8562 | cherryview_init_gt_powersave(dev_priv); | 8666 | cherryview_init_gt_powersave(dev_priv); |
@@ -8595,6 +8699,8 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) | |||
8595 | if (IS_VALLEYVIEW(dev_priv)) | 8699 | if (IS_VALLEYVIEW(dev_priv)) |
8596 | valleyview_cleanup_gt_powersave(dev_priv); | 8700 | valleyview_cleanup_gt_powersave(dev_priv); |
8597 | 8701 | ||
8702 | i915_rc6_ctx_wa_cleanup(dev_priv); | ||
8703 | |||
8598 | if (!HAS_RC6(dev_priv)) | 8704 | if (!HAS_RC6(dev_priv)) |
8599 | pm_runtime_put(&dev_priv->drm.pdev->dev); | 8705 | pm_runtime_put(&dev_priv->drm.pdev->dev); |
8600 | } | 8706 | } |
@@ -8623,7 +8729,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) | |||
8623 | i915->gt_pm.llc_pstate.enabled = false; | 8729 | i915->gt_pm.llc_pstate.enabled = false; |
8624 | } | 8730 | } |
8625 | 8731 | ||
8626 | static void intel_disable_rc6(struct drm_i915_private *dev_priv) | 8732 | static void __intel_disable_rc6(struct drm_i915_private *dev_priv) |
8627 | { | 8733 | { |
8628 | lockdep_assert_held(&dev_priv->gt_pm.rps.lock); | 8734 | lockdep_assert_held(&dev_priv->gt_pm.rps.lock); |
8629 | 8735 | ||
@@ -8642,6 +8748,15 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv) | |||
8642 | dev_priv->gt_pm.rc6.enabled = false; | 8748 | dev_priv->gt_pm.rc6.enabled = false; |
8643 | } | 8749 | } |
8644 | 8750 | ||
8751 | static void intel_disable_rc6(struct drm_i915_private *dev_priv) | ||
8752 | { | ||
8753 | struct intel_rps *rps = &dev_priv->gt_pm.rps; | ||
8754 | |||
8755 | mutex_lock(&rps->lock); | ||
8756 | __intel_disable_rc6(dev_priv); | ||
8757 | mutex_unlock(&rps->lock); | ||
8758 | } | ||
8759 | |||
8645 | static void intel_disable_rps(struct drm_i915_private *dev_priv) | 8760 | static void intel_disable_rps(struct drm_i915_private *dev_priv) |
8646 | { | 8761 | { |
8647 | lockdep_assert_held(&dev_priv->gt_pm.rps.lock); | 8762 | lockdep_assert_held(&dev_priv->gt_pm.rps.lock); |
@@ -8667,7 +8782,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) | |||
8667 | { | 8782 | { |
8668 | mutex_lock(&dev_priv->gt_pm.rps.lock); | 8783 | mutex_lock(&dev_priv->gt_pm.rps.lock); |
8669 | 8784 | ||
8670 | intel_disable_rc6(dev_priv); | 8785 | __intel_disable_rc6(dev_priv); |
8671 | intel_disable_rps(dev_priv); | 8786 | intel_disable_rps(dev_priv); |
8672 | if (HAS_LLC(dev_priv)) | 8787 | if (HAS_LLC(dev_priv)) |
8673 | intel_disable_llc_pstate(dev_priv); | 8788 | intel_disable_llc_pstate(dev_priv); |
@@ -8694,6 +8809,9 @@ static void intel_enable_rc6(struct drm_i915_private *dev_priv) | |||
8694 | if (dev_priv->gt_pm.rc6.enabled) | 8809 | if (dev_priv->gt_pm.rc6.enabled) |
8695 | return; | 8810 | return; |
8696 | 8811 | ||
8812 | if (dev_priv->gt_pm.rc6.ctx_corrupted) | ||
8813 | return; | ||
8814 | |||
8697 | if (IS_CHERRYVIEW(dev_priv)) | 8815 | if (IS_CHERRYVIEW(dev_priv)) |
8698 | cherryview_enable_rc6(dev_priv); | 8816 | cherryview_enable_rc6(dev_priv); |
8699 | else if (IS_VALLEYVIEW(dev_priv)) | 8817 | else if (IS_VALLEYVIEW(dev_priv)) |
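The intel_pm.c additions implement the RC6 context-corruption workaround: a zero read of GEN8_RC6_CTX_INFO means the saved RC6 context is gone, so the driver pins a runtime-PM wakeref (blocking the low-power states that depend on that context), refuses to re-enable RC6, and re-evaluates the situation across suspend/resume, dropping the extra reference if the context came back intact. The toy model below walks that lifecycle with the wakeref reduced to a counter; all names and the always-corrupted hardware probe are stand-ins.

#include <stdbool.h>
#include <stdio.h>

struct toy_gt_pm {
	bool ctx_corrupted;
	int  rpm_refs;                  /* stand-in for the wakeref */
};

/* Stand-in for !I915_READ(GEN8_RC6_CTX_INFO); pretend it stays corrupted. */
static bool hw_ctx_corrupted(void) { return true; }

static void wa_check(struct toy_gt_pm *pm)      /* i915_rc6_ctx_wa_check() */
{
	if (pm->ctx_corrupted || !hw_ctx_corrupted())
		return;
	pm->ctx_corrupted = true;
	pm->rpm_refs++;                 /* keep the device out of runtime PM */
}

static void wa_suspend(struct toy_gt_pm *pm)    /* i915_rc6_ctx_wa_suspend() */
{
	if (pm->ctx_corrupted)
		pm->rpm_refs--;
}

static void wa_resume(struct toy_gt_pm *pm)     /* i915_rc6_ctx_wa_resume() */
{
	if (!pm->ctx_corrupted)
		return;
	if (hw_ctx_corrupted())
		pm->rpm_refs++;         /* still corrupted: re-take the ref */
	else
		pm->ctx_corrupted = false;      /* context restored */
}

int main(void)
{
	struct toy_gt_pm pm = { 0 };

	wa_check(&pm);
	wa_suspend(&pm);
	wa_resume(&pm);
	printf("corrupted=%d rpm_refs=%d\n", pm.ctx_corrupted, pm.rpm_refs);
	return 0;
}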
diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index e3573e1e16e3..0f7390c850ec 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h | |||
@@ -36,6 +36,9 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); | |||
36 | void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); | 36 | void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); |
37 | void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); | 37 | void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); |
38 | void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); | 38 | void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); |
39 | bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915); | ||
40 | void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915); | ||
41 | void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915); | ||
39 | void gen6_rps_busy(struct drm_i915_private *dev_priv); | 42 | void gen6_rps_busy(struct drm_i915_private *dev_priv); |
40 | void gen6_rps_idle(struct drm_i915_private *dev_priv); | 43 | void gen6_rps_idle(struct drm_i915_private *dev_priv); |
41 | void gen6_rps_boost(struct i915_request *rq); | 44 | void gen6_rps_boost(struct i915_request *rq); |
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 6afad68e5ba2..238240984bc1 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c | |||
@@ -76,9 +76,11 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) | |||
76 | * ensures no active vp_list traversal while the vport is removed | 76 | * ensures no active vp_list traversal while the vport is removed |
77 | * from the queue) | 77 | * from the queue) |
78 | */ | 78 | */ |
79 | for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++) | 79 | for (i = 0; i < 10; i++) { |
80 | wait_event_timeout(vha->vref_waitq, | 80 | if (wait_event_timeout(vha->vref_waitq, |
81 | atomic_read(&vha->vref_count), HZ); | 81 | !atomic_read(&vha->vref_count), HZ) > 0) |
82 | break; | ||
83 | } | ||
82 | 84 | ||
83 | spin_lock_irqsave(&ha->vport_slock, flags); | 85 | spin_lock_irqsave(&ha->vport_slock, flags); |
84 | if (atomic_read(&vha->vref_count)) { | 86 | if (atomic_read(&vha->vref_count)) { |
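The qla_mid.c change (and the matching one in qla_os.c below) fixes the wait loop in two ways: the condition handed to wait_event_timeout() now has the same sense as what the loop is actually waiting for (vref_count draining to zero), and the primitive's return value is finally honoured, so the loop stops as soon as the wait succeeds instead of always spinning through all ten rounds. wait_event_timeout() returns 0 on timeout and a positive value when the condition became true, which is why a positive return breaks the loop. A userspace approximation, with the wait primitive replaced by a draining counter:

#include <stdio.h>

/* Stand-in for wait_event_timeout(): returns the remaining timeout (>0)
 * once the "condition" holds, 0 on timeout.  The condition here is a
 * reference count that drains by one per call. */
static int fake_wait_event_timeout(int *vref_count, int timeout)
{
	if (*vref_count > 0)
		(*vref_count)--;
	return *vref_count == 0 ? timeout : 0;
}

int main(void)
{
	int vref_count = 3;
	int rounds = 0;

	/* Same shape as the fixed loop: up to 10 tries, break on success. */
	for (int i = 0; i < 10; i++) {
		rounds++;
		if (fake_wait_event_timeout(&vref_count, 100) > 0)
			break;
	}

	printf("stopped after %d rounds, vref_count=%d\n", rounds, vref_count);
	return 0;
}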
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 337162ac3a77..726ad4cbf4a6 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c | |||
@@ -1119,9 +1119,11 @@ qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) | |||
1119 | 1119 | ||
1120 | qla2x00_mark_all_devices_lost(vha, 0); | 1120 | qla2x00_mark_all_devices_lost(vha, 0); |
1121 | 1121 | ||
1122 | for (i = 0; i < 10; i++) | 1122 | for (i = 0; i < 10; i++) { |
1123 | wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), | 1123 | if (wait_event_timeout(vha->fcport_waitQ, |
1124 | HZ); | 1124 | test_fcport_count(vha), HZ) > 0) |
1125 | break; | ||
1126 | } | ||
1125 | 1127 | ||
1126 | flush_workqueue(vha->hw->wq); | 1128 | flush_workqueue(vha->hw->wq); |
1127 | } | 1129 | } |
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5447738906ac..91c007d26c1e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c | |||
@@ -1883,7 +1883,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) | |||
1883 | { | 1883 | { |
1884 | unsigned int cmd_size, sgl_size; | 1884 | unsigned int cmd_size, sgl_size; |
1885 | 1885 | ||
1886 | sgl_size = scsi_mq_inline_sgl_size(shost); | 1886 | sgl_size = max_t(unsigned int, sizeof(struct scatterlist), |
1887 | scsi_mq_inline_sgl_size(shost)); | ||
1887 | cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size; | 1888 | cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size; |
1888 | if (scsi_host_get_prot(shost)) | 1889 | if (scsi_host_get_prot(shost)) |
1889 | cmd_size += sizeof(struct scsi_data_buffer) + | 1890 | cmd_size += sizeof(struct scsi_data_buffer) + |
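scsi_mq_setup_tags() now clamps the inline scatter-gather area to at least sizeof(struct scatterlist), so a host reporting a tiny (or zero) inline segment count still gets a usable per-command allocation. max_t(type, a, b) is the kernel's typed maximum; the sketch below uses a simplified version (the real macro also evaluates each argument only once) together with a stand-in scatterlist to show the clamping.

#include <stdio.h>

/* Simplified max_t(): casts both arguments to the given type and picks the
 * larger.  Unlike the kernel macro, this one evaluates arguments twice. */
#define max_t(type, a, b) ((type)(a) > (type)(b) ? (type)(a) : (type)(b))

struct scatterlist_stub { unsigned long page_link, offset, length; };

int main(void)
{
	unsigned int inline_sgl_size = 0;   /* e.g. host with no inline segments */
	unsigned int sgl_size = max_t(unsigned int,
				      sizeof(struct scatterlist_stub),
				      inline_sgl_size);

	printf("sgl_size clamped to %u bytes\n", sgl_size);
	return 0;
}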
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index de4019dc0f0b..1efc69e194f8 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c | |||
@@ -263,25 +263,16 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, | |||
263 | int result = cmd->result; | 263 | int result = cmd->result; |
264 | struct request *rq = cmd->request; | 264 | struct request *rq = cmd->request; |
265 | 265 | ||
266 | switch (req_op(rq)) { | 266 | if (req_op(rq) == REQ_OP_ZONE_RESET && |
267 | case REQ_OP_ZONE_RESET: | 267 | result && |
268 | case REQ_OP_ZONE_RESET_ALL: | 268 | sshdr->sense_key == ILLEGAL_REQUEST && |
269 | 269 | sshdr->asc == 0x24) { | |
270 | if (result && | 270 | /* |
271 | sshdr->sense_key == ILLEGAL_REQUEST && | 271 | * INVALID FIELD IN CDB error: reset of a conventional |
272 | sshdr->asc == 0x24) | 272 | * zone was attempted. Nothing to worry about, so be |
273 | /* | 273 | * quiet about the error. |
274 | * INVALID FIELD IN CDB error: reset of a conventional | 274 | */ |
275 | * zone was attempted. Nothing to worry about, so be | 275 | rq->rq_flags |= RQF_QUIET; |
276 | * quiet about the error. | ||
277 | */ | ||
278 | rq->rq_flags |= RQF_QUIET; | ||
279 | break; | ||
280 | |||
281 | case REQ_OP_WRITE: | ||
282 | case REQ_OP_WRITE_ZEROES: | ||
283 | case REQ_OP_WRITE_SAME: | ||
284 | break; | ||
285 | } | 276 | } |
286 | } | 277 | } |
287 | 278 | ||
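sd_zbc_complete() is reduced to the one case that matters: a failed REQ_OP_ZONE_RESET whose sense data reads ILLEGAL REQUEST with ASC 0x24 (INVALID FIELD IN CDB), which just means someone tried to reset a conventional zone, so the request is flagged RQF_QUIET; the write cases in the old switch were empty and are dropped, and REQ_OP_ZONE_RESET_ALL is no longer special-cased in this branch. The predicate below isolates that filter with stand-in constants (the real values live in the SCSI and block headers):

#include <stdio.h>

#define REQ_OP_ZONE_RESET        6      /* stand-in op code */
#define ILLEGAL_REQUEST          0x05   /* SCSI sense key */
#define ASC_INVALID_FIELD_IN_CDB 0x24
#define RQF_QUIET                (1u << 0)

struct sense_data { int key, asc; };

/* Returns the rq_flags bit to OR in when the error should be silenced. */
static unsigned int zbc_quiet_flag(int op, int result, struct sense_data s)
{
	if (op == REQ_OP_ZONE_RESET && result &&
	    s.key == ILLEGAL_REQUEST && s.asc == ASC_INVALID_FIELD_IN_CDB)
		return RQF_QUIET;       /* reset of a conventional zone */
	return 0;
}

int main(void)
{
	struct sense_data s = { ILLEGAL_REQUEST, ASC_INVALID_FIELD_IN_CDB };

	printf("quiet flag: %#x\n", zbc_quiet_flag(REQ_OP_ZONE_RESET, 1, s));
	return 0;
}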
diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d0633ebdaa9c..bc6c879bd110 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h | |||
@@ -59,6 +59,11 @@ extern ssize_t cpu_show_l1tf(struct device *dev, | |||
59 | struct device_attribute *attr, char *buf); | 59 | struct device_attribute *attr, char *buf); |
60 | extern ssize_t cpu_show_mds(struct device *dev, | 60 | extern ssize_t cpu_show_mds(struct device *dev, |
61 | struct device_attribute *attr, char *buf); | 61 | struct device_attribute *attr, char *buf); |
62 | extern ssize_t cpu_show_tsx_async_abort(struct device *dev, | ||
63 | struct device_attribute *attr, | ||
64 | char *buf); | ||
65 | extern ssize_t cpu_show_itlb_multihit(struct device *dev, | ||
66 | struct device_attribute *attr, char *buf); | ||
62 | 67 | ||
63 | extern __printf(4, 5) | 68 | extern __printf(4, 5) |
64 | struct device *cpu_device_create(struct device *parent, void *drvdata, | 69 | struct device *cpu_device_create(struct device *parent, void *drvdata, |
@@ -213,28 +218,7 @@ static inline int cpuhp_smt_enable(void) { return 0; } | |||
213 | static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } | 218 | static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } |
214 | #endif | 219 | #endif |
215 | 220 | ||
216 | /* | 221 | extern bool cpu_mitigations_off(void); |
217 | * These are used for a global "mitigations=" cmdline option for toggling | 222 | extern bool cpu_mitigations_auto_nosmt(void); |
218 | * optional CPU mitigations. | ||
219 | */ | ||
220 | enum cpu_mitigations { | ||
221 | CPU_MITIGATIONS_OFF, | ||
222 | CPU_MITIGATIONS_AUTO, | ||
223 | CPU_MITIGATIONS_AUTO_NOSMT, | ||
224 | }; | ||
225 | |||
226 | extern enum cpu_mitigations cpu_mitigations; | ||
227 | |||
228 | /* mitigations=off */ | ||
229 | static inline bool cpu_mitigations_off(void) | ||
230 | { | ||
231 | return cpu_mitigations == CPU_MITIGATIONS_OFF; | ||
232 | } | ||
233 | |||
234 | /* mitigations=auto,nosmt */ | ||
235 | static inline bool cpu_mitigations_auto_nosmt(void) | ||
236 | { | ||
237 | return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; | ||
238 | } | ||
239 | 223 | ||
240 | #endif /* _LINUX_CPU_H_ */ | 224 | #endif /* _LINUX_CPU_H_ */ |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 290dbe353a47..d41c521a39da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -1383,4 +1383,10 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) | |||
1383 | } | 1383 | } |
1384 | #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ | 1384 | #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ |
1385 | 1385 | ||
1386 | typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); | ||
1387 | |||
1388 | int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, | ||
1389 | uintptr_t data, const char *name, | ||
1390 | struct task_struct **thread_ptr); | ||
1391 | |||
1386 | #endif | 1392 | #endif |
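The two kvm_host.h declarations expose a generic per-VM worker-thread facility whose implementation appears at the end of this series in kvm_main.c: arch code hands in a kvm_vm_thread_fn_t callback plus an opaque data word and receives a kthread that has been attached to the spawner's cgroups. The userspace model below mirrors the callback shape with pthreads; every name in it is a stand-in, including the recovery_worker() callback.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct toy_kvm { int id; };

typedef int (*toy_vm_thread_fn_t)(struct toy_kvm *kvm, uintptr_t data);

struct toy_worker_args {
	struct toy_kvm *kvm;
	toy_vm_thread_fn_t fn;
	uintptr_t data;
};

static void *toy_worker(void *arg)
{
	struct toy_worker_args *a = arg;

	return (void *)(intptr_t)a->fn(a->kvm, a->data);
}

/* Example callback, analogous to what an architecture would pass in. */
static int recovery_worker(struct toy_kvm *kvm, uintptr_t period_ms)
{
	printf("vm %d: worker running every %lu ms\n",
	       kvm->id, (unsigned long)period_ms);
	return 0;
}

int main(void)
{
	struct toy_kvm vm = { .id = 1 };
	struct toy_worker_args args = { &vm, recovery_worker, 1000 };
	pthread_t thread;

	if (pthread_create(&thread, NULL, toy_worker, &args))
		return 1;
	pthread_join(thread, NULL);
	return 0;
}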
diff --git a/kernel/cpu.c b/kernel/cpu.c index fc28e17940e0..e2cad3ee2ead 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -2373,7 +2373,18 @@ void __init boot_cpu_hotplug_init(void) | |||
2373 | this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); | 2373 | this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); |
2374 | } | 2374 | } |
2375 | 2375 | ||
2376 | enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; | 2376 | /* |
2377 | * These are used for a global "mitigations=" cmdline option for toggling | ||
2378 | * optional CPU mitigations. | ||
2379 | */ | ||
2380 | enum cpu_mitigations { | ||
2381 | CPU_MITIGATIONS_OFF, | ||
2382 | CPU_MITIGATIONS_AUTO, | ||
2383 | CPU_MITIGATIONS_AUTO_NOSMT, | ||
2384 | }; | ||
2385 | |||
2386 | static enum cpu_mitigations cpu_mitigations __ro_after_init = | ||
2387 | CPU_MITIGATIONS_AUTO; | ||
2377 | 2388 | ||
2378 | static int __init mitigations_parse_cmdline(char *arg) | 2389 | static int __init mitigations_parse_cmdline(char *arg) |
2379 | { | 2390 | { |
@@ -2390,3 +2401,17 @@ static int __init mitigations_parse_cmdline(char *arg) | |||
2390 | return 0; | 2401 | return 0; |
2391 | } | 2402 | } |
2392 | early_param("mitigations", mitigations_parse_cmdline); | 2403 | early_param("mitigations", mitigations_parse_cmdline); |
2404 | |||
2405 | /* mitigations=off */ | ||
2406 | bool cpu_mitigations_off(void) | ||
2407 | { | ||
2408 | return cpu_mitigations == CPU_MITIGATIONS_OFF; | ||
2409 | } | ||
2410 | EXPORT_SYMBOL_GPL(cpu_mitigations_off); | ||
2411 | |||
2412 | /* mitigations=auto,nosmt */ | ||
2413 | bool cpu_mitigations_auto_nosmt(void) | ||
2414 | { | ||
2415 | return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; | ||
2416 | } | ||
2417 | EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); | ||
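Moving the enum into kernel/cpu.c means nothing outside that file can compare cpu_mitigations directly any more; callers go through the two exported accessors instead, and the GPL exports presumably exist so that modular users such as KVM can reach them. The sketch below shows how a mitigation-selection path might consult the helpers; the selection logic and names are illustrative, not taken from any particular vulnerability file.

#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-ins for the exported helpers; in the kernel they report
 * the private setting parsed from the "mitigations=" command line. */
static bool cpu_mitigations_off(void)        { return false; }
static bool cpu_mitigations_auto_nosmt(void) { return true;  }

enum toy_mitigation { TOY_MIT_OFF, TOY_MIT_FULL, TOY_MIT_FULL_NOSMT };

static enum toy_mitigation toy_select_mitigation(void)
{
	if (cpu_mitigations_off())
		return TOY_MIT_OFF;             /* mitigations=off */
	if (cpu_mitigations_auto_nosmt())
		return TOY_MIT_FULL_NOSMT;      /* mitigations=auto,nosmt */
	return TOY_MIT_FULL;                    /* default: mitigations=auto */
}

int main(void)
{
	static const char *const names[] = { "off", "full", "full,nosmt" };

	printf("selected: %s\n", names[toy_select_mitigation()]);
	return 0;
}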
diff --git a/kernel/signal.c b/kernel/signal.c index c4da1ef56fdf..bcd46f547db3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2205,8 +2205,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t | |||
2205 | */ | 2205 | */ |
2206 | preempt_disable(); | 2206 | preempt_disable(); |
2207 | read_unlock(&tasklist_lock); | 2207 | read_unlock(&tasklist_lock); |
2208 | preempt_enable_no_resched(); | ||
2209 | cgroup_enter_frozen(); | 2208 | cgroup_enter_frozen(); |
2209 | preempt_enable_no_resched(); | ||
2210 | freezable_schedule(); | 2210 | freezable_schedule(); |
2211 | cgroup_leave_frozen(true); | 2211 | cgroup_leave_frozen(true); |
2212 | } else { | 2212 | } else { |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0dac149ead16..524cff24a68d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/bsearch.h> | 50 | #include <linux/bsearch.h> |
51 | #include <linux/io.h> | 51 | #include <linux/io.h> |
52 | #include <linux/lockdep.h> | 52 | #include <linux/lockdep.h> |
53 | #include <linux/kthread.h> | ||
53 | 54 | ||
54 | #include <asm/processor.h> | 55 | #include <asm/processor.h> |
55 | #include <asm/ioctl.h> | 56 | #include <asm/ioctl.h> |
@@ -645,6 +646,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) | |||
645 | return 0; | 646 | return 0; |
646 | } | 647 | } |
647 | 648 | ||
649 | /* | ||
650 | * Called after the VM is otherwise initialized, but just before adding it to | ||
651 | * the vm_list. | ||
652 | */ | ||
653 | int __weak kvm_arch_post_init_vm(struct kvm *kvm) | ||
654 | { | ||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | /* | ||
659 | * Called just after removing the VM from the vm_list, but before doing any | ||
660 | * other destruction. | ||
661 | */ | ||
662 | void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) | ||
663 | { | ||
664 | } | ||
665 | |||
648 | static struct kvm *kvm_create_vm(unsigned long type) | 666 | static struct kvm *kvm_create_vm(unsigned long type) |
649 | { | 667 | { |
650 | struct kvm *kvm = kvm_arch_alloc_vm(); | 668 | struct kvm *kvm = kvm_arch_alloc_vm(); |
@@ -702,6 +720,10 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
702 | 720 | ||
703 | r = kvm_init_mmu_notifier(kvm); | 721 | r = kvm_init_mmu_notifier(kvm); |
704 | if (r) | 722 | if (r) |
723 | goto out_err_no_mmu_notifier; | ||
724 | |||
725 | r = kvm_arch_post_init_vm(kvm); | ||
726 | if (r) | ||
705 | goto out_err; | 727 | goto out_err; |
706 | 728 | ||
707 | mutex_lock(&kvm_lock); | 729 | mutex_lock(&kvm_lock); |
@@ -713,6 +735,11 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
713 | return kvm; | 735 | return kvm; |
714 | 736 | ||
715 | out_err: | 737 | out_err: |
738 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
739 | if (kvm->mmu_notifier.ops) | ||
740 | mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); | ||
741 | #endif | ||
742 | out_err_no_mmu_notifier: | ||
716 | hardware_disable_all(); | 743 | hardware_disable_all(); |
717 | out_err_no_disable: | 744 | out_err_no_disable: |
718 | kvm_arch_destroy_vm(kvm); | 745 | kvm_arch_destroy_vm(kvm); |
@@ -757,6 +784,8 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
757 | mutex_lock(&kvm_lock); | 784 | mutex_lock(&kvm_lock); |
758 | list_del(&kvm->vm_list); | 785 | list_del(&kvm->vm_list); |
759 | mutex_unlock(&kvm_lock); | 786 | mutex_unlock(&kvm_lock); |
787 | kvm_arch_pre_destroy_vm(kvm); | ||
788 | |||
760 | kvm_free_irq_routing(kvm); | 789 | kvm_free_irq_routing(kvm); |
761 | for (i = 0; i < KVM_NR_BUSES; i++) { | 790 | for (i = 0; i < KVM_NR_BUSES; i++) { |
762 | struct kvm_io_bus *bus = kvm_get_bus(kvm, i); | 791 | struct kvm_io_bus *bus = kvm_get_bus(kvm, i); |
@@ -4391,3 +4420,86 @@ void kvm_exit(void) | |||
4391 | kvm_vfio_ops_exit(); | 4420 | kvm_vfio_ops_exit(); |
4392 | } | 4421 | } |
4393 | EXPORT_SYMBOL_GPL(kvm_exit); | 4422 | EXPORT_SYMBOL_GPL(kvm_exit); |
4423 | |||
4424 | struct kvm_vm_worker_thread_context { | ||
4425 | struct kvm *kvm; | ||
4426 | struct task_struct *parent; | ||
4427 | struct completion init_done; | ||
4428 | kvm_vm_thread_fn_t thread_fn; | ||
4429 | uintptr_t data; | ||
4430 | int err; | ||
4431 | }; | ||
4432 | |||
4433 | static int kvm_vm_worker_thread(void *context) | ||
4434 | { | ||
4435 | /* | ||
4436 | * The init_context is allocated on the stack of the parent thread, so | ||
4437 | * we have to locally copy anything that is needed beyond initialization | ||
4438 | */ | ||
4439 | struct kvm_vm_worker_thread_context *init_context = context; | ||
4440 | struct kvm *kvm = init_context->kvm; | ||
4441 | kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; | ||
4442 | uintptr_t data = init_context->data; | ||
4443 | int err; | ||
4444 | |||
4445 | err = kthread_park(current); | ||
4446 | /* kthread_park(current) is never supposed to return an error */ | ||
4447 | WARN_ON(err != 0); | ||
4448 | if (err) | ||
4449 | goto init_complete; | ||
4450 | |||
4451 | err = cgroup_attach_task_all(init_context->parent, current); | ||
4452 | if (err) { | ||
4453 | kvm_err("%s: cgroup_attach_task_all failed with err %d\n", | ||
4454 | __func__, err); | ||
4455 | goto init_complete; | ||
4456 | } | ||
4457 | |||
4458 | set_user_nice(current, task_nice(init_context->parent)); | ||
4459 | |||
4460 | init_complete: | ||
4461 | init_context->err = err; | ||
4462 | complete(&init_context->init_done); | ||
4463 | init_context = NULL; | ||
4464 | |||
4465 | if (err) | ||
4466 | return err; | ||
4467 | |||
4468 | /* Wait to be woken up by the spawner before proceeding. */ | ||
4469 | kthread_parkme(); | ||
4470 | |||
4471 | if (!kthread_should_stop()) | ||
4472 | err = thread_fn(kvm, data); | ||
4473 | |||
4474 | return err; | ||
4475 | } | ||
4476 | |||
4477 | int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, | ||
4478 | uintptr_t data, const char *name, | ||
4479 | struct task_struct **thread_ptr) | ||
4480 | { | ||
4481 | struct kvm_vm_worker_thread_context init_context = {}; | ||
4482 | struct task_struct *thread; | ||
4483 | |||
4484 | *thread_ptr = NULL; | ||
4485 | init_context.kvm = kvm; | ||
4486 | init_context.parent = current; | ||
4487 | init_context.thread_fn = thread_fn; | ||
4488 | init_context.data = data; | ||
4489 | init_completion(&init_context.init_done); | ||
4490 | |||
4491 | thread = kthread_run(kvm_vm_worker_thread, &init_context, | ||
4492 | "%s-%d", name, task_pid_nr(current)); | ||
4493 | if (IS_ERR(thread)) | ||
4494 | return PTR_ERR(thread); | ||
4495 | |||
4496 | /* kthread_run is never supposed to return NULL */ | ||
4497 | WARN_ON(thread == NULL); | ||
4498 | |||
4499 | wait_for_completion(&init_context.init_done); | ||
4500 | |||
4501 | if (!init_context.err) | ||
4502 | *thread_ptr = thread; | ||
4503 | |||
4504 | return init_context.err; | ||
4505 | } | ||
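kvm_vm_worker_thread() copies kvm, thread_fn and data out of init_context before completing init_done, because that context sits on the spawner's stack and is gone the moment kvm_vm_create_worker_thread() returns; the explicit init_context = NULL makes any later slip obvious. The same stack-context-plus-completion handshake, modelled with a pthread condition variable standing in for the completion:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct init_ctx {
	int data;               /* what the worker must copy out */
	int err;
	bool done;
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

static void *worker(void *arg)
{
	struct init_ctx *ctx = arg;
	int data = ctx->data;   /* local copy before signalling */

	pthread_mutex_lock(&ctx->lock);
	ctx->err = 0;
	ctx->done = true;
	pthread_cond_signal(&ctx->cond);        /* complete(&init_done) */
	pthread_mutex_unlock(&ctx->lock);
	ctx = NULL;             /* the spawner's stack must not be touched now */

	printf("worker continuing with data=%d\n", data);
	return NULL;
}

int main(void)
{
	struct init_ctx ctx = {
		.data = 42,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	if (pthread_create(&t, NULL, worker, &ctx))
		return 1;

	pthread_mutex_lock(&ctx.lock);
	while (!ctx.done)       /* wait_for_completion(&init_done) */
		pthread_cond_wait(&ctx.cond, &ctx.lock);
	pthread_mutex_unlock(&ctx.lock);

	printf("spawner saw err=%d\n", ctx.err);
	pthread_join(t, NULL);  /* only so the demo exits cleanly */
	return 0;
}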