diff options
author | Ingo Molnar <mingo@kernel.org> | 2019-07-04 04:36:20 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2019-07-04 04:36:20 -0400 |
commit | f584dd32edc5d4400d7ceb92111a89f0c1f6651f (patch) | |
tree | e31ee9615fc9f07e8791fca0e77cd35f2dd1041a | |
parent | a328a259ced0c0fa5aabcd29238779a536335884 (diff) | |
parent | 049331f277fef1c3f2527c2c9afa1d285e9a1247 (diff) |
Merge branch 'x86/cpu' into perf/core, to pick up revert
perf/core has an earlier version of the x86/cpu tree merged, to avoid
conflicts, and due to this we want to pick up this ABI impacting
revert as well:
049331f277fe: ("x86/fsgsbase: Revert FSGSBASE support")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 2 | ||||
-rw-r--r-- | Documentation/x86/entry_64.rst | 9 | ||||
-rw-r--r-- | Documentation/x86/x86_64/fsgs.rst | 199 | ||||
-rw-r--r-- | Documentation/x86/x86_64/index.rst | 1 | ||||
-rw-r--r-- | arch/x86/entry/calling.h | 40 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 115 | ||||
-rw-r--r-- | arch/x86/include/asm/fsgsbase.h | 45 | ||||
-rw-r--r-- | arch/x86/include/asm/inst.h | 15 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/hwcap2.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/generic.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 119 | ||||
-rw-r--r-- | tools/testing/selftests/x86/Makefile | 5 | ||||
-rw-r--r-- | tools/testing/selftests/x86/fsgsbase.c | 74 | ||||
-rw-r--r-- | tools/testing/selftests/x86/syscall_arg_fault.c | 112 |
16 files changed, 235 insertions, 568 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 35bc3c3574c6..138f6664b2e2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
@@ -2857,8 +2857,6 @@ | |||
2857 | no5lvl [X86-64] Disable 5-level paging mode. Forces | 2857 | no5lvl [X86-64] Disable 5-level paging mode. Forces |
2858 | kernel to use 4-level paging instead. | 2858 | kernel to use 4-level paging instead. |
2859 | 2859 | ||
2860 | nofsgsbase [X86] Disables FSGSBASE instructions. | ||
2861 | |||
2862 | no_console_suspend | 2860 | no_console_suspend |
2863 | [HW] Never suspend the console | 2861 | [HW] Never suspend the console |
2864 | Disable suspending of consoles during suspend and | 2862 | Disable suspending of consoles during suspend and |
diff --git a/Documentation/x86/entry_64.rst b/Documentation/x86/entry_64.rst index b87c1d816aea..a48b3f6ebbe8 100644 --- a/Documentation/x86/entry_64.rst +++ b/Documentation/x86/entry_64.rst | |||
@@ -108,12 +108,3 @@ We try to only use IST entries and the paranoid entry code for vectors | |||
108 | that absolutely need the more expensive check for the GS base - and we | 108 | that absolutely need the more expensive check for the GS base - and we |
109 | generate all 'normal' entry points with the regular (faster) paranoid=0 | 109 | generate all 'normal' entry points with the regular (faster) paranoid=0 |
110 | variant. | 110 | variant. |
111 | |||
112 | On a FSGSBASE system, however, user space can set GS without kernel | ||
113 | interaction. It means the value of GS base itself does not imply anything, | ||
114 | whether a kernel value or a user space value. So, there is no longer a safe | ||
115 | way to check whether the exception is entering from user mode or kernel | ||
116 | mode in the paranoid entry code path. So the GSBASE value needs to be read | ||
117 | out, saved and the kernel GSBASE value written. On exit the saved GSBASE | ||
118 | value needs to be restored unconditionally. The non paranoid entry/exit | ||
119 | code still uses SWAPGS unconditionally as the state is known. | ||
diff --git a/Documentation/x86/x86_64/fsgs.rst b/Documentation/x86/x86_64/fsgs.rst deleted file mode 100644 index 380c0b5ccca2..000000000000 --- a/Documentation/x86/x86_64/fsgs.rst +++ /dev/null | |||
@@ -1,199 +0,0 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | Using FS and GS segments in user space applications | ||
4 | =================================================== | ||
5 | |||
6 | The x86 architecture supports segmentation. Instructions which access | ||
7 | memory can use segment register based addressing mode. The following | ||
8 | notation is used to address a byte within a segment: | ||
9 | |||
10 | Segment-register:Byte-address | ||
11 | |||
12 | The segment base address is added to the Byte-address to compute the | ||
13 | resulting virtual address which is accessed. This allows to access multiple | ||
14 | instances of data with the identical Byte-address, i.e. the same code. The | ||
15 | selection of a particular instance is purely based on the base-address in | ||
16 | the segment register. | ||
17 | |||
18 | In 32-bit mode the CPU provides 6 segments, which also support segment | ||
19 | limits. The limits can be used to enforce address space protections. | ||
20 | |||
21 | In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is | ||
22 | always 0 to provide a full 64bit address space. The FS and GS segments are | ||
23 | still functional in 64-bit mode. | ||
24 | |||
25 | Common FS and GS usage | ||
26 | ------------------------------ | ||
27 | |||
28 | The FS segment is commonly used to address Thread Local Storage (TLS). FS | ||
29 | is usually managed by runtime code or a threading library. Variables | ||
30 | declared with the '__thread' storage class specifier are instantiated per | ||
31 | thread and the compiler emits the FS: address prefix for accesses to these | ||
32 | variables. Each thread has its own FS base address so common code can be | ||
33 | used without complex address offset calculations to access the per thread | ||
34 | instances. Applications should not use FS for other purposes when they use | ||
35 | runtimes or threading libraries which manage the per thread FS. | ||
36 | |||
37 | The GS segment has no common use and can be used freely by | ||
38 | applications. GCC and Clang support GS based addressing via address space | ||
39 | identifiers. | ||
40 | |||
41 | Reading and writing the FS/GS base address | ||
42 | ------------------------------------------ | ||
43 | |||
44 | There exist two mechanisms to read and write the FS/FS base address: | ||
45 | |||
46 | - the arch_prctl() system call | ||
47 | |||
48 | - the FSGSBASE instruction family | ||
49 | |||
50 | Accessing FS/GS base with arch_prctl() | ||
51 | -------------------------------------- | ||
52 | |||
53 | The arch_prctl(2) based mechanism is available on all 64bit CPUs and all | ||
54 | kernel versions. | ||
55 | |||
56 | Reading the base: | ||
57 | |||
58 | arch_prctl(ARCH_GET_FS, &fsbase); | ||
59 | arch_prctl(ARCH_GET_GS, &gsbase); | ||
60 | |||
61 | Writing the base: | ||
62 | |||
63 | arch_prctl(ARCH_SET_FS, fsbase); | ||
64 | arch_prctl(ARCH_SET_GS, gsbase); | ||
65 | |||
66 | The ARCH_SET_GS prctl may be disabled depending on kernel configuration | ||
67 | and security settings. | ||
68 | |||
69 | Accessing FS/GS base with the FSGSBASE instructions | ||
70 | --------------------------------------------------- | ||
71 | |||
72 | With the Ivy Bridge CPU generation Intel introduced a new set of | ||
73 | instructions to access the FS and GS base registers directly from user | ||
74 | space. These instructions are also supported on AMD Family 17H CPUs. The | ||
75 | following instructions are available: | ||
76 | |||
77 | =============== =========================== | ||
78 | RDFSBASE %reg Read the FS base register | ||
79 | RDGSBASE %reg Read the GS base register | ||
80 | WRFSBASE %reg Write the FS base register | ||
81 | WRGSBASE %reg Write the GS base register | ||
82 | =============== =========================== | ||
83 | |||
84 | The instructions avoid the overhead of the arch_prctl() syscall and allow | ||
85 | more flexible usage of the FS/GS addressing modes in user space | ||
86 | applications. This does not prevent conflicts between threading libraries | ||
87 | and runtimes which utilize FS and applications which want to use it for | ||
88 | their own purpose. | ||
89 | |||
90 | FSGSBASE instructions enablement | ||
91 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
92 | The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If | ||
93 | available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs. | ||
94 | |||
95 | The availability of the instructions does not enable them | ||
96 | automatically. The kernel has to enable them explicitly in CR4. The | ||
97 | reason for this is that older kernels make assumptions about the values in | ||
98 | the GS register and enforce them when GS base is set via | ||
99 | arch_prctl(). Allowing user space to write arbitrary values to GS base | ||
100 | would violate these assumptions and cause malfunction. | ||
101 | |||
102 | On kernels which do not enable FSGSBASE the execution of the FSGSBASE | ||
103 | instructions will fault with a #UD exception. | ||
104 | |||
105 | The kernel provides reliable information about the enabled state in the | ||
106 | ELF AUX vector. If the HWCAP2_FSGSBASE bit is set in the AUX vector, the | ||
107 | kernel has FSGSBASE instructions enabled and applications can use them. | ||
108 | The following code example shows how this detection works:: | ||
109 | |||
110 | #include <sys/auxv.h> | ||
111 | #include <elf.h> | ||
112 | |||
113 | /* Will be eventually in asm/hwcap.h */ | ||
114 | #ifndef HWCAP2_FSGSBASE | ||
115 | #define HWCAP2_FSGSBASE (1 << 1) | ||
116 | #endif | ||
117 | |||
118 | .... | ||
119 | |||
120 | unsigned val = getauxval(AT_HWCAP2); | ||
121 | |||
122 | if (val & HWCAP2_FSGSBASE) | ||
123 | printf("FSGSBASE enabled\n"); | ||
124 | |||
125 | FSGSBASE instructions compiler support | ||
126 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
127 | |||
128 | GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE | ||
129 | instructions. Clang supports them as well. | ||
130 | |||
131 | =================== =========================== | ||
132 | _readfsbase_u64() Read the FS base register | ||
133 | _readfsbase_u64() Read the GS base register | ||
134 | _writefsbase_u64() Write the FS base register | ||
135 | _writegsbase_u64() Write the GS base register | ||
136 | =================== =========================== | ||
137 | |||
138 | To utilize these instrinsics <immintrin.h> must be included in the source | ||
139 | code and the compiler option -mfsgsbase has to be added. | ||
140 | |||
141 | Compiler support for FS/GS based addressing | ||
142 | ------------------------------------------- | ||
143 | |||
144 | GCC version 6 and newer provide support for FS/GS based addressing via | ||
145 | Named Address Spaces. GCC implements the following address space | ||
146 | identifiers for x86: | ||
147 | |||
148 | ========= ==================================== | ||
149 | __seg_fs Variable is addressed relative to FS | ||
150 | __seg_gs Variable is addressed relative to GS | ||
151 | ========= ==================================== | ||
152 | |||
153 | The preprocessor symbols __SEG_FS and __SEG_GS are defined when these | ||
154 | address spaces are supported. Code which implements fallback modes should | ||
155 | check whether these symbols are defined. Usage example:: | ||
156 | |||
157 | #ifdef __SEG_GS | ||
158 | |||
159 | long data0 = 0; | ||
160 | long data1 = 1; | ||
161 | |||
162 | long __seg_gs *ptr; | ||
163 | |||
164 | /* Check whether FSGSBASE is enabled by the kernel (HWCAP2_FSGSBASE) */ | ||
165 | .... | ||
166 | |||
167 | /* Set GS to point to data0 */ | ||
168 | _writegsbase_u64(&data0); | ||
169 | |||
170 | /* Access offset 0 of GS */ | ||
171 | ptr = 0; | ||
172 | printf("data0 = %ld\n", *ptr); | ||
173 | |||
174 | /* Set GS to point to data1 */ | ||
175 | _writegsbase_u64(&data1); | ||
176 | /* ptr still addresses offset 0! */ | ||
177 | printf("data1 = %ld\n", *ptr); | ||
178 | |||
179 | |||
180 | Clang does not provide the GCC address space identifiers, but it provides | ||
181 | address spaces via an attribute based mechanism in Clang 5 and newer | ||
182 | versions: | ||
183 | |||
184 | ==================================== ===================================== | ||
185 | __attribute__((address_space(256)) Variable is addressed relative to GS | ||
186 | __attribute__((address_space(257)) Variable is addressed relative to FS | ||
187 | ==================================== ===================================== | ||
188 | |||
189 | FS/GS based addressing with inline assembly | ||
190 | ------------------------------------------- | ||
191 | |||
192 | In case the compiler does not support address spaces, inline assembly can | ||
193 | be used for FS/GS based addressing mode:: | ||
194 | |||
195 | mov %fs:offset, %reg | ||
196 | mov %gs:offset, %reg | ||
197 | |||
198 | mov %reg, %fs:offset | ||
199 | mov %reg, %gs:offset | ||
diff --git a/Documentation/x86/x86_64/index.rst b/Documentation/x86/x86_64/index.rst index a56070fc8e77..d6eaaa5a35fc 100644 --- a/Documentation/x86/x86_64/index.rst +++ b/Documentation/x86/x86_64/index.rst | |||
@@ -14,4 +14,3 @@ x86_64 Support | |||
14 | fake-numa-for-cpusets | 14 | fake-numa-for-cpusets |
15 | cpu-hotplug-spec | 15 | cpu-hotplug-spec |
16 | machinecheck | 16 | machinecheck |
17 | fsgs | ||
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index d3fbe2dc03ea..efb0d1b1f15f 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <asm/percpu.h> | 6 | #include <asm/percpu.h> |
7 | #include <asm/asm-offsets.h> | 7 | #include <asm/asm-offsets.h> |
8 | #include <asm/processor-flags.h> | 8 | #include <asm/processor-flags.h> |
9 | #include <asm/inst.h> | ||
10 | 9 | ||
11 | /* | 10 | /* |
12 | 11 | ||
@@ -338,12 +337,6 @@ For 32-bit we have the following conventions - kernel is built with | |||
338 | #endif | 337 | #endif |
339 | .endm | 338 | .endm |
340 | 339 | ||
341 | .macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req | ||
342 | rdgsbase \save_reg | ||
343 | GET_PERCPU_BASE \scratch_reg | ||
344 | wrgsbase \scratch_reg | ||
345 | .endm | ||
346 | |||
347 | #endif /* CONFIG_X86_64 */ | 340 | #endif /* CONFIG_X86_64 */ |
348 | 341 | ||
349 | .macro STACKLEAK_ERASE | 342 | .macro STACKLEAK_ERASE |
@@ -352,39 +345,6 @@ For 32-bit we have the following conventions - kernel is built with | |||
352 | #endif | 345 | #endif |
353 | .endm | 346 | .endm |
354 | 347 | ||
355 | #ifdef CONFIG_SMP | ||
356 | |||
357 | /* | ||
358 | * CPU/node NR is loaded from the limit (size) field of a special segment | ||
359 | * descriptor entry in GDT. | ||
360 | */ | ||
361 | .macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req | ||
362 | movq $__CPUNODE_SEG, \reg | ||
363 | lsl \reg, \reg | ||
364 | .endm | ||
365 | |||
366 | /* | ||
367 | * Fetch the per-CPU GSBASE value for this processor and put it in @reg. | ||
368 | * We normally use %gs for accessing per-CPU data, but we are setting up | ||
369 | * %gs here and obviously can not use %gs itself to access per-CPU data. | ||
370 | */ | ||
371 | .macro GET_PERCPU_BASE reg:req | ||
372 | ALTERNATIVE \ | ||
373 | "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \ | ||
374 | "RDPID \reg", \ | ||
375 | X86_FEATURE_RDPID | ||
376 | andq $VDSO_CPUNODE_MASK, \reg | ||
377 | movq __per_cpu_offset(, \reg, 8), \reg | ||
378 | .endm | ||
379 | |||
380 | #else | ||
381 | |||
382 | .macro GET_PERCPU_BASE reg:req | ||
383 | movq pcpu_unit_offsets(%rip), \reg | ||
384 | .endm | ||
385 | |||
386 | #endif /* CONFIG_SMP */ | ||
387 | |||
388 | /* | 348 | /* |
389 | * This does 'call enter_from_user_mode' unless we can avoid it based on | 349 | * This does 'call enter_from_user_mode' unless we can avoid it based on |
390 | * kernel config or using the static jump infrastructure. | 350 | * kernel config or using the static jump infrastructure. |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 7f9f5119d6b1..3b7a0e8d3bc0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <asm/export.h> | 38 | #include <asm/export.h> |
39 | #include <asm/frame.h> | 39 | #include <asm/frame.h> |
40 | #include <asm/nospec-branch.h> | 40 | #include <asm/nospec-branch.h> |
41 | #include <asm/fsgsbase.h> | ||
42 | #include <linux/err.h> | 41 | #include <linux/err.h> |
43 | 42 | ||
44 | #include "calling.h" | 43 | #include "calling.h" |
@@ -948,6 +947,7 @@ ENTRY(\sym) | |||
948 | addq $\ist_offset, CPU_TSS_IST(\shift_ist) | 947 | addq $\ist_offset, CPU_TSS_IST(\shift_ist) |
949 | .endif | 948 | .endif |
950 | 949 | ||
950 | /* these procedures expect "no swapgs" flag in ebx */ | ||
951 | .if \paranoid | 951 | .if \paranoid |
952 | jmp paranoid_exit | 952 | jmp paranoid_exit |
953 | .else | 953 | .else |
@@ -1164,21 +1164,24 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 | |||
1164 | #endif | 1164 | #endif |
1165 | 1165 | ||
1166 | /* | 1166 | /* |
1167 | * Save all registers in pt_regs. Return GSBASE related information | 1167 | * Save all registers in pt_regs, and switch gs if needed. |
1168 | * in EBX depending on the availability of the FSGSBASE instructions: | 1168 | * Use slow, but surefire "are we in kernel?" check. |
1169 | * | 1169 | * Return: ebx=0: need swapgs on exit, ebx=1: otherwise |
1170 | * FSGSBASE R/EBX | ||
1171 | * N 0 -> SWAPGS on exit | ||
1172 | * 1 -> no SWAPGS on exit | ||
1173 | * | ||
1174 | * Y GSBASE value at entry, must be restored in paranoid_exit | ||
1175 | */ | 1170 | */ |
1176 | ENTRY(paranoid_entry) | 1171 | ENTRY(paranoid_entry) |
1177 | UNWIND_HINT_FUNC | 1172 | UNWIND_HINT_FUNC |
1178 | cld | 1173 | cld |
1179 | PUSH_AND_CLEAR_REGS save_ret=1 | 1174 | PUSH_AND_CLEAR_REGS save_ret=1 |
1180 | ENCODE_FRAME_POINTER 8 | 1175 | ENCODE_FRAME_POINTER 8 |
1176 | movl $1, %ebx | ||
1177 | movl $MSR_GS_BASE, %ecx | ||
1178 | rdmsr | ||
1179 | testl %edx, %edx | ||
1180 | js 1f /* negative -> in kernel */ | ||
1181 | SWAPGS | ||
1182 | xorl %ebx, %ebx | ||
1181 | 1183 | ||
1184 | 1: | ||
1182 | /* | 1185 | /* |
1183 | * Always stash CR3 in %r14. This value will be restored, | 1186 | * Always stash CR3 in %r14. This value will be restored, |
1184 | * verbatim, at exit. Needed if paranoid_entry interrupted | 1187 | * verbatim, at exit. Needed if paranoid_entry interrupted |
@@ -1188,49 +1191,9 @@ ENTRY(paranoid_entry) | |||
1188 | * This is also why CS (stashed in the "iret frame" by the | 1191 | * This is also why CS (stashed in the "iret frame" by the |
1189 | * hardware at entry) can not be used: this may be a return | 1192 | * hardware at entry) can not be used: this may be a return |
1190 | * to kernel code, but with a user CR3 value. | 1193 | * to kernel code, but with a user CR3 value. |
1191 | * | ||
1192 | * Switching CR3 does not depend on kernel GSBASE so it can | ||
1193 | * be done before switching to the kernel GSBASE. This is | ||
1194 | * required for FSGSBASE because the kernel GSBASE has to | ||
1195 | * be retrieved from a kernel internal table. | ||
1196 | */ | 1194 | */ |
1197 | SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 | 1195 | SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 |
1198 | 1196 | ||
1199 | /* | ||
1200 | * Handling GSBASE depends on the availability of FSGSBASE. | ||
1201 | * | ||
1202 | * Without FSGSBASE the kernel enforces that negative GSBASE | ||
1203 | * values indicate kernel GSBASE. With FSGSBASE no assumptions | ||
1204 | * can be made about the GSBASE value when entering from user | ||
1205 | * space. | ||
1206 | */ | ||
1207 | ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE | ||
1208 | |||
1209 | /* | ||
1210 | * Read the current GSBASE and store it in in %rbx unconditionally, | ||
1211 | * retrieve and set the current CPUs kernel GSBASE. The stored value | ||
1212 | * has to be restored in paranoid_exit unconditionally. | ||
1213 | */ | ||
1214 | SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx | ||
1215 | ret | ||
1216 | |||
1217 | .Lparanoid_entry_checkgs: | ||
1218 | /* EBX = 1 -> kernel GSBASE active, no restore required */ | ||
1219 | movl $1, %ebx | ||
1220 | /* | ||
1221 | * The kernel-enforced convention is a negative GSBASE indicates | ||
1222 | * a kernel value. No SWAPGS needed on entry and exit. | ||
1223 | */ | ||
1224 | movl $MSR_GS_BASE, %ecx | ||
1225 | rdmsr | ||
1226 | testl %edx, %edx | ||
1227 | jns .Lparanoid_entry_swapgs | ||
1228 | ret | ||
1229 | |||
1230 | .Lparanoid_entry_swapgs: | ||
1231 | SWAPGS | ||
1232 | /* EBX = 0 -> SWAPGS required on exit */ | ||
1233 | xorl %ebx, %ebx | ||
1234 | ret | 1197 | ret |
1235 | END(paranoid_entry) | 1198 | END(paranoid_entry) |
1236 | 1199 | ||
@@ -1241,47 +1204,28 @@ END(paranoid_entry) | |||
1241 | * | 1204 | * |
1242 | * We may be returning to very strange contexts (e.g. very early | 1205 | * We may be returning to very strange contexts (e.g. very early |
1243 | * in syscall entry), so checking for preemption here would | 1206 | * in syscall entry), so checking for preemption here would |
1244 | * be complicated. Fortunately, there's no good reason to try | 1207 | * be complicated. Fortunately, we there's no good reason |
1245 | * to handle preemption here. | 1208 | * to try to handle preemption here. |
1246 | * | ||
1247 | * R/EBX contains the GSBASE related information depending on the | ||
1248 | * availability of the FSGSBASE instructions: | ||
1249 | * | ||
1250 | * FSGSBASE R/EBX | ||
1251 | * N 0 -> SWAPGS on exit | ||
1252 | * 1 -> no SWAPGS on exit | ||
1253 | * | 1209 | * |
1254 | * Y User space GSBASE, must be restored unconditionally | 1210 | * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) |
1255 | */ | 1211 | */ |
1256 | ENTRY(paranoid_exit) | 1212 | ENTRY(paranoid_exit) |
1257 | UNWIND_HINT_REGS | 1213 | UNWIND_HINT_REGS |
1258 | DISABLE_INTERRUPTS(CLBR_ANY) | 1214 | DISABLE_INTERRUPTS(CLBR_ANY) |
1259 | TRACE_IRQS_OFF_DEBUG | 1215 | TRACE_IRQS_OFF_DEBUG |
1260 | 1216 | testl %ebx, %ebx /* swapgs needed? */ | |
1261 | /* Handle GS depending on FSGSBASE availability */ | ||
1262 | ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "nop",X86_FEATURE_FSGSBASE | ||
1263 | |||
1264 | /* With FSGSBASE enabled, unconditionally restore GSBASE */ | ||
1265 | wrgsbase %rbx | ||
1266 | jmp .Lparanoid_exit_no_swapgs; | ||
1267 | |||
1268 | .Lparanoid_exit_checkgs: | ||
1269 | /* On non-FSGSBASE systems, conditionally do SWAPGS */ | ||
1270 | testl %ebx, %ebx | ||
1271 | jnz .Lparanoid_exit_no_swapgs | 1217 | jnz .Lparanoid_exit_no_swapgs |
1272 | TRACE_IRQS_IRETQ | 1218 | TRACE_IRQS_IRETQ |
1273 | /* Always restore stashed CR3 value (see paranoid_entry) */ | 1219 | /* Always restore stashed CR3 value (see paranoid_entry) */ |
1274 | RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 | 1220 | RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 |
1275 | SWAPGS_UNSAFE_STACK | 1221 | SWAPGS_UNSAFE_STACK |
1276 | jmp .Lparanoid_exit_restore | 1222 | jmp .Lparanoid_exit_restore |
1277 | |||
1278 | .Lparanoid_exit_no_swapgs: | 1223 | .Lparanoid_exit_no_swapgs: |
1279 | TRACE_IRQS_IRETQ_DEBUG | 1224 | TRACE_IRQS_IRETQ_DEBUG |
1280 | /* Always restore stashed CR3 value (see paranoid_entry) */ | 1225 | /* Always restore stashed CR3 value (see paranoid_entry) */ |
1281 | RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 | 1226 | RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 |
1282 | |||
1283 | .Lparanoid_exit_restore: | 1227 | .Lparanoid_exit_restore: |
1284 | jmp restore_regs_and_return_to_kernel | 1228 | jmp restore_regs_and_return_to_kernel |
1285 | END(paranoid_exit) | 1229 | END(paranoid_exit) |
1286 | 1230 | ||
1287 | /* | 1231 | /* |
@@ -1692,27 +1636,10 @@ end_repeat_nmi: | |||
1692 | /* Always restore stashed CR3 value (see paranoid_entry) */ | 1636 | /* Always restore stashed CR3 value (see paranoid_entry) */ |
1693 | RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 | 1637 | RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 |
1694 | 1638 | ||
1695 | /* | 1639 | testl %ebx, %ebx /* swapgs needed? */ |
1696 | * The above invocation of paranoid_entry stored the GSBASE | ||
1697 | * related information in R/EBX depending on the availability | ||
1698 | * of FSGSBASE. | ||
1699 | * | ||
1700 | * If FSGSBASE is enabled, restore the saved GSBASE value | ||
1701 | * unconditionally, otherwise take the conditional SWAPGS path. | ||
1702 | */ | ||
1703 | ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE | ||
1704 | |||
1705 | wrgsbase %rbx | ||
1706 | jmp nmi_restore | ||
1707 | |||
1708 | nmi_no_fsgsbase: | ||
1709 | /* EBX == 0 -> invoke SWAPGS */ | ||
1710 | testl %ebx, %ebx | ||
1711 | jnz nmi_restore | 1640 | jnz nmi_restore |
1712 | |||
1713 | nmi_swapgs: | 1641 | nmi_swapgs: |
1714 | SWAPGS_UNSAFE_STACK | 1642 | SWAPGS_UNSAFE_STACK |
1715 | |||
1716 | nmi_restore: | 1643 | nmi_restore: |
1717 | POP_REGS | 1644 | POP_REGS |
1718 | 1645 | ||
@@ -1743,11 +1670,17 @@ nmi_restore: | |||
1743 | iretq | 1670 | iretq |
1744 | END(nmi) | 1671 | END(nmi) |
1745 | 1672 | ||
1673 | #ifndef CONFIG_IA32_EMULATION | ||
1674 | /* | ||
1675 | * This handles SYSCALL from 32-bit code. There is no way to program | ||
1676 | * MSRs to fully disable 32-bit SYSCALL. | ||
1677 | */ | ||
1746 | ENTRY(ignore_sysret) | 1678 | ENTRY(ignore_sysret) |
1747 | UNWIND_HINT_EMPTY | 1679 | UNWIND_HINT_EMPTY |
1748 | mov $-ENOSYS, %eax | 1680 | mov $-ENOSYS, %eax |
1749 | sysret | 1681 | sysret |
1750 | END(ignore_sysret) | 1682 | END(ignore_sysret) |
1683 | #endif | ||
1751 | 1684 | ||
1752 | ENTRY(rewind_stack_do_exit) | 1685 | ENTRY(rewind_stack_do_exit) |
1753 | UNWIND_HINT_FUNC | 1686 | UNWIND_HINT_FUNC |
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h index aefd53767a5d..bca4c743de77 100644 --- a/arch/x86/include/asm/fsgsbase.h +++ b/arch/x86/include/asm/fsgsbase.h | |||
@@ -19,63 +19,36 @@ extern unsigned long x86_gsbase_read_task(struct task_struct *task); | |||
19 | extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); | 19 | extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); |
20 | extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); | 20 | extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); |
21 | 21 | ||
22 | /* Must be protected by X86_FEATURE_FSGSBASE check. */ | 22 | /* Helper functions for reading/writing FS/GS base */ |
23 | 23 | ||
24 | static __always_inline unsigned long rdfsbase(void) | 24 | static inline unsigned long x86_fsbase_read_cpu(void) |
25 | { | 25 | { |
26 | unsigned long fsbase; | 26 | unsigned long fsbase; |
27 | 27 | ||
28 | asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory"); | 28 | rdmsrl(MSR_FS_BASE, fsbase); |
29 | 29 | ||
30 | return fsbase; | 30 | return fsbase; |
31 | } | 31 | } |
32 | 32 | ||
33 | static __always_inline unsigned long rdgsbase(void) | 33 | static inline unsigned long x86_gsbase_read_cpu_inactive(void) |
34 | { | 34 | { |
35 | unsigned long gsbase; | 35 | unsigned long gsbase; |
36 | 36 | ||
37 | asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory"); | 37 | rdmsrl(MSR_KERNEL_GS_BASE, gsbase); |
38 | 38 | ||
39 | return gsbase; | 39 | return gsbase; |
40 | } | 40 | } |
41 | 41 | ||
42 | static __always_inline void wrfsbase(unsigned long fsbase) | 42 | static inline void x86_fsbase_write_cpu(unsigned long fsbase) |
43 | { | ||
44 | asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); | ||
45 | } | ||
46 | |||
47 | static __always_inline void wrgsbase(unsigned long gsbase) | ||
48 | { | ||
49 | asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); | ||
50 | } | ||
51 | |||
52 | #include <asm/cpufeature.h> | ||
53 | |||
54 | /* Helper functions for reading/writing FS/GS base */ | ||
55 | |||
56 | static inline unsigned long x86_fsbase_read_cpu(void) | ||
57 | { | 43 | { |
58 | unsigned long fsbase; | 44 | wrmsrl(MSR_FS_BASE, fsbase); |
59 | |||
60 | if (static_cpu_has(X86_FEATURE_FSGSBASE)) | ||
61 | fsbase = rdfsbase(); | ||
62 | else | ||
63 | rdmsrl(MSR_FS_BASE, fsbase); | ||
64 | |||
65 | return fsbase; | ||
66 | } | 45 | } |
67 | 46 | ||
68 | static inline void x86_fsbase_write_cpu(unsigned long fsbase) | 47 | static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase) |
69 | { | 48 | { |
70 | if (static_cpu_has(X86_FEATURE_FSGSBASE)) | 49 | wrmsrl(MSR_KERNEL_GS_BASE, gsbase); |
71 | wrfsbase(fsbase); | ||
72 | else | ||
73 | wrmsrl(MSR_FS_BASE, fsbase); | ||
74 | } | 50 | } |
75 | 51 | ||
76 | extern unsigned long x86_gsbase_read_cpu_inactive(void); | ||
77 | extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase); | ||
78 | |||
79 | #endif /* CONFIG_X86_64 */ | 52 | #endif /* CONFIG_X86_64 */ |
80 | 53 | ||
81 | #endif /* __ASSEMBLY__ */ | 54 | #endif /* __ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index d063841a17e3..f5a796da07f8 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h | |||
@@ -306,21 +306,6 @@ | |||
306 | .endif | 306 | .endif |
307 | MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 | 307 | MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 |
308 | .endm | 308 | .endm |
309 | |||
310 | .macro RDPID opd | ||
311 | REG_TYPE rdpid_opd_type \opd | ||
312 | .if rdpid_opd_type == REG_TYPE_R64 | ||
313 | R64_NUM rdpid_opd \opd | ||
314 | .else | ||
315 | R32_NUM rdpid_opd \opd | ||
316 | .endif | ||
317 | .byte 0xf3 | ||
318 | .if rdpid_opd > 7 | ||
319 | PFX_REX rdpid_opd 0 | ||
320 | .endif | ||
321 | .byte 0x0f, 0xc7 | ||
322 | MODRM 0xc0 rdpid_opd 0x7 | ||
323 | .endm | ||
324 | #endif | 309 | #endif |
325 | 310 | ||
326 | #endif | 311 | #endif |
diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h index c5ce54e749f6..6ebaae90e207 100644 --- a/arch/x86/include/uapi/asm/hwcap2.h +++ b/arch/x86/include/uapi/asm/hwcap2.h | |||
@@ -5,7 +5,4 @@ | |||
5 | /* MONITOR/MWAIT enabled in Ring 3 */ | 5 | /* MONITOR/MWAIT enabled in Ring 3 */ |
6 | #define HWCAP2_RING3MWAIT (1 << 0) | 6 | #define HWCAP2_RING3MWAIT (1 << 0) |
7 | 7 | ||
8 | /* Kernel allows FSGSBASE instructions available in Ring 3 */ | ||
9 | #define HWCAP2_FSGSBASE BIT(1) | ||
10 | |||
11 | #endif | 8 | #endif |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0948fc25446a..482f74859fb7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -366,22 +366,6 @@ out: | |||
366 | cr4_clear_bits(X86_CR4_UMIP); | 366 | cr4_clear_bits(X86_CR4_UMIP); |
367 | } | 367 | } |
368 | 368 | ||
369 | static __init int x86_nofsgsbase_setup(char *arg) | ||
370 | { | ||
371 | /* Require an exact match without trailing characters. */ | ||
372 | if (strlen(arg)) | ||
373 | return 0; | ||
374 | |||
375 | /* Do not emit a message if the feature is not present. */ | ||
376 | if (!boot_cpu_has(X86_FEATURE_FSGSBASE)) | ||
377 | return 1; | ||
378 | |||
379 | setup_clear_cpu_cap(X86_FEATURE_FSGSBASE); | ||
380 | pr_info("FSGSBASE disabled via kernel command line\n"); | ||
381 | return 1; | ||
382 | } | ||
383 | __setup("nofsgsbase", x86_nofsgsbase_setup); | ||
384 | |||
385 | /* | 369 | /* |
386 | * Protection Keys are not available in 32-bit mode. | 370 | * Protection Keys are not available in 32-bit mode. |
387 | */ | 371 | */ |
@@ -1387,12 +1371,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) | |||
1387 | setup_smap(c); | 1371 | setup_smap(c); |
1388 | setup_umip(c); | 1372 | setup_umip(c); |
1389 | 1373 | ||
1390 | /* Enable FSGSBASE instructions if available. */ | ||
1391 | if (cpu_has(c, X86_FEATURE_FSGSBASE)) { | ||
1392 | cr4_set_bits(X86_CR4_FSGSBASE); | ||
1393 | elf_hwcap2 |= HWCAP2_FSGSBASE; | ||
1394 | } | ||
1395 | |||
1396 | /* | 1374 | /* |
1397 | * The vendor-specific functions might have changed features. | 1375 | * The vendor-specific functions might have changed features. |
1398 | * Now we do "generic changes." | 1376 | * Now we do "generic changes." |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f17c1a714779..8d6d92ebeb54 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) | |||
66 | } | 66 | } |
67 | } | 67 | } |
68 | 68 | ||
69 | /* | ||
70 | * Processors which have self-snooping capability can handle conflicting | ||
71 | * memory type across CPUs by snooping its own cache. However, there exists | ||
72 | * CPU models in which having conflicting memory types still leads to | ||
73 | * unpredictable behavior, machine check errors, or hangs. Clear this | ||
74 | * feature to prevent its use on machines with known erratas. | ||
75 | */ | ||
76 | static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) | ||
77 | { | ||
78 | switch (c->x86_model) { | ||
79 | case INTEL_FAM6_CORE_YONAH: | ||
80 | case INTEL_FAM6_CORE2_MEROM: | ||
81 | case INTEL_FAM6_CORE2_MEROM_L: | ||
82 | case INTEL_FAM6_CORE2_PENRYN: | ||
83 | case INTEL_FAM6_CORE2_DUNNINGTON: | ||
84 | case INTEL_FAM6_NEHALEM: | ||
85 | case INTEL_FAM6_NEHALEM_G: | ||
86 | case INTEL_FAM6_NEHALEM_EP: | ||
87 | case INTEL_FAM6_NEHALEM_EX: | ||
88 | case INTEL_FAM6_WESTMERE: | ||
89 | case INTEL_FAM6_WESTMERE_EP: | ||
90 | case INTEL_FAM6_SANDYBRIDGE: | ||
91 | setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); | ||
92 | } | ||
93 | } | ||
94 | |||
69 | static bool ring3mwait_disabled __read_mostly; | 95 | static bool ring3mwait_disabled __read_mostly; |
70 | 96 | ||
71 | static int __init ring3mwait_disable(char *__unused) | 97 | static int __init ring3mwait_disable(char *__unused) |
@@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
304 | } | 330 | } |
305 | 331 | ||
306 | check_mpx_erratum(c); | 332 | check_mpx_erratum(c); |
333 | check_memory_type_self_snoop_errata(c); | ||
307 | 334 | ||
308 | /* | 335 | /* |
309 | * Get the number of SMT siblings early from the extended topology | 336 | * Get the number of SMT siblings early from the extended topology |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 9356c1c9024d..aa5c064a6a22 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
743 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | 743 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ |
744 | cr0 = read_cr0() | X86_CR0_CD; | 744 | cr0 = read_cr0() | X86_CR0_CD; |
745 | write_cr0(cr0); | 745 | write_cr0(cr0); |
746 | wbinvd(); | 746 | |
747 | /* | ||
748 | * Cache flushing is the most time-consuming step when programming | ||
749 | * the MTRRs. Fortunately, as per the Intel Software Development | ||
750 | * Manual, we can skip it if the processor supports cache self- | ||
751 | * snooping. | ||
752 | */ | ||
753 | if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) | ||
754 | wbinvd(); | ||
747 | 755 | ||
748 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 756 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
749 | if (boot_cpu_has(X86_FEATURE_PGE)) { | 757 | if (boot_cpu_has(X86_FEATURE_PGE)) { |
@@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
760 | 768 | ||
761 | /* Disable MTRRs, and set the default type to uncached */ | 769 | /* Disable MTRRs, and set the default type to uncached */ |
762 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); | 770 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); |
763 | wbinvd(); | 771 | |
772 | /* Again, only flush caches if we have to. */ | ||
773 | if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) | ||
774 | wbinvd(); | ||
764 | } | 775 | } |
765 | 776 | ||
766 | static void post_set(void) __releases(set_atomicity_lock) | 777 | static void post_set(void) __releases(set_atomicity_lock) |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8f239091c15d..250e4c4ac6d9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -162,40 +162,6 @@ enum which_selector { | |||
162 | }; | 162 | }; |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * Out of line to be protected from kprobes. It is not used on Xen | ||
166 | * paravirt. When paravirt support is needed, it needs to be renamed | ||
167 | * with native_ prefix. | ||
168 | */ | ||
169 | static noinline unsigned long __rdgsbase_inactive(void) | ||
170 | { | ||
171 | unsigned long gsbase; | ||
172 | |||
173 | lockdep_assert_irqs_disabled(); | ||
174 | |||
175 | native_swapgs(); | ||
176 | gsbase = rdgsbase(); | ||
177 | native_swapgs(); | ||
178 | |||
179 | return gsbase; | ||
180 | } | ||
181 | NOKPROBE_SYMBOL(__rdgsbase_inactive); | ||
182 | |||
183 | /* | ||
184 | * Out of line to be protected from kprobes. It is not used on Xen | ||
185 | * paravirt. When paravirt support is needed, it needs to be renamed | ||
186 | * with native_ prefix. | ||
187 | */ | ||
188 | static noinline void __wrgsbase_inactive(unsigned long gsbase) | ||
189 | { | ||
190 | lockdep_assert_irqs_disabled(); | ||
191 | |||
192 | native_swapgs(); | ||
193 | wrgsbase(gsbase); | ||
194 | native_swapgs(); | ||
195 | } | ||
196 | NOKPROBE_SYMBOL(__wrgsbase_inactive); | ||
197 | |||
198 | /* | ||
199 | * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are | 165 | * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are |
200 | * not available. The goal is to be reasonably fast on non-FSGSBASE systems. | 166 | * not available. The goal is to be reasonably fast on non-FSGSBASE systems. |
201 | * It's forcibly inlined because it'll generate better code and this function | 167 | * It's forcibly inlined because it'll generate better code and this function |
@@ -244,22 +210,8 @@ static __always_inline void save_fsgs(struct task_struct *task) | |||
244 | { | 210 | { |
245 | savesegment(fs, task->thread.fsindex); | 211 | savesegment(fs, task->thread.fsindex); |
246 | savesegment(gs, task->thread.gsindex); | 212 | savesegment(gs, task->thread.gsindex); |
247 | if (static_cpu_has(X86_FEATURE_FSGSBASE)) { | 213 | save_base_legacy(task, task->thread.fsindex, FS); |
248 | unsigned long flags; | 214 | save_base_legacy(task, task->thread.gsindex, GS); |
249 | |||
250 | /* | ||
251 | * If FSGSBASE is enabled, we can't make any useful guesses | ||
252 | * about the base, and user code expects us to save the current | ||
253 | * value. Fortunately, reading the base directly is efficient. | ||
254 | */ | ||
255 | task->thread.fsbase = rdfsbase(); | ||
256 | local_irq_save(flags); | ||
257 | task->thread.gsbase = __rdgsbase_inactive(); | ||
258 | local_irq_restore(flags); | ||
259 | } else { | ||
260 | save_base_legacy(task, task->thread.fsindex, FS); | ||
261 | save_base_legacy(task, task->thread.gsindex, GS); | ||
262 | } | ||
263 | } | 215 | } |
264 | 216 | ||
265 | #if IS_ENABLED(CONFIG_KVM) | 217 | #if IS_ENABLED(CONFIG_KVM) |
@@ -338,22 +290,10 @@ static __always_inline void load_seg_legacy(unsigned short prev_index, | |||
338 | static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, | 290 | static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, |
339 | struct thread_struct *next) | 291 | struct thread_struct *next) |
340 | { | 292 | { |
341 | if (static_cpu_has(X86_FEATURE_FSGSBASE)) { | 293 | load_seg_legacy(prev->fsindex, prev->fsbase, |
342 | /* Update the FS and GS selectors if they could have changed. */ | 294 | next->fsindex, next->fsbase, FS); |
343 | if (unlikely(prev->fsindex || next->fsindex)) | 295 | load_seg_legacy(prev->gsindex, prev->gsbase, |
344 | loadseg(FS, next->fsindex); | 296 | next->gsindex, next->gsbase, GS); |
345 | if (unlikely(prev->gsindex || next->gsindex)) | ||
346 | loadseg(GS, next->gsindex); | ||
347 | |||
348 | /* Update the bases. */ | ||
349 | wrfsbase(next->fsbase); | ||
350 | __wrgsbase_inactive(next->gsbase); | ||
351 | } else { | ||
352 | load_seg_legacy(prev->fsindex, prev->fsbase, | ||
353 | next->fsindex, next->fsbase, FS); | ||
354 | load_seg_legacy(prev->gsindex, prev->gsbase, | ||
355 | next->gsindex, next->gsbase, GS); | ||
356 | } | ||
357 | } | 297 | } |
358 | 298 | ||
359 | static unsigned long x86_fsgsbase_read_task(struct task_struct *task, | 299 | static unsigned long x86_fsgsbase_read_task(struct task_struct *task, |
@@ -399,46 +339,13 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task, | |||
399 | return base; | 339 | return base; |
400 | } | 340 | } |
401 | 341 | ||
402 | unsigned long x86_gsbase_read_cpu_inactive(void) | ||
403 | { | ||
404 | unsigned long gsbase; | ||
405 | |||
406 | if (static_cpu_has(X86_FEATURE_FSGSBASE)) { | ||
407 | unsigned long flags; | ||
408 | |||
409 | /* Interrupts are disabled here. */ | ||
410 | local_irq_save(flags); | ||
411 | gsbase = __rdgsbase_inactive(); | ||
412 | local_irq_restore(flags); | ||
413 | } else { | ||
414 | rdmsrl(MSR_KERNEL_GS_BASE, gsbase); | ||
415 | } | ||
416 | |||
417 | return gsbase; | ||
418 | } | ||
419 | |||
420 | void x86_gsbase_write_cpu_inactive(unsigned long gsbase) | ||
421 | { | ||
422 | if (static_cpu_has(X86_FEATURE_FSGSBASE)) { | ||
423 | unsigned long flags; | ||
424 | |||
425 | /* Interrupts are disabled here. */ | ||
426 | local_irq_save(flags); | ||
427 | __wrgsbase_inactive(gsbase); | ||
428 | local_irq_restore(flags); | ||
429 | } else { | ||
430 | wrmsrl(MSR_KERNEL_GS_BASE, gsbase); | ||
431 | } | ||
432 | } | ||
433 | |||
434 | unsigned long x86_fsbase_read_task(struct task_struct *task) | 342 | unsigned long x86_fsbase_read_task(struct task_struct *task) |
435 | { | 343 | { |
436 | unsigned long fsbase; | 344 | unsigned long fsbase; |
437 | 345 | ||
438 | if (task == current) | 346 | if (task == current) |
439 | fsbase = x86_fsbase_read_cpu(); | 347 | fsbase = x86_fsbase_read_cpu(); |
440 | else if (static_cpu_has(X86_FEATURE_FSGSBASE) || | 348 | else if (task->thread.fsindex == 0) |
441 | (task->thread.fsindex == 0)) | ||
442 | fsbase = task->thread.fsbase; | 349 | fsbase = task->thread.fsbase; |
443 | else | 350 | else |
444 | fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex); | 351 | fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex); |
@@ -452,8 +359,7 @@ unsigned long x86_gsbase_read_task(struct task_struct *task) | |||
452 | 359 | ||
453 | if (task == current) | 360 | if (task == current) |
454 | gsbase = x86_gsbase_read_cpu_inactive(); | 361 | gsbase = x86_gsbase_read_cpu_inactive(); |
455 | else if (static_cpu_has(X86_FEATURE_FSGSBASE) || | 362 | else if (task->thread.gsindex == 0) |
456 | (task->thread.gsindex == 0)) | ||
457 | gsbase = task->thread.gsbase; | 363 | gsbase = task->thread.gsbase; |
458 | else | 364 | else |
459 | gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex); | 365 | gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex); |
@@ -493,11 +399,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
493 | p->thread.sp = (unsigned long) fork_frame; | 399 | p->thread.sp = (unsigned long) fork_frame; |
494 | p->thread.io_bitmap_ptr = NULL; | 400 | p->thread.io_bitmap_ptr = NULL; |
495 | 401 | ||
496 | save_fsgs(me); | 402 | savesegment(gs, p->thread.gsindex); |
497 | p->thread.fsindex = me->thread.fsindex; | 403 | p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; |
498 | p->thread.fsbase = me->thread.fsbase; | 404 | savesegment(fs, p->thread.fsindex); |
499 | p->thread.gsindex = me->thread.gsindex; | 405 | p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; |
500 | p->thread.gsbase = me->thread.gsbase; | ||
501 | savesegment(es, p->thread.es); | 406 | savesegment(es, p->thread.es); |
502 | savesegment(ds, p->thread.ds); | 407 | savesegment(ds, p->thread.ds); |
503 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 408 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 186520198de7..fa07d526fe39 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile | |||
@@ -12,8 +12,9 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) | |||
12 | 12 | ||
13 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ | 13 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ |
14 | check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ | 14 | check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ |
15 | protection_keys test_vdso test_vsyscall mov_ss_trap | 15 | protection_keys test_vdso test_vsyscall mov_ss_trap \ |
16 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ | 16 | syscall_arg_fault |
17 | TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ | ||
17 | test_FCMOV test_FCOMI test_FISTTP \ | 18 | test_FCMOV test_FCOMI test_FISTTP \ |
18 | vdso_restorer | 19 | vdso_restorer |
19 | TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip | 20 | TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip |
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 21fd4f94b5b0..5ab4c60c100e 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c | |||
@@ -35,6 +35,8 @@ | |||
35 | static volatile sig_atomic_t want_segv; | 35 | static volatile sig_atomic_t want_segv; |
36 | static volatile unsigned long segv_addr; | 36 | static volatile unsigned long segv_addr; |
37 | 37 | ||
38 | static unsigned short *shared_scratch; | ||
39 | |||
38 | static int nerrs; | 40 | static int nerrs; |
39 | 41 | ||
40 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | 42 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), |
@@ -242,16 +244,11 @@ static void do_remote_base() | |||
242 | 244 | ||
243 | static __thread int set_thread_area_entry_number = -1; | 245 | static __thread int set_thread_area_entry_number = -1; |
244 | 246 | ||
245 | static void do_unexpected_base(void) | 247 | static unsigned short load_gs(void) |
246 | { | 248 | { |
247 | /* | 249 | /* |
248 | * The goal here is to try to arrange for GS == 0, GSBASE != | 250 | * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think |
249 | * 0, and for the the kernel the think that GSBASE == 0. | 251 | * that GSBASE == 0 (i.e. thread.gsbase == 0). |
250 | * | ||
251 | * To make the test as reliable as possible, this uses | ||
252 | * explicit descriptors. (This is not the only way. This | ||
253 | * could use ARCH_SET_GS with a low, nonzero base, but the | ||
254 | * relevant side effect of ARCH_SET_GS could change.) | ||
255 | */ | 252 | */ |
256 | 253 | ||
257 | /* Step 1: tell the kernel that we have GSBASE == 0. */ | 254 | /* Step 1: tell the kernel that we have GSBASE == 0. */ |
@@ -271,8 +268,9 @@ static void do_unexpected_base(void) | |||
271 | .useable = 0 | 268 | .useable = 0 |
272 | }; | 269 | }; |
273 | if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { | 270 | if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { |
274 | printf("\tother thread: using LDT slot 0\n"); | 271 | printf("\tusing LDT slot 0\n"); |
275 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); | 272 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); |
273 | return 0x7; | ||
276 | } else { | 274 | } else { |
277 | /* No modify_ldt for us (configured out, perhaps) */ | 275 | /* No modify_ldt for us (configured out, perhaps) */ |
278 | 276 | ||
@@ -294,20 +292,15 @@ static void do_unexpected_base(void) | |||
294 | 292 | ||
295 | if (ret != 0) { | 293 | if (ret != 0) { |
296 | printf("[NOTE]\tcould not create a segment -- test won't do anything\n"); | 294 | printf("[NOTE]\tcould not create a segment -- test won't do anything\n"); |
297 | return; | 295 | return 0; |
298 | } | 296 | } |
299 | printf("\tother thread: using GDT slot %d\n", desc.entry_number); | 297 | printf("\tusing GDT slot %d\n", desc.entry_number); |
300 | set_thread_area_entry_number = desc.entry_number; | 298 | set_thread_area_entry_number = desc.entry_number; |
301 | 299 | ||
302 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3))); | 300 | unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3); |
301 | asm volatile ("mov %0, %%gs" : : "rm" (gs)); | ||
302 | return gs; | ||
303 | } | 303 | } |
304 | |||
305 | /* | ||
306 | * Step 3: set the selector back to zero. On AMD chips, this will | ||
307 | * preserve GSBASE. | ||
308 | */ | ||
309 | |||
310 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); | ||
311 | } | 304 | } |
312 | 305 | ||
313 | void test_wrbase(unsigned short index, unsigned long base) | 306 | void test_wrbase(unsigned short index, unsigned long base) |
@@ -346,12 +339,19 @@ static void *threadproc(void *ctx) | |||
346 | if (ftx == 3) | 339 | if (ftx == 3) |
347 | return NULL; | 340 | return NULL; |
348 | 341 | ||
349 | if (ftx == 1) | 342 | if (ftx == 1) { |
350 | do_remote_base(); | 343 | do_remote_base(); |
351 | else if (ftx == 2) | 344 | } else if (ftx == 2) { |
352 | do_unexpected_base(); | 345 | /* |
353 | else | 346 | * On AMD chips, this causes GSBASE != 0, GS == 0, and |
347 | * thread.gsbase == 0. | ||
348 | */ | ||
349 | |||
350 | load_gs(); | ||
351 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); | ||
352 | } else { | ||
354 | errx(1, "helper thread got bad command"); | 353 | errx(1, "helper thread got bad command"); |
354 | } | ||
355 | 355 | ||
356 | ftx = 0; | 356 | ftx = 0; |
357 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); | 357 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); |
@@ -453,12 +453,7 @@ static void test_ptrace_write_gsbase(void) | |||
453 | if (child == 0) { | 453 | if (child == 0) { |
454 | printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n"); | 454 | printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n"); |
455 | 455 | ||
456 | /* | 456 | *shared_scratch = load_gs(); |
457 | * Use the LDT setup and fetch the GSBASE from the LDT | ||
458 | * by switching to the (nonzero) selector (again) | ||
459 | */ | ||
460 | do_unexpected_base(); | ||
461 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); | ||
462 | 457 | ||
463 | if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) | 458 | if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) |
464 | err(1, "PTRACE_TRACEME"); | 459 | err(1, "PTRACE_TRACEME"); |
@@ -476,7 +471,7 @@ static void test_ptrace_write_gsbase(void) | |||
476 | 471 | ||
477 | gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL); | 472 | gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL); |
478 | 473 | ||
479 | if (gs != 0x7) { | 474 | if (gs != *shared_scratch) { |
480 | nerrs++; | 475 | nerrs++; |
481 | printf("[FAIL]\tGS is not prepared with nonzero\n"); | 476 | printf("[FAIL]\tGS is not prepared with nonzero\n"); |
482 | goto END; | 477 | goto END; |
@@ -494,16 +489,24 @@ static void test_ptrace_write_gsbase(void) | |||
494 | * selector value is changed or not by the GSBASE write in | 489 | * selector value is changed or not by the GSBASE write in |
495 | * a ptracer. | 490 | * a ptracer. |
496 | */ | 491 | */ |
497 | if (gs != 0x7) { | 492 | if (gs != *shared_scratch) { |
498 | nerrs++; | 493 | nerrs++; |
499 | printf("[FAIL]\tGS changed to %lx\n", gs); | 494 | printf("[FAIL]\tGS changed to %lx\n", gs); |
495 | |||
496 | /* | ||
497 | * On older kernels, poking a nonzero value into the | ||
498 | * base would zero the selector. On newer kernels, | ||
499 | * this behavior has changed -- poking the base | ||
500 | * changes only the base and, if FSGSBASE is not | ||
501 | * available, this may have no effect. | ||
502 | */ | ||
503 | if (gs == 0) | ||
504 | printf("\tNote: this is expected behavior on older kernels.\n"); | ||
500 | } else if (have_fsgsbase && (base != 0xFF)) { | 505 | } else if (have_fsgsbase && (base != 0xFF)) { |
501 | nerrs++; | 506 | nerrs++; |
502 | printf("[FAIL]\tGSBASE changed to %lx\n", base); | 507 | printf("[FAIL]\tGSBASE changed to %lx\n", base); |
503 | } else { | 508 | } else { |
504 | printf("[OK]\tGS remained 0x7 %s"); | 509 | printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : ""); |
505 | if (have_fsgsbase) | ||
506 | printf("and GSBASE changed to 0xFF"); | ||
507 | printf("\n"); | 510 | printf("\n"); |
508 | } | 511 | } |
509 | } | 512 | } |
@@ -516,6 +519,9 @@ int main() | |||
516 | { | 519 | { |
517 | pthread_t thread; | 520 | pthread_t thread; |
518 | 521 | ||
522 | shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, | ||
523 | MAP_ANONYMOUS | MAP_SHARED, -1, 0); | ||
524 | |||
519 | /* Probe FSGSBASE */ | 525 | /* Probe FSGSBASE */ |
520 | sethandler(SIGILL, sigill, 0); | 526 | sethandler(SIGILL, sigill, 0); |
521 | if (sigsetjmp(jmpbuf, 1) == 0) { | 527 | if (sigsetjmp(jmpbuf, 1) == 0) { |
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c index 4e25d38c8bbd..bc0ecc2e862e 100644 --- a/tools/testing/selftests/x86/syscall_arg_fault.c +++ b/tools/testing/selftests/x86/syscall_arg_fault.c | |||
@@ -15,9 +15,30 @@ | |||
15 | #include <setjmp.h> | 15 | #include <setjmp.h> |
16 | #include <errno.h> | 16 | #include <errno.h> |
17 | 17 | ||
18 | #ifdef __x86_64__ | ||
19 | # define WIDTH "q" | ||
20 | #else | ||
21 | # define WIDTH "l" | ||
22 | #endif | ||
23 | |||
18 | /* Our sigaltstack scratch space. */ | 24 | /* Our sigaltstack scratch space. */ |
19 | static unsigned char altstack_data[SIGSTKSZ]; | 25 | static unsigned char altstack_data[SIGSTKSZ]; |
20 | 26 | ||
27 | static unsigned long get_eflags(void) | ||
28 | { | ||
29 | unsigned long eflags; | ||
30 | asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); | ||
31 | return eflags; | ||
32 | } | ||
33 | |||
34 | static void set_eflags(unsigned long eflags) | ||
35 | { | ||
36 | asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH | ||
37 | : : "rm" (eflags) : "flags"); | ||
38 | } | ||
39 | |||
40 | #define X86_EFLAGS_TF (1UL << 8) | ||
41 | |||
21 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | 42 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), |
22 | int flags) | 43 | int flags) |
23 | { | 44 | { |
@@ -35,13 +56,22 @@ static sigjmp_buf jmpbuf; | |||
35 | 56 | ||
36 | static volatile sig_atomic_t n_errs; | 57 | static volatile sig_atomic_t n_errs; |
37 | 58 | ||
59 | #ifdef __x86_64__ | ||
60 | #define REG_AX REG_RAX | ||
61 | #define REG_IP REG_RIP | ||
62 | #else | ||
63 | #define REG_AX REG_EAX | ||
64 | #define REG_IP REG_EIP | ||
65 | #endif | ||
66 | |||
38 | static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void) | 67 | static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void) |
39 | { | 68 | { |
40 | ucontext_t *ctx = (ucontext_t*)ctx_void; | 69 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
70 | long ax = (long)ctx->uc_mcontext.gregs[REG_AX]; | ||
41 | 71 | ||
42 | if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) { | 72 | if (ax != -EFAULT && ax != -ENOSYS) { |
43 | printf("[FAIL]\tAX had the wrong value: 0x%x\n", | 73 | printf("[FAIL]\tAX had the wrong value: 0x%lx\n", |
44 | ctx->uc_mcontext.gregs[REG_EAX]); | 74 | (unsigned long)ax); |
45 | n_errs++; | 75 | n_errs++; |
46 | } else { | 76 | } else { |
47 | printf("[OK]\tSeems okay\n"); | 77 | printf("[OK]\tSeems okay\n"); |
@@ -50,9 +80,42 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void) | |||
50 | siglongjmp(jmpbuf, 1); | 80 | siglongjmp(jmpbuf, 1); |
51 | } | 81 | } |
52 | 82 | ||
83 | static volatile sig_atomic_t sigtrap_consecutive_syscalls; | ||
84 | |||
85 | static void sigtrap(int sig, siginfo_t *info, void *ctx_void) | ||
86 | { | ||
87 | /* | ||
88 | * KVM has some bugs that can cause us to stop making progress. | ||
89 | * detect them and complain, but don't infinite loop or fail the | ||
90 | * test. | ||
91 | */ | ||
92 | |||
93 | ucontext_t *ctx = (ucontext_t*)ctx_void; | ||
94 | unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP]; | ||
95 | |||
96 | if (*ip == 0x340f || *ip == 0x050f) { | ||
97 | /* The trap was on SYSCALL or SYSENTER */ | ||
98 | sigtrap_consecutive_syscalls++; | ||
99 | if (sigtrap_consecutive_syscalls > 3) { | ||
100 | printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n"); | ||
101 | siglongjmp(jmpbuf, 1); | ||
102 | } | ||
103 | } else { | ||
104 | sigtrap_consecutive_syscalls = 0; | ||
105 | } | ||
106 | } | ||
107 | |||
53 | static void sigill(int sig, siginfo_t *info, void *ctx_void) | 108 | static void sigill(int sig, siginfo_t *info, void *ctx_void) |
54 | { | 109 | { |
55 | printf("[SKIP]\tIllegal instruction\n"); | 110 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
111 | unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP]; | ||
112 | |||
113 | if (*ip == 0x0b0f) { | ||
114 | /* one of the ud2 instructions faulted */ | ||
115 | printf("[OK]\tSYSCALL returned normally\n"); | ||
116 | } else { | ||
117 | printf("[SKIP]\tIllegal instruction\n"); | ||
118 | } | ||
56 | siglongjmp(jmpbuf, 1); | 119 | siglongjmp(jmpbuf, 1); |
57 | } | 120 | } |
58 | 121 | ||
@@ -120,9 +183,48 @@ int main() | |||
120 | "movl $-1, %%ebp\n\t" | 183 | "movl $-1, %%ebp\n\t" |
121 | "movl $-1, %%esp\n\t" | 184 | "movl $-1, %%esp\n\t" |
122 | "syscall\n\t" | 185 | "syscall\n\t" |
123 | "pushl $0" /* make sure we segfault cleanly */ | 186 | "ud2" /* make sure we recover cleanly */ |
187 | : : : "memory", "flags"); | ||
188 | } | ||
189 | |||
190 | printf("[RUN]\tSYSENTER with TF and invalid state\n"); | ||
191 | sethandler(SIGTRAP, sigtrap, SA_ONSTACK); | ||
192 | |||
193 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
194 | sigtrap_consecutive_syscalls = 0; | ||
195 | set_eflags(get_eflags() | X86_EFLAGS_TF); | ||
196 | asm volatile ( | ||
197 | "movl $-1, %%eax\n\t" | ||
198 | "movl $-1, %%ebx\n\t" | ||
199 | "movl $-1, %%ecx\n\t" | ||
200 | "movl $-1, %%edx\n\t" | ||
201 | "movl $-1, %%esi\n\t" | ||
202 | "movl $-1, %%edi\n\t" | ||
203 | "movl $-1, %%ebp\n\t" | ||
204 | "movl $-1, %%esp\n\t" | ||
205 | "sysenter" | ||
206 | : : : "memory", "flags"); | ||
207 | } | ||
208 | set_eflags(get_eflags() & ~X86_EFLAGS_TF); | ||
209 | |||
210 | printf("[RUN]\tSYSCALL with TF and invalid state\n"); | ||
211 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
212 | sigtrap_consecutive_syscalls = 0; | ||
213 | set_eflags(get_eflags() | X86_EFLAGS_TF); | ||
214 | asm volatile ( | ||
215 | "movl $-1, %%eax\n\t" | ||
216 | "movl $-1, %%ebx\n\t" | ||
217 | "movl $-1, %%ecx\n\t" | ||
218 | "movl $-1, %%edx\n\t" | ||
219 | "movl $-1, %%esi\n\t" | ||
220 | "movl $-1, %%edi\n\t" | ||
221 | "movl $-1, %%ebp\n\t" | ||
222 | "movl $-1, %%esp\n\t" | ||
223 | "syscall\n\t" | ||
224 | "ud2" /* make sure we recover cleanly */ | ||
124 | : : : "memory", "flags"); | 225 | : : : "memory", "flags"); |
125 | } | 226 | } |
227 | set_eflags(get_eflags() & ~X86_EFLAGS_TF); | ||
126 | 228 | ||
127 | return 0; | 229 | return 0; |
128 | } | 230 | } |