aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRicardo Neri <ricardo.neri-calderon@linux.intel.com>2017-11-05 21:27:52 -0500
committerIngo Molnar <mingo@kernel.org>2017-11-08 05:16:22 -0500
commit1e5db223696afa55e6a038fac638f759e1fdcc01 (patch)
tree18b0e3914a62787f2c07e6183bbd999147119fe1
parent3522c2a6a4f341058b8291326a945e2a2d2aaf55 (diff)
x86/umip: Add emulation code for UMIP instructions
The User-Mode Instruction Prevention (UMIP) feature present in recent Intel processors prevents a group of instructions (sgdt, sidt, sldt, smsw, and str) from being executed with CPL > 0; if they are, a general protection fault is issued. Rather than relaying to user space the general protection fault caused by the UMIP-protected instructions (in the form of a SIGSEGV signal), the fault can be trapped and the instruction emulated to provide a dummy result. This allows the kernel both to preserve its current behavior and to avoid revealing the system resources that UMIP intends to protect (i.e., the locations of the global descriptor and interrupt descriptor tables, the segment selectors of the local descriptor table, the value of the task state register and the contents of the CR0 register). This emulation is needed because certain applications (e.g., WineHQ and DOSEMU2) rely on this subset of instructions to function. Given that sldt and str are not commonly used in programs that run on WineHQ or DOSEMU2, they are not emulated. Also, emulation is provided only for 32-bit processes; 64-bit processes that attempt to use the instructions that UMIP protects will receive the SIGSEGV signal issued as a consequence of the general protection fault. The instructions protected by UMIP can be split into two groups: those which return a kernel memory address (sgdt and sidt) and those which return a value (smsw, sldt and str; the last two are not emulated). For the instructions that return a kernel memory address, applications such as WineHQ rely on the result being located in the kernel memory space, not on it being the actual location of the table. The result is emulated as a hard-coded value that lies close to the top of the kernel memory. The limits for the GDT and the IDT are set to zero. The instruction smsw is emulated to return the value that the register CR0 has at boot time as set in head_32. Care is taken to appropriately emulate the results when segmentation is used. 
That is, rather than relying on USER_DS and USER_CS, the function insn_get_addr_ref() inspects the segment descriptor pointed by the registers in pt_regs. This ensures that we correctly obtain the segment base address and the address and operand sizes even if the user space application uses a local descriptor table. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bp@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Chen Yucong <slaoub@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Huang Rui <ray.huang@amd.com> Cc: Jiri Slaby <jslaby@suse.cz> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi V. Shankar <ravi.v.shankar@intel.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Tony Luck <tony.luck@intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: ricardo.neri@intel.com Link: http://lkml.kernel.org/r/1509935277-22138-8-git-send-email-ricardo.neri-calderon@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/include/asm/umip.h12
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/umip.c321
3 files changed, 334 insertions, 0 deletions
diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h
new file mode 100644
index 000000000000..db43f2a0d92c
--- /dev/null
+++ b/arch/x86/include/asm/umip.h
@@ -0,0 +1,12 @@
#ifndef _ASM_X86_UMIP_H
#define _ASM_X86_UMIP_H

#include <linux/types.h>
#include <asm/ptrace.h>

/*
 * fixup_umip_exception() tells the caller whether the fault described by
 * @regs was handled by emulating a UMIP-protected instruction.
 */
#ifndef CONFIG_X86_INTEL_UMIP
/* UMIP emulation is compiled out: never claim the exception. */
static inline bool fixup_umip_exception(struct pt_regs *regs)
{
	return false;
}
#else
bool fixup_umip_exception(struct pt_regs *regs);
#endif /* !CONFIG_X86_INTEL_UMIP */

#endif /* _ASM_X86_UMIP_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 295abaa58add..81bb565f4497 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -127,6 +127,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
127obj-$(CONFIG_PERF_EVENTS) += perf_regs.o 127obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
128obj-$(CONFIG_TRACING) += tracepoint.o 128obj-$(CONFIG_TRACING) += tracepoint.o
129obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o 129obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
130obj-$(CONFIG_X86_INTEL_UMIP) += umip.o
130 131
131obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o 132obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
132obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o 133obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
new file mode 100644
index 000000000000..d80b816228ea
--- /dev/null
+++ b/arch/x86/kernel/umip.c
@@ -0,0 +1,321 @@
1/*
2 * umip.c Emulation for instruction protected by the Intel User-Mode
3 * Instruction Prevention feature
4 *
5 * Copyright (c) 2017, Intel Corporation.
6 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
7 */
8
9#include <linux/uaccess.h>
10#include <asm/umip.h>
11#include <asm/traps.h>
12#include <asm/insn.h>
13#include <asm/insn-eval.h>
14
/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
 *
 * The User-Mode Instruction Prevention feature present in recent Intel
 * processors prevents a group of instructions (sgdt, sidt, sldt, smsw, and
 * str) from being executed with CPL > 0; if they are, a general protection
 * fault is issued.
 *
 * Rather than relaying to user space the general protection fault caused by
 * the UMIP-protected instructions (in the form of a SIGSEGV signal), the fault
 * can be trapped and the result of such instructions emulated to provide dummy
 * values. This allows the kernel both to preserve its current behavior and to
 * avoid revealing the system resources that UMIP intends to protect (i.e., the
 * locations of the global descriptor and interrupt descriptor tables, the
 * segment selectors of the local descriptor table, the value of the task state
 * register and the contents of the CR0 register).
 *
 * This emulation is needed because certain applications (e.g., WineHQ and
 * DOSEMU2) rely on this subset of instructions to function.
 *
 * The instructions protected by UMIP can be split into two groups: those which
 * return a kernel memory address (sgdt and sidt) and those which return a
 * value (sldt, str and smsw).
 *
 * For the instructions that return a kernel memory address, applications
 * such as WineHQ rely on the result being located in the kernel memory space,
 * not on it being the actual location of the table. The result is emulated as
 * a hard-coded value that lies close to the top of the kernel memory. The
 * limits for the GDT and the IDT are set to zero.
 *
 * Given that sldt and str are not commonly used in programs that run on WineHQ
 * or DOSEMU2, they are not emulated.
 *
 * The instruction smsw is emulated to return the value that the register CR0
 * has at boot time as set in head_32.
 *
 * Also, emulation is provided only for 32-bit processes; 64-bit processes
 * that attempt to use the instructions that UMIP protects will receive the
 * SIGSEGV signal issued as a consequence of the general protection fault.
 *
 * Care is taken to appropriately emulate the results when segmentation is
 * used. That is, rather than relying on USER_DS and USER_CS, the function
 * insn_get_addr_ref() inspects the segment descriptor pointed to by the
 * registers in pt_regs. This ensures that we correctly obtain the segment
 * base address and the address and operand sizes even if the user space
 * application uses a local descriptor table.
 */
61
/*
 * Dummy table base addresses stored by the emulated SGDT and SIDT
 * instructions in place of the real GDT and IDT locations.
 */
#define UMIP_DUMMY_GDT_BASE 0xfffe0000
#define UMIP_DUMMY_IDT_BASE 0xffff0000

/*
 * The SGDT and SIDT instructions store the contents of the global descriptor
 * table and interrupt descriptor table registers, respectively. The
 * destination is a memory operand of X+2 bytes. X bytes are used to store the
 * base address of the table and 2 bytes are used to store the limit. In 32-bit
 * processes, the only processes for which emulation is provided, X has a value
 * of 4.
 */
#define UMIP_GDT_IDT_BASE_SIZE 4
#define UMIP_GDT_IDT_LIMIT_SIZE 2

/*
 * Identifiers for the instructions that can be emulated, as returned by
 * identify_insn() and consumed by emulate_umip_insn(). They are internal
 * identifiers, not ModRM.reg values; the trailing comment on each line gives
 * the instruction encoding (opcode bytes followed by /ModRM.reg).
 */
#define UMIP_INST_SGDT 0 /* 0F 01 /0 */
#define UMIP_INST_SIDT 1 /* 0F 01 /1 */
#define UMIP_INST_SMSW 3 /* 0F 01 /4 */
78
79/**
80 * identify_insn() - Identify a UMIP-protected instruction
81 * @insn: Instruction structure with opcode and ModRM byte.
82 *
83 * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected
84 * instruction that can be emulated.
85 *
86 * Returns:
87 *
88 * On success, a constant identifying a specific UMIP-protected instruction that
89 * can be emulated.
90 *
91 * -EINVAL on error or when not an UMIP-protected instruction that can be
92 * emulated.
93 */
94static int identify_insn(struct insn *insn)
95{
96 /* By getting modrm we also get the opcode. */
97 insn_get_modrm(insn);
98
99 if (!insn->modrm.nbytes)
100 return -EINVAL;
101
102 /* All the instructions of interest start with 0x0f. */
103 if (insn->opcode.bytes[0] != 0xf)
104 return -EINVAL;
105
106 if (insn->opcode.bytes[1] == 0x1) {
107 switch (X86_MODRM_REG(insn->modrm.value)) {
108 case 0:
109 return UMIP_INST_SGDT;
110 case 1:
111 return UMIP_INST_SIDT;
112 case 4:
113 return UMIP_INST_SMSW;
114 default:
115 return -EINVAL;
116 }
117 }
118
119 /* SLDT AND STR are not emulated */
120 return -EINVAL;
121}
122
123/**
124 * emulate_umip_insn() - Emulate UMIP instructions and return dummy values
125 * @insn: Instruction structure with operands
126 * @umip_inst: A constant indicating the instruction to emulate
127 * @data: Buffer into which the dummy result is stored
128 * @data_size: Size of the emulated result
129 *
130 * Emulate an instruction protected by UMIP and provide a dummy result. The
131 * result of the emulation is saved in @data. The size of the results depends
132 * on both the instruction and type of operand (register vs memory address).
133 * The size of the result is updated in @data_size. Caller is responsible
134 * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE +
135 * UMIP_GDT_IDT_LIMIT_SIZE bytes.
136 *
137 * Returns:
138 *
139 * 0 on success, -EINVAL on error while emulating.
140 */
141static int emulate_umip_insn(struct insn *insn, int umip_inst,
142 unsigned char *data, int *data_size)
143{
144 unsigned long dummy_base_addr, dummy_value;
145 unsigned short dummy_limit = 0;
146
147 if (!data || !data_size || !insn)
148 return -EINVAL;
149 /*
150 * These two instructions return the base address and limit of the
151 * global and interrupt descriptor table, respectively. According to the
152 * Intel Software Development manual, the base address can be 24-bit,
153 * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is
154 * 16-bit, the returned value of the base address is supposed to be a
155 * zero-extended 24-byte number. However, it seems that a 32-byte number
156 * is always returned irrespective of the operand size.
157 */
158 if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
159 /* SGDT and SIDT do not use registers operands. */
160 if (X86_MODRM_MOD(insn->modrm.value) == 3)
161 return -EINVAL;
162
163 if (umip_inst == UMIP_INST_SGDT)
164 dummy_base_addr = UMIP_DUMMY_GDT_BASE;
165 else
166 dummy_base_addr = UMIP_DUMMY_IDT_BASE;
167
168 *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE;
169
170 memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE);
171 memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
172
173 } else if (umip_inst == UMIP_INST_SMSW) {
174 dummy_value = CR0_STATE;
175
176 /*
177 * Even though the CR0 register has 4 bytes, the number
178 * of bytes to be copied in the result buffer is determined
179 * by whether the operand is a register or a memory location.
180 * If operand is a register, return as many bytes as the operand
181 * size. If operand is memory, return only the two least
182 * siginificant bytes of CR0.
183 */
184 if (X86_MODRM_MOD(insn->modrm.value) == 3)
185 *data_size = insn->opnd_bytes;
186 else
187 *data_size = 2;
188
189 memcpy(data, &dummy_value, *data_size);
190 /* STR and SLDT are not emulated */
191 } else {
192 return -EINVAL;
193 }
194
195 return 0;
196}
197
198/**
199 * fixup_umip_exception() - Fixup a general protection fault caused by UMIP
200 * @regs: Registers as saved when entering the #GP handler
201 *
202 * The instructions sgdt, sidt, str, smsw, sldt cause a general protection
203 * fault if executed with CPL > 0 (i.e., from user space). If the offending
204 * user-space process is not in long mode, this function fixes the exception
205 * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not
206 * fixed up. Also long mode user-space processes are not fixed up.
207 *
208 * If operands are memory addresses, results are copied to user-space memory as
209 * indicated by the instruction pointed by eIP using the registers indicated in
210 * the instruction operands. If operands are registers, results are copied into
211 * the context that was saved when entering kernel mode.
212 *
213 * Returns:
214 *
215 * True if emulation was successful; false if not.
216 */
217bool fixup_umip_exception(struct pt_regs *regs)
218{
219 int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst;
220 unsigned long seg_base = 0, *reg_addr;
221 /* 10 bytes is the maximum size of the result of UMIP instructions */
222 unsigned char dummy_data[10] = { 0 };
223 unsigned char buf[MAX_INSN_SIZE];
224 void __user *uaddr;
225 struct insn insn;
226 char seg_defs;
227
228 if (!regs)
229 return false;
230
231 /* Do not emulate 64-bit processes. */
232 if (user_64bit_mode(regs))
233 return false;
234
235 /*
236 * If not in user-space long mode, a custom code segment could be in
237 * use. This is true in protected mode (if the process defined a local
238 * descriptor table), or virtual-8086 mode. In most of the cases
239 * seg_base will be zero as in USER_CS.
240 */
241 if (!user_64bit_mode(regs))
242 seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
243
244 if (seg_base == -1L)
245 return false;
246
247 not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
248 sizeof(buf));
249 nr_copied = sizeof(buf) - not_copied;
250
251 /*
252 * The copy_from_user above could have failed if user code is protected
253 * by a memory protection key. Give up on emulation in such a case.
254 * Should we issue a page fault?
255 */
256 if (!nr_copied)
257 return false;
258
259 insn_init(&insn, buf, nr_copied, user_64bit_mode(regs));
260
261 /*
262 * Override the default operand and address sizes with what is specified
263 * in the code segment descriptor. The instruction decoder only sets
264 * the address size it to either 4 or 8 address bytes and does nothing
265 * for the operand bytes. This OK for most of the cases, but we could
266 * have special cases where, for instance, a 16-bit code segment
267 * descriptor is used.
268 * If there is an address override prefix, the instruction decoder
269 * correctly updates these values, even for 16-bit defaults.
270 */
271 seg_defs = insn_get_code_seg_params(regs);
272 if (seg_defs == -EINVAL)
273 return false;
274
275 insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
276 insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
277
278 insn_get_length(&insn);
279 if (nr_copied < insn.length)
280 return false;
281
282 umip_inst = identify_insn(&insn);
283 if (umip_inst < 0)
284 return false;
285
286 if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
287 return false;
288
289 /*
290 * If operand is a register, write result to the copy of the register
291 * value that was pushed to the stack when entering into kernel mode.
292 * Upon exit, the value we write will be restored to the actual hardware
293 * register.
294 */
295 if (X86_MODRM_MOD(insn.modrm.value) == 3) {
296 reg_offset = insn_get_modrm_rm_off(&insn, regs);
297
298 /*
299 * Negative values are usually errors. In memory addressing,
300 * the exception is -EDOM. Since we expect a register operand,
301 * all negative values are errors.
302 */
303 if (reg_offset < 0)
304 return false;
305
306 reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
307 memcpy(reg_addr, dummy_data, dummy_data_size);
308 } else {
309 uaddr = insn_get_addr_ref(&insn, regs);
310 if ((unsigned long)uaddr == -1L)
311 return false;
312
313 nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
314 if (nr_copied > 0)
315 return false;
316 }
317
318 /* increase IP to let the program keep going */
319 regs->ip += insn.length;
320 return true;
321}