about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-11-03 21:59:10 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-11-03 21:59:10 -0500
commit: a75a3f6fc92888e4119744d8594ffdf748c3d444 (patch)
tree: 06c344beb369b1067a5621a175fae04508ba8d0d
parent: d2bea739f8b41d620c235d81e00289d01169dc3c (diff)
parent: 3bd29515d1cad26fa85a1a9b442de8816c1f5c54 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
 "The main change in this cycle is another step in the big x86 system
  call interface rework by Andy Lutomirski, which moves most of the low
  level x86 entry code from assembly to C, for all syscall entries
  except native 64-bit system calls:

    arch/x86/entry/entry_32.S        | 182 ++++------
    arch/x86/entry/entry_64_compat.S | 547 ++++++++-----------------------
    194 insertions(+), 535 deletions(-)

  ... our hope is that the final remaining step (converting native
  64-bit system calls) will be less painful as all the previous steps,
  given that most of the legacies and quirks are concentrated around
  native 32-bit and compat environments"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits)
  x86/entry/32: Fix FS and GS restore in opportunistic SYSEXIT
  x86/entry/32: Fix entry_INT80_32() to expect interrupts to be on
  um/x86: Fix build after x86 syscall changes
  x86/asm: Remove the xyz_cfi macros from dwarf2.h
  selftests/x86: Style fixes for the 'unwind_vdso' test
  x86/entry/64/compat: Document sysenter_fix_flags's reason for existence
  x86/entry: Split and inline syscall_return_slowpath()
  x86/entry: Split and inline prepare_exit_to_usermode()
  x86/entry: Use pt_regs_to_thread_info() in syscall entry tracing
  x86/entry: Hide two syscall entry assertions behind CONFIG_DEBUG_ENTRY
  x86/entry: Micro-optimize compat fast syscall arg fetch
  x86/entry: Force inlining of 32-bit syscall code
  x86/entry: Make irqs_disabled checks in exit code depend on lockdep
  x86/entry: Remove unnecessary IRQ twiddling in fast 32-bit syscalls
  x86/asm: Remove thread_info.sysenter_return
  x86/entry/32: Re-implement SYSENTER using the new C path
  x86/entry/32: Switch INT80 to the new C syscall path
  x86/entry/32: Open-code return tracking from fork and kthreads
  x86/entry/compat: Implement opportunistic SYSRETL for compat syscalls
  x86/vdso/compat: Wire up SYSENTER and SYSCSALL for compat userspace
  ...
-rw-r--r-- arch/x86/Kconfig 49
-rw-r--r-- arch/x86/Makefile 10
-rw-r--r-- arch/x86/entry/common.c 264
-rw-r--r-- arch/x86/entry/entry_32.S 182
-rw-r--r-- arch/x86/entry/entry_64.S 9
-rw-r--r-- arch/x86/entry/entry_64_compat.S 547
-rw-r--r-- arch/x86/entry/syscall_32.c 9
-rw-r--r-- arch/x86/entry/syscall_64.c 4
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl 12
-rw-r--r-- arch/x86/entry/vdso/Makefile 39
-rw-r--r-- arch/x86/entry/vdso/vdso2c.c 2
-rw-r--r-- arch/x86/entry/vdso/vdso32-setup.c 28
-rw-r--r-- arch/x86/entry/vdso/vdso32/int80.S 56
-rw-r--r-- arch/x86/entry/vdso/vdso32/syscall.S 75
-rw-r--r-- arch/x86/entry/vdso/vdso32/sysenter.S 116
-rw-r--r-- arch/x86/entry/vdso/vdso32/system_call.S 57
-rw-r--r-- arch/x86/entry/vdso/vma.c 13
-rw-r--r-- arch/x86/entry/vsyscall/vsyscall_64.c 9
-rw-r--r-- arch/x86/ia32/ia32_signal.c 4
-rw-r--r-- arch/x86/include/asm/dwarf2.h 84
-rw-r--r-- arch/x86/include/asm/elf.h 10
-rw-r--r-- arch/x86/include/asm/processor.h 4
-rw-r--r-- arch/x86/include/asm/switch_to.h 12
-rw-r--r-- arch/x86/include/asm/syscall.h 14
-rw-r--r-- arch/x86/include/asm/thread_info.h 1
-rw-r--r-- arch/x86/include/asm/uaccess.h 14
-rw-r--r-- arch/x86/include/asm/vdso.h 10
-rw-r--r-- arch/x86/kernel/asm-offsets.c 3
-rw-r--r-- arch/x86/kernel/signal.c 4
-rw-r--r-- arch/x86/um/asm/syscall.h 4
-rw-r--r-- arch/x86/um/sys_call_table_32.c 7
-rw-r--r-- arch/x86/um/sys_call_table_64.c 7
-rw-r--r-- arch/x86/xen/setup.c 13
-rw-r--r-- tools/testing/selftests/x86/Makefile 6
-rw-r--r-- tools/testing/selftests/x86/ptrace_syscall.c 294
-rw-r--r-- tools/testing/selftests/x86/raw_syscall_helper_32.S 46
-rw-r--r-- tools/testing/selftests/x86/test_syscall_vdso.c 401
-rw-r--r-- tools/testing/selftests/x86/thunks_32.S 55
-rw-r--r-- tools/testing/selftests/x86/unwind_vdso.c 211
39 files changed, 1709 insertions, 976 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 255ea22ccbec..db3622f22b61 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2027,6 +2027,55 @@ config COMPAT_VDSO
2027 If unsure, say N: if you are compiling your own kernel, you 2027 If unsure, say N: if you are compiling your own kernel, you
2028 are unlikely to be using a buggy version of glibc. 2028 are unlikely to be using a buggy version of glibc.
2029 2029
2030choice
2031 prompt "vsyscall table for legacy applications"
2032 depends on X86_64
2033 default LEGACY_VSYSCALL_EMULATE
2034 help
2035 Legacy user code that does not know how to find the vDSO expects
2036 to be able to issue three syscalls by calling fixed addresses in
2037 kernel space. Since this location is not randomized with ASLR,
2038 it can be used to assist security vulnerability exploitation.
2039
2040 This setting can be changed at boot time via the kernel command
2041 line parameter vsyscall=[native|emulate|none].
2042
2043 On a system with recent enough glibc (2.14 or newer) and no
2044 static binaries, you can say None without a performance penalty
2045 to improve security.
2046
2047 If unsure, select "Emulate".
2048
2049 config LEGACY_VSYSCALL_NATIVE
2050 bool "Native"
2051 help
2052 Actual executable code is located in the fixed vsyscall
2053 address mapping, implementing time() efficiently. Since
2054 this makes the mapping executable, it can be used during
2055 security vulnerability exploitation (traditionally as
2056 ROP gadgets). This configuration is not recommended.
2057
2058 config LEGACY_VSYSCALL_EMULATE
2059 bool "Emulate"
2060 help
2061 The kernel traps and emulates calls into the fixed
2062 vsyscall address mapping. This makes the mapping
2063 non-executable, but it still contains known contents,
2064 which could be used in certain rare security vulnerability
2065 exploits. This configuration is recommended when userspace
2066 still uses the vsyscall area.
2067
2068 config LEGACY_VSYSCALL_NONE
2069 bool "None"
2070 help
2071 There will be no vsyscall mapping at all. This will
2072 eliminate any risk of ASLR bypass due to the vsyscall
2073 fixed address mapping. Attempts to use the vsyscalls
2074 will be reported to dmesg, so that either old or
2075 malicious userspace programs can be identified.
2076
2077endchoice
2078
2030config CMDLINE_BOOL 2079config CMDLINE_BOOL
2031 bool "Built-in kernel command line" 2080 bool "Built-in kernel command line"
2032 ---help--- 2081 ---help---
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 747860c696e1..2dfaa72260b4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -159,6 +159,12 @@ endif
159sp-$(CONFIG_X86_32) := esp 159sp-$(CONFIG_X86_32) := esp
160sp-$(CONFIG_X86_64) := rsp 160sp-$(CONFIG_X86_64) := rsp
161 161
162# do binutils support CFI?
163cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
164# is .cfi_signal_frame supported too?
165cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
166cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
167
162# does binutils support specific instructions? 168# does binutils support specific instructions?
163asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) 169asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
164asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) 170asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
@@ -166,8 +172,8 @@ asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
166avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) 172avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
167avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) 173avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
168 174
169KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) 175KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
170KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) 176KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
171 177
172LDFLAGS := -m elf_$(UTS_MACHINE) 178LDFLAGS := -m elf_$(UTS_MACHINE)
173 179
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 80dcc9261ca3..a89fdbc1f0be 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -24,10 +24,19 @@
24 24
25#include <asm/desc.h> 25#include <asm/desc.h>
26#include <asm/traps.h> 26#include <asm/traps.h>
27#include <asm/vdso.h>
28#include <asm/uaccess.h>
27 29
28#define CREATE_TRACE_POINTS 30#define CREATE_TRACE_POINTS
29#include <trace/events/syscalls.h> 31#include <trace/events/syscalls.h>
30 32
33static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
34{
35 unsigned long top_of_stack =
36 (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
37 return (struct thread_info *)(top_of_stack - THREAD_SIZE);
38}
39
31#ifdef CONFIG_CONTEXT_TRACKING 40#ifdef CONFIG_CONTEXT_TRACKING
32/* Called on entry from user mode with IRQs off. */ 41/* Called on entry from user mode with IRQs off. */
33__visible void enter_from_user_mode(void) 42__visible void enter_from_user_mode(void)
@@ -66,13 +75,14 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
66 */ 75 */
67unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) 76unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
68{ 77{
78 struct thread_info *ti = pt_regs_to_thread_info(regs);
69 unsigned long ret = 0; 79 unsigned long ret = 0;
70 u32 work; 80 u32 work;
71 81
72 BUG_ON(regs != task_pt_regs(current)); 82 if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
83 BUG_ON(regs != task_pt_regs(current));
73 84
74 work = ACCESS_ONCE(current_thread_info()->flags) & 85 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
75 _TIF_WORK_SYSCALL_ENTRY;
76 86
77#ifdef CONFIG_CONTEXT_TRACKING 87#ifdef CONFIG_CONTEXT_TRACKING
78 /* 88 /*
@@ -154,11 +164,12 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
154long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, 164long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
155 unsigned long phase1_result) 165 unsigned long phase1_result)
156{ 166{
167 struct thread_info *ti = pt_regs_to_thread_info(regs);
157 long ret = 0; 168 long ret = 0;
158 u32 work = ACCESS_ONCE(current_thread_info()->flags) & 169 u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
159 _TIF_WORK_SYSCALL_ENTRY;
160 170
161 BUG_ON(regs != task_pt_regs(current)); 171 if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
172 BUG_ON(regs != task_pt_regs(current));
162 173
163 /* 174 /*
164 * If we stepped into a sysenter/syscall insn, it trapped in 175 * If we stepped into a sysenter/syscall insn, it trapped in
@@ -207,19 +218,12 @@ long syscall_trace_enter(struct pt_regs *regs)
207 return syscall_trace_enter_phase2(regs, arch, phase1_result); 218 return syscall_trace_enter_phase2(regs, arch, phase1_result);
208} 219}
209 220
210static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) 221#define EXIT_TO_USERMODE_LOOP_FLAGS \
211{ 222 (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
212 unsigned long top_of_stack = 223 _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
213 (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
214 return (struct thread_info *)(top_of_stack - THREAD_SIZE);
215}
216 224
217/* Called with IRQs disabled. */ 225static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
218__visible void prepare_exit_to_usermode(struct pt_regs *regs)
219{ 226{
220 if (WARN_ON(!irqs_disabled()))
221 local_irq_disable();
222
223 /* 227 /*
224 * In order to return to user mode, we need to have IRQs off with 228 * In order to return to user mode, we need to have IRQs off with
225 * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY, 229 * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
@@ -229,14 +233,6 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
229 * work to clear some of the flags can sleep. 233 * work to clear some of the flags can sleep.
230 */ 234 */
231 while (true) { 235 while (true) {
232 u32 cached_flags =
233 READ_ONCE(pt_regs_to_thread_info(regs)->flags);
234
235 if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
236 _TIF_UPROBE | _TIF_NEED_RESCHED |
237 _TIF_USER_RETURN_NOTIFY)))
238 break;
239
240 /* We have work to do. */ 236 /* We have work to do. */
241 local_irq_enable(); 237 local_irq_enable();
242 238
@@ -260,50 +256,81 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
260 256
261 /* Disable IRQs and retry */ 257 /* Disable IRQs and retry */
262 local_irq_disable(); 258 local_irq_disable();
259
260 cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
261
262 if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
263 break;
264
263 } 265 }
266}
267
268/* Called with IRQs disabled. */
269__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
270{
271 u32 cached_flags;
272
273 if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
274 local_irq_disable();
275
276 lockdep_sys_exit();
277
278 cached_flags =
279 READ_ONCE(pt_regs_to_thread_info(regs)->flags);
280
281 if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
282 exit_to_usermode_loop(regs, cached_flags);
264 283
265 user_enter(); 284 user_enter();
266} 285}
267 286
287#define SYSCALL_EXIT_WORK_FLAGS \
288 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
289 _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
290
291static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
292{
293 bool step;
294
295 audit_syscall_exit(regs);
296
297 if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
298 trace_sys_exit(regs, regs->ax);
299
300 /*
301 * If TIF_SYSCALL_EMU is set, we only get here because of
302 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
303 * We already reported this syscall instruction in
304 * syscall_trace_enter().
305 */
306 step = unlikely(
307 (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
308 == _TIF_SINGLESTEP);
309 if (step || cached_flags & _TIF_SYSCALL_TRACE)
310 tracehook_report_syscall_exit(regs, step);
311}
312
268/* 313/*
269 * Called with IRQs on and fully valid regs. Returns with IRQs off in a 314 * Called with IRQs on and fully valid regs. Returns with IRQs off in a
270 * state such that we can immediately switch to user mode. 315 * state such that we can immediately switch to user mode.
271 */ 316 */
272__visible void syscall_return_slowpath(struct pt_regs *regs) 317__visible inline void syscall_return_slowpath(struct pt_regs *regs)
273{ 318{
274 struct thread_info *ti = pt_regs_to_thread_info(regs); 319 struct thread_info *ti = pt_regs_to_thread_info(regs);
275 u32 cached_flags = READ_ONCE(ti->flags); 320 u32 cached_flags = READ_ONCE(ti->flags);
276 bool step;
277 321
278 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 322 CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
279 323
280 if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled", 324 if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
281 regs->orig_ax)) 325 WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
282 local_irq_enable(); 326 local_irq_enable();
283 327
284 /* 328 /*
285 * First do one-time work. If these work items are enabled, we 329 * First do one-time work. If these work items are enabled, we
286 * want to run them exactly once per syscall exit with IRQs on. 330 * want to run them exactly once per syscall exit with IRQs on.
287 */ 331 */
288 if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | 332 if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
289 _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) { 333 syscall_slow_exit_work(regs, cached_flags);
290 audit_syscall_exit(regs);
291
292 if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
293 trace_sys_exit(regs, regs->ax);
294
295 /*
296 * If TIF_SYSCALL_EMU is set, we only get here because of
297 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
298 * We already reported this syscall instruction in
299 * syscall_trace_enter().
300 */
301 step = unlikely(
302 (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
303 == _TIF_SINGLESTEP);
304 if (step || cached_flags & _TIF_SYSCALL_TRACE)
305 tracehook_report_syscall_exit(regs, step);
306 }
307 334
308#ifdef CONFIG_COMPAT 335#ifdef CONFIG_COMPAT
309 /* 336 /*
@@ -316,3 +343,144 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
316 local_irq_disable(); 343 local_irq_disable();
317 prepare_exit_to_usermode(regs); 344 prepare_exit_to_usermode(regs);
318} 345}
346
347#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
348/*
349 * Does a 32-bit syscall. Called with IRQs on and does all entry and
350 * exit work and returns with IRQs off. This function is extremely hot
351 * in workloads that use it, and it's usually called from
352 * do_fast_syscall_32, so forcibly inline it to improve performance.
353 */
354#ifdef CONFIG_X86_32
355/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
356__visible
357#else
358/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
359static
360#endif
361__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
362{
363 struct thread_info *ti = pt_regs_to_thread_info(regs);
364 unsigned int nr = (unsigned int)regs->orig_ax;
365
366#ifdef CONFIG_IA32_EMULATION
367 ti->status |= TS_COMPAT;
368#endif
369
370 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
371 /*
372 * Subtlety here: if ptrace pokes something larger than
373 * 2^32-1 into orig_ax, this truncates it. This may or
374 * may not be necessary, but it matches the old asm
375 * behavior.
376 */
377 nr = syscall_trace_enter(regs);
378 }
379
380 if (likely(nr < IA32_NR_syscalls)) {
381 /*
382 * It's possible that a 32-bit syscall implementation
383 * takes a 64-bit parameter but nonetheless assumes that
384 * the high bits are zero. Make sure we zero-extend all
385 * of the args.
386 */
387 regs->ax = ia32_sys_call_table[nr](
388 (unsigned int)regs->bx, (unsigned int)regs->cx,
389 (unsigned int)regs->dx, (unsigned int)regs->si,
390 (unsigned int)regs->di, (unsigned int)regs->bp);
391 }
392
393 syscall_return_slowpath(regs);
394}
395
396#ifdef CONFIG_X86_64
397/* Handles INT80 on 64-bit kernels */
398__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
399{
400 local_irq_enable();
401 do_syscall_32_irqs_on(regs);
402}
403#endif
404
405/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
406__visible long do_fast_syscall_32(struct pt_regs *regs)
407{
408 /*
409 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
410 * convention. Adjust regs so it looks like we entered using int80.
411 */
412
413 unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
414 vdso_image_32.sym_int80_landing_pad;
415
416 /*
417 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
418 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
419 * Fix it up.
420 */
421 regs->ip = landing_pad;
422
423 /*
424 * Fetch ECX from where the vDSO stashed it.
425 *
426 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
427 */
428 local_irq_enable();
429 if (
430#ifdef CONFIG_X86_64
431 /*
432 * Micro-optimization: the pointer we're following is explicitly
433 * 32 bits, so it can't be out of range.
434 */
435 __get_user(*(u32 *)&regs->cx,
436 (u32 __user __force *)(unsigned long)(u32)regs->sp)
437#else
438 get_user(*(u32 *)&regs->cx,
439 (u32 __user __force *)(unsigned long)(u32)regs->sp)
440#endif
441 ) {
442
443 /* User code screwed up. */
444 local_irq_disable();
445 regs->ax = -EFAULT;
446#ifdef CONFIG_CONTEXT_TRACKING
447 enter_from_user_mode();
448#endif
449 prepare_exit_to_usermode(regs);
450 return 0; /* Keep it simple: use IRET. */
451 }
452
453 /* Now this is just like a normal syscall. */
454 do_syscall_32_irqs_on(regs);
455
456#ifdef CONFIG_X86_64
457 /*
458 * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
459 * SYSRETL is available on all 64-bit CPUs, so we don't need to
460 * bother with SYSEXIT.
461 *
462 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
463 * because the ECX fixup above will ensure that this is essentially
464 * never the case.
465 */
466 return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
467 regs->ip == landing_pad &&
468 (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
469#else
470 /*
471 * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
472 *
473 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
474 * because the ECX fixup above will ensure that this is essentially
475 * never the case.
476 *
477 * We don't allow syscalls at all from VM86 mode, but we still
478 * need to check VM, because we might be returning from sys_vm86.
479 */
480 return static_cpu_has(X86_FEATURE_SEP) &&
481 regs->cs == __USER_CS && regs->ss == __USER_DS &&
482 regs->ip == landing_pad &&
483 (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
484#endif
485}
486#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index b2909bf8cf70..3eb572ed3d7a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -3,7 +3,7 @@
3 * 3 *
4 * entry_32.S contains the system-call and low-level fault and trap handling routines. 4 * entry_32.S contains the system-call and low-level fault and trap handling routines.
5 * 5 *
6 * Stack layout in 'syscall_exit': 6 * Stack layout while running C code:
7 * ptrace needs to have all registers on the stack. 7 * ptrace needs to have all registers on the stack.
8 * If the order here is changed, it needs to be 8 * If the order here is changed, it needs to be
9 * updated in fork.c:copy_process(), signal.c:do_signal(), 9 * updated in fork.c:copy_process(), signal.c:do_signal(),
@@ -153,13 +153,13 @@
153 153
154#endif /* CONFIG_X86_32_LAZY_GS */ 154#endif /* CONFIG_X86_32_LAZY_GS */
155 155
156.macro SAVE_ALL 156.macro SAVE_ALL pt_regs_ax=%eax
157 cld 157 cld
158 PUSH_GS 158 PUSH_GS
159 pushl %fs 159 pushl %fs
160 pushl %es 160 pushl %es
161 pushl %ds 161 pushl %ds
162 pushl %eax 162 pushl \pt_regs_ax
163 pushl %ebp 163 pushl %ebp
164 pushl %edi 164 pushl %edi
165 pushl %esi 165 pushl %esi
@@ -211,7 +211,11 @@ ENTRY(ret_from_fork)
211 popl %eax 211 popl %eax
212 pushl $0x0202 # Reset kernel eflags 212 pushl $0x0202 # Reset kernel eflags
213 popfl 213 popfl
214 jmp syscall_exit 214
215 /* When we fork, we trace the syscall return in the child, too. */
216 movl %esp, %eax
217 call syscall_return_slowpath
218 jmp restore_all
215END(ret_from_fork) 219END(ret_from_fork)
216 220
217ENTRY(ret_from_kernel_thread) 221ENTRY(ret_from_kernel_thread)
@@ -224,7 +228,15 @@ ENTRY(ret_from_kernel_thread)
224 movl PT_EBP(%esp), %eax 228 movl PT_EBP(%esp), %eax
225 call *PT_EBX(%esp) 229 call *PT_EBX(%esp)
226 movl $0, PT_EAX(%esp) 230 movl $0, PT_EAX(%esp)
227 jmp syscall_exit 231
232 /*
233 * Kernel threads return to userspace as if returning from a syscall.
234 * We should check whether anything actually uses this path and, if so,
235 * consider switching it over to ret_from_fork.
236 */
237 movl %esp, %eax
238 call syscall_return_slowpath
239 jmp restore_all
228ENDPROC(ret_from_kernel_thread) 240ENDPROC(ret_from_kernel_thread)
229 241
230/* 242/*
@@ -255,7 +267,6 @@ ret_from_intr:
255 jb resume_kernel # not returning to v8086 or userspace 267 jb resume_kernel # not returning to v8086 or userspace
256 268
257ENTRY(resume_userspace) 269ENTRY(resume_userspace)
258 LOCKDEP_SYS_EXIT
259 DISABLE_INTERRUPTS(CLBR_ANY) 270 DISABLE_INTERRUPTS(CLBR_ANY)
260 TRACE_IRQS_OFF 271 TRACE_IRQS_OFF
261 movl %esp, %eax 272 movl %esp, %eax
@@ -276,76 +287,47 @@ need_resched:
276END(resume_kernel) 287END(resume_kernel)
277#endif 288#endif
278 289
279/*
280 * SYSENTER_RETURN points to after the SYSENTER instruction
281 * in the vsyscall page. See vsyscall-sysentry.S, which defines
282 * the symbol.
283 */
284
285 # SYSENTER call handler stub 290 # SYSENTER call handler stub
286ENTRY(entry_SYSENTER_32) 291ENTRY(entry_SYSENTER_32)
287 movl TSS_sysenter_sp0(%esp), %esp 292 movl TSS_sysenter_sp0(%esp), %esp
288sysenter_past_esp: 293sysenter_past_esp:
294 pushl $__USER_DS /* pt_regs->ss */
295 pushl %ecx /* pt_regs->cx */
296 pushfl /* pt_regs->flags (except IF = 0) */
297 orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
298 pushl $__USER_CS /* pt_regs->cs */
299 pushl $0 /* pt_regs->ip = 0 (placeholder) */
300 pushl %eax /* pt_regs->orig_ax */
301 SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
302
289 /* 303 /*
290 * Interrupts are disabled here, but we can't trace it until 304 * User mode is traced as though IRQs are on, and SYSENTER
291 * enough kernel state to call TRACE_IRQS_OFF can be called - but 305 * turned them off.
292 * we immediately enable interrupts at that point anyway.
293 */
294 pushl $__USER_DS
295 pushl %ebp
296 pushfl
297 orl $X86_EFLAGS_IF, (%esp)
298 pushl $__USER_CS
299 /*
300 * Push current_thread_info()->sysenter_return to the stack.
301 * A tiny bit of offset fixup is necessary: TI_sysenter_return
302 * is relative to thread_info, which is at the bottom of the
303 * kernel stack page. 4*4 means the 4 words pushed above;
304 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
305 * and THREAD_SIZE takes us to the bottom.
306 */ 306 */
307 pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) 307 TRACE_IRQS_OFF
308
309 pushl %eax
310 SAVE_ALL
311 ENABLE_INTERRUPTS(CLBR_NONE)
312
313/*
314 * Load the potential sixth argument from user stack.
315 * Careful about security.
316 */
317 cmpl $__PAGE_OFFSET-3, %ebp
318 jae syscall_fault
319 ASM_STAC
3201: movl (%ebp), %ebp
321 ASM_CLAC
322 movl %ebp, PT_EBP(%esp)
323 _ASM_EXTABLE(1b, syscall_fault)
324 308
325 GET_THREAD_INFO(%ebp) 309 movl %esp, %eax
310 call do_fast_syscall_32
311 testl %eax, %eax
312 jz .Lsyscall_32_done
326 313
327 testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) 314/* Opportunistic SYSEXIT */
328 jnz syscall_trace_entry 315 TRACE_IRQS_ON /* User mode traces as IRQs on. */
329sysenter_do_call: 316 movl PT_EIP(%esp), %edx /* pt_regs->ip */
330 cmpl $(NR_syscalls), %eax 317 movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
331 jae sysenter_badsys
332 call *sys_call_table(, %eax, 4)
333sysenter_after_call:
334 movl %eax, PT_EAX(%esp)
335 LOCKDEP_SYS_EXIT
336 DISABLE_INTERRUPTS(CLBR_ANY)
337 TRACE_IRQS_OFF
338 movl TI_flags(%ebp), %ecx
339 testl $_TIF_ALLWORK_MASK, %ecx
340 jnz syscall_exit_work_irqs_off
341sysenter_exit:
342/* if something modifies registers it must also disable sysexit */
343 movl PT_EIP(%esp), %edx
344 movl PT_OLDESP(%esp), %ecx
345 xorl %ebp, %ebp
346 TRACE_IRQS_ON
3471: mov PT_FS(%esp), %fs 3181: mov PT_FS(%esp), %fs
348 PTGS_TO_GS 319 PTGS_TO_GS
320 popl %ebx /* pt_regs->bx */
321 addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
322 popl %esi /* pt_regs->si */
323 popl %edi /* pt_regs->di */
324 popl %ebp /* pt_regs->bp */
325 popl %eax /* pt_regs->ax */
326
327 /*
328 * Return back to the vDSO, which will pop ecx and edx.
329 * Don't bother with DS and ES (they already contain __USER_DS).
330 */
349 ENABLE_INTERRUPTS_SYSEXIT 331 ENABLE_INTERRUPTS_SYSEXIT
350 332
351.pushsection .fixup, "ax" 333.pushsection .fixup, "ax"
@@ -359,21 +341,18 @@ ENDPROC(entry_SYSENTER_32)
359 # system call handler stub 341 # system call handler stub
360ENTRY(entry_INT80_32) 342ENTRY(entry_INT80_32)
361 ASM_CLAC 343 ASM_CLAC
362 pushl %eax # save orig_eax 344 pushl %eax /* pt_regs->orig_ax */
363 SAVE_ALL 345 SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
364 GET_THREAD_INFO(%ebp) 346
365 # system call tracing in operation / emulation 347 /*
366 testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) 348 * User mode is traced as though IRQs are on. Unlike the 64-bit
367 jnz syscall_trace_entry 349 * case, INT80 is a trap gate on 32-bit kernels, so interrupts
368 cmpl $(NR_syscalls), %eax 350 * are already on (unless user code is messing around with iopl).
369 jae syscall_badsys 351 */
370syscall_call: 352
371 call *sys_call_table(, %eax, 4) 353 movl %esp, %eax
372syscall_after_call: 354 call do_syscall_32_irqs_on
373 movl %eax, PT_EAX(%esp) # store the return value 355.Lsyscall_32_done:
374syscall_exit:
375 LOCKDEP_SYS_EXIT
376 jmp syscall_exit_work
377 356
378restore_all: 357restore_all:
379 TRACE_IRQS_IRET 358 TRACE_IRQS_IRET
@@ -450,47 +429,6 @@ ldt_ss:
450#endif 429#endif
451ENDPROC(entry_INT80_32) 430ENDPROC(entry_INT80_32)
452 431
453 # perform syscall exit tracing
454 ALIGN
455syscall_trace_entry:
456 movl $-ENOSYS, PT_EAX(%esp)
457 movl %esp, %eax
458 call syscall_trace_enter
459 /* What it returned is what we'll actually use. */
460 cmpl $(NR_syscalls), %eax
461 jnae syscall_call
462 jmp syscall_exit
463END(syscall_trace_entry)
464
465 # perform syscall exit tracing
466 ALIGN
467syscall_exit_work_irqs_off:
468 TRACE_IRQS_ON
469 ENABLE_INTERRUPTS(CLBR_ANY)
470
471syscall_exit_work:
472 movl %esp, %eax
473 call syscall_return_slowpath
474 jmp restore_all
475END(syscall_exit_work)
476
477syscall_fault:
478 ASM_CLAC
479 GET_THREAD_INFO(%ebp)
480 movl $-EFAULT, PT_EAX(%esp)
481 jmp resume_userspace
482END(syscall_fault)
483
484syscall_badsys:
485 movl $-ENOSYS, %eax
486 jmp syscall_after_call
487END(syscall_badsys)
488
489sysenter_badsys:
490 movl $-ENOSYS, %eax
491 jmp sysenter_after_call
492END(sysenter_badsys)
493
494.macro FIXUP_ESPFIX_STACK 432.macro FIXUP_ESPFIX_STACK
495/* 433/*
496 * Switch back for ESPFIX stack to the normal zerobased stack 434 * Switch back for ESPFIX stack to the normal zerobased stack
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 055a01de7c8d..53616ca03244 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -391,20 +391,16 @@ GLOBAL(stub_execveat)
391 jmp return_from_execve 391 jmp return_from_execve
392END(stub_execveat) 392END(stub_execveat)
393 393
394#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) 394#if defined(CONFIG_X86_X32_ABI)
395 .align 8 395 .align 8
396GLOBAL(stub_x32_execve) 396GLOBAL(stub_x32_execve)
397GLOBAL(stub32_execve)
398 call compat_sys_execve 397 call compat_sys_execve
399 jmp return_from_execve 398 jmp return_from_execve
400END(stub32_execve)
401END(stub_x32_execve) 399END(stub_x32_execve)
402 .align 8 400 .align 8
403GLOBAL(stub_x32_execveat) 401GLOBAL(stub_x32_execveat)
404GLOBAL(stub32_execveat)
405 call compat_sys_execveat 402 call compat_sys_execveat
406 jmp return_from_execve 403 jmp return_from_execve
407END(stub32_execveat)
408END(stub_x32_execveat) 404END(stub_x32_execveat)
409#endif 405#endif
410 406
@@ -557,7 +553,6 @@ ret_from_intr:
557 jz retint_kernel 553 jz retint_kernel
558 554
559 /* Interrupt came from user space */ 555 /* Interrupt came from user space */
560 LOCKDEP_SYS_EXIT_IRQ
561GLOBAL(retint_user) 556GLOBAL(retint_user)
562 mov %rsp,%rdi 557 mov %rsp,%rdi
563 call prepare_exit_to_usermode 558 call prepare_exit_to_usermode
@@ -587,7 +582,7 @@ retint_kernel:
587 * At this label, code paths which return to kernel and to user, 582 * At this label, code paths which return to kernel and to user,
588 * which come from interrupts/exception and from syscalls, merge. 583 * which come from interrupts/exception and from syscalls, merge.
589 */ 584 */
590restore_regs_and_iret: 585GLOBAL(restore_regs_and_iret)
591 RESTORE_EXTRA_REGS 586 RESTORE_EXTRA_REGS
592restore_c_regs_and_iret: 587restore_c_regs_and_iret:
593 RESTORE_C_REGS 588 RESTORE_C_REGS
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index a9360d40fb7f..c3201830a85e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -16,16 +16,6 @@
16#include <linux/linkage.h> 16#include <linux/linkage.h>
17#include <linux/err.h> 17#include <linux/err.h>
18 18
19/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
20#include <linux/elf-em.h>
21#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
22#define __AUDIT_ARCH_LE 0x40000000
23
24#ifndef CONFIG_AUDITSYSCALL
25# define sysexit_audit ia32_ret_from_sys_call_irqs_off
26# define sysretl_audit ia32_ret_from_sys_call_irqs_off
27#endif
28
29 .section .entry.text, "ax" 19 .section .entry.text, "ax"
30 20
31#ifdef CONFIG_PARAVIRT 21#ifdef CONFIG_PARAVIRT
@@ -58,219 +48,87 @@ ENDPROC(native_usergs_sysret32)
58 * with the int 0x80 path. 48 * with the int 0x80 path.
59 */ 49 */
60ENTRY(entry_SYSENTER_compat) 50ENTRY(entry_SYSENTER_compat)
61 /* 51 /* Interrupts are off on entry. */
62 * Interrupts are off on entry.
63 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
64 * it is too small to ever cause noticeable irq latency.
65 */
66 SWAPGS_UNSAFE_STACK 52 SWAPGS_UNSAFE_STACK
67 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 53 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
68 ENABLE_INTERRUPTS(CLBR_NONE)
69 54
70 /* Zero-extending 32-bit regs, do not remove */ 55 /*
71 movl %ebp, %ebp 56 * User tracing code (ptrace or signal handlers) might assume that
57 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
58 * syscall. Just in case the high bits are nonzero, zero-extend
59 * the syscall number. (This could almost certainly be deleted
60 * with no ill effects.)
61 */
72 movl %eax, %eax 62 movl %eax, %eax
73 63
74 movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
75
76 /* Construct struct pt_regs on stack */ 64 /* Construct struct pt_regs on stack */
77 pushq $__USER32_DS /* pt_regs->ss */ 65 pushq $__USER32_DS /* pt_regs->ss */
78 pushq %rbp /* pt_regs->sp */ 66 pushq %rcx /* pt_regs->sp */
79 pushfq /* pt_regs->flags */ 67
68 /*
69 * Push flags. This is nasty. First, interrupts are currently
70 * off, but we need pt_regs->flags to have IF set. Second, even
71 * if TF was set when SYSENTER started, it's clear by now. We fix
72 * that later using TIF_SINGLESTEP.
73 */
74 pushfq /* pt_regs->flags (except IF = 0) */
75 orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
76 ASM_CLAC /* Clear AC after saving FLAGS */
77
80 pushq $__USER32_CS /* pt_regs->cs */ 78 pushq $__USER32_CS /* pt_regs->cs */
81 pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */ 79 xorq %r8,%r8
80 pushq %r8 /* pt_regs->ip = 0 (placeholder) */
82 pushq %rax /* pt_regs->orig_ax */ 81 pushq %rax /* pt_regs->orig_ax */
83 pushq %rdi /* pt_regs->di */ 82 pushq %rdi /* pt_regs->di */
84 pushq %rsi /* pt_regs->si */ 83 pushq %rsi /* pt_regs->si */
85 pushq %rdx /* pt_regs->dx */ 84 pushq %rdx /* pt_regs->dx */
86 pushq %rcx /* pt_regs->cx */ 85 pushq %rcx /* pt_regs->cx (will be overwritten) */
87 pushq $-ENOSYS /* pt_regs->ax */ 86 pushq $-ENOSYS /* pt_regs->ax */
87 pushq %r8 /* pt_regs->r8 = 0 */
88 pushq %r8 /* pt_regs->r9 = 0 */
89 pushq %r8 /* pt_regs->r10 = 0 */
90 pushq %r8 /* pt_regs->r11 = 0 */
91 pushq %rbx /* pt_regs->rbx */
92 pushq %rbp /* pt_regs->rbp */
93 pushq %r8 /* pt_regs->r12 = 0 */
94 pushq %r8 /* pt_regs->r13 = 0 */
95 pushq %r8 /* pt_regs->r14 = 0 */
96 pushq %r8 /* pt_regs->r15 = 0 */
88 cld 97 cld
89 sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
90
91 /*
92 * no need to do an access_ok check here because rbp has been
93 * 32-bit zero extended
94 */
95 ASM_STAC
961: movl (%rbp), %ebp
97 _ASM_EXTABLE(1b, ia32_badarg)
98 ASM_CLAC
99 98
100 /* 99 /*
101 * Sysenter doesn't filter flags, so we need to clear NT 100 * Sysenter doesn't filter flags, so we need to clear NT
102 * ourselves. To save a few cycles, we can check whether 101 * ourselves. To save a few cycles, we can check whether
103 * NT was set instead of doing an unconditional popfq. 102 * NT was set instead of doing an unconditional popfq.
103 * This needs to happen before enabling interrupts so that
104 * we don't get preempted with NT set.
105 *
106 * NB.: sysenter_fix_flags is a label with the code under it moved
107 * out-of-line as an optimization: NT is unlikely to be set in the
108 * majority of the cases and instead of polluting the I$ unnecessarily,
109 * we're keeping that code behind a branch which will predict as
110 * not-taken and therefore its instructions won't be fetched.
104 */ 111 */
105 testl $X86_EFLAGS_NT, EFLAGS(%rsp) 112 testl $X86_EFLAGS_NT, EFLAGS(%rsp)
106 jnz sysenter_fix_flags 113 jnz sysenter_fix_flags
107sysenter_flags_fixed: 114sysenter_flags_fixed:
108 115
109 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
110 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
111 jnz sysenter_tracesys
112
113sysenter_do_call:
114 /* 32-bit syscall -> 64-bit C ABI argument conversion */
115 movl %edi, %r8d /* arg5 */
116 movl %ebp, %r9d /* arg6 */
117 xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
118 movl %ebx, %edi /* arg1 */
119 movl %edx, %edx /* arg3 (zero extension) */
120sysenter_dispatch:
121 cmpq $(IA32_NR_syscalls-1), %rax
122 ja 1f
123 call *ia32_sys_call_table(, %rax, 8)
124 movq %rax, RAX(%rsp)
1251:
126 DISABLE_INTERRUPTS(CLBR_NONE)
127 TRACE_IRQS_OFF
128 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
129 jnz sysexit_audit
130sysexit_from_sys_call:
131 /* 116 /*
132 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an 117 * User mode is traced as though IRQs are on, and SYSENTER
133 * NMI between STI and SYSEXIT has poorly specified behavior, 118 * turned them off.
134 * and and NMI followed by an IRQ with usergs is fatal. So
135 * we just pretend we're using SYSEXIT but we really use
136 * SYSRETL instead.
137 *
138 * This code path is still called 'sysexit' because it pairs
139 * with 'sysenter' and it uses the SYSENTER calling convention.
140 */ 119 */
141 andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
142 movl RIP(%rsp), %ecx /* User %eip */
143 movq RAX(%rsp), %rax
144 movl RSI(%rsp), %esi
145 movl RDI(%rsp), %edi
146 xorl %edx, %edx /* Do not leak kernel information */
147 xorq %r8, %r8
148 xorq %r9, %r9
149 xorq %r10, %r10
150 movl EFLAGS(%rsp), %r11d /* User eflags */
151 TRACE_IRQS_ON
152
153 /*
154 * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
155 * since it avoids a dicey window with interrupts enabled.
156 */
157 movl RSP(%rsp), %esp
158
159 /*
160 * USERGS_SYSRET32 does:
161 * gsbase = user's gs base
162 * eip = ecx
163 * rflags = r11
164 * cs = __USER32_CS
165 * ss = __USER_DS
166 *
167 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
168 *
169 * pop %ebp
170 * pop %edx
171 * pop %ecx
172 *
173 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
174 * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
175 * address (already known to user code), and R12-R15 are
176 * callee-saved and therefore don't contain any interesting
177 * kernel data.
178 */
179 USERGS_SYSRET32
180
181#ifdef CONFIG_AUDITSYSCALL
182 .macro auditsys_entry_common
183 /*
184 * At this point, registers hold syscall args in the 32-bit syscall ABI:
185 * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP.
186 *
187 * We want to pass them to __audit_syscall_entry(), which is a 64-bit
188 * C function with 5 parameters, so shuffle them to match what
189 * the function expects: RDI,RSI,RDX,RCX,R8.
190 */
191 movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */
192 xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */
193 /* arg3 (RDX) <= 2nd syscall arg (ECX) */
194 movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */
195 movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */
196 call __audit_syscall_entry
197
198 /*
199 * We are going to jump back to the syscall dispatch code.
200 * Prepare syscall args as required by the 64-bit C ABI.
201 * Registers clobbered by __audit_syscall_entry() are
202 * loaded from pt_regs on stack:
203 */
204 movl ORIG_RAX(%rsp), %eax /* syscall number */
205 movl %ebx, %edi /* arg1 */
206 movl RCX(%rsp), %esi /* arg2 */
207 movl RDX(%rsp), %edx /* arg3 */
208 movl RSI(%rsp), %ecx /* arg4 */
209 movl RDI(%rsp), %r8d /* arg5 */
210 .endm
211
212 .macro auditsys_exit exit
213 TRACE_IRQS_ON
214 ENABLE_INTERRUPTS(CLBR_NONE)
215 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
216 jnz ia32_ret_from_sys_call
217 movl %eax, %esi /* second arg, syscall return value */
218 cmpl $-MAX_ERRNO, %eax /* is it an error ? */
219 jbe 1f
220 movslq %eax, %rsi /* if error sign extend to 64 bits */
2211: setbe %al /* 1 if error, 0 if not */
222 movzbl %al, %edi /* zero-extend that into %edi */
223 call __audit_syscall_exit
224 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
225 DISABLE_INTERRUPTS(CLBR_NONE)
226 TRACE_IRQS_OFF 120 TRACE_IRQS_OFF
227 testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
228 jz \exit
229 xorl %eax, %eax /* Do not leak kernel information */
230 movq %rax, R11(%rsp)
231 movq %rax, R10(%rsp)
232 movq %rax, R9(%rsp)
233 movq %rax, R8(%rsp)
234 jmp int_ret_from_sys_call_irqs_off
235 .endm
236 121
237sysenter_auditsys: 122 movq %rsp, %rdi
238 auditsys_entry_common 123 call do_fast_syscall_32
239 movl %ebp, %r9d /* reload 6th syscall arg */ 124 testl %eax, %eax
240 jmp sysenter_dispatch 125 jz .Lsyscall_32_done
241 126 jmp sysret32_from_system_call
242sysexit_audit:
243 auditsys_exit sysexit_from_sys_call
244#endif
245 127
246sysenter_fix_flags: 128sysenter_fix_flags:
247 pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) 129 pushq $X86_EFLAGS_FIXED
248 popfq 130 popfq
249 jmp sysenter_flags_fixed 131 jmp sysenter_flags_fixed
250
251sysenter_tracesys:
252#ifdef CONFIG_AUDITSYSCALL
253 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
254 jz sysenter_auditsys
255#endif
256 SAVE_EXTRA_REGS
257 xorl %eax, %eax /* Do not leak kernel information */
258 movq %rax, R11(%rsp)
259 movq %rax, R10(%rsp)
260 movq %rax, R9(%rsp)
261 movq %rax, R8(%rsp)
262 movq %rsp, %rdi /* &pt_regs -> arg1 */
263 call syscall_trace_enter
264
265 /* Reload arg registers from stack. (see sysenter_tracesys) */
266 movl RCX(%rsp), %ecx
267 movl RDX(%rsp), %edx
268 movl RSI(%rsp), %esi
269 movl RDI(%rsp), %edi
270 movl %eax, %eax /* zero extension */
271
272 RESTORE_EXTRA_REGS
273 jmp sysenter_do_call
274ENDPROC(entry_SYSENTER_compat) 132ENDPROC(entry_SYSENTER_compat)
275 133
276/* 134/*
@@ -298,21 +156,14 @@ ENDPROC(entry_SYSENTER_compat)
298 * edi arg5 156 * edi arg5
299 * esp user stack 157 * esp user stack
300 * 0(%esp) arg6 158 * 0(%esp) arg6
301 *
302 * This is purely a fast path. For anything complicated we use the int 0x80
303 * path below. We set up a complete hardware stack frame to share code
304 * with the int 0x80 path.
305 */ 159 */
306ENTRY(entry_SYSCALL_compat) 160ENTRY(entry_SYSCALL_compat)
307 /* 161 /* Interrupts are off on entry. */
308 * Interrupts are off on entry.
309 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
310 * it is too small to ever cause noticeable irq latency.
311 */
312 SWAPGS_UNSAFE_STACK 162 SWAPGS_UNSAFE_STACK
163
164 /* Stash user ESP and switch to the kernel stack. */
313 movl %esp, %r8d 165 movl %esp, %r8d
314 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 166 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
315 ENABLE_INTERRUPTS(CLBR_NONE)
316 167
317 /* Zero-extending 32-bit regs, do not remove */ 168 /* Zero-extending 32-bit regs, do not remove */
318 movl %eax, %eax 169 movl %eax, %eax
@@ -327,162 +178,67 @@ ENTRY(entry_SYSCALL_compat)
327 pushq %rdi /* pt_regs->di */ 178 pushq %rdi /* pt_regs->di */
328 pushq %rsi /* pt_regs->si */ 179 pushq %rsi /* pt_regs->si */
329 pushq %rdx /* pt_regs->dx */ 180 pushq %rdx /* pt_regs->dx */
330 pushq %rbp /* pt_regs->cx */ 181 pushq %rcx /* pt_regs->cx (will be overwritten) */
331 movl %ebp, %ecx
332 pushq $-ENOSYS /* pt_regs->ax */ 182 pushq $-ENOSYS /* pt_regs->ax */
333 sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */ 183 xorq %r8,%r8
184 pushq %r8 /* pt_regs->r8 = 0 */
185 pushq %r8 /* pt_regs->r9 = 0 */
186 pushq %r8 /* pt_regs->r10 = 0 */
187 pushq %r8 /* pt_regs->r11 = 0 */
188 pushq %rbx /* pt_regs->rbx */
189 pushq %rbp /* pt_regs->rbp */
190 pushq %r8 /* pt_regs->r12 = 0 */
191 pushq %r8 /* pt_regs->r13 = 0 */
192 pushq %r8 /* pt_regs->r14 = 0 */
193 pushq %r8 /* pt_regs->r15 = 0 */
334 194
335 /* 195 /*
336 * No need to do an access_ok check here because r8 has been 196 * User mode is traced as though IRQs are on, and SYSENTER
337 * 32-bit zero extended: 197 * turned them off.
338 */ 198 */
339 ASM_STAC
3401: movl (%r8), %r9d
341 _ASM_EXTABLE(1b, ia32_badarg)
342 ASM_CLAC
343 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
344 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
345 jnz cstar_tracesys
346
347cstar_do_call:
348 /* 32-bit syscall -> 64-bit C ABI argument conversion */
349 movl %edi, %r8d /* arg5 */
350 /* r9 already loaded */ /* arg6 */
351 xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
352 movl %ebx, %edi /* arg1 */
353 movl %edx, %edx /* arg3 (zero extension) */
354
355cstar_dispatch:
356 cmpq $(IA32_NR_syscalls-1), %rax
357 ja 1f
358
359 call *ia32_sys_call_table(, %rax, 8)
360 movq %rax, RAX(%rsp)
3611:
362 DISABLE_INTERRUPTS(CLBR_NONE)
363 TRACE_IRQS_OFF 199 TRACE_IRQS_OFF
364 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
365 jnz sysretl_audit
366 200
367sysretl_from_sys_call: 201 movq %rsp, %rdi
368 andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) 202 call do_fast_syscall_32
369 movl RDX(%rsp), %edx 203 testl %eax, %eax
370 movl RSI(%rsp), %esi 204 jz .Lsyscall_32_done
371 movl RDI(%rsp), %edi 205
372 movl RIP(%rsp), %ecx 206 /* Opportunistic SYSRET */
373 movl EFLAGS(%rsp), %r11d 207sysret32_from_system_call:
374 movq RAX(%rsp), %rax 208 TRACE_IRQS_ON /* User mode traces as IRQs on. */
375 xorq %r10, %r10 209 movq RBX(%rsp), %rbx /* pt_regs->rbx */
376 xorq %r9, %r9 210 movq RBP(%rsp), %rbp /* pt_regs->rbp */
377 xorq %r8, %r8 211 movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
378 TRACE_IRQS_ON 212 movq RIP(%rsp), %rcx /* pt_regs->ip (in rcx) */
379 movl RSP(%rsp), %esp 213 addq $RAX, %rsp /* Skip r8-r15 */
380 /* 214 popq %rax /* pt_regs->rax */
381 * 64-bit->32-bit SYSRET restores eip from ecx, 215 popq %rdx /* Skip pt_regs->cx */
382 * eflags from r11 (but RF and VM bits are forced to 0), 216 popq %rdx /* pt_regs->dx */
383 * cs and ss are loaded from MSRs. 217 popq %rsi /* pt_regs->si */
384 * (Note: 32-bit->32-bit SYSRET is different: since r11 218 popq %rdi /* pt_regs->di */
385 * does not exist, it merely sets eflags.IF=1). 219
220 /*
221 * USERGS_SYSRET32 does:
222 * GSBASE = user's GS base
223 * EIP = ECX
224 * RFLAGS = R11
225 * CS = __USER32_CS
226 * SS = __USER_DS
227 *
228 * ECX will not match pt_regs->cx, but we're returning to a vDSO
229 * trampoline that will fix up RCX, so this is okay.
386 * 230 *
387 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss 231 * R12-R15 are callee-saved, so they contain whatever was in them
388 * descriptor is not reinitialized. This means that we must 232 * when the system call started, which is already known to user
389 * avoid SYSRET with SS == NULL, which could happen if we schedule, 233 * code. We zero R8-R10 to avoid info leaks.
390 * exit the kernel, and re-enter using an interrupt vector. (All 234 */
391 * interrupt entries on x86_64 set SS to NULL.) We prevent that 235 xorq %r8, %r8
392 * from happening by reloading SS in __switch_to. 236 xorq %r9, %r9
393 */ 237 xorq %r10, %r10
394 USERGS_SYSRET32 238 movq RSP-ORIG_RAX(%rsp), %rsp
395 239 USERGS_SYSRET32
396#ifdef CONFIG_AUDITSYSCALL
397cstar_auditsys:
398 movl %r9d, R9(%rsp) /* register to be clobbered by call */
399 auditsys_entry_common
400 movl R9(%rsp), %r9d /* reload 6th syscall arg */
401 jmp cstar_dispatch
402
403sysretl_audit:
404 auditsys_exit sysretl_from_sys_call
405#endif
406
407cstar_tracesys:
408#ifdef CONFIG_AUDITSYSCALL
409 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
410 jz cstar_auditsys
411#endif
412 xchgl %r9d, %ebp
413 SAVE_EXTRA_REGS
414 xorl %eax, %eax /* Do not leak kernel information */
415 movq %rax, R11(%rsp)
416 movq %rax, R10(%rsp)
417 movq %r9, R9(%rsp)
418 movq %rax, R8(%rsp)
419 movq %rsp, %rdi /* &pt_regs -> arg1 */
420 call syscall_trace_enter
421 movl R9(%rsp), %r9d
422
423 /* Reload arg registers from stack. (see sysenter_tracesys) */
424 movl RCX(%rsp), %ecx
425 movl RDX(%rsp), %edx
426 movl RSI(%rsp), %esi
427 movl RDI(%rsp), %edi
428 movl %eax, %eax /* zero extension */
429
430 RESTORE_EXTRA_REGS
431 xchgl %ebp, %r9d
432 jmp cstar_do_call
433END(entry_SYSCALL_compat) 240END(entry_SYSCALL_compat)
434 241
435ia32_badarg:
436 /*
437 * So far, we've entered kernel mode, set AC, turned on IRQs, and
438 * saved C regs except r8-r11. We haven't done any of the other
439 * standard entry work, though. We want to bail, but we shouldn't
440 * treat this as a syscall entry since we don't even know what the
441 * args are. Instead, treat this as a non-syscall entry, finish
442 * the entry work, and immediately exit after setting AX = -EFAULT.
443 *
444 * We're really just being polite here. Killing the task outright
445 * would be a reasonable action, too. Given that the only valid
446 * way to have gotten here is through the vDSO, and we already know
447 * that the stack pointer is bad, the task isn't going to survive
448 * for long no matter what we do.
449 */
450
451 ASM_CLAC /* undo STAC */
452 movq $-EFAULT, RAX(%rsp) /* return -EFAULT if possible */
453
454 /* Fill in the rest of pt_regs */
455 xorl %eax, %eax
456 movq %rax, R11(%rsp)
457 movq %rax, R10(%rsp)
458 movq %rax, R9(%rsp)
459 movq %rax, R8(%rsp)
460 SAVE_EXTRA_REGS
461
462 /* Turn IRQs back off. */
463 DISABLE_INTERRUPTS(CLBR_NONE)
464 TRACE_IRQS_OFF
465
466 /* Now finish entering normal kernel mode. */
467#ifdef CONFIG_CONTEXT_TRACKING
468 call enter_from_user_mode
469#endif
470
471 /* And exit again. */
472 jmp retint_user
473
474ia32_ret_from_sys_call_irqs_off:
475 TRACE_IRQS_ON
476 ENABLE_INTERRUPTS(CLBR_NONE)
477
478ia32_ret_from_sys_call:
479 xorl %eax, %eax /* Do not leak kernel information */
480 movq %rax, R11(%rsp)
481 movq %rax, R10(%rsp)
482 movq %rax, R9(%rsp)
483 movq %rax, R8(%rsp)
484 jmp int_ret_from_sys_call
485
486/* 242/*
487 * Emulated IA32 system calls via int 0x80. 243 * Emulated IA32 system calls via int 0x80.
488 * 244 *
@@ -507,14 +263,17 @@ ia32_ret_from_sys_call:
507ENTRY(entry_INT80_compat) 263ENTRY(entry_INT80_compat)
508 /* 264 /*
509 * Interrupts are off on entry. 265 * Interrupts are off on entry.
510 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
511 * it is too small to ever cause noticeable irq latency.
512 */ 266 */
513 PARAVIRT_ADJUST_EXCEPTION_FRAME 267 PARAVIRT_ADJUST_EXCEPTION_FRAME
514 SWAPGS 268 SWAPGS
515 ENABLE_INTERRUPTS(CLBR_NONE)
516 269
517 /* Zero-extending 32-bit regs, do not remove */ 270 /*
271 * User tracing code (ptrace or signal handlers) might assume that
272 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
273 * syscall. Just in case the high bits are nonzero, zero-extend
274 * the syscall number. (This could almost certainly be deleted
275 * with no ill effects.)
276 */
518 movl %eax, %eax 277 movl %eax, %eax
519 278
520 /* Construct struct pt_regs on stack (iret frame is already on stack) */ 279 /* Construct struct pt_regs on stack (iret frame is already on stack) */
@@ -524,67 +283,37 @@ ENTRY(entry_INT80_compat)
524 pushq %rdx /* pt_regs->dx */ 283 pushq %rdx /* pt_regs->dx */
525 pushq %rcx /* pt_regs->cx */ 284 pushq %rcx /* pt_regs->cx */
526 pushq $-ENOSYS /* pt_regs->ax */ 285 pushq $-ENOSYS /* pt_regs->ax */
527 pushq $0 /* pt_regs->r8 */ 286 xorq %r8,%r8
528 pushq $0 /* pt_regs->r9 */ 287 pushq %r8 /* pt_regs->r8 = 0 */
529 pushq $0 /* pt_regs->r10 */ 288 pushq %r8 /* pt_regs->r9 = 0 */
530 pushq $0 /* pt_regs->r11 */ 289 pushq %r8 /* pt_regs->r10 = 0 */
290 pushq %r8 /* pt_regs->r11 = 0 */
291 pushq %rbx /* pt_regs->rbx */
292 pushq %rbp /* pt_regs->rbp */
293 pushq %r12 /* pt_regs->r12 */
294 pushq %r13 /* pt_regs->r13 */
295 pushq %r14 /* pt_regs->r14 */
296 pushq %r15 /* pt_regs->r15 */
531 cld 297 cld
532 sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
533
534 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
535 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
536 jnz ia32_tracesys
537
538ia32_do_call:
539 /* 32-bit syscall -> 64-bit C ABI argument conversion */
540 movl %edi, %r8d /* arg5 */
541 movl %ebp, %r9d /* arg6 */
542 xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
543 movl %ebx, %edi /* arg1 */
544 movl %edx, %edx /* arg3 (zero extension) */
545 cmpq $(IA32_NR_syscalls-1), %rax
546 ja 1f
547 298
548 call *ia32_sys_call_table(, %rax, 8)
549 movq %rax, RAX(%rsp)
5501:
551 jmp int_ret_from_sys_call
552
553ia32_tracesys:
554 SAVE_EXTRA_REGS
555 movq %rsp, %rdi /* &pt_regs -> arg1 */
556 call syscall_trace_enter
557 /* 299 /*
558 * Reload arg registers from stack in case ptrace changed them. 300 * User mode is traced as though IRQs are on, and the interrupt
559 * Don't reload %eax because syscall_trace_enter() returned 301 * gate turned them off.
560 * the %rax value we should see. But do truncate it to 32 bits.
561 * If it's -1 to make us punt the syscall, then (u32)-1 is still
562 * an appropriately invalid value.
563 */ 302 */
564 movl RCX(%rsp), %ecx 303 TRACE_IRQS_OFF
565 movl RDX(%rsp), %edx
566 movl RSI(%rsp), %esi
567 movl RDI(%rsp), %edi
568 movl %eax, %eax /* zero extension */
569 RESTORE_EXTRA_REGS
570 jmp ia32_do_call
571END(entry_INT80_compat)
572 304
573 .macro PTREGSCALL label, func 305 movq %rsp, %rdi
574 ALIGN 306 call do_syscall_32_irqs_off
575GLOBAL(\label) 307.Lsyscall_32_done:
576 leaq \func(%rip), %rax
577 jmp ia32_ptregs_common
578 .endm
579 308
580 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn 309 /* Go back to user mode. */
581 PTREGSCALL stub32_sigreturn, sys32_sigreturn 310 TRACE_IRQS_ON
582 PTREGSCALL stub32_fork, sys_fork 311 SWAPGS
583 PTREGSCALL stub32_vfork, sys_vfork 312 jmp restore_regs_and_iret
313END(entry_INT80_compat)
584 314
585 ALIGN 315 ALIGN
586GLOBAL(stub32_clone) 316GLOBAL(stub32_clone)
587 leaq sys_clone(%rip), %rax
588 /* 317 /*
589 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). 318 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
590 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). 319 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
@@ -593,12 +322,4 @@ GLOBAL(stub32_clone)
593 * so we need to swap arguments here before calling it: 322 * so we need to swap arguments here before calling it:
594 */ 323 */
595 xchg %r8, %rcx 324 xchg %r8, %rcx
596 jmp ia32_ptregs_common 325 jmp sys_clone
597
598 ALIGN
599ia32_ptregs_common:
600 SAVE_EXTRA_REGS 8
601 call *%rax
602 RESTORE_EXTRA_REGS 8
603 ret
604END(ia32_ptregs_common)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8ea34f94e973..9a6649857106 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -4,24 +4,21 @@
4#include <linux/sys.h> 4#include <linux/sys.h>
5#include <linux/cache.h> 5#include <linux/cache.h>
6#include <asm/asm-offsets.h> 6#include <asm/asm-offsets.h>
7#include <asm/syscall.h>
7 8
8#ifdef CONFIG_IA32_EMULATION 9#ifdef CONFIG_IA32_EMULATION
9#define SYM(sym, compat) compat 10#define SYM(sym, compat) compat
10#else 11#else
11#define SYM(sym, compat) sym 12#define SYM(sym, compat) sym
12#define ia32_sys_call_table sys_call_table
13#define __NR_syscall_compat_max __NR_syscall_max
14#endif 13#endif
15 14
16#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; 15#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
17#include <asm/syscalls_32.h> 16#include <asm/syscalls_32.h>
18#undef __SYSCALL_I386 17#undef __SYSCALL_I386
19 18
20#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat), 19#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
21 20
22typedef asmlinkage void (*sys_call_ptr_t)(void); 21extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
23
24extern asmlinkage void sys_ni_syscall(void);
25 22
26__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = { 23__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
27 /* 24 /*
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 4ac730b37f0b..41283d22be7a 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -14,13 +14,13 @@
14# define __SYSCALL_X32(nr, sym, compat) /* nothing */ 14# define __SYSCALL_X32(nr, sym, compat) /* nothing */
15#endif 15#endif
16 16
17#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; 17#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
18#include <asm/syscalls_64.h> 18#include <asm/syscalls_64.h>
19#undef __SYSCALL_64 19#undef __SYSCALL_64
20 20
21#define __SYSCALL_64(nr, sym, compat) [nr] = sym, 21#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
22 22
23extern void sys_ni_syscall(void); 23extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
24 24
25asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { 25asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
26 /* 26 /*
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7663c455b9f6..caa2c712d1e7 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -8,7 +8,7 @@
8# 8#
90 i386 restart_syscall sys_restart_syscall 90 i386 restart_syscall sys_restart_syscall
101 i386 exit sys_exit 101 i386 exit sys_exit
112 i386 fork sys_fork stub32_fork 112 i386 fork sys_fork sys_fork
123 i386 read sys_read 123 i386 read sys_read
134 i386 write sys_write 134 i386 write sys_write
145 i386 open sys_open compat_sys_open 145 i386 open sys_open compat_sys_open
@@ -17,7 +17,7 @@
178 i386 creat sys_creat 178 i386 creat sys_creat
189 i386 link sys_link 189 i386 link sys_link
1910 i386 unlink sys_unlink 1910 i386 unlink sys_unlink
2011 i386 execve sys_execve stub32_execve 2011 i386 execve sys_execve compat_sys_execve
2112 i386 chdir sys_chdir 2112 i386 chdir sys_chdir
2213 i386 time sys_time compat_sys_time 2213 i386 time sys_time compat_sys_time
2314 i386 mknod sys_mknod 2314 i386 mknod sys_mknod
@@ -125,7 +125,7 @@
125116 i386 sysinfo sys_sysinfo compat_sys_sysinfo 125116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
126117 i386 ipc sys_ipc compat_sys_ipc 126117 i386 ipc sys_ipc compat_sys_ipc
127118 i386 fsync sys_fsync 127118 i386 fsync sys_fsync
128119 i386 sigreturn sys_sigreturn stub32_sigreturn 128119 i386 sigreturn sys_sigreturn sys32_sigreturn
129120 i386 clone sys_clone stub32_clone 129120 i386 clone sys_clone stub32_clone
130121 i386 setdomainname sys_setdomainname 130121 i386 setdomainname sys_setdomainname
131122 i386 uname sys_newuname 131122 i386 uname sys_newuname
@@ -179,7 +179,7 @@
179170 i386 setresgid sys_setresgid16 179170 i386 setresgid sys_setresgid16
180171 i386 getresgid sys_getresgid16 180171 i386 getresgid sys_getresgid16
181172 i386 prctl sys_prctl 181172 i386 prctl sys_prctl
182173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn 182173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
183174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 183174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
184175 i386 rt_sigprocmask sys_rt_sigprocmask 184175 i386 rt_sigprocmask sys_rt_sigprocmask
185176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending 185176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
@@ -196,7 +196,7 @@
196187 i386 sendfile sys_sendfile compat_sys_sendfile 196187 i386 sendfile sys_sendfile compat_sys_sendfile
197188 i386 getpmsg 197188 i386 getpmsg
198189 i386 putpmsg 198189 i386 putpmsg
199190 i386 vfork sys_vfork stub32_vfork 199190 i386 vfork sys_vfork sys_vfork
200191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 200191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
201192 i386 mmap2 sys_mmap_pgoff 201192 i386 mmap2 sys_mmap_pgoff
202193 i386 truncate64 sys_truncate64 sys32_truncate64 202193 i386 truncate64 sys_truncate64 sys32_truncate64
@@ -364,7 +364,7 @@
364355 i386 getrandom sys_getrandom 364355 i386 getrandom sys_getrandom
365356 i386 memfd_create sys_memfd_create 365356 i386 memfd_create sys_memfd_create
366357 i386 bpf sys_bpf 366357 i386 bpf sys_bpf
367358 i386 execveat sys_execveat stub32_execveat 367358 i386 execveat sys_execveat compat_sys_execveat
368359 i386 socket sys_socket 368359 i386 socket sys_socket
369360 i386 socketpair sys_socketpair 369360 i386 socketpair sys_socketpair
370361 i386 bind sys_bind 370361 i386 bind sys_bind
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index a3d0767a6b29..265c0ed68118 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -19,9 +19,7 @@ obj-y += vma.o
19# vDSO images to build 19# vDSO images to build
20vdso_img-$(VDSO64-y) += 64 20vdso_img-$(VDSO64-y) += 64
21vdso_img-$(VDSOX32-y) += x32 21vdso_img-$(VDSOX32-y) += x32
22vdso_img-$(VDSO32-y) += 32-int80 22vdso_img-$(VDSO32-y) += 32
23vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall
24vdso_img-$(VDSO32-y) += 32-sysenter
25 23
26obj-$(VDSO32-y) += vdso32-setup.o 24obj-$(VDSO32-y) += vdso32-setup.o
27 25
@@ -69,7 +67,7 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
69CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ 67CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
70 $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ 68 $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
71 -fno-omit-frame-pointer -foptimize-sibling-calls \ 69 -fno-omit-frame-pointer -foptimize-sibling-calls \
72 -DDISABLE_BRANCH_PROFILING 70 -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
73 71
74$(vobjs): KBUILD_CFLAGS += $(CFL) 72$(vobjs): KBUILD_CFLAGS += $(CFL)
75 73
@@ -122,15 +120,6 @@ $(obj)/%.so: $(obj)/%.so.dbg
122$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE 120$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
123 $(call if_changed,vdso) 121 $(call if_changed,vdso)
124 122
125#
126# Build multiple 32-bit vDSO images to choose from at boot time.
127#
128vdso32.so-$(VDSO32-y) += int80
129vdso32.so-$(CONFIG_IA32_EMULATION) += syscall
130vdso32.so-$(VDSO32-y) += sysenter
131
132vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
133
134CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 123CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
135VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 124VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
136 125
@@ -139,14 +128,12 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
139override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ 128override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
140 129
141targets += vdso32/vdso32.lds 130targets += vdso32/vdso32.lds
142targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) 131targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
143targets += vdso32/vclock_gettime.o 132targets += vdso32/vclock_gettime.o
144 133
145$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) 134KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
146 135$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
147KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) 136$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32
148$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
149$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
150 137
151KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) 138KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
152KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) 139KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
@@ -157,13 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
157KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) 144KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
158KBUILD_CFLAGS_32 += -fno-omit-frame-pointer 145KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
159KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING 146KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
160$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) 147$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
161 148
162$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ 149$(obj)/vdso32.so.dbg: FORCE \
163 $(obj)/vdso32/vdso32.lds \ 150 $(obj)/vdso32/vdso32.lds \
164 $(obj)/vdso32/vclock_gettime.o \ 151 $(obj)/vdso32/vclock_gettime.o \
165 $(obj)/vdso32/note.o \ 152 $(obj)/vdso32/note.o \
166 $(obj)/vdso32/%.o 153 $(obj)/vdso32/system_call.o
167 $(call if_changed,vdso) 154 $(call if_changed,vdso)
168 155
169# 156#
@@ -206,4 +193,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
206PHONY += vdso_install $(vdso_img_insttargets) 193PHONY += vdso_install $(vdso_img_insttargets)
207vdso_install: $(vdso_img_insttargets) FORCE 194vdso_install: $(vdso_img_insttargets) FORCE
208 195
209clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so* 196clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8627db24a7f6..785d9922b106 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -98,10 +98,10 @@ struct vdso_sym required_syms[] = {
98 "VDSO_FAKE_SECTION_TABLE_END", false 98 "VDSO_FAKE_SECTION_TABLE_END", false
99 }, 99 },
100 {"VDSO32_NOTE_MASK", true}, 100 {"VDSO32_NOTE_MASK", true},
101 {"VDSO32_SYSENTER_RETURN", true},
102 {"__kernel_vsyscall", true}, 101 {"__kernel_vsyscall", true},
103 {"__kernel_sigreturn", true}, 102 {"__kernel_sigreturn", true},
104 {"__kernel_rt_sigreturn", true}, 103 {"__kernel_rt_sigreturn", true},
104 {"int80_landing_pad", true},
105}; 105};
106 106
107__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) 107__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index e904c270573b..08a317a9ae4b 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -48,35 +48,9 @@ __setup("vdso32=", vdso32_setup);
48__setup_param("vdso=", vdso_setup, vdso32_setup, 0); 48__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
49#endif 49#endif
50 50
51#ifdef CONFIG_X86_64
52
53#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
54#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
55
56#else /* CONFIG_X86_32 */
57
58#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
59#define vdso32_syscall() (0)
60
61#endif /* CONFIG_X86_64 */
62
63#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
64const struct vdso_image *selected_vdso32;
65#endif
66
67int __init sysenter_setup(void) 51int __init sysenter_setup(void)
68{ 52{
69#ifdef CONFIG_COMPAT 53 init_vdso_image(&vdso_image_32);
70 if (vdso32_syscall())
71 selected_vdso32 = &vdso_image_32_syscall;
72 else
73#endif
74 if (vdso32_sysenter())
75 selected_vdso32 = &vdso_image_32_sysenter;
76 else
77 selected_vdso32 = &vdso_image_32_int80;
78
79 init_vdso_image(selected_vdso32);
80 54
81 return 0; 55 return 0;
82} 56}
diff --git a/arch/x86/entry/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/int80.S
deleted file mode 100644
index b15b7c01aedb..000000000000
--- a/arch/x86/entry/vdso/vdso32/int80.S
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * Code for the vDSO. This version uses the old int $0x80 method.
3 *
4 * First get the common code for the sigreturn entry points.
5 * This must come first.
6 */
7#include "sigreturn.S"
8
9 .text
10 .globl __kernel_vsyscall
11 .type __kernel_vsyscall,@function
12 ALIGN
13__kernel_vsyscall:
14.LSTART_vsyscall:
15 int $0x80
16 ret
17.LEND_vsyscall:
18 .size __kernel_vsyscall,.-.LSTART_vsyscall
19 .previous
20
21 .section .eh_frame,"a",@progbits
22.LSTARTFRAMEDLSI:
23 .long .LENDCIEDLSI-.LSTARTCIEDLSI
24.LSTARTCIEDLSI:
25 .long 0 /* CIE ID */
26 .byte 1 /* Version number */
27 .string "zR" /* NUL-terminated augmentation string */
28 .uleb128 1 /* Code alignment factor */
29 .sleb128 -4 /* Data alignment factor */
30 .byte 8 /* Return address register column */
31 .uleb128 1 /* Augmentation value length */
32 .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
33 .byte 0x0c /* DW_CFA_def_cfa */
34 .uleb128 4
35 .uleb128 4
36 .byte 0x88 /* DW_CFA_offset, column 0x8 */
37 .uleb128 1
38 .align 4
39.LENDCIEDLSI:
40 .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
41.LSTARTFDEDLSI:
42 .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
43 .long .LSTART_vsyscall-. /* PC-relative start address */
44 .long .LEND_vsyscall-.LSTART_vsyscall
45 .uleb128 0
46 .align 4
47.LENDFDEDLSI:
48 .previous
49
50 /*
51 * Pad out the segment to match the size of the sysenter.S version.
52 */
53VDSO32_vsyscall_eh_frame_size = 0x40
54 .section .data,"aw",@progbits
55 .space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
56 .previous
diff --git a/arch/x86/entry/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S
deleted file mode 100644
index 6b286bb5251c..000000000000
--- a/arch/x86/entry/vdso/vdso32/syscall.S
+++ /dev/null
@@ -1,75 +0,0 @@
1/*
2 * Code for the vDSO. This version uses the syscall instruction.
3 *
4 * First get the common code for the sigreturn entry points.
5 * This must come first.
6 */
7#define SYSCALL_ENTER_KERNEL syscall
8#include "sigreturn.S"
9
10#include <asm/segment.h>
11
12 .text
13 .globl __kernel_vsyscall
14 .type __kernel_vsyscall,@function
15 ALIGN
16__kernel_vsyscall:
17.LSTART_vsyscall:
18 push %ebp
19.Lpush_ebp:
20 movl %ecx, %ebp
21 syscall
22 movl %ebp, %ecx
23 popl %ebp
24.Lpop_ebp:
25 ret
26.LEND_vsyscall:
27 .size __kernel_vsyscall,.-.LSTART_vsyscall
28
29 .section .eh_frame,"a",@progbits
30.LSTARTFRAME:
31 .long .LENDCIE-.LSTARTCIE
32.LSTARTCIE:
33 .long 0 /* CIE ID */
34 .byte 1 /* Version number */
35 .string "zR" /* NUL-terminated augmentation string */
36 .uleb128 1 /* Code alignment factor */
37 .sleb128 -4 /* Data alignment factor */
38 .byte 8 /* Return address register column */
39 .uleb128 1 /* Augmentation value length */
40 .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
41 .byte 0x0c /* DW_CFA_def_cfa */
42 .uleb128 4
43 .uleb128 4
44 .byte 0x88 /* DW_CFA_offset, column 0x8 */
45 .uleb128 1
46 .align 4
47.LENDCIE:
48
49 .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
50.LSTARTFDE1:
51 .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
52 .long .LSTART_vsyscall-. /* PC-relative start address */
53 .long .LEND_vsyscall-.LSTART_vsyscall
54 .uleb128 0 /* Augmentation length */
55 /* What follows are the instructions for the table generation.
56 We have to record all changes of the stack pointer. */
57 .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
58 .byte 0x0e /* DW_CFA_def_cfa_offset */
59 .uleb128 8
60 .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
61 .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
62 .byte 0xc5 /* DW_CFA_restore %ebp */
63 .byte 0x0e /* DW_CFA_def_cfa_offset */
64 .uleb128 4
65 .align 4
66.LENDFDE1:
67 .previous
68
69 /*
70 * Pad out the segment to match the size of the sysenter.S version.
71 */
72VDSO32_vsyscall_eh_frame_size = 0x40
73 .section .data,"aw",@progbits
74 .space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
75 .previous
diff --git a/arch/x86/entry/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S
deleted file mode 100644
index e354bceee0e0..000000000000
--- a/arch/x86/entry/vdso/vdso32/sysenter.S
+++ /dev/null
@@ -1,116 +0,0 @@
1/*
2 * Code for the vDSO. This version uses the sysenter instruction.
3 *
4 * First get the common code for the sigreturn entry points.
5 * This must come first.
6 */
7#include "sigreturn.S"
8
9/*
10 * The caller puts arg2 in %ecx, which gets pushed. The kernel will use
11 * %ecx itself for arg2. The pushing is because the sysexit instruction
12 * (found in entry.S) requires that we clobber %ecx with the desired %esp.
13 * User code might expect that %ecx is unclobbered though, as it would be
14 * for returning via the iret instruction, so we must push and pop.
15 *
16 * The caller puts arg3 in %edx, which the sysexit instruction requires
17 * for %eip. Thus, exactly as for arg2, we must push and pop.
18 *
19 * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
20 * instruction clobbers %esp, the user's %esp won't even survive entry
21 * into the kernel. We store %esp in %ebp. Code in entry.S must fetch
22 * arg6 from the stack.
23 *
24 * You can not use this vsyscall for the clone() syscall because the
25 * three words on the parent stack do not get copied to the child.
26 */
27 .text
28 .globl __kernel_vsyscall
29 .type __kernel_vsyscall,@function
30 ALIGN
31__kernel_vsyscall:
32.LSTART_vsyscall:
33 push %ecx
34.Lpush_ecx:
35 push %edx
36.Lpush_edx:
37 push %ebp
38.Lenter_kernel:
39 movl %esp,%ebp
40 sysenter
41
42 /* 7: align return point with nop's to make disassembly easier */
43 .space 7,0x90
44
45 /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
46 int $0x80
47 /* 16: System call normal return point is here! */
48VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
49 pop %ebp
50.Lpop_ebp:
51 pop %edx
52.Lpop_edx:
53 pop %ecx
54.Lpop_ecx:
55 ret
56.LEND_vsyscall:
57 .size __kernel_vsyscall,.-.LSTART_vsyscall
58 .previous
59
60 .section .eh_frame,"a",@progbits
61.LSTARTFRAMEDLSI:
62 .long .LENDCIEDLSI-.LSTARTCIEDLSI
63.LSTARTCIEDLSI:
64 .long 0 /* CIE ID */
65 .byte 1 /* Version number */
66 .string "zR" /* NUL-terminated augmentation string */
67 .uleb128 1 /* Code alignment factor */
68 .sleb128 -4 /* Data alignment factor */
69 .byte 8 /* Return address register column */
70 .uleb128 1 /* Augmentation value length */
71 .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
72 .byte 0x0c /* DW_CFA_def_cfa */
73 .uleb128 4
74 .uleb128 4
75 .byte 0x88 /* DW_CFA_offset, column 0x8 */
76 .uleb128 1
77 .align 4
78.LENDCIEDLSI:
79 .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
80.LSTARTFDEDLSI:
81 .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
82 .long .LSTART_vsyscall-. /* PC-relative start address */
83 .long .LEND_vsyscall-.LSTART_vsyscall
84 .uleb128 0
85 /* What follows are the instructions for the table generation.
86 We have to record all changes of the stack pointer. */
87 .byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
88 .byte 0x0e /* DW_CFA_def_cfa_offset */
89 .byte 0x08 /* RA at offset 8 now */
90 .byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
91 .byte 0x0e /* DW_CFA_def_cfa_offset */
92 .byte 0x0c /* RA at offset 12 now */
93 .byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
94 .byte 0x0e /* DW_CFA_def_cfa_offset */
95 .byte 0x10 /* RA at offset 16 now */
96 .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
97 /* Finally the epilogue. */
98 .byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */
99 .byte 0x0e /* DW_CFA_def_cfa_offset */
100 .byte 0x0c /* RA at offset 12 now */
101 .byte 0xc5 /* DW_CFA_restore %ebp */
102 .byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
103 .byte 0x0e /* DW_CFA_def_cfa_offset */
104 .byte 0x08 /* RA at offset 8 now */
105 .byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
106 .byte 0x0e /* DW_CFA_def_cfa_offset */
107 .byte 0x04 /* RA at offset 4 now */
108 .align 4
109.LENDFDEDLSI:
110 .previous
111
112 /*
113 * Emit a symbol with the size of this .eh_frame data,
114 * to verify it matches the other versions.
115 */
116VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
new file mode 100644
index 000000000000..93bd8452383f
--- /dev/null
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -0,0 +1,57 @@
1/*
2 * Code for the vDSO. This version uses SYSENTER or SYSCALL when available (selected via alternatives) and otherwise falls back to the old int $0x80 method.
3*/
4
5#include <asm/dwarf2.h>
6#include <asm/cpufeature.h>
7#include <asm/alternative-asm.h>
8
9/*
10 * First get the common code for the sigreturn entry points.
11 * This must come first.
12 */
13#include "sigreturn.S"
14
15 .text
16 .globl __kernel_vsyscall
17 .type __kernel_vsyscall,@function
18 ALIGN
19__kernel_vsyscall:
20 CFI_STARTPROC
21 /*
22 * Reshuffle regs so that any of the entry instructions
23 * will preserve enough state.
24 */
25 pushl %edx
26 CFI_ADJUST_CFA_OFFSET 4
27 CFI_REL_OFFSET edx, 0
28 pushl %ecx
29 CFI_ADJUST_CFA_OFFSET 4
30 CFI_REL_OFFSET ecx, 0
31 movl %esp, %ecx
32
33#ifdef CONFIG_X86_64
34 /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
35 ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
36 "syscall", X86_FEATURE_SYSCALL32
37#else
38 ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
39#endif
40
41 /* Enter using int $0x80 */
42 movl (%esp), %ecx
43 int $0x80
44GLOBAL(int80_landing_pad)
45
46 /* Restore ECX and EDX in case they were clobbered. */
47 popl %ecx
48 CFI_RESTORE ecx
49 CFI_ADJUST_CFA_OFFSET -4
50 popl %edx
51 CFI_RESTORE edx
52 CFI_ADJUST_CFA_OFFSET -4
53 ret
54 CFI_ENDPROC
55
56 .size __kernel_vsyscall,.-__kernel_vsyscall
57 .previous
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 434543145d78..64df47148160 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -180,21 +180,10 @@ up_fail:
180#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 180#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
181static int load_vdso32(void) 181static int load_vdso32(void)
182{ 182{
183 int ret;
184
185 if (vdso32_enabled != 1) /* Other values all mean "disabled" */ 183 if (vdso32_enabled != 1) /* Other values all mean "disabled" */
186 return 0; 184 return 0;
187 185
188 ret = map_vdso(selected_vdso32, false); 186 return map_vdso(&vdso_image_32, false);
189 if (ret)
190 return ret;
191
192 if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
193 current_thread_info()->sysenter_return =
194 current->mm->context.vdso +
195 selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
196
197 return 0;
198} 187}
199#endif 188#endif
200 189
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index b160c0c6baed..174c2549939d 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -38,7 +38,14 @@
38#define CREATE_TRACE_POINTS 38#define CREATE_TRACE_POINTS
39#include "vsyscall_trace.h" 39#include "vsyscall_trace.h"
40 40
41static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; 41static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
42#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
43 NATIVE;
44#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
45 NONE;
46#else
47 EMULATE;
48#endif
42 49
43static int __init vsyscall_setup(char *str) 50static int __init vsyscall_setup(char *str)
44{ 51{
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a0a19b7ba22d..e6a5c275cd3f 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -289,7 +289,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
289 /* Return stub is in 32bit vsyscall page */ 289 /* Return stub is in 32bit vsyscall page */
290 if (current->mm->context.vdso) 290 if (current->mm->context.vdso)
291 restorer = current->mm->context.vdso + 291 restorer = current->mm->context.vdso +
292 selected_vdso32->sym___kernel_sigreturn; 292 vdso_image_32.sym___kernel_sigreturn;
293 else 293 else
294 restorer = &frame->retcode; 294 restorer = &frame->retcode;
295 } 295 }
@@ -368,7 +368,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
368 restorer = ksig->ka.sa.sa_restorer; 368 restorer = ksig->ka.sa.sa_restorer;
369 else 369 else
370 restorer = current->mm->context.vdso + 370 restorer = current->mm->context.vdso +
371 selected_vdso32->sym___kernel_rt_sigreturn; 371 vdso_image_32.sym___kernel_rt_sigreturn;
372 put_user_ex(ptr_to_compat(restorer), &frame->pretcode); 372 put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
373 373
374 /* 374 /*
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
new file mode 100644
index 000000000000..b7a1ab865d68
--- /dev/null
+++ b/arch/x86/include/asm/dwarf2.h
@@ -0,0 +1,84 @@
1#ifndef _ASM_X86_DWARF2_H
2#define _ASM_X86_DWARF2_H
3
4#ifndef __ASSEMBLY__
5#warning "asm/dwarf2.h should be only included in pure assembly files"
6#endif
7
8/*
9 * Macros for dwarf2 CFI unwind table entries.
10 * See "as.info" for details on these pseudo ops. Unfortunately
11 * they are only supported in very new binutils, so define them
12 * away for older versions.
13 */
14
15#ifdef CONFIG_AS_CFI
16
17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined
30#define CFI_ESCAPE .cfi_escape
31
32#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
33#define CFI_SIGNAL_FRAME .cfi_signal_frame
34#else
35#define CFI_SIGNAL_FRAME
36#endif
37
38#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__)
39#ifndef BUILD_VDSO
40 /*
41 * Emit CFI data in .debug_frame sections, not .eh_frame sections.
42 * The latter we currently just discard since we don't do DWARF
43 * unwinding at runtime. So only the offline DWARF information is
44 * useful to anyone. Note we should not use this directive if
45 * vmlinux.lds.S gets changed so it doesn't discard .eh_frame.
46 */
47 .cfi_sections .debug_frame
48#else
49 /*
50 * For the vDSO, emit both runtime unwind information and debug
51 * symbols for the .dbg file.
52 */
53 .cfi_sections .eh_frame, .debug_frame
54#endif
55#endif
56
57#else
58
59/*
60 * Due to the structure of pre-exisiting code, don't use assembler line
61 * comment character # to ignore the arguments. Instead, use a dummy macro.
62 */
63.macro cfi_ignore a=0, b=0, c=0, d=0
64.endm
65
66#define CFI_STARTPROC cfi_ignore
67#define CFI_ENDPROC cfi_ignore
68#define CFI_DEF_CFA cfi_ignore
69#define CFI_DEF_CFA_REGISTER cfi_ignore
70#define CFI_DEF_CFA_OFFSET cfi_ignore
71#define CFI_ADJUST_CFA_OFFSET cfi_ignore
72#define CFI_OFFSET cfi_ignore
73#define CFI_REL_OFFSET cfi_ignore
74#define CFI_REGISTER cfi_ignore
75#define CFI_RESTORE cfi_ignore
76#define CFI_REMEMBER_STATE cfi_ignore
77#define CFI_RESTORE_STATE cfi_ignore
78#define CFI_UNDEFINED cfi_ignore
79#define CFI_ESCAPE cfi_ignore
80#define CFI_SIGNAL_FRAME cfi_ignore
81
82#endif
83
84#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 141c561f4664..1514753fd435 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,11 +171,11 @@ do { \
171static inline void elf_common_init(struct thread_struct *t, 171static inline void elf_common_init(struct thread_struct *t,
172 struct pt_regs *regs, const u16 ds) 172 struct pt_regs *regs, const u16 ds)
173{ 173{
174 /* Commented-out registers are cleared in stub_execve */ 174 /* ax gets execve's return value. */
175 /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0; 175 /*regs->ax = */ regs->bx = regs->cx = regs->dx = 0;
176 regs->si = regs->di /*= regs->bp*/ = 0; 176 regs->si = regs->di = regs->bp = 0;
177 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; 177 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
178 /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/ 178 regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
179 t->fs = t->gs = 0; 179 t->fs = t->gs = 0;
180 t->fsindex = t->gsindex = 0; 180 t->fsindex = t->gsindex = 0;
181 t->ds = t->es = ds; 181 t->ds = t->es = ds;
@@ -328,7 +328,7 @@ else \
328 328
329#define VDSO_ENTRY \ 329#define VDSO_ENTRY \
330 ((unsigned long)current->mm->context.vdso + \ 330 ((unsigned long)current->mm->context.vdso + \
331 selected_vdso32->sym___kernel_vsyscall) 331 vdso_image_32.sym___kernel_vsyscall)
332 332
333struct linux_binprm; 333struct linux_binprm;
334 334
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 19577dd325fa..b55f30960554 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -556,12 +556,12 @@ static inline unsigned int cpuid_edx(unsigned int op)
556} 556}
557 557
558/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ 558/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
559static inline void rep_nop(void) 559static __always_inline void rep_nop(void)
560{ 560{
561 asm volatile("rep; nop" ::: "memory"); 561 asm volatile("rep; nop" ::: "memory");
562} 562}
563 563
564static inline void cpu_relax(void) 564static __always_inline void cpu_relax(void)
565{ 565{
566 rep_nop(); 566 rep_nop();
567} 567}
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index d7f3b3b78ac3..751bf4b7bf11 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,12 +79,12 @@ do { \
79#else /* CONFIG_X86_32 */ 79#else /* CONFIG_X86_32 */
80 80
81/* frame pointer must be last for get_wchan */ 81/* frame pointer must be last for get_wchan */
82#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" 82#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
83#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" 83#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
84 84
85#define __EXTRA_CLOBBER \ 85#define __EXTRA_CLOBBER \
86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ 86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
87 "r12", "r13", "r14", "r15" 87 "r12", "r13", "r14", "r15", "flags"
88 88
89#ifdef CONFIG_CC_STACKPROTECTOR 89#ifdef CONFIG_CC_STACKPROTECTOR
90#define __switch_canary \ 90#define __switch_canary \
@@ -100,7 +100,11 @@ do { \
100#define __switch_canary_iparam 100#define __switch_canary_iparam
101#endif /* CC_STACKPROTECTOR */ 101#endif /* CC_STACKPROTECTOR */
102 102
103/* Save restore flags to clear handle leaking NT */ 103/*
104 * There is no need to save or restore flags, because flags are always
105 * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL
106 * has no effect.
107 */
104#define switch_to(prev, next, last) \ 108#define switch_to(prev, next, last) \
105 asm volatile(SAVE_CONTEXT \ 109 asm volatile(SAVE_CONTEXT \
106 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 110 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d6a756ae04c8..999b7cd2e78c 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -20,9 +20,21 @@
20#include <asm/thread_info.h> /* for TS_COMPAT */ 20#include <asm/thread_info.h> /* for TS_COMPAT */
21#include <asm/unistd.h> 21#include <asm/unistd.h>
22 22
23typedef void (*sys_call_ptr_t)(void); 23typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
24 unsigned long, unsigned long,
25 unsigned long, unsigned long);
24extern const sys_call_ptr_t sys_call_table[]; 26extern const sys_call_ptr_t sys_call_table[];
25 27
28#if defined(CONFIG_X86_32)
29#define ia32_sys_call_table sys_call_table
30#define __NR_syscall_compat_max __NR_syscall_max
31#define IA32_NR_syscalls NR_syscalls
32#endif
33
34#if defined(CONFIG_IA32_EMULATION)
35extern const sys_call_ptr_t ia32_sys_call_table[];
36#endif
37
26/* 38/*
27 * Only the low 32 bits of orig_ax are meaningful, so we return int. 39 * Only the low 32 bits of orig_ax are meaningful, so we return int.
28 * This importantly ignores the high bits on 64-bit, so comparisons 40 * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 809877e9030b..c7b551028740 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -58,7 +58,6 @@ struct thread_info {
58 __u32 status; /* thread synchronous flags */ 58 __u32 status; /* thread synchronous flags */
59 __u32 cpu; /* current CPU */ 59 __u32 cpu; /* current CPU */
60 mm_segment_t addr_limit; 60 mm_segment_t addr_limit;
61 void __user *sysenter_return;
62 unsigned int sig_on_uaccess_error:1; 61 unsigned int sig_on_uaccess_error:1;
63 unsigned int uaccess_err:1; /* uaccess failed */ 62 unsigned int uaccess_err:1; /* uaccess failed */
64}; 63};
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a8df874f3e88..09b1b0ab94b7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -51,13 +51,13 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
51 * limit, not add it to the address). 51 * limit, not add it to the address).
52 */ 52 */
53 if (__builtin_constant_p(size)) 53 if (__builtin_constant_p(size))
54 return addr > limit - size; 54 return unlikely(addr > limit - size);
55 55
56 /* Arbitrary sizes? Be careful about overflow */ 56 /* Arbitrary sizes? Be careful about overflow */
57 addr += size; 57 addr += size;
58 if (addr < size) 58 if (unlikely(addr < size))
59 return true; 59 return true;
60 return addr > limit; 60 return unlikely(addr > limit);
61} 61}
62 62
63#define __range_not_ok(addr, size, limit) \ 63#define __range_not_ok(addr, size, limit) \
@@ -182,7 +182,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
182 : "=a" (__ret_gu), "=r" (__val_gu) \ 182 : "=a" (__ret_gu), "=r" (__val_gu) \
183 : "0" (ptr), "i" (sizeof(*(ptr)))); \ 183 : "0" (ptr), "i" (sizeof(*(ptr)))); \
184 (x) = (__force __typeof__(*(ptr))) __val_gu; \ 184 (x) = (__force __typeof__(*(ptr))) __val_gu; \
185 __ret_gu; \ 185 __builtin_expect(__ret_gu, 0); \
186}) 186})
187 187
188#define __put_user_x(size, x, ptr, __ret_pu) \ 188#define __put_user_x(size, x, ptr, __ret_pu) \
@@ -278,7 +278,7 @@ extern void __put_user_8(void);
278 __put_user_x(X, __pu_val, ptr, __ret_pu); \ 278 __put_user_x(X, __pu_val, ptr, __ret_pu); \
279 break; \ 279 break; \
280 } \ 280 } \
281 __ret_pu; \ 281 __builtin_expect(__ret_pu, 0); \
282}) 282})
283 283
284#define __put_user_size(x, ptr, size, retval, errret) \ 284#define __put_user_size(x, ptr, size, retval, errret) \
@@ -401,7 +401,7 @@ do { \
401({ \ 401({ \
402 int __pu_err; \ 402 int __pu_err; \
403 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ 403 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
404 __pu_err; \ 404 __builtin_expect(__pu_err, 0); \
405}) 405})
406 406
407#define __get_user_nocheck(x, ptr, size) \ 407#define __get_user_nocheck(x, ptr, size) \
@@ -410,7 +410,7 @@ do { \
410 unsigned long __gu_val; \ 410 unsigned long __gu_val; \
411 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ 411 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
412 (x) = (__force __typeof__(*(ptr)))__gu_val; \ 412 (x) = (__force __typeof__(*(ptr)))__gu_val; \
413 __gu_err; \ 413 __builtin_expect(__gu_err, 0); \
414}) 414})
415 415
416/* FIXME: this hack is definitely wrong -AK */ 416/* FIXME: this hack is definitely wrong -AK */
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 8021bd28c0f1..756de9190aec 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -26,7 +26,7 @@ struct vdso_image {
26 long sym___kernel_sigreturn; 26 long sym___kernel_sigreturn;
27 long sym___kernel_rt_sigreturn; 27 long sym___kernel_rt_sigreturn;
28 long sym___kernel_vsyscall; 28 long sym___kernel_vsyscall;
29 long sym_VDSO32_SYSENTER_RETURN; 29 long sym_int80_landing_pad;
30}; 30};
31 31
32#ifdef CONFIG_X86_64 32#ifdef CONFIG_X86_64
@@ -38,13 +38,7 @@ extern const struct vdso_image vdso_image_x32;
38#endif 38#endif
39 39
40#if defined CONFIG_X86_32 || defined CONFIG_COMPAT 40#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
41extern const struct vdso_image vdso_image_32_int80; 41extern const struct vdso_image vdso_image_32;
42#ifdef CONFIG_COMPAT
43extern const struct vdso_image vdso_image_32_syscall;
44#endif
45extern const struct vdso_image vdso_image_32_sysenter;
46
47extern const struct vdso_image *selected_vdso32;
48#endif 42#endif
49 43
50extern void __init init_vdso_image(const struct vdso_image *image); 44extern void __init init_vdso_image(const struct vdso_image *image);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8e3d22a1af94..95a18e25d5bf 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -54,9 +54,6 @@ void common(void) {
54 OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); 54 OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip);
55 55
56 BLANK(); 56 BLANK();
57 OFFSET(TI_sysenter_return, thread_info, sysenter_return);
58
59 BLANK();
60 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); 57 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
61#endif 58#endif
62 59
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index da52e6bb5c7f..d87ce92d3404 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -299,7 +299,7 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
299 299
300 if (current->mm->context.vdso) 300 if (current->mm->context.vdso)
301 restorer = current->mm->context.vdso + 301 restorer = current->mm->context.vdso +
302 selected_vdso32->sym___kernel_sigreturn; 302 vdso_image_32.sym___kernel_sigreturn;
303 else 303 else
304 restorer = &frame->retcode; 304 restorer = &frame->retcode;
305 if (ksig->ka.sa.sa_flags & SA_RESTORER) 305 if (ksig->ka.sa.sa_flags & SA_RESTORER)
@@ -363,7 +363,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
363 363
364 /* Set up to return from userspace. */ 364 /* Set up to return from userspace. */
365 restorer = current->mm->context.vdso + 365 restorer = current->mm->context.vdso +
366 selected_vdso32->sym___kernel_rt_sigreturn; 366 vdso_image_32.sym___kernel_rt_sigreturn;
367 if (ksig->ka.sa.sa_flags & SA_RESTORER) 367 if (ksig->ka.sa.sa_flags & SA_RESTORER)
368 restorer = ksig->ka.sa.sa_restorer; 368 restorer = ksig->ka.sa.sa_restorer;
369 put_user_ex(restorer, &frame->pretcode); 369 put_user_ex(restorer, &frame->pretcode);
diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h
index 9fe77b7b5a0e..81d6562ce01d 100644
--- a/arch/x86/um/asm/syscall.h
+++ b/arch/x86/um/asm/syscall.h
@@ -3,6 +3,10 @@
3 3
4#include <uapi/linux/audit.h> 4#include <uapi/linux/audit.h>
5 5
6typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
7 unsigned long, unsigned long,
8 unsigned long, unsigned long);
9
6static inline int syscall_get_arch(void) 10static inline int syscall_get_arch(void)
7{ 11{
8#ifdef CONFIG_X86_32 12#ifdef CONFIG_X86_32
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index bd16d6c370ec..439c0994b696 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -7,6 +7,7 @@
7#include <linux/sys.h> 7#include <linux/sys.h>
8#include <linux/cache.h> 8#include <linux/cache.h>
9#include <generated/user_constants.h> 9#include <generated/user_constants.h>
10#include <asm/syscall.h>
10 11
11#define __NO_STUBS 12#define __NO_STUBS
12 13
@@ -24,15 +25,13 @@
24 25
25#define old_mmap sys_old_mmap 26#define old_mmap sys_old_mmap
26 27
27#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; 28#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
28#include <asm/syscalls_32.h> 29#include <asm/syscalls_32.h>
29 30
30#undef __SYSCALL_I386 31#undef __SYSCALL_I386
31#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, 32#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
32 33
33typedef asmlinkage void (*sys_call_ptr_t)(void); 34extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
34
35extern asmlinkage void sys_ni_syscall(void);
36 35
37const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { 36const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
38 /* 37 /*
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index a75d8700472a..b74ea6c2c0e7 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -7,6 +7,7 @@
7#include <linux/sys.h> 7#include <linux/sys.h>
8#include <linux/cache.h> 8#include <linux/cache.h>
9#include <generated/user_constants.h> 9#include <generated/user_constants.h>
10#include <asm/syscall.h>
10 11
11#define __NO_STUBS 12#define __NO_STUBS
12 13
@@ -37,15 +38,13 @@
37#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) 38#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
38#define __SYSCALL_X32(nr, sym, compat) /* Not supported */ 39#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
39 40
40#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; 41#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
41#include <asm/syscalls_64.h> 42#include <asm/syscalls_64.h>
42 43
43#undef __SYSCALL_64 44#undef __SYSCALL_64
44#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym, 45#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
45 46
46typedef void (*sys_call_ptr_t)(void); 47extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
47
48extern void sys_ni_syscall(void);
49 48
50const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { 49const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
51 /* 50 /*
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1c30e4ab1022..63320b6d35bc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -965,17 +965,8 @@ char * __init xen_auto_xlated_memory_setup(void)
965static void __init fiddle_vdso(void) 965static void __init fiddle_vdso(void)
966{ 966{
967#ifdef CONFIG_X86_32 967#ifdef CONFIG_X86_32
968 /* 968 u32 *mask = vdso_image_32.data +
969 * This could be called before selected_vdso32 is initialized, so 969 vdso_image_32.sym_VDSO32_NOTE_MASK;
970 * just fiddle with both possible images. vdso_image_32_syscall
971 * can't be selected, since it only exists on 64-bit systems.
972 */
973 u32 *mask;
974 mask = vdso_image_32_int80.data +
975 vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
976 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
977 mask = vdso_image_32_sysenter.data +
978 vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
979 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; 970 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
980#endif 971#endif
981} 972}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 29089b24d18b..389701f59940 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,8 +4,8 @@ include ../lib.mk
4 4
5.PHONY: all all_32 all_64 warn_32bit_failure clean 5.PHONY: all all_32 all_64 warn_32bit_failure clean
6 6
7TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt 7TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall
8TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn 8TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso
9 9
10TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) 10TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
11BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) 11BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
@@ -60,3 +60,5 @@ endif
60 60
61# Some tests have additional dependencies. 61# Some tests have additional dependencies.
62sysret_ss_attrs_64: thunks.S 62sysret_ss_attrs_64: thunks.S
63ptrace_syscall_32: raw_syscall_helper_32.S
64test_syscall_vdso_32: thunks_32.S
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
new file mode 100644
index 000000000000..5105b49cd8aa
--- /dev/null
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -0,0 +1,294 @@
1#define _GNU_SOURCE
2
3#include <sys/ptrace.h>
4#include <sys/types.h>
5#include <sys/wait.h>
6#include <sys/syscall.h>
7#include <sys/user.h>
8#include <unistd.h>
9#include <errno.h>
10#include <stddef.h>
11#include <stdio.h>
12#include <err.h>
13#include <string.h>
14#include <asm/ptrace-abi.h>
15#include <sys/auxv.h>
16
17/* Bitness-agnostic defines for user_regs_struct fields. */
18#ifdef __x86_64__
19# define user_syscall_nr orig_rax
20# define user_arg0 rdi
21# define user_arg1 rsi
22# define user_arg2 rdx
23# define user_arg3 r10
24# define user_arg4 r8
25# define user_arg5 r9
26# define user_ip rip
27# define user_ax rax
28#else
29# define user_syscall_nr orig_eax
30# define user_arg0 ebx
31# define user_arg1 ecx
32# define user_arg2 edx
33# define user_arg3 esi
34# define user_arg4 edi
35# define user_arg5 ebp
36# define user_ip eip
37# define user_ax eax
38#endif
39
40static int nerrs = 0;
41
/*
 * Register image passed to the raw 32-bit syscall helpers: the syscall
 * number followed by the six syscall arguments.  The assembly helper
 * sys32_helper addresses this as seven consecutive 32-bit words, so the
 * field order must not change.
 */
struct syscall_args32 {
	uint32_t nr;
	uint32_t arg0;
	uint32_t arg1;
	uint32_t arg2;
	uint32_t arg3;
	uint32_t arg4;
	uint32_t arg5;
};
45
46#ifdef __i386__
47extern void sys32_helper(struct syscall_args32 *, void *);
48extern void int80_and_ret(void);
49#endif
50
51/*
52 * Helper to invoke int80 with controlled regs and capture the final regs.
53 */
54static void do_full_int80(struct syscall_args32 *args)
55{
56#ifdef __x86_64__
57 register unsigned long bp asm("bp") = args->arg5;
58 asm volatile ("int $0x80"
59 : "+a" (args->nr),
60 "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
61 "+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
62 args->arg5 = bp;
63#else
64 sys32_helper(args, int80_and_ret);
65#endif
66}
67
#ifdef __i386__
/* Entry point taken from AT_SYSINFO (i.e. __kernel_vsyscall); assigned in main(). */
static void (*vsyscall32)(void);

/*
 * Run one syscall through the AT_SYSINFO entry point with every register
 * taken from *args, and record the resulting registers back into *args.
 * The original author notes this is nasty enough to crash their copy of
 * gdb.
 */
static void do_full_vsyscall32(struct syscall_args32 *args)
{
	sys32_helper(args, vsyscall32);
}
#endif
81
82static siginfo_t wait_trap(pid_t chld)
83{
84 siginfo_t si;
85 if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
86 err(1, "waitid");
87 if (si.si_pid != chld)
88 errx(1, "got unexpected pid in event\n");
89 if (si.si_code != CLD_TRAPPED)
90 errx(1, "got unexpected event type %d\n", si.si_code);
91 return si;
92}
93
94static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
95 int flags)
96{
97 struct sigaction sa;
98 memset(&sa, 0, sizeof(sa));
99 sa.sa_sigaction = handler;
100 sa.sa_flags = SA_SIGINFO | flags;
101 sigemptyset(&sa.sa_mask);
102 if (sigaction(sig, &sa, 0))
103 err(1, "sigaction");
104}
105
106static void clearhandler(int sig)
107{
108 struct sigaction sa;
109 memset(&sa, 0, sizeof(sa));
110 sa.sa_handler = SIG_DFL;
111 sigemptyset(&sa.sa_mask);
112 if (sigaction(sig, &sa, 0))
113 err(1, "sigaction");
114}
115
116#ifdef __x86_64__
117# define REG_BP REG_RBP
118#else
119# define REG_BP REG_EBP
120#endif
121
122static void empty_handler(int sig, siginfo_t *si, void *ctx_void)
123{
124}
125
126static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *))
127{
128 struct syscall_args32 args = {
129 .nr = 224, /* gettid */
130 .arg0 = 10, .arg1 = 11, .arg2 = 12,
131 .arg3 = 13, .arg4 = 14, .arg5 = 15,
132 };
133
134 do_syscall(&args);
135
136 if (args.nr != getpid() ||
137 args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
138 args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
139 printf("[FAIL]\tgetpid() failed to preseve regs\n");
140 nerrs++;
141 } else {
142 printf("[OK]\tgetpid() preserves regs\n");
143 }
144
145 sethandler(SIGUSR1, empty_handler, 0);
146
147 args.nr = 37; /* kill */
148 args.arg0 = getpid();
149 args.arg1 = SIGUSR1;
150 do_syscall(&args);
151 if (args.nr != 0 ||
152 args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
153 args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
154 printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preseve regs\n");
155 nerrs++;
156 } else {
157 printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
158 }
159 clearhandler(SIGUSR1);
160}
161
162static void test_ptrace_syscall_restart(void)
163{
164 printf("[RUN]\tptrace-induced syscall restart\n");
165 pid_t chld = fork();
166 if (chld < 0)
167 err(1, "fork");
168
169 if (chld == 0) {
170 if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
171 err(1, "PTRACE_TRACEME");
172
173 printf("\tChild will make one syscall\n");
174 raise(SIGSTOP);
175
176 syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
177 _exit(0);
178 }
179
180 int status;
181
182 /* Wait for SIGSTOP. */
183 if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
184 err(1, "waitpid");
185
186 struct user_regs_struct regs;
187
188 printf("[RUN]\tSYSEMU\n");
189 if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
190 err(1, "PTRACE_SYSCALL");
191 wait_trap(chld);
192
193 if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
194 err(1, "PTRACE_GETREGS");
195
196 if (regs.user_syscall_nr != SYS_gettid ||
197 regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
198 regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
199 regs.user_arg4 != 14 || regs.user_arg5 != 15) {
200 printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
201 nerrs++;
202 } else {
203 printf("[OK]\tInitial nr and args are correct\n");
204 }
205
206 printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
207 (unsigned long)regs.user_ip);
208
209 /*
210 * This does exactly what it appears to do if syscall is int80 or
211 * SYSCALL64. For SYSCALL32 or SYSENTER, though, this is highly
212 * magical. It needs to work so that ptrace and syscall restart
213 * work as expected.
214 */
215 regs.user_ax = regs.user_syscall_nr;
216 regs.user_ip -= 2;
217 if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
218 err(1, "PTRACE_SETREGS");
219
220 if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
221 err(1, "PTRACE_SYSCALL");
222 wait_trap(chld);
223
224 if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
225 err(1, "PTRACE_GETREGS");
226
227 if (regs.user_syscall_nr != SYS_gettid ||
228 regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
229 regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
230 regs.user_arg4 != 14 || regs.user_arg5 != 15) {
231 printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
232 nerrs++;
233 } else {
234 printf("[OK]\tRestarted nr and args are correct\n");
235 }
236
237 printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
238 (unsigned long)regs.user_ip);
239
240 regs.user_ax = SYS_getpid;
241 regs.user_arg0 = 20;
242 regs.user_arg1 = 21;
243 regs.user_arg2 = 22;
244 regs.user_arg3 = 23;
245 regs.user_arg4 = 24;
246 regs.user_arg5 = 25;
247 regs.user_ip -= 2;
248
249 if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
250 err(1, "PTRACE_SETREGS");
251
252 if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
253 err(1, "PTRACE_SYSCALL");
254 wait_trap(chld);
255
256 if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
257 err(1, "PTRACE_GETREGS");
258
259 if (regs.user_syscall_nr != SYS_getpid ||
260 regs.user_arg0 != 20 || regs.user_arg1 != 21 || regs.user_arg2 != 22 ||
261 regs.user_arg3 != 23 || regs.user_arg4 != 24 || regs.user_arg5 != 25) {
262 printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
263 nerrs++;
264 } else {
265 printf("[OK]\tReplacement nr and args are correct\n");
266 }
267
268 if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
269 err(1, "PTRACE_CONT");
270 if (waitpid(chld, &status, 0) != chld)
271 err(1, "waitpid");
272 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
273 printf("[FAIL]\tChild failed\n");
274 nerrs++;
275 } else {
276 printf("[OK]\tChild exited cleanly\n");
277 }
278}
279
280int main()
281{
282 printf("[RUN]\tCheck int80 return regs\n");
283 test_sys32_regs(do_full_int80);
284
285#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
286 vsyscall32 = (void *)getauxval(AT_SYSINFO);
287 printf("[RUN]\tCheck AT_SYSINFO return regs\n");
288 test_sys32_regs(do_full_vsyscall32);
289#endif
290
291 test_ptrace_syscall_restart();
292
293 return 0;
294}
diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
new file mode 100644
index 000000000000..534e71e35c6a
--- /dev/null
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -0,0 +1,46 @@
/*
 * sys32_helper(args, fn): load eax/ebx/ecx/edx/esi/edi/ebp from the seven
 * consecutive 32-bit words at args, call fn, then store the same seven
 * registers back to args.  The caller's ebp/ebx/esi/edi are saved on the
 * stack around the call and restored before returning.
 */
1.global sys32_helper
2sys32_helper:
3 /* Args: syscall_args_32*, function pointer */
4 pushl %ebp
5 pushl %ebx
6 pushl %esi
7 pushl %edi
/* Four saved registers + return address sit below the first argument. */
8 movl 5*4(%esp), %eax /* pointer to args struct */
9
10 movl 1*4(%eax), %ebx
11 movl 2*4(%eax), %ecx
12 movl 3*4(%eax), %edx
13 movl 4*4(%eax), %esi
14 movl 5*4(%eax), %edi
15 movl 6*4(%eax), %ebp
/* eax is loaded last because it was the base pointer for the loads above. */
16 movl 0*4(%eax), %eax
17
/* Second argument (the function pointer) is one slot above the first. */
18 call *(6*4)(%esp) /* Do the syscall */
19
20 /* Now we need to recover without losing any reg values */
/* Park the result on the stack; the extra push shifts the args pointer to 6*4(%esp). */
21 pushl %eax
22 movl 6*4(%esp), %eax
/* Pop the parked result straight into word 0 of the args struct. */
23 popl 0*4(%eax)
24 movl %ebx, 1*4(%eax)
25 movl %ecx, 2*4(%eax)
26 movl %edx, 3*4(%eax)
27 movl %esi, 4*4(%eax)
28 movl %edi, 5*4(%eax)
29 movl %ebp, 6*4(%eax)
30
/* Restore the caller's registers in reverse push order. */
31 popl %edi
32 popl %esi
33 popl %ebx
34 popl %ebp
35 ret
36
37 .type sys32_helper, @function
38 .size sys32_helper, .-sys32_helper
39
/*
 * int80_and_ret: minimal callable stub that executes "int $0x80" with
 * whatever registers the caller set up and then returns.
 */
40.global int80_and_ret
41int80_and_ret:
42 int $0x80
43 ret
44
45 .type int80_and_ret, @function
46 .size int80_and_ret, .-int80_and_ret
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
new file mode 100644
index 000000000000..40370354d4c1
--- /dev/null
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -0,0 +1,401 @@
1/*
2 * 32-bit syscall ABI conformance test.
3 *
4 * Copyright (c) 2015 Denys Vlasenko
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15/*
16 * Can be built statically:
17 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
18 */
19#undef _GNU_SOURCE
20#define _GNU_SOURCE 1
21#undef __USE_GNU
22#define __USE_GNU 1
23#include <unistd.h>
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <signal.h>
28#include <sys/types.h>
29#include <sys/select.h>
30#include <sys/time.h>
31#include <elf.h>
32#include <sys/ptrace.h>
33#include <sys/wait.h>
34
35#if !defined(__i386__)
/*
 * Entry point used when the program is not built for 32-bit x86:
 * report that the test is skipped and exit successfully.
 */
int main(int argc, char **argv, char **envp)
{
	fputs("[SKIP]\tNot a 32-bit x86 userspace\n", stdout);
	return 0;
}
41#else
42
43long syscall_addr;
/*
 * Locate the AT_SYSINFO value in the ELF auxiliary vector.
 *
 * The auxiliary vector is read from the memory immediately after the
 * NULL pointer that terminates @envp.  Returns the AT_SYSINFO value, or
 * 0 (after printing a warning) when the vector has no such entry.
 */
long get_syscall(char **envp)
{
	const Elf32_auxv_t *entry;
	char **cursor = envp;

	/* Walk to the terminating NULL, then step over it. */
	while (*cursor != NULL)
		cursor++;
	cursor++;

	entry = (const void *)cursor;
	while (entry->a_type != AT_NULL) {
		if (entry->a_type == AT_SYSINFO)
			return entry->a_un.a_val;
		entry++;
	}

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}
55
/*
 * Global text symbol "int80": executes "int $0x80" and returns.  It is
 * declared as a char below only so that its address can be taken.
 */
56asm (
57 " .pushsection .text\n"
58 " .global int80\n"
59 "int80:\n"
60 " int $0x80\n"
61 " ret\n"
62 " .popsection\n"
63);
64extern char int80;
65
/*
 * Snapshot of the sixteen 64-bit general-purpose registers.  The
 * get_regs64 assembly routine stores into this layout by fixed 8-byte
 * slot index, so the field order must not change.
 */
struct regs64 {
	uint64_t rax;
	uint64_t rbx;
	uint64_t rcx;
	uint64_t rdx;
	uint64_t rsi;
	uint64_t rdi;
	uint64_t rbp;
	uint64_t rsp;
	uint64_t r8;
	uint64_t r9;
	uint64_t r10;
	uint64_t r11;
	uint64_t r12;
	uint64_t r13;
	uint64_t r14;
	uint64_t r15;
};
72struct regs64 regs64;
73int kernel_is_64bit;
74
/*
 * Two routines assembled as 64-bit code inside this 32-bit program:
 *
 *   get_regs64    - stores rax..r15 into the global regs64, one 8-byte
 *                   slot per register in struct regs64 field order.
 *   poison_regs64 - fills r8..r15 with the pattern 0x7f7f7f7f7f7f7f7f.
 */
75asm (
76 " .pushsection .text\n"
77 " .code64\n"
78 "get_regs64:\n"
/* rax is needed as the base pointer, so its value is parked on the stack first. */
79 " push %rax\n"
80 " mov $regs64, %eax\n"
/* Pop the parked rax directly into slot 0. */
81 " pop 0*8(%rax)\n"
82 " movq %rbx, 1*8(%rax)\n"
83 " movq %rcx, 2*8(%rax)\n"
84 " movq %rdx, 3*8(%rax)\n"
85 " movq %rsi, 4*8(%rax)\n"
86 " movq %rdi, 5*8(%rax)\n"
87 " movq %rbp, 6*8(%rax)\n"
88 " movq %rsp, 7*8(%rax)\n"
89 " movq %r8, 8*8(%rax)\n"
90 " movq %r9, 9*8(%rax)\n"
91 " movq %r10, 10*8(%rax)\n"
92 " movq %r11, 11*8(%rax)\n"
93 " movq %r12, 12*8(%rax)\n"
94 " movq %r13, 13*8(%rax)\n"
95 " movq %r14, 14*8(%rax)\n"
96 " movq %r15, 15*8(%rax)\n"
97 " ret\n"
98 "poison_regs64:\n"
/* Build the 64-bit pattern in r8 from two 32-bit halves, then copy it to r9..r15. */
99 " movq $0x7f7f7f7f, %r8\n"
100 " shl $32, %r8\n"
101 " orq $0x7f7f7f7f, %r8\n"
102 " movq %r8, %r9\n"
103 " movq %r8, %r10\n"
104 " movq %r8, %r11\n"
105 " movq %r8, %r12\n"
106 " movq %r8, %r13\n"
107 " movq %r8, %r14\n"
108 " movq %r8, %r15\n"
109 " ret\n"
110 " .code32\n"
111 " .popsection\n"
112);
113extern void get_regs64(void);
114extern void poison_regs64(void);
115extern unsigned long call64_from_32(void (*function)(void));
116void print_regs64(void)
117{
118 if (!kernel_is_64bit)
119 return;
120 printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
121 printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
122 printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
123 printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
124}
125
126int check_regs64(void)
127{
128 int err = 0;
129 int num = 8;
130 uint64_t *r64 = &regs64.r8;
131
132 if (!kernel_is_64bit)
133 return 0;
134
135 do {
136 if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
137 continue; /* register did not change */
138 if (syscall_addr != (long)&int80) {
139 /*
140 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
141 * either clear them to 0, or for R11, load EFLAGS.
142 */
143 if (*r64 == 0)
144 continue;
145 if (num == 11) {
146 printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
147 continue;
148 }
149 } else {
150 /* INT80 syscall entrypoint can be used by
151 * 64-bit programs too, unlike SYSCALL/SYSENTER.
152 * Therefore it must preserve R12+
153 * (they are callee-saved registers in 64-bit C ABI).
154 *
155 * This was probably historically not intended,
156 * but R8..11 are clobbered (cleared to 0).
157 * IOW: they are the only registers which aren't
158 * preserved across INT80 syscall.
159 */
160 if (*r64 == 0 && num <= 11)
161 continue;
162 }
163 printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
164 err++;
165 } while (r64++, ++num < 16);
166
167 if (!err)
168 printf("[OK]\tR8..R15 did not leak kernel data\n");
169 return err;
170}
171
172int nfds;
173fd_set rfds;
174fd_set wfds;
175fd_set efds;
176struct timespec timeout;
177sigset_t sigmask;
178struct {
179 sigset_t *sp;
180 int sz;
181} sigmask_desc;
182
183void prep_args()
184{
185 nfds = 42;
186 FD_ZERO(&rfds);
187 FD_ZERO(&wfds);
188 FD_ZERO(&efds);
189 FD_SET(0, &rfds);
190 FD_SET(1, &wfds);
191 FD_SET(2, &efds);
192 timeout.tv_sec = 0;
193 timeout.tv_nsec = 123;
194 sigemptyset(&sigmask);
195 sigaddset(&sigmask, SIGINT);
196 sigaddset(&sigmask, SIGUSR2);
197 sigaddset(&sigmask, SIGRTMAX);
198 sigmask_desc.sp = &sigmask;
199 sigmask_desc.sz = 8; /* bytes */
200}
201
/*
 * Print "<name>=<r as 16 hex digits> " followed by a symbolic decoding
 * of EFLAGS bits 21 down to 0.  A note is added first when any bit
 * above 21 is set.  The line is terminated by the bit-0 (carry) entry,
 * whose strings end in a newline.
 */
static void print_flags(const char *name, unsigned long r)
{
	/* Two strings per bit: [2*bit] when the bit is clear, [2*bit + 1] when set. */
	static const char *bitarray[] = {
		"\n" ,"c\n" ,/* Carry Flag */
		"0 " ,"1 "  ,/* Bit 1 - always on */
		""   ,"p "  ,/* Parity Flag */
		"0 " ,"3? " ,
		""   ,"a "  ,/* Auxiliary carry Flag */
		"0 " ,"5? " ,
		""   ,"z "  ,/* Zero Flag */
		""   ,"s "  ,/* Sign Flag */
		""   ,"t "  ,/* Trap Flag */
		""   ,"i "  ,/* Interrupt Flag */
		""   ,"d "  ,/* Direction Flag */
		""   ,"o "  ,/* Overflow Flag */
		"0 " ,"1 "  ,/* I/O Privilege Level (2 bits) */
		"0"  ,"1"   ,/* I/O Privilege Level (2 bits) */
		""   ,"n "  ,/* Nested Task */
		"0 " ,"15? ",
		""   ,"r "  ,/* Resume Flag */
		""   ,"v "  ,/* Virtual Mode */
		""   ,"ac " ,/* Alignment Check/Access Control */
		""   ,"vif ",/* Virtual Interrupt Flag */
		""   ,"vip ",/* Virtual Interrupt Pending */
		""   ,"id " ,/* CPUID detection */
		NULL
	};
	int bit;

	printf("%s=%016lx ", name, r);
	if ((r >> 22) != 0)
		printf("(extra bits are set) ");

	/* Most significant decoded bit first; empty strings print nothing. */
	for (bit = 21; bit >= 0; bit--) {
		const char *text = bitarray[2 * bit + ((r >> bit) & 1)];

		if (text[0] != '\0')
			fputs(text, stdout);
	}
}
244
/*
 * Perform one 6-argument syscall (nr 308) through the entry point stored
 * in syscall_addr and check what it did to the registers.
 *
 * Steps: prepare the global argument objects; on a 64-bit kernel poison
 * R8..R15; load the argument registers and EFLAGS, call the entry point;
 * capture EFLAGS and compare every argument register with the value that
 * was loaded; on a 64-bit kernel snapshot the 64-bit registers.
 *
 * Changed flags only produce warnings.  Returns 1 if an argument
 * register was clobbered, otherwise the result of check_regs64().
 */
245int run_syscall(void)
246{
247 long flags, bad_arg;
248
249 prep_args();
250
251 if (kernel_is_64bit)
252 call64_from_32(poison_regs64);
253 /*print_regs64();*/
254
255 asm("\n"
256 /* Try 6-arg syscall: pselect. It should return quickly */
/* ebp carries the sixth argument, so the compiler's value is saved around the asm. */
257 " push %%ebp\n"
258 " mov $308, %%eax\n" /* PSELECT */
259 " mov nfds, %%ebx\n" /* ebx arg1 */
260 " mov $rfds, %%ecx\n" /* ecx arg2 */
261 " mov $wfds, %%edx\n" /* edx arg3 */
262 " mov $efds, %%esi\n" /* esi arg4 */
263 " mov $timeout, %%edi\n" /* edi arg5 */
264 " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
265 " push $0x200ed7\n" /* set almost all flags */
266 " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
267 " call *syscall_addr\n"
268 /* Check that registers are not clobbered */
/* Capture EFLAGS into eax (the "flags" output) before cmp changes them. */
269 " pushf\n"
270 " pop %%eax\n"
271 " cld\n"
/* Each cmp is followed by a mov that sets the argument index without touching flags. */
272 " cmp nfds, %%ebx\n" /* ebx arg1 */
273 " mov $1, %%ebx\n"
274 " jne 1f\n"
275 " cmp $rfds, %%ecx\n" /* ecx arg2 */
276 " mov $2, %%ebx\n"
277 " jne 1f\n"
278 " cmp $wfds, %%edx\n" /* edx arg3 */
279 " mov $3, %%ebx\n"
280 " jne 1f\n"
281 " cmp $efds, %%esi\n" /* esi arg4 */
282 " mov $4, %%ebx\n"
283 " jne 1f\n"
284 " cmp $timeout, %%edi\n" /* edi arg5 */
285 " mov $5, %%ebx\n"
286 " jne 1f\n"
287 " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
288 " mov $6, %%ebx\n"
289 " jne 1f\n"
/* All six matched: report 0 in ebx (the "bad_arg" output). */
290 " mov $0, %%ebx\n"
291 "1:\n"
292 " pop %%ebp\n"
293 : "=a" (flags), "=b" (bad_arg)
294 :
295 : "cx", "dx", "si", "di"
296 );
297
298 if (kernel_is_64bit) {
/* Pre-fill with 0x77 so that slots get_regs64 does not write are recognisable. */
299 memset(&regs64, 0x77, sizeof(regs64));
300 call64_from_32(get_regs64);
301 /*print_regs64();*/
302 }
303
304 /*
305 * On paravirt kernels, flags are not preserved across syscalls.
306 * Thus, we do not consider it a bug if some are changed.
307 * We just show ones which do.
308 */
309 if ((0x200ed7 ^ flags) != 0) {
310 print_flags("[WARN]\tFlags before", 0x200ed7);
311 print_flags("[WARN]\tFlags after", flags);
312 print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
313 }
314
315 if (bad_arg) {
316 printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
317 return 1;
318 }
319 printf("[OK]\tArguments are preserved across syscall\n");
320
321 return check_regs64();
322}
323
324int run_syscall_twice()
325{
326 int exitcode = 0;
327 long sv;
328
329 if (syscall_addr) {
330 printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
331 exitcode = run_syscall();
332 }
333 sv = syscall_addr;
334 syscall_addr = (long)&int80;
335 printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
336 exitcode += run_syscall();
337 syscall_addr = sv;
338 return exitcode;
339}
340
/*
 * Fork so that the remainder of the test runs in a child that is traced
 * by its parent.
 *
 * Child:  requests tracing, stops itself with SIGSTOP and returns to the
 *         caller once resumed.  If PTRACE_TRACEME fails it exits with 0.
 * Parent: never returns.  It keeps resuming the child with
 *         PTRACE_SYSCALL and finally exits with the child's exit status
 *         (or terminating signal number).  Exits with 255 on an
 *         unexpected wait result and with 1 if fork() fails.
 */
void ptrace_me()
{
	pid_t pid;

	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}
	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status;

		pid = waitpid(-1, &status, __WALL);
		/* status is only meaningful when waitpid() succeeded. */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}
376
377int main(int argc, char **argv, char **envp)
378{
379 int exitcode = 0;
380 int cs;
381
382 asm("\n"
383 " movl %%cs, %%eax\n"
384 : "=a" (cs)
385 );
386 kernel_is_64bit = (cs == 0x23);
387 if (!kernel_is_64bit)
388 printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
389
390 /* This only works for non-static builds:
391 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
392 */
393 syscall_addr = get_syscall(envp);
394
395 exitcode += run_syscall_twice();
396 ptrace_me();
397 exitcode += run_syscall_twice();
398
399 return exitcode;
400}
401#endif
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
new file mode 100644
index 000000000000..29b644bb9f2f
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -0,0 +1,55 @@
1/*
2 * thunks_32.S - assembly helpers for mixed-bitness code
3 * Copyright (c) 2015 Denys Vlasenko
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * These are little helpers that make it easier to switch bitness on
15 * the fly.
16 */
17
18 .text
19 .code32
20
21 .global call64_from_32
22 .type call32_from_64, @function
23
24 // 4(%esp): function to call
25call64_from_32:
26 // Fetch function address
27 mov 4(%esp), %eax
28
29 // Save registers which are callee-clobbered by 64-bit ABI
30 push %ecx
31 push %edx
32 push %esi
33 push %edi
34
35 // Switch to long mode
36 jmp $0x33,$1f
371: .code64
38
39 // Call the function
40 call *%rax
41
42 // Switch to compatibility mode
43 push $0x23 /* USER32_CS */
44 .code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */
45 lretq
461: .code32
47
48 pop %edi
49 pop %esi
50 pop %edx
51 pop %ecx
52
53 ret
54
55.size call64_from_32, .-call64_from_32
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
new file mode 100644
index 000000000000..00a26a82fa98
--- /dev/null
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -0,0 +1,211 @@
1/*
2 * unwind_vdso.c - tests unwind info for AT_SYSINFO in the vDSO
3 * Copyright (c) 2014-2015 Andrew Lutomirski
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * This tests __kernel_vsyscall's unwind info.
15 */
16
17#define _GNU_SOURCE
18
19#include <features.h>
20#include <stdio.h>
21
22#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
23
/*
 * Stub entry point for glibc older than 2.16, which lacks getauxval():
 * report that the test is skipped and exit successfully.
 */
int main()
{
	/* We need getauxval(). */
	fputs("[SKIP]\tGLIBC before 2.16 cannot compile this test\n", stdout);
	return 0;
}
30
31#else
32
33#include <sys/time.h>
34#include <stdlib.h>
35#include <syscall.h>
36#include <unistd.h>
37#include <string.h>
38#include <inttypes.h>
39#include <sys/mman.h>
40#include <signal.h>
41#include <sys/ucontext.h>
42#include <err.h>
43#include <stddef.h>
44#include <stdbool.h>
45#include <sys/ptrace.h>
46#include <sys/user.h>
47#include <sys/ucontext.h>
48#include <link.h>
49#include <sys/auxv.h>
50#include <dlfcn.h>
51#include <unwind.h>
52
53static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
54 int flags)
55{
56 struct sigaction sa;
57 memset(&sa, 0, sizeof(sa));
58 sa.sa_sigaction = handler;
59 sa.sa_flags = SA_SIGINFO | flags;
60 sigemptyset(&sa.sa_mask);
61 if (sigaction(sig, &sa, 0))
62 err(1, "sigaction");
63}
64
65#ifdef __x86_64__
66# define WIDTH "q"
67#else
68# define WIDTH "l"
69#endif
70
/*
 * Return the current value of the flags register, read by pushing it on
 * the stack and popping it into the result.
 */
71static unsigned long get_eflags(void)
72{
73 unsigned long eflags;
/* WIDTH selects the operand-size suffix ("q" on x86_64, "l" otherwise). */
74 asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
75 return eflags;
76}
77
/*
 * Load the flags register with @eflags by pushing the value on the
 * stack and popping it into the flags.
 */
78static void set_eflags(unsigned long eflags)
79{
/* "flags" is listed as clobbered because popf rewrites the condition codes. */
80 asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
81 : : "rm" (eflags) : "flags");
82}
83
84#define X86_EFLAGS_TF (1UL << 8)
85
86static volatile sig_atomic_t nerrs;
87static unsigned long sysinfo;
88static bool got_sysinfo = false;
89static unsigned long return_address;
90
/* Per-backtrace bookkeeping handed to trace_fn through its opaque pointer. */
struct unwind_state {
	/* Instruction pointer at which the trap was taken. */
	unsigned long ip;
	/* Frames walked since the trap source; -1 until it is reached. */
	int depth;
};
95
96_Unwind_Reason_Code trace_fn(struct _Unwind_Context * ctx, void *opaque)
97{
98 struct unwind_state *state = opaque;
99 unsigned long ip = _Unwind_GetIP(ctx);
100
101 if (state->depth == -1) {
102 if (ip == state->ip)
103 state->depth = 0;
104 else
105 return _URC_NO_REASON; /* Not there yet */
106 }
107 printf("\t 0x%lx\n", ip);
108
109 if (ip == return_address) {
110 /* Here we are. */
111 unsigned long eax = _Unwind_GetGR(ctx, 0);
112 unsigned long ecx = _Unwind_GetGR(ctx, 1);
113 unsigned long edx = _Unwind_GetGR(ctx, 2);
114 unsigned long ebx = _Unwind_GetGR(ctx, 3);
115 unsigned long ebp = _Unwind_GetGR(ctx, 5);
116 unsigned long esi = _Unwind_GetGR(ctx, 6);
117 unsigned long edi = _Unwind_GetGR(ctx, 7);
118 bool ok = (eax == SYS_getpid || eax == getpid()) &&
119 ebx == 1 && ecx == 2 && edx == 3 &&
120 esi == 4 && edi == 5 && ebp == 6;
121
122 if (!ok)
123 nerrs++;
124 printf("[%s]\t NR = %ld, args = %ld, %ld, %ld, %ld, %ld, %ld\n",
125 (ok ? "OK" : "FAIL"),
126 eax, ebx, ecx, edx, esi, edi, ebp);
127
128 return _URC_NORMAL_STOP;
129 } else {
130 state->depth++;
131 return _URC_NO_REASON;
132 }
133}
134
135static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
136{
137 ucontext_t *ctx = (ucontext_t *)ctx_void;
138 struct unwind_state state;
139 unsigned long ip = ctx->uc_mcontext.gregs[REG_EIP];
140
141 if (!got_sysinfo && ip == sysinfo) {
142 got_sysinfo = true;
143
144 /* Find the return address. */
145 return_address = *(unsigned long *)(unsigned long)ctx->uc_mcontext.gregs[REG_ESP];
146
147 printf("\tIn vsyscall at 0x%lx, returning to 0x%lx\n",
148 ip, return_address);
149 }
150
151 if (!got_sysinfo)
152 return; /* Not there yet */
153
154 if (ip == return_address) {
155 ctx->uc_mcontext.gregs[REG_EFL] &= ~X86_EFLAGS_TF;
156 printf("\tVsyscall is done\n");
157 return;
158 }
159
160 printf("\tSIGTRAP at 0x%lx\n", ip);
161
162 state.ip = ip;
163 state.depth = -1;
164 _Unwind_Backtrace(trace_fn, &state);
165}
166
167int main()
168{
169 sysinfo = getauxval(AT_SYSINFO);
170 printf("\tAT_SYSINFO is 0x%lx\n", sysinfo);
171
172 Dl_info info;
173 if (!dladdr((void *)sysinfo, &info)) {
174 printf("[WARN]\tdladdr failed on AT_SYSINFO\n");
175 } else {
176 printf("[OK]\tAT_SYSINFO maps to %s, loaded at 0x%p\n",
177 info.dli_fname, info.dli_fbase);
178 }
179
180 sethandler(SIGTRAP, sigtrap, 0);
181
182 syscall(SYS_getpid); /* Force symbol binding without TF set. */
183 printf("[RUN]\tSet TF and check a fast syscall\n");
184 set_eflags(get_eflags() | X86_EFLAGS_TF);
185 syscall(SYS_getpid, 1, 2, 3, 4, 5, 6);
186 if (!got_sysinfo) {
187 set_eflags(get_eflags() & ~X86_EFLAGS_TF);
188
189 /*
190 * The most likely cause of this is that you're on Debian or
191 * a Debian-based distro, you're missing libc6-i686, and you're
192 * affected by libc/19006 (https://sourceware.org/PR19006).
193 */
194 printf("[WARN]\tsyscall(2) didn't enter AT_SYSINFO\n");
195 }
196
197 if (get_eflags() & X86_EFLAGS_TF) {
198 printf("[FAIL]\tTF is still set\n");
199 nerrs++;
200 }
201
202 if (nerrs) {
203 printf("[FAIL]\tThere were errors\n");
204 return 1;
205 } else {
206 printf("[OK]\tAll is well\n");
207 return 0;
208 }
209}
210
211#endif /* New enough libc */