author     Linus Torvalds <torvalds@linux-foundation.org>  2015-09-01 11:40:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-09-01 11:40:25 -0400
commit     5778077d03cb25aac9b6a428e18970642fc019e3
tree       2e3f3da1fb99c3646da5ed9a09644696ca5f2309
parent     65a99597f044c083983f4274ab049c9ec3b9d764
parent     7e01ebffffedec22cea86ebe94802f909e4579ca
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
"The biggest changes in this cycle were:
- Revamp, simplify (and in some cases fix) Time Stamp Counter (TSC)
primitives. (Andy Lutomirski)
- Add new, comprehensible entry and exit handlers written in C.
(Andy Lutomirski)
- vm86 mode cleanups and fixes. (Brian Gerst)
- 32-bit compat code cleanups. (Brian Gerst)
The amount of simplification in low level assembly code is already
palpable:
arch/x86/entry/entry_32.S | 130 +----
arch/x86/entry/entry_64.S | 197 ++-----
but more simplifications are planned.
There's also the usual laundry mix of low level changes - see the
changelog for details"
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (83 commits)
x86/asm: Drop repeated macro of X86_EFLAGS_AC definition
x86/asm/msr: Make wrmsrl() a function
x86/asm/delay: Introduce an MWAITX-based delay with a configurable timer
x86/asm: Add MONITORX/MWAITX instruction support
x86/traps: Weaken context tracking entry assertions
x86/asm/tsc: Add rdtscll() merge helper
selftests/x86: Add syscall_nt selftest
selftests/x86: Disable sigreturn_64
x86/vdso: Emit a GNU hash
x86/entry: Remove do_notify_resume(), syscall_trace_leave(), and their TIF masks
x86/entry/32: Migrate to C exit path
x86/entry/32: Remove 32-bit syscall audit optimizations
x86/vm86: Rename vm86->v86flags and v86mask
x86/vm86: Rename vm86->vm86_info to user_vm86
x86/vm86: Clean up vm86.h includes
x86/vm86: Move the vm86 IRQ definitions to vm86.h
x86/vm86: Use the normal pt_regs area for vm86
x86/vm86: Eliminate 'struct kernel_vm86_struct'
x86/vm86: Move fields from 'struct kernel_vm86_struct' to 'struct vm86'
x86/vm86: Move vm86 fields out of 'thread_struct'
...
100 files changed, 2197 insertions, 1384 deletions
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 83a91f976330..35ab97e4bb9b 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -22,7 +22,8 @@ extern int kmalloc_ok; | |||
22 | extern unsigned long alloc_stack(int order, int atomic); | 22 | extern unsigned long alloc_stack(int order, int atomic); |
23 | extern void free_stack(unsigned long stack, int order); | 23 | extern void free_stack(unsigned long stack, int order); |
24 | 24 | ||
25 | extern int do_signal(void); | 25 | struct pt_regs; |
26 | extern void do_signal(struct pt_regs *regs); | ||
26 | extern void interrupt_end(void); | 27 | extern void interrupt_end(void); |
27 | extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs); | 28 | extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs); |
28 | 29 | ||
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 68b9119841cd..a6d922672b9f 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -90,12 +90,14 @@ void *__switch_to(struct task_struct *from, struct task_struct *to) | |||
90 | 90 | ||
91 | void interrupt_end(void) | 91 | void interrupt_end(void) |
92 | { | 92 | { |
93 | struct pt_regs *regs = &current->thread.regs; | ||
94 | |||
93 | if (need_resched()) | 95 | if (need_resched()) |
94 | schedule(); | 96 | schedule(); |
95 | if (test_thread_flag(TIF_SIGPENDING)) | 97 | if (test_thread_flag(TIF_SIGPENDING)) |
96 | do_signal(); | 98 | do_signal(regs); |
97 | if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) | 99 | if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) |
98 | tracehook_notify_resume(&current->thread.regs); | 100 | tracehook_notify_resume(regs); |
99 | } | 101 | } |
100 | 102 | ||
101 | void exit_thread(void) | 103 | void exit_thread(void) |
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 4f60e4aad790..57acbd67d85d 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -64,7 +64,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
64 | signal_setup_done(err, ksig, singlestep); | 64 | signal_setup_done(err, ksig, singlestep); |
65 | } | 65 | } |
66 | 66 | ||
67 | static int kern_do_signal(struct pt_regs *regs) | 67 | void do_signal(struct pt_regs *regs) |
68 | { | 68 | { |
69 | struct ksignal ksig; | 69 | struct ksignal ksig; |
70 | int handled_sig = 0; | 70 | int handled_sig = 0; |
@@ -110,10 +110,4 @@ static int kern_do_signal(struct pt_regs *regs) | |||
110 | */ | 110 | */ |
111 | if (!handled_sig) | 111 | if (!handled_sig) |
112 | restore_saved_sigmask(); | 112 | restore_saved_sigmask(); |
113 | return handled_sig; | ||
114 | } | ||
115 | |||
116 | int do_signal(void) | ||
117 | { | ||
118 | return kern_do_signal(&current->thread.regs); | ||
119 | } | 113 | } |
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index f1b3eb14b855..2077248e8a72 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -291,7 +291,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, | |||
291 | /* We are under mmap_sem, release it such that current can terminate */ | 291 | /* We are under mmap_sem, release it such that current can terminate */ |
292 | up_write(&current->mm->mmap_sem); | 292 | up_write(&current->mm->mmap_sem); |
293 | force_sig(SIGKILL, current); | 293 | force_sig(SIGKILL, current); |
294 | do_signal(); | 294 | do_signal(&current->thread.regs); |
295 | } | 295 | } |
296 | } | 296 | } |
297 | 297 | ||
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 557232f758b6..d8a9fce6ee2e 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -173,7 +173,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) | |||
173 | void fatal_sigsegv(void) | 173 | void fatal_sigsegv(void) |
174 | { | 174 | { |
175 | force_sigsegv(SIGSEGV, current); | 175 | force_sigsegv(SIGSEGV, current); |
176 | do_signal(); | 176 | do_signal(&current->thread.regs); |
177 | /* | 177 | /* |
178 | * This is to tell gcc that we're not returning - do_signal | 178 | * This is to tell gcc that we're not returning - do_signal |
179 | * can, in general, return, but in this case, it's not, since | 179 | * can, in general, return, but in this case, it's not, since |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 06dbb5da90c6..48f7433dac6f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -133,7 +133,7 @@ config X86 | |||
133 | select HAVE_PERF_USER_STACK_DUMP | 133 | select HAVE_PERF_USER_STACK_DUMP |
134 | select HAVE_REGS_AND_STACK_ACCESS_API | 134 | select HAVE_REGS_AND_STACK_ACCESS_API |
135 | select HAVE_SYSCALL_TRACEPOINTS | 135 | select HAVE_SYSCALL_TRACEPOINTS |
136 | select HAVE_UID16 if X86_32 | 136 | select HAVE_UID16 if X86_32 || IA32_EMULATION |
137 | select HAVE_UNSTABLE_SCHED_CLOCK | 137 | select HAVE_UNSTABLE_SCHED_CLOCK |
138 | select HAVE_USER_RETURN_NOTIFIER | 138 | select HAVE_USER_RETURN_NOTIFIER |
139 | select IRQ_FORCED_THREADING | 139 | select IRQ_FORCED_THREADING |
@@ -1003,19 +1003,41 @@ config X86_THERMAL_VECTOR | |||
1003 | def_bool y | 1003 | def_bool y |
1004 | depends on X86_MCE_INTEL | 1004 | depends on X86_MCE_INTEL |
1005 | 1005 | ||
1006 | config VM86 | 1006 | config X86_LEGACY_VM86 |
1007 | bool "Enable VM86 support" if EXPERT | 1007 | bool "Legacy VM86 support (obsolete)" |
1008 | default y | 1008 | default n |
1009 | depends on X86_32 | 1009 | depends on X86_32 |
1010 | ---help--- | 1010 | ---help--- |
1011 | This option is required by programs like DOSEMU to run | 1011 | This option allows user programs to put the CPU into V8086 |
1012 | 16-bit real mode legacy code on x86 processors. It also may | 1012 | mode, which is an 80286-era approximation of 16-bit real mode. |
1013 | be needed by software like XFree86 to initialize some video | 1013 | |
1014 | cards via BIOS. Disabling this option saves about 6K. | 1014 | Some very old versions of X and/or vbetool require this option |
1015 | for user mode setting. Similarly, DOSEMU will use it if | ||
1016 | available to accelerate real mode DOS programs. However, any | ||
1017 | recent version of DOSEMU, X, or vbetool should be fully | ||
1018 | functional even without kernel VM86 support, as they will all | ||
1019 | fall back to (pretty well performing) software emulation. | ||
1020 | |||
1021 | Anything that works on a 64-bit kernel is unlikely to need | ||
1022 | this option, as 64-bit kernels don't, and can't, support V8086 | ||
1023 | mode. This option is also unrelated to 16-bit protected mode | ||
1024 | and is not needed to run most 16-bit programs under Wine. | ||
1025 | |||
1026 | Enabling this option adds considerable attack surface to the | ||
1027 | kernel and slows down system calls and exception handling. | ||
1028 | |||
1029 | Unless you use very old userspace or need the last drop of | ||
1030 | performance in your real mode DOS games and can't use KVM, | ||
1031 | say N here. | ||
1032 | |||
1033 | config VM86 | ||
1034 | bool | ||
1035 | default X86_LEGACY_VM86 | ||
1015 | 1036 | ||
1016 | config X86_16BIT | 1037 | config X86_16BIT |
1017 | bool "Enable support for 16-bit segments" if EXPERT | 1038 | bool "Enable support for 16-bit segments" if EXPERT |
1018 | default y | 1039 | default y |
1040 | depends on MODIFY_LDT_SYSCALL | ||
1019 | ---help--- | 1041 | ---help--- |
1020 | This option is required by programs like Wine to run 16-bit | 1042 | This option is required by programs like Wine to run 16-bit |
1021 | protected mode legacy code on x86 processors. Disabling | 1043 | protected mode legacy code on x86 processors. Disabling |
@@ -1510,6 +1532,7 @@ config X86_RESERVE_LOW | |||
1510 | 1532 | ||
1511 | config MATH_EMULATION | 1533 | config MATH_EMULATION |
1512 | bool | 1534 | bool |
1535 | depends on MODIFY_LDT_SYSCALL | ||
1513 | prompt "Math emulation" if X86_32 | 1536 | prompt "Math emulation" if X86_32 |
1514 | ---help--- | 1537 | ---help--- |
1515 | Linux can emulate a math coprocessor (used for floating point | 1538 | Linux can emulate a math coprocessor (used for floating point |
@@ -2054,6 +2077,22 @@ config CMDLINE_OVERRIDE | |||
2054 | This is used to work around broken boot loaders. This should | 2077 | This is used to work around broken boot loaders. This should |
2055 | be set to 'N' under normal conditions. | 2078 | be set to 'N' under normal conditions. |
2056 | 2079 | ||
2080 | config MODIFY_LDT_SYSCALL | ||
2081 | bool "Enable the LDT (local descriptor table)" if EXPERT | ||
2082 | default y | ||
2083 | ---help--- | ||
2084 | Linux can allow user programs to install a per-process x86 | ||
2085 | Local Descriptor Table (LDT) using the modify_ldt(2) system | ||
2086 | call. This is required to run 16-bit or segmented code such as | ||
2087 | DOSEMU or some Wine programs. It is also used by some very old | ||
2088 | threading libraries. | ||
2089 | |||
2090 | Enabling this feature adds a small amount of overhead to | ||
2091 | context switches and increases the low-level kernel attack | ||
2092 | surface. Disabling it removes the modify_ldt(2) system call. | ||
2093 | |||
2094 | Saying 'N' here may make sense for embedded or server kernels. | ||
2095 | |||
2057 | source "kernel/livepatch/Kconfig" | 2096 | source "kernel/livepatch/Kconfig" |
2058 | 2097 | ||
2059 | endmenu | 2098 | endmenu |
@@ -2523,7 +2562,7 @@ config IA32_EMULATION | |||
2523 | depends on X86_64 | 2562 | depends on X86_64 |
2524 | select BINFMT_ELF | 2563 | select BINFMT_ELF |
2525 | select COMPAT_BINFMT_ELF | 2564 | select COMPAT_BINFMT_ELF |
2526 | select HAVE_UID16 | 2565 | select ARCH_WANT_OLD_COMPAT_IPC |
2527 | ---help--- | 2566 | ---help--- |
2528 | Include code to run legacy 32-bit programs under a | 2567 | Include code to run legacy 32-bit programs under a |
2529 | 64-bit kernel. You should likely turn this on, unless you're | 2568 | 64-bit kernel. You should likely turn this on, unless you're |
@@ -2537,7 +2576,7 @@ config IA32_AOUT | |||
2537 | 2576 | ||
2538 | config X86_X32 | 2577 | config X86_X32 |
2539 | bool "x32 ABI for 64-bit mode" | 2578 | bool "x32 ABI for 64-bit mode" |
2540 | depends on X86_64 && IA32_EMULATION | 2579 | depends on X86_64 |
2541 | ---help--- | 2580 | ---help--- |
2542 | Include code to run binaries for the x32 native 32-bit ABI | 2581 | Include code to run binaries for the x32 native 32-bit ABI |
2543 | for 64-bit processors. An x32 process gets access to the | 2582 | for 64-bit processors. An x32 process gets access to the |
@@ -2551,7 +2590,6 @@ config X86_X32 | |||
2551 | config COMPAT | 2590 | config COMPAT |
2552 | def_bool y | 2591 | def_bool y |
2553 | depends on IA32_EMULATION || X86_X32 | 2592 | depends on IA32_EMULATION || X86_X32 |
2554 | select ARCH_WANT_OLD_COMPAT_IPC | ||
2555 | 2593 | ||
2556 | if COMPAT | 2594 | if COMPAT |
2557 | config COMPAT_FOR_U64_ALIGNMENT | 2595 | config COMPAT_FOR_U64_ALIGNMENT |
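
The new MODIFY_LDT_SYSCALL help text above refers to modify_ldt(2), the call that installs a per-process LDT and that saying N removes. For readers who have not used it, here is a minimal user-space sketch; it is not part of this patch set, it assumes the struct user_desc layout from <asm/ldt.h>, and the descriptor values are purely illustrative.

/*
 * Hedged sketch (not from this patch set): a minimal user-space call to
 * modify_ldt(2), the syscall that MODIFY_LDT_SYSCALL=n removes.
 */
#include <asm/ldt.h>		/* struct user_desc */
#include <sys/syscall.h>	/* SYS_modify_ldt */
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number   = 0;	/* first LDT slot */
	desc.base_addr      = 0;	/* illustrative flat base */
	desc.limit          = 0xfffff;
	desc.seg_32bit      = 1;
	desc.limit_in_pages = 1;

	/* func 1 = write one descriptor; returns 0 on success */
	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt");
		return 1;
	}
	printf("installed LDT entry %u\n", desc.entry_number);
	return 0;
}
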
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 0f38418719ab..747860c696e1 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -39,6 +39,16 @@ ifdef CONFIG_X86_NEED_RELOCS | |||
39 | LDFLAGS_vmlinux := --emit-relocs | 39 | LDFLAGS_vmlinux := --emit-relocs |
40 | endif | 40 | endif |
41 | 41 | ||
42 | # | ||
43 | # Prevent GCC from generating any FP code by mistake. | ||
44 | # | ||
45 | # This must happen before we try the -mpreferred-stack-boundary, see: | ||
46 | # | ||
47 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 | ||
48 | # | ||
49 | KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow | ||
50 | KBUILD_CFLAGS += $(call cc-option,-mno-avx,) | ||
51 | |||
42 | ifeq ($(CONFIG_X86_32),y) | 52 | ifeq ($(CONFIG_X86_32),y) |
43 | BITS := 32 | 53 | BITS := 32 |
44 | UTS_MACHINE := i386 | 54 | UTS_MACHINE := i386 |
@@ -167,9 +177,6 @@ KBUILD_CFLAGS += -pipe | |||
167 | KBUILD_CFLAGS += -Wno-sign-compare | 177 | KBUILD_CFLAGS += -Wno-sign-compare |
168 | # | 178 | # |
169 | KBUILD_CFLAGS += -fno-asynchronous-unwind-tables | 179 | KBUILD_CFLAGS += -fno-asynchronous-unwind-tables |
170 | # prevent gcc from generating any FP code by mistake | ||
171 | KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow | ||
172 | KBUILD_CFLAGS += $(call cc-option,-mno-avx,) | ||
173 | 180 | ||
174 | KBUILD_CFLAGS += $(mflags-y) | 181 | KBUILD_CFLAGS += $(mflags-y) |
175 | KBUILD_AFLAGS += $(mflags-y) | 182 | KBUILD_AFLAGS += $(mflags-y) |
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index d7b1f655b3ef..6a9b96b4624d 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -82,7 +82,7 @@ static unsigned long get_random_long(void) | |||
82 | 82 | ||
83 | if (has_cpuflag(X86_FEATURE_TSC)) { | 83 | if (has_cpuflag(X86_FEATURE_TSC)) { |
84 | debug_putstr(" RDTSC"); | 84 | debug_putstr(" RDTSC"); |
85 | rdtscll(raw); | 85 | raw = rdtsc(); |
86 | 86 | ||
87 | random ^= raw; | 87 | random ^= raw; |
88 | use_i8254 = false; | 88 | use_i8254 = false; |
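
This hunk is one instance of the TSC primitive revamp mentioned in the pull message: the old rdtscll(raw) macro, which wrote through its argument, becomes a plain function-style rdtsc() that returns the counter value. As a rough sketch of what such a helper boils down to (the kernel's actual helper lives in its msr headers and may differ in detail), a function-style TSC read looks like this:

/*
 * Hedged sketch only: a function-style TSC read in the spirit of the
 * "raw = rdtsc();" call above.  RDTSC returns the counter in EDX:EAX.
 */
#include <stdint.h>

static inline uint64_t read_tsc(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
	return ((uint64_t)hi << 32) | lo;
}
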
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 7a144971db79..bd55dedd7614 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -2,6 +2,7 @@ | |||
2 | # Makefile for the x86 low level entry code | 2 | # Makefile for the x86 low level entry code |
3 | # | 3 | # |
4 | obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o | 4 | obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o |
5 | obj-y += common.o | ||
5 | 6 | ||
6 | obj-y += vdso/ | 7 | obj-y += vdso/ |
7 | obj-y += vsyscall/ | 8 | obj-y += vsyscall/ |
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index f4e6308c4200..3c71dd947c7b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -135,9 +135,6 @@ For 32-bit we have the following conventions - kernel is built with | |||
135 | movq %rbp, 4*8+\offset(%rsp) | 135 | movq %rbp, 4*8+\offset(%rsp) |
136 | movq %rbx, 5*8+\offset(%rsp) | 136 | movq %rbx, 5*8+\offset(%rsp) |
137 | .endm | 137 | .endm |
138 | .macro SAVE_EXTRA_REGS_RBP offset=0 | ||
139 | movq %rbp, 4*8+\offset(%rsp) | ||
140 | .endm | ||
141 | 138 | ||
142 | .macro RESTORE_EXTRA_REGS offset=0 | 139 | .macro RESTORE_EXTRA_REGS offset=0 |
143 | movq 0*8+\offset(%rsp), %r15 | 140 | movq 0*8+\offset(%rsp), %r15 |
@@ -193,12 +190,6 @@ For 32-bit we have the following conventions - kernel is built with | |||
193 | .macro RESTORE_C_REGS_EXCEPT_RCX_R11 | 190 | .macro RESTORE_C_REGS_EXCEPT_RCX_R11 |
194 | RESTORE_C_REGS_HELPER 1,0,0,1,1 | 191 | RESTORE_C_REGS_HELPER 1,0,0,1,1 |
195 | .endm | 192 | .endm |
196 | .macro RESTORE_RSI_RDI | ||
197 | RESTORE_C_REGS_HELPER 0,0,0,0,0 | ||
198 | .endm | ||
199 | .macro RESTORE_RSI_RDI_RDX | ||
200 | RESTORE_C_REGS_HELPER 0,0,0,0,1 | ||
201 | .endm | ||
202 | 193 | ||
203 | .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 | 194 | .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 |
204 | subq $-(15*8+\addskip), %rsp | 195 | subq $-(15*8+\addskip), %rsp |
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
new file mode 100644
index 000000000000..80dcc9261ca3
--- /dev/null
+++ b/arch/x86/entry/common.c
@@ -0,0 +1,318 @@ | |||
1 | /* | ||
2 | * common.c - C code for kernel entry and exit | ||
3 | * Copyright (c) 2015 Andrew Lutomirski | ||
4 | * GPL v2 | ||
5 | * | ||
6 | * Based on asm and ptrace code by many authors. The code here originated | ||
7 | * in ptrace.c and signal.c. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/ptrace.h> | ||
16 | #include <linux/tracehook.h> | ||
17 | #include <linux/audit.h> | ||
18 | #include <linux/seccomp.h> | ||
19 | #include <linux/signal.h> | ||
20 | #include <linux/export.h> | ||
21 | #include <linux/context_tracking.h> | ||
22 | #include <linux/user-return-notifier.h> | ||
23 | #include <linux/uprobes.h> | ||
24 | |||
25 | #include <asm/desc.h> | ||
26 | #include <asm/traps.h> | ||
27 | |||
28 | #define CREATE_TRACE_POINTS | ||
29 | #include <trace/events/syscalls.h> | ||
30 | |||
31 | #ifdef CONFIG_CONTEXT_TRACKING | ||
32 | /* Called on entry from user mode with IRQs off. */ | ||
33 | __visible void enter_from_user_mode(void) | ||
34 | { | ||
35 | CT_WARN_ON(ct_state() != CONTEXT_USER); | ||
36 | user_exit(); | ||
37 | } | ||
38 | #endif | ||
39 | |||
40 | static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) | ||
41 | { | ||
42 | #ifdef CONFIG_X86_64 | ||
43 | if (arch == AUDIT_ARCH_X86_64) { | ||
44 | audit_syscall_entry(regs->orig_ax, regs->di, | ||
45 | regs->si, regs->dx, regs->r10); | ||
46 | } else | ||
47 | #endif | ||
48 | { | ||
49 | audit_syscall_entry(regs->orig_ax, regs->bx, | ||
50 | regs->cx, regs->dx, regs->si); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * We can return 0 to resume the syscall or anything else to go to phase | ||
56 | * 2. If we resume the syscall, we need to put something appropriate in | ||
57 | * regs->orig_ax. | ||
58 | * | ||
59 | * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax | ||
60 | * are fully functional. | ||
61 | * | ||
62 | * For phase 2's benefit, our return value is: | ||
63 | * 0: resume the syscall | ||
64 | * 1: go to phase 2; no seccomp phase 2 needed | ||
65 | * anything else: go to phase 2; pass return value to seccomp | ||
66 | */ | ||
67 | unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) | ||
68 | { | ||
69 | unsigned long ret = 0; | ||
70 | u32 work; | ||
71 | |||
72 | BUG_ON(regs != task_pt_regs(current)); | ||
73 | |||
74 | work = ACCESS_ONCE(current_thread_info()->flags) & | ||
75 | _TIF_WORK_SYSCALL_ENTRY; | ||
76 | |||
77 | #ifdef CONFIG_CONTEXT_TRACKING | ||
78 | /* | ||
79 | * If TIF_NOHZ is set, we are required to call user_exit() before | ||
80 | * doing anything that could touch RCU. | ||
81 | */ | ||
82 | if (work & _TIF_NOHZ) { | ||
83 | enter_from_user_mode(); | ||
84 | work &= ~_TIF_NOHZ; | ||
85 | } | ||
86 | #endif | ||
87 | |||
88 | #ifdef CONFIG_SECCOMP | ||
89 | /* | ||
90 | * Do seccomp first -- it should minimize exposure of other | ||
91 | * code, and keeping seccomp fast is probably more valuable | ||
92 | * than the rest of this. | ||
93 | */ | ||
94 | if (work & _TIF_SECCOMP) { | ||
95 | struct seccomp_data sd; | ||
96 | |||
97 | sd.arch = arch; | ||
98 | sd.nr = regs->orig_ax; | ||
99 | sd.instruction_pointer = regs->ip; | ||
100 | #ifdef CONFIG_X86_64 | ||
101 | if (arch == AUDIT_ARCH_X86_64) { | ||
102 | sd.args[0] = regs->di; | ||
103 | sd.args[1] = regs->si; | ||
104 | sd.args[2] = regs->dx; | ||
105 | sd.args[3] = regs->r10; | ||
106 | sd.args[4] = regs->r8; | ||
107 | sd.args[5] = regs->r9; | ||
108 | } else | ||
109 | #endif | ||
110 | { | ||
111 | sd.args[0] = regs->bx; | ||
112 | sd.args[1] = regs->cx; | ||
113 | sd.args[2] = regs->dx; | ||
114 | sd.args[3] = regs->si; | ||
115 | sd.args[4] = regs->di; | ||
116 | sd.args[5] = regs->bp; | ||
117 | } | ||
118 | |||
119 | BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); | ||
120 | BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); | ||
121 | |||
122 | ret = seccomp_phase1(&sd); | ||
123 | if (ret == SECCOMP_PHASE1_SKIP) { | ||
124 | regs->orig_ax = -1; | ||
125 | ret = 0; | ||
126 | } else if (ret != SECCOMP_PHASE1_OK) { | ||
127 | return ret; /* Go directly to phase 2 */ | ||
128 | } | ||
129 | |||
130 | work &= ~_TIF_SECCOMP; | ||
131 | } | ||
132 | #endif | ||
133 | |||
134 | /* Do our best to finish without phase 2. */ | ||
135 | if (work == 0) | ||
136 | return ret; /* seccomp and/or nohz only (ret == 0 here) */ | ||
137 | |||
138 | #ifdef CONFIG_AUDITSYSCALL | ||
139 | if (work == _TIF_SYSCALL_AUDIT) { | ||
140 | /* | ||
141 | * If there is no more work to be done except auditing, | ||
142 | * then audit in phase 1. Phase 2 always audits, so, if | ||
143 | * we audit here, then we can't go on to phase 2. | ||
144 | */ | ||
145 | do_audit_syscall_entry(regs, arch); | ||
146 | return 0; | ||
147 | } | ||
148 | #endif | ||
149 | |||
150 | return 1; /* Something is enabled that we can't handle in phase 1 */ | ||
151 | } | ||
152 | |||
153 | /* Returns the syscall nr to run (which should match regs->orig_ax). */ | ||
154 | long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, | ||
155 | unsigned long phase1_result) | ||
156 | { | ||
157 | long ret = 0; | ||
158 | u32 work = ACCESS_ONCE(current_thread_info()->flags) & | ||
159 | _TIF_WORK_SYSCALL_ENTRY; | ||
160 | |||
161 | BUG_ON(regs != task_pt_regs(current)); | ||
162 | |||
163 | /* | ||
164 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
165 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
166 | * If user-mode had set TF itself, then it's still clear from | ||
167 | * do_debug() and we need to set it again to restore the user | ||
168 | * state. If we entered on the slow path, TF was already set. | ||
169 | */ | ||
170 | if (work & _TIF_SINGLESTEP) | ||
171 | regs->flags |= X86_EFLAGS_TF; | ||
172 | |||
173 | #ifdef CONFIG_SECCOMP | ||
174 | /* | ||
175 | * Call seccomp_phase2 before running the other hooks so that | ||
176 | * they can see any changes made by a seccomp tracer. | ||
177 | */ | ||
178 | if (phase1_result > 1 && seccomp_phase2(phase1_result)) { | ||
179 | /* seccomp failures shouldn't expose any additional code. */ | ||
180 | return -1; | ||
181 | } | ||
182 | #endif | ||
183 | |||
184 | if (unlikely(work & _TIF_SYSCALL_EMU)) | ||
185 | ret = -1L; | ||
186 | |||
187 | if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && | ||
188 | tracehook_report_syscall_entry(regs)) | ||
189 | ret = -1L; | ||
190 | |||
191 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | ||
192 | trace_sys_enter(regs, regs->orig_ax); | ||
193 | |||
194 | do_audit_syscall_entry(regs, arch); | ||
195 | |||
196 | return ret ?: regs->orig_ax; | ||
197 | } | ||
198 | |||
199 | long syscall_trace_enter(struct pt_regs *regs) | ||
200 | { | ||
201 | u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; | ||
202 | unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); | ||
203 | |||
204 | if (phase1_result == 0) | ||
205 | return regs->orig_ax; | ||
206 | else | ||
207 | return syscall_trace_enter_phase2(regs, arch, phase1_result); | ||
208 | } | ||
209 | |||
210 | static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) | ||
211 | { | ||
212 | unsigned long top_of_stack = | ||
213 | (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; | ||
214 | return (struct thread_info *)(top_of_stack - THREAD_SIZE); | ||
215 | } | ||
216 | |||
217 | /* Called with IRQs disabled. */ | ||
218 | __visible void prepare_exit_to_usermode(struct pt_regs *regs) | ||
219 | { | ||
220 | if (WARN_ON(!irqs_disabled())) | ||
221 | local_irq_disable(); | ||
222 | |||
223 | /* | ||
224 | * In order to return to user mode, we need to have IRQs off with | ||
225 | * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY, | ||
226 | * _TIF_UPROBE, or _TIF_NEED_RESCHED set. Several of these flags | ||
227 | * can be set at any time on preemptable kernels if we have IRQs on, | ||
228 | * so we need to loop. Disabling preemption wouldn't help: doing the | ||
229 | * work to clear some of the flags can sleep. | ||
230 | */ | ||
231 | while (true) { | ||
232 | u32 cached_flags = | ||
233 | READ_ONCE(pt_regs_to_thread_info(regs)->flags); | ||
234 | |||
235 | if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | | ||
236 | _TIF_UPROBE | _TIF_NEED_RESCHED | | ||
237 | _TIF_USER_RETURN_NOTIFY))) | ||
238 | break; | ||
239 | |||
240 | /* We have work to do. */ | ||
241 | local_irq_enable(); | ||
242 | |||
243 | if (cached_flags & _TIF_NEED_RESCHED) | ||
244 | schedule(); | ||
245 | |||
246 | if (cached_flags & _TIF_UPROBE) | ||
247 | uprobe_notify_resume(regs); | ||
248 | |||
249 | /* deal with pending signal delivery */ | ||
250 | if (cached_flags & _TIF_SIGPENDING) | ||
251 | do_signal(regs); | ||
252 | |||
253 | if (cached_flags & _TIF_NOTIFY_RESUME) { | ||
254 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
255 | tracehook_notify_resume(regs); | ||
256 | } | ||
257 | |||
258 | if (cached_flags & _TIF_USER_RETURN_NOTIFY) | ||
259 | fire_user_return_notifiers(); | ||
260 | |||
261 | /* Disable IRQs and retry */ | ||
262 | local_irq_disable(); | ||
263 | } | ||
264 | |||
265 | user_enter(); | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Called with IRQs on and fully valid regs. Returns with IRQs off in a | ||
270 | * state such that we can immediately switch to user mode. | ||
271 | */ | ||
272 | __visible void syscall_return_slowpath(struct pt_regs *regs) | ||
273 | { | ||
274 | struct thread_info *ti = pt_regs_to_thread_info(regs); | ||
275 | u32 cached_flags = READ_ONCE(ti->flags); | ||
276 | bool step; | ||
277 | |||
278 | CT_WARN_ON(ct_state() != CONTEXT_KERNEL); | ||
279 | |||
280 | if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled", | ||
281 | regs->orig_ax)) | ||
282 | local_irq_enable(); | ||
283 | |||
284 | /* | ||
285 | * First do one-time work. If these work items are enabled, we | ||
286 | * want to run them exactly once per syscall exit with IRQs on. | ||
287 | */ | ||
288 | if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | | ||
289 | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) { | ||
290 | audit_syscall_exit(regs); | ||
291 | |||
292 | if (cached_flags & _TIF_SYSCALL_TRACEPOINT) | ||
293 | trace_sys_exit(regs, regs->ax); | ||
294 | |||
295 | /* | ||
296 | * If TIF_SYSCALL_EMU is set, we only get here because of | ||
297 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | ||
298 | * We already reported this syscall instruction in | ||
299 | * syscall_trace_enter(). | ||
300 | */ | ||
301 | step = unlikely( | ||
302 | (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)) | ||
303 | == _TIF_SINGLESTEP); | ||
304 | if (step || cached_flags & _TIF_SYSCALL_TRACE) | ||
305 | tracehook_report_syscall_exit(regs, step); | ||
306 | } | ||
307 | |||
308 | #ifdef CONFIG_COMPAT | ||
309 | /* | ||
310 | * Compat syscalls set TS_COMPAT. Make sure we clear it before | ||
311 | * returning to user mode. | ||
312 | */ | ||
313 | ti->status &= ~TS_COMPAT; | ||
314 | #endif | ||
315 | |||
316 | local_irq_disable(); | ||
317 | prepare_exit_to_usermode(regs); | ||
318 | } | ||
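
The comments in the new file spell out the phase 1 / phase 2 tracing contract: phase 1 returns 0 to run the syscall immediately, 1 to fall through to phase 2, and any other value to go to phase 2 with a seccomp result attached. Restated as a caller sketch, purely as a reading aid (this mirrors the syscall_trace_enter() wrapper above rather than adding behaviour):

/*
 * Reading aid only: how a slow-path caller consumes the phase 1 result,
 * exactly as syscall_trace_enter() in the new file already does.
 */
long trace_enter_example(struct pt_regs *regs, u32 arch)
{
	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);

	if (phase1_result == 0)
		return regs->orig_ax;	/* no tracing work: run the syscall */

	/* Non-zero: phase 2 is needed; values other than 1 carry a seccomp result. */
	return syscall_trace_enter_phase2(regs, arch, phase1_result);
}
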
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 21dc60a60b5f..b2909bf8cf70 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -45,16 +45,6 @@ | |||
45 | #include <asm/asm.h> | 45 | #include <asm/asm.h> |
46 | #include <asm/smap.h> | 46 | #include <asm/smap.h> |
47 | 47 | ||
48 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
49 | #include <linux/elf-em.h> | ||
50 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | ||
51 | #define __AUDIT_ARCH_LE 0x40000000 | ||
52 | |||
53 | #ifndef CONFIG_AUDITSYSCALL | ||
54 | # define sysenter_audit syscall_trace_entry | ||
55 | # define sysexit_audit syscall_exit_work | ||
56 | #endif | ||
57 | |||
58 | .section .entry.text, "ax" | 48 | .section .entry.text, "ax" |
59 | 49 | ||
60 | /* | 50 | /* |
@@ -266,14 +256,10 @@ ret_from_intr: | |||
266 | 256 | ||
267 | ENTRY(resume_userspace) | 257 | ENTRY(resume_userspace) |
268 | LOCKDEP_SYS_EXIT | 258 | LOCKDEP_SYS_EXIT |
269 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | 259 | DISABLE_INTERRUPTS(CLBR_ANY) |
270 | # setting need_resched or sigpending | ||
271 | # between sampling and the iret | ||
272 | TRACE_IRQS_OFF | 260 | TRACE_IRQS_OFF |
273 | movl TI_flags(%ebp), %ecx | 261 | movl %esp, %eax |
274 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on | 262 | call prepare_exit_to_usermode |
275 | # int/exception return? | ||
276 | jne work_pending | ||
277 | jmp restore_all | 263 | jmp restore_all |
278 | END(ret_from_exception) | 264 | END(ret_from_exception) |
279 | 265 | ||
@@ -339,7 +325,7 @@ sysenter_past_esp: | |||
339 | GET_THREAD_INFO(%ebp) | 325 | GET_THREAD_INFO(%ebp) |
340 | 326 | ||
341 | testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) | 327 | testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) |
342 | jnz sysenter_audit | 328 | jnz syscall_trace_entry |
343 | sysenter_do_call: | 329 | sysenter_do_call: |
344 | cmpl $(NR_syscalls), %eax | 330 | cmpl $(NR_syscalls), %eax |
345 | jae sysenter_badsys | 331 | jae sysenter_badsys |
@@ -351,7 +337,7 @@ sysenter_after_call: | |||
351 | TRACE_IRQS_OFF | 337 | TRACE_IRQS_OFF |
352 | movl TI_flags(%ebp), %ecx | 338 | movl TI_flags(%ebp), %ecx |
353 | testl $_TIF_ALLWORK_MASK, %ecx | 339 | testl $_TIF_ALLWORK_MASK, %ecx |
354 | jnz sysexit_audit | 340 | jnz syscall_exit_work_irqs_off |
355 | sysenter_exit: | 341 | sysenter_exit: |
356 | /* if something modifies registers it must also disable sysexit */ | 342 | /* if something modifies registers it must also disable sysexit */ |
357 | movl PT_EIP(%esp), %edx | 343 | movl PT_EIP(%esp), %edx |
@@ -362,40 +348,6 @@ sysenter_exit: | |||
362 | PTGS_TO_GS | 348 | PTGS_TO_GS |
363 | ENABLE_INTERRUPTS_SYSEXIT | 349 | ENABLE_INTERRUPTS_SYSEXIT |
364 | 350 | ||
365 | #ifdef CONFIG_AUDITSYSCALL | ||
366 | sysenter_audit: | ||
367 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp) | ||
368 | jnz syscall_trace_entry | ||
369 | /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ | ||
370 | movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ | ||
371 | /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ | ||
372 | pushl PT_ESI(%esp) /* a3: 5th arg */ | ||
373 | pushl PT_EDX+4(%esp) /* a2: 4th arg */ | ||
374 | call __audit_syscall_entry | ||
375 | popl %ecx /* get that remapped edx off the stack */ | ||
376 | popl %ecx /* get that remapped esi off the stack */ | ||
377 | movl PT_EAX(%esp), %eax /* reload syscall number */ | ||
378 | jmp sysenter_do_call | ||
379 | |||
380 | sysexit_audit: | ||
381 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx | ||
382 | jnz syscall_exit_work | ||
383 | TRACE_IRQS_ON | ||
384 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
385 | movl %eax, %edx /* second arg, syscall return value */ | ||
386 | cmpl $-MAX_ERRNO, %eax /* is it an error ? */ | ||
387 | setbe %al /* 1 if so, 0 if not */ | ||
388 | movzbl %al, %eax /* zero-extend that */ | ||
389 | call __audit_syscall_exit | ||
390 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
391 | TRACE_IRQS_OFF | ||
392 | movl TI_flags(%ebp), %ecx | ||
393 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx | ||
394 | jnz syscall_exit_work | ||
395 | movl PT_EAX(%esp), %eax /* reload syscall return value */ | ||
396 | jmp sysenter_exit | ||
397 | #endif | ||
398 | |||
399 | .pushsection .fixup, "ax" | 351 | .pushsection .fixup, "ax" |
400 | 2: movl $0, PT_FS(%esp) | 352 | 2: movl $0, PT_FS(%esp) |
401 | jmp 1b | 353 | jmp 1b |
@@ -421,13 +373,7 @@ syscall_after_call: | |||
421 | movl %eax, PT_EAX(%esp) # store the return value | 373 | movl %eax, PT_EAX(%esp) # store the return value |
422 | syscall_exit: | 374 | syscall_exit: |
423 | LOCKDEP_SYS_EXIT | 375 | LOCKDEP_SYS_EXIT |
424 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | 376 | jmp syscall_exit_work |
425 | # setting need_resched or sigpending | ||
426 | # between sampling and the iret | ||
427 | TRACE_IRQS_OFF | ||
428 | movl TI_flags(%ebp), %ecx | ||
429 | testl $_TIF_ALLWORK_MASK, %ecx # current->work | ||
430 | jnz syscall_exit_work | ||
431 | 377 | ||
432 | restore_all: | 378 | restore_all: |
433 | TRACE_IRQS_IRET | 379 | TRACE_IRQS_IRET |
@@ -504,57 +450,6 @@ ldt_ss: | |||
504 | #endif | 450 | #endif |
505 | ENDPROC(entry_INT80_32) | 451 | ENDPROC(entry_INT80_32) |
506 | 452 | ||
507 | # perform work that needs to be done immediately before resumption | ||
508 | ALIGN | ||
509 | work_pending: | ||
510 | testb $_TIF_NEED_RESCHED, %cl | ||
511 | jz work_notifysig | ||
512 | work_resched: | ||
513 | call schedule | ||
514 | LOCKDEP_SYS_EXIT | ||
515 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | ||
516 | # setting need_resched or sigpending | ||
517 | # between sampling and the iret | ||
518 | TRACE_IRQS_OFF | ||
519 | movl TI_flags(%ebp), %ecx | ||
520 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done other | ||
521 | # than syscall tracing? | ||
522 | jz restore_all | ||
523 | testb $_TIF_NEED_RESCHED, %cl | ||
524 | jnz work_resched | ||
525 | |||
526 | work_notifysig: # deal with pending signals and | ||
527 | # notify-resume requests | ||
528 | #ifdef CONFIG_VM86 | ||
529 | testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) | ||
530 | movl %esp, %eax | ||
531 | jnz work_notifysig_v86 # returning to kernel-space or | ||
532 | # vm86-space | ||
533 | 1: | ||
534 | #else | ||
535 | movl %esp, %eax | ||
536 | #endif | ||
537 | TRACE_IRQS_ON | ||
538 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
539 | movb PT_CS(%esp), %bl | ||
540 | andb $SEGMENT_RPL_MASK, %bl | ||
541 | cmpb $USER_RPL, %bl | ||
542 | jb resume_kernel | ||
543 | xorl %edx, %edx | ||
544 | call do_notify_resume | ||
545 | jmp resume_userspace | ||
546 | |||
547 | #ifdef CONFIG_VM86 | ||
548 | ALIGN | ||
549 | work_notifysig_v86: | ||
550 | pushl %ecx # save ti_flags for do_notify_resume | ||
551 | call save_v86_state # %eax contains pt_regs pointer | ||
552 | popl %ecx | ||
553 | movl %eax, %esp | ||
554 | jmp 1b | ||
555 | #endif | ||
556 | END(work_pending) | ||
557 | |||
558 | # perform syscall exit tracing | 453 | # perform syscall exit tracing |
559 | ALIGN | 454 | ALIGN |
560 | syscall_trace_entry: | 455 | syscall_trace_entry: |
@@ -569,15 +464,14 @@ END(syscall_trace_entry) | |||
569 | 464 | ||
570 | # perform syscall exit tracing | 465 | # perform syscall exit tracing |
571 | ALIGN | 466 | ALIGN |
572 | syscall_exit_work: | 467 | syscall_exit_work_irqs_off: |
573 | testl $_TIF_WORK_SYSCALL_EXIT, %ecx | ||
574 | jz work_pending | ||
575 | TRACE_IRQS_ON | 468 | TRACE_IRQS_ON |
576 | ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call | 469 | ENABLE_INTERRUPTS(CLBR_ANY) |
577 | # schedule() instead | 470 | |
471 | syscall_exit_work: | ||
578 | movl %esp, %eax | 472 | movl %esp, %eax |
579 | call syscall_trace_leave | 473 | call syscall_return_slowpath |
580 | jmp resume_userspace | 474 | jmp restore_all |
581 | END(syscall_exit_work) | 475 | END(syscall_exit_work) |
582 | 476 | ||
583 | syscall_fault: | 477 | syscall_fault: |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8cb3e438f21e..d3033183ed70 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -33,7 +33,6 @@ | |||
33 | #include <asm/paravirt.h> | 33 | #include <asm/paravirt.h> |
34 | #include <asm/percpu.h> | 34 | #include <asm/percpu.h> |
35 | #include <asm/asm.h> | 35 | #include <asm/asm.h> |
36 | #include <asm/context_tracking.h> | ||
37 | #include <asm/smap.h> | 36 | #include <asm/smap.h> |
38 | #include <asm/pgtable_types.h> | 37 | #include <asm/pgtable_types.h> |
39 | #include <linux/err.h> | 38 | #include <linux/err.h> |
@@ -229,6 +228,11 @@ entry_SYSCALL_64_fastpath: | |||
229 | */ | 228 | */ |
230 | USERGS_SYSRET64 | 229 | USERGS_SYSRET64 |
231 | 230 | ||
231 | GLOBAL(int_ret_from_sys_call_irqs_off) | ||
232 | TRACE_IRQS_ON | ||
233 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
234 | jmp int_ret_from_sys_call | ||
235 | |||
232 | /* Do syscall entry tracing */ | 236 | /* Do syscall entry tracing */ |
233 | tracesys: | 237 | tracesys: |
234 | movq %rsp, %rdi | 238 | movq %rsp, %rdi |
@@ -272,69 +276,11 @@ tracesys_phase2: | |||
272 | * Has correct iret frame. | 276 | * Has correct iret frame. |
273 | */ | 277 | */ |
274 | GLOBAL(int_ret_from_sys_call) | 278 | GLOBAL(int_ret_from_sys_call) |
275 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
276 | int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */ | ||
277 | TRACE_IRQS_OFF | ||
278 | movl $_TIF_ALLWORK_MASK, %edi | ||
279 | /* edi: mask to check */ | ||
280 | GLOBAL(int_with_check) | ||
281 | LOCKDEP_SYS_EXIT_IRQ | ||
282 | GET_THREAD_INFO(%rcx) | ||
283 | movl TI_flags(%rcx), %edx | ||
284 | andl %edi, %edx | ||
285 | jnz int_careful | ||
286 | andl $~TS_COMPAT, TI_status(%rcx) | ||
287 | jmp syscall_return | ||
288 | |||
289 | /* | ||
290 | * Either reschedule or signal or syscall exit tracking needed. | ||
291 | * First do a reschedule test. | ||
292 | * edx: work, edi: workmask | ||
293 | */ | ||
294 | int_careful: | ||
295 | bt $TIF_NEED_RESCHED, %edx | ||
296 | jnc int_very_careful | ||
297 | TRACE_IRQS_ON | ||
298 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
299 | pushq %rdi | ||
300 | SCHEDULE_USER | ||
301 | popq %rdi | ||
302 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
303 | TRACE_IRQS_OFF | ||
304 | jmp int_with_check | ||
305 | |||
306 | /* handle signals and tracing -- both require a full pt_regs */ | ||
307 | int_very_careful: | ||
308 | TRACE_IRQS_ON | ||
309 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
310 | SAVE_EXTRA_REGS | 279 | SAVE_EXTRA_REGS |
311 | /* Check for syscall exit trace */ | 280 | movq %rsp, %rdi |
312 | testl $_TIF_WORK_SYSCALL_EXIT, %edx | 281 | call syscall_return_slowpath /* returns with IRQs disabled */ |
313 | jz int_signal | ||
314 | pushq %rdi | ||
315 | leaq 8(%rsp), %rdi /* &ptregs -> arg1 */ | ||
316 | call syscall_trace_leave | ||
317 | popq %rdi | ||
318 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi | ||
319 | jmp int_restore_rest | ||
320 | |||
321 | int_signal: | ||
322 | testl $_TIF_DO_NOTIFY_MASK, %edx | ||
323 | jz 1f | ||
324 | movq %rsp, %rdi /* &ptregs -> arg1 */ | ||
325 | xorl %esi, %esi /* oldset -> arg2 */ | ||
326 | call do_notify_resume | ||
327 | 1: movl $_TIF_WORK_MASK, %edi | ||
328 | int_restore_rest: | ||
329 | RESTORE_EXTRA_REGS | 282 | RESTORE_EXTRA_REGS |
330 | DISABLE_INTERRUPTS(CLBR_NONE) | 283 | TRACE_IRQS_IRETQ /* we're about to change IF */ |
331 | TRACE_IRQS_OFF | ||
332 | jmp int_with_check | ||
333 | |||
334 | syscall_return: | ||
335 | /* The IRETQ could re-enable interrupts: */ | ||
336 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
337 | TRACE_IRQS_IRETQ | ||
338 | 284 | ||
339 | /* | 285 | /* |
340 | * Try to use SYSRET instead of IRET if we're returning to | 286 | * Try to use SYSRET instead of IRET if we're returning to |
@@ -555,23 +501,22 @@ END(irq_entries_start) | |||
555 | /* 0(%rsp): ~(interrupt number) */ | 501 | /* 0(%rsp): ~(interrupt number) */ |
556 | .macro interrupt func | 502 | .macro interrupt func |
557 | cld | 503 | cld |
558 | /* | 504 | ALLOC_PT_GPREGS_ON_STACK |
559 | * Since nothing in interrupt handling code touches r12...r15 members | 505 | SAVE_C_REGS |
560 | * of "struct pt_regs", and since interrupts can nest, we can save | 506 | SAVE_EXTRA_REGS |
561 | * four stack slots and simultaneously provide | ||
562 | * an unwind-friendly stack layout by saving "truncated" pt_regs | ||
563 | * exactly up to rbp slot, without these members. | ||
564 | */ | ||
565 | ALLOC_PT_GPREGS_ON_STACK -RBP | ||
566 | SAVE_C_REGS -RBP | ||
567 | /* this goes to 0(%rsp) for unwinder, not for saving the value: */ | ||
568 | SAVE_EXTRA_REGS_RBP -RBP | ||
569 | |||
570 | leaq -RBP(%rsp), %rdi /* arg1 for \func (pointer to pt_regs) */ | ||
571 | 507 | ||
572 | testb $3, CS-RBP(%rsp) | 508 | testb $3, CS(%rsp) |
573 | jz 1f | 509 | jz 1f |
510 | |||
511 | /* | ||
512 | * IRQ from user mode. Switch to kernel gsbase and inform context | ||
513 | * tracking that we're in kernel mode. | ||
514 | */ | ||
574 | SWAPGS | 515 | SWAPGS |
516 | #ifdef CONFIG_CONTEXT_TRACKING | ||
517 | call enter_from_user_mode | ||
518 | #endif | ||
519 | |||
575 | 1: | 520 | 1: |
576 | /* | 521 | /* |
577 | * Save previous stack pointer, optionally switch to interrupt stack. | 522 | * Save previous stack pointer, optionally switch to interrupt stack. |
@@ -580,14 +525,14 @@ END(irq_entries_start) | |||
580 | * a little cheaper to use a separate counter in the PDA (short of | 525 | * a little cheaper to use a separate counter in the PDA (short of |
581 | * moving irq_enter into assembly, which would be too much work) | 526 | * moving irq_enter into assembly, which would be too much work) |
582 | */ | 527 | */ |
583 | movq %rsp, %rsi | 528 | movq %rsp, %rdi |
584 | incl PER_CPU_VAR(irq_count) | 529 | incl PER_CPU_VAR(irq_count) |
585 | cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp | 530 | cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp |
586 | pushq %rsi | 531 | pushq %rdi |
587 | /* We entered an interrupt context - irqs are off: */ | 532 | /* We entered an interrupt context - irqs are off: */ |
588 | TRACE_IRQS_OFF | 533 | TRACE_IRQS_OFF |
589 | 534 | ||
590 | call \func | 535 | call \func /* rdi points to pt_regs */ |
591 | .endm | 536 | .endm |
592 | 537 | ||
593 | /* | 538 | /* |
@@ -606,34 +551,19 @@ ret_from_intr: | |||
606 | decl PER_CPU_VAR(irq_count) | 551 | decl PER_CPU_VAR(irq_count) |
607 | 552 | ||
608 | /* Restore saved previous stack */ | 553 | /* Restore saved previous stack */ |
609 | popq %rsi | 554 | popq %rsp |
610 | /* return code expects complete pt_regs - adjust rsp accordingly: */ | ||
611 | leaq -RBP(%rsi), %rsp | ||
612 | 555 | ||
613 | testb $3, CS(%rsp) | 556 | testb $3, CS(%rsp) |
614 | jz retint_kernel | 557 | jz retint_kernel |
615 | /* Interrupt came from user space */ | ||
616 | retint_user: | ||
617 | GET_THREAD_INFO(%rcx) | ||
618 | 558 | ||
619 | /* %rcx: thread info. Interrupts are off. */ | 559 | /* Interrupt came from user space */ |
620 | retint_with_reschedule: | ||
621 | movl $_TIF_WORK_MASK, %edi | ||
622 | retint_check: | ||
623 | LOCKDEP_SYS_EXIT_IRQ | 560 | LOCKDEP_SYS_EXIT_IRQ |
624 | movl TI_flags(%rcx), %edx | 561 | GLOBAL(retint_user) |
625 | andl %edi, %edx | 562 | mov %rsp,%rdi |
626 | jnz retint_careful | 563 | call prepare_exit_to_usermode |
627 | |||
628 | retint_swapgs: /* return to user-space */ | ||
629 | /* | ||
630 | * The iretq could re-enable interrupts: | ||
631 | */ | ||
632 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
633 | TRACE_IRQS_IRETQ | 564 | TRACE_IRQS_IRETQ |
634 | |||
635 | SWAPGS | 565 | SWAPGS |
636 | jmp restore_c_regs_and_iret | 566 | jmp restore_regs_and_iret |
637 | 567 | ||
638 | /* Returning to kernel space */ | 568 | /* Returning to kernel space */ |
639 | retint_kernel: | 569 | retint_kernel: |
@@ -657,6 +587,8 @@ retint_kernel: | |||
657 | * At this label, code paths which return to kernel and to user, | 587 | * At this label, code paths which return to kernel and to user, |
658 | * which come from interrupts/exception and from syscalls, merge. | 588 | * which come from interrupts/exception and from syscalls, merge. |
659 | */ | 589 | */ |
590 | restore_regs_and_iret: | ||
591 | RESTORE_EXTRA_REGS | ||
660 | restore_c_regs_and_iret: | 592 | restore_c_regs_and_iret: |
661 | RESTORE_C_REGS | 593 | RESTORE_C_REGS |
662 | REMOVE_PT_GPREGS_FROM_STACK 8 | 594 | REMOVE_PT_GPREGS_FROM_STACK 8 |
@@ -707,37 +639,6 @@ native_irq_return_ldt: | |||
707 | popq %rax | 639 | popq %rax |
708 | jmp native_irq_return_iret | 640 | jmp native_irq_return_iret |
709 | #endif | 641 | #endif |
710 | |||
711 | /* edi: workmask, edx: work */ | ||
712 | retint_careful: | ||
713 | bt $TIF_NEED_RESCHED, %edx | ||
714 | jnc retint_signal | ||
715 | TRACE_IRQS_ON | ||
716 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
717 | pushq %rdi | ||
718 | SCHEDULE_USER | ||
719 | popq %rdi | ||
720 | GET_THREAD_INFO(%rcx) | ||
721 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
722 | TRACE_IRQS_OFF | ||
723 | jmp retint_check | ||
724 | |||
725 | retint_signal: | ||
726 | testl $_TIF_DO_NOTIFY_MASK, %edx | ||
727 | jz retint_swapgs | ||
728 | TRACE_IRQS_ON | ||
729 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
730 | SAVE_EXTRA_REGS | ||
731 | movq $-1, ORIG_RAX(%rsp) | ||
732 | xorl %esi, %esi /* oldset */ | ||
733 | movq %rsp, %rdi /* &pt_regs */ | ||
734 | call do_notify_resume | ||
735 | RESTORE_EXTRA_REGS | ||
736 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
737 | TRACE_IRQS_OFF | ||
738 | GET_THREAD_INFO(%rcx) | ||
739 | jmp retint_with_reschedule | ||
740 | |||
741 | END(common_interrupt) | 642 | END(common_interrupt) |
742 | 643 | ||
743 | /* | 644 | /* |
@@ -1143,12 +1044,22 @@ ENTRY(error_entry) | |||
1143 | SAVE_EXTRA_REGS 8 | 1044 | SAVE_EXTRA_REGS 8 |
1144 | xorl %ebx, %ebx | 1045 | xorl %ebx, %ebx |
1145 | testb $3, CS+8(%rsp) | 1046 | testb $3, CS+8(%rsp) |
1146 | jz error_kernelspace | 1047 | jz .Lerror_kernelspace |
1147 | 1048 | ||
1148 | /* We entered from user mode */ | 1049 | .Lerror_entry_from_usermode_swapgs: |
1050 | /* | ||
1051 | * We entered from user mode or we're pretending to have entered | ||
1052 | * from user mode due to an IRET fault. | ||
1053 | */ | ||
1149 | SWAPGS | 1054 | SWAPGS |
1150 | 1055 | ||
1151 | error_entry_done: | 1056 | .Lerror_entry_from_usermode_after_swapgs: |
1057 | #ifdef CONFIG_CONTEXT_TRACKING | ||
1058 | call enter_from_user_mode | ||
1059 | #endif | ||
1060 | |||
1061 | .Lerror_entry_done: | ||
1062 | |||
1152 | TRACE_IRQS_OFF | 1063 | TRACE_IRQS_OFF |
1153 | ret | 1064 | ret |
1154 | 1065 | ||
@@ -1158,31 +1069,30 @@ error_entry_done: | |||
1158 | * truncated RIP for IRET exceptions returning to compat mode. Check | 1069 | * truncated RIP for IRET exceptions returning to compat mode. Check |
1159 | * for these here too. | 1070 | * for these here too. |
1160 | */ | 1071 | */ |
1161 | error_kernelspace: | 1072 | .Lerror_kernelspace: |
1162 | incl %ebx | 1073 | incl %ebx |
1163 | leaq native_irq_return_iret(%rip), %rcx | 1074 | leaq native_irq_return_iret(%rip), %rcx |
1164 | cmpq %rcx, RIP+8(%rsp) | 1075 | cmpq %rcx, RIP+8(%rsp) |
1165 | je error_bad_iret | 1076 | je .Lerror_bad_iret |
1166 | movl %ecx, %eax /* zero extend */ | 1077 | movl %ecx, %eax /* zero extend */ |
1167 | cmpq %rax, RIP+8(%rsp) | 1078 | cmpq %rax, RIP+8(%rsp) |
1168 | je bstep_iret | 1079 | je .Lbstep_iret |
1169 | cmpq $gs_change, RIP+8(%rsp) | 1080 | cmpq $gs_change, RIP+8(%rsp) |
1170 | jne error_entry_done | 1081 | jne .Lerror_entry_done |
1171 | 1082 | ||
1172 | /* | 1083 | /* |
1173 | * hack: gs_change can fail with user gsbase. If this happens, fix up | 1084 | * hack: gs_change can fail with user gsbase. If this happens, fix up |
1174 | * gsbase and proceed. We'll fix up the exception and land in | 1085 | * gsbase and proceed. We'll fix up the exception and land in |
1175 | * gs_change's error handler with kernel gsbase. | 1086 | * gs_change's error handler with kernel gsbase. |
1176 | */ | 1087 | */ |
1177 | SWAPGS | 1088 | jmp .Lerror_entry_from_usermode_swapgs |
1178 | jmp error_entry_done | ||
1179 | 1089 | ||
1180 | bstep_iret: | 1090 | .Lbstep_iret: |
1181 | /* Fix truncated RIP */ | 1091 | /* Fix truncated RIP */ |
1182 | movq %rcx, RIP+8(%rsp) | 1092 | movq %rcx, RIP+8(%rsp) |
1183 | /* fall through */ | 1093 | /* fall through */ |
1184 | 1094 | ||
1185 | error_bad_iret: | 1095 | .Lerror_bad_iret: |
1186 | /* | 1096 | /* |
1187 | * We came from an IRET to user mode, so we have user gsbase. | 1097 | * We came from an IRET to user mode, so we have user gsbase. |
1188 | * Switch to kernel gsbase: | 1098 | * Switch to kernel gsbase: |
@@ -1198,7 +1108,7 @@ error_bad_iret: | |||
1198 | call fixup_bad_iret | 1108 | call fixup_bad_iret |
1199 | mov %rax, %rsp | 1109 | mov %rax, %rsp |
1200 | decl %ebx | 1110 | decl %ebx |
1201 | jmp error_entry_done | 1111 | jmp .Lerror_entry_from_usermode_after_swapgs |
1202 | END(error_entry) | 1112 | END(error_entry) |
1203 | 1113 | ||
1204 | 1114 | ||
@@ -1209,7 +1119,6 @@ END(error_entry) | |||
1209 | */ | 1119 | */ |
1210 | ENTRY(error_exit) | 1120 | ENTRY(error_exit) |
1211 | movl %ebx, %eax | 1121 | movl %ebx, %eax |
1212 | RESTORE_EXTRA_REGS | ||
1213 | DISABLE_INTERRUPTS(CLBR_NONE) | 1122 | DISABLE_INTERRUPTS(CLBR_NONE) |
1214 | TRACE_IRQS_OFF | 1123 | TRACE_IRQS_OFF |
1215 | testl %eax, %eax | 1124 | testl %eax, %eax |
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index a7e257d9cb90..a9360d40fb7f 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -22,8 +22,8 @@ | |||
22 | #define __AUDIT_ARCH_LE 0x40000000 | 22 | #define __AUDIT_ARCH_LE 0x40000000 |
23 | 23 | ||
24 | #ifndef CONFIG_AUDITSYSCALL | 24 | #ifndef CONFIG_AUDITSYSCALL |
25 | # define sysexit_audit ia32_ret_from_sys_call | 25 | # define sysexit_audit ia32_ret_from_sys_call_irqs_off |
26 | # define sysretl_audit ia32_ret_from_sys_call | 26 | # define sysretl_audit ia32_ret_from_sys_call_irqs_off |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | .section .entry.text, "ax" | 29 | .section .entry.text, "ax" |
@@ -141,7 +141,8 @@ sysexit_from_sys_call: | |||
141 | andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) | 141 | andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
142 | movl RIP(%rsp), %ecx /* User %eip */ | 142 | movl RIP(%rsp), %ecx /* User %eip */ |
143 | movq RAX(%rsp), %rax | 143 | movq RAX(%rsp), %rax |
144 | RESTORE_RSI_RDI | 144 | movl RSI(%rsp), %esi |
145 | movl RDI(%rsp), %edi | ||
145 | xorl %edx, %edx /* Do not leak kernel information */ | 146 | xorl %edx, %edx /* Do not leak kernel information */ |
146 | xorq %r8, %r8 | 147 | xorq %r8, %r8 |
147 | xorq %r9, %r9 | 148 | xorq %r9, %r9 |
@@ -209,10 +210,10 @@ sysexit_from_sys_call: | |||
209 | .endm | 210 | .endm |
210 | 211 | ||
211 | .macro auditsys_exit exit | 212 | .macro auditsys_exit exit |
212 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | ||
213 | jnz ia32_ret_from_sys_call | ||
214 | TRACE_IRQS_ON | 213 | TRACE_IRQS_ON |
215 | ENABLE_INTERRUPTS(CLBR_NONE) | 214 | ENABLE_INTERRUPTS(CLBR_NONE) |
215 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | ||
216 | jnz ia32_ret_from_sys_call | ||
216 | movl %eax, %esi /* second arg, syscall return value */ | 217 | movl %eax, %esi /* second arg, syscall return value */ |
217 | cmpl $-MAX_ERRNO, %eax /* is it an error ? */ | 218 | cmpl $-MAX_ERRNO, %eax /* is it an error ? */ |
218 | jbe 1f | 219 | jbe 1f |
@@ -230,7 +231,7 @@ sysexit_from_sys_call: | |||
230 | movq %rax, R10(%rsp) | 231 | movq %rax, R10(%rsp) |
231 | movq %rax, R9(%rsp) | 232 | movq %rax, R9(%rsp) |
232 | movq %rax, R8(%rsp) | 233 | movq %rax, R8(%rsp) |
233 | jmp int_with_check | 234 | jmp int_ret_from_sys_call_irqs_off |
234 | .endm | 235 | .endm |
235 | 236 | ||
236 | sysenter_auditsys: | 237 | sysenter_auditsys: |
@@ -365,7 +366,9 @@ cstar_dispatch: | |||
365 | 366 | ||
366 | sysretl_from_sys_call: | 367 | sysretl_from_sys_call: |
367 | andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) | 368 | andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
368 | RESTORE_RSI_RDI_RDX | 369 | movl RDX(%rsp), %edx |
370 | movl RSI(%rsp), %esi | ||
371 | movl RDI(%rsp), %edi | ||
369 | movl RIP(%rsp), %ecx | 372 | movl RIP(%rsp), %ecx |
370 | movl EFLAGS(%rsp), %r11d | 373 | movl EFLAGS(%rsp), %r11d |
371 | movq RAX(%rsp), %rax | 374 | movq RAX(%rsp), %rax |
@@ -430,8 +433,48 @@ cstar_tracesys: | |||
430 | END(entry_SYSCALL_compat) | 433 | END(entry_SYSCALL_compat) |
431 | 434 | ||
432 | ia32_badarg: | 435 | ia32_badarg: |
433 | ASM_CLAC | 436 | /* |
434 | movq $-EFAULT, RAX(%rsp) | 437 | * So far, we've entered kernel mode, set AC, turned on IRQs, and |
438 | * saved C regs except r8-r11. We haven't done any of the other | ||
439 | * standard entry work, though. We want to bail, but we shouldn't | ||
440 | * treat this as a syscall entry since we don't even know what the | ||
441 | * args are. Instead, treat this as a non-syscall entry, finish | ||
442 | * the entry work, and immediately exit after setting AX = -EFAULT. | ||
443 | * | ||
444 | * We're really just being polite here. Killing the task outright | ||
445 | * would be a reasonable action, too. Given that the only valid | ||
446 | * way to have gotten here is through the vDSO, and we already know | ||
447 | * that the stack pointer is bad, the task isn't going to survive | ||
448 | * for long no matter what we do. | ||
449 | */ | ||
450 | |||
451 | ASM_CLAC /* undo STAC */ | ||
452 | movq $-EFAULT, RAX(%rsp) /* return -EFAULT if possible */ | ||
453 | |||
454 | /* Fill in the rest of pt_regs */ | ||
455 | xorl %eax, %eax | ||
456 | movq %rax, R11(%rsp) | ||
457 | movq %rax, R10(%rsp) | ||
458 | movq %rax, R9(%rsp) | ||
459 | movq %rax, R8(%rsp) | ||
460 | SAVE_EXTRA_REGS | ||
461 | |||
462 | /* Turn IRQs back off. */ | ||
463 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
464 | TRACE_IRQS_OFF | ||
465 | |||
466 | /* Now finish entering normal kernel mode. */ | ||
467 | #ifdef CONFIG_CONTEXT_TRACKING | ||
468 | call enter_from_user_mode | ||
469 | #endif | ||
470 | |||
471 | /* And exit again. */ | ||
472 | jmp retint_user | ||
473 | |||
474 | ia32_ret_from_sys_call_irqs_off: | ||
475 | TRACE_IRQS_ON | ||
476 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
477 | |||
435 | ia32_ret_from_sys_call: | 478 | ia32_ret_from_sys_call: |
436 | xorl %eax, %eax /* Do not leak kernel information */ | 479 | xorl %eax, %eax /* Do not leak kernel information */ |
437 | movq %rax, R11(%rsp) | 480 | movq %rax, R11(%rsp) |
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ef8187f9d28d..25e3cf1cd8fd 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -365,3 +365,18 @@ | |||
365 | 356 i386 memfd_create sys_memfd_create | 365 | 356 i386 memfd_create sys_memfd_create |
366 | 357 i386 bpf sys_bpf | 366 | 357 i386 bpf sys_bpf |
367 | 358 i386 execveat sys_execveat stub32_execveat | 367 | 358 i386 execveat sys_execveat stub32_execveat |
368 | 359 i386 socket sys_socket | ||
369 | 360 i386 socketpair sys_socketpair | ||
370 | 361 i386 bind sys_bind | ||
371 | 362 i386 connect sys_connect | ||
372 | 363 i386 listen sys_listen | ||
373 | 364 i386 accept4 sys_accept4 | ||
374 | 365 i386 getsockopt sys_getsockopt compat_sys_getsockopt | ||
375 | 366 i386 setsockopt sys_setsockopt compat_sys_setsockopt | ||
376 | 367 i386 getsockname sys_getsockname | ||
377 | 368 i386 getpeername sys_getpeername | ||
378 | 369 i386 sendto sys_sendto | ||
379 | 370 i386 sendmsg sys_sendmsg compat_sys_sendmsg | ||
380 | 371 i386 recvfrom sys_recvfrom compat_sys_recvfrom | ||
381 | 372 i386 recvmsg sys_recvmsg compat_sys_recvmsg | ||
382 | 373 i386 shutdown sys_shutdown | ||
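
These table additions give i386 direct entry points for socket calls that previously went through the socketcall(2) multiplexer. A hedged user-space sketch of invoking one of the new numbers directly - 359 is the socket entry added above - when built and run as a 32-bit binary on a kernel that carries this table (on older kernels the call returns ENOSYS and socketcall(2) remains the fallback):

/*
 * Hedged sketch: call the new direct i386 socket syscall by number.
 * 359 is the "socket" entry added in the table above; this only makes
 * sense for a 32-bit binary on a new enough kernel.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>	/* syscall() */
#include <sys/socket.h>		/* AF_INET, SOCK_STREAM */

int main(void)
{
	long fd = syscall(359 /* i386 __NR_socket, per the table above */,
			  AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("direct socket syscall");
		return 1;
	}
	printf("got socket fd %ld\n", fd);
	return 0;
}
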
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index e97032069f88..a3d0767a6b29 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -8,7 +8,7 @@ KASAN_SANITIZE := n | |||
8 | VDSO64-$(CONFIG_X86_64) := y | 8 | VDSO64-$(CONFIG_X86_64) := y |
9 | VDSOX32-$(CONFIG_X86_X32_ABI) := y | 9 | VDSOX32-$(CONFIG_X86_X32_ABI) := y |
10 | VDSO32-$(CONFIG_X86_32) := y | 10 | VDSO32-$(CONFIG_X86_32) := y |
11 | VDSO32-$(CONFIG_COMPAT) := y | 11 | VDSO32-$(CONFIG_IA32_EMULATION) := y |
12 | 12 | ||
13 | # files to link into the vdso | 13 | # files to link into the vdso |
14 | vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o | 14 | vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o |
@@ -20,7 +20,7 @@ obj-y += vma.o | |||
20 | vdso_img-$(VDSO64-y) += 64 | 20 | vdso_img-$(VDSO64-y) += 64 |
21 | vdso_img-$(VDSOX32-y) += x32 | 21 | vdso_img-$(VDSOX32-y) += x32 |
22 | vdso_img-$(VDSO32-y) += 32-int80 | 22 | vdso_img-$(VDSO32-y) += 32-int80 |
23 | vdso_img-$(CONFIG_COMPAT) += 32-syscall | 23 | vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall |
24 | vdso_img-$(VDSO32-y) += 32-sysenter | 24 | vdso_img-$(VDSO32-y) += 32-sysenter |
25 | 25 | ||
26 | obj-$(VDSO32-y) += vdso32-setup.o | 26 | obj-$(VDSO32-y) += vdso32-setup.o |
@@ -126,7 +126,7 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE | |||
126 | # Build multiple 32-bit vDSO images to choose from at boot time. | 126 | # Build multiple 32-bit vDSO images to choose from at boot time. |
127 | # | 127 | # |
128 | vdso32.so-$(VDSO32-y) += int80 | 128 | vdso32.so-$(VDSO32-y) += int80 |
129 | vdso32.so-$(CONFIG_COMPAT) += syscall | 129 | vdso32.so-$(CONFIG_IA32_EMULATION) += syscall |
130 | vdso32.so-$(VDSO32-y) += sysenter | 130 | vdso32.so-$(VDSO32-y) += sysenter |
131 | 131 | ||
132 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) | 132 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) |
@@ -175,7 +175,7 @@ quiet_cmd_vdso = VDSO $@ | |||
175 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ | 175 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ |
176 | sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' | 176 | sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' |
177 | 177 | ||
178 | VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ | 178 | VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \ |
179 | $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) | 179 | $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) |
180 | GCOV_PROFILE := n | 180 | GCOV_PROFILE := n |
181 | 181 | ||
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 9793322751e0..ca94fa649251 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c | |||
@@ -175,20 +175,8 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
175 | 175 | ||
176 | notrace static cycle_t vread_tsc(void) | 176 | notrace static cycle_t vread_tsc(void) |
177 | { | 177 | { |
178 | cycle_t ret; | 178 | cycle_t ret = (cycle_t)rdtsc_ordered(); |
179 | u64 last; | 179 | u64 last = gtod->cycle_last; |
180 | |||
181 | /* | ||
182 | * Empirically, a fence (of type that depends on the CPU) | ||
183 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
184 | * with respect to loads. The various CPU manuals are unclear | ||
185 | * as to whether rdtsc can be reordered with later loads, | ||
186 | * but no one has ever seen it happen. | ||
187 | */ | ||
188 | rdtsc_barrier(); | ||
189 | ret = (cycle_t)__native_read_tsc(); | ||
190 | |||
191 | last = gtod->cycle_last; | ||
192 | 180 | ||
193 | if (likely(ret >= last)) | 181 | if (likely(ret >= last)) |
194 | return ret; | 182 | return ret; |
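With the fence folded into rdtsc_ordered(), the vDSO's TSC read collapses to a single ordered read plus a monotonicity clamp against the timekeeper's last recorded value. The sketch below shows the whole function as it presumably looks after this hunk; the final clamp-and-return lines are not visible in the excerpt above and are reconstructed here as an assumption.

notrace static cycle_t vread_tsc(void)
{
	cycle_t ret = (cycle_t)rdtsc_ordered();	/* ordered like a load */
	u64 last = gtod->cycle_last;		/* timekeeper's last snapshot */

	if (likely(ret >= last))
		return ret;

	/*
	 * Sketch of the remaining (unshown) lines: if the freshly read
	 * TSC appears to be behind the timekeeper's snapshot, clamp to
	 * that snapshot so clock_gettime() stays monotonic.
	 */
	return last;
}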
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 1c9f750c3859..434543145d78 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c | |||
@@ -177,7 +177,7 @@ up_fail: | |||
177 | return ret; | 177 | return ret; |
178 | } | 178 | } |
179 | 179 | ||
180 | #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) | 180 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
181 | static int load_vdso32(void) | 181 | static int load_vdso32(void) |
182 | { | 182 | { |
183 | int ret; | 183 | int ret; |
@@ -219,8 +219,11 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, | |||
219 | return map_vdso(&vdso_image_x32, true); | 219 | return map_vdso(&vdso_image_x32, true); |
220 | } | 220 | } |
221 | #endif | 221 | #endif |
222 | 222 | #ifdef CONFIG_IA32_EMULATION | |
223 | return load_vdso32(); | 223 | return load_vdso32(); |
224 | #else | ||
225 | return 0; | ||
226 | #endif | ||
224 | } | 227 | } |
225 | #endif | 228 | #endif |
226 | #else | 229 | #else |
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 2dcc6ff6fdcc..26a46f44e298 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c | |||
@@ -290,7 +290,7 @@ static struct vm_area_struct gate_vma = { | |||
290 | 290 | ||
291 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) | 291 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) |
292 | { | 292 | { |
293 | #ifdef CONFIG_IA32_EMULATION | 293 | #ifdef CONFIG_COMPAT |
294 | if (!mm || mm->context.ia32_compat) | 294 | if (!mm || mm->context.ia32_compat) |
295 | return NULL; | 295 | return NULL; |
296 | #endif | 296 | #endif |
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index ae3a29ae875b..a0a19b7ba22d 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c | |||
@@ -34,99 +34,6 @@ | |||
34 | #include <asm/sys_ia32.h> | 34 | #include <asm/sys_ia32.h> |
35 | #include <asm/smap.h> | 35 | #include <asm/smap.h> |
36 | 36 | ||
37 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) | ||
38 | { | ||
39 | int err = 0; | ||
40 | bool ia32 = test_thread_flag(TIF_IA32); | ||
41 | |||
42 | if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) | ||
43 | return -EFAULT; | ||
44 | |||
45 | put_user_try { | ||
46 | /* If you change siginfo_t structure, please make sure that | ||
47 | this code is fixed accordingly. | ||
48 | It should never copy any pad contained in the structure | ||
49 | to avoid security leaks, but must copy the generic | ||
50 | 3 ints plus the relevant union member. */ | ||
51 | put_user_ex(from->si_signo, &to->si_signo); | ||
52 | put_user_ex(from->si_errno, &to->si_errno); | ||
53 | put_user_ex((short)from->si_code, &to->si_code); | ||
54 | |||
55 | if (from->si_code < 0) { | ||
56 | put_user_ex(from->si_pid, &to->si_pid); | ||
57 | put_user_ex(from->si_uid, &to->si_uid); | ||
58 | put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); | ||
59 | } else { | ||
60 | /* | ||
61 | * First 32bits of unions are always present: | ||
62 | * si_pid === si_band === si_tid === si_addr(LS half) | ||
63 | */ | ||
64 | put_user_ex(from->_sifields._pad[0], | ||
65 | &to->_sifields._pad[0]); | ||
66 | switch (from->si_code >> 16) { | ||
67 | case __SI_FAULT >> 16: | ||
68 | break; | ||
69 | case __SI_SYS >> 16: | ||
70 | put_user_ex(from->si_syscall, &to->si_syscall); | ||
71 | put_user_ex(from->si_arch, &to->si_arch); | ||
72 | break; | ||
73 | case __SI_CHLD >> 16: | ||
74 | if (ia32) { | ||
75 | put_user_ex(from->si_utime, &to->si_utime); | ||
76 | put_user_ex(from->si_stime, &to->si_stime); | ||
77 | } else { | ||
78 | put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); | ||
79 | put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); | ||
80 | } | ||
81 | put_user_ex(from->si_status, &to->si_status); | ||
82 | /* FALL THROUGH */ | ||
83 | default: | ||
84 | case __SI_KILL >> 16: | ||
85 | put_user_ex(from->si_uid, &to->si_uid); | ||
86 | break; | ||
87 | case __SI_POLL >> 16: | ||
88 | put_user_ex(from->si_fd, &to->si_fd); | ||
89 | break; | ||
90 | case __SI_TIMER >> 16: | ||
91 | put_user_ex(from->si_overrun, &to->si_overrun); | ||
92 | put_user_ex(ptr_to_compat(from->si_ptr), | ||
93 | &to->si_ptr); | ||
94 | break; | ||
95 | /* This is not generated by the kernel as of now. */ | ||
96 | case __SI_RT >> 16: | ||
97 | case __SI_MESGQ >> 16: | ||
98 | put_user_ex(from->si_uid, &to->si_uid); | ||
99 | put_user_ex(from->si_int, &to->si_int); | ||
100 | break; | ||
101 | } | ||
102 | } | ||
103 | } put_user_catch(err); | ||
104 | |||
105 | return err; | ||
106 | } | ||
107 | |||
108 | int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) | ||
109 | { | ||
110 | int err = 0; | ||
111 | u32 ptr32; | ||
112 | |||
113 | if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) | ||
114 | return -EFAULT; | ||
115 | |||
116 | get_user_try { | ||
117 | get_user_ex(to->si_signo, &from->si_signo); | ||
118 | get_user_ex(to->si_errno, &from->si_errno); | ||
119 | get_user_ex(to->si_code, &from->si_code); | ||
120 | |||
121 | get_user_ex(to->si_pid, &from->si_pid); | ||
122 | get_user_ex(to->si_uid, &from->si_uid); | ||
123 | get_user_ex(ptr32, &from->si_ptr); | ||
124 | to->si_ptr = compat_ptr(ptr32); | ||
125 | } get_user_catch(err); | ||
126 | |||
127 | return err; | ||
128 | } | ||
129 | |||
130 | /* | 37 | /* |
131 | * Do a signal return; undo the signal stack. | 38 | * Do a signal return; undo the signal stack. |
132 | */ | 39 | */ |
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e51a8f803f55..818cb8788225 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h | |||
@@ -91,15 +91,4 @@ do { \ | |||
91 | #define smp_mb__before_atomic() barrier() | 91 | #define smp_mb__before_atomic() barrier() |
92 | #define smp_mb__after_atomic() barrier() | 92 | #define smp_mb__after_atomic() barrier() |
93 | 93 | ||
94 | /* | ||
95 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | ||
96 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | ||
97 | * code region. | ||
98 | */ | ||
99 | static __always_inline void rdtsc_barrier(void) | ||
100 | { | ||
101 | alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, | ||
102 | "lfence", X86_FEATURE_LFENCE_RDTSC); | ||
103 | } | ||
104 | |||
105 | #endif /* _ASM_X86_BARRIER_H */ | 94 | #endif /* _ASM_X86_BARRIER_H */ |
diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h deleted file mode 100644 index 1fe49704b146..000000000000 --- a/arch/x86/include/asm/context_tracking.h +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | #ifndef _ASM_X86_CONTEXT_TRACKING_H | ||
2 | #define _ASM_X86_CONTEXT_TRACKING_H | ||
3 | |||
4 | #ifdef CONFIG_CONTEXT_TRACKING | ||
5 | # define SCHEDULE_USER call schedule_user | ||
6 | #else | ||
7 | # define SCHEDULE_USER call schedule | ||
8 | #endif | ||
9 | |||
10 | #endif | ||
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3d6606fb97d0..a39e5708209b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -176,6 +176,7 @@ | |||
176 | #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ | 176 | #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ |
177 | #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ | 177 | #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ |
178 | #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ | 178 | #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ |
179 | #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ | ||
179 | 180 | ||
180 | /* | 181 | /* |
181 | * Auxiliary flags: Linux defined - For features scattered in various | 182 | * Auxiliary flags: Linux defined - For features scattered in various |
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 9b3b4f2754c7..36a760bda462 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h | |||
@@ -4,5 +4,6 @@ | |||
4 | #include <asm-generic/delay.h> | 4 | #include <asm-generic/delay.h> |
5 | 5 | ||
6 | void use_tsc_delay(void); | 6 | void use_tsc_delay(void); |
7 | void use_mwaitx_delay(void); | ||
7 | 8 | ||
8 | #endif /* _ASM_X86_DELAY_H */ | 9 | #endif /* _ASM_X86_DELAY_H */ |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f161c189c27b..141c561f4664 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -78,7 +78,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t; | |||
78 | #ifdef CONFIG_X86_64 | 78 | #ifdef CONFIG_X86_64 |
79 | extern unsigned int vdso64_enabled; | 79 | extern unsigned int vdso64_enabled; |
80 | #endif | 80 | #endif |
81 | #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) | 81 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
82 | extern unsigned int vdso32_enabled; | 82 | extern unsigned int vdso32_enabled; |
83 | #endif | 83 | #endif |
84 | 84 | ||
@@ -187,8 +187,8 @@ static inline void elf_common_init(struct thread_struct *t, | |||
187 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ | 187 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ |
188 | elf_common_init(¤t->thread, regs, __USER_DS) | 188 | elf_common_init(¤t->thread, regs, __USER_DS) |
189 | 189 | ||
190 | void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); | 190 | void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp); |
191 | #define compat_start_thread start_thread_ia32 | 191 | #define compat_start_thread compat_start_thread |
192 | 192 | ||
193 | void set_personality_ia32(bool); | 193 | void set_personality_ia32(bool); |
194 | #define COMPAT_SET_PERSONALITY(ex) \ | 194 | #define COMPAT_SET_PERSONALITY(ex) \ |
@@ -344,14 +344,9 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, | |||
344 | */ | 344 | */ |
345 | static inline int mmap_is_ia32(void) | 345 | static inline int mmap_is_ia32(void) |
346 | { | 346 | { |
347 | #ifdef CONFIG_X86_32 | 347 | return config_enabled(CONFIG_X86_32) || |
348 | return 1; | 348 | (config_enabled(CONFIG_COMPAT) && |
349 | #endif | 349 | test_thread_flag(TIF_ADDR32)); |
350 | #ifdef CONFIG_IA32_EMULATION | ||
351 | if (test_thread_flag(TIF_ADDR32)) | ||
352 | return 1; | ||
353 | #endif | ||
354 | return 0; | ||
355 | } | 350 | } |
356 | 351 | ||
357 | /* Do not change the values. See get_align_mask() */ | 352 | /* Do not change the values. See get_align_mask() */ |
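The mmap_is_ia32() rewrite replaces stacked #ifdef blocks with config_enabled(), which expands to a constant 0 or 1, so the expression folds at compile time while both arms remain visible to the compiler for type checking. A small illustrative sketch of the idiom follows; the helper name is made up and the body simply mirrors the logic above.

/* Hypothetical example of the config_enabled() idiom used above. */
static inline int wants_compat_layout(void)
{
	/*
	 * On CONFIG_X86_32=y this folds to "return 1;".  On a 64-bit
	 * kernel without CONFIG_COMPAT it folds to "return 0;" and the
	 * test_thread_flag() call is discarded as dead code -- but,
	 * unlike the old #ifdef version, it is still parsed and
	 * type-checked in every configuration.
	 */
	return config_enabled(CONFIG_X86_32) ||
	       (config_enabled(CONFIG_COMPAT) && test_thread_flag(TIF_ADDR32));
}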
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index d0e8e0141041..28019765442e 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h | |||
@@ -22,15 +22,6 @@ struct ucontext_ia32 { | |||
22 | compat_sigset_t uc_sigmask; /* mask last for extensibility */ | 22 | compat_sigset_t uc_sigmask; /* mask last for extensibility */ |
23 | }; | 23 | }; |
24 | 24 | ||
25 | struct ucontext_x32 { | ||
26 | unsigned int uc_flags; | ||
27 | unsigned int uc_link; | ||
28 | compat_stack_t uc_stack; | ||
29 | unsigned int uc__pad0; /* needed for alignment */ | ||
30 | struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ | ||
31 | compat_sigset_t uc_sigmask; /* mask last for extensibility */ | ||
32 | }; | ||
33 | |||
34 | /* This matches struct stat64 in glibc2.2, hence the absolutely | 25 | /* This matches struct stat64 in glibc2.2, hence the absolutely |
35 | * insane amounts of padding around dev_t's. | 26 | * insane amounts of padding around dev_t's. |
36 | */ | 27 | */ |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4c2d2eb2060a..6ca9fd6234e1 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -117,16 +117,6 @@ | |||
117 | 117 | ||
118 | #define FPU_IRQ 13 | 118 | #define FPU_IRQ 13 |
119 | 119 | ||
120 | #define FIRST_VM86_IRQ 3 | ||
121 | #define LAST_VM86_IRQ 15 | ||
122 | |||
123 | #ifndef __ASSEMBLY__ | ||
124 | static inline int invalid_vm86_irq(int irq) | ||
125 | { | ||
126 | return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; | ||
127 | } | ||
128 | #endif | ||
129 | |||
130 | /* | 120 | /* |
131 | * Size the maximum number of interrupts. | 121 | * Size the maximum number of interrupts. |
132 | * | 122 | * |
diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 031f6266f425..0d9b14f60d2c 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define _ASM_X86_MATH_EMU_H | 2 | #define _ASM_X86_MATH_EMU_H |
3 | 3 | ||
4 | #include <asm/ptrace.h> | 4 | #include <asm/ptrace.h> |
5 | #include <asm/vm86.h> | ||
6 | 5 | ||
7 | /* This structure matches the layout of the data saved to the stack | 6 | /* This structure matches the layout of the data saved to the stack |
8 | following a device-not-present interrupt, part of it saved | 7 | following a device-not-present interrupt, part of it saved |
@@ -10,9 +9,6 @@ | |||
10 | */ | 9 | */ |
11 | struct math_emu_info { | 10 | struct math_emu_info { |
12 | long ___orig_eip; | 11 | long ___orig_eip; |
13 | union { | 12 | struct pt_regs *regs; |
14 | struct pt_regs *regs; | ||
15 | struct kernel_vm86_regs *vm86; | ||
16 | }; | ||
17 | }; | 13 | }; |
18 | #endif /* _ASM_X86_MATH_EMU_H */ | 14 | #endif /* _ASM_X86_MATH_EMU_H */ |
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 364d27481a52..55234d5e7160 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h | |||
@@ -9,7 +9,9 @@ | |||
9 | * we put the segment information here. | 9 | * we put the segment information here. |
10 | */ | 10 | */ |
11 | typedef struct { | 11 | typedef struct { |
12 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
12 | struct ldt_struct *ldt; | 13 | struct ldt_struct *ldt; |
14 | #endif | ||
13 | 15 | ||
14 | #ifdef CONFIG_X86_64 | 16 | #ifdef CONFIG_X86_64 |
15 | /* True if mm supports a task running in 32 bit compatibility mode. */ | 17 | /* True if mm supports a task running in 32 bit compatibility mode. */ |
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 984abfe47edc..379cd3658799 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
@@ -33,6 +33,7 @@ static inline void load_mm_cr4(struct mm_struct *mm) | |||
33 | static inline void load_mm_cr4(struct mm_struct *mm) {} | 33 | static inline void load_mm_cr4(struct mm_struct *mm) {} |
34 | #endif | 34 | #endif |
35 | 35 | ||
36 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
36 | /* | 37 | /* |
37 | * ldt_structs can be allocated, used, and freed, but they are never | 38 | * ldt_structs can be allocated, used, and freed, but they are never |
38 | * modified while live. | 39 | * modified while live. |
@@ -48,8 +49,23 @@ struct ldt_struct { | |||
48 | int size; | 49 | int size; |
49 | }; | 50 | }; |
50 | 51 | ||
52 | /* | ||
53 | * Used for LDT copy/destruction. | ||
54 | */ | ||
55 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm); | ||
56 | void destroy_context(struct mm_struct *mm); | ||
57 | #else /* CONFIG_MODIFY_LDT_SYSCALL */ | ||
58 | static inline int init_new_context(struct task_struct *tsk, | ||
59 | struct mm_struct *mm) | ||
60 | { | ||
61 | return 0; | ||
62 | } | ||
63 | static inline void destroy_context(struct mm_struct *mm) {} | ||
64 | #endif | ||
65 | |||
51 | static inline void load_mm_ldt(struct mm_struct *mm) | 66 | static inline void load_mm_ldt(struct mm_struct *mm) |
52 | { | 67 | { |
68 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
53 | struct ldt_struct *ldt; | 69 | struct ldt_struct *ldt; |
54 | 70 | ||
55 | /* lockless_dereference synchronizes with smp_store_release */ | 71 | /* lockless_dereference synchronizes with smp_store_release */ |
@@ -73,17 +89,13 @@ static inline void load_mm_ldt(struct mm_struct *mm) | |||
73 | set_ldt(ldt->entries, ldt->size); | 89 | set_ldt(ldt->entries, ldt->size); |
74 | else | 90 | else |
75 | clear_LDT(); | 91 | clear_LDT(); |
92 | #else | ||
93 | clear_LDT(); | ||
94 | #endif | ||
76 | 95 | ||
77 | DEBUG_LOCKS_WARN_ON(preemptible()); | 96 | DEBUG_LOCKS_WARN_ON(preemptible()); |
78 | } | 97 | } |
79 | 98 | ||
80 | /* | ||
81 | * Used for LDT copy/destruction. | ||
82 | */ | ||
83 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm); | ||
84 | void destroy_context(struct mm_struct *mm); | ||
85 | |||
86 | |||
87 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) | 99 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) |
88 | { | 100 | { |
89 | #ifdef CONFIG_SMP | 101 | #ifdef CONFIG_SMP |
@@ -114,6 +126,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
114 | /* Load per-mm CR4 state */ | 126 | /* Load per-mm CR4 state */ |
115 | load_mm_cr4(next); | 127 | load_mm_cr4(next); |
116 | 128 | ||
129 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
117 | /* | 130 | /* |
118 | * Load the LDT, if the LDT is different. | 131 | * Load the LDT, if the LDT is different. |
119 | * | 132 | * |
@@ -128,6 +141,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
128 | */ | 141 | */ |
129 | if (unlikely(prev->context.ldt != next->context.ldt)) | 142 | if (unlikely(prev->context.ldt != next->context.ldt)) |
130 | load_mm_ldt(next); | 143 | load_mm_ldt(next); |
144 | #endif | ||
131 | } | 145 | } |
132 | #ifdef CONFIG_SMP | 146 | #ifdef CONFIG_SMP |
133 | else { | 147 | else { |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index e6a707eb5081..77d8b284e4a7 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -47,14 +47,13 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) | |||
47 | * it means rax *or* rdx. | 47 | * it means rax *or* rdx. |
48 | */ | 48 | */ |
49 | #ifdef CONFIG_X86_64 | 49 | #ifdef CONFIG_X86_64 |
50 | #define DECLARE_ARGS(val, low, high) unsigned low, high | 50 | /* Using 64-bit values saves one instruction clearing the high half of low */ |
51 | #define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32)) | 51 | #define DECLARE_ARGS(val, low, high) unsigned long low, high |
52 | #define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) | 52 | #define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) |
53 | #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) | 53 | #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) |
54 | #else | 54 | #else |
55 | #define DECLARE_ARGS(val, low, high) unsigned long long val | 55 | #define DECLARE_ARGS(val, low, high) unsigned long long val |
56 | #define EAX_EDX_VAL(val, low, high) (val) | 56 | #define EAX_EDX_VAL(val, low, high) (val) |
57 | #define EAX_EDX_ARGS(val, low, high) "A" (val) | ||
58 | #define EAX_EDX_RET(val, low, high) "=A" (val) | 57 | #define EAX_EDX_RET(val, low, high) "=A" (val) |
59 | #endif | 58 | #endif |
60 | 59 | ||
@@ -106,12 +105,19 @@ notrace static inline int native_write_msr_safe(unsigned int msr, | |||
106 | return err; | 105 | return err; |
107 | } | 106 | } |
108 | 107 | ||
109 | extern unsigned long long native_read_tsc(void); | ||
110 | |||
111 | extern int rdmsr_safe_regs(u32 regs[8]); | 108 | extern int rdmsr_safe_regs(u32 regs[8]); |
112 | extern int wrmsr_safe_regs(u32 regs[8]); | 109 | extern int wrmsr_safe_regs(u32 regs[8]); |
113 | 110 | ||
114 | static __always_inline unsigned long long __native_read_tsc(void) | 111 | /** |
112 | * rdtsc() - returns the current TSC without ordering constraints | ||
113 | * | ||
114 | * rdtsc() returns the result of RDTSC as a 64-bit integer. The | ||
115 | * only ordering constraint it supplies is the ordering implied by | ||
116 | * "asm volatile": it will put the RDTSC in the place you expect. The | ||
117 | * CPU can and will speculatively execute that RDTSC, though, so the | ||
118 | * results can be non-monotonic if compared on different CPUs. | ||
119 | */ | ||
120 | static __always_inline unsigned long long rdtsc(void) | ||
115 | { | 121 | { |
116 | DECLARE_ARGS(val, low, high); | 122 | DECLARE_ARGS(val, low, high); |
117 | 123 | ||
@@ -120,6 +126,35 @@ static __always_inline unsigned long long __native_read_tsc(void) | |||
120 | return EAX_EDX_VAL(val, low, high); | 126 | return EAX_EDX_VAL(val, low, high); |
121 | } | 127 | } |
122 | 128 | ||
129 | /** | ||
130 | * rdtsc_ordered() - read the current TSC in program order | ||
131 | * | ||
132 | * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. | ||
133 | * It is ordered like a load to a global in-memory counter. It should | ||
134 | * be impossible to observe non-monotonic rdtsc_ordered() behavior | ||
135 | * across multiple CPUs as long as the TSC is synced. | ||
136 | */ | ||
137 | static __always_inline unsigned long long rdtsc_ordered(void) | ||
138 | { | ||
139 | /* | ||
140 | * The RDTSC instruction is not ordered relative to memory | ||
141 | * access. The Intel SDM and the AMD APM are both vague on this | ||
142 | * point, but empirically an RDTSC instruction can be | ||
143 | * speculatively executed before prior loads. An RDTSC | ||
144 | * immediately after an appropriate barrier appears to be | ||
145 | * ordered as a normal load, that is, it provides the same | ||
146 | * ordering guarantees as reading from a global memory location | ||
147 | * that some other imaginary CPU is updating continuously with a | ||
148 | * time stamp. | ||
149 | */ | ||
150 | alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, | ||
151 | "lfence", X86_FEATURE_LFENCE_RDTSC); | ||
152 | return rdtsc(); | ||
153 | } | ||
154 | |||
155 | /* Deprecated, keep it for a cycle for easier merging: */ | ||
156 | #define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0) | ||
157 | |||
123 | static inline unsigned long long native_read_pmc(int counter) | 158 | static inline unsigned long long native_read_pmc(int counter) |
124 | { | 159 | { |
125 | DECLARE_ARGS(val, low, high); | 160 | DECLARE_ARGS(val, low, high); |
@@ -153,8 +188,10 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high) | |||
153 | #define rdmsrl(msr, val) \ | 188 | #define rdmsrl(msr, val) \ |
154 | ((val) = native_read_msr((msr))) | 189 | ((val) = native_read_msr((msr))) |
155 | 190 | ||
156 | #define wrmsrl(msr, val) \ | 191 | static inline void wrmsrl(unsigned msr, u64 val) |
157 | native_write_msr((msr), (u32)((u64)(val)), (u32)((u64)(val) >> 32)) | 192 | { |
193 | native_write_msr(msr, (u32)val, (u32)(val >> 32)); | ||
194 | } | ||
158 | 195 | ||
159 | /* wrmsr with exception handling */ | 196 | /* wrmsr with exception handling */ |
160 | static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high) | 197 | static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high) |
@@ -180,12 +217,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
180 | return err; | 217 | return err; |
181 | } | 218 | } |
182 | 219 | ||
183 | #define rdtscl(low) \ | ||
184 | ((low) = (u32)__native_read_tsc()) | ||
185 | |||
186 | #define rdtscll(val) \ | ||
187 | ((val) = __native_read_tsc()) | ||
188 | |||
189 | #define rdpmc(counter, low, high) \ | 220 | #define rdpmc(counter, low, high) \ |
190 | do { \ | 221 | do { \ |
191 | u64 _l = native_read_pmc((counter)); \ | 222 | u64 _l = native_read_pmc((counter)); \ |
@@ -195,15 +226,6 @@ do { \ | |||
195 | 226 | ||
196 | #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) | 227 | #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) |
197 | 228 | ||
198 | #define rdtscp(low, high, aux) \ | ||
199 | do { \ | ||
200 | unsigned long long _val = native_read_tscp(&(aux)); \ | ||
201 | (low) = (u32)_val; \ | ||
202 | (high) = (u32)(_val >> 32); \ | ||
203 | } while (0) | ||
204 | |||
205 | #define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) | ||
206 | |||
207 | #endif /* !CONFIG_PARAVIRT */ | 229 | #endif /* !CONFIG_PARAVIRT */ |
208 | 230 | ||
209 | /* | 231 | /* |
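Taken together, the msr.h changes split the old __native_read_tsc()/rdtsc_barrier() pair into an explicit choice between rdtsc() (raw, no ordering) and rdtsc_ordered() (ordered like a load), keep rdtscll() only as a deprecated merge helper, and turn wrmsrl() into a real inline function. A hedged sketch of how a caller might time a short code region with the new helpers; some_work() is a stand-in name for whatever is being measured.

extern void some_work(void);	/* placeholder for the code under test */

/*
 * Sketch only: count TSC cycles across some_work().  Using the ordered
 * variant on both sides keeps the reads from being speculated past the
 * work in either direction.
 */
static u64 time_some_work(void)
{
	u64 start, end;

	start = rdtsc_ordered();
	some_work();
	end = rdtsc_ordered();

	return end - start;
}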
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 653dfa7662e1..c70689b5e5aa 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h | |||
@@ -14,6 +14,9 @@ | |||
14 | #define CPUID5_ECX_INTERRUPT_BREAK 0x2 | 14 | #define CPUID5_ECX_INTERRUPT_BREAK 0x2 |
15 | 15 | ||
16 | #define MWAIT_ECX_INTERRUPT_BREAK 0x1 | 16 | #define MWAIT_ECX_INTERRUPT_BREAK 0x1 |
17 | #define MWAITX_ECX_TIMER_ENABLE BIT(1) | ||
18 | #define MWAITX_MAX_LOOPS ((u32)-1) | ||
19 | #define MWAITX_DISABLE_CSTATES 0xf | ||
17 | 20 | ||
18 | static inline void __monitor(const void *eax, unsigned long ecx, | 21 | static inline void __monitor(const void *eax, unsigned long ecx, |
19 | unsigned long edx) | 22 | unsigned long edx) |
@@ -23,6 +26,14 @@ static inline void __monitor(const void *eax, unsigned long ecx, | |||
23 | :: "a" (eax), "c" (ecx), "d"(edx)); | 26 | :: "a" (eax), "c" (ecx), "d"(edx)); |
24 | } | 27 | } |
25 | 28 | ||
29 | static inline void __monitorx(const void *eax, unsigned long ecx, | ||
30 | unsigned long edx) | ||
31 | { | ||
32 | /* "monitorx %eax, %ecx, %edx;" */ | ||
33 | asm volatile(".byte 0x0f, 0x01, 0xfa;" | ||
34 | :: "a" (eax), "c" (ecx), "d"(edx)); | ||
35 | } | ||
36 | |||
26 | static inline void __mwait(unsigned long eax, unsigned long ecx) | 37 | static inline void __mwait(unsigned long eax, unsigned long ecx) |
27 | { | 38 | { |
28 | /* "mwait %eax, %ecx;" */ | 39 | /* "mwait %eax, %ecx;" */ |
@@ -30,6 +41,40 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) | |||
30 | :: "a" (eax), "c" (ecx)); | 41 | :: "a" (eax), "c" (ecx)); |
31 | } | 42 | } |
32 | 43 | ||
44 | /* | ||
45 | * MWAITX allows for a timer expiration to get the core out of a wait state in | ||
46 | * addition to the default MWAIT exit condition of a store appearing at a | ||
47 | * monitored virtual address. | ||
48 | * | ||
49 | * Registers: | ||
50 | * | ||
51 | * MWAITX ECX[1]: enable timer if set | ||
52 | * MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks. The software P0 | ||
53 | * frequency is the same as the TSC frequency. | ||
54 | * | ||
55 | * Below is a comparison between MWAIT and MWAITX on AMD processors: | ||
56 | * | ||
57 | * MWAIT MWAITX | ||
58 | * opcode 0f 01 c9 | 0f 01 fb | ||
59 | * ECX[0] value of RFLAGS.IF seen by instruction | ||
60 | * ECX[1] unused/#GP if set | enable timer if set | ||
61 | * ECX[31:2] unused/#GP if set | ||
62 | * EAX unused (reserve for hint) | ||
63 | * EBX[31:0] unused | max wait time (P0 clocks) | ||
64 | * | ||
65 | * MONITOR MONITORX | ||
66 | * opcode 0f 01 c8 | 0f 01 fa | ||
67 | * EAX (logical) address to monitor | ||
68 | * ECX #GP if not zero | ||
69 | */ | ||
70 | static inline void __mwaitx(unsigned long eax, unsigned long ebx, | ||
71 | unsigned long ecx) | ||
72 | { | ||
73 | /* "mwaitx %eax, %ebx, %ecx;" */ | ||
74 | asm volatile(".byte 0x0f, 0x01, 0xfb;" | ||
75 | :: "a" (eax), "b" (ebx), "c" (ecx)); | ||
76 | } | ||
77 | |||
33 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | 78 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) |
34 | { | 79 | { |
35 | trace_hardirqs_on(); | 80 | trace_hardirqs_on(); |
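Combined with use_mwaitx_delay() from delay.h and the MWAITX_* constants above, this is enough to build a timer-based delay: arm MONITORX on some per-CPU address, then MWAITX with the timer bit set and the remaining wait expressed in P0 (TSC-rate) clocks. The sketch below is a rough approximation of such a loop, not the exact arch/x86/lib/delay.c implementation; the monitored per-CPU variable is a stand-in.

/* Hypothetical monitor target; only its address matters. */
static DEFINE_PER_CPU(unsigned long, mwaitx_monitor_target);

/* Rough sketch of an MWAITX-based delay; "loops" is in TSC cycles. */
static void mwaitx_delay_sketch(u64 loops)
{
	u64 start = rdtsc_ordered();

	for (;;) {
		u64 chunk = min_t(u64, loops, MWAITX_MAX_LOOPS);
		u64 now;

		__monitorx(this_cpu_ptr(&mwaitx_monitor_target), 0, 0);
		/* EAX=0xf: stay out of deep C-states; EBX: max wait; ECX[1]: timer on */
		__mwaitx(MWAITX_DISABLE_CSTATES, chunk, MWAITX_ECX_TIMER_ENABLE);

		now = rdtsc_ordered();
		if (now - start >= loops)
			break;

		loops -= now - start;
		start = now;
	}
}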
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d143bfad45d7..10d0596433f8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -153,7 +153,11 @@ do { \ | |||
153 | val = paravirt_read_msr(msr, &_err); \ | 153 | val = paravirt_read_msr(msr, &_err); \ |
154 | } while (0) | 154 | } while (0) |
155 | 155 | ||
156 | #define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32) | 156 | static inline void wrmsrl(unsigned msr, u64 val) |
157 | { | ||
158 | wrmsr(msr, (u32)val, (u32)(val>>32)); | ||
159 | } | ||
160 | |||
157 | #define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) | 161 | #define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) |
158 | 162 | ||
159 | /* rdmsr with exception handling */ | 163 | /* rdmsr with exception handling */ |
@@ -174,19 +178,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
174 | return err; | 178 | return err; |
175 | } | 179 | } |
176 | 180 | ||
177 | static inline u64 paravirt_read_tsc(void) | ||
178 | { | ||
179 | return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); | ||
180 | } | ||
181 | |||
182 | #define rdtscl(low) \ | ||
183 | do { \ | ||
184 | u64 _l = paravirt_read_tsc(); \ | ||
185 | low = (int)_l; \ | ||
186 | } while (0) | ||
187 | |||
188 | #define rdtscll(val) (val = paravirt_read_tsc()) | ||
189 | |||
190 | static inline unsigned long long paravirt_sched_clock(void) | 181 | static inline unsigned long long paravirt_sched_clock(void) |
191 | { | 182 | { |
192 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); | 183 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); |
@@ -215,27 +206,6 @@ do { \ | |||
215 | 206 | ||
216 | #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) | 207 | #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) |
217 | 208 | ||
218 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) | ||
219 | { | ||
220 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); | ||
221 | } | ||
222 | |||
223 | #define rdtscp(low, high, aux) \ | ||
224 | do { \ | ||
225 | int __aux; \ | ||
226 | unsigned long __val = paravirt_rdtscp(&__aux); \ | ||
227 | (low) = (u32)__val; \ | ||
228 | (high) = (u32)(__val >> 32); \ | ||
229 | (aux) = __aux; \ | ||
230 | } while (0) | ||
231 | |||
232 | #define rdtscpll(val, aux) \ | ||
233 | do { \ | ||
234 | unsigned long __aux; \ | ||
235 | val = paravirt_rdtscp(&__aux); \ | ||
236 | (aux) = __aux; \ | ||
237 | } while (0) | ||
238 | |||
239 | static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) | 209 | static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) |
240 | { | 210 | { |
241 | PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); | 211 | PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); |
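With wrmsrl() now an inline function in the paravirt build as well, both variants type-check the value argument identically: it must already be an integer, so entry-point addresses need an explicit cast rather than relying on the old macro's internal casts. That is exactly what the syscall_init() hunk near the end of this diff does:

/*
 * Illustration: wrmsrl() takes a u64 in both native and paravirt
 * builds, so function addresses are cast explicitly, as in the
 * arch/x86/kernel/cpu/common.c hunk below.
 */
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);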
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a6b8f9fadb06..ce029e4fa7c6 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -156,9 +156,7 @@ struct pv_cpu_ops { | |||
156 | u64 (*read_msr)(unsigned int msr, int *err); | 156 | u64 (*read_msr)(unsigned int msr, int *err); |
157 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | 157 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); |
158 | 158 | ||
159 | u64 (*read_tsc)(void); | ||
160 | u64 (*read_pmc)(int counter); | 159 | u64 (*read_pmc)(int counter); |
161 | unsigned long long (*read_tscp)(unsigned int *aux); | ||
162 | 160 | ||
163 | #ifdef CONFIG_X86_32 | 161 | #ifdef CONFIG_X86_32 |
164 | /* | 162 | /* |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 944f1785ed0d..9615a4e2645e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -6,8 +6,8 @@ | |||
6 | /* Forward declaration, a strange C thing */ | 6 | /* Forward declaration, a strange C thing */ |
7 | struct task_struct; | 7 | struct task_struct; |
8 | struct mm_struct; | 8 | struct mm_struct; |
9 | struct vm86; | ||
9 | 10 | ||
10 | #include <asm/vm86.h> | ||
11 | #include <asm/math_emu.h> | 11 | #include <asm/math_emu.h> |
12 | #include <asm/segment.h> | 12 | #include <asm/segment.h> |
13 | #include <asm/types.h> | 13 | #include <asm/types.h> |
@@ -400,15 +400,9 @@ struct thread_struct { | |||
400 | unsigned long cr2; | 400 | unsigned long cr2; |
401 | unsigned long trap_nr; | 401 | unsigned long trap_nr; |
402 | unsigned long error_code; | 402 | unsigned long error_code; |
403 | #ifdef CONFIG_X86_32 | 403 | #ifdef CONFIG_VM86 |
404 | /* Virtual 86 mode info */ | 404 | /* Virtual 86 mode info */ |
405 | struct vm86_struct __user *vm86_info; | 405 | struct vm86 *vm86; |
406 | unsigned long screen_bitmap; | ||
407 | unsigned long v86flags; | ||
408 | unsigned long v86mask; | ||
409 | unsigned long saved_sp0; | ||
410 | unsigned int saved_fs; | ||
411 | unsigned int saved_gs; | ||
412 | #endif | 406 | #endif |
413 | /* IO permissions: */ | 407 | /* IO permissions: */ |
414 | unsigned long *io_bitmap_ptr; | 408 | unsigned long *io_bitmap_ptr; |
@@ -720,7 +714,6 @@ static inline void spin_lock_prefetch(const void *x) | |||
720 | 714 | ||
721 | #define INIT_THREAD { \ | 715 | #define INIT_THREAD { \ |
722 | .sp0 = TOP_OF_INIT_STACK, \ | 716 | .sp0 = TOP_OF_INIT_STACK, \ |
723 | .vm86_info = NULL, \ | ||
724 | .sysenter_cs = __KERNEL_CS, \ | 717 | .sysenter_cs = __KERNEL_CS, \ |
725 | .io_bitmap_ptr = NULL, \ | 718 | .io_bitmap_ptr = NULL, \ |
726 | } | 719 | } |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5fabf1362942..6271281f947d 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -88,7 +88,6 @@ extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch, | |||
88 | unsigned long phase1_result); | 88 | unsigned long phase1_result); |
89 | 89 | ||
90 | extern long syscall_trace_enter(struct pt_regs *); | 90 | extern long syscall_trace_enter(struct pt_regs *); |
91 | extern void syscall_trace_leave(struct pt_regs *); | ||
92 | 91 | ||
93 | static inline unsigned long regs_return_value(struct pt_regs *regs) | 92 | static inline unsigned long regs_return_value(struct pt_regs *regs) |
94 | { | 93 | { |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 628954ceede1..7a6bed5c08bc 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) | |||
62 | static __always_inline | 62 | static __always_inline |
63 | u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) | 63 | u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) |
64 | { | 64 | { |
65 | u64 delta = __native_read_tsc() - src->tsc_timestamp; | 65 | u64 delta = rdtsc_ordered() - src->tsc_timestamp; |
66 | return pvclock_scale_delta(delta, src->tsc_to_system_mul, | 66 | return pvclock_scale_delta(delta, src->tsc_to_system_mul, |
67 | src->tsc_shift); | 67 | src->tsc_shift); |
68 | } | 68 | } |
@@ -76,13 +76,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, | |||
76 | u8 ret_flags; | 76 | u8 ret_flags; |
77 | 77 | ||
78 | version = src->version; | 78 | version = src->version; |
79 | /* Note: emulated platforms which do not advertise SSE2 support | 79 | |
80 | * result in kvmclock not using the necessary RDTSC barriers. | ||
81 | * Without barriers, it is possible that RDTSC instruction reads from | ||
82 | * the time stamp counter outside rdtsc_barrier protected section | ||
83 | * below, resulting in violation of monotonicity. | ||
84 | */ | ||
85 | rdtsc_barrier(); | ||
86 | offset = pvclock_get_nsec_offset(src); | 80 | offset = pvclock_get_nsec_offset(src); |
87 | ret = src->system_time + offset; | 81 | ret = src->system_time + offset; |
88 | ret_flags = src->flags; | 82 | ret_flags = src->flags; |
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 7c7c27c97daa..1f3175bb994e 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <asm/sigcontext.h> | 4 | #include <asm/sigcontext.h> |
5 | #include <asm/siginfo.h> | 5 | #include <asm/siginfo.h> |
6 | #include <asm/ucontext.h> | 6 | #include <asm/ucontext.h> |
7 | #include <linux/compat.h> | ||
7 | 8 | ||
8 | #ifdef CONFIG_X86_32 | 9 | #ifdef CONFIG_X86_32 |
9 | #define sigframe_ia32 sigframe | 10 | #define sigframe_ia32 sigframe |
@@ -69,6 +70,15 @@ struct rt_sigframe { | |||
69 | 70 | ||
70 | #ifdef CONFIG_X86_X32_ABI | 71 | #ifdef CONFIG_X86_X32_ABI |
71 | 72 | ||
73 | struct ucontext_x32 { | ||
74 | unsigned int uc_flags; | ||
75 | unsigned int uc_link; | ||
76 | compat_stack_t uc_stack; | ||
77 | unsigned int uc__pad0; /* needed for alignment */ | ||
78 | struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ | ||
79 | compat_sigset_t uc_sigmask; /* mask last for extensibility */ | ||
80 | }; | ||
81 | |||
72 | struct rt_sigframe_x32 { | 82 | struct rt_sigframe_x32 { |
73 | u64 pretcode; | 83 | u64 pretcode; |
74 | struct ucontext_x32 uc; | 84 | struct ucontext_x32 uc; |
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 31eab867e6d3..c481be78fcf1 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h | |||
@@ -30,7 +30,7 @@ typedef sigset_t compat_sigset_t; | |||
30 | #endif /* __ASSEMBLY__ */ | 30 | #endif /* __ASSEMBLY__ */ |
31 | #include <uapi/asm/signal.h> | 31 | #include <uapi/asm/signal.h> |
32 | #ifndef __ASSEMBLY__ | 32 | #ifndef __ASSEMBLY__ |
33 | extern void do_notify_resume(struct pt_regs *, void *, __u32); | 33 | extern void do_signal(struct pt_regs *regs); |
34 | 34 | ||
35 | #define __ARCH_HAS_SA_RESTORER | 35 | #define __ARCH_HAS_SA_RESTORER |
36 | 36 | ||
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c2e00bb2a136..58505f01962f 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h | |||
@@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void) | |||
72 | * on during the bootup the random pool has true entropy too. | 72 | * on during the bootup the random pool has true entropy too. |
73 | */ | 73 | */ |
74 | get_random_bytes(&canary, sizeof(canary)); | 74 | get_random_bytes(&canary, sizeof(canary)); |
75 | tsc = __native_read_tsc(); | 75 | tsc = rdtsc(); |
76 | canary += tsc + (tsc << 32UL); | 76 | canary += tsc + (tsc << 32UL); |
77 | 77 | ||
78 | current->stack_canary = canary; | 78 | current->stack_canary = canary; |
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 592a6a672e07..91dfcafe27a6 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -37,6 +37,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); | |||
37 | asmlinkage unsigned long sys_sigreturn(void); | 37 | asmlinkage unsigned long sys_sigreturn(void); |
38 | 38 | ||
39 | /* kernel/vm86_32.c */ | 39 | /* kernel/vm86_32.c */ |
40 | struct vm86_struct; | ||
40 | asmlinkage long sys_vm86old(struct vm86_struct __user *); | 41 | asmlinkage long sys_vm86old(struct vm86_struct __user *); |
41 | asmlinkage long sys_vm86(unsigned long, unsigned long); | 42 | asmlinkage long sys_vm86(unsigned long, unsigned long); |
42 | 43 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 225ee545e1a0..8afdc3e44247 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -27,14 +27,17 @@ | |||
27 | * Without this offset, that can result in a page fault. (We are | 27 | * Without this offset, that can result in a page fault. (We are |
28 | * careful that, in this case, the value we read doesn't matter.) | 28 | * careful that, in this case, the value we read doesn't matter.) |
29 | * | 29 | * |
30 | * In vm86 mode, the hardware frame is much longer still, but we neither | 30 | * In vm86 mode, the hardware frame is much longer still, so add 16 |
31 | * access the extra members from NMI context, nor do we write such a | 31 | * bytes to make room for the real-mode segments. |
32 | * frame at sp0 at all. | ||
33 | * | 32 | * |
34 | * x86_64 has a fixed-length stack frame. | 33 | * x86_64 has a fixed-length stack frame. |
35 | */ | 34 | */ |
36 | #ifdef CONFIG_X86_32 | 35 | #ifdef CONFIG_X86_32 |
37 | # define TOP_OF_KERNEL_STACK_PADDING 8 | 36 | # ifdef CONFIG_VM86 |
37 | # define TOP_OF_KERNEL_STACK_PADDING 16 | ||
38 | # else | ||
39 | # define TOP_OF_KERNEL_STACK_PADDING 8 | ||
40 | # endif | ||
38 | #else | 41 | #else |
39 | # define TOP_OF_KERNEL_STACK_PADDING 0 | 42 | # define TOP_OF_KERNEL_STACK_PADDING 0 |
40 | #endif | 43 | #endif |
@@ -140,27 +143,11 @@ struct thread_info { | |||
140 | _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ | 143 | _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ |
141 | _TIF_NOHZ) | 144 | _TIF_NOHZ) |
142 | 145 | ||
143 | /* work to do in syscall_trace_leave() */ | ||
144 | #define _TIF_WORK_SYSCALL_EXIT \ | ||
145 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ | ||
146 | _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) | ||
147 | |||
148 | /* work to do on interrupt/exception return */ | ||
149 | #define _TIF_WORK_MASK \ | ||
150 | (0x0000FFFF & \ | ||
151 | ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \ | ||
152 | _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) | ||
153 | |||
154 | /* work to do on any return to user space */ | 146 | /* work to do on any return to user space */ |
155 | #define _TIF_ALLWORK_MASK \ | 147 | #define _TIF_ALLWORK_MASK \ |
156 | ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ | 148 | ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ |
157 | _TIF_NOHZ) | 149 | _TIF_NOHZ) |
158 | 150 | ||
159 | /* Only used for 64 bit */ | ||
160 | #define _TIF_DO_NOTIFY_MASK \ | ||
161 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ | ||
162 | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) | ||
163 | |||
164 | /* flags to check in __switch_to() */ | 151 | /* flags to check in __switch_to() */ |
165 | #define _TIF_WORK_CTXSW \ | 152 | #define _TIF_WORK_CTXSW \ |
166 | (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) | 153 | (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) |
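The jump from 8 to 16 bytes of padding matches the segment-register tail of the vm86 hardware frame: on return to vm86 mode the CPU additionally pushes ES, DS, FS and GS, each widened to 32 bits, and the old comment's assumption that such a frame is never written at sp0 no longer holds with this series, so the kernel stack must reserve room for it. A condensed sketch of that tail (compare kernel_vm86_regs in asm/vm86.h further down); the layout here is an assumption drawn from that structure.

/*
 * Condensed sketch: the four extra real-mode segment registers, each
 * padded to 32 bits by the CPU -- 4 * 4 = 16 bytes, matching
 * TOP_OF_KERNEL_STACK_PADDING when CONFIG_VM86=y.
 */
struct vm86_frame_tail {
	unsigned short es, __esh;
	unsigned short ds, __dsh;
	unsigned short fs, __fsh;
	unsigned short gs, __gsh;
};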
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c5380bea2a36..c3496619740a 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -112,8 +112,8 @@ asmlinkage void smp_threshold_interrupt(void); | |||
112 | asmlinkage void smp_deferred_error_interrupt(void); | 112 | asmlinkage void smp_deferred_error_interrupt(void); |
113 | #endif | 113 | #endif |
114 | 114 | ||
115 | extern enum ctx_state ist_enter(struct pt_regs *regs); | 115 | extern void ist_enter(struct pt_regs *regs); |
116 | extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state); | 116 | extern void ist_exit(struct pt_regs *regs); |
117 | extern void ist_begin_non_atomic(struct pt_regs *regs); | 117 | extern void ist_begin_non_atomic(struct pt_regs *regs); |
118 | extern void ist_end_non_atomic(void); | 118 | extern void ist_end_non_atomic(void); |
119 | 119 | ||
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index aad56eb3bbe2..6d7c5479bcea 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -21,28 +21,12 @@ extern void disable_TSC(void); | |||
21 | 21 | ||
22 | static inline cycles_t get_cycles(void) | 22 | static inline cycles_t get_cycles(void) |
23 | { | 23 | { |
24 | unsigned long long ret = 0; | ||
25 | |||
26 | #ifndef CONFIG_X86_TSC | 24 | #ifndef CONFIG_X86_TSC |
27 | if (!cpu_has_tsc) | 25 | if (!cpu_has_tsc) |
28 | return 0; | 26 | return 0; |
29 | #endif | 27 | #endif |
30 | rdtscll(ret); | ||
31 | |||
32 | return ret; | ||
33 | } | ||
34 | 28 | ||
35 | static __always_inline cycles_t vget_cycles(void) | 29 | return rdtsc(); |
36 | { | ||
37 | /* | ||
38 | * We only do VDSOs on TSC capable CPUs, so this shouldn't | ||
39 | * access boot_cpu_data (which is not VDSO-safe): | ||
40 | */ | ||
41 | #ifndef CONFIG_X86_TSC | ||
42 | if (!cpu_has_tsc) | ||
43 | return 0; | ||
44 | #endif | ||
45 | return (cycles_t)__native_read_tsc(); | ||
46 | } | 30 | } |
47 | 31 | ||
48 | extern void tsc_init(void); | 32 | extern void tsc_init(void); |
diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 1d8de3f3feca..1e491f3af317 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_VM86_H | 1 | #ifndef _ASM_X86_VM86_H |
2 | #define _ASM_X86_VM86_H | 2 | #define _ASM_X86_VM86_H |
3 | 3 | ||
4 | |||
5 | #include <asm/ptrace.h> | 4 | #include <asm/ptrace.h> |
6 | #include <uapi/asm/vm86.h> | 5 | #include <uapi/asm/vm86.h> |
7 | 6 | ||
@@ -28,43 +27,49 @@ struct kernel_vm86_regs { | |||
28 | unsigned short gs, __gsh; | 27 | unsigned short gs, __gsh; |
29 | }; | 28 | }; |
30 | 29 | ||
31 | struct kernel_vm86_struct { | 30 | struct vm86 { |
32 | struct kernel_vm86_regs regs; | 31 | struct vm86plus_struct __user *user_vm86; |
33 | /* | 32 | struct pt_regs regs32; |
34 | * the below part remains on the kernel stack while we are in VM86 mode. | 33 | unsigned long veflags; |
35 | * 'tss.esp0' then contains the address of VM86_TSS_ESP0 below, and when we | 34 | unsigned long veflags_mask; |
36 | * get forced back from VM86, the CPU and "SAVE_ALL" will restore the above | 35 | unsigned long saved_sp0; |
37 | * 'struct kernel_vm86_regs' with the then actual values. | 36 | |
38 | * Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct' | ||
39 | * in kernelspace, hence we need not reget the data from userspace. | ||
40 | */ | ||
41 | #define VM86_TSS_ESP0 flags | ||
42 | unsigned long flags; | 37 | unsigned long flags; |
43 | unsigned long screen_bitmap; | 38 | unsigned long screen_bitmap; |
44 | unsigned long cpu_type; | 39 | unsigned long cpu_type; |
45 | struct revectored_struct int_revectored; | 40 | struct revectored_struct int_revectored; |
46 | struct revectored_struct int21_revectored; | 41 | struct revectored_struct int21_revectored; |
47 | struct vm86plus_info_struct vm86plus; | 42 | struct vm86plus_info_struct vm86plus; |
48 | struct pt_regs *regs32; /* here we save the pointer to the old regs */ | ||
49 | /* | ||
50 | * The below is not part of the structure, but the stack layout continues | ||
51 | * this way. In front of 'return-eip' may be some data, depending on | ||
52 | * compilation, so we don't rely on this and save the pointer to 'oldregs' | ||
53 | * in 'regs32' above. | ||
54 | * However, with GCC-2.7.2 and the current CFLAGS you see exactly this: | ||
55 | |||
56 | long return-eip; from call to vm86() | ||
57 | struct pt_regs oldregs; user space registers as saved by syscall | ||
58 | */ | ||
59 | }; | 43 | }; |
60 | 44 | ||
61 | #ifdef CONFIG_VM86 | 45 | #ifdef CONFIG_VM86 |
62 | 46 | ||
63 | void handle_vm86_fault(struct kernel_vm86_regs *, long); | 47 | void handle_vm86_fault(struct kernel_vm86_regs *, long); |
64 | int handle_vm86_trap(struct kernel_vm86_regs *, long, int); | 48 | int handle_vm86_trap(struct kernel_vm86_regs *, long, int); |
65 | struct pt_regs *save_v86_state(struct kernel_vm86_regs *); | 49 | void save_v86_state(struct kernel_vm86_regs *, int); |
66 | 50 | ||
67 | struct task_struct; | 51 | struct task_struct; |
52 | |||
53 | #define free_vm86(t) do { \ | ||
54 | struct thread_struct *__t = (t); \ | ||
55 | if (__t->vm86 != NULL) { \ | ||
56 | kfree(__t->vm86); \ | ||
57 | __t->vm86 = NULL; \ | ||
58 | } \ | ||
59 | } while (0) | ||
60 | |||
61 | /* | ||
62 | * Support for VM86 programs to request interrupts for | ||
63 | * real mode hardware drivers: | ||
64 | */ | ||
65 | #define FIRST_VM86_IRQ 3 | ||
66 | #define LAST_VM86_IRQ 15 | ||
67 | |||
68 | static inline int invalid_vm86_irq(int irq) | ||
69 | { | ||
70 | return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; | ||
71 | } | ||
72 | |||
68 | void release_vm86_irqs(struct task_struct *); | 73 | void release_vm86_irqs(struct task_struct *); |
69 | 74 | ||
70 | #else | 75 | #else |
@@ -77,6 +82,10 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) | |||
77 | return 0; | 82 | return 0; |
78 | } | 83 | } |
79 | 84 | ||
85 | static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { } | ||
86 | |||
87 | #define free_vm86(t) do { } while(0) | ||
88 | |||
80 | #endif /* CONFIG_VM86 */ | 89 | #endif /* CONFIG_VM86 */ |
81 | 90 | ||
82 | #endif /* _ASM_X86_VM86_H */ | 91 | #endif /* _ASM_X86_VM86_H */ |
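With the vm86 state moved into a separately allocated struct vm86 hung off thread_struct, non-vm86 tasks only pay for a pointer; the structure is presumably allocated on first use by sys_vm86()/sys_vm86old() and torn down through the free_vm86() helper above. A hedged sketch of the teardown side follows; the function name is a placeholder and the call ordering is an assumption.

/*
 * Sketch (assumption): how a task-exit path would drop the lazily
 * allocated vm86 state.  example_exit_thread() is a made-up name.
 */
static void example_exit_thread(struct task_struct *tsk)
{
	struct thread_struct *t = &tsk->thread;

	release_vm86_irqs(tsk);	/* give back any vm86-requested IRQs */
	free_vm86(t);		/* kfree()s t->vm86 if it was ever allocated */
}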
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 180a0c3c224d..79887abcb5e1 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h | |||
@@ -37,8 +37,6 @@ | |||
37 | #define X86_EFLAGS_VM _BITUL(X86_EFLAGS_VM_BIT) | 37 | #define X86_EFLAGS_VM _BITUL(X86_EFLAGS_VM_BIT) |
38 | #define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */ | 38 | #define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */ |
39 | #define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT) | 39 | #define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT) |
40 | #define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */ | ||
41 | #define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT) | ||
42 | #define X86_EFLAGS_VIF_BIT 19 /* Virtual Interrupt Flag */ | 40 | #define X86_EFLAGS_VIF_BIT 19 /* Virtual Interrupt Flag */ |
43 | #define X86_EFLAGS_VIF _BITUL(X86_EFLAGS_VIF_BIT) | 41 | #define X86_EFLAGS_VIF _BITUL(X86_EFLAGS_VIF_BIT) |
44 | #define X86_EFLAGS_VIP_BIT 20 /* Virtual Interrupt Pending */ | 42 | #define X86_EFLAGS_VIP_BIT 20 /* Virtual Interrupt Pending */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0f15af41bd80..514064897d55 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n | |||
23 | CFLAGS_irq.o := -I$(src)/../include/asm/trace | 23 | CFLAGS_irq.o := -I$(src)/../include/asm/trace |
24 | 24 | ||
25 | obj-y := process_$(BITS).o signal.o | 25 | obj-y := process_$(BITS).o signal.o |
26 | obj-$(CONFIG_COMPAT) += signal_compat.o | ||
26 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 27 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
27 | obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o | 28 | obj-y += time.o ioport.o dumpstack.o nmi.o |
29 | obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o | ||
28 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o | 30 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o |
29 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 31 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
30 | obj-y += probe_roms.o | 32 | obj-y += probe_roms.o |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index ede92c3364d3..222a57076039 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -263,7 +263,7 @@ static int apbt_clocksource_register(void) | |||
263 | 263 | ||
264 | /* Verify whether apbt counter works */ | 264 | /* Verify whether apbt counter works */ |
265 | t1 = dw_apb_clocksource_read(clocksource_apbt); | 265 | t1 = dw_apb_clocksource_read(clocksource_apbt); |
266 | rdtscll(start); | 266 | start = rdtsc(); |
267 | 267 | ||
268 | /* | 268 | /* |
269 | * We don't know the TSC frequency yet, but waiting for | 269 | * We don't know the TSC frequency yet, but waiting for |
@@ -273,7 +273,7 @@ static int apbt_clocksource_register(void) | |||
273 | */ | 273 | */ |
274 | do { | 274 | do { |
275 | rep_nop(); | 275 | rep_nop(); |
276 | rdtscll(now); | 276 | now = rdtsc(); |
277 | } while ((now - start) < 200000UL); | 277 | } while ((now - start) < 200000UL); |
278 | 278 | ||
279 | /* APBT is the only always on clocksource, it has to work! */ | 279 | /* APBT is the only always on clocksource, it has to work! */ |
@@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void) | |||
390 | old = dw_apb_clocksource_read(clocksource_apbt); | 390 | old = dw_apb_clocksource_read(clocksource_apbt); |
391 | old += loop; | 391 | old += loop; |
392 | 392 | ||
393 | t1 = __native_read_tsc(); | 393 | t1 = rdtsc(); |
394 | 394 | ||
395 | do { | 395 | do { |
396 | new = dw_apb_clocksource_read(clocksource_apbt); | 396 | new = dw_apb_clocksource_read(clocksource_apbt); |
397 | } while (new < old); | 397 | } while (new < old); |
398 | 398 | ||
399 | t2 = __native_read_tsc(); | 399 | t2 = rdtsc(); |
400 | 400 | ||
401 | shift = 5; | 401 | shift = 5; |
402 | if (unlikely(loop >> shift == 0)) { | 402 | if (unlikely(loop >> shift == 0)) { |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index cde732c1b495..5aba9220a5ac 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -457,7 +457,7 @@ static int lapic_next_deadline(unsigned long delta, | |||
457 | { | 457 | { |
458 | u64 tsc; | 458 | u64 tsc; |
459 | 459 | ||
460 | rdtscll(tsc); | 460 | tsc = rdtsc(); |
461 | wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); | 461 | wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); |
462 | return 0; | 462 | return 0; |
463 | } | 463 | } |
@@ -592,7 +592,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) | |||
592 | unsigned long pm = acpi_pm_read_early(); | 592 | unsigned long pm = acpi_pm_read_early(); |
593 | 593 | ||
594 | if (cpu_has_tsc) | 594 | if (cpu_has_tsc) |
595 | rdtscll(tsc); | 595 | tsc = rdtsc(); |
596 | 596 | ||
597 | switch (lapic_cal_loops++) { | 597 | switch (lapic_cal_loops++) { |
598 | case 0: | 598 | case 0: |
@@ -1209,7 +1209,7 @@ void setup_local_APIC(void) | |||
1209 | long long max_loops = cpu_khz ? cpu_khz : 1000000; | 1209 | long long max_loops = cpu_khz ? cpu_khz : 1000000; |
1210 | 1210 | ||
1211 | if (cpu_has_tsc) | 1211 | if (cpu_has_tsc) |
1212 | rdtscll(tsc); | 1212 | tsc = rdtsc(); |
1213 | 1213 | ||
1214 | if (disable_apic) { | 1214 | if (disable_apic) { |
1215 | disable_ioapic_support(); | 1215 | disable_ioapic_support(); |
@@ -1293,7 +1293,7 @@ void setup_local_APIC(void) | |||
1293 | } | 1293 | } |
1294 | if (queued) { | 1294 | if (queued) { |
1295 | if (cpu_has_tsc && cpu_khz) { | 1295 | if (cpu_has_tsc && cpu_khz) { |
1296 | rdtscll(ntsc); | 1296 | ntsc = rdtsc(); |
1297 | max_loops = (cpu_khz << 10) - (ntsc - tsc); | 1297 | max_loops = (cpu_khz << 10) - (ntsc - tsc); |
1298 | } else | 1298 | } else |
1299 | max_loops--; | 1299 | max_loops--; |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dd3a4baffe50..4a70fc6d400a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <asm/cpu.h> | 11 | #include <asm/cpu.h> |
12 | #include <asm/smp.h> | 12 | #include <asm/smp.h> |
13 | #include <asm/pci-direct.h> | 13 | #include <asm/pci-direct.h> |
14 | #include <asm/delay.h> | ||
14 | 15 | ||
15 | #ifdef CONFIG_X86_64 | 16 | #ifdef CONFIG_X86_64 |
16 | # include <asm/mmconfig.h> | 17 | # include <asm/mmconfig.h> |
@@ -114,7 +115,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) | |||
114 | const int K6_BUG_LOOP = 1000000; | 115 | const int K6_BUG_LOOP = 1000000; |
115 | int n; | 116 | int n; |
116 | void (*f_vide)(void); | 117 | void (*f_vide)(void); |
117 | unsigned long d, d2; | 118 | u64 d, d2; |
118 | 119 | ||
119 | printk(KERN_INFO "AMD K6 stepping B detected - "); | 120 | printk(KERN_INFO "AMD K6 stepping B detected - "); |
120 | 121 | ||
@@ -125,10 +126,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c) | |||
125 | 126 | ||
126 | n = K6_BUG_LOOP; | 127 | n = K6_BUG_LOOP; |
127 | f_vide = vide; | 128 | f_vide = vide; |
128 | rdtscl(d); | 129 | d = rdtsc(); |
129 | while (n--) | 130 | while (n--) |
130 | f_vide(); | 131 | f_vide(); |
131 | rdtscl(d2); | 132 | d2 = rdtsc(); |
132 | d = d2-d; | 133 | d = d2-d; |
133 | 134 | ||
134 | if (d > 20*K6_BUG_LOOP) | 135 | if (d > 20*K6_BUG_LOOP) |
@@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) | |||
506 | /* A random value per boot for bit slice [12:upper_bit) */ | 507 | /* A random value per boot for bit slice [12:upper_bit) */ |
507 | va_align.bits = get_random_int() & va_align.mask; | 508 | va_align.bits = get_random_int() & va_align.mask; |
508 | } | 509 | } |
510 | |||
511 | if (cpu_has(c, X86_FEATURE_MWAITX)) | ||
512 | use_mwaitx_delay(); | ||
509 | } | 513 | } |
510 | 514 | ||
511 | static void early_init_amd(struct cpuinfo_x86 *c) | 515 | static void early_init_amd(struct cpuinfo_x86 *c) |
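The two lines added to bsp_init_amd() opt the boot CPU into the MWAITX-based delay introduced earlier in this series. A rough sketch of the assumed mechanism — a retargetable delay function pointer, with the authoritative details in arch/x86/lib/delay.c:

	/* Assumed shape, for illustration only. */
	static void (*delay_fn)(unsigned long) = delay_loop;   /* default: loop/TSC delay */

	void use_mwaitx_delay(void)
	{
		delay_fn = delay_mwaitx;   /* MONITORX/MWAITX wait with a hardware timeout */
	}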
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb9e5df42dd2..b128808853a2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1185,10 +1185,10 @@ void syscall_init(void) | |||
1185 | * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. | 1185 | * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. |
1186 | */ | 1186 | */ |
1187 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | 1187 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); |
1188 | wrmsrl(MSR_LSTAR, entry_SYSCALL_64); | 1188 | wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); |
1189 | 1189 | ||
1190 | #ifdef CONFIG_IA32_EMULATION | 1190 | #ifdef CONFIG_IA32_EMULATION |
1191 | wrmsrl(MSR_CSTAR, entry_SYSCALL_compat); | 1191 | wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); |
1192 | /* | 1192 | /* |
1193 | * This only works on Intel CPUs. | 1193 | * This only works on Intel CPUs. |
1194 | * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. | 1194 | * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. |
@@ -1199,7 +1199,7 @@ void syscall_init(void) | |||
1199 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); | 1199 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); |
1200 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); | 1200 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); |
1201 | #else | 1201 | #else |
1202 | wrmsrl(MSR_CSTAR, ignore_sysret); | 1202 | wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); |
1203 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); | 1203 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); |
1204 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); | 1204 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); |
1205 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); | 1205 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); |
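The explicit (unsigned long) casts follow from the "x86/asm/msr: Make wrmsrl() a function" change in this pull: a real function argument is type-checked, so passing a code symbol such as entry_SYSCALL_64 now needs an explicit conversion. Approximately — the authoritative definition lives in asm/msr.h:

	/* Approximate shape only. */
	static inline void wrmsrl(unsigned int msr, u64 val)
	{
		wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
	}

	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);   /* cast now required */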
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0f8f21c8284a..9d014b82a124 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -127,7 +127,7 @@ void mce_setup(struct mce *m) | |||
127 | { | 127 | { |
128 | memset(m, 0, sizeof(struct mce)); | 128 | memset(m, 0, sizeof(struct mce)); |
129 | m->cpu = m->extcpu = smp_processor_id(); | 129 | m->cpu = m->extcpu = smp_processor_id(); |
130 | rdtscll(m->tsc); | 130 | m->tsc = rdtsc(); |
131 | /* We hope get_seconds stays lockless */ | 131 | /* We hope get_seconds stays lockless */ |
132 | m->time = get_seconds(); | 132 | m->time = get_seconds(); |
133 | m->cpuvendor = boot_cpu_data.x86_vendor; | 133 | m->cpuvendor = boot_cpu_data.x86_vendor; |
@@ -974,7 +974,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
974 | { | 974 | { |
975 | struct mca_config *cfg = &mca_cfg; | 975 | struct mca_config *cfg = &mca_cfg; |
976 | struct mce m, *final; | 976 | struct mce m, *final; |
977 | enum ctx_state prev_state; | ||
978 | int i; | 977 | int i; |
979 | int worst = 0; | 978 | int worst = 0; |
980 | int severity; | 979 | int severity; |
@@ -1000,7 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1000 | int flags = MF_ACTION_REQUIRED; | 999 | int flags = MF_ACTION_REQUIRED; |
1001 | int lmce = 0; | 1000 | int lmce = 0; |
1002 | 1001 | ||
1003 | prev_state = ist_enter(regs); | 1002 | ist_enter(regs); |
1004 | 1003 | ||
1005 | this_cpu_inc(mce_exception_count); | 1004 | this_cpu_inc(mce_exception_count); |
1006 | 1005 | ||
@@ -1166,7 +1165,7 @@ out: | |||
1166 | local_irq_disable(); | 1165 | local_irq_disable(); |
1167 | ist_end_non_atomic(); | 1166 | ist_end_non_atomic(); |
1168 | done: | 1167 | done: |
1169 | ist_exit(regs, prev_state); | 1168 | ist_exit(regs); |
1170 | } | 1169 | } |
1171 | EXPORT_SYMBOL_GPL(do_machine_check); | 1170 | EXPORT_SYMBOL_GPL(do_machine_check); |
1172 | 1171 | ||
@@ -1754,7 +1753,7 @@ static void collect_tscs(void *data) | |||
1754 | { | 1753 | { |
1755 | unsigned long *cpu_tsc = (unsigned long *)data; | 1754 | unsigned long *cpu_tsc = (unsigned long *)data; |
1756 | 1755 | ||
1757 | rdtscll(cpu_tsc[smp_processor_id()]); | 1756 | cpu_tsc[smp_processor_id()] = rdtsc(); |
1758 | } | 1757 | } |
1759 | 1758 | ||
1760 | static int mce_apei_read_done; | 1759 | static int mce_apei_read_done; |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 737b0ad4e61a..12402e10aeff 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly; | |||
19 | /* Machine check handler for Pentium class Intel CPUs: */ | 19 | /* Machine check handler for Pentium class Intel CPUs: */ |
20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
21 | { | 21 | { |
22 | enum ctx_state prev_state; | ||
23 | u32 loaddr, hi, lotype; | 22 | u32 loaddr, hi, lotype; |
24 | 23 | ||
25 | prev_state = ist_enter(regs); | 24 | ist_enter(regs); |
26 | 25 | ||
27 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | 26 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); |
28 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | 27 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); |
@@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) | |||
39 | 38 | ||
40 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); | 39 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
41 | 40 | ||
42 | ist_exit(regs, prev_state); | 41 | ist_exit(regs); |
43 | } | 42 | } |
44 | 43 | ||
45 | /* Set up machine check reporting for processors with Intel style MCE: */ | 44 | /* Set up machine check reporting for processors with Intel style MCE: */ |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 44f138296fbe..01dd8702880b 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -15,12 +15,12 @@ | |||
15 | /* Machine check handler for WinChip C6: */ | 15 | /* Machine check handler for WinChip C6: */ |
16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
17 | { | 17 | { |
18 | enum ctx_state prev_state = ist_enter(regs); | 18 | ist_enter(regs); |
19 | 19 | ||
20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
21 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); | 21 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
22 | 22 | ||
23 | ist_exit(regs, prev_state); | 23 | ist_exit(regs); |
24 | } | 24 | } |
25 | 25 | ||
26 | /* Set up machine check reporting on the Winchip C6 series */ | 26 | /* Set up machine check reporting on the Winchip C6 series */ |
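The mce.c, p5.c and winchip.c hunks all apply the same simplification: ist_enter()/ist_exit() no longer thread an 'enum ctx_state' through the handler, so the prev_state local disappears and the bracketing shrinks to two bare calls. A minimal sketch of the new convention:

	static void example_ist_handler(struct pt_regs *regs, long error_code)
	{
		ist_enter(regs);        /* notify RCU, enter the atomic section */

		/* ... handler body ... */

		ist_exit(regs);         /* leave the atomic section; no saved state needed */
	}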
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f56cf074d01a..66dd3fe99b82 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment) | |||
2179 | int idx = segment >> 3; | 2179 | int idx = segment >> 3; |
2180 | 2180 | ||
2181 | if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { | 2181 | if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { |
2182 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
2182 | struct ldt_struct *ldt; | 2183 | struct ldt_struct *ldt; |
2183 | 2184 | ||
2184 | if (idx > LDT_ENTRIES) | 2185 | if (idx > LDT_ENTRIES) |
@@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment) | |||
2190 | return 0; | 2191 | return 0; |
2191 | 2192 | ||
2192 | desc = &ldt->entries[idx]; | 2193 | desc = &ldt->entries[idx]; |
2194 | #else | ||
2195 | return 0; | ||
2196 | #endif | ||
2193 | } else { | 2197 | } else { |
2194 | if (idx > GDT_ENTRIES) | 2198 | if (idx > GDT_ENTRIES) |
2195 | return 0; | 2199 | return 0; |
@@ -2200,7 +2204,7 @@ static unsigned long get_segment_base(unsigned int segment) | |||
2200 | return get_desc_base(desc); | 2204 | return get_desc_base(desc); |
2201 | } | 2205 | } |
2202 | 2206 | ||
2203 | #ifdef CONFIG_COMPAT | 2207 | #ifdef CONFIG_IA32_EMULATION |
2204 | 2208 | ||
2205 | #include <asm/compat.h> | 2209 | #include <asm/compat.h> |
2206 | 2210 | ||
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index ce95676abd60..4d38416e2a7f 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c | |||
@@ -110,7 +110,7 @@ static void init_espfix_random(void) | |||
110 | */ | 110 | */ |
111 | if (!arch_get_random_long(&rand)) { | 111 | if (!arch_get_random_long(&rand)) { |
112 | /* The constant is an arbitrary large prime */ | 112 | /* The constant is an arbitrary large prime */ |
113 | rdtscll(rand); | 113 | rand = rdtsc(); |
114 | rand *= 0xc345c6b72fd16123UL; | 114 | rand *= 0xc345c6b72fd16123UL; |
115 | } | 115 | } |
116 | 116 | ||
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 10757d0a3fcf..f75c5908c7a6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -735,7 +735,7 @@ static int hpet_clocksource_register(void) | |||
735 | 735 | ||
736 | /* Verify whether hpet counter works */ | 736 | /* Verify whether hpet counter works */ |
737 | t1 = hpet_readl(HPET_COUNTER); | 737 | t1 = hpet_readl(HPET_COUNTER); |
738 | rdtscll(start); | 738 | start = rdtsc(); |
739 | 739 | ||
740 | /* | 740 | /* |
741 | * We don't know the TSC frequency yet, but waiting for | 741 | * We don't know the TSC frequency yet, but waiting for |
@@ -745,7 +745,7 @@ static int hpet_clocksource_register(void) | |||
745 | */ | 745 | */ |
746 | do { | 746 | do { |
747 | rep_nop(); | 747 | rep_nop(); |
748 | rdtscll(now); | 748 | now = rdtsc(); |
749 | } while ((now - start) < 200000UL); | 749 | } while ((now - start) < 200000UL); |
750 | 750 | ||
751 | if (t1 == hpet_readl(HPET_COUNTER)) { | 751 | if (t1 == hpet_readl(HPET_COUNTER)) { |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c7dfe1be784e..4616672a4049 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -216,8 +216,23 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
216 | unsigned vector = ~regs->orig_ax; | 216 | unsigned vector = ~regs->orig_ax; |
217 | unsigned irq; | 217 | unsigned irq; |
218 | 218 | ||
219 | /* | ||
220 | * NB: Unlike exception entries, IRQ entries do not reliably | ||
221 | * handle context tracking in the low-level entry code. This is | ||
222 | * because syscall entries execute briefly with IRQs on before | ||
223 | * updating context tracking state, so we can take an IRQ from | ||
224 | * kernel mode with CONTEXT_USER. The low-level entry code only | ||
225 | * updates the context if we came from user mode, so we won't | ||
226 | * switch to CONTEXT_KERNEL. We'll fix that once the syscall | ||
227 | * code is cleaned up enough that we can cleanly defer enabling | ||
228 | * IRQs. | ||
229 | */ | ||
230 | |||
219 | entering_irq(); | 231 | entering_irq(); |
220 | 232 | ||
233 | /* entering_irq() tells RCU that we're not quiescent. Check it. */ | ||
234 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); | ||
235 | |||
221 | irq = __this_cpu_read(vector_irq[vector]); | 236 | irq = __this_cpu_read(vector_irq[vector]); |
222 | 237 | ||
223 | if (!handle_irq(irq, regs)) { | 238 | if (!handle_irq(irq, regs)) { |
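RCU_LOCKDEP_WARN() is a debug-only assertion: with RCU lockdep checking enabled it complains when its condition is true, and it compiles away otherwise. Roughly — the real macro also deduplicates and reports through lockdep — the added check behaves like:

	if (IS_ENABLED(CONFIG_PROVE_RCU) && !rcu_is_watching())
		WARN_ONCE(1, "IRQ failed to wake up RCU");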
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d05bd2e2ee91..697f90db0e37 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) | |||
110 | a->handler, whole_msecs, decimal_msecs); | 110 | a->handler, whole_msecs, decimal_msecs); |
111 | } | 111 | } |
112 | 112 | ||
113 | static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) | 113 | static int nmi_handle(unsigned int type, struct pt_regs *regs) |
114 | { | 114 | { |
115 | struct nmi_desc *desc = nmi_to_desc(type); | 115 | struct nmi_desc *desc = nmi_to_desc(type); |
116 | struct nmiaction *a; | 116 | struct nmiaction *a; |
@@ -213,7 +213,7 @@ static void | |||
213 | pci_serr_error(unsigned char reason, struct pt_regs *regs) | 213 | pci_serr_error(unsigned char reason, struct pt_regs *regs) |
214 | { | 214 | { |
215 | /* check to see if anyone registered against these types of errors */ | 215 | /* check to see if anyone registered against these types of errors */ |
216 | if (nmi_handle(NMI_SERR, regs, false)) | 216 | if (nmi_handle(NMI_SERR, regs)) |
217 | return; | 217 | return; |
218 | 218 | ||
219 | pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", | 219 | pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", |
@@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
247 | unsigned long i; | 247 | unsigned long i; |
248 | 248 | ||
249 | /* check to see if anyone registered against these types of errors */ | 249 | /* check to see if anyone registered against these types of errors */ |
250 | if (nmi_handle(NMI_IO_CHECK, regs, false)) | 250 | if (nmi_handle(NMI_IO_CHECK, regs)) |
251 | return; | 251 | return; |
252 | 252 | ||
253 | pr_emerg( | 253 | pr_emerg( |
@@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
284 | * as only the first one is ever run (unless it can actually determine | 284 | * as only the first one is ever run (unless it can actually determine |
285 | * if it caused the NMI) | 285 | * if it caused the NMI) |
286 | */ | 286 | */ |
287 | handled = nmi_handle(NMI_UNKNOWN, regs, false); | 287 | handled = nmi_handle(NMI_UNKNOWN, regs); |
288 | if (handled) { | 288 | if (handled) { |
289 | __this_cpu_add(nmi_stats.unknown, handled); | 289 | __this_cpu_add(nmi_stats.unknown, handled); |
290 | return; | 290 | return; |
@@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs) | |||
332 | 332 | ||
333 | __this_cpu_write(last_nmi_rip, regs->ip); | 333 | __this_cpu_write(last_nmi_rip, regs->ip); |
334 | 334 | ||
335 | handled = nmi_handle(NMI_LOCAL, regs, b2b); | 335 | handled = nmi_handle(NMI_LOCAL, regs); |
336 | __this_cpu_add(nmi_stats.normal, handled); | 336 | __this_cpu_add(nmi_stats.normal, handled); |
337 | if (handled) { | 337 | if (handled) { |
338 | /* | 338 | /* |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 58bcfb67c01f..f68e48f5f6c2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { | |||
351 | .wbinvd = native_wbinvd, | 351 | .wbinvd = native_wbinvd, |
352 | .read_msr = native_read_msr_safe, | 352 | .read_msr = native_read_msr_safe, |
353 | .write_msr = native_write_msr_safe, | 353 | .write_msr = native_write_msr_safe, |
354 | .read_tsc = native_read_tsc, | ||
355 | .read_pmc = native_read_pmc, | 354 | .read_pmc = native_read_pmc, |
356 | .read_tscp = native_read_tscp, | ||
357 | .load_tr_desc = native_load_tr_desc, | 355 | .load_tr_desc = native_load_tr_desc, |
358 | .set_ldt = native_set_ldt, | 356 | .set_ldt = native_set_ldt, |
359 | .load_gdt = native_load_gdt, | 357 | .load_gdt = native_load_gdt, |
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index e1b013696dde..c89f50a76e97 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c | |||
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); | |||
10 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); | 10 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); |
11 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); | 11 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); |
12 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | 12 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); |
13 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); | ||
14 | 13 | ||
15 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) | 14 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) |
16 | DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); | 15 | DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); |
@@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
52 | PATCH_SITE(pv_mmu_ops, read_cr3); | 51 | PATCH_SITE(pv_mmu_ops, read_cr3); |
53 | PATCH_SITE(pv_mmu_ops, write_cr3); | 52 | PATCH_SITE(pv_mmu_ops, write_cr3); |
54 | PATCH_SITE(pv_cpu_ops, clts); | 53 | PATCH_SITE(pv_cpu_ops, clts); |
55 | PATCH_SITE(pv_cpu_ops, read_tsc); | ||
56 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) | 54 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) |
57 | case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): | 55 | case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): |
58 | if (pv_is_native_spin_unlock()) { | 56 | if (pv_is_native_spin_unlock()) { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d83740ab85b0..6d0e62ae8516 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
32 | #include <asm/mce.h> | 32 | #include <asm/mce.h> |
33 | #include <asm/vm86.h> | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | 36 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
@@ -111,6 +112,8 @@ void exit_thread(void) | |||
111 | kfree(bp); | 112 | kfree(bp); |
112 | } | 113 | } |
113 | 114 | ||
115 | free_vm86(t); | ||
116 | |||
114 | fpu__drop(fpu); | 117 | fpu__drop(fpu); |
115 | } | 118 | } |
116 | 119 | ||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f73c962fe636..c13df2c735f8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <asm/syscalls.h> | 53 | #include <asm/syscalls.h> |
54 | #include <asm/debugreg.h> | 54 | #include <asm/debugreg.h> |
55 | #include <asm/switch_to.h> | 55 | #include <asm/switch_to.h> |
56 | #include <asm/vm86.h> | ||
56 | 57 | ||
57 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 58 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
58 | asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); | 59 | asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f6b916387590..3c1bbcf12924 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
121 | void release_thread(struct task_struct *dead_task) | 121 | void release_thread(struct task_struct *dead_task) |
122 | { | 122 | { |
123 | if (dead_task->mm) { | 123 | if (dead_task->mm) { |
124 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
124 | if (dead_task->mm->context.ldt) { | 125 | if (dead_task->mm->context.ldt) { |
125 | pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", | 126 | pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", |
126 | dead_task->comm, | 127 | dead_task->comm, |
@@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task) | |||
128 | dead_task->mm->context.ldt->size); | 129 | dead_task->mm->context.ldt->size); |
129 | BUG(); | 130 | BUG(); |
130 | } | 131 | } |
132 | #endif | ||
131 | } | 133 | } |
132 | } | 134 | } |
133 | 135 | ||
@@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
248 | __USER_CS, __USER_DS, 0); | 250 | __USER_CS, __USER_DS, 0); |
249 | } | 251 | } |
250 | 252 | ||
251 | #ifdef CONFIG_IA32_EMULATION | 253 | #ifdef CONFIG_COMPAT |
252 | void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) | 254 | void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) |
253 | { | 255 | { |
254 | start_thread_common(regs, new_ip, new_sp, | 256 | start_thread_common(regs, new_ip, new_sp, |
255 | test_thread_flag(TIF_X32) | 257 | test_thread_flag(TIF_X32) |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9be72bc3613f..558f50edebca 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -37,12 +37,10 @@ | |||
37 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
38 | #include <asm/hw_breakpoint.h> | 38 | #include <asm/hw_breakpoint.h> |
39 | #include <asm/traps.h> | 39 | #include <asm/traps.h> |
40 | #include <asm/syscall.h> | ||
40 | 41 | ||
41 | #include "tls.h" | 42 | #include "tls.h" |
42 | 43 | ||
43 | #define CREATE_TRACE_POINTS | ||
44 | #include <trace/events/syscalls.h> | ||
45 | |||
46 | enum x86_regset { | 44 | enum x86_regset { |
47 | REGSET_GENERAL, | 45 | REGSET_GENERAL, |
48 | REGSET_FP, | 46 | REGSET_FP, |
@@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target, | |||
1123 | return ret; | 1121 | return ret; |
1124 | } | 1122 | } |
1125 | 1123 | ||
1124 | static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request, | ||
1125 | compat_ulong_t caddr, compat_ulong_t cdata) | ||
1126 | { | ||
1127 | unsigned long addr = caddr; | ||
1128 | unsigned long data = cdata; | ||
1129 | void __user *datap = compat_ptr(data); | ||
1130 | int ret; | ||
1131 | __u32 val; | ||
1132 | |||
1133 | switch (request) { | ||
1134 | case PTRACE_PEEKUSR: | ||
1135 | ret = getreg32(child, addr, &val); | ||
1136 | if (ret == 0) | ||
1137 | ret = put_user(val, (__u32 __user *)datap); | ||
1138 | break; | ||
1139 | |||
1140 | case PTRACE_POKEUSR: | ||
1141 | ret = putreg32(child, addr, data); | ||
1142 | break; | ||
1143 | |||
1144 | case PTRACE_GETREGS: /* Get all gp regs from the child. */ | ||
1145 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1146 | REGSET_GENERAL, | ||
1147 | 0, sizeof(struct user_regs_struct32), | ||
1148 | datap); | ||
1149 | |||
1150 | case PTRACE_SETREGS: /* Set all gp regs in the child. */ | ||
1151 | return copy_regset_from_user(child, &user_x86_32_view, | ||
1152 | REGSET_GENERAL, 0, | ||
1153 | sizeof(struct user_regs_struct32), | ||
1154 | datap); | ||
1155 | |||
1156 | case PTRACE_GETFPREGS: /* Get the child FPU state. */ | ||
1157 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1158 | REGSET_FP, 0, | ||
1159 | sizeof(struct user_i387_ia32_struct), | ||
1160 | datap); | ||
1161 | |||
1162 | case PTRACE_SETFPREGS: /* Set the child FPU state. */ | ||
1163 | return copy_regset_from_user( | ||
1164 | child, &user_x86_32_view, REGSET_FP, | ||
1165 | 0, sizeof(struct user_i387_ia32_struct), datap); | ||
1166 | |||
1167 | case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ | ||
1168 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1169 | REGSET_XFP, 0, | ||
1170 | sizeof(struct user32_fxsr_struct), | ||
1171 | datap); | ||
1172 | |||
1173 | case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ | ||
1174 | return copy_regset_from_user(child, &user_x86_32_view, | ||
1175 | REGSET_XFP, 0, | ||
1176 | sizeof(struct user32_fxsr_struct), | ||
1177 | datap); | ||
1178 | |||
1179 | case PTRACE_GET_THREAD_AREA: | ||
1180 | case PTRACE_SET_THREAD_AREA: | ||
1181 | return arch_ptrace(child, request, addr, data); | ||
1182 | |||
1183 | default: | ||
1184 | return compat_ptrace_request(child, request, addr, data); | ||
1185 | } | ||
1186 | |||
1187 | return ret; | ||
1188 | } | ||
1189 | #endif /* CONFIG_IA32_EMULATION */ | ||
1190 | |||
1126 | #ifdef CONFIG_X86_X32_ABI | 1191 | #ifdef CONFIG_X86_X32_ABI |
1127 | static long x32_arch_ptrace(struct task_struct *child, | 1192 | static long x32_arch_ptrace(struct task_struct *child, |
1128 | compat_long_t request, compat_ulong_t caddr, | 1193 | compat_long_t request, compat_ulong_t caddr, |
@@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child, | |||
1211 | } | 1276 | } |
1212 | #endif | 1277 | #endif |
1213 | 1278 | ||
1279 | #ifdef CONFIG_COMPAT | ||
1214 | long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | 1280 | long compat_arch_ptrace(struct task_struct *child, compat_long_t request, |
1215 | compat_ulong_t caddr, compat_ulong_t cdata) | 1281 | compat_ulong_t caddr, compat_ulong_t cdata) |
1216 | { | 1282 | { |
1217 | unsigned long addr = caddr; | ||
1218 | unsigned long data = cdata; | ||
1219 | void __user *datap = compat_ptr(data); | ||
1220 | int ret; | ||
1221 | __u32 val; | ||
1222 | |||
1223 | #ifdef CONFIG_X86_X32_ABI | 1283 | #ifdef CONFIG_X86_X32_ABI |
1224 | if (!is_ia32_task()) | 1284 | if (!is_ia32_task()) |
1225 | return x32_arch_ptrace(child, request, caddr, cdata); | 1285 | return x32_arch_ptrace(child, request, caddr, cdata); |
1226 | #endif | 1286 | #endif |
1227 | 1287 | #ifdef CONFIG_IA32_EMULATION | |
1228 | switch (request) { | 1288 | return ia32_arch_ptrace(child, request, caddr, cdata); |
1229 | case PTRACE_PEEKUSR: | 1289 | #else |
1230 | ret = getreg32(child, addr, &val); | 1290 | return 0; |
1231 | if (ret == 0) | 1291 | #endif |
1232 | ret = put_user(val, (__u32 __user *)datap); | ||
1233 | break; | ||
1234 | |||
1235 | case PTRACE_POKEUSR: | ||
1236 | ret = putreg32(child, addr, data); | ||
1237 | break; | ||
1238 | |||
1239 | case PTRACE_GETREGS: /* Get all gp regs from the child. */ | ||
1240 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1241 | REGSET_GENERAL, | ||
1242 | 0, sizeof(struct user_regs_struct32), | ||
1243 | datap); | ||
1244 | |||
1245 | case PTRACE_SETREGS: /* Set all gp regs in the child. */ | ||
1246 | return copy_regset_from_user(child, &user_x86_32_view, | ||
1247 | REGSET_GENERAL, 0, | ||
1248 | sizeof(struct user_regs_struct32), | ||
1249 | datap); | ||
1250 | |||
1251 | case PTRACE_GETFPREGS: /* Get the child FPU state. */ | ||
1252 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1253 | REGSET_FP, 0, | ||
1254 | sizeof(struct user_i387_ia32_struct), | ||
1255 | datap); | ||
1256 | |||
1257 | case PTRACE_SETFPREGS: /* Set the child FPU state. */ | ||
1258 | return copy_regset_from_user( | ||
1259 | child, &user_x86_32_view, REGSET_FP, | ||
1260 | 0, sizeof(struct user_i387_ia32_struct), datap); | ||
1261 | |||
1262 | case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ | ||
1263 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1264 | REGSET_XFP, 0, | ||
1265 | sizeof(struct user32_fxsr_struct), | ||
1266 | datap); | ||
1267 | |||
1268 | case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ | ||
1269 | return copy_regset_from_user(child, &user_x86_32_view, | ||
1270 | REGSET_XFP, 0, | ||
1271 | sizeof(struct user32_fxsr_struct), | ||
1272 | datap); | ||
1273 | |||
1274 | case PTRACE_GET_THREAD_AREA: | ||
1275 | case PTRACE_SET_THREAD_AREA: | ||
1276 | return arch_ptrace(child, request, addr, data); | ||
1277 | |||
1278 | default: | ||
1279 | return compat_ptrace_request(child, request, addr, data); | ||
1280 | } | ||
1281 | |||
1282 | return ret; | ||
1283 | } | 1292 | } |
1284 | 1293 | #endif /* CONFIG_COMPAT */ | |
1285 | #endif /* CONFIG_IA32_EMULATION */ | ||
1286 | 1294 | ||
1287 | #ifdef CONFIG_X86_64 | 1295 | #ifdef CONFIG_X86_64 |
1288 | 1296 | ||
@@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, | |||
1434 | /* Send us the fake SIGTRAP */ | 1442 | /* Send us the fake SIGTRAP */ |
1435 | force_sig_info(SIGTRAP, &info, tsk); | 1443 | force_sig_info(SIGTRAP, &info, tsk); |
1436 | } | 1444 | } |
1437 | |||
1438 | static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) | ||
1439 | { | ||
1440 | #ifdef CONFIG_X86_64 | ||
1441 | if (arch == AUDIT_ARCH_X86_64) { | ||
1442 | audit_syscall_entry(regs->orig_ax, regs->di, | ||
1443 | regs->si, regs->dx, regs->r10); | ||
1444 | } else | ||
1445 | #endif | ||
1446 | { | ||
1447 | audit_syscall_entry(regs->orig_ax, regs->bx, | ||
1448 | regs->cx, regs->dx, regs->si); | ||
1449 | } | ||
1450 | } | ||
1451 | |||
1452 | /* | ||
1453 | * We can return 0 to resume the syscall or anything else to go to phase | ||
1454 | * 2. If we resume the syscall, we need to put something appropriate in | ||
1455 | * regs->orig_ax. | ||
1456 | * | ||
1457 | * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax | ||
1458 | * are fully functional. | ||
1459 | * | ||
1460 | * For phase 2's benefit, our return value is: | ||
1461 | * 0: resume the syscall | ||
1462 | * 1: go to phase 2; no seccomp phase 2 needed | ||
1463 | * anything else: go to phase 2; pass return value to seccomp | ||
1464 | */ | ||
1465 | unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) | ||
1466 | { | ||
1467 | unsigned long ret = 0; | ||
1468 | u32 work; | ||
1469 | |||
1470 | BUG_ON(regs != task_pt_regs(current)); | ||
1471 | |||
1472 | work = ACCESS_ONCE(current_thread_info()->flags) & | ||
1473 | _TIF_WORK_SYSCALL_ENTRY; | ||
1474 | |||
1475 | /* | ||
1476 | * If TIF_NOHZ is set, we are required to call user_exit() before | ||
1477 | * doing anything that could touch RCU. | ||
1478 | */ | ||
1479 | if (work & _TIF_NOHZ) { | ||
1480 | user_exit(); | ||
1481 | work &= ~_TIF_NOHZ; | ||
1482 | } | ||
1483 | |||
1484 | #ifdef CONFIG_SECCOMP | ||
1485 | /* | ||
1486 | * Do seccomp first -- it should minimize exposure of other | ||
1487 | * code, and keeping seccomp fast is probably more valuable | ||
1488 | * than the rest of this. | ||
1489 | */ | ||
1490 | if (work & _TIF_SECCOMP) { | ||
1491 | struct seccomp_data sd; | ||
1492 | |||
1493 | sd.arch = arch; | ||
1494 | sd.nr = regs->orig_ax; | ||
1495 | sd.instruction_pointer = regs->ip; | ||
1496 | #ifdef CONFIG_X86_64 | ||
1497 | if (arch == AUDIT_ARCH_X86_64) { | ||
1498 | sd.args[0] = regs->di; | ||
1499 | sd.args[1] = regs->si; | ||
1500 | sd.args[2] = regs->dx; | ||
1501 | sd.args[3] = regs->r10; | ||
1502 | sd.args[4] = regs->r8; | ||
1503 | sd.args[5] = regs->r9; | ||
1504 | } else | ||
1505 | #endif | ||
1506 | { | ||
1507 | sd.args[0] = regs->bx; | ||
1508 | sd.args[1] = regs->cx; | ||
1509 | sd.args[2] = regs->dx; | ||
1510 | sd.args[3] = regs->si; | ||
1511 | sd.args[4] = regs->di; | ||
1512 | sd.args[5] = regs->bp; | ||
1513 | } | ||
1514 | |||
1515 | BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); | ||
1516 | BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); | ||
1517 | |||
1518 | ret = seccomp_phase1(&sd); | ||
1519 | if (ret == SECCOMP_PHASE1_SKIP) { | ||
1520 | regs->orig_ax = -1; | ||
1521 | ret = 0; | ||
1522 | } else if (ret != SECCOMP_PHASE1_OK) { | ||
1523 | return ret; /* Go directly to phase 2 */ | ||
1524 | } | ||
1525 | |||
1526 | work &= ~_TIF_SECCOMP; | ||
1527 | } | ||
1528 | #endif | ||
1529 | |||
1530 | /* Do our best to finish without phase 2. */ | ||
1531 | if (work == 0) | ||
1532 | return ret; /* seccomp and/or nohz only (ret == 0 here) */ | ||
1533 | |||
1534 | #ifdef CONFIG_AUDITSYSCALL | ||
1535 | if (work == _TIF_SYSCALL_AUDIT) { | ||
1536 | /* | ||
1537 | * If there is no more work to be done except auditing, | ||
1538 | * then audit in phase 1. Phase 2 always audits, so, if | ||
1539 | * we audit here, then we can't go on to phase 2. | ||
1540 | */ | ||
1541 | do_audit_syscall_entry(regs, arch); | ||
1542 | return 0; | ||
1543 | } | ||
1544 | #endif | ||
1545 | |||
1546 | return 1; /* Something is enabled that we can't handle in phase 1 */ | ||
1547 | } | ||
1548 | |||
1549 | /* Returns the syscall nr to run (which should match regs->orig_ax). */ | ||
1550 | long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, | ||
1551 | unsigned long phase1_result) | ||
1552 | { | ||
1553 | long ret = 0; | ||
1554 | u32 work = ACCESS_ONCE(current_thread_info()->flags) & | ||
1555 | _TIF_WORK_SYSCALL_ENTRY; | ||
1556 | |||
1557 | BUG_ON(regs != task_pt_regs(current)); | ||
1558 | |||
1559 | /* | ||
1560 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
1561 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
1562 | * If user-mode had set TF itself, then it's still clear from | ||
1563 | * do_debug() and we need to set it again to restore the user | ||
1564 | * state. If we entered on the slow path, TF was already set. | ||
1565 | */ | ||
1566 | if (work & _TIF_SINGLESTEP) | ||
1567 | regs->flags |= X86_EFLAGS_TF; | ||
1568 | |||
1569 | #ifdef CONFIG_SECCOMP | ||
1570 | /* | ||
1571 | * Call seccomp_phase2 before running the other hooks so that | ||
1572 | * they can see any changes made by a seccomp tracer. | ||
1573 | */ | ||
1574 | if (phase1_result > 1 && seccomp_phase2(phase1_result)) { | ||
1575 | /* seccomp failures shouldn't expose any additional code. */ | ||
1576 | return -1; | ||
1577 | } | ||
1578 | #endif | ||
1579 | |||
1580 | if (unlikely(work & _TIF_SYSCALL_EMU)) | ||
1581 | ret = -1L; | ||
1582 | |||
1583 | if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && | ||
1584 | tracehook_report_syscall_entry(regs)) | ||
1585 | ret = -1L; | ||
1586 | |||
1587 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | ||
1588 | trace_sys_enter(regs, regs->orig_ax); | ||
1589 | |||
1590 | do_audit_syscall_entry(regs, arch); | ||
1591 | |||
1592 | return ret ?: regs->orig_ax; | ||
1593 | } | ||
1594 | |||
1595 | long syscall_trace_enter(struct pt_regs *regs) | ||
1596 | { | ||
1597 | u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; | ||
1598 | unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); | ||
1599 | |||
1600 | if (phase1_result == 0) | ||
1601 | return regs->orig_ax; | ||
1602 | else | ||
1603 | return syscall_trace_enter_phase2(regs, arch, phase1_result); | ||
1604 | } | ||
1605 | |||
1606 | void syscall_trace_leave(struct pt_regs *regs) | ||
1607 | { | ||
1608 | bool step; | ||
1609 | |||
1610 | /* | ||
1611 | * We may come here right after calling schedule_user() | ||
1612 | * or do_notify_resume(), in which case we can be in RCU | ||
1613 | * user mode. | ||
1614 | */ | ||
1615 | user_exit(); | ||
1616 | |||
1617 | audit_syscall_exit(regs); | ||
1618 | |||
1619 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | ||
1620 | trace_sys_exit(regs, regs->ax); | ||
1621 | |||
1622 | /* | ||
1623 | * If TIF_SYSCALL_EMU is set, we only get here because of | ||
1624 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | ||
1625 | * We already reported this syscall instruction in | ||
1626 | * syscall_trace_enter(). | ||
1627 | */ | ||
1628 | step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && | ||
1629 | !test_thread_flag(TIF_SYSCALL_EMU); | ||
1630 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) | ||
1631 | tracehook_report_syscall_exit(regs, step); | ||
1632 | |||
1633 | user_enter(); | ||
1634 | } | ||
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 71820c42b6ce..da52e6bb5c7f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -31,11 +31,11 @@ | |||
31 | #include <asm/vdso.h> | 31 | #include <asm/vdso.h> |
32 | #include <asm/mce.h> | 32 | #include <asm/mce.h> |
33 | #include <asm/sighandling.h> | 33 | #include <asm/sighandling.h> |
34 | #include <asm/vm86.h> | ||
34 | 35 | ||
35 | #ifdef CONFIG_X86_64 | 36 | #ifdef CONFIG_X86_64 |
36 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
37 | #include <asm/ia32_unistd.h> | 38 | #include <asm/ia32_unistd.h> |
38 | #include <asm/sys_ia32.h> | ||
39 | #endif /* CONFIG_X86_64 */ | 39 | #endif /* CONFIG_X86_64 */ |
40 | 40 | ||
41 | #include <asm/syscall.h> | 41 | #include <asm/syscall.h> |
@@ -632,6 +632,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
632 | bool stepping, failed; | 632 | bool stepping, failed; |
633 | struct fpu *fpu = ¤t->thread.fpu; | 633 | struct fpu *fpu = ¤t->thread.fpu; |
634 | 634 | ||
635 | if (v8086_mode(regs)) | ||
636 | save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); | ||
637 | |||
635 | /* Are we from a system call? */ | 638 | /* Are we from a system call? */ |
636 | if (syscall_get_nr(current, regs) >= 0) { | 639 | if (syscall_get_nr(current, regs) >= 0) { |
637 | /* If so, check system call restarting.. */ | 640 | /* If so, check system call restarting.. */ |
@@ -697,7 +700,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
697 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 700 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
698 | * mistake. | 701 | * mistake. |
699 | */ | 702 | */ |
700 | static void do_signal(struct pt_regs *regs) | 703 | void do_signal(struct pt_regs *regs) |
701 | { | 704 | { |
702 | struct ksignal ksig; | 705 | struct ksignal ksig; |
703 | 706 | ||
@@ -732,32 +735,6 @@ static void do_signal(struct pt_regs *regs) | |||
732 | restore_saved_sigmask(); | 735 | restore_saved_sigmask(); |
733 | } | 736 | } |
734 | 737 | ||
735 | /* | ||
736 | * notification of userspace execution resumption | ||
737 | * - triggered by the TIF_WORK_MASK flags | ||
738 | */ | ||
739 | __visible void | ||
740 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | ||
741 | { | ||
742 | user_exit(); | ||
743 | |||
744 | if (thread_info_flags & _TIF_UPROBE) | ||
745 | uprobe_notify_resume(regs); | ||
746 | |||
747 | /* deal with pending signal delivery */ | ||
748 | if (thread_info_flags & _TIF_SIGPENDING) | ||
749 | do_signal(regs); | ||
750 | |||
751 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
752 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
753 | tracehook_notify_resume(regs); | ||
754 | } | ||
755 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | ||
756 | fire_user_return_notifiers(); | ||
757 | |||
758 | user_enter(); | ||
759 | } | ||
760 | |||
761 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 738 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
762 | { | 739 | { |
763 | struct task_struct *me = current; | 740 | struct task_struct *me = current; |
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c new file mode 100644 index 000000000000..dc3c0b1c816f --- /dev/null +++ b/arch/x86/kernel/signal_compat.c | |||
@@ -0,0 +1,95 @@ | |||
1 | #include <linux/compat.h> | ||
2 | #include <linux/uaccess.h> | ||
3 | |||
4 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) | ||
5 | { | ||
6 | int err = 0; | ||
7 | bool ia32 = test_thread_flag(TIF_IA32); | ||
8 | |||
9 | if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) | ||
10 | return -EFAULT; | ||
11 | |||
12 | put_user_try { | ||
13 | /* If you change siginfo_t structure, please make sure that | ||
14 | this code is fixed accordingly. | ||
15 | It should never copy any pad contained in the structure | ||
16 | to avoid security leaks, but must copy the generic | ||
17 | 3 ints plus the relevant union member. */ | ||
18 | put_user_ex(from->si_signo, &to->si_signo); | ||
19 | put_user_ex(from->si_errno, &to->si_errno); | ||
20 | put_user_ex((short)from->si_code, &to->si_code); | ||
21 | |||
22 | if (from->si_code < 0) { | ||
23 | put_user_ex(from->si_pid, &to->si_pid); | ||
24 | put_user_ex(from->si_uid, &to->si_uid); | ||
25 | put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); | ||
26 | } else { | ||
27 | /* | ||
28 | * First 32bits of unions are always present: | ||
29 | * si_pid === si_band === si_tid === si_addr(LS half) | ||
30 | */ | ||
31 | put_user_ex(from->_sifields._pad[0], | ||
32 | &to->_sifields._pad[0]); | ||
33 | switch (from->si_code >> 16) { | ||
34 | case __SI_FAULT >> 16: | ||
35 | break; | ||
36 | case __SI_SYS >> 16: | ||
37 | put_user_ex(from->si_syscall, &to->si_syscall); | ||
38 | put_user_ex(from->si_arch, &to->si_arch); | ||
39 | break; | ||
40 | case __SI_CHLD >> 16: | ||
41 | if (ia32) { | ||
42 | put_user_ex(from->si_utime, &to->si_utime); | ||
43 | put_user_ex(from->si_stime, &to->si_stime); | ||
44 | } else { | ||
45 | put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); | ||
46 | put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); | ||
47 | } | ||
48 | put_user_ex(from->si_status, &to->si_status); | ||
49 | /* FALL THROUGH */ | ||
50 | default: | ||
51 | case __SI_KILL >> 16: | ||
52 | put_user_ex(from->si_uid, &to->si_uid); | ||
53 | break; | ||
54 | case __SI_POLL >> 16: | ||
55 | put_user_ex(from->si_fd, &to->si_fd); | ||
56 | break; | ||
57 | case __SI_TIMER >> 16: | ||
58 | put_user_ex(from->si_overrun, &to->si_overrun); | ||
59 | put_user_ex(ptr_to_compat(from->si_ptr), | ||
60 | &to->si_ptr); | ||
61 | break; | ||
62 | /* This is not generated by the kernel as of now. */ | ||
63 | case __SI_RT >> 16: | ||
64 | case __SI_MESGQ >> 16: | ||
65 | put_user_ex(from->si_uid, &to->si_uid); | ||
66 | put_user_ex(from->si_int, &to->si_int); | ||
67 | break; | ||
68 | } | ||
69 | } | ||
70 | } put_user_catch(err); | ||
71 | |||
72 | return err; | ||
73 | } | ||
74 | |||
75 | int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) | ||
76 | { | ||
77 | int err = 0; | ||
78 | u32 ptr32; | ||
79 | |||
80 | if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) | ||
81 | return -EFAULT; | ||
82 | |||
83 | get_user_try { | ||
84 | get_user_ex(to->si_signo, &from->si_signo); | ||
85 | get_user_ex(to->si_errno, &from->si_errno); | ||
86 | get_user_ex(to->si_code, &from->si_code); | ||
87 | |||
88 | get_user_ex(to->si_pid, &from->si_pid); | ||
89 | get_user_ex(to->si_uid, &from->si_uid); | ||
90 | get_user_ex(ptr32, &from->si_ptr); | ||
91 | to->si_ptr = compat_ptr(ptr32); | ||
92 | } get_user_catch(err); | ||
93 | |||
94 | return err; | ||
95 | } | ||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 0ccb53a9fcd9..c9a073866ca7 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re | |||
18 | return addr; | 18 | return addr; |
19 | } | 19 | } |
20 | 20 | ||
21 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
21 | /* | 22 | /* |
22 | * We'll assume that the code segments in the GDT | 23 | * We'll assume that the code segments in the GDT |
23 | * are all zero-based. That is largely true: the | 24 | * are all zero-based. That is largely true: the |
@@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re | |||
45 | } | 46 | } |
46 | mutex_unlock(&child->mm->context.lock); | 47 | mutex_unlock(&child->mm->context.lock); |
47 | } | 48 | } |
49 | #endif | ||
48 | 50 | ||
49 | return addr; | 51 | return addr; |
50 | } | 52 | } |
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index 25b993729f9b..80bb24d9b880 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c | |||
@@ -12,10 +12,5 @@ | |||
12 | */ | 12 | */ |
13 | u64 notrace trace_clock_x86_tsc(void) | 13 | u64 notrace trace_clock_x86_tsc(void) |
14 | { | 14 | { |
15 | u64 ret; | 15 | return rdtsc_ordered(); |
16 | |||
17 | rdtsc_barrier(); | ||
18 | rdtscll(ret); | ||
19 | |||
20 | return ret; | ||
21 | } | 16 | } |
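rdtsc_ordered() folds the fence and the counter read into a single helper added earlier in this series; conceptually it amounts to the following, with the actual fence (MFENCE vs LFENCE) selected per CPU via alternatives:

	/* Conceptual expansion only -- see asm/msr.h for the real helper. */
	static __always_inline u64 rdtsc_ordered_sketch(void)
	{
		barrier();                              /* compiler barrier */
		asm volatile("lfence" ::: "memory");    /* order the read vs earlier loads */
		return rdtsc();
	}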
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5a5231d1d11..346eec73f7db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #include <asm/fpu/xstate.h> | 62 | #include <asm/fpu/xstate.h> |
63 | #include <asm/trace/mpx.h> | 63 | #include <asm/trace/mpx.h> |
64 | #include <asm/mpx.h> | 64 | #include <asm/mpx.h> |
65 | #include <asm/vm86.h> | ||
65 | 66 | ||
66 | #ifdef CONFIG_X86_64 | 67 | #ifdef CONFIG_X86_64 |
67 | #include <asm/x86_init.h> | 68 | #include <asm/x86_init.h> |
@@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
108 | preempt_count_dec(); | 109 | preempt_count_dec(); |
109 | } | 110 | } |
110 | 111 | ||
111 | enum ctx_state ist_enter(struct pt_regs *regs) | 112 | void ist_enter(struct pt_regs *regs) |
112 | { | 113 | { |
113 | enum ctx_state prev_state; | ||
114 | |||
115 | if (user_mode(regs)) { | 114 | if (user_mode(regs)) { |
116 | /* Other than that, we're just an exception. */ | 115 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
117 | prev_state = exception_enter(); | ||
118 | } else { | 116 | } else { |
119 | /* | 117 | /* |
120 | * We might have interrupted pretty much anything. In | 118 | * We might have interrupted pretty much anything. In |
@@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs) | |||
123 | * but we need to notify RCU. | 121 | * but we need to notify RCU. |
124 | */ | 122 | */ |
125 | rcu_nmi_enter(); | 123 | rcu_nmi_enter(); |
126 | prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */ | ||
127 | } | 124 | } |
128 | 125 | ||
129 | /* | 126 | /* |
130 | * We are atomic because we're on the IST stack (or we're on x86_32, | 127 | * We are atomic because we're on the IST stack; or we're on |
131 | * in which case we still shouldn't schedule). | 128 | * x86_32, in which case we still shouldn't schedule; or we're |
132 | * | 129 | * on x86_64 and entered from user mode, in which case we're |
133 | * This must be after exception_enter(), because exception_enter() | 130 | * still atomic unless ist_begin_non_atomic is called. |
134 | * won't do anything if in_interrupt() returns true. | ||
135 | */ | 131 | */ |
136 | preempt_count_add(HARDIRQ_OFFSET); | 132 | preempt_count_add(HARDIRQ_OFFSET); |
137 | 133 | ||
138 | /* This code is a bit fragile. Test it. */ | 134 | /* This code is a bit fragile. Test it. */ |
139 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); | 135 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); |
140 | |||
141 | return prev_state; | ||
142 | } | 136 | } |
143 | 137 | ||
144 | void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) | 138 | void ist_exit(struct pt_regs *regs) |
145 | { | 139 | { |
146 | /* Must be before exception_exit. */ | ||
147 | preempt_count_sub(HARDIRQ_OFFSET); | 140 | preempt_count_sub(HARDIRQ_OFFSET); |
148 | 141 | ||
149 | if (user_mode(regs)) | 142 | if (!user_mode(regs)) |
150 | return exception_exit(prev_state); | ||
151 | else | ||
152 | rcu_nmi_exit(); | 143 | rcu_nmi_exit(); |
153 | } | 144 | } |
154 | 145 | ||
@@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) | |||
162 | * a double fault, it can be safe to schedule. ist_begin_non_atomic() | 153 | * a double fault, it can be safe to schedule. ist_begin_non_atomic() |
163 | * begins a non-atomic section within an ist_enter()/ist_exit() region. | 154 | * begins a non-atomic section within an ist_enter()/ist_exit() region. |
164 | * Callers are responsible for enabling interrupts themselves inside | 155 | * Callers are responsible for enabling interrupts themselves inside |
165 | * the non-atomic section, and callers must call is_end_non_atomic() | 156 | * the non-atomic section, and callers must call ist_end_non_atomic() |
166 | * before ist_exit(). | 157 | * before ist_exit(). |
167 | */ | 158 | */ |
168 | void ist_begin_non_atomic(struct pt_regs *regs) | 159 | void ist_begin_non_atomic(struct pt_regs *regs) |
@@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap); | |||
289 | static void do_error_trap(struct pt_regs *regs, long error_code, char *str, | 280 | static void do_error_trap(struct pt_regs *regs, long error_code, char *str, |
290 | unsigned long trapnr, int signr) | 281 | unsigned long trapnr, int signr) |
291 | { | 282 | { |
292 | enum ctx_state prev_state = exception_enter(); | ||
293 | siginfo_t info; | 283 | siginfo_t info; |
294 | 284 | ||
285 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); | ||
286 | |||
295 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != | 287 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != |
296 | NOTIFY_STOP) { | 288 | NOTIFY_STOP) { |
297 | conditional_sti(regs); | 289 | conditional_sti(regs); |
298 | do_trap(trapnr, signr, str, regs, error_code, | 290 | do_trap(trapnr, signr, str, regs, error_code, |
299 | fill_trap_info(regs, signr, trapnr, &info)); | 291 | fill_trap_info(regs, signr, trapnr, &info)); |
300 | } | 292 | } |
301 | |||
302 | exception_exit(prev_state); | ||
303 | } | 293 | } |
304 | 294 | ||
305 | #define DO_ERROR(trapnr, signr, str, name) \ | 295 | #define DO_ERROR(trapnr, signr, str, name) \ |
@@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
351 | } | 341 | } |
352 | #endif | 342 | #endif |
353 | 343 | ||
354 | ist_enter(regs); /* Discard prev_state because we won't return. */ | 344 | ist_enter(regs); |
355 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); | 345 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); |
356 | 346 | ||
357 | tsk->thread.error_code = error_code; | 347 | tsk->thread.error_code = error_code; |
@@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
371 | 361 | ||
372 | dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | 362 | dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) |
373 | { | 363 | { |
374 | enum ctx_state prev_state; | ||
375 | const struct bndcsr *bndcsr; | 364 | const struct bndcsr *bndcsr; |
376 | siginfo_t *info; | 365 | siginfo_t *info; |
377 | 366 | ||
378 | prev_state = exception_enter(); | 367 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
379 | if (notify_die(DIE_TRAP, "bounds", regs, error_code, | 368 | if (notify_die(DIE_TRAP, "bounds", regs, error_code, |
380 | X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) | 369 | X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) |
381 | goto exit; | 370 | return; |
382 | conditional_sti(regs); | 371 | conditional_sti(regs); |
383 | 372 | ||
384 | if (!user_mode(regs)) | 373 | if (!user_mode(regs)) |
@@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | |||
435 | die("bounds", regs, error_code); | 424 | die("bounds", regs, error_code); |
436 | } | 425 | } |
437 | 426 | ||
438 | exit: | ||
439 | exception_exit(prev_state); | ||
440 | return; | 427 | return; |
428 | |||
441 | exit_trap: | 429 | exit_trap: |
442 | /* | 430 | /* |
443 | * This path out is for all the cases where we could not | 431 | * This path out is for all the cases where we could not |
@@ -447,35 +435,33 @@ exit_trap: | |||
447 | * time.. | 435 | * time.. |
448 | */ | 436 | */ |
449 | do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); | 437 | do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); |
450 | exception_exit(prev_state); | ||
451 | } | 438 | } |
452 | 439 | ||
453 | dotraplinkage void | 440 | dotraplinkage void |
454 | do_general_protection(struct pt_regs *regs, long error_code) | 441 | do_general_protection(struct pt_regs *regs, long error_code) |
455 | { | 442 | { |
456 | struct task_struct *tsk; | 443 | struct task_struct *tsk; |
457 | enum ctx_state prev_state; | ||
458 | 444 | ||
459 | prev_state = exception_enter(); | 445 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
460 | conditional_sti(regs); | 446 | conditional_sti(regs); |
461 | 447 | ||
462 | if (v8086_mode(regs)) { | 448 | if (v8086_mode(regs)) { |
463 | local_irq_enable(); | 449 | local_irq_enable(); |
464 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | 450 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); |
465 | goto exit; | 451 | return; |
466 | } | 452 | } |
467 | 453 | ||
468 | tsk = current; | 454 | tsk = current; |
469 | if (!user_mode(regs)) { | 455 | if (!user_mode(regs)) { |
470 | if (fixup_exception(regs)) | 456 | if (fixup_exception(regs)) |
471 | goto exit; | 457 | return; |
472 | 458 | ||
473 | tsk->thread.error_code = error_code; | 459 | tsk->thread.error_code = error_code; |
474 | tsk->thread.trap_nr = X86_TRAP_GP; | 460 | tsk->thread.trap_nr = X86_TRAP_GP; |
475 | if (notify_die(DIE_GPF, "general protection fault", regs, error_code, | 461 | if (notify_die(DIE_GPF, "general protection fault", regs, error_code, |
476 | X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) | 462 | X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) |
477 | die("general protection fault", regs, error_code); | 463 | die("general protection fault", regs, error_code); |
478 | goto exit; | 464 | return; |
479 | } | 465 | } |
480 | 466 | ||
481 | tsk->thread.error_code = error_code; | 467 | tsk->thread.error_code = error_code; |
@@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
491 | } | 477 | } |
492 | 478 | ||
493 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); | 479 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); |
494 | exit: | ||
495 | exception_exit(prev_state); | ||
496 | } | 480 | } |
497 | NOKPROBE_SYMBOL(do_general_protection); | 481 | NOKPROBE_SYMBOL(do_general_protection); |
498 | 482 | ||
499 | /* May run on IST stack. */ | 483 | /* May run on IST stack. */ |
500 | dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | 484 | dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) |
501 | { | 485 | { |
502 | enum ctx_state prev_state; | ||
503 | |||
504 | #ifdef CONFIG_DYNAMIC_FTRACE | 486 | #ifdef CONFIG_DYNAMIC_FTRACE |
505 | /* | 487 | /* |
506 | * ftrace must be first, everything else may cause a recursive crash. | 488 | * ftrace must be first, everything else may cause a recursive crash. |
@@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | |||
513 | if (poke_int3_handler(regs)) | 495 | if (poke_int3_handler(regs)) |
514 | return; | 496 | return; |
515 | 497 | ||
516 | prev_state = ist_enter(regs); | 498 | ist_enter(regs); |
499 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); | ||
517 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 500 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
518 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 501 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
519 | SIGTRAP) == NOTIFY_STOP) | 502 | SIGTRAP) == NOTIFY_STOP) |
@@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | |||
539 | preempt_conditional_cli(regs); | 522 | preempt_conditional_cli(regs); |
540 | debug_stack_usage_dec(); | 523 | debug_stack_usage_dec(); |
541 | exit: | 524 | exit: |
542 | ist_exit(regs, prev_state); | 525 | ist_exit(regs); |
543 | } | 526 | } |
544 | NOKPROBE_SYMBOL(do_int3); | 527 | NOKPROBE_SYMBOL(do_int3); |
545 | 528 | ||
@@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret); | |||
615 | dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | 598 | dotraplinkage void do_debug(struct pt_regs *regs, long error_code) |
616 | { | 599 | { |
617 | struct task_struct *tsk = current; | 600 | struct task_struct *tsk = current; |
618 | enum ctx_state prev_state; | ||
619 | int user_icebp = 0; | 601 | int user_icebp = 0; |
620 | unsigned long dr6; | 602 | unsigned long dr6; |
621 | int si_code; | 603 | int si_code; |
622 | 604 | ||
623 | prev_state = ist_enter(regs); | 605 | ist_enter(regs); |
624 | 606 | ||
625 | get_debugreg(dr6, 6); | 607 | get_debugreg(dr6, 6); |
626 | 608 | ||
@@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
695 | debug_stack_usage_dec(); | 677 | debug_stack_usage_dec(); |
696 | 678 | ||
697 | exit: | 679 | exit: |
698 | ist_exit(regs, prev_state); | 680 | ist_exit(regs); |
699 | } | 681 | } |
700 | NOKPROBE_SYMBOL(do_debug); | 682 | NOKPROBE_SYMBOL(do_debug); |
701 | 683 | ||
@@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
747 | 729 | ||
748 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | 730 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
749 | { | 731 | { |
750 | enum ctx_state prev_state; | 732 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
751 | |||
752 | prev_state = exception_enter(); | ||
753 | math_error(regs, error_code, X86_TRAP_MF); | 733 | math_error(regs, error_code, X86_TRAP_MF); |
754 | exception_exit(prev_state); | ||
755 | } | 734 | } |
756 | 735 | ||
757 | dotraplinkage void | 736 | dotraplinkage void |
758 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 737 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) |
759 | { | 738 | { |
760 | enum ctx_state prev_state; | 739 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
761 | |||
762 | prev_state = exception_enter(); | ||
763 | math_error(regs, error_code, X86_TRAP_XF); | 740 | math_error(regs, error_code, X86_TRAP_XF); |
764 | exception_exit(prev_state); | ||
765 | } | 741 | } |
766 | 742 | ||
767 | dotraplinkage void | 743 | dotraplinkage void |
@@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | |||
773 | dotraplinkage void | 749 | dotraplinkage void |
774 | do_device_not_available(struct pt_regs *regs, long error_code) | 750 | do_device_not_available(struct pt_regs *regs, long error_code) |
775 | { | 751 | { |
776 | enum ctx_state prev_state; | 752 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
777 | |||
778 | prev_state = exception_enter(); | ||
779 | BUG_ON(use_eager_fpu()); | 753 | BUG_ON(use_eager_fpu()); |
780 | 754 | ||
781 | #ifdef CONFIG_MATH_EMULATION | 755 | #ifdef CONFIG_MATH_EMULATION |
@@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
786 | 760 | ||
787 | info.regs = regs; | 761 | info.regs = regs; |
788 | math_emulate(&info); | 762 | math_emulate(&info); |
789 | exception_exit(prev_state); | ||
790 | return; | 763 | return; |
791 | } | 764 | } |
792 | #endif | 765 | #endif |
@@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
794 | #ifdef CONFIG_X86_32 | 767 | #ifdef CONFIG_X86_32 |
795 | conditional_sti(regs); | 768 | conditional_sti(regs); |
796 | #endif | 769 | #endif |
797 | exception_exit(prev_state); | ||
798 | } | 770 | } |
799 | NOKPROBE_SYMBOL(do_device_not_available); | 771 | NOKPROBE_SYMBOL(do_device_not_available); |
800 | 772 | ||
@@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available); | |||
802 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | 774 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) |
803 | { | 775 | { |
804 | siginfo_t info; | 776 | siginfo_t info; |
805 | enum ctx_state prev_state; | ||
806 | 777 | ||
807 | prev_state = exception_enter(); | 778 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
808 | local_irq_enable(); | 779 | local_irq_enable(); |
809 | 780 | ||
810 | info.si_signo = SIGILL; | 781 | info.si_signo = SIGILL; |
@@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
816 | do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, | 787 | do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, |
817 | &info); | 788 | &info); |
818 | } | 789 | } |
819 | exception_exit(prev_state); | ||
820 | } | 790 | } |
821 | #endif | 791 | #endif |
822 | 792 | ||
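The traps.c hunks above all follow one pattern: the C handlers stop calling exception_enter()/exception_exit() themselves and rely on the new C entry/exit code to manage context tracking, keeping only an RCU_LOCKDEP_WARN() assertion (and ist_enter()/ist_exit() losing their return value). Below is a minimal userspace sketch of that before/after shape; the kernel helpers are stubbed out here and are not the real APIs.

#include <stdbool.h>
#include <stdio.h>

static bool rcu_watching = true;                  /* stand-in for rcu_is_watching() */

#define RCU_LOCKDEP_WARN(cond, msg) do { if (cond) fprintf(stderr, "lockdep: %s\n", msg); } while (0)

/* Old shape: each handler opened and closed its own context-tracking section. */
static void old_style_handler(void)
{
        int prev_state = 0;                       /* was: prev_state = exception_enter(); */
        /* ... handle the exception ... */
        (void)prev_state;                         /* was: exception_exit(prev_state); */
}

/* New shape: the entry code already did the transition, so only assert it. */
static void new_style_handler(void)
{
        RCU_LOCKDEP_WARN(!rcu_watching, "entry code didn't wake RCU");
        /* ... handle the exception and simply return ... */
}

int main(void)
{
        old_style_handler();
        new_style_handler();
        return 0;
}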
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 88e9a38c71a5..79055cf2c497 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | |||
248 | 248 | ||
249 | data = cyc2ns_write_begin(cpu); | 249 | data = cyc2ns_write_begin(cpu); |
250 | 250 | ||
251 | rdtscll(tsc_now); | 251 | tsc_now = rdtsc(); |
252 | ns_now = cycles_2_ns(tsc_now); | 252 | ns_now = cycles_2_ns(tsc_now); |
253 | 253 | ||
254 | /* | 254 | /* |
@@ -290,7 +290,7 @@ u64 native_sched_clock(void) | |||
290 | } | 290 | } |
291 | 291 | ||
292 | /* read the Time Stamp Counter: */ | 292 | /* read the Time Stamp Counter: */ |
293 | rdtscll(tsc_now); | 293 | tsc_now = rdtsc(); |
294 | 294 | ||
295 | /* return the value in ns */ | 295 | /* return the value in ns */ |
296 | return cycles_2_ns(tsc_now); | 296 | return cycles_2_ns(tsc_now); |
@@ -316,12 +316,6 @@ unsigned long long | |||
316 | sched_clock(void) __attribute__((alias("native_sched_clock"))); | 316 | sched_clock(void) __attribute__((alias("native_sched_clock"))); |
317 | #endif | 317 | #endif |
318 | 318 | ||
319 | unsigned long long native_read_tsc(void) | ||
320 | { | ||
321 | return __native_read_tsc(); | ||
322 | } | ||
323 | EXPORT_SYMBOL(native_read_tsc); | ||
324 | |||
325 | int check_tsc_unstable(void) | 319 | int check_tsc_unstable(void) |
326 | { | 320 | { |
327 | return tsc_unstable; | 321 | return tsc_unstable; |
@@ -984,7 +978,7 @@ static struct clocksource clocksource_tsc; | |||
984 | */ | 978 | */ |
985 | static cycle_t read_tsc(struct clocksource *cs) | 979 | static cycle_t read_tsc(struct clocksource *cs) |
986 | { | 980 | { |
987 | return (cycle_t)get_cycles(); | 981 | return (cycle_t)rdtsc_ordered(); |
988 | } | 982 | } |
989 | 983 | ||
990 | /* | 984 | /* |
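The tsc.c changes replace the rdtscll()/native_read_tsc() spellings with rdtsc(), and switch the clocksource read path to rdtsc_ordered(), which serializes the read against earlier loads. A rough userspace approximation of the two primitives is sketched below; it uses a plain LFENCE, whereas the kernel selects MFENCE or LFENCE per CPU via alternatives, so treat it as an illustration rather than the real implementation.

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

static inline uint64_t tsc_read(void)              /* rough rdtsc() equivalent */
{
        return __rdtsc();                           /* plain RDTSC, may be reordered */
}

static inline uint64_t tsc_read_ordered(void)       /* rough rdtsc_ordered() equivalent */
{
        _mm_lfence();                               /* keep RDTSC after all earlier loads */
        return __rdtsc();
}

int main(void)
{
        uint64_t a = tsc_read_ordered();
        uint64_t b = tsc_read_ordered();
        printf("delta: %llu cycles\n", (unsigned long long)(b - a));
        return 0;
}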
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index dd8d0791dfb5..78083bf23ed1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -39,16 +39,15 @@ static cycles_t max_warp; | |||
39 | static int nr_warps; | 39 | static int nr_warps; |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * TSC-warp measurement loop running on both CPUs: | 42 | * TSC-warp measurement loop running on both CPUs. This is not called |
43 | * if there is no TSC. | ||
43 | */ | 44 | */ |
44 | static void check_tsc_warp(unsigned int timeout) | 45 | static void check_tsc_warp(unsigned int timeout) |
45 | { | 46 | { |
46 | cycles_t start, now, prev, end; | 47 | cycles_t start, now, prev, end; |
47 | int i; | 48 | int i; |
48 | 49 | ||
49 | rdtsc_barrier(); | 50 | start = rdtsc_ordered(); |
50 | start = get_cycles(); | ||
51 | rdtsc_barrier(); | ||
52 | /* | 51 | /* |
53 | * The measurement runs for 'timeout' msecs: | 52 | * The measurement runs for 'timeout' msecs: |
54 | */ | 53 | */ |
@@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout) | |||
63 | */ | 62 | */ |
64 | arch_spin_lock(&sync_lock); | 63 | arch_spin_lock(&sync_lock); |
65 | prev = last_tsc; | 64 | prev = last_tsc; |
66 | rdtsc_barrier(); | 65 | now = rdtsc_ordered(); |
67 | now = get_cycles(); | ||
68 | rdtsc_barrier(); | ||
69 | last_tsc = now; | 66 | last_tsc = now; |
70 | arch_spin_unlock(&sync_lock); | 67 | arch_spin_unlock(&sync_lock); |
71 | 68 | ||
@@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu) | |||
126 | 123 | ||
127 | /* | 124 | /* |
128 | * No need to check if we already know that the TSC is not | 125 | * No need to check if we already know that the TSC is not |
129 | * synchronized: | 126 | * synchronized or if we have no TSC. |
130 | */ | 127 | */ |
131 | if (unsynchronized_tsc()) | 128 | if (unsynchronized_tsc()) |
132 | return; | 129 | return; |
@@ -190,6 +187,7 @@ void check_tsc_sync_target(void) | |||
190 | { | 187 | { |
191 | int cpus = 2; | 188 | int cpus = 2; |
192 | 189 | ||
190 | /* Also aborts if there is no TSC. */ | ||
193 | if (unsynchronized_tsc() || tsc_clocksource_reliable) | 191 | if (unsynchronized_tsc() || tsc_clocksource_reliable) |
194 | return; | 192 | return; |
195 | 193 | ||
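tsc_sync.c keeps the same warp-detection idea but folds each rdtsc_barrier()/get_cycles() pair into a single rdtsc_ordered() call: both CPUs read the TSC under a shared lock and flag a warp whenever a fresh read lands behind the previously published value. The following is a simplified userspace model under stated assumptions (a pthread mutex in place of arch_spin_lock, two threads in place of two CPUs, and an LFENCE standing in for rdtsc_ordered()).

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t last_tsc;
static uint64_t max_warp;
static int nr_warps;

static void *check_tsc_warp(void *arg)
{
        (void)arg;
        for (int i = 0; i < 1000000; i++) {
                pthread_mutex_lock(&sync_lock);
                uint64_t prev = last_tsc;
                _mm_lfence();
                uint64_t now = __rdtsc();           /* rdtsc_ordered() stand-in */
                last_tsc = now;

                if (now < prev) {                   /* this read went behind the other CPU's */
                        nr_warps++;
                        if (prev - now > max_warp)
                                max_warp = prev - now;
                }
                pthread_mutex_unlock(&sync_lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;
        pthread_create(&t1, NULL, check_tsc_warp, NULL);
        pthread_create(&t2, NULL, check_tsc_warp, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        printf("warps: %d, max: %llu cycles\n", nr_warps, (unsigned long long)max_warp);
        return 0;
}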
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index fc9db6ef2a95..abd8b856bd2b 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -44,11 +44,14 @@ | |||
44 | #include <linux/ptrace.h> | 44 | #include <linux/ptrace.h> |
45 | #include <linux/audit.h> | 45 | #include <linux/audit.h> |
46 | #include <linux/stddef.h> | 46 | #include <linux/stddef.h> |
47 | #include <linux/slab.h> | ||
47 | 48 | ||
48 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
49 | #include <asm/io.h> | 50 | #include <asm/io.h> |
50 | #include <asm/tlbflush.h> | 51 | #include <asm/tlbflush.h> |
51 | #include <asm/irq.h> | 52 | #include <asm/irq.h> |
53 | #include <asm/traps.h> | ||
54 | #include <asm/vm86.h> | ||
52 | 55 | ||
53 | /* | 56 | /* |
54 | * Known problems: | 57 | * Known problems: |
@@ -66,10 +69,6 @@ | |||
66 | */ | 69 | */ |
67 | 70 | ||
68 | 71 | ||
69 | #define KVM86 ((struct kernel_vm86_struct *)regs) | ||
70 | #define VMPI KVM86->vm86plus | ||
71 | |||
72 | |||
73 | /* | 72 | /* |
74 | * 8- and 16-bit register defines.. | 73 | * 8- and 16-bit register defines.. |
75 | */ | 74 | */ |
@@ -81,8 +80,8 @@ | |||
81 | /* | 80 | /* |
82 | * virtual flags (16 and 32-bit versions) | 81 | * virtual flags (16 and 32-bit versions) |
83 | */ | 82 | */ |
84 | #define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) | 83 | #define VFLAGS (*(unsigned short *)&(current->thread.vm86->veflags)) |
85 | #define VEFLAGS (current->thread.v86flags) | 84 | #define VEFLAGS (current->thread.vm86->veflags) |
86 | 85 | ||
87 | #define set_flags(X, new, mask) \ | 86 | #define set_flags(X, new, mask) \ |
88 | ((X) = ((X) & ~(mask)) | ((new) & (mask))) | 87 | ((X) = ((X) & ~(mask)) | ((new) & (mask))) |
@@ -90,46 +89,13 @@ | |||
90 | #define SAFE_MASK (0xDD5) | 89 | #define SAFE_MASK (0xDD5) |
91 | #define RETURN_MASK (0xDFF) | 90 | #define RETURN_MASK (0xDFF) |
92 | 91 | ||
93 | /* convert kernel_vm86_regs to vm86_regs */ | 92 | void save_v86_state(struct kernel_vm86_regs *regs, int retval) |
94 | static int copy_vm86_regs_to_user(struct vm86_regs __user *user, | ||
95 | const struct kernel_vm86_regs *regs) | ||
96 | { | ||
97 | int ret = 0; | ||
98 | |||
99 | /* | ||
100 | * kernel_vm86_regs is missing gs, so copy everything up to | ||
101 | * (but not including) orig_eax, and then rest including orig_eax. | ||
102 | */ | ||
103 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax)); | ||
104 | ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax, | ||
105 | sizeof(struct kernel_vm86_regs) - | ||
106 | offsetof(struct kernel_vm86_regs, pt.orig_ax)); | ||
107 | |||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | /* convert vm86_regs to kernel_vm86_regs */ | ||
112 | static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, | ||
113 | const struct vm86_regs __user *user, | ||
114 | unsigned extra) | ||
115 | { | ||
116 | int ret = 0; | ||
117 | |||
118 | /* copy ax-fs inclusive */ | ||
119 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax)); | ||
120 | /* copy orig_ax-__gsh+extra */ | ||
121 | ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax, | ||
122 | sizeof(struct kernel_vm86_regs) - | ||
123 | offsetof(struct kernel_vm86_regs, pt.orig_ax) + | ||
124 | extra); | ||
125 | return ret; | ||
126 | } | ||
127 | |||
128 | struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | ||
129 | { | 93 | { |
130 | struct tss_struct *tss; | 94 | struct tss_struct *tss; |
131 | struct pt_regs *ret; | 95 | struct task_struct *tsk = current; |
132 | unsigned long tmp; | 96 | struct vm86plus_struct __user *user; |
97 | struct vm86 *vm86 = current->thread.vm86; | ||
98 | long err = 0; | ||
133 | 99 | ||
134 | /* | 100 | /* |
135 | * This gets called from entry.S with interrupts disabled, but | 101 | * This gets called from entry.S with interrupts disabled, but |
@@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | |||
138 | */ | 104 | */ |
139 | local_irq_enable(); | 105 | local_irq_enable(); |
140 | 106 | ||
141 | if (!current->thread.vm86_info) { | 107 | if (!vm86 || !vm86->user_vm86) { |
142 | pr_alert("no vm86_info: BAD\n"); | 108 | pr_alert("no user_vm86: BAD\n"); |
143 | do_exit(SIGSEGV); | 109 | do_exit(SIGSEGV); |
144 | } | 110 | } |
145 | set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); | 111 | set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask); |
146 | tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); | 112 | user = vm86->user_vm86; |
147 | tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); | 113 | |
148 | if (tmp) { | 114 | if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ? |
149 | pr_alert("could not access userspace vm86_info\n"); | 115 | sizeof(struct vm86plus_struct) : |
116 | sizeof(struct vm86_struct))) { | ||
117 | pr_alert("could not access userspace vm86 info\n"); | ||
118 | do_exit(SIGSEGV); | ||
119 | } | ||
120 | |||
121 | put_user_try { | ||
122 | put_user_ex(regs->pt.bx, &user->regs.ebx); | ||
123 | put_user_ex(regs->pt.cx, &user->regs.ecx); | ||
124 | put_user_ex(regs->pt.dx, &user->regs.edx); | ||
125 | put_user_ex(regs->pt.si, &user->regs.esi); | ||
126 | put_user_ex(regs->pt.di, &user->regs.edi); | ||
127 | put_user_ex(regs->pt.bp, &user->regs.ebp); | ||
128 | put_user_ex(regs->pt.ax, &user->regs.eax); | ||
129 | put_user_ex(regs->pt.ip, &user->regs.eip); | ||
130 | put_user_ex(regs->pt.cs, &user->regs.cs); | ||
131 | put_user_ex(regs->pt.flags, &user->regs.eflags); | ||
132 | put_user_ex(regs->pt.sp, &user->regs.esp); | ||
133 | put_user_ex(regs->pt.ss, &user->regs.ss); | ||
134 | put_user_ex(regs->es, &user->regs.es); | ||
135 | put_user_ex(regs->ds, &user->regs.ds); | ||
136 | put_user_ex(regs->fs, &user->regs.fs); | ||
137 | put_user_ex(regs->gs, &user->regs.gs); | ||
138 | |||
139 | put_user_ex(vm86->screen_bitmap, &user->screen_bitmap); | ||
140 | } put_user_catch(err); | ||
141 | if (err) { | ||
142 | pr_alert("could not access userspace vm86 info\n"); | ||
150 | do_exit(SIGSEGV); | 143 | do_exit(SIGSEGV); |
151 | } | 144 | } |
152 | 145 | ||
153 | tss = &per_cpu(cpu_tss, get_cpu()); | 146 | tss = &per_cpu(cpu_tss, get_cpu()); |
154 | current->thread.sp0 = current->thread.saved_sp0; | 147 | tsk->thread.sp0 = vm86->saved_sp0; |
155 | current->thread.sysenter_cs = __KERNEL_CS; | 148 | tsk->thread.sysenter_cs = __KERNEL_CS; |
156 | load_sp0(tss, ¤t->thread); | 149 | load_sp0(tss, &tsk->thread); |
157 | current->thread.saved_sp0 = 0; | 150 | vm86->saved_sp0 = 0; |
158 | put_cpu(); | 151 | put_cpu(); |
159 | 152 | ||
160 | ret = KVM86->regs32; | 153 | memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); |
161 | 154 | ||
162 | ret->fs = current->thread.saved_fs; | 155 | lazy_load_gs(vm86->regs32.gs); |
163 | set_user_gs(ret, current->thread.saved_gs); | ||
164 | 156 | ||
165 | return ret; | 157 | regs->pt.ax = retval; |
166 | } | 158 | } |
167 | 159 | ||
168 | static void mark_screen_rdonly(struct mm_struct *mm) | 160 | static void mark_screen_rdonly(struct mm_struct *mm) |
@@ -200,45 +192,16 @@ out: | |||
200 | 192 | ||
201 | 193 | ||
202 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | 194 | static int do_vm86_irq_handling(int subfunction, int irqnumber); |
203 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | 195 | static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus); |
204 | 196 | ||
205 | SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86) | 197 | SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86) |
206 | { | 198 | { |
207 | struct kernel_vm86_struct info; /* declare this _on top_, | 199 | return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false); |
208 | * this avoids wasting of stack space. | ||
209 | * This remains on the stack until we | ||
210 | * return to 32 bit user space. | ||
211 | */ | ||
212 | struct task_struct *tsk = current; | ||
213 | int tmp; | ||
214 | |||
215 | if (tsk->thread.saved_sp0) | ||
216 | return -EPERM; | ||
217 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | ||
218 | offsetof(struct kernel_vm86_struct, vm86plus) - | ||
219 | sizeof(info.regs)); | ||
220 | if (tmp) | ||
221 | return -EFAULT; | ||
222 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); | ||
223 | info.regs32 = current_pt_regs(); | ||
224 | tsk->thread.vm86_info = v86; | ||
225 | do_sys_vm86(&info, tsk); | ||
226 | return 0; /* we never return here */ | ||
227 | } | 200 | } |
228 | 201 | ||
229 | 202 | ||
230 | SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) | 203 | SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) |
231 | { | 204 | { |
232 | struct kernel_vm86_struct info; /* declare this _on top_, | ||
233 | * this avoids wasting of stack space. | ||
234 | * This remains on the stack until we | ||
235 | * return to 32 bit user space. | ||
236 | */ | ||
237 | struct task_struct *tsk; | ||
238 | int tmp; | ||
239 | struct vm86plus_struct __user *v86; | ||
240 | |||
241 | tsk = current; | ||
242 | switch (cmd) { | 205 | switch (cmd) { |
243 | case VM86_REQUEST_IRQ: | 206 | case VM86_REQUEST_IRQ: |
244 | case VM86_FREE_IRQ: | 207 | case VM86_FREE_IRQ: |
@@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) | |||
256 | } | 219 | } |
257 | 220 | ||
258 | /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ | 221 | /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ |
259 | if (tsk->thread.saved_sp0) | 222 | return do_sys_vm86((struct vm86plus_struct __user *) arg, true); |
260 | return -EPERM; | ||
261 | v86 = (struct vm86plus_struct __user *)arg; | ||
262 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | ||
263 | offsetof(struct kernel_vm86_struct, regs32) - | ||
264 | sizeof(info.regs)); | ||
265 | if (tmp) | ||
266 | return -EFAULT; | ||
267 | info.regs32 = current_pt_regs(); | ||
268 | info.vm86plus.is_vm86pus = 1; | ||
269 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; | ||
270 | do_sys_vm86(&info, tsk); | ||
271 | return 0; /* we never return here */ | ||
272 | } | 223 | } |
273 | 224 | ||
274 | 225 | ||
275 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) | 226 | static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) |
276 | { | 227 | { |
277 | struct tss_struct *tss; | 228 | struct tss_struct *tss; |
278 | /* | 229 | struct task_struct *tsk = current; |
279 | * make sure the vm86() system call doesn't try to do anything silly | 230 | struct vm86 *vm86 = tsk->thread.vm86; |
280 | */ | 231 | struct kernel_vm86_regs vm86regs; |
281 | info->regs.pt.ds = 0; | 232 | struct pt_regs *regs = current_pt_regs(); |
282 | info->regs.pt.es = 0; | 233 | unsigned long err = 0; |
283 | info->regs.pt.fs = 0; | 234 | |
284 | #ifndef CONFIG_X86_32_LAZY_GS | 235 | if (!vm86) { |
285 | info->regs.pt.gs = 0; | 236 | if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) |
286 | #endif | 237 | return -ENOMEM; |
238 | tsk->thread.vm86 = vm86; | ||
239 | } | ||
240 | if (vm86->saved_sp0) | ||
241 | return -EPERM; | ||
242 | |||
243 | if (!access_ok(VERIFY_READ, user_vm86, plus ? | ||
244 | sizeof(struct vm86_struct) : | ||
245 | sizeof(struct vm86plus_struct))) | ||
246 | return -EFAULT; | ||
247 | |||
248 | memset(&vm86regs, 0, sizeof(vm86regs)); | ||
249 | get_user_try { | ||
250 | unsigned short seg; | ||
251 | get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx); | ||
252 | get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx); | ||
253 | get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx); | ||
254 | get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi); | ||
255 | get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi); | ||
256 | get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp); | ||
257 | get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax); | ||
258 | get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip); | ||
259 | get_user_ex(seg, &user_vm86->regs.cs); | ||
260 | vm86regs.pt.cs = seg; | ||
261 | get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags); | ||
262 | get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp); | ||
263 | get_user_ex(seg, &user_vm86->regs.ss); | ||
264 | vm86regs.pt.ss = seg; | ||
265 | get_user_ex(vm86regs.es, &user_vm86->regs.es); | ||
266 | get_user_ex(vm86regs.ds, &user_vm86->regs.ds); | ||
267 | get_user_ex(vm86regs.fs, &user_vm86->regs.fs); | ||
268 | get_user_ex(vm86regs.gs, &user_vm86->regs.gs); | ||
269 | |||
270 | get_user_ex(vm86->flags, &user_vm86->flags); | ||
271 | get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap); | ||
272 | get_user_ex(vm86->cpu_type, &user_vm86->cpu_type); | ||
273 | } get_user_catch(err); | ||
274 | if (err) | ||
275 | return err; | ||
276 | |||
277 | if (copy_from_user(&vm86->int_revectored, | ||
278 | &user_vm86->int_revectored, | ||
279 | sizeof(struct revectored_struct))) | ||
280 | return -EFAULT; | ||
281 | if (copy_from_user(&vm86->int21_revectored, | ||
282 | &user_vm86->int21_revectored, | ||
283 | sizeof(struct revectored_struct))) | ||
284 | return -EFAULT; | ||
285 | if (plus) { | ||
286 | if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus, | ||
287 | sizeof(struct vm86plus_info_struct))) | ||
288 | return -EFAULT; | ||
289 | vm86->vm86plus.is_vm86pus = 1; | ||
290 | } else | ||
291 | memset(&vm86->vm86plus, 0, | ||
292 | sizeof(struct vm86plus_info_struct)); | ||
293 | |||
294 | memcpy(&vm86->regs32, regs, sizeof(struct pt_regs)); | ||
295 | vm86->user_vm86 = user_vm86; | ||
287 | 296 | ||
288 | /* | 297 | /* |
289 | * The flags register is also special: we cannot trust that the user | 298 | * The flags register is also special: we cannot trust that the user |
290 | * has set it up safely, so this makes sure interrupt etc flags are | 299 | * has set it up safely, so this makes sure interrupt etc flags are |
291 | * inherited from protected mode. | 300 | * inherited from protected mode. |
292 | */ | 301 | */ |
293 | VEFLAGS = info->regs.pt.flags; | 302 | VEFLAGS = vm86regs.pt.flags; |
294 | info->regs.pt.flags &= SAFE_MASK; | 303 | vm86regs.pt.flags &= SAFE_MASK; |
295 | info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK; | 304 | vm86regs.pt.flags |= regs->flags & ~SAFE_MASK; |
296 | info->regs.pt.flags |= X86_VM_MASK; | 305 | vm86regs.pt.flags |= X86_VM_MASK; |
306 | |||
307 | vm86regs.pt.orig_ax = regs->orig_ax; | ||
297 | 308 | ||
298 | switch (info->cpu_type) { | 309 | switch (vm86->cpu_type) { |
299 | case CPU_286: | 310 | case CPU_286: |
300 | tsk->thread.v86mask = 0; | 311 | vm86->veflags_mask = 0; |
301 | break; | 312 | break; |
302 | case CPU_386: | 313 | case CPU_386: |
303 | tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; | 314 | vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; |
304 | break; | 315 | break; |
305 | case CPU_486: | 316 | case CPU_486: |
306 | tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; | 317 | vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; |
307 | break; | 318 | break; |
308 | default: | 319 | default: |
309 | tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; | 320 | vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; |
310 | break; | 321 | break; |
311 | } | 322 | } |
312 | 323 | ||
313 | /* | 324 | /* |
314 | * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) | 325 | * Save old state |
315 | */ | 326 | */ |
316 | info->regs32->ax = VM86_SIGNAL; | 327 | vm86->saved_sp0 = tsk->thread.sp0; |
317 | tsk->thread.saved_sp0 = tsk->thread.sp0; | 328 | lazy_save_gs(vm86->regs32.gs); |
318 | tsk->thread.saved_fs = info->regs32->fs; | ||
319 | tsk->thread.saved_gs = get_user_gs(info->regs32); | ||
320 | 329 | ||
321 | tss = &per_cpu(cpu_tss, get_cpu()); | 330 | tss = &per_cpu(cpu_tss, get_cpu()); |
322 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; | 331 | /* make room for real-mode segments */ |
332 | tsk->thread.sp0 += 16; | ||
323 | if (cpu_has_sep) | 333 | if (cpu_has_sep) |
324 | tsk->thread.sysenter_cs = 0; | 334 | tsk->thread.sysenter_cs = 0; |
325 | load_sp0(tss, &tsk->thread); | 335 | load_sp0(tss, &tsk->thread); |
326 | put_cpu(); | 336 | put_cpu(); |
327 | 337 | ||
328 | tsk->thread.screen_bitmap = info->screen_bitmap; | 338 | if (vm86->flags & VM86_SCREEN_BITMAP) |
329 | if (info->flags & VM86_SCREEN_BITMAP) | ||
330 | mark_screen_rdonly(tsk->mm); | 339 | mark_screen_rdonly(tsk->mm); |
331 | 340 | ||
332 | /*call __audit_syscall_exit since we do not exit via the normal paths */ | 341 | memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); |
333 | #ifdef CONFIG_AUDITSYSCALL | 342 | force_iret(); |
334 | if (unlikely(current->audit_context)) | 343 | return regs->ax; |
335 | __audit_syscall_exit(1, 0); | ||
336 | #endif | ||
337 | |||
338 | __asm__ __volatile__( | ||
339 | "movl %0,%%esp\n\t" | ||
340 | "movl %1,%%ebp\n\t" | ||
341 | #ifdef CONFIG_X86_32_LAZY_GS | ||
342 | "mov %2, %%gs\n\t" | ||
343 | #endif | ||
344 | "jmp resume_userspace" | ||
345 | : /* no outputs */ | ||
346 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); | ||
347 | /* we never return here */ | ||
348 | } | ||
349 | |||
350 | static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) | ||
351 | { | ||
352 | struct pt_regs *regs32; | ||
353 | |||
354 | regs32 = save_v86_state(regs16); | ||
355 | regs32->ax = retval; | ||
356 | __asm__ __volatile__("movl %0,%%esp\n\t" | ||
357 | "movl %1,%%ebp\n\t" | ||
358 | "jmp resume_userspace" | ||
359 | : : "r" (regs32), "r" (current_thread_info())); | ||
360 | } | 344 | } |
361 | 345 | ||
362 | static inline void set_IF(struct kernel_vm86_regs *regs) | 346 | static inline void set_IF(struct kernel_vm86_regs *regs) |
363 | { | 347 | { |
364 | VEFLAGS |= X86_EFLAGS_VIF; | 348 | VEFLAGS |= X86_EFLAGS_VIF; |
365 | if (VEFLAGS & X86_EFLAGS_VIP) | ||
366 | return_to_32bit(regs, VM86_STI); | ||
367 | } | 349 | } |
368 | 350 | ||
369 | static inline void clear_IF(struct kernel_vm86_regs *regs) | 351 | static inline void clear_IF(struct kernel_vm86_regs *regs) |
@@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs) | |||
395 | 377 | ||
396 | static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs) | 378 | static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs) |
397 | { | 379 | { |
398 | set_flags(VEFLAGS, flags, current->thread.v86mask); | 380 | set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask); |
399 | set_flags(regs->pt.flags, flags, SAFE_MASK); | 381 | set_flags(regs->pt.flags, flags, SAFE_MASK); |
400 | if (flags & X86_EFLAGS_IF) | 382 | if (flags & X86_EFLAGS_IF) |
401 | set_IF(regs); | 383 | set_IF(regs); |
@@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs | |||
405 | 387 | ||
406 | static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs) | 388 | static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs) |
407 | { | 389 | { |
408 | set_flags(VFLAGS, flags, current->thread.v86mask); | 390 | set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask); |
409 | set_flags(regs->pt.flags, flags, SAFE_MASK); | 391 | set_flags(regs->pt.flags, flags, SAFE_MASK); |
410 | if (flags & X86_EFLAGS_IF) | 392 | if (flags & X86_EFLAGS_IF) |
411 | set_IF(regs); | 393 | set_IF(regs); |
@@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) | |||
420 | if (VEFLAGS & X86_EFLAGS_VIF) | 402 | if (VEFLAGS & X86_EFLAGS_VIF) |
421 | flags |= X86_EFLAGS_IF; | 403 | flags |= X86_EFLAGS_IF; |
422 | flags |= X86_EFLAGS_IOPL; | 404 | flags |= X86_EFLAGS_IOPL; |
423 | return flags | (VEFLAGS & current->thread.v86mask); | 405 | return flags | (VEFLAGS & current->thread.vm86->veflags_mask); |
424 | } | 406 | } |
425 | 407 | ||
426 | static inline int is_revectored(int nr, struct revectored_struct *bitmap) | 408 | static inline int is_revectored(int nr, struct revectored_struct *bitmap) |
@@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i, | |||
518 | { | 500 | { |
519 | unsigned long __user *intr_ptr; | 501 | unsigned long __user *intr_ptr; |
520 | unsigned long segoffs; | 502 | unsigned long segoffs; |
503 | struct vm86 *vm86 = current->thread.vm86; | ||
521 | 504 | ||
522 | if (regs->pt.cs == BIOSSEG) | 505 | if (regs->pt.cs == BIOSSEG) |
523 | goto cannot_handle; | 506 | goto cannot_handle; |
524 | if (is_revectored(i, &KVM86->int_revectored)) | 507 | if (is_revectored(i, &vm86->int_revectored)) |
525 | goto cannot_handle; | 508 | goto cannot_handle; |
526 | if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) | 509 | if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored)) |
527 | goto cannot_handle; | 510 | goto cannot_handle; |
528 | intr_ptr = (unsigned long __user *) (i << 2); | 511 | intr_ptr = (unsigned long __user *) (i << 2); |
529 | if (get_user(segoffs, intr_ptr)) | 512 | if (get_user(segoffs, intr_ptr)) |
@@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i, | |||
542 | return; | 525 | return; |
543 | 526 | ||
544 | cannot_handle: | 527 | cannot_handle: |
545 | return_to_32bit(regs, VM86_INTx + (i << 8)); | 528 | save_v86_state(regs, VM86_INTx + (i << 8)); |
546 | } | 529 | } |
547 | 530 | ||
548 | int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) | 531 | int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) |
549 | { | 532 | { |
550 | if (VMPI.is_vm86pus) { | 533 | struct vm86 *vm86 = current->thread.vm86; |
534 | |||
535 | if (vm86->vm86plus.is_vm86pus) { | ||
551 | if ((trapno == 3) || (trapno == 1)) { | 536 | if ((trapno == 3) || (trapno == 1)) { |
552 | KVM86->regs32->ax = VM86_TRAP + (trapno << 8); | 537 | save_v86_state(regs, VM86_TRAP + (trapno << 8)); |
553 | /* setting this flag forces the code in entry_32.S to | ||
554 | the path where we call save_v86_state() and change | ||
555 | the stack pointer to KVM86->regs32 */ | ||
556 | set_thread_flag(TIF_NOTIFY_RESUME); | ||
557 | return 0; | 538 | return 0; |
558 | } | 539 | } |
559 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); | 540 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); |
@@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) | |||
574 | unsigned char __user *ssp; | 555 | unsigned char __user *ssp; |
575 | unsigned short ip, sp, orig_flags; | 556 | unsigned short ip, sp, orig_flags; |
576 | int data32, pref_done; | 557 | int data32, pref_done; |
558 | struct vm86plus_info_struct *vmpi = ¤t->thread.vm86->vm86plus; | ||
577 | 559 | ||
578 | #define CHECK_IF_IN_TRAP \ | 560 | #define CHECK_IF_IN_TRAP \ |
579 | if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ | 561 | if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \ |
580 | newflags |= X86_EFLAGS_TF | 562 | newflags |= X86_EFLAGS_TF |
581 | #define VM86_FAULT_RETURN do { \ | ||
582 | if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \ | ||
583 | return_to_32bit(regs, VM86_PICRETURN); \ | ||
584 | if (orig_flags & X86_EFLAGS_TF) \ | ||
585 | handle_vm86_trap(regs, 0, 1); \ | ||
586 | return; } while (0) | ||
587 | 563 | ||
588 | orig_flags = *(unsigned short *)®s->pt.flags; | 564 | orig_flags = *(unsigned short *)®s->pt.flags; |
589 | 565 | ||
@@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) | |||
622 | SP(regs) -= 2; | 598 | SP(regs) -= 2; |
623 | } | 599 | } |
624 | IP(regs) = ip; | 600 | IP(regs) = ip; |
625 | VM86_FAULT_RETURN; | 601 | goto vm86_fault_return; |
626 | 602 | ||
627 | /* popf */ | 603 | /* popf */ |
628 | case 0x9d: | 604 | case 0x9d: |
@@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) | |||
642 | else | 618 | else |
643 | set_vflags_short(newflags, regs); | 619 | set_vflags_short(newflags, regs); |
644 | 620 | ||
645 | VM86_FAULT_RETURN; | 621 | goto check_vip; |
646 | } | 622 | } |
647 | 623 | ||
648 | /* int xx */ | 624 | /* int xx */ |
649 | case 0xcd: { | 625 | case 0xcd: { |
650 | int intno = popb(csp, ip, simulate_sigsegv); | 626 | int intno = popb(csp, ip, simulate_sigsegv); |
651 | IP(regs) = ip; | 627 | IP(regs) = ip; |
652 | if (VMPI.vm86dbg_active) { | 628 | if (vmpi->vm86dbg_active) { |
653 | if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3]) | 629 | if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) { |
654 | return_to_32bit(regs, VM86_INTx + (intno << 8)); | 630 | save_v86_state(regs, VM86_INTx + (intno << 8)); |
631 | return; | ||
632 | } | ||
655 | } | 633 | } |
656 | do_int(regs, intno, ssp, sp); | 634 | do_int(regs, intno, ssp, sp); |
657 | return; | 635 | return; |
@@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) | |||
682 | } else { | 660 | } else { |
683 | set_vflags_short(newflags, regs); | 661 | set_vflags_short(newflags, regs); |
684 | } | 662 | } |
685 | VM86_FAULT_RETURN; | 663 | goto check_vip; |
686 | } | 664 | } |
687 | 665 | ||
688 | /* cli */ | 666 | /* cli */ |
689 | case 0xfa: | 667 | case 0xfa: |
690 | IP(regs) = ip; | 668 | IP(regs) = ip; |
691 | clear_IF(regs); | 669 | clear_IF(regs); |
692 | VM86_FAULT_RETURN; | 670 | goto vm86_fault_return; |
693 | 671 | ||
694 | /* sti */ | 672 | /* sti */ |
695 | /* | 673 | /* |
@@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) | |||
701 | case 0xfb: | 679 | case 0xfb: |
702 | IP(regs) = ip; | 680 | IP(regs) = ip; |
703 | set_IF(regs); | 681 | set_IF(regs); |
704 | VM86_FAULT_RETURN; | 682 | goto check_vip; |
705 | 683 | ||
706 | default: | 684 | default: |
707 | return_to_32bit(regs, VM86_UNKNOWN); | 685 | save_v86_state(regs, VM86_UNKNOWN); |
708 | } | 686 | } |
709 | 687 | ||
710 | return; | 688 | return; |
711 | 689 | ||
690 | check_vip: | ||
691 | if (VEFLAGS & X86_EFLAGS_VIP) { | ||
692 | save_v86_state(regs, VM86_STI); | ||
693 | return; | ||
694 | } | ||
695 | |||
696 | vm86_fault_return: | ||
697 | if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) { | ||
698 | save_v86_state(regs, VM86_PICRETURN); | ||
699 | return; | ||
700 | } | ||
701 | if (orig_flags & X86_EFLAGS_TF) | ||
702 | handle_vm86_trap(regs, 0, X86_TRAP_DB); | ||
703 | return; | ||
704 | |||
712 | simulate_sigsegv: | 705 | simulate_sigsegv: |
713 | /* FIXME: After a long discussion with Stas we finally | 706 | /* FIXME: After a long discussion with Stas we finally |
714 | * agreed, that this is wrong. Here we should | 707 | * agreed, that this is wrong. Here we should |
@@ -720,7 +713,7 @@ simulate_sigsegv: | |||
720 | * should be a mixture of the two, but how do we | 713 | * should be a mixture of the two, but how do we |
721 | * get the information? [KD] | 714 | * get the information? [KD] |
722 | */ | 715 | */ |
723 | return_to_32bit(regs, VM86_UNKNOWN); | 716 | save_v86_state(regs, VM86_UNKNOWN); |
724 | } | 717 | } |
725 | 718 | ||
726 | /* ---------------- vm86 special IRQ passing stuff ----------------- */ | 719 | /* ---------------- vm86 special IRQ passing stuff ----------------- */ |
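The vm86_32.c rework moves all vm86 state into a kmalloc'ed struct vm86, enters vm86 mode through the normal syscall return path (force_iret()) instead of the old inline-asm jump to resume_userspace, and has save_v86_state() restore the saved pt_regs in place with the return code in regs->ax. One piece worth seeing in isolation is the eflags sanitization done before entering vm86 mode; here is a small standalone sketch of that logic, with SAFE_MASK and the VM bit written out as plain constants (the function name is made up for illustration).

#include <stdio.h>

#define SAFE_MASK    0x0DD5UL   /* flag bits user code may control directly */
#define X86_VM_MASK  0x00020000UL

static unsigned long sanitize_vm86_flags(unsigned long user_flags,
                                          unsigned long kernel_flags)
{
        unsigned long flags = user_flags & SAFE_MASK;   /* take only safe bits from the user */
        flags |= kernel_flags & ~SAFE_MASK;             /* inherit the rest from protected mode */
        flags |= X86_VM_MASK;                           /* force the VM bit so IRET enters vm86 */
        return flags;
}

int main(void)
{
        printf("%#lx\n", sanitize_vm86_flags(0xFFFFFFFFUL, 0x202UL));
        return 0;
}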
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9a3e342e3cda..8d9013c5e1ee 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -1172,7 +1172,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) | |||
1172 | 1172 | ||
1173 | tsc_deadline = apic->lapic_timer.expired_tscdeadline; | 1173 | tsc_deadline = apic->lapic_timer.expired_tscdeadline; |
1174 | apic->lapic_timer.expired_tscdeadline = 0; | 1174 | apic->lapic_timer.expired_tscdeadline = 0; |
1175 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); | 1175 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); |
1176 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); | 1176 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); |
1177 | 1177 | ||
1178 | /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ | 1178 | /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ |
@@ -1240,7 +1240,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
1240 | local_irq_save(flags); | 1240 | local_irq_save(flags); |
1241 | 1241 | ||
1242 | now = apic->lapic_timer.timer.base->get_time(); | 1242 | now = apic->lapic_timer.timer.base->get_time(); |
1243 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); | 1243 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); |
1244 | if (likely(tscdeadline > guest_tsc)) { | 1244 | if (likely(tscdeadline > guest_tsc)) { |
1245 | ns = (tscdeadline - guest_tsc) * 1000000ULL; | 1245 | ns = (tscdeadline - guest_tsc) * 1000000ULL; |
1246 | do_div(ns, this_tsc_khz); | 1246 | do_div(ns, this_tsc_khz); |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 74d825716f4f..fdb8cb63a6c0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1139,7 +1139,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | |||
1139 | { | 1139 | { |
1140 | u64 tsc; | 1140 | u64 tsc; |
1141 | 1141 | ||
1142 | tsc = svm_scale_tsc(vcpu, native_read_tsc()); | 1142 | tsc = svm_scale_tsc(vcpu, rdtsc()); |
1143 | 1143 | ||
1144 | return target_tsc - tsc; | 1144 | return target_tsc - tsc; |
1145 | } | 1145 | } |
@@ -3174,7 +3174,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3174 | switch (msr_info->index) { | 3174 | switch (msr_info->index) { |
3175 | case MSR_IA32_TSC: { | 3175 | case MSR_IA32_TSC: { |
3176 | msr_info->data = svm->vmcb->control.tsc_offset + | 3176 | msr_info->data = svm->vmcb->control.tsc_offset + |
3177 | svm_scale_tsc(vcpu, native_read_tsc()); | 3177 | svm_scale_tsc(vcpu, rdtsc()); |
3178 | 3178 | ||
3179 | break; | 3179 | break; |
3180 | } | 3180 | } |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index da1590ea43fc..4a4eec30cc08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2236,7 +2236,7 @@ static u64 guest_read_tsc(void) | |||
2236 | { | 2236 | { |
2237 | u64 host_tsc, tsc_offset; | 2237 | u64 host_tsc, tsc_offset; |
2238 | 2238 | ||
2239 | rdtscll(host_tsc); | 2239 | host_tsc = rdtsc(); |
2240 | tsc_offset = vmcs_read64(TSC_OFFSET); | 2240 | tsc_offset = vmcs_read64(TSC_OFFSET); |
2241 | return host_tsc + tsc_offset; | 2241 | return host_tsc + tsc_offset; |
2242 | } | 2242 | } |
@@ -2317,7 +2317,7 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho | |||
2317 | 2317 | ||
2318 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | 2318 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) |
2319 | { | 2319 | { |
2320 | return target_tsc - native_read_tsc(); | 2320 | return target_tsc - rdtsc(); |
2321 | } | 2321 | } |
2322 | 2322 | ||
2323 | static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) | 2323 | static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4bbc2a1676c9..1e7e76e14e89 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -1441,20 +1441,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); | |||
1441 | 1441 | ||
1442 | static cycle_t read_tsc(void) | 1442 | static cycle_t read_tsc(void) |
1443 | { | 1443 | { |
1444 | cycle_t ret; | 1444 | cycle_t ret = (cycle_t)rdtsc_ordered(); |
1445 | u64 last; | 1445 | u64 last = pvclock_gtod_data.clock.cycle_last; |
1446 | |||
1447 | /* | ||
1448 | * Empirically, a fence (of type that depends on the CPU) | ||
1449 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
1450 | * with respect to loads. The various CPU manuals are unclear | ||
1451 | * as to whether rdtsc can be reordered with later loads, | ||
1452 | * but no one has ever seen it happen. | ||
1453 | */ | ||
1454 | rdtsc_barrier(); | ||
1455 | ret = (cycle_t)vget_cycles(); | ||
1456 | |||
1457 | last = pvclock_gtod_data.clock.cycle_last; | ||
1458 | 1446 | ||
1459 | if (likely(ret >= last)) | 1447 | if (likely(ret >= last)) |
1460 | return ret; | 1448 | return ret; |
@@ -1643,7 +1631,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1643 | return 1; | 1631 | return 1; |
1644 | } | 1632 | } |
1645 | if (!use_master_clock) { | 1633 | if (!use_master_clock) { |
1646 | host_tsc = native_read_tsc(); | 1634 | host_tsc = rdtsc(); |
1647 | kernel_ns = get_kernel_ns(); | 1635 | kernel_ns = get_kernel_ns(); |
1648 | } | 1636 | } |
1649 | 1637 | ||
@@ -2620,7 +2608,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2620 | 2608 | ||
2621 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | 2609 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { |
2622 | s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : | 2610 | s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : |
2623 | native_read_tsc() - vcpu->arch.last_host_tsc; | 2611 | rdtsc() - vcpu->arch.last_host_tsc; |
2624 | if (tsc_delta < 0) | 2612 | if (tsc_delta < 0) |
2625 | mark_tsc_unstable("KVM discovered backwards TSC"); | 2613 | mark_tsc_unstable("KVM discovered backwards TSC"); |
2626 | if (check_tsc_unstable()) { | 2614 | if (check_tsc_unstable()) { |
@@ -2648,7 +2636,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2648 | { | 2636 | { |
2649 | kvm_x86_ops->vcpu_put(vcpu); | 2637 | kvm_x86_ops->vcpu_put(vcpu); |
2650 | kvm_put_guest_fpu(vcpu); | 2638 | kvm_put_guest_fpu(vcpu); |
2651 | vcpu->arch.last_host_tsc = native_read_tsc(); | 2639 | vcpu->arch.last_host_tsc = rdtsc(); |
2652 | } | 2640 | } |
2653 | 2641 | ||
2654 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2642 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
@@ -6387,7 +6375,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6387 | hw_breakpoint_restore(); | 6375 | hw_breakpoint_restore(); |
6388 | 6376 | ||
6389 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, | 6377 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, |
6390 | native_read_tsc()); | 6378 | rdtsc()); |
6391 | 6379 | ||
6392 | vcpu->mode = OUTSIDE_GUEST_MODE; | 6380 | vcpu->mode = OUTSIDE_GUEST_MODE; |
6393 | smp_wmb(); | 6381 | smp_wmb(); |
@@ -7196,7 +7184,7 @@ int kvm_arch_hardware_enable(void) | |||
7196 | if (ret != 0) | 7184 | if (ret != 0) |
7197 | return ret; | 7185 | return ret; |
7198 | 7186 | ||
7199 | local_tsc = native_read_tsc(); | 7187 | local_tsc = rdtsc(); |
7200 | stable = !check_tsc_unstable(); | 7188 | stable = !check_tsc_unstable(); |
7201 | list_for_each_entry(kvm, &vm_list, vm_list) { | 7189 | list_for_each_entry(kvm, &vm_list, vm_list) { |
7202 | kvm_for_each_vcpu(i, vcpu, kvm) { | 7190 | kvm_for_each_vcpu(i, vcpu, kvm) { |
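In KVM's read_tsc() the explicit barrier plus vget_cycles() sequence collapses into rdtsc_ordered(), but the monotonic clamp against cycle_last stays. Here is a standalone sketch of that clamp under simplified assumptions: cycle_last is a plain global rather than pvclock_gtod_data.clock.cycle_last, and an LFENCE stands in for rdtsc_ordered().

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

static uint64_t cycle_last;                          /* last value published by the clocksource */

static uint64_t read_tsc_clamped(void)
{
        _mm_lfence();
        uint64_t ret = __rdtsc();                    /* rdtsc_ordered() stand-in */

        if (ret >= cycle_last)                       /* the overwhelmingly common case */
                return ret;

        return cycle_last;                           /* never report a value behind cycle_last */
}

int main(void)
{
        cycle_last = read_tsc_clamped();
        printf("tsc: %llu\n", (unsigned long long)read_tsc_clamped());
        return 0;
}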
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 39d6a3db0b96..e912b2f6d36e 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/processor.h> | 20 | #include <asm/processor.h> |
21 | #include <asm/delay.h> | 21 | #include <asm/delay.h> |
22 | #include <asm/timer.h> | 22 | #include <asm/timer.h> |
23 | #include <asm/mwait.h> | ||
23 | 24 | ||
24 | #ifdef CONFIG_SMP | 25 | #ifdef CONFIG_SMP |
25 | # include <asm/smp.h> | 26 | # include <asm/smp.h> |
@@ -49,16 +50,14 @@ static void delay_loop(unsigned long loops) | |||
49 | /* TSC based delay: */ | 50 | /* TSC based delay: */ |
50 | static void delay_tsc(unsigned long __loops) | 51 | static void delay_tsc(unsigned long __loops) |
51 | { | 52 | { |
52 | u32 bclock, now, loops = __loops; | 53 | u64 bclock, now, loops = __loops; |
53 | int cpu; | 54 | int cpu; |
54 | 55 | ||
55 | preempt_disable(); | 56 | preempt_disable(); |
56 | cpu = smp_processor_id(); | 57 | cpu = smp_processor_id(); |
57 | rdtsc_barrier(); | 58 | bclock = rdtsc_ordered(); |
58 | rdtscl(bclock); | ||
59 | for (;;) { | 59 | for (;;) { |
60 | rdtsc_barrier(); | 60 | now = rdtsc_ordered(); |
61 | rdtscl(now); | ||
62 | if ((now - bclock) >= loops) | 61 | if ((now - bclock) >= loops) |
63 | break; | 62 | break; |
64 | 63 | ||
@@ -79,14 +78,51 @@ static void delay_tsc(unsigned long __loops) | |||
79 | if (unlikely(cpu != smp_processor_id())) { | 78 | if (unlikely(cpu != smp_processor_id())) { |
80 | loops -= (now - bclock); | 79 | loops -= (now - bclock); |
81 | cpu = smp_processor_id(); | 80 | cpu = smp_processor_id(); |
82 | rdtsc_barrier(); | 81 | bclock = rdtsc_ordered(); |
83 | rdtscl(bclock); | ||
84 | } | 82 | } |
85 | } | 83 | } |
86 | preempt_enable(); | 84 | preempt_enable(); |
87 | } | 85 | } |
88 | 86 | ||
89 | /* | 87 | /* |
88 | * On some AMD platforms, MWAITX has a configurable 32-bit timer that | ||
89 | * counts at TSC frequency. The input value is the timer count to | ||
90 | * wait for; MWAITX exits when the timer expires. | ||
91 | */ | ||
92 | static void delay_mwaitx(unsigned long __loops) | ||
93 | { | ||
94 | u64 start, end, delay, loops = __loops; | ||
95 | |||
96 | start = rdtsc_ordered(); | ||
97 | |||
98 | for (;;) { | ||
99 | delay = min_t(u64, MWAITX_MAX_LOOPS, loops); | ||
100 | |||
101 | /* | ||
102 | * Use cpu_tss as a cacheline-aligned, seldom | ||
103 | * accessed per-cpu variable as the monitor target. | ||
104 | */ | ||
105 | __monitorx(this_cpu_ptr(&cpu_tss), 0, 0); | ||
106 | |||
107 | /* | ||
108 | * AMD, like Intel, supports the EAX hint; EAX=0xf means | ||
109 | * "do not enter any deep C-state", and we use it | ||
110 | * here in delay() to minimize wakeup latency. | ||
111 | */ | ||
112 | __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); | ||
113 | |||
114 | end = rdtsc_ordered(); | ||
115 | |||
116 | if (loops <= end - start) | ||
117 | break; | ||
118 | |||
119 | loops -= end - start; | ||
120 | |||
121 | start = end; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* | ||
90 | * Since we calibrate only once at boot, this | 126 | * Since we calibrate only once at boot, this |
91 | * function should be set once at boot and not changed | 127 | * function should be set once at boot and not changed |
92 | */ | 128 | */ |
@@ -94,13 +130,19 @@ static void (*delay_fn)(unsigned long) = delay_loop; | |||
94 | 130 | ||
95 | void use_tsc_delay(void) | 131 | void use_tsc_delay(void) |
96 | { | 132 | { |
97 | delay_fn = delay_tsc; | 133 | if (delay_fn == delay_loop) |
134 | delay_fn = delay_tsc; | ||
135 | } | ||
136 | |||
137 | void use_mwaitx_delay(void) | ||
138 | { | ||
139 | delay_fn = delay_mwaitx; | ||
98 | } | 140 | } |
99 | 141 | ||
100 | int read_current_timer(unsigned long *timer_val) | 142 | int read_current_timer(unsigned long *timer_val) |
101 | { | 143 | { |
102 | if (delay_fn == delay_tsc) { | 144 | if (delay_fn == delay_tsc) { |
103 | rdtscll(*timer_val); | 145 | *timer_val = rdtsc(); |
104 | return 0; | 146 | return 0; |
105 | } | 147 | } |
106 | return -1; | 148 | return -1; |
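delay.c gets two changes: delay_tsc() widens its counters to u64 and uses rdtsc_ordered(), and a new delay_mwaitx() lets AMD parts sleep on the MWAITX timer instead of spinning. The core busy-wait shape of delay_tsc() can be sketched in userspace as below; the kernel version additionally pins itself with preempt_disable() and restarts the window if the task migrates to another CPU, which this sketch omits.

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

static inline uint64_t rdtsc_ordered_u(void)
{
        _mm_lfence();                /* simplified stand-in for the kernel's rdtsc_ordered() */
        return __rdtsc();
}

static void delay_cycles(uint64_t loops)
{
        uint64_t bclock = rdtsc_ordered_u();

        for (;;) {
                uint64_t now = rdtsc_ordered_u();
                if (now - bclock >= loops)
                        break;
                _mm_pause();         /* be polite to the sibling hyperthread */
        }
}

int main(void)
{
        delay_cycles(1000000);       /* ~1M cycles; wall time depends on the TSC frequency */
        puts("done");
        return 0;
}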
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 8300db71c2a6..8db26591d91a 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/stddef.h> | 20 | #include <linux/stddef.h> |
21 | 21 | ||
22 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
23 | #include <asm/vm86.h> | ||
23 | 24 | ||
24 | #include "fpu_system.h" | 25 | #include "fpu_system.h" |
25 | #include "exception.h" | 26 | #include "exception.h" |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9dc909841739..eef44d9a3f77 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ | 20 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ |
21 | #include <asm/fixmap.h> /* VSYSCALL_ADDR */ | 21 | #include <asm/fixmap.h> /* VSYSCALL_ADDR */ |
22 | #include <asm/vsyscall.h> /* emulate_vsyscall */ | 22 | #include <asm/vsyscall.h> /* emulate_vsyscall */ |
23 | #include <asm/vm86.h> /* struct vm86 */ | ||
23 | 24 | ||
24 | #define CREATE_TRACE_POINTS | 25 | #define CREATE_TRACE_POINTS |
25 | #include <asm/trace/exceptions.h> | 26 | #include <asm/trace/exceptions.h> |
@@ -301,14 +302,16 @@ static inline void | |||
301 | check_v8086_mode(struct pt_regs *regs, unsigned long address, | 302 | check_v8086_mode(struct pt_regs *regs, unsigned long address, |
302 | struct task_struct *tsk) | 303 | struct task_struct *tsk) |
303 | { | 304 | { |
305 | #ifdef CONFIG_VM86 | ||
304 | unsigned long bit; | 306 | unsigned long bit; |
305 | 307 | ||
306 | if (!v8086_mode(regs)) | 308 | if (!v8086_mode(regs) || !tsk->thread.vm86) |
307 | return; | 309 | return; |
308 | 310 | ||
309 | bit = (address - 0xA0000) >> PAGE_SHIFT; | 311 | bit = (address - 0xA0000) >> PAGE_SHIFT; |
310 | if (bit < 32) | 312 | if (bit < 32) |
311 | tsk->thread.screen_bitmap |= 1 << bit; | 313 | tsk->thread.vm86->screen_bitmap |= 1 << bit; |
314 | #endif | ||
312 | } | 315 | } |
313 | 316 | ||
314 | static bool low_pfn(unsigned long pfn) | 317 | static bool low_pfn(unsigned long pfn) |
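In fault.c the screen_bitmap bookkeeping moves into the now-optional tsk->thread.vm86 structure and becomes CONFIG_VM86-only. The bit computation itself is unchanged: each bit covers one 4 KiB page of the 0xA0000-0xBFFFF VGA window. A tiny standalone illustration:

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long screen_bit(unsigned long address)
{
        return (address - 0xA0000) >> PAGE_SHIFT;    /* 32 pages cover the 128 KiB VGA window */
}

int main(void)
{
        unsigned long bitmap = 0;
        unsigned long bit = screen_bit(0xB8000);     /* text-mode framebuffer page */
        if (bit < 32)
                bitmap |= 1UL << bit;
        printf("bitmap: %#lx\n", bitmap);            /* prints 0x1000000 */
        return 0;
}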
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index b9531d343134..755481f14d90 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h | |||
@@ -45,17 +45,4 @@ | |||
45 | #define read_barrier_depends() do { } while (0) | 45 | #define read_barrier_depends() do { } while (0) |
46 | #define smp_read_barrier_depends() do { } while (0) | 46 | #define smp_read_barrier_depends() do { } while (0) |
47 | 47 | ||
48 | /* | ||
49 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | ||
50 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | ||
51 | * code region. | ||
52 | * | ||
53 | * (Could use an alternative three way for this if there was one.) | ||
54 | */ | ||
55 | static inline void rdtsc_barrier(void) | ||
56 | { | ||
57 | alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, | ||
58 | "lfence", X86_FEATURE_LFENCE_RDTSC); | ||
59 | } | ||
60 | |||
61 | #endif | 48 | #endif |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 11d6fb4e8483..d9cfa452da9d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1215,11 +1215,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { | |||
1215 | .read_msr = xen_read_msr_safe, | 1215 | .read_msr = xen_read_msr_safe, |
1216 | .write_msr = xen_write_msr_safe, | 1216 | .write_msr = xen_write_msr_safe, |
1217 | 1217 | ||
1218 | .read_tsc = native_read_tsc, | ||
1219 | .read_pmc = native_read_pmc, | 1218 | .read_pmc = native_read_pmc, |
1220 | 1219 | ||
1221 | .read_tscp = native_read_tscp, | ||
1222 | |||
1223 | .iret = xen_iret, | 1220 | .iret = xen_iret, |
1224 | #ifdef CONFIG_X86_64 | 1221 | #ifdef CONFIG_X86_64 |
1225 | .usergs_sysret32 = xen_sysret32, | 1222 | .usergs_sysret32 = xen_sysret32, |
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fcb929ec5304..7898de054f4e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c | |||
@@ -766,7 +766,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu) | |||
766 | local_irq_save(flags); | 766 | local_irq_save(flags); |
767 | rdmsrl(MSR_IA32_APERF, aperf); | 767 | rdmsrl(MSR_IA32_APERF, aperf); |
768 | rdmsrl(MSR_IA32_MPERF, mperf); | 768 | rdmsrl(MSR_IA32_MPERF, mperf); |
769 | tsc = native_read_tsc(); | 769 | tsc = rdtsc(); |
770 | local_irq_restore(flags); | 770 | local_irq_restore(flags); |
771 | 771 | ||
772 | cpu->last_sample_time = cpu->sample.time; | 772 | cpu->last_sample_time = cpu->sample.time; |
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index e853a2134680..4a2a9e370be7 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c | |||
@@ -149,9 +149,9 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |||
149 | 149 | ||
150 | for(i = 0; i < 50; i++) { | 150 | for(i = 0; i < 50; i++) { |
151 | local_irq_save(flags); | 151 | local_irq_save(flags); |
152 | rdtscl(t1); | 152 | t1 = rdtsc(); |
153 | for (t = 0; t < 50; t++) gameport_read(gameport); | 153 | for (t = 0; t < 50; t++) gameport_read(gameport); |
154 | rdtscl(t2); | 154 | t2 = rdtsc(); |
155 | local_irq_restore(flags); | 155 | local_irq_restore(flags); |
156 | udelay(i * 10); | 156 | udelay(i * 10); |
157 | if (t2 - t1 < tx) tx = t2 - t1; | 157 | if (t2 - t1 < tx) tx = t2 - t1; |
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 4284080e481d..6f8b084e13d0 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c | |||
@@ -143,7 +143,7 @@ struct analog_port { | |||
143 | 143 | ||
144 | #include <linux/i8253.h> | 144 | #include <linux/i8253.h> |
145 | 145 | ||
146 | #define GET_TIME(x) do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0) | 146 | #define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0) |
147 | #define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) | 147 | #define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) |
148 | #define TIME_NAME (cpu_has_tsc?"TSC":"PIT") | 148 | #define TIME_NAME (cpu_has_tsc?"TSC":"PIT") |
149 | static unsigned int get_time_pit(void) | 149 | static unsigned int get_time_pit(void) |
@@ -160,7 +160,7 @@ static unsigned int get_time_pit(void) | |||
160 | return count; | 160 | return count; |
161 | } | 161 | } |
162 | #elif defined(__x86_64__) | 162 | #elif defined(__x86_64__) |
163 | #define GET_TIME(x) rdtscl(x) | 163 | #define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) |
164 | #define DELTA(x,y) ((y)-(x)) | 164 | #define DELTA(x,y) ((y)-(x)) |
165 | #define TIME_NAME "TSC" | 165 | #define TIME_NAME "TSC" |
166 | #elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) | 166 | #elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) |
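The joystick and gameport drivers replace rdtscl(), which stored only the low 32 bits, with an explicit (unsigned int)rdtsc() cast, so the value they work with is still a 32-bit tick count. Assuming the driver's time variables remain 32-bit unsigned (as GET_TIME() suggests), the truncation is harmless because DELTA() subtracts unsigned values, which wraps correctly; a minimal demonstration:

#include <stdint.h>
#include <stdio.h>

static uint32_t delta(uint32_t x, uint32_t y)
{
        return y - x;                    /* wraps correctly even if y overflowed past x */
}

int main(void)
{
        uint32_t t1 = 0xFFFFFFF0u;       /* just before the 32-bit counter wraps */
        uint32_t t2 = 0x00000010u;       /* just after */
        printf("elapsed: %u ticks\n", delta(t1, t2));    /* prints 32 */
        return 0;
}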
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 83c7cce0d172..72c9f1f352b4 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c | |||
@@ -638,7 +638,7 @@ static int receive(struct net_device *dev, int cnt) | |||
638 | #define GETTICK(x) \ | 638 | #define GETTICK(x) \ |
639 | ({ \ | 639 | ({ \ |
640 | if (cpu_has_tsc) \ | 640 | if (cpu_has_tsc) \ |
641 | rdtscl(x); \ | 641 | x = (unsigned int)rdtsc(); \ |
642 | }) | 642 | }) |
643 | #else /* __i386__ */ | 643 | #else /* __i386__ */ |
644 | #define GETTICK(x) | 644 | #define GETTICK(x) |
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index f35ed53adaac..d4cda5e9600e 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c | |||
@@ -1924,6 +1924,9 @@ static void adpt_alpha_info(sysInfo_S* si) | |||
1924 | #endif | 1924 | #endif |
1925 | 1925 | ||
1926 | #if defined __i386__ | 1926 | #if defined __i386__ |
1927 | |||
1928 | #include <uapi/asm/vm86.h> | ||
1929 | |||
1927 | static void adpt_i386_info(sysInfo_S* si) | 1930 | static void adpt_i386_info(sysInfo_S* si) |
1928 | { | 1931 | { |
1929 | // This is all the info we need for now | 1932 | // This is all the info we need for now |
diff --git a/drivers/staging/media/lirc/lirc_serial.c b/drivers/staging/media/lirc/lirc_serial.c index dc7984455c3a..465796a686c4 100644 --- a/drivers/staging/media/lirc/lirc_serial.c +++ b/drivers/staging/media/lirc/lirc_serial.c | |||
@@ -327,9 +327,6 @@ static void safe_udelay(unsigned long usecs) | |||
327 | * time | 327 | * time |
328 | */ | 328 | */ |
329 | 329 | ||
330 | /* So send_pulse can quickly convert microseconds to clocks */ | ||
331 | static unsigned long conv_us_to_clocks; | ||
332 | |||
333 | static int init_timing_params(unsigned int new_duty_cycle, | 330 | static int init_timing_params(unsigned int new_duty_cycle, |
334 | unsigned int new_freq) | 331 | unsigned int new_freq) |
335 | { | 332 | { |
@@ -344,7 +341,6 @@ static int init_timing_params(unsigned int new_duty_cycle, | |||
344 | /* How many clocks in a microsecond?, avoiding long long divide */ | 341 | /* How many clocks in a microsecond?, avoiding long long divide */ |
345 | work = loops_per_sec; | 342 | work = loops_per_sec; |
346 | work *= 4295; /* 4295 = 2^32 / 1e6 */ | 343 | work *= 4295; /* 4295 = 2^32 / 1e6 */ |
347 | conv_us_to_clocks = work >> 32; | ||
348 | 344 | ||
349 | /* | 345 | /* |
350 | * Carrier period in clocks, approach good up to 32GHz clock, | 346 | * Carrier period in clocks, approach good up to 32GHz clock, |
@@ -357,10 +353,9 @@ static int init_timing_params(unsigned int new_duty_cycle, | |||
357 | pulse_width = period * duty_cycle / 100; | 353 | pulse_width = period * duty_cycle / 100; |
358 | space_width = period - pulse_width; | 354 | space_width = period - pulse_width; |
359 | dprintk("in init_timing_params, freq=%d, duty_cycle=%d, " | 355 | dprintk("in init_timing_params, freq=%d, duty_cycle=%d, " |
360 | "clk/jiffy=%ld, pulse=%ld, space=%ld, " | 356 | "clk/jiffy=%ld, pulse=%ld, space=%ld\n", |
361 | "conv_us_to_clocks=%ld\n", | ||
362 | freq, duty_cycle, __this_cpu_read(cpu_info.loops_per_jiffy), | 357 | freq, duty_cycle, __this_cpu_read(cpu_info.loops_per_jiffy), |
363 | pulse_width, space_width, conv_us_to_clocks); | 358 | pulse_width, space_width); |
364 | return 0; | 359 | return 0; |
365 | } | 360 | } |
366 | #else /* ! USE_RDTSC */ | 361 | #else /* ! USE_RDTSC */ |
@@ -431,63 +426,14 @@ static long send_pulse_irdeo(unsigned long length) | |||
431 | return ret; | 426 | return ret; |
432 | } | 427 | } |
433 | 428 | ||
434 | #ifdef USE_RDTSC | ||
435 | /* Version that uses Pentium rdtsc instruction to measure clocks */ | ||
436 | |||
437 | /* | ||
438 | * This version does sub-microsecond timing using rdtsc instruction, | ||
439 | * and does away with the fudged LIRC_SERIAL_TRANSMITTER_LATENCY | ||
440 | * Implicitly i586 architecture... - Steve | ||
441 | */ | ||
442 | |||
443 | static long send_pulse_homebrew_softcarrier(unsigned long length) | ||
444 | { | ||
445 | int flag; | ||
446 | unsigned long target, start, now; | ||
447 | |||
448 | /* Get going quick as we can */ | ||
449 | rdtscl(start); | ||
450 | on(); | ||
451 | /* Convert length from microseconds to clocks */ | ||
452 | length *= conv_us_to_clocks; | ||
453 | /* And loop till time is up - flipping at right intervals */ | ||
454 | now = start; | ||
455 | target = pulse_width; | ||
456 | flag = 1; | ||
457 | /* | ||
458 | * FIXME: This looks like a hard busy wait, without even an occasional, | ||
459 | * polite, cpu_relax() call. There's got to be a better way? | ||
460 | * | ||
461 | * The i2c code has the result of a lot of bit-banging work, I wonder if | ||
462 | * there's something there which could be helpful here. | ||
463 | */ | ||
464 | while ((now - start) < length) { | ||
465 | /* Delay till flip time */ | ||
466 | do { | ||
467 | rdtscl(now); | ||
468 | } while ((now - start) < target); | ||
469 | |||
470 | /* flip */ | ||
471 | if (flag) { | ||
472 | rdtscl(now); | ||
473 | off(); | ||
474 | target += space_width; | ||
475 | } else { | ||
476 | rdtscl(now); on(); | ||
477 | target += pulse_width; | ||
478 | } | ||
479 | flag = !flag; | ||
480 | } | ||
481 | rdtscl(now); | ||
482 | return ((now - start) - length) / conv_us_to_clocks; | ||
483 | } | ||
484 | #else /* ! USE_RDTSC */ | ||
485 | /* Version using udelay() */ | 429 | /* Version using udelay() */ |
486 | 430 | ||
487 | /* | 431 | /* |
488 | * here we use fixed point arithmetic, with 8 | 432 | * here we use fixed point arithmetic, with 8 |
489 | * fractional bits. that gets us within 0.1% or so of the right average | 433 | * fractional bits. that gets us within 0.1% or so of the right average |
490 | * frequency, albeit with some jitter in pulse length - Steve | 434 | * frequency, albeit with some jitter in pulse length - Steve |
435 | * | ||
436 | * This should use ndelay instead. | ||
491 | */ | 437 | */ |
492 | 438 | ||
493 | /* To match 8 fractional bits used for pulse/space length */ | 439 | /* To match 8 fractional bits used for pulse/space length */ |
@@ -520,7 +466,6 @@ static long send_pulse_homebrew_softcarrier(unsigned long length) | |||
520 | } | 466 | } |
521 | return (actual-length) >> 8; | 467 | return (actual-length) >> 8; |
522 | } | 468 | } |
523 | #endif /* USE_RDTSC */ | ||
524 | 469 | ||
525 | static long send_pulse_homebrew(unsigned long length) | 470 | static long send_pulse_homebrew(unsigned long length) |
526 | { | 471 | { |
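The hunk above drops the rdtsc-based soft-carrier sender and keeps only the udelay() version, whose comment describes 8-bit fixed-point arithmetic for the pulse and space widths. A self-contained sketch of that scheme; the names, the stand-in delay function and the 38 kHz figures are illustrative, not the driver's:

#include <stdio.h>

static void fake_udelay(unsigned long us) { (void)us; /* stand-in for udelay() */ }

/* pulse_width and space_width carry 8 fractional bits (1/256 us units); delays
 * are issued in whole microseconds and the fractional remainder is carried
 * forward, so the average carrier frequency stays accurate. */
static long soft_carrier(unsigned long length_us,
			 unsigned long pulse_width, unsigned long space_width)
{
	unsigned long length = length_us << 8;	/* convert to 1/256 us */
	unsigned long actual = 0, target = 0;	/* also in 1/256 us */
	int flag = 0;

	while (actual < length) {
		unsigned long d;

		target += flag ? space_width : pulse_width;
		d = (target - actual) >> 8;	/* whole microseconds to wait */
		if (d)
			fake_udelay(d);
		actual += d << 8;		/* fractional error carries over */
		flag = !flag;
	}
	return (long)(actual - length) >> 8;	/* overshoot, in microseconds */
}

int main(void)
{
	/* e.g. a ~38 kHz carrier: half-periods of roughly 3368/256 us each */
	printf("overshoot: %ld us\n", soft_carrier(1000, 3368, 3369));
	return 0;
}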
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 5820e8513927..2ac0c704bcb8 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c | |||
@@ -340,7 +340,7 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, | |||
340 | 340 | ||
341 | /* check result for the last window */ | 341 | /* check result for the last window */ |
342 | msr_now = pkg_state_counter(); | 342 | msr_now = pkg_state_counter(); |
343 | rdtscll(tsc_now); | 343 | tsc_now = rdtsc(); |
344 | 344 | ||
345 | /* calculate pkg cstate vs tsc ratio */ | 345 | /* calculate pkg cstate vs tsc ratio */ |
346 | if (!msr_last || !tsc_last) | 346 | if (!msr_last || !tsc_last) |
@@ -482,7 +482,7 @@ static void poll_pkg_cstate(struct work_struct *dummy) | |||
482 | u64 val64; | 482 | u64 val64; |
483 | 483 | ||
484 | msr_now = pkg_state_counter(); | 484 | msr_now = pkg_state_counter(); |
485 | rdtscll(tsc_now); | 485 | tsc_now = rdtsc(); |
486 | jiffies_now = jiffies; | 486 | jiffies_now = jiffies; |
487 | 487 | ||
488 | /* calculate pkg cstate vs tsc ratio */ | 488 | /* calculate pkg cstate vs tsc ratio */ |
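For context, the two samples taken above (the package C-state residency counter and the TSC) feed a ratio calculation over the polling window. A stand-alone sketch of that arithmetic, with illustrative names rather than the driver's:

#include <stdint.h>
#include <stdio.h>

static unsigned int cstate_ratio_pct(uint64_t msr_last, uint64_t msr_now,
				     uint64_t tsc_last, uint64_t tsc_now)
{
	uint64_t msr_delta = msr_now - msr_last;
	uint64_t tsc_delta = tsc_now - tsc_last;

	if (!tsc_delta)
		return 0;	/* no time elapsed, nothing to report */
	return (unsigned int)(msr_delta * 100 / tsc_delta);
}

int main(void)
{
	/* e.g. the package counter advanced 40M cycles while the TSC advanced 100M */
	printf("residency: %u%%\n",
	       cstate_ratio_pct(0, 40000000ULL, 0, 100000000ULL));
	return 0;
}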
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index b96bd299966f..008fc67d0d96 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h | |||
@@ -49,13 +49,28 @@ static inline void exception_exit(enum ctx_state prev_ctx) | |||
49 | } | 49 | } |
50 | } | 50 | } |
51 | 51 | ||
52 | |||
53 | /** | ||
54 | * ct_state() - return the current context tracking state if known | ||
55 | * | ||
56 | * Returns the current cpu's context tracking state if context tracking | ||
57 | * is enabled. If context tracking is disabled, returns | ||
58 | * CONTEXT_DISABLED. This should be used primarily for debugging. | ||
59 | */ | ||
60 | static inline enum ctx_state ct_state(void) | ||
61 | { | ||
62 | return context_tracking_is_enabled() ? | ||
63 | this_cpu_read(context_tracking.state) : CONTEXT_DISABLED; | ||
64 | } | ||
52 | #else | 65 | #else |
53 | static inline void user_enter(void) { } | 66 | static inline void user_enter(void) { } |
54 | static inline void user_exit(void) { } | 67 | static inline void user_exit(void) { } |
55 | static inline enum ctx_state exception_enter(void) { return 0; } | 68 | static inline enum ctx_state exception_enter(void) { return 0; } |
56 | static inline void exception_exit(enum ctx_state prev_ctx) { } | 69 | static inline void exception_exit(enum ctx_state prev_ctx) { } |
70 | static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } | ||
57 | #endif /* !CONFIG_CONTEXT_TRACKING */ | 71 | #endif /* !CONFIG_CONTEXT_TRACKING */ |
58 | 72 | ||
73 | #define CT_WARN_ON(cond) WARN_ON(context_tracking_is_enabled() && (cond)) | ||
59 | 74 | ||
60 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | 75 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE |
61 | extern void context_tracking_init(void); | 76 | extern void context_tracking_init(void); |
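The new ct_state() helper and CT_WARN_ON() macro are intended for debugging assertions in entry code. A hypothetical handler showing the intended usage pattern; the function name is illustrative, while ct_state(), CT_WARN_ON(), user_exit() and user_enter() come from the header above:

#include <linux/context_tracking.h>

void example_entry_from_user(void)
{
	/* No-op unless context tracking is enabled; otherwise warn if we
	 * were not entered from user context. */
	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit();		/* account the transition to kernel context */

	/* ... handle the exception or system call ... */

	user_enter();		/* back to user context before returning */
}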
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 678ecdf90cf6..ee956c528fab 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h | |||
@@ -14,6 +14,7 @@ struct context_tracking { | |||
14 | bool active; | 14 | bool active; |
15 | int recursion; | 15 | int recursion; |
16 | enum ctx_state { | 16 | enum ctx_state { |
17 | CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ | ||
17 | CONTEXT_KERNEL = 0, | 18 | CONTEXT_KERNEL = 0, |
18 | CONTEXT_USER, | 19 | CONTEXT_USER, |
19 | CONTEXT_GUEST, | 20 | CONTEXT_GUEST, |
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 16c5ed5a627c..47dd0cebd204 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h | |||
@@ -286,7 +286,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |||
286 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n | 286 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n |
287 | */ | 287 | */ |
288 | 288 | ||
289 | static inline raw_spinlock_t *spinlock_check(spinlock_t *lock) | 289 | static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) |
290 | { | 290 | { |
291 | return &lock->rlock; | 291 | return &lock->rlock; |
292 | } | 292 | } |
@@ -297,17 +297,17 @@ do { \ | |||
297 | raw_spin_lock_init(&(_lock)->rlock); \ | 297 | raw_spin_lock_init(&(_lock)->rlock); \ |
298 | } while (0) | 298 | } while (0) |
299 | 299 | ||
300 | static inline void spin_lock(spinlock_t *lock) | 300 | static __always_inline void spin_lock(spinlock_t *lock) |
301 | { | 301 | { |
302 | raw_spin_lock(&lock->rlock); | 302 | raw_spin_lock(&lock->rlock); |
303 | } | 303 | } |
304 | 304 | ||
305 | static inline void spin_lock_bh(spinlock_t *lock) | 305 | static __always_inline void spin_lock_bh(spinlock_t *lock) |
306 | { | 306 | { |
307 | raw_spin_lock_bh(&lock->rlock); | 307 | raw_spin_lock_bh(&lock->rlock); |
308 | } | 308 | } |
309 | 309 | ||
310 | static inline int spin_trylock(spinlock_t *lock) | 310 | static __always_inline int spin_trylock(spinlock_t *lock) |
311 | { | 311 | { |
312 | return raw_spin_trylock(&lock->rlock); | 312 | return raw_spin_trylock(&lock->rlock); |
313 | } | 313 | } |
@@ -327,7 +327,7 @@ do { \ | |||
327 | raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ | 327 | raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ |
328 | } while (0) | 328 | } while (0) |
329 | 329 | ||
330 | static inline void spin_lock_irq(spinlock_t *lock) | 330 | static __always_inline void spin_lock_irq(spinlock_t *lock) |
331 | { | 331 | { |
332 | raw_spin_lock_irq(&lock->rlock); | 332 | raw_spin_lock_irq(&lock->rlock); |
333 | } | 333 | } |
@@ -342,32 +342,32 @@ do { \ | |||
342 | raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ | 342 | raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ |
343 | } while (0) | 343 | } while (0) |
344 | 344 | ||
345 | static inline void spin_unlock(spinlock_t *lock) | 345 | static __always_inline void spin_unlock(spinlock_t *lock) |
346 | { | 346 | { |
347 | raw_spin_unlock(&lock->rlock); | 347 | raw_spin_unlock(&lock->rlock); |
348 | } | 348 | } |
349 | 349 | ||
350 | static inline void spin_unlock_bh(spinlock_t *lock) | 350 | static __always_inline void spin_unlock_bh(spinlock_t *lock) |
351 | { | 351 | { |
352 | raw_spin_unlock_bh(&lock->rlock); | 352 | raw_spin_unlock_bh(&lock->rlock); |
353 | } | 353 | } |
354 | 354 | ||
355 | static inline void spin_unlock_irq(spinlock_t *lock) | 355 | static __always_inline void spin_unlock_irq(spinlock_t *lock) |
356 | { | 356 | { |
357 | raw_spin_unlock_irq(&lock->rlock); | 357 | raw_spin_unlock_irq(&lock->rlock); |
358 | } | 358 | } |
359 | 359 | ||
360 | static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) | 360 | static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) |
361 | { | 361 | { |
362 | raw_spin_unlock_irqrestore(&lock->rlock, flags); | 362 | raw_spin_unlock_irqrestore(&lock->rlock, flags); |
363 | } | 363 | } |
364 | 364 | ||
365 | static inline int spin_trylock_bh(spinlock_t *lock) | 365 | static __always_inline int spin_trylock_bh(spinlock_t *lock) |
366 | { | 366 | { |
367 | return raw_spin_trylock_bh(&lock->rlock); | 367 | return raw_spin_trylock_bh(&lock->rlock); |
368 | } | 368 | } |
369 | 369 | ||
370 | static inline int spin_trylock_irq(spinlock_t *lock) | 370 | static __always_inline int spin_trylock_irq(spinlock_t *lock) |
371 | { | 371 | { |
372 | return raw_spin_trylock_irq(&lock->rlock); | 372 | return raw_spin_trylock_irq(&lock->rlock); |
373 | } | 373 | } |
@@ -377,22 +377,22 @@ static inline int spin_trylock_irq(spinlock_t *lock) | |||
377 | raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ | 377 | raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ |
378 | }) | 378 | }) |
379 | 379 | ||
380 | static inline void spin_unlock_wait(spinlock_t *lock) | 380 | static __always_inline void spin_unlock_wait(spinlock_t *lock) |
381 | { | 381 | { |
382 | raw_spin_unlock_wait(&lock->rlock); | 382 | raw_spin_unlock_wait(&lock->rlock); |
383 | } | 383 | } |
384 | 384 | ||
385 | static inline int spin_is_locked(spinlock_t *lock) | 385 | static __always_inline int spin_is_locked(spinlock_t *lock) |
386 | { | 386 | { |
387 | return raw_spin_is_locked(&lock->rlock); | 387 | return raw_spin_is_locked(&lock->rlock); |
388 | } | 388 | } |
389 | 389 | ||
390 | static inline int spin_is_contended(spinlock_t *lock) | 390 | static __always_inline int spin_is_contended(spinlock_t *lock) |
391 | { | 391 | { |
392 | return raw_spin_is_contended(&lock->rlock); | 392 | return raw_spin_is_contended(&lock->rlock); |
393 | } | 393 | } |
394 | 394 | ||
395 | static inline int spin_can_lock(spinlock_t *lock) | 395 | static __always_inline int spin_can_lock(spinlock_t *lock) |
396 | { | 396 | { |
397 | return raw_spin_can_lock(&lock->rlock); | 397 | return raw_spin_can_lock(&lock->rlock); |
398 | } | 398 | } |
diff --git a/kernel/notifier.c b/kernel/notifier.c index ae9fc7cc360e..fd2c9acbcc19 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
@@ -544,6 +544,8 @@ int notrace notify_die(enum die_val val, const char *str, | |||
544 | .signr = sig, | 544 | .signr = sig, |
545 | 545 | ||
546 | }; | 546 | }; |
547 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | ||
548 | "notify_die called but RCU thinks we're quiescent"); | ||
547 | return atomic_notifier_call_chain(&die_chain, val, &args); | 549 | return atomic_notifier_call_chain(&die_chain, val, &args); |
548 | } | 550 | } |
549 | NOKPROBE_SYMBOL(notify_die); | 551 | NOKPROBE_SYMBOL(notify_die); |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7995ef5868d8..ca7d84f438f1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -140,6 +140,7 @@ cond_syscall(sys_sgetmask); | |||
140 | cond_syscall(sys_ssetmask); | 140 | cond_syscall(sys_ssetmask); |
141 | cond_syscall(sys_vm86old); | 141 | cond_syscall(sys_vm86old); |
142 | cond_syscall(sys_vm86); | 142 | cond_syscall(sys_vm86); |
143 | cond_syscall(sys_modify_ldt); | ||
143 | cond_syscall(sys_ipc); | 144 | cond_syscall(sys_ipc); |
144 | cond_syscall(compat_sys_ipc); | 145 | cond_syscall(compat_sys_ipc); |
145 | cond_syscall(compat_sys_sysctl); | 146 | cond_syscall(compat_sys_sysctl); |
diff --git a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c index 5224ee5b392d..6ff8383f2941 100644 --- a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c +++ b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c | |||
@@ -81,11 +81,11 @@ static int __init cpufreq_test_tsc(void) | |||
81 | 81 | ||
82 | printk(KERN_DEBUG "start--> \n"); | 82 | printk(KERN_DEBUG "start--> \n"); |
83 | then = read_pmtmr(); | 83 | then = read_pmtmr(); |
84 | rdtscll(then_tsc); | 84 | then_tsc = rdtsc(); |
85 | for (i=0;i<20;i++) { | 85 | for (i=0;i<20;i++) { |
86 | mdelay(100); | 86 | mdelay(100); |
87 | now = read_pmtmr(); | 87 | now = read_pmtmr(); |
88 | rdtscll(now_tsc); | 88 | now_tsc = rdtsc(); |
89 | diff = (now - then) & 0xFFFFFF; | 89 | diff = (now - then) & 0xFFFFFF; |
90 | diff_tsc = now_tsc - then_tsc; | 90 | diff_tsc = now_tsc - then_tsc; |
91 | printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc); | 91 | printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc); |
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index caa60d56d7d1..29089b24d18b 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile | |||
@@ -4,8 +4,8 @@ include ../lib.mk | |||
4 | 4 | ||
5 | .PHONY: all all_32 all_64 warn_32bit_failure clean | 5 | .PHONY: all all_32 all_64 warn_32bit_failure clean |
6 | 6 | ||
7 | TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs | 7 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt |
8 | TARGETS_C_32BIT_ONLY := entry_from_vm86 | 8 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn |
9 | 9 | ||
10 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) | 10 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) |
11 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) | 11 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) |
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 5c38a187677b..9a43a59a9bb4 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c | |||
@@ -28,6 +28,55 @@ | |||
28 | static unsigned long load_addr = 0x10000; | 28 | static unsigned long load_addr = 0x10000; |
29 | static int nerrs = 0; | 29 | static int nerrs = 0; |
30 | 30 | ||
31 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | ||
32 | int flags) | ||
33 | { | ||
34 | struct sigaction sa; | ||
35 | memset(&sa, 0, sizeof(sa)); | ||
36 | sa.sa_sigaction = handler; | ||
37 | sa.sa_flags = SA_SIGINFO | flags; | ||
38 | sigemptyset(&sa.sa_mask); | ||
39 | if (sigaction(sig, &sa, 0)) | ||
40 | err(1, "sigaction"); | ||
41 | } | ||
42 | |||
43 | static void clearhandler(int sig) | ||
44 | { | ||
45 | struct sigaction sa; | ||
46 | memset(&sa, 0, sizeof(sa)); | ||
47 | sa.sa_handler = SIG_DFL; | ||
48 | sigemptyset(&sa.sa_mask); | ||
49 | if (sigaction(sig, &sa, 0)) | ||
50 | err(1, "sigaction"); | ||
51 | } | ||
52 | |||
53 | static sig_atomic_t got_signal; | ||
54 | |||
55 | static void sighandler(int sig, siginfo_t *info, void *ctx_void) | ||
56 | { | ||
57 | ucontext_t *ctx = (ucontext_t*)ctx_void; | ||
58 | |||
59 | if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM || | ||
60 | (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) { | ||
61 | printf("[FAIL]\tSignal frame should not reflect vm86 mode\n"); | ||
62 | nerrs++; | ||
63 | } | ||
64 | |||
65 | const char *signame; | ||
66 | if (sig == SIGSEGV) | ||
67 | signame = "SIGSEGV"; | ||
68 | else if (sig == SIGILL) | ||
69 | signame = "SIGILL"; | ||
70 | else | ||
71 | signame = "unexpected signal"; | ||
72 | |||
73 | printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame, | ||
74 | (unsigned long)ctx->uc_mcontext.gregs[REG_EFL], | ||
75 | (unsigned short)ctx->uc_mcontext.gregs[REG_CS]); | ||
76 | |||
77 | got_signal = 1; | ||
78 | } | ||
79 | |||
31 | asm ( | 80 | asm ( |
32 | ".pushsection .rodata\n\t" | 81 | ".pushsection .rodata\n\t" |
33 | ".type vmcode_bound, @object\n\t" | 82 | ".type vmcode_bound, @object\n\t" |
@@ -38,6 +87,14 @@ asm ( | |||
38 | "int3\n\t" | 87 | "int3\n\t" |
39 | "vmcode_sysenter:\n\t" | 88 | "vmcode_sysenter:\n\t" |
40 | "sysenter\n\t" | 89 | "sysenter\n\t" |
90 | "vmcode_syscall:\n\t" | ||
91 | "syscall\n\t" | ||
92 | "vmcode_sti:\n\t" | ||
93 | "sti\n\t" | ||
94 | "vmcode_int3:\n\t" | ||
95 | "int3\n\t" | ||
96 | "vmcode_int80:\n\t" | ||
97 | "int $0x80\n\t" | ||
41 | ".size vmcode, . - vmcode\n\t" | 98 | ".size vmcode, . - vmcode\n\t" |
42 | "end_vmcode:\n\t" | 99 | "end_vmcode:\n\t" |
43 | ".code32\n\t" | 100 | ".code32\n\t" |
@@ -45,9 +102,12 @@ asm ( | |||
45 | ); | 102 | ); |
46 | 103 | ||
47 | extern unsigned char vmcode[], end_vmcode[]; | 104 | extern unsigned char vmcode[], end_vmcode[]; |
48 | extern unsigned char vmcode_bound[], vmcode_sysenter[]; | 105 | extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], |
106 | vmcode_sti[], vmcode_int3[], vmcode_int80[]; | ||
49 | 107 | ||
50 | static void do_test(struct vm86plus_struct *v86, unsigned long eip, | 108 | /* Returns false if the test was skipped. */ |
109 | static bool do_test(struct vm86plus_struct *v86, unsigned long eip, | ||
110 | unsigned int rettype, unsigned int retarg, | ||
51 | const char *text) | 111 | const char *text) |
52 | { | 112 | { |
53 | long ret; | 113 | long ret; |
@@ -58,7 +118,7 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, | |||
58 | 118 | ||
59 | if (ret == -1 && errno == ENOSYS) { | 119 | if (ret == -1 && errno == ENOSYS) { |
60 | printf("[SKIP]\tvm86 not supported\n"); | 120 | printf("[SKIP]\tvm86 not supported\n"); |
61 | return; | 121 | return false; |
62 | } | 122 | } |
63 | 123 | ||
64 | if (VM86_TYPE(ret) == VM86_INTx) { | 124 | if (VM86_TYPE(ret) == VM86_INTx) { |
@@ -73,13 +133,30 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, | |||
73 | else | 133 | else |
74 | sprintf(trapname, "%d", trapno); | 134 | sprintf(trapname, "%d", trapno); |
75 | 135 | ||
76 | printf("[OK]\tExited vm86 mode due to #%s\n", trapname); | 136 | printf("[INFO]\tExited vm86 mode due to #%s\n", trapname); |
77 | } else if (VM86_TYPE(ret) == VM86_UNKNOWN) { | 137 | } else if (VM86_TYPE(ret) == VM86_UNKNOWN) { |
78 | printf("[OK]\tExited vm86 mode due to unhandled GP fault\n"); | 138 | printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n"); |
139 | } else if (VM86_TYPE(ret) == VM86_TRAP) { | ||
140 | printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n", | ||
141 | VM86_ARG(ret)); | ||
142 | } else if (VM86_TYPE(ret) == VM86_SIGNAL) { | ||
143 | printf("[INFO]\tExited vm86 mode due to a signal\n"); | ||
144 | } else if (VM86_TYPE(ret) == VM86_STI) { | ||
145 | printf("[INFO]\tExited vm86 mode due to STI\n"); | ||
79 | } else { | 146 | } else { |
80 | printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n", | 147 | printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n", |
81 | VM86_TYPE(ret), VM86_ARG(ret)); | 148 | VM86_TYPE(ret), VM86_ARG(ret)); |
82 | } | 149 | } |
150 | |||
151 | if (rettype == -1 || | ||
152 | (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { | ||
153 | printf("[OK]\tReturned correctly\n"); | ||
154 | } else { | ||
155 | printf("[FAIL]\tIncorrect return reason\n"); | ||
156 | nerrs++; | ||
157 | } | ||
158 | |||
159 | return true; | ||
83 | } | 160 | } |
84 | 161 | ||
85 | int main(void) | 162 | int main(void) |
@@ -105,10 +182,52 @@ int main(void) | |||
105 | assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ | 182 | assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ |
106 | 183 | ||
107 | /* #BR -- should deliver SIG??? */ | 184 | /* #BR -- should deliver SIG??? */ |
108 | do_test(&v86, vmcode_bound - vmcode, "#BR"); | 185 | do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR"); |
109 | 186 | ||
110 | /* SYSENTER -- should cause #GP or #UD depending on CPU */ | 187 | /* |
111 | do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER"); | 188 | * SYSENTER -- should cause #GP or #UD depending on CPU. |
189 | * Expected return type -1 means that we shouldn't validate | ||
190 | * the vm86 return value. This will avoid problems on non-SEP | ||
191 | * CPUs. | ||
192 | */ | ||
193 | sethandler(SIGILL, sighandler, 0); | ||
194 | do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER"); | ||
195 | clearhandler(SIGILL); | ||
196 | |||
197 | /* | ||
198 | * SYSCALL would be a disaster in VM86 mode. Fortunately, | ||
199 | * there is no kernel that both enables SYSCALL and sets | ||
200 | * EFER.SCE, so it's #UD on all systems. But vm86 is | ||
201 | * buggy (or has a "feature"), so the SIGILL will actually | ||
202 | * be delivered. | ||
203 | */ | ||
204 | sethandler(SIGILL, sighandler, 0); | ||
205 | do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL"); | ||
206 | clearhandler(SIGILL); | ||
207 | |||
208 | /* STI with VIP set */ | ||
209 | v86.regs.eflags |= X86_EFLAGS_VIP; | ||
210 | v86.regs.eflags &= ~X86_EFLAGS_IF; | ||
211 | do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); | ||
212 | |||
213 | /* INT3 -- should cause #BP */ | ||
214 | do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); | ||
215 | |||
216 | /* INT80 -- should exit with "INTx 0x80" */ | ||
217 | v86.regs.eax = (unsigned int)-1; | ||
218 | do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80"); | ||
219 | |||
220 | /* Execute a null pointer */ | ||
221 | v86.regs.cs = 0; | ||
222 | v86.regs.ss = 0; | ||
223 | sethandler(SIGSEGV, sighandler, 0); | ||
224 | got_signal = 0; | ||
225 | if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") && | ||
226 | !got_signal) { | ||
227 | printf("[FAIL]\tDid not receive SIGSEGV\n"); | ||
228 | nerrs++; | ||
229 | } | ||
230 | clearhandler(SIGSEGV); | ||
112 | 231 | ||
113 | return (nerrs == 0 ? 0 : 1); | 232 | return (nerrs == 0 ? 0 : 1); |
114 | } | 233 | } |
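As a reading aid for the do_test() checks above: the vm86() return value packs the exit reason into the low byte and an argument into the remaining bits. A small sketch that mirrors the VM86_TYPE()/VM86_ARG() decoding; the macros come from <asm/vm86.h>, the helper name is illustrative:

#include <asm/vm86.h>
#include <stdio.h>

static void decode_vm86_exit(long ret)
{
	/* VM86_TYPE() is the low byte, VM86_ARG() the rest of the value. */
	printf("type=%ld arg=%ld\n", VM86_TYPE(ret), VM86_ARG(ret));

	if (VM86_TYPE(ret) == VM86_INTx && VM86_ARG(ret) == 0x80)
		printf("guest executed int $0x80\n");
	else if (VM86_TYPE(ret) == VM86_SIGNAL)
		printf("a signal interrupted vm86 mode\n");
}

int main(void)
{
	/* Example: the packed value for "exited due to int $0x80" */
	decode_vm86_exit((0x80 << 8) | VM86_INTx);
	return 0;
}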
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c new file mode 100644 index 000000000000..31a3035cd4eb --- /dev/null +++ b/tools/testing/selftests/x86/ldt_gdt.c | |||
@@ -0,0 +1,576 @@ | |||
1 | /* | ||
2 | * ldt_gdt.c - Test cases for LDT and GDT access | ||
3 | * Copyright (c) 2015 Andrew Lutomirski | ||
4 | */ | ||
5 | |||
6 | #define _GNU_SOURCE | ||
7 | #include <err.h> | ||
8 | #include <stdio.h> | ||
9 | #include <stdint.h> | ||
10 | #include <signal.h> | ||
11 | #include <setjmp.h> | ||
12 | #include <stdlib.h> | ||
13 | #include <string.h> | ||
14 | #include <errno.h> | ||
15 | #include <unistd.h> | ||
16 | #include <sys/syscall.h> | ||
17 | #include <asm/ldt.h> | ||
18 | #include <sys/types.h> | ||
19 | #include <sys/wait.h> | ||
20 | #include <stdbool.h> | ||
21 | #include <pthread.h> | ||
22 | #include <sched.h> | ||
23 | #include <linux/futex.h> | ||
24 | |||
25 | #define AR_ACCESSED (1<<8) | ||
26 | |||
27 | #define AR_TYPE_RODATA (0 * (1<<9)) | ||
28 | #define AR_TYPE_RWDATA (1 * (1<<9)) | ||
29 | #define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9)) | ||
30 | #define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9)) | ||
31 | #define AR_TYPE_XOCODE (4 * (1<<9)) | ||
32 | #define AR_TYPE_XRCODE (5 * (1<<9)) | ||
33 | #define AR_TYPE_XOCODE_CONF (6 * (1<<9)) | ||
34 | #define AR_TYPE_XRCODE_CONF (7 * (1<<9)) | ||
35 | |||
36 | #define AR_DPL3 (3 * (1<<13)) | ||
37 | |||
38 | #define AR_S (1 << 12) | ||
39 | #define AR_P (1 << 15) | ||
40 | #define AR_AVL (1 << 20) | ||
41 | #define AR_L (1 << 21) | ||
42 | #define AR_DB (1 << 22) | ||
43 | #define AR_G (1 << 23) | ||
44 | |||
45 | static int nerrs; | ||
46 | |||
47 | static void check_invalid_segment(uint16_t index, int ldt) | ||
48 | { | ||
49 | uint32_t has_limit = 0, has_ar = 0, limit, ar; | ||
50 | uint32_t selector = (index << 3) | (ldt << 2) | 3; | ||
51 | |||
52 | asm ("lsl %[selector], %[limit]\n\t" | ||
53 | "jnz 1f\n\t" | ||
54 | "movl $1, %[has_limit]\n\t" | ||
55 | "1:" | ||
56 | : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) | ||
57 | : [selector] "r" (selector)); | ||
58 | asm ("larl %[selector], %[ar]\n\t" | ||
59 | "jnz 1f\n\t" | ||
60 | "movl $1, %[has_ar]\n\t" | ||
61 | "1:" | ||
62 | : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) | ||
63 | : [selector] "r" (selector)); | ||
64 | |||
65 | if (has_limit || has_ar) { | ||
66 | printf("[FAIL]\t%s entry %hu is valid but should be invalid\n", | ||
67 | (ldt ? "LDT" : "GDT"), index); | ||
68 | nerrs++; | ||
69 | } else { | ||
70 | printf("[OK]\t%s entry %hu is invalid\n", | ||
71 | (ldt ? "LDT" : "GDT"), index); | ||
72 | } | ||
73 | } | ||
74 | |||
75 | static void check_valid_segment(uint16_t index, int ldt, | ||
76 | uint32_t expected_ar, uint32_t expected_limit, | ||
77 | bool verbose) | ||
78 | { | ||
79 | uint32_t has_limit = 0, has_ar = 0, limit, ar; | ||
80 | uint32_t selector = (index << 3) | (ldt << 2) | 3; | ||
81 | |||
82 | asm ("lsl %[selector], %[limit]\n\t" | ||
83 | "jnz 1f\n\t" | ||
84 | "movl $1, %[has_limit]\n\t" | ||
85 | "1:" | ||
86 | : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) | ||
87 | : [selector] "r" (selector)); | ||
88 | asm ("larl %[selector], %[ar]\n\t" | ||
89 | "jnz 1f\n\t" | ||
90 | "movl $1, %[has_ar]\n\t" | ||
91 | "1:" | ||
92 | : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) | ||
93 | : [selector] "r" (selector)); | ||
94 | |||
95 | if (!has_limit || !has_ar) { | ||
96 | printf("[FAIL]\t%s entry %hu is invalid but should be valid\n", | ||
97 | (ldt ? "LDT" : "GDT"), index); | ||
98 | nerrs++; | ||
99 | return; | ||
100 | } | ||
101 | |||
102 | if (ar != expected_ar) { | ||
103 | printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", | ||
104 | (ldt ? "LDT" : "GDT"), index, ar, expected_ar); | ||
105 | nerrs++; | ||
106 | } else if (limit != expected_limit) { | ||
107 | printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n", | ||
108 | (ldt ? "LDT" : "GDT"), index, limit, expected_limit); | ||
109 | nerrs++; | ||
110 | } else if (verbose) { | ||
111 | printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n", | ||
112 | (ldt ? "LDT" : "GDT"), index, ar, limit); | ||
113 | } | ||
114 | } | ||
115 | |||
116 | static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, | ||
117 | bool oldmode) | ||
118 | { | ||
119 | int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, | ||
120 | desc, sizeof(*desc)); | ||
121 | if (ret < -1) | ||
122 | errno = -ret; | ||
123 | if (ret == 0) { | ||
124 | uint32_t limit = desc->limit; | ||
125 | if (desc->limit_in_pages) | ||
126 | limit = (limit << 12) + 4095; | ||
127 | check_valid_segment(desc->entry_number, 1, ar, limit, true); | ||
128 | return true; | ||
129 | } else if (errno == ENOSYS) { | ||
130 | printf("[OK]\tmodify_ldt returned -ENOSYS\n"); | ||
131 | return false; | ||
132 | } else { | ||
133 | if (desc->seg_32bit) { | ||
134 | printf("[FAIL]\tUnexpected modify_ldt failure %d\n", | ||
135 | errno); | ||
136 | nerrs++; | ||
137 | return false; | ||
138 | } else { | ||
139 | printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); | ||
140 | return false; | ||
141 | } | ||
142 | } | ||
143 | } | ||
144 | |||
145 | static bool install_valid(const struct user_desc *desc, uint32_t ar) | ||
146 | { | ||
147 | return install_valid_mode(desc, ar, false); | ||
148 | } | ||
149 | |||
150 | static void install_invalid(const struct user_desc *desc, bool oldmode) | ||
151 | { | ||
152 | int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, | ||
153 | desc, sizeof(*desc)); | ||
154 | if (ret < -1) | ||
155 | errno = -ret; | ||
156 | if (ret == 0) { | ||
157 | check_invalid_segment(desc->entry_number, 1); | ||
158 | } else if (errno == ENOSYS) { | ||
159 | printf("[OK]\tmodify_ldt returned -ENOSYS\n"); | ||
160 | } else { | ||
161 | if (desc->seg_32bit) { | ||
162 | printf("[FAIL]\tUnexpected modify_ldt failure %d\n", | ||
163 | errno); | ||
164 | nerrs++; | ||
165 | } else { | ||
166 | printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); | ||
167 | } | ||
168 | } | ||
169 | } | ||
170 | |||
171 | static int safe_modify_ldt(int func, struct user_desc *ptr, | ||
172 | unsigned long bytecount) | ||
173 | { | ||
174 | int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount); | ||
175 | if (ret < -1) | ||
176 | errno = -ret; | ||
177 | return ret; | ||
178 | } | ||
179 | |||
180 | static void fail_install(struct user_desc *desc) | ||
181 | { | ||
182 | if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) { | ||
183 | printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n"); | ||
184 | nerrs++; | ||
185 | } else if (errno == ENOSYS) { | ||
186 | printf("[OK]\tmodify_ldt returned -ENOSYS\n"); | ||
187 | } else { | ||
188 | printf("[OK]\tmodify_ldt failure %d\n", errno); | ||
189 | } | ||
190 | } | ||
191 | |||
192 | static void do_simple_tests(void) | ||
193 | { | ||
194 | struct user_desc desc = { | ||
195 | .entry_number = 0, | ||
196 | .base_addr = 0, | ||
197 | .limit = 10, | ||
198 | .seg_32bit = 1, | ||
199 | .contents = 2, /* Code, not conforming */ | ||
200 | .read_exec_only = 0, | ||
201 | .limit_in_pages = 0, | ||
202 | .seg_not_present = 0, | ||
203 | .useable = 0 | ||
204 | }; | ||
205 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); | ||
206 | |||
207 | desc.limit_in_pages = 1; | ||
208 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | | ||
209 | AR_S | AR_P | AR_DB | AR_G); | ||
210 | |||
211 | check_invalid_segment(1, 1); | ||
212 | |||
213 | desc.entry_number = 2; | ||
214 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | | ||
215 | AR_S | AR_P | AR_DB | AR_G); | ||
216 | |||
217 | check_invalid_segment(1, 1); | ||
218 | |||
219 | desc.base_addr = 0xf0000000; | ||
220 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | | ||
221 | AR_S | AR_P | AR_DB | AR_G); | ||
222 | |||
223 | desc.useable = 1; | ||
224 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | | ||
225 | AR_S | AR_P | AR_DB | AR_G | AR_AVL); | ||
226 | |||
227 | desc.seg_not_present = 1; | ||
228 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | | ||
229 | AR_S | AR_DB | AR_G | AR_AVL); | ||
230 | |||
231 | desc.seg_32bit = 0; | ||
232 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | | ||
233 | AR_S | AR_G | AR_AVL); | ||
234 | |||
235 | desc.seg_32bit = 1; | ||
236 | desc.contents = 0; | ||
237 | install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | | ||
238 | AR_S | AR_DB | AR_G | AR_AVL); | ||
239 | |||
240 | desc.read_exec_only = 1; | ||
241 | install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | | ||
242 | AR_S | AR_DB | AR_G | AR_AVL); | ||
243 | |||
244 | desc.contents = 1; | ||
245 | install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | | ||
246 | AR_S | AR_DB | AR_G | AR_AVL); | ||
247 | |||
248 | desc.read_exec_only = 0; | ||
249 | desc.limit_in_pages = 0; | ||
250 | install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | | ||
251 | AR_S | AR_DB | AR_AVL); | ||
252 | |||
253 | desc.contents = 3; | ||
254 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF | | ||
255 | AR_S | AR_DB | AR_AVL); | ||
256 | |||
257 | desc.read_exec_only = 1; | ||
258 | install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF | | ||
259 | AR_S | AR_DB | AR_AVL); | ||
260 | |||
261 | desc.read_exec_only = 0; | ||
262 | desc.contents = 2; | ||
263 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | | ||
264 | AR_S | AR_DB | AR_AVL); | ||
265 | |||
266 | desc.read_exec_only = 1; | ||
267 | |||
268 | #ifdef __x86_64__ | ||
269 | desc.lm = 1; | ||
270 | install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | | ||
271 | AR_S | AR_DB | AR_AVL); | ||
272 | desc.lm = 0; | ||
273 | #endif | ||
274 | |||
275 | bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | | ||
276 | AR_S | AR_DB | AR_AVL); | ||
277 | |||
278 | if (entry1_okay) { | ||
279 | printf("[RUN]\tTest fork\n"); | ||
280 | pid_t child = fork(); | ||
281 | if (child == 0) { | ||
282 | nerrs = 0; | ||
283 | check_valid_segment(desc.entry_number, 1, | ||
284 | AR_DPL3 | AR_TYPE_XOCODE | | ||
285 | AR_S | AR_DB | AR_AVL, desc.limit, | ||
286 | true); | ||
287 | check_invalid_segment(1, 1); | ||
288 | exit(nerrs ? 1 : 0); | ||
289 | } else { | ||
290 | int status; | ||
291 | if (waitpid(child, &status, 0) != child || | ||
292 | !WIFEXITED(status)) { | ||
293 | printf("[FAIL]\tChild died\n"); | ||
294 | nerrs++; | ||
295 | } else if (WEXITSTATUS(status) != 0) { | ||
296 | printf("[FAIL]\tChild failed\n"); | ||
297 | nerrs++; | ||
298 | } else { | ||
299 | printf("[OK]\tChild succeeded\n"); | ||
300 | } | ||
301 | } | ||
302 | |||
303 | printf("[RUN]\tTest size\n"); | ||
304 | int i; | ||
305 | for (i = 0; i < 8192; i++) { | ||
306 | desc.entry_number = i; | ||
307 | desc.limit = i; | ||
308 | if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { | ||
309 | printf("[FAIL]\tFailed to install entry %d\n", i); | ||
310 | nerrs++; | ||
311 | break; | ||
312 | } | ||
313 | } | ||
314 | for (int j = 0; j < i; j++) { | ||
315 | check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE | | ||
316 | AR_S | AR_DB | AR_AVL, j, false); | ||
317 | } | ||
318 | printf("[DONE]\tSize test\n"); | ||
319 | } else { | ||
320 | printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n"); | ||
321 | } | ||
322 | |||
323 | /* Test entry_number too high. */ | ||
324 | desc.entry_number = 8192; | ||
325 | fail_install(&desc); | ||
326 | |||
327 | /* Test deletion and actions mistakeable for deletion. */ | ||
328 | memset(&desc, 0, sizeof(desc)); | ||
329 | install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P); | ||
330 | |||
331 | desc.seg_not_present = 1; | ||
332 | install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); | ||
333 | |||
334 | desc.seg_not_present = 0; | ||
335 | desc.read_exec_only = 1; | ||
336 | install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P); | ||
337 | |||
338 | desc.read_exec_only = 0; | ||
339 | desc.seg_not_present = 1; | ||
340 | install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); | ||
341 | |||
342 | desc.read_exec_only = 1; | ||
343 | desc.limit = 1; | ||
344 | install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); | ||
345 | |||
346 | desc.limit = 0; | ||
347 | desc.base_addr = 1; | ||
348 | install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); | ||
349 | |||
350 | desc.base_addr = 0; | ||
351 | install_invalid(&desc, false); | ||
352 | |||
353 | desc.seg_not_present = 0; | ||
354 | desc.read_exec_only = 0; | ||
355 | desc.seg_32bit = 1; | ||
356 | install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); | ||
357 | install_invalid(&desc, true); | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * 0: thread is idle | ||
362 | * 1: thread armed | ||
363 | * 2: thread should clear LDT entry 0 | ||
364 | * 3: thread should exit | ||
365 | */ | ||
366 | static volatile unsigned int ftx; | ||
367 | |||
368 | static void *threadproc(void *ctx) | ||
369 | { | ||
370 | cpu_set_t cpuset; | ||
371 | CPU_ZERO(&cpuset); | ||
372 | CPU_SET(1, &cpuset); | ||
373 | if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) | ||
374 | err(1, "sched_setaffinity to CPU 1"); /* should never fail */ | ||
375 | |||
376 | while (1) { | ||
377 | syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0); | ||
378 | while (ftx != 2) { | ||
379 | if (ftx >= 3) | ||
380 | return NULL; | ||
381 | } | ||
382 | |||
383 | /* clear LDT entry 0 */ | ||
384 | const struct user_desc desc = {}; | ||
385 | if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) | ||
386 | err(1, "modify_ldt"); | ||
387 | |||
388 | /* If ftx == 2, set it to zero. If ftx == 100, quit. */ | ||
389 | unsigned int x = -2; | ||
390 | asm volatile ("lock xaddl %[x], %[ftx]" : | ||
391 | [x] "+r" (x), [ftx] "+m" (ftx)); | ||
392 | if (x != 2) | ||
393 | return NULL; | ||
394 | } | ||
395 | } | ||
396 | |||
397 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | ||
398 | int flags) | ||
399 | { | ||
400 | struct sigaction sa; | ||
401 | memset(&sa, 0, sizeof(sa)); | ||
402 | sa.sa_sigaction = handler; | ||
403 | sa.sa_flags = SA_SIGINFO | flags; | ||
404 | sigemptyset(&sa.sa_mask); | ||
405 | if (sigaction(sig, &sa, 0)) | ||
406 | err(1, "sigaction"); | ||
407 | |||
408 | } | ||
409 | |||
410 | static jmp_buf jmpbuf; | ||
411 | |||
412 | static void sigsegv(int sig, siginfo_t *info, void *ctx_void) | ||
413 | { | ||
414 | siglongjmp(jmpbuf, 1); | ||
415 | } | ||
416 | |||
417 | static void do_multicpu_tests(void) | ||
418 | { | ||
419 | cpu_set_t cpuset; | ||
420 | pthread_t thread; | ||
421 | int failures = 0, iters = 5, i; | ||
422 | unsigned short orig_ss; | ||
423 | |||
424 | CPU_ZERO(&cpuset); | ||
425 | CPU_SET(1, &cpuset); | ||
426 | if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { | ||
427 | printf("[SKIP]\tCannot set affinity to CPU 1\n"); | ||
428 | return; | ||
429 | } | ||
430 | |||
431 | CPU_ZERO(&cpuset); | ||
432 | CPU_SET(0, &cpuset); | ||
433 | if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { | ||
434 | printf("[SKIP]\tCannot set affinity to CPU 0\n"); | ||
435 | return; | ||
436 | } | ||
437 | |||
438 | sethandler(SIGSEGV, sigsegv, 0); | ||
439 | #ifdef __i386__ | ||
440 | /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */ | ||
441 | sethandler(SIGILL, sigsegv, 0); | ||
442 | #endif | ||
443 | |||
444 | printf("[RUN]\tCross-CPU LDT invalidation\n"); | ||
445 | |||
446 | if (pthread_create(&thread, 0, threadproc, 0) != 0) | ||
447 | err(1, "pthread_create"); | ||
448 | |||
449 | asm volatile ("mov %%ss, %0" : "=rm" (orig_ss)); | ||
450 | |||
451 | for (i = 0; i < 5; i++) { | ||
452 | if (sigsetjmp(jmpbuf, 1) != 0) | ||
453 | continue; | ||
454 | |||
455 | /* Make sure the thread is ready after the last test. */ | ||
456 | while (ftx != 0) | ||
457 | ; | ||
458 | |||
459 | struct user_desc desc = { | ||
460 | .entry_number = 0, | ||
461 | .base_addr = 0, | ||
462 | .limit = 0xfffff, | ||
463 | .seg_32bit = 1, | ||
464 | .contents = 0, /* Data */ | ||
465 | .read_exec_only = 0, | ||
466 | .limit_in_pages = 1, | ||
467 | .seg_not_present = 0, | ||
468 | .useable = 0 | ||
469 | }; | ||
470 | |||
471 | if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { | ||
472 | if (errno != ENOSYS) | ||
473 | err(1, "modify_ldt"); | ||
474 | printf("[SKIP]\tmodify_ldt unavailable\n"); | ||
475 | break; | ||
476 | } | ||
477 | |||
478 | /* Arm the thread. */ | ||
479 | ftx = 1; | ||
480 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); | ||
481 | |||
482 | asm volatile ("mov %0, %%ss" : : "r" (0x7)); | ||
483 | |||
484 | /* Go! */ | ||
485 | ftx = 2; | ||
486 | |||
487 | while (ftx != 0) | ||
488 | ; | ||
489 | |||
490 | /* | ||
491 | * On success, modify_ldt will segfault us synchronously, | ||
492 | * and we'll escape via siglongjmp. | ||
493 | */ | ||
494 | |||
495 | failures++; | ||
496 | asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); | ||
497 | }; | ||
498 | |||
499 | ftx = 100; /* Kill the thread. */ | ||
500 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); | ||
501 | |||
502 | if (pthread_join(thread, NULL) != 0) | ||
503 | err(1, "pthread_join"); | ||
504 | |||
505 | if (failures) { | ||
506 | printf("[FAIL]\t%d of %d iterations failed\n", failures, iters); | ||
507 | nerrs++; | ||
508 | } else { | ||
509 | printf("[OK]\tAll %d iterations succeeded\n", iters); | ||
510 | } | ||
511 | } | ||
512 | |||
513 | static int finish_exec_test(void) | ||
514 | { | ||
515 | /* | ||
516 | * In a sensible world, this would be check_invalid_segment(0, 1); | ||
517 | * For better or for worse, though, the LDT is inherited across exec. | ||
518 | * We can probably change this safely, but for now we test it. | ||
519 | */ | ||
520 | check_valid_segment(0, 1, | ||
521 | AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, | ||
522 | 42, true); | ||
523 | |||
524 | return nerrs ? 1 : 0; | ||
525 | } | ||
526 | |||
527 | static void do_exec_test(void) | ||
528 | { | ||
529 | printf("[RUN]\tTest exec\n"); | ||
530 | |||
531 | struct user_desc desc = { | ||
532 | .entry_number = 0, | ||
533 | .base_addr = 0, | ||
534 | .limit = 42, | ||
535 | .seg_32bit = 1, | ||
536 | .contents = 2, /* Code, not conforming */ | ||
537 | .read_exec_only = 0, | ||
538 | .limit_in_pages = 0, | ||
539 | .seg_not_present = 0, | ||
540 | .useable = 0 | ||
541 | }; | ||
542 | install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); | ||
543 | |||
544 | pid_t child = fork(); | ||
545 | if (child == 0) { | ||
546 | execl("/proc/self/exe", "ldt_gdt_test_exec", NULL); | ||
547 | printf("[FAIL]\tCould not exec self\n"); | ||
548 | exit(1); /* exec failed */ | ||
549 | } else { | ||
550 | int status; | ||
551 | if (waitpid(child, &status, 0) != child || | ||
552 | !WIFEXITED(status)) { | ||
553 | printf("[FAIL]\tChild died\n"); | ||
554 | nerrs++; | ||
555 | } else if (WEXITSTATUS(status) != 0) { | ||
556 | printf("[FAIL]\tChild failed\n"); | ||
557 | nerrs++; | ||
558 | } else { | ||
559 | printf("[OK]\tChild succeeded\n"); | ||
560 | } | ||
561 | } | ||
562 | } | ||
563 | |||
564 | int main(int argc, char **argv) | ||
565 | { | ||
566 | if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec")) | ||
567 | return finish_exec_test(); | ||
568 | |||
569 | do_simple_tests(); | ||
570 | |||
571 | do_multicpu_tests(); | ||
572 | |||
573 | do_exec_test(); | ||
574 | |||
575 | return nerrs ? 1 : 0; | ||
576 | } | ||
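The LAR/LSL probes in check_valid_segment() and check_invalid_segment() build their selectors by hand. As a reference, a tiny stand-alone helper (illustrative, not part of the test) showing the encoding: bits 3 and up are the descriptor index, bit 2 is the table indicator (1 = LDT), and bits 0-1 are the requested privilege level.

#include <stdint.h>
#include <stdio.h>

static uint16_t make_selector(unsigned int index, int use_ldt, unsigned int rpl)
{
	return (uint16_t)((index << 3) | ((use_ldt ? 1 : 0) << 2) | (rpl & 3));
}

int main(void)
{
	/* LDT entry 0 at RPL 3 -- the 0x7 that the multicpu test loads into %ss */
	printf("selector = 0x%04x\n", make_selector(0, 1, 3));
	return 0;
}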
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c new file mode 100644 index 000000000000..7db4fc9fa09f --- /dev/null +++ b/tools/testing/selftests/x86/syscall_arg_fault.c | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args | ||
3 | * Copyright (c) 2015 Andrew Lutomirski | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | */ | ||
14 | |||
15 | #define _GNU_SOURCE | ||
16 | |||
17 | #include <stdlib.h> | ||
18 | #include <stdio.h> | ||
19 | #include <string.h> | ||
20 | #include <sys/signal.h> | ||
21 | #include <sys/ucontext.h> | ||
22 | #include <err.h> | ||
23 | #include <setjmp.h> | ||
24 | #include <errno.h> | ||
25 | |||
26 | /* Our sigaltstack scratch space. */ | ||
27 | static unsigned char altstack_data[SIGSTKSZ]; | ||
28 | |||
29 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | ||
30 | int flags) | ||
31 | { | ||
32 | struct sigaction sa; | ||
33 | memset(&sa, 0, sizeof(sa)); | ||
34 | sa.sa_sigaction = handler; | ||
35 | sa.sa_flags = SA_SIGINFO | flags; | ||
36 | sigemptyset(&sa.sa_mask); | ||
37 | if (sigaction(sig, &sa, 0)) | ||
38 | err(1, "sigaction"); | ||
39 | } | ||
40 | |||
41 | static volatile sig_atomic_t sig_traps; | ||
42 | static sigjmp_buf jmpbuf; | ||
43 | |||
44 | static volatile sig_atomic_t n_errs; | ||
45 | |||
46 | static void sigsegv(int sig, siginfo_t *info, void *ctx_void) | ||
47 | { | ||
48 | ucontext_t *ctx = (ucontext_t*)ctx_void; | ||
49 | |||
50 | if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) { | ||
51 | printf("[FAIL]\tAX had the wrong value: 0x%x\n", | ||
52 | ctx->uc_mcontext.gregs[REG_EAX]); | ||
53 | n_errs++; | ||
54 | } else { | ||
55 | printf("[OK]\tSeems okay\n"); | ||
56 | } | ||
57 | |||
58 | siglongjmp(jmpbuf, 1); | ||
59 | } | ||
60 | |||
61 | static void sigill(int sig, siginfo_t *info, void *ctx_void) | ||
62 | { | ||
63 | printf("[SKIP]\tIllegal instruction\n"); | ||
64 | siglongjmp(jmpbuf, 1); | ||
65 | } | ||
66 | |||
67 | int main() | ||
68 | { | ||
69 | stack_t stack = { | ||
70 | .ss_sp = altstack_data, | ||
71 | .ss_size = SIGSTKSZ, | ||
72 | }; | ||
73 | if (sigaltstack(&stack, NULL) != 0) | ||
74 | err(1, "sigaltstack"); | ||
75 | |||
76 | sethandler(SIGSEGV, sigsegv, SA_ONSTACK); | ||
77 | sethandler(SIGILL, sigill, SA_ONSTACK); | ||
78 | |||
79 | /* | ||
80 | * Exercise another nasty special case. The 32-bit SYSCALL | ||
81 | * and SYSENTER instructions (even in compat mode) each | ||
82 | * clobber one register. A Linux system call has a syscall | ||
83 | * number and six arguments, and the user stack pointer | ||
84 | * needs to live in some register on return. That means | ||
85 | * that we need eight registers, but SYSCALL and SYSENTER | ||
86 | * only preserve seven registers. As a result, one argument | ||
87 | * ends up on the stack. The stack is user memory, which | ||
88 | * means that the kernel can fail to read it. | ||
89 | * | ||
90 | * The 32-bit fast system calls don't have a defined ABI: | ||
91 | * we're supposed to invoke them through the vDSO. So we'll | ||
92 | * fudge it: we set all regs to invalid pointer values and | ||
93 | * invoke the entry instruction. The return will fail no | ||
94 | * matter what, and we completely lose our program state, | ||
95 | * but we can fix it up with a signal handler. | ||
96 | */ | ||
97 | |||
98 | printf("[RUN]\tSYSENTER with invalid state\n"); | ||
99 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
100 | asm volatile ( | ||
101 | "movl $-1, %%eax\n\t" | ||
102 | "movl $-1, %%ebx\n\t" | ||
103 | "movl $-1, %%ecx\n\t" | ||
104 | "movl $-1, %%edx\n\t" | ||
105 | "movl $-1, %%esi\n\t" | ||
106 | "movl $-1, %%edi\n\t" | ||
107 | "movl $-1, %%ebp\n\t" | ||
108 | "movl $-1, %%esp\n\t" | ||
109 | "sysenter" | ||
110 | : : : "memory", "flags"); | ||
111 | } | ||
112 | |||
113 | printf("[RUN]\tSYSCALL with invalid state\n"); | ||
114 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
115 | asm volatile ( | ||
116 | "movl $-1, %%eax\n\t" | ||
117 | "movl $-1, %%ebx\n\t" | ||
118 | "movl $-1, %%ecx\n\t" | ||
119 | "movl $-1, %%edx\n\t" | ||
120 | "movl $-1, %%esi\n\t" | ||
121 | "movl $-1, %%edi\n\t" | ||
122 | "movl $-1, %%ebp\n\t" | ||
123 | "movl $-1, %%esp\n\t" | ||
124 | "syscall\n\t" | ||
125 | "pushl $0" /* make sure we segfault cleanly */ | ||
126 | : : : "memory", "flags"); | ||
127 | } | ||
128 | |||
129 | return 0; | ||
130 | } | ||
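The long comment in the test describes the 32-bit system call register convention that forces one argument onto the user stack for the fast entry paths. For contrast, a minimal sketch of the legacy int $0x80 path, where the number goes in eax and all six arguments fit in ebx, ecx, edx, esi, edi and ebp; illustrative only, and it compiles to a stub on 64-bit:

#include <stdio.h>
#include <asm/unistd.h>

int main(void)
{
#ifdef __i386__
	long ret;

	/* Syscall number in eax; arguments (none for getpid) would go in
	 * ebx, ecx, edx, esi, edi, ebp -- no user-stack read is needed. */
	asm volatile ("int $0x80" : "=a" (ret) : "a" (__NR_getpid));
	printf("getpid via int $0x80: %ld\n", ret);
#else
	printf("this sketch is 32-bit only\n");
#endif
	return 0;
}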
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c new file mode 100644 index 000000000000..60c06af4646a --- /dev/null +++ b/tools/testing/selftests/x86/syscall_nt.c | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * syscall_nt.c - checks syscalls with NT set | ||
3 | * Copyright (c) 2014-2015 Andrew Lutomirski | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | * | ||
14 | * Some obscure user-space code requires the ability to make system calls | ||
15 | * with FLAGS.NT set. Make sure it works. | ||
16 | */ | ||
17 | |||
18 | #include <stdio.h> | ||
19 | #include <unistd.h> | ||
20 | #include <sys/syscall.h> | ||
21 | #include <asm/processor-flags.h> | ||
22 | |||
23 | #ifdef __x86_64__ | ||
24 | # define WIDTH "q" | ||
25 | #else | ||
26 | # define WIDTH "l" | ||
27 | #endif | ||
28 | |||
29 | static unsigned long get_eflags(void) | ||
30 | { | ||
31 | unsigned long eflags; | ||
32 | asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); | ||
33 | return eflags; | ||
34 | } | ||
35 | |||
36 | static void set_eflags(unsigned long eflags) | ||
37 | { | ||
38 | asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH | ||
39 | : : "rm" (eflags) : "flags"); | ||
40 | } | ||
41 | |||
42 | int main() | ||
43 | { | ||
44 | printf("[RUN]\tSet NT and issue a syscall\n"); | ||
45 | set_eflags(get_eflags() | X86_EFLAGS_NT); | ||
46 | syscall(SYS_getpid); | ||
47 | if (get_eflags() & X86_EFLAGS_NT) { | ||
48 | printf("[OK]\tThe syscall worked and NT is still set\n"); | ||
49 | return 0; | ||
50 | } else { | ||
51 | printf("[FAIL]\tThe syscall worked but NT was cleared\n"); | ||
52 | return 1; | ||
53 | } | ||
54 | } | ||