aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorAndy Lutomirski <luto@MIT.EDU>2011-08-03 09:31:53 -0400
committerH. Peter Anvin <hpa@linux.intel.com>2011-08-04 19:13:49 -0400
commit318f5a2a672152328c9fb4dead504b89ec738a43 (patch)
treed37bcc93c8c1b29c057c44dac13148531706631e /arch/x86
parent5d5791af4c0d4fd32093882357506355c3357503 (diff)
x86-64: Add user_64bit_mode paravirt op
Three places in the kernel assume that the only long mode CPL 3 selector is __USER_CS. This is not true on Xen -- Xen's sysretq changes cs to the magic value 0xe033. Two of the places are corner cases, but as of "x86-64: Improve vsyscall emulation CS and RIP handling" (c9712944b2a12373cb6ff8059afcfb7e826a6c54), vsyscalls will segfault if called with Xen's extra CS selector. This causes a panic when older init builds die. It seems impossible to make Xen use __USER_CS reliably without taking a performance hit on every system call, so this fixes the tests instead with a new paravirt op. It's a little ugly because ptrace.h can't include paravirt.h. Signed-off-by: Andy Lutomirski <luto@mit.edu> Link: http://lkml.kernel.org/r/f4fcb3947340d9e96ce1054a432f183f9da9db83.1312378163.git.luto@mit.edu Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/desc.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h6
-rw-r--r--arch/x86/include/asm/ptrace.h19
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/vsyscall_64.c6
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/xen/enlighten.c4
8 files changed, 38 insertions, 9 deletions
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 7b439d9aea2a..41935fadfdfc 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
27 27
28 desc->base2 = (info->base_addr & 0xff000000) >> 24; 28 desc->base2 = (info->base_addr & 0xff000000) >> 24;
29 /* 29 /*
30 * Don't allow setting of the lm bit. It is useless anyway 30 * Don't allow setting of the lm bit. It would confuse
31 * because 64bit system calls require __USER_CS: 31 * user_64bit_mode and would get overridden by sysret anyway.
32 */ 32 */
33 desc->l = 0; 33 desc->l = 0;
34} 34}
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 82885099c869..96a0f80407b8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -41,6 +41,7 @@
41 41
42#include <asm/desc_defs.h> 42#include <asm/desc_defs.h>
43#include <asm/kmap_types.h> 43#include <asm/kmap_types.h>
44#include <asm/pgtable_types.h>
44 45
45struct page; 46struct page;
46struct thread_struct; 47struct thread_struct;
@@ -63,6 +64,11 @@ struct paravirt_callee_save {
63struct pv_info { 64struct pv_info {
64 unsigned int kernel_rpl; 65 unsigned int kernel_rpl;
65 int shared_kernel_pmd; 66 int shared_kernel_pmd;
67
68#ifdef CONFIG_X86_64
69 u16 extra_user_64bit_cs; /* __USER_CS if none */
70#endif
71
66 int paravirt_enabled; 72 int paravirt_enabled;
67 const char *name; 73 const char *name;
68}; 74};
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 94e7618fcac8..35664547125b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -131,6 +131,9 @@ struct pt_regs {
131#ifdef __KERNEL__ 131#ifdef __KERNEL__
132 132
133#include <linux/init.h> 133#include <linux/init.h>
134#ifdef CONFIG_PARAVIRT
135#include <asm/paravirt_types.h>
136#endif
134 137
135struct cpuinfo_x86; 138struct cpuinfo_x86;
136struct task_struct; 139struct task_struct;
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs)
187#endif 190#endif
188} 191}
189 192
193#ifdef CONFIG_X86_64
194static inline bool user_64bit_mode(struct pt_regs *regs)
195{
196#ifndef CONFIG_PARAVIRT
197 /*
198 * On non-paravirt systems, this is the only long mode CPL 3
199 * selector. We do not allow long mode selectors in the LDT.
200 */
201 return regs->cs == __USER_CS;
202#else
203 /* Headers are too twisted for this to go in paravirt.h. */
204 return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
205#endif
206}
207#endif
208
190/* 209/*
191 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode 210 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
192 * when it traps. The previous stack will be directly underneath the saved 211 * when it traps. The previous stack will be directly underneath the saved
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1aeeb71b..681f15994218 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -299,6 +299,10 @@ struct pv_info pv_info = {
299 .paravirt_enabled = 0, 299 .paravirt_enabled = 0,
300 .kernel_rpl = 0, 300 .kernel_rpl = 0,
301 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ 301 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
302
303#ifdef CONFIG_X86_64
304 .extra_user_64bit_cs = __USER_CS,
305#endif
302}; 306};
303 307
304struct pv_init_ops pv_init_ops = { 308struct pv_init_ops pv_init_ops = {
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7977f0cfe339..c346d1161488 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
74 74
75#ifdef CONFIG_X86_64 75#ifdef CONFIG_X86_64
76 case 0x40 ... 0x4f: 76 case 0x40 ... 0x4f:
77 if (regs->cs != __USER_CS) 77 if (!user_64bit_mode(regs))
78 /* 32-bit mode: register increment */ 78 /* 32-bit mode: register increment */
79 return 0; 79 return 0;
80 /* 64-bit mode: REX prefix */ 80 /* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dda7dff9cef7..1725930a6f9f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -127,11 +127,7 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
127 127
128 local_irq_enable(); 128 local_irq_enable();
129 129
130 /* 130 if (!user_64bit_mode(regs)) {
131 * Real 64-bit user mode code has cs == __USER_CS. Anything else
132 * is bogus.
133 */
134 if (regs->cs != __USER_CS) {
135 /* 131 /*
136 * If we trapped from kernel mode, we might as well OOPS now 132 * If we trapped from kernel mode, we might as well OOPS now
137 * instead of returning to some random address and OOPSing 133 * instead of returning to some random address and OOPSing
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2dbf6bf4c7e5..c1d018238f32 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -105,7 +105,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
105 * but for now it's good enough to assume that long 105 * but for now it's good enough to assume that long
106 * mode only uses well known segments or kernel. 106 * mode only uses well known segments or kernel.
107 */ 107 */
108 return (!user_mode(regs)) || (regs->cs == __USER_CS); 108 return (!user_mode(regs) || user_64bit_mode(regs));
109#endif 109#endif
110 case 0x60: 110 case 0x60:
111 /* 0x64 thru 0x67 are valid prefixes in all modes. */ 111 /* 0x64 thru 0x67 are valid prefixes in all modes. */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5525163a0398..78fe33d03565 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -937,6 +937,10 @@ static const struct pv_info xen_info __initconst = {
937 .paravirt_enabled = 1, 937 .paravirt_enabled = 1,
938 .shared_kernel_pmd = 0, 938 .shared_kernel_pmd = 0,
939 939
940#ifdef CONFIG_X86_64
941 .extra_user_64bit_cs = FLAT_USER_CS64,
942#endif
943
940 .name = "Xen", 944 .name = "Xen",
941}; 945};
942 946