diff options
author | Andy Lutomirski <luto@MIT.EDU> | 2011-08-03 09:31:53 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2011-08-04 19:13:49 -0400 |
commit | 318f5a2a672152328c9fb4dead504b89ec738a43 (patch) | |
tree | d37bcc93c8c1b29c057c44dac13148531706631e /arch | |
parent | 5d5791af4c0d4fd32093882357506355c3357503 (diff) |
x86-64: Add user_64bit_mode paravirt op
Three places in the kernel assume that the only long mode CPL 3
selector is __USER_CS. This is not true on Xen -- Xen's sysretq
changes cs to the magic value 0xe033.
Two of the places are corner cases, but as of "x86-64: Improve
vsyscall emulation CS and RIP handling"
(c9712944b2a12373cb6ff8059afcfb7e826a6c54), vsyscalls will segfault
if called with Xen's extra CS selector. This causes a panic when
older init builds die.
It seems impossible to make Xen use __USER_CS reliably without
taking a performance hit on every system call, so this fixes the
tests instead with a new paravirt op. It's a little ugly because
ptrace.h can't include paravirt.h.
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/f4fcb3947340d9e96ce1054a432f183f9da9db83.1312378163.git.luto@mit.edu
Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/desc.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/ptrace.h | 19 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/step.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 4 |
8 files changed, 38 insertions, 9 deletions
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 7b439d9aea2a..41935fadfdfc 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in | |||
27 | 27 | ||
28 | desc->base2 = (info->base_addr & 0xff000000) >> 24; | 28 | desc->base2 = (info->base_addr & 0xff000000) >> 24; |
29 | /* | 29 | /* |
30 | * Don't allow setting of the lm bit. It is useless anyway | 30 | * Don't allow setting of the lm bit. It would confuse |
31 | * because 64bit system calls require __USER_CS: | 31 | * user_64bit_mode and would get overridden by sysret anyway. |
32 | */ | 32 | */ |
33 | desc->l = 0; | 33 | desc->l = 0; |
34 | } | 34 | } |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 82885099c869..96a0f80407b8 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -41,6 +41,7 @@ | |||
41 | 41 | ||
42 | #include <asm/desc_defs.h> | 42 | #include <asm/desc_defs.h> |
43 | #include <asm/kmap_types.h> | 43 | #include <asm/kmap_types.h> |
44 | #include <asm/pgtable_types.h> | ||
44 | 45 | ||
45 | struct page; | 46 | struct page; |
46 | struct thread_struct; | 47 | struct thread_struct; |
@@ -63,6 +64,11 @@ struct paravirt_callee_save { | |||
63 | struct pv_info { | 64 | struct pv_info { |
64 | unsigned int kernel_rpl; | 65 | unsigned int kernel_rpl; |
65 | int shared_kernel_pmd; | 66 | int shared_kernel_pmd; |
67 | |||
68 | #ifdef CONFIG_X86_64 | ||
69 | u16 extra_user_64bit_cs; /* __USER_CS if none */ | ||
70 | #endif | ||
71 | |||
66 | int paravirt_enabled; | 72 | int paravirt_enabled; |
67 | const char *name; | 73 | const char *name; |
68 | }; | 74 | }; |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 94e7618fcac8..35664547125b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -131,6 +131,9 @@ struct pt_regs { | |||
131 | #ifdef __KERNEL__ | 131 | #ifdef __KERNEL__ |
132 | 132 | ||
133 | #include <linux/init.h> | 133 | #include <linux/init.h> |
134 | #ifdef CONFIG_PARAVIRT | ||
135 | #include <asm/paravirt_types.h> | ||
136 | #endif | ||
134 | 137 | ||
135 | struct cpuinfo_x86; | 138 | struct cpuinfo_x86; |
136 | struct task_struct; | 139 | struct task_struct; |
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs) | |||
187 | #endif | 190 | #endif |
188 | } | 191 | } |
189 | 192 | ||
193 | #ifdef CONFIG_X86_64 | ||
194 | static inline bool user_64bit_mode(struct pt_regs *regs) | ||
195 | { | ||
196 | #ifndef CONFIG_PARAVIRT | ||
197 | /* | ||
198 | * On non-paravirt systems, this is the only long mode CPL 3 | ||
199 | * selector. We do not allow long mode selectors in the LDT. | ||
200 | */ | ||
201 | return regs->cs == __USER_CS; | ||
202 | #else | ||
203 | /* Headers are too twisted for this to go in paravirt.h. */ | ||
204 | return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; | ||
205 | #endif | ||
206 | } | ||
207 | #endif | ||
208 | |||
190 | /* | 209 | /* |
191 | * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode | 210 | * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode |
192 | * when it traps. The previous stack will be directly underneath the saved | 211 | * when it traps. The previous stack will be directly underneath the saved |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 869e1aeeb71b..681f15994218 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -299,6 +299,10 @@ struct pv_info pv_info = { | |||
299 | .paravirt_enabled = 0, | 299 | .paravirt_enabled = 0, |
300 | .kernel_rpl = 0, | 300 | .kernel_rpl = 0, |
301 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 301 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
302 | |||
303 | #ifdef CONFIG_X86_64 | ||
304 | .extra_user_64bit_cs = __USER_CS, | ||
305 | #endif | ||
302 | }; | 306 | }; |
303 | 307 | ||
304 | struct pv_init_ops pv_init_ops = { | 308 | struct pv_init_ops pv_init_ops = { |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 7977f0cfe339..c346d1161488 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | |||
74 | 74 | ||
75 | #ifdef CONFIG_X86_64 | 75 | #ifdef CONFIG_X86_64 |
76 | case 0x40 ... 0x4f: | 76 | case 0x40 ... 0x4f: |
77 | if (regs->cs != __USER_CS) | 77 | if (!user_64bit_mode(regs)) |
78 | /* 32-bit mode: register increment */ | 78 | /* 32-bit mode: register increment */ |
79 | return 0; | 79 | return 0; |
80 | /* 64-bit mode: REX prefix */ | 80 | /* 64-bit mode: REX prefix */ |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index dda7dff9cef7..1725930a6f9f 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -127,11 +127,7 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) | |||
127 | 127 | ||
128 | local_irq_enable(); | 128 | local_irq_enable(); |
129 | 129 | ||
130 | /* | 130 | if (!user_64bit_mode(regs)) { |
131 | * Real 64-bit user mode code has cs == __USER_CS. Anything else | ||
132 | * is bogus. | ||
133 | */ | ||
134 | if (regs->cs != __USER_CS) { | ||
135 | /* | 131 | /* |
136 | * If we trapped from kernel mode, we might as well OOPS now | 132 | * If we trapped from kernel mode, we might as well OOPS now |
137 | * instead of returning to some random address and OOPSing | 133 | * instead of returning to some random address and OOPSing |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2dbf6bf4c7e5..c1d018238f32 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -105,7 +105,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, | |||
105 | * but for now it's good enough to assume that long | 105 | * but for now it's good enough to assume that long |
106 | * mode only uses well known segments or kernel. | 106 | * mode only uses well known segments or kernel. |
107 | */ | 107 | */ |
108 | return (!user_mode(regs)) || (regs->cs == __USER_CS); | 108 | return (!user_mode(regs) || user_64bit_mode(regs)); |
109 | #endif | 109 | #endif |
110 | case 0x60: | 110 | case 0x60: |
111 | /* 0x64 thru 0x67 are valid prefixes in all modes. */ | 111 | /* 0x64 thru 0x67 are valid prefixes in all modes. */ |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5525163a0398..78fe33d03565 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -937,6 +937,10 @@ static const struct pv_info xen_info __initconst = { | |||
937 | .paravirt_enabled = 1, | 937 | .paravirt_enabled = 1, |
938 | .shared_kernel_pmd = 0, | 938 | .shared_kernel_pmd = 0, |
939 | 939 | ||
940 | #ifdef CONFIG_X86_64 | ||
941 | .extra_user_64bit_cs = FLAT_USER_CS64, | ||
942 | #endif | ||
943 | |||
940 | .name = "Xen", | 944 | .name = "Xen", |
941 | }; | 945 | }; |
942 | 946 | ||