diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-05-02 13:27:12 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-05-02 13:27:12 -0400 |
commit | 1dbf527c51c6c20c19869c8125cb5b87c3d09506 (patch) | |
tree | ab480b3eff886e54c6d3c881936f046599e7ea61 /arch/i386/kernel | |
parent | d4f7a2c18e59e0304a1c733589ce14fc02fec1bd (diff) |
[PATCH] i386: Make COMPAT_VDSO runtime selectable.
Now that relocation of the VDSO for COMPAT_VDSO users is done at
runtime rather than compile time, it is possible to enable/disable
compat mode at runtime.
This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the
kernel command line, or via sysctl. (Switching on a running system
shouldn't be done lightly; any process which was relying on the compat
VDSO will be upset if it goes away.)
The COMPAT_VDSO config option still exists, but if enabled it just
makes vdso_enabled default to VDSO_COMPAT.
+From: Hugh Dickins <hugh@veritas.com>
Fix oops from i386-make-compat_vdso-runtime-selectable.patch.
Even mingetty at system startup finds it easy to trigger an oops
while reading /proc/PID/maps: though it has a good hold on the mm
itself, that cannot stop exit_mm() from resetting tsk->mm to NULL.
(It is usually show_map()'s call to get_gate_vma() which oopses,
and I expect we could change that to check priv->tail_vma instead;
but no matter, even m_start()'s call just after get_task_mm() is racy.)
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r-- | arch/i386/kernel/sysenter.c | 145 |
1 files changed, 94 insertions, 51 deletions
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index e5a958379ac9..0b9768ee1e8d 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -23,16 +23,25 @@ | |||
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
25 | #include <asm/elf.h> | 25 | #include <asm/elf.h> |
26 | #include <asm/tlbflush.h> | ||
27 | |||
28 | enum { | ||
29 | VDSO_DISABLED = 0, | ||
30 | VDSO_ENABLED = 1, | ||
31 | VDSO_COMPAT = 2, | ||
32 | }; | ||
33 | |||
34 | #ifdef CONFIG_COMPAT_VDSO | ||
35 | #define VDSO_DEFAULT VDSO_COMPAT | ||
36 | #else | ||
37 | #define VDSO_DEFAULT VDSO_ENABLED | ||
38 | #endif | ||
26 | 39 | ||
27 | /* | 40 | /* |
28 | * Should the kernel map a VDSO page into processes and pass its | 41 | * Should the kernel map a VDSO page into processes and pass its |
29 | * address down to glibc upon exec()? | 42 | * address down to glibc upon exec()? |
30 | */ | 43 | */ |
31 | #ifdef CONFIG_PARAVIRT | 44 | unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; |
32 | unsigned int __read_mostly vdso_enabled = 0; | ||
33 | #else | ||
34 | unsigned int __read_mostly vdso_enabled = 1; | ||
35 | #endif | ||
36 | 45 | ||
37 | EXPORT_SYMBOL_GPL(vdso_enabled); | 46 | EXPORT_SYMBOL_GPL(vdso_enabled); |
38 | 47 | ||
@@ -47,7 +56,6 @@ __setup("vdso=", vdso_setup); | |||
47 | 56 | ||
48 | extern asmlinkage void sysenter_entry(void); | 57 | extern asmlinkage void sysenter_entry(void); |
49 | 58 | ||
50 | #ifdef CONFIG_COMPAT_VDSO | ||
51 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | 59 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, |
52 | unsigned offset, unsigned size) | 60 | unsigned offset, unsigned size) |
53 | { | 61 | { |
@@ -164,11 +172,6 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) | |||
164 | shdr[i].sh_size); | 172 | shdr[i].sh_size); |
165 | } | 173 | } |
166 | } | 174 | } |
167 | #else | ||
168 | static inline void relocate_vdso(Elf32_Ehdr *ehdr) | ||
169 | { | ||
170 | } | ||
171 | #endif /* COMPAT_VDSO */ | ||
172 | 175 | ||
173 | void enable_sep_cpu(void) | 176 | void enable_sep_cpu(void) |
174 | { | 177 | { |
@@ -188,6 +191,25 @@ void enable_sep_cpu(void) | |||
188 | put_cpu(); | 191 | put_cpu(); |
189 | } | 192 | } |
190 | 193 | ||
194 | static struct vm_area_struct gate_vma; | ||
195 | |||
196 | static int __init gate_vma_init(void) | ||
197 | { | ||
198 | gate_vma.vm_mm = NULL; | ||
199 | gate_vma.vm_start = FIXADDR_USER_START; | ||
200 | gate_vma.vm_end = FIXADDR_USER_END; | ||
201 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | ||
202 | gate_vma.vm_page_prot = __P101; | ||
203 | /* | ||
204 | * Make sure the vDSO gets into every core dump. | ||
205 | * Dumping its contents makes post-mortem fully interpretable later | ||
206 | * without matching up the same kernel and hardware config to see | ||
207 | * what PC values meant. | ||
208 | */ | ||
209 | gate_vma.vm_flags |= VM_ALWAYSDUMP; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
191 | /* | 213 | /* |
192 | * These symbols are defined by vsyscall.o to mark the bounds | 214 | * These symbols are defined by vsyscall.o to mark the bounds |
193 | * of the ELF DSO images included therein. | 215 | * of the ELF DSO images included therein. |
@@ -196,6 +218,22 @@ extern const char vsyscall_int80_start, vsyscall_int80_end; | |||
196 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 218 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
197 | static struct page *syscall_pages[1]; | 219 | static struct page *syscall_pages[1]; |
198 | 220 | ||
221 | static void map_compat_vdso(int map) | ||
222 | { | ||
223 | static int vdso_mapped; | ||
224 | |||
225 | if (map == vdso_mapped) | ||
226 | return; | ||
227 | |||
228 | vdso_mapped = map; | ||
229 | |||
230 | __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, | ||
231 | map ? PAGE_READONLY_EXEC : PAGE_NONE); | ||
232 | |||
233 | /* flush stray tlbs */ | ||
234 | flush_tlb_all(); | ||
235 | } | ||
236 | |||
199 | int __init sysenter_setup(void) | 237 | int __init sysenter_setup(void) |
200 | { | 238 | { |
201 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | 239 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
@@ -204,10 +242,9 @@ int __init sysenter_setup(void) | |||
204 | 242 | ||
205 | syscall_pages[0] = virt_to_page(syscall_page); | 243 | syscall_pages[0] = virt_to_page(syscall_page); |
206 | 244 | ||
207 | #ifdef CONFIG_COMPAT_VDSO | 245 | gate_vma_init(); |
208 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); | 246 | |
209 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | 247 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
210 | #endif | ||
211 | 248 | ||
212 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 249 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
213 | vsyscall = &vsyscall_int80_start; | 250 | vsyscall = &vsyscall_int80_start; |
@@ -226,42 +263,57 @@ int __init sysenter_setup(void) | |||
226 | /* Defined in vsyscall-sysenter.S */ | 263 | /* Defined in vsyscall-sysenter.S */ |
227 | extern void SYSENTER_RETURN; | 264 | extern void SYSENTER_RETURN; |
228 | 265 | ||
229 | #ifdef __HAVE_ARCH_GATE_AREA | ||
230 | /* Setup a VMA at program startup for the vsyscall page */ | 266 | /* Setup a VMA at program startup for the vsyscall page */ |
231 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | 267 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) |
232 | { | 268 | { |
233 | struct mm_struct *mm = current->mm; | 269 | struct mm_struct *mm = current->mm; |
234 | unsigned long addr; | 270 | unsigned long addr; |
235 | int ret; | 271 | int ret; |
272 | bool compat; | ||
236 | 273 | ||
237 | down_write(&mm->mmap_sem); | 274 | down_write(&mm->mmap_sem); |
238 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
239 | if (IS_ERR_VALUE(addr)) { | ||
240 | ret = addr; | ||
241 | goto up_fail; | ||
242 | } | ||
243 | 275 | ||
244 | /* | 276 | /* Test compat mode once here, in case someone |
245 | * MAYWRITE to allow gdb to COW and set breakpoints | 277 | changes it via sysctl */ |
246 | * | 278 | compat = (vdso_enabled == VDSO_COMPAT); |
247 | * Make sure the vDSO gets into every core dump. | 279 | |
248 | * Dumping its contents makes post-mortem fully interpretable later | 280 | map_compat_vdso(compat); |
249 | * without matching up the same kernel and hardware config to see | 281 | |
250 | * what PC values meant. | 282 | if (compat) |
251 | */ | 283 | addr = VDSO_HIGH_BASE; |
252 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | 284 | else { |
253 | VM_READ|VM_EXEC| | 285 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
254 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 286 | if (IS_ERR_VALUE(addr)) { |
255 | VM_ALWAYSDUMP, | 287 | ret = addr; |
256 | syscall_pages); | 288 | goto up_fail; |
257 | if (ret) | 289 | } |
258 | goto up_fail; | 290 | |
291 | /* | ||
292 | * MAYWRITE to allow gdb to COW and set breakpoints | ||
293 | * | ||
294 | * Make sure the vDSO gets into every core dump. | ||
295 | * Dumping its contents makes post-mortem fully | ||
296 | * interpretable later without matching up the same | ||
297 | * kernel and hardware config to see what PC values | ||
298 | * meant. | ||
299 | */ | ||
300 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | ||
301 | VM_READ|VM_EXEC| | ||
302 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | ||
303 | VM_ALWAYSDUMP, | ||
304 | syscall_pages); | ||
305 | |||
306 | if (ret) | ||
307 | goto up_fail; | ||
308 | } | ||
259 | 309 | ||
260 | current->mm->context.vdso = (void *)addr; | 310 | current->mm->context.vdso = (void *)addr; |
261 | current_thread_info()->sysenter_return = | 311 | current_thread_info()->sysenter_return = |
262 | (void *)VDSO_SYM(&SYSENTER_RETURN); | 312 | (void *)VDSO_SYM(&SYSENTER_RETURN); |
263 | up_fail: | 313 | |
314 | up_fail: | ||
264 | up_write(&mm->mmap_sem); | 315 | up_write(&mm->mmap_sem); |
316 | |||
265 | return ret; | 317 | return ret; |
266 | } | 318 | } |
267 | 319 | ||
@@ -274,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
274 | 326 | ||
275 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | 327 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) |
276 | { | 328 | { |
329 | struct mm_struct *mm = tsk->mm; | ||
330 | |||
331 | /* Check to see if this task was created in compat vdso mode */ | ||
332 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | ||
333 | return &gate_vma; | ||
277 | return NULL; | 334 | return NULL; |
278 | } | 335 | } |
279 | 336 | ||
@@ -286,17 +343,3 @@ int in_gate_area_no_task(unsigned long addr) | |||
286 | { | 343 | { |
287 | return 0; | 344 | return 0; |
288 | } | 345 | } |
289 | #else /* !__HAVE_ARCH_GATE_AREA */ | ||
290 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
291 | { | ||
292 | /* | ||
293 | * If not creating userspace VMA, simply set vdso to point to | ||
294 | * fixmap page. | ||
295 | */ | ||
296 | current->mm->context.vdso = (void *)VDSO_HIGH_BASE; | ||
297 | current_thread_info()->sysenter_return = | ||
298 | (void *)VDSO_SYM(&SYSENTER_RETURN); | ||
299 | |||
300 | return 0; | ||
301 | } | ||
302 | #endif /* __HAVE_ARCH_GATE_AREA */ | ||