From b93590901a01a6d036b3b7c856bcc5724fdb9911 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Tue, 23 Sep 2014 10:50:51 -0700
Subject: x86_64/vsyscall: Move all of the gate_area code to vsyscall_64.c

This code exists for the sole purpose of making the vsyscall page look
sort of like real userspace memory. Move it so that it lives with the
rest of the vsyscall code.

Signed-off-by: Andy Lutomirski
Link: http://lkml.kernel.org/r/a7ee266773671a05f00b7175ca65a0dd812d2e4b.1411494540.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/vsyscall_64.c | 49 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 957779f4eb40..521d5ed19547 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -283,6 +283,55 @@ sigsegv:
 	return true;
 }
 
+/*
+ * A pseudo VMA to allow ptrace access for the vsyscall page. This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
+ */
+static const char *gate_vma_name(struct vm_area_struct *vma)
+{
+	return "[vsyscall]";
+}
+static struct vm_operations_struct gate_vma_ops = {
+	.name = gate_vma_name,
+};
+static struct vm_area_struct gate_vma = {
+	.vm_start	= VSYSCALL_ADDR,
+	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
+	.vm_page_prot	= PAGE_READONLY_EXEC,
+	.vm_flags	= VM_READ | VM_EXEC,
+	.vm_ops		= &gate_vma_ops,
+};
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+#ifdef CONFIG_IA32_EMULATION
+	if (!mm || mm->context.ia32_compat)
+		return NULL;
+#endif
+	return &gate_vma;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma = get_gate_vma(mm);
+
+	if (!vma)
+		return 0;
+
+	return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
+
+/*
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
+ */
+int in_gate_area_no_mm(unsigned long addr)
+{
+	return (addr & PAGE_MASK) == VSYSCALL_ADDR;
+}
+
 /*
  * Assume __initcall executes before all user space. Hopefully kmod
  * doesn't violate that. We'll find out if it does.
-- cgit v1.2.2
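The gate VMA added above is what makes the vsyscall page show up as
"[vsyscall]" in /proc/PID/maps and keeps it readable by ptrace. A
minimal userspace sketch to observe it (assumes a 64-bit kernel with
vsyscalls enabled; not part of the patch series):

/* Print the [vsyscall] line that the gate VMA contributes to the maps file. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[vsyscall]"))
			fputs(line, stdout);	/* ffffffffff600000-ffffffffff601000 r-xp ... */
	fclose(f);
	return 0;
}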
From d4f829dd9026797bd5db8715a30192f23b22afaa Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Tue, 23 Sep 2014 10:50:52 -0700
Subject: x86_64/vdso: Move getcpu code from vsyscall_64.c to vdso/vma.c

This is pure cut-and-paste. At this point, vsyscall_64.c contains only
code needed for vsyscall emulation, but some of the comments and
function names are still confused.

Signed-off-by: Andy Lutomirski
Link: http://lkml.kernel.org/r/a244daf7d3cbe71afc08ad09fdfe1866ca1f1978.1411494540.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/vsyscall_64.c | 57 -------------------------------------------
 1 file changed, 57 deletions(-)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 521d5ed19547..2f9ef0c1d112 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -332,49 +332,6 @@ int in_gate_area_no_mm(unsigned long addr)
 	return (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
-/*
- * Assume __initcall executes before all user space. Hopefully kmod
- * doesn't violate that. We'll find out if it does.
- */
-static void vsyscall_set_cpu(int cpu)
-{
-	unsigned long d;
-	unsigned long node = 0;
-#ifdef CONFIG_NUMA
-	node = cpu_to_node(cpu);
-#endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
-		write_rdtscp_aux((node << 12) | cpu);
-
-	/*
-	 * Store cpu number in limit so that it can be loaded quickly
-	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
-	 */
-	d = 0x0f40000000000ULL;
-	d |= cpu;
-	d |= (node & 0xf) << 12;
-	d |= (node >> 4) << 48;
-
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
-}
-
-static void cpu_vsyscall_init(void *arg)
-{
-	/* preemption should be already off */
-	vsyscall_set_cpu(raw_smp_processor_id());
-}
-
-static int
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
-{
-	long cpu = (long)arg;
-
-	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
-
-	return NOTIFY_DONE;
-}
-
 void __init map_vsyscall(void)
 {
 	extern char __vsyscall_page;
@@ -387,17 +344,3 @@ void __init map_vsyscall(void)
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
 }
-
-static int __init vsyscall_init(void)
-{
-	cpu_notifier_register_begin();
-
-	on_each_cpu(cpu_vsyscall_init, NULL, 1);
-	/* notifier priority > KVM */
-	__hotcpu_notifier(cpu_vsyscall_notifier, 30);
-
-	cpu_notifier_register_done();
-
-	return 0;
-}
-__initcall(vsyscall_init);
-- cgit v1.2.2

From 61a492fb1759f3e892ad0408e36d3575c5f890d0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Tue, 23 Sep 2014 10:50:56 -0700
Subject: x86_64/vdso: Remove jiffies from the vvar page

I think that the jiffies vvar was once used for the vgetcpu cache.
That code is long gone, so let's just make jiffies be a normal
variable.

Signed-off-by: Andy Lutomirski
Link: http://lkml.kernel.org/r/fcfee6f8749af14d96373a9e2656354ad0b95499.1411494540.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/time.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0fa29609b2c4..25adc0e16eaa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
 #include 
 
 #ifdef CONFIG_X86_64
-__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
-- cgit v1.2.2
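The vsyscall_set_cpu() code deleted two patches above packs the CPU
number into the low 12 bits of the per-CPU GDT segment limit and the
node into the next 8 bits, which is what lets user code recover both
with a single LSL. A self-contained sketch of that arithmetic (the cpu
and node values are made up; only the bit layout comes from the patch):

/* Reproduce the segment-limit encoding from the deleted vsyscall_set_cpu(). */
#include <stdio.h>

int main(void)
{
	unsigned long long cpu = 5, node = 18;		/* hypothetical values */
	unsigned long long d = 0x0f40000000000ULL;	/* base descriptor from the patch */

	d |= cpu;			/* limit bits 0-11: CPU */
	d |= (node & 0xf) << 12;	/* limit bits 12-15: low node bits */
	d |= (node >> 4) << 48;		/* limit bits 16-19: high node bits */

	/* The 20-bit limit that LSL reports for this descriptor: */
	unsigned long long limit = (d & 0xffff) | (((d >> 48) & 0xf) << 16);

	printf("cpu=%llu node=%llu\n", limit & 0xfff, limit >> 12);	/* cpu=5 node=18 */
	return 0;
}

The next patch drops the RDTSCP alternative and standardizes on exactly
this LSL decode.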
From e76b027e6408f5570dc940b731ec9ae870c6188a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Thu, 30 Oct 2014 14:58:01 -0700
Subject: x86,vdso: Use LSL unconditionally for vgetcpu

LSL is faster than RDTSCP and works everywhere; there's no need to
switch between them depending on the CPU.

Signed-off-by: Andy Lutomirski
Cc: Andi Kleen
Link: http://lkml.kernel.org/r/72f73d5ec4514e02bba345b9759177ef03742efb.1414706021.git.luto@amacapital.net
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/cpu/common.c  | 10 ----------
 arch/x86/kernel/vsyscall_64.c |  2 --
 2 files changed, 12 deletions(-)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b4f78c9ba19..175372b854be 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -956,14 +956,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_X86_64
-static void vgetcpu_set_mode(void)
-{
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
-		vgetcpu_mode = VGETCPU_RDTSCP;
-	else
-		vgetcpu_mode = VGETCPU_LSL;
-}
-
 #ifdef CONFIG_IA32_EMULATION
 /* May not be __init: called during resume */
 static void syscall32_cpu_init(void)
@@ -1006,8 +998,6 @@ void __init identify_boot_cpu(void)
 #ifdef CONFIG_X86_32
 	sysenter_setup();
 	enable_sep_cpu();
-#else
-	vgetcpu_set_mode();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
 }
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 2f9ef0c1d112..419e83b58436 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -52,8 +52,6 @@
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-DEFINE_VVAR(int, vgetcpu_mode);
-
 static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
 
 static int __init vsyscall_setup(char *str)
-- cgit v1.2.2
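After this change the vgetcpu fast path is a single unprivileged
instruction plus two masks. An illustrative userspace sketch of the
decode (0x7b is the x86_64 per-CPU segment selector,
(GDT_ENTRY_PER_CPU << 3) | 3; the actual vDSO source differs in detail):

/* Read cpu and node the way the post-patch vgetcpu does: LSL on the
 * per-CPU GDT segment. A sketch, not the literal vDSO code. */
#include <stdio.h>

int main(void)
{
	unsigned int p;

	asm("lsl %1, %0" : "=r" (p) : "r" (0x7bU));	/* p = segment limit */
	printf("cpu=%u node=%u\n", p & 0xfff, p >> 12);	/* 12 cpu bits, 8 node bits */
	return 0;
}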
From 87983c66bc02c9cd8e4a42e7924435145d52bb13 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Wed, 29 Oct 2014 14:33:45 -0700
Subject: x86_64, vsyscall: Turn vsyscalls all the way off when vsyscall==none

I see no point in having an unusable read-only page sitting at
0xffffffffff600000 when vsyscall=none. Instead, skip mapping it and
remove it from /proc/PID/maps.

I kept the ratelimited warning when programs try to use a vsyscall
in this mode, since it may help admins avoid confusion.

Signed-off-by: Andy Lutomirski
Reviewed-by: Josh Triplett
Cc: Konrad Rzeszutek Wilk
Link: http://lkml.kernel.org/r/0dddbadc1d4e3bfbaf887938ff42afc97a7cc1f2.1414618407.git.luto@amacapital.net
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/vsyscall_64.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 419e83b58436..2d912629c96e 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -307,6 +307,8 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 	if (!mm || mm->context.ia32_compat)
 		return NULL;
 #endif
+	if (vsyscall_mode == NONE)
+		return NULL;
 	return &gate_vma;
 }
 
@@ -327,7 +329,7 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr)
  */
 int in_gate_area_no_mm(unsigned long addr)
 {
-	return (addr & PAGE_MASK) == VSYSCALL_ADDR;
+	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
 void __init map_vsyscall(void)
@@ -335,10 +337,12 @@ void __init map_vsyscall(void)
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-		     vsyscall_mode == NATIVE
-		     ? PAGE_KERNEL_VSYSCALL
-		     : PAGE_KERNEL_VVAR);
+	if (vsyscall_mode != NONE)
+		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+			     vsyscall_mode == NATIVE
+			     ? PAGE_KERNEL_VSYSCALL
+			     : PAGE_KERNEL_VVAR);
+
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
 }
-- cgit v1.2.2

From 95c46b56922409ed8838b3b420b11cfebb8c6c88 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Wed, 29 Oct 2014 14:33:46 -0700
Subject: x86_64, vsyscall: Rewrite comment and clean up headers in vsyscall code

vsyscall_64.c is just vsyscall emulation. Tidy it up accordingly.

[ tglx: Preserved the original copyright notices ]

Signed-off-by: Andy Lutomirski
Reviewed-by: Josh Triplett
Cc: Konrad Rzeszutek Wilk
Link: http://lkml.kernel.org/r/9c448d5643d0fdb618f8cde9a54c21d2bcd486ce.1414618407.git.luto@amacapital.net
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/vsyscall_64.c | 50 ++++++++++++++++---------------------------
 1 file changed, 18 insertions(+), 32 deletions(-)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 2d912629c96e..7d9eb4bc10ac 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -1,52 +1,38 @@
 /*
+ * Copyright (c) 2012-2014 Andy Lutomirski
+ *
+ * Based on the original implementation which is:
  * Copyright (C) 2001 Andrea Arcangeli SuSE
  * Copyright 2003 Andi Kleen, SuSE Labs.
  *
- * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
+ * Parts of the original code have been moved to arch/x86/vdso/vma.c
+ *
+ * This file implements vsyscall emulation. vsyscalls are a legacy ABI:
+ * Userspace can request certain kernel services by calling fixed
+ * addresses. This concept is problematic:
  *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
+ * - It interferes with ASLR.
+ * - It's awkward to write code that lives in kernel addresses but is
+ *   callable by userspace at fixed addresses.
+ * - The whole concept is impossible for 32-bit compat userspace.
+ * - UML cannot easily virtualize a vsyscall.
  *
- * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
- * at virtual address -10Mbyte+1024bytes etc... There are at max 4
- * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
- * jumping out of line if necessary. We cannot add more with this
- * mechanism because older kernels won't return -ENOSYS.
+ * As of mid-2014, I believe that there is no new userspace code that
+ * will use a vsyscall if the vDSO is present. I hope that there will
+ * soon be no new userspace code that will ever use a vsyscall.
  *
- * Note: the concept clashes with user mode linux. UML users should
- * use the vDSO.
+ * The code in this file emulates vsyscalls when notified of a page
+ * fault to a vsyscall address.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include 
-#include 
 #include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
 #include 
 #include 
 
 #include 
-#include 
-#include 
-#include 
 #include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
 #include 
 
 #define CREATE_TRACE_POINTS
-- cgit v1.2.2

From 1ad83c858c7d4ea210429142c99a1548e6715a35 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Wed, 29 Oct 2014 14:33:47 -0700
Subject: x86_64,vsyscall: Make vsyscall emulation configurable

This adds CONFIG_X86_VSYSCALL_EMULATION, guarded by CONFIG_EXPERT.
Turning it off completely disables vsyscall emulation, saving ~3.5k
for vsyscall_64.c, 4k for vsyscall_emu_64.S (the fake vsyscall page),
some tiny amount of core mm code that supports a gate area, and
possibly 4k for a wasted pagetable. The latter is because the
vsyscall addresses are misaligned and fit poorly in the fixmap.

Signed-off-by: Andy Lutomirski
Reviewed-by: Josh Triplett
Cc: Konrad Rzeszutek Wilk
Link: http://lkml.kernel.org/r/406db88b8dd5f0cbbf38216d11be34bbb43c7eae.1414618407.git.luto@amacapital.net
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/Makefile | 3 +--
 arch/x86/kernel/setup.c  | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8f1e77440b2b..5d4502c8b983 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,8 +28,7 @@ obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= mcount_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_X86_64)	+= vsyscall_64.o
-obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
+obj-$(CONFIG_X86_VSYSCALL_EMULATION)	+= vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 235cfd39e0d7..59a6f884fdad 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1190,9 +1190,7 @@ void __init setup_arch(char **cmdline_p)
 
 	tboot_probe();
 
-#ifdef CONFIG_X86_64
 	map_vsyscall();
-#endif
 
 	generic_apic_probe();
-- cgit v1.2.2
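The reason setup.c can now call map_vsyscall() unconditionally is a
header change that falls outside the arch/x86/kernel filter shown here:
when CONFIG_X86_VSYSCALL_EMULATION is off, map_vsyscall() becomes an
empty inline stub. Roughly (a reconstruction, not the verbatim hunk):

/* arch/x86/include/asm/vsyscall.h, approximately: */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
#else
static inline void map_vsyscall(void) {}
#endif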
From 26893107aa717cd11010f0c278d02535defa1ac9 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Tue, 4 Nov 2014 15:36:50 -0800
Subject: x86_64/vsyscall: Restore orig_ax after vsyscall seccomp

The vsyscall emulation code sets orig_ax for seccomp's benefit, but it
forgot to set it back.

I'm not sure that this is observable at all, but it could cause
confusion to various /proc or ptrace users, and it's possible that it
could cause minor artifacts if a signal were to be delivered on return
from vsyscall emulation.

Signed-off-by: Andy Lutomirski
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/cdc6a564517a4df09235572ee5f530ccdcf933f7.1415144089.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/vsyscall_64.c | 1 +
 1 file changed, 1 insertion(+)
(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 7d9eb4bc10ac..2dcc6ff6fdcc 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -206,6 +206,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 		       "seccomp tried to change syscall nr or ip");
 		do_exit(SIGSYS);
 	}
+	regs->orig_ax = -1;
 	if (tmp)
 		goto do_ret;	/* skip requested */
-- cgit v1.2.2
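emulate_vsyscall() above runs whenever a process faults on the vsyscall
page in EMULATE mode. A small sketch that exercises that path
(0xffffffffff600000 is VSYSCALL_ADDR, and gettimeofday is the first
vsyscall slot; under vsyscall=none this call SIGSEGVs instead):

/* Call the legacy gettimeofday vsyscall at its fixed address. */
#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	int (*vgtod)(struct timeval *, struct timezone *) =
		(int (*)(struct timeval *, struct timezone *))0xffffffffff600000UL;
	struct timeval tv;

	if (vgtod(&tv, NULL) == 0)
		printf("vsyscall gettimeofday: %ld.%06ld\n",
		       (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}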