author    Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 17:24:20 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 17:24:20 -0500
commit    3100e448e7d74489a96cb7b45d88fe6962774eaa
tree      53e46a702bd191ca43639b560d2bb1d3b0ad18c8  /arch/x86/kernel
parent    c9f861c77269bc9950c16c6404a9476062241671
parent    26893107aa717cd11010f0c278d02535defa1ac9
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar:
 "Various vDSO updates from Andy Lutomirski, mostly cleanups and
  reorganization to improve maintainability, but also some
  micro-optimizations and robustization changes"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_64/vsyscall: Restore orig_ax after vsyscall seccomp
  x86_64: Add a comment explaining the TASK_SIZE_MAX guard page
  x86_64,vsyscall: Make vsyscall emulation configurable
  x86_64, vsyscall: Rewrite comment and clean up headers in vsyscall code
  x86_64, vsyscall: Turn vsyscalls all the way off when vsyscall==none
  x86,vdso: Use LSL unconditionally for vgetcpu
  x86: vdso: Fix build with older gcc
  x86_64/vdso: Clean up vgetcpu init and merge the vdso initcalls
  x86_64/vdso: Remove jiffies from the vvar page
  x86/vdso: Make the PER_CPU segment 32 bits
  x86/vdso: Make the PER_CPU segment start out accessed
  x86/vdso: Change the PER_CPU segment to use struct desc_struct
  x86_64/vdso: Move getcpu code from vsyscall_64.c to vdso/vma.c
  x86_64/vsyscall: Move all of the gate_area code to vsyscall_64.c
Diffstat (limited to 'arch/x86/kernel')

 -rw-r--r--  arch/x86/kernel/Makefile       |   3
 -rw-r--r--  arch/x86/kernel/cpu/common.c   |  10
 -rw-r--r--  arch/x86/kernel/setup.c        |   2
 -rw-r--r--  arch/x86/kernel/time.c         |   2
 -rw-r--r--  arch/x86/kernel/vsyscall_64.c  | 147

 5 files changed, 66 insertions(+), 98 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8f1e77440b2b..5d4502c8b983 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,8 +28,7 @@ obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= mcount_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_X86_64)	+= vsyscall_64.o
-obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
+obj-$(CONFIG_X86_VSYSCALL_EMULATION)	+= vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cfa9b5b2c27a..c6049650c093 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -958,14 +958,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_X86_64
-static void vgetcpu_set_mode(void)
-{
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
-		vgetcpu_mode = VGETCPU_RDTSCP;
-	else
-		vgetcpu_mode = VGETCPU_LSL;
-}
-
 #ifdef CONFIG_IA32_EMULATION
 /* May not be __init: called during resume */
 static void syscall32_cpu_init(void)
@@ -1008,8 +1000,6 @@ void __init identify_boot_cpu(void)
 #ifdef CONFIG_X86_32
 	sysenter_setup();
 	enable_sep_cpu();
-#else
-	vgetcpu_set_mode();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
 }
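
The removed vgetcpu_set_mode() existed only to choose between RDTSCP and
LSL at boot; the series ("x86,vdso: Use LSL unconditionally for vgetcpu")
drops that choice because LSL works on every x86_64 CPU. A minimal sketch
of the LSL read the vDSO now always performs, assuming the per-CPU GDT
segment layout programmed by the kernel (CPU number in the low 12 limit
bits, NUMA node in the bits above); the selector constant mirrors the
kernel's __PER_CPU_SEG and is illustrative:

	/* Read the CPU/node encoding from the per-CPU GDT segment limit.
	 * LSL loads a segment limit on every x86_64 CPU, unlike RDTSCP,
	 * so no boot-time mode selection is needed. */
	#define PER_CPU_SEL ((15 << 3) | 3)	/* GDT_ENTRY_PER_CPU, RPL 3 */

	static inline void lsl_getcpu(unsigned int *cpu, unsigned int *node)
	{
		unsigned int p;

		asm("lsl %1, %0" : "=r" (p) : "r" ((unsigned int)PER_CPU_SEL));
		if (cpu)
			*cpu = p & 0xfff;	/* bits 0-11: CPU number */
		if (node)
			*node = p >> 12;	/* higher limit bits: node */
	}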
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 214245d6b996..ab4734e5411d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1192,9 +1192,7 @@ void __init setup_arch(char **cmdline_p)
 
 	tboot_probe();
 
-#ifdef CONFIG_X86_64
 	map_vsyscall();
-#endif
 
 	generic_apic_probe();
 
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0fa29609b2c4..25adc0e16eaa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
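
Background for this hunk: DEFINE_VVAR places a variable in the vvar page,
which is mapped read-only into every process for the vDSO to read. Since
"x86_64/vdso: Remove jiffies from the vvar page" removes the last vDSO
reader of jiffies, it can revert to an ordinary kernel variable. A
paraphrase of the macro from arch/x86/include/asm/vvar.h of this era
(from memory; consult the tree for the authoritative definition):

	/* Paraphrased: emit the variable into a dedicated .vvar_<name>
	 * section; the kernel linker script packs these sections into
	 * the single vvar page mapped into user address space. */
	#define DEFINE_VVAR(type, name)					\
		type name						\
		__attribute__((section(".vvar_" #name), aligned(16)))	\
		__visible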
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 957779f4eb40..2dcc6ff6fdcc 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -1,59 +1,43 @@
1/* 1/*
2 * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
3 *
4 * Based on the original implementation which is:
2 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE 5 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
3 * Copyright 2003 Andi Kleen, SuSE Labs. 6 * Copyright 2003 Andi Kleen, SuSE Labs.
4 * 7 *
5 * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ] 8 * Parts of the original code have been moved to arch/x86/vdso/vma.c
9 *
10 * This file implements vsyscall emulation. vsyscalls are a legacy ABI:
11 * Userspace can request certain kernel services by calling fixed
12 * addresses. This concept is problematic:
6 * 13 *
7 * Thanks to hpa@transmeta.com for some useful hint. 14 * - It interferes with ASLR.
8 * Special thanks to Ingo Molnar for his early experience with 15 * - It's awkward to write code that lives in kernel addresses but is
9 * a different vsyscall implementation for Linux/IA32 and for the name. 16 * callable by userspace at fixed addresses.
17 * - The whole concept is impossible for 32-bit compat userspace.
18 * - UML cannot easily virtualize a vsyscall.
10 * 19 *
11 * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located 20 * As of mid-2014, I believe that there is no new userspace code that
12 * at virtual address -10Mbyte+1024bytes etc... There are at max 4 21 * will use a vsyscall if the vDSO is present. I hope that there will
13 * vsyscalls. One vsyscall can reserve more than 1 slot to avoid 22 * soon be no new userspace code that will ever use a vsyscall.
14 * jumping out of line if necessary. We cannot add more with this
15 * mechanism because older kernels won't return -ENOSYS.
16 * 23 *
17 * Note: the concept clashes with user mode linux. UML users should 24 * The code in this file emulates vsyscalls when notified of a page
18 * use the vDSO. 25 * fault to a vsyscall address.
19 */ 26 */
20 27
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/time.h>
24#include <linux/init.h>
25#include <linux/kernel.h> 28#include <linux/kernel.h>
26#include <linux/timer.h> 29#include <linux/timer.h>
27#include <linux/seqlock.h>
28#include <linux/jiffies.h>
29#include <linux/sysctl.h>
30#include <linux/topology.h>
31#include <linux/timekeeper_internal.h>
32#include <linux/getcpu.h>
33#include <linux/cpu.h>
34#include <linux/smp.h>
35#include <linux/notifier.h>
36#include <linux/syscalls.h> 30#include <linux/syscalls.h>
37#include <linux/ratelimit.h> 31#include <linux/ratelimit.h>
38 32
39#include <asm/vsyscall.h> 33#include <asm/vsyscall.h>
40#include <asm/pgtable.h>
41#include <asm/compat.h>
42#include <asm/page.h>
43#include <asm/unistd.h> 34#include <asm/unistd.h>
44#include <asm/fixmap.h> 35#include <asm/fixmap.h>
45#include <asm/errno.h>
46#include <asm/io.h>
47#include <asm/segment.h>
48#include <asm/desc.h>
49#include <asm/topology.h>
50#include <asm/traps.h> 36#include <asm/traps.h>
51 37
52#define CREATE_TRACE_POINTS 38#define CREATE_TRACE_POINTS
53#include "vsyscall_trace.h" 39#include "vsyscall_trace.h"
54 40
55DEFINE_VVAR(int, vgetcpu_mode);
56
57static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; 41static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
58 42
59static int __init vsyscall_setup(char *str) 43static int __init vsyscall_setup(char *str)
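
To make the new header comment's "fixed addresses" concrete: the legacy
ABI puts gettimeofday at 0xffffffffff600000, time at +0x400, and getcpu
at +0x800. A hedged userspace sketch of a legacy call, which under
vsyscall=emulate faults into the emulate_vsyscall() path this file
implements:

	/* Legacy (pre-vDSO) call to the gettimeofday vsyscall at its
	 * ABI-fixed address.  Under vsyscall=emulate the call traps and
	 * is emulated; under vsyscall=none it raises SIGSEGV. */
	#include <stdio.h>
	#include <sys/time.h>

	typedef int (*vgtod_t)(struct timeval *, struct timezone *);

	int main(void)
	{
		struct timeval tv;
		vgtod_t vgtod = (vgtod_t)0xffffffffff600000UL;

		if (vgtod(&tv, NULL) == 0)
			printf("tv_sec=%ld\n", (long)tv.tv_sec);
		return 0;
	}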
@@ -222,6 +206,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 			      "seccomp tried to change syscall nr or ip");
 		do_exit(SIGSYS);
 	}
+	regs->orig_ax = -1;
 	if (tmp)
 		goto do_ret;  /* skip requested */
 
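
The added regs->orig_ax = -1 is the fix from "x86_64/vsyscall: Restore
orig_ax after vsyscall seccomp". For orientation, the surrounding flow in
emulate_vsyscall() looks roughly like this (paraphrased context, not part
of the hunk):

	/* orig_ax is set so seccomp filters see a normal-looking
	 * syscall, then must be restored to -1: a vsyscall fault never
	 * entered via the real syscall path, so a stale orig_ax would
	 * confuse syscall-restart logic. */
	regs->orig_ax = syscall_nr;
	regs->ax = -ENOSYS;
	tmp = secure_computing(syscall_nr);
	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
		warn_bad_vsyscall(KERN_DEBUG, regs,
				  "seccomp tried to change syscall nr or ip");
		do_exit(SIGSYS);
	}
	regs->orig_ax = -1;	/* the line this hunk adds */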
@@ -284,46 +269,54 @@ sigsegv:
 }
 
 /*
- * Assume __initcall executes before all user space. Hopefully kmod
- * doesn't violate that. We'll find out if it does.
+ * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
  */
-static void vsyscall_set_cpu(int cpu)
+static const char *gate_vma_name(struct vm_area_struct *vma)
 {
-	unsigned long d;
-	unsigned long node = 0;
-#ifdef CONFIG_NUMA
-	node = cpu_to_node(cpu);
-#endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
-		write_rdtscp_aux((node << 12) | cpu);
-
-	/*
-	 * Store cpu number in limit so that it can be loaded quickly
-	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
-	 */
-	d = 0x0f40000000000ULL;
-	d |= cpu;
-	d |= (node & 0xf) << 12;
-	d |= (node >> 4) << 48;
-
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	return "[vsyscall]";
 }
-
-static void cpu_vsyscall_init(void *arg)
+static struct vm_operations_struct gate_vma_ops = {
+	.name = gate_vma_name,
+};
+static struct vm_area_struct gate_vma = {
+	.vm_start	= VSYSCALL_ADDR,
+	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
+	.vm_page_prot	= PAGE_READONLY_EXEC,
+	.vm_flags	= VM_READ | VM_EXEC,
+	.vm_ops		= &gate_vma_ops,
+};
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-	/* preemption should be already off */
-	vsyscall_set_cpu(raw_smp_processor_id());
+#ifdef CONFIG_IA32_EMULATION
+	if (!mm || mm->context.ia32_compat)
+		return NULL;
+#endif
+	if (vsyscall_mode == NONE)
+		return NULL;
+	return &gate_vma;
 }
 
-static int
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	long cpu = (long)arg;
+	struct vm_area_struct *vma = get_gate_vma(mm);
+
+	if (!vma)
+		return 0;
 
-	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
+	return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
 
-	return NOTIFY_DONE;
+/*
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
+ */
+int in_gate_area_no_mm(unsigned long addr)
+{
+	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
 void __init map_vsyscall(void)
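
This pseudo-VMA is what backs the [vsyscall] entry in /proc/<pid>/maps
and keeps ptrace able to read the page. A small illustrative userspace
check (not from the patch):

	/* Print the [vsyscall] mapping, if present; with vsyscall=none
	 * get_gate_vma() returns NULL and no such line appears. */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/self/maps", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (strstr(line, "[vsyscall]"))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}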
@@ -331,24 +324,12 @@ void __init map_vsyscall(void)
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-		     vsyscall_mode == NATIVE
-		     ? PAGE_KERNEL_VSYSCALL
-		     : PAGE_KERNEL_VVAR);
+	if (vsyscall_mode != NONE)
+		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+			     vsyscall_mode == NATIVE
+			     ? PAGE_KERNEL_VSYSCALL
+			     : PAGE_KERNEL_VVAR);
+
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
 }
-
-static int __init vsyscall_init(void)
-{
-	cpu_notifier_register_begin();
-
-	on_each_cpu(cpu_vsyscall_init, NULL, 1);
-	/* notifier priority > KVM */
-	__hotcpu_notifier(cpu_vsyscall_notifier, 30);
-
-	cpu_notifier_register_done();
-
-	return 0;
-}
-__initcall(vsyscall_init);
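
Net effect of this last hunk plus the removed initcall: all vgetcpu setup
now lives in the vDSO code, and with vsyscall=none the page is simply
never mapped. A hedged demo probing that behavior (illustrative; run on a
kernel booted with vsyscall=none to see the SIGSEGV branch):

	/* Probe the vsyscall page.  Reading it succeeds when the page is
	 * mapped (vsyscall=emulate or =native, both user-readable) and
	 * raises SIGSEGV when map_vsyscall() skipped the fixmap. */
	#include <setjmp.h>
	#include <signal.h>
	#include <stdio.h>

	static sigjmp_buf env;

	static void on_segv(int sig)
	{
		(void)sig;
		siglongjmp(env, 1);
	}

	int main(void)
	{
		volatile const unsigned char *p =
			(const unsigned char *)0xffffffffff600000UL;

		signal(SIGSEGV, on_segv);
		if (sigsetjmp(env, 1) == 0)
			printf("vsyscall page mapped, first byte: 0x%02x\n", *p);
		else
			printf("vsyscall page not mapped (vsyscall=none?)\n");
		return 0;
	}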