author     Andi Kleen <ak@suse.de>  2007-07-21 11:10:01 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-07-21 21:37:08 -0400
commit     2aae950b21e4bc789d1fc6668faf67e8748300b7 (patch)
tree       5777768cc2493695ec9f4000c14f3584b3db28fd /arch/x86_64/vdso/vma.c
parent     a586df067afe0580bb02b7a6312ca2afe49bba03 (diff)
x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu
This implements a new vDSO for x86-64. The concept is similar to the
existing vDSOs on i386 and PPC. x86-64 has had static vsyscalls before,
but these are not flexible enough anymore.

A vDSO is an ELF shared library supplied by the kernel that is mapped
into user address space. The vDSO mapping is randomized for each process
for security reasons. The randomization was needed for clock_gettime in
particular, because clock_gettime always needs a syscall fallback, and
having one at a fixed address would have made buffer overflow exploits
too easy to write.

The vDSO can be disabled with the vdso=0 boot parameter.

It currently includes a new gettimeofday implementation and an optimized
clock_gettime(). The gettimeofday implementation is slightly faster than
the one in the old vsyscall; clock_gettime is significantly faster than
the syscall for CLOCK_MONOTONIC and CLOCK_REALTIME. The new calls are
generally faster than the old vsyscalls.

Advantages over the old x86-64 vsyscalls:
- Extensible
- Randomized
- Cleaner
- Easier to virtualize (the old static address range caused overhead,
  e.g. for Xen, because it had to create special page tables for it)

Weak points:
- glibc support still to be written

The VM interface is partly based on Ingo Molnar's i386 version.

Includes a compile fix from Joachim Deguara.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
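Because the mapping address is randomized, userspace cannot hardcode it
and instead finds the vDSO through the ELF auxiliary vector. A minimal
sketch of that lookup, mirroring the ELF-magic check init_vdso_vars()
performs below; it assumes glibc's getauxval() (added in glibc 2.16,
long after this commit; older systems would parse /proc/self/auxv by
hand):

#include <stdio.h>
#include <string.h>
#include <sys/auxv.h>	/* getauxval(); AT_SYSINFO_EHDR comes via <elf.h> */

int main(void)
{
	unsigned long base = getauxval(AT_SYSINFO_EHDR);

	if (!base) {	/* e.g. the kernel was booted with vdso=0 */
		puts("no vDSO mapped");
		return 1;
	}
	/* The mapping starts with an ELF header, like any shared library. */
	if (memcmp((void *)base, "\177ELF", 4) == 0)
		printf("vDSO mapped at %#lx\n", base);
	return 0;
}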
Diffstat (limited to 'arch/x86_64/vdso/vma.c')
-rw-r--r--  arch/x86_64/vdso/vma.c  139
1 file changed, 139 insertions(+), 0 deletions(-)
diff --git a/arch/x86_64/vdso/vma.c b/arch/x86_64/vdso/vma.c
new file mode 100644
index 000000000000..d4cb83a6c066
--- /dev/null
+++ b/arch/x86_64/vdso/vma.c
@@ -0,0 +1,139 @@
/*
 * Set up the VMAs to tell the VM about the vDSO.
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 */
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/random.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include "voffset.h"

int vdso_enabled = 1;

#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
#include "vextern.h"
#undef VEXTERN

extern char vdso_kernel_start[], vdso_start[], vdso_end[];
extern unsigned short vdso_sync_cpuid;

struct page **vdso_pages;

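/*
 * Resolve the address of a vDSO-internal variable inside the kernel's
 * vmap of the image, and sanity-check that it still holds the VMAGIC
 * placeholder it was linked with; otherwise mark the vDSO broken.
 */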
static inline void *var_ref(void *vbase, char *var, char *name)
{
	unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
	void *p = vbase + offset;
	if (*(void **)p != (void *)VMAGIC) {
		printk("VDSO: variable %s broken\n", name);
		vdso_enabled = 0;
	}
	return p;
}

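/*
 * Copy the vDSO image into freshly allocated pages, map them
 * contiguously in kernel space, and patch each vdso_* pointer declared
 * via vextern.h to point at the corresponding kernel symbol.
 */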
static int __init init_vdso_vars(void)
{
	int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
	int i;
	char *vbase;

	vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
	if (!vdso_pages)
		goto oom;
	for (i = 0; i < npages; i++) {
		struct page *p;
		p = alloc_page(GFP_KERNEL);
		if (!p)
			goto oom;
		vdso_pages[i] = p;
		copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
	}

	vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
	if (!vbase)
		goto oom;

	if (memcmp(vbase, "\177ELF", 4)) {
		printk("VDSO: I'm broken; not ELF\n");
		vdso_enabled = 0;
	}

#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
#define VEXTERN(x) \
	V(vdso_ ## x) = &__ ## x;
#include "vextern.h"
#undef VEXTERN
	return 0;

 oom:
	printk("Cannot allocate vdso\n");
	vdso_enabled = 0;
	return -ENOMEM;
}
__initcall(init_vdso_vars);

struct linux_binprm;

/* Put the vdso above the (randomized) stack with another randomized offset.
   This way there is no hole in the middle of address space.
   To save memory make sure it is still in the same PTE as the stack top.
   This doesn't give that many random bits */
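/*
 * Rough numbers, assuming 4K pages: PTRS_PER_PTE is 512, so the random
 * offset below spans 0..511 pages, i.e. at most ~2MB above the stack
 * top -- roughly 9 bits of entropy.
 */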
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
	unsigned long addr, end;
	unsigned offset;
	end = (start + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE64)
		end = TASK_SIZE64;
	end -= len;
	/* This loses some more bits than a modulo, but is cheaper */
	offset = get_random_int() & (PTRS_PER_PTE - 1);
	addr = start + (offset << PAGE_SHIFT);
	if (addr >= end)
		addr = end;
	return addr;
}

/* Setup a VMA at program startup for the vsyscall page.
   Not called for compat tasks */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret;
	unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);

	if (!vdso_enabled)
		return 0;

	down_write(&mm->mmap_sem);
	addr = vdso_addr(mm->start_stack, len);
	addr = get_unmapped_area(NULL, addr, len, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

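	/*
	 * Map the preallocated vDSO pages read+exec.  VM_MAYWRITE keeps
	 * mprotect() usable on the range (useful for debuggers), and
	 * VM_ALWAYSDUMP makes the pages appear in core dumps.
	 */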
	ret = install_special_mapping(mm, addr, len,
				      VM_READ|VM_EXEC|
				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
				      VM_ALWAYSDUMP,
				      vdso_pages);
	if (ret)
		goto up_fail;

	current->mm->context.vdso = (void *)addr;
up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}

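/* Parse the vdso= boot parameter; vdso=0 disables the mapping entirely. */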
static __init int vdso_setup(char *s)
{
	vdso_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);
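The commit message's point about the syscall fallback is worth
illustrating: a vDSO clock_gettime must contain a path that issues the
real syscall for clocks it cannot serve from userspace, and such code
sitting at a fixed address is what made the old static vsyscalls
exploit-friendly. A hedged sketch of the fallback pattern, written as an
ordinary userspace program (illustrative only; the patch's actual
implementation lives in vclock_gettime.c, not in this file):

#include <stdio.h>
#include <time.h>

static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	/* 228 is __NR_clock_gettime on x86-64; the syscall instruction
	 * clobbers rcx and r11. */
	asm volatile ("syscall"
		      : "=a" (ret)
		      : "0" (228), "D" (clock), "S" (ts)
		      : "rcx", "r11", "memory");
	return ret;
}

int main(void)
{
	struct timespec ts;

	if (vdso_fallback_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}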