aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2007-10-11 05:17:10 -0400
committerThomas Gleixner <tglx@linutronix.de>2007-10-11 05:17:10 -0400
commit7648b1330c335601b7c09c25f77a03cda128fcab (patch)
tree8b92b501dc746b135bf9019472b425e8ef052714 /arch/x86
parent185f3d38900f750a4566f87cde6a178f3595a115 (diff)
x86_64: move vdso
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/vdso/.gitignore1
-rw-r--r--arch/x86/vdso/Makefile49
-rw-r--r--arch/x86/vdso/vclock_gettime.c121
-rw-r--r--arch/x86/vdso/vdso-note.S12
-rw-r--r--arch/x86/vdso/vdso-start.S2
-rw-r--r--arch/x86/vdso/vdso.S2
-rw-r--r--arch/x86/vdso/vdso.lds.S77
-rw-r--r--arch/x86/vdso/vextern.h16
-rw-r--r--arch/x86/vdso/vgetcpu.c50
-rw-r--r--arch/x86/vdso/vma.c140
-rw-r--r--arch/x86/vdso/voffset.h1
-rw-r--r--arch/x86/vdso/vvar.c12
12 files changed, 483 insertions, 0 deletions
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
new file mode 100644
index 000000000000..f8b69d84238e
--- /dev/null
+++ b/arch/x86/vdso/.gitignore
@@ -0,0 +1 @@
vdso.lds
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
new file mode 100644
index 000000000000..8d03de029d9b
--- /dev/null
+++ b/arch/x86/vdso/Makefile
@@ -0,0 +1,49 @@
1#
2# x86-64 vDSO.
3#
4
5# files to link into the vdso
6# vdso-start.o has to be first
7vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
8
9# files to link into kernel
10obj-y := vma.o vdso.o vdso-syms.o
11
12vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
13
14$(obj)/vdso.o: $(obj)/vdso.so
15
16targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o
17
18# The DSO images are built using a special linker script.
19quiet_cmd_syscall = SYSCALL $@
20 cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
21 -Wl,-T,$(filter-out FORCE,$^) -o $@
22
23export CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
24
25vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
26 $(call ld-option, -Wl$(comma)--hash-style=sysv) \
27 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
28SYSCFLAGS_vdso.so = $(vdso-flags)
29
30$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
31
32$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
33 $(call if_changed,syscall)
34
35CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
36
37$(obj)/vclock_gettime.o: CFLAGS = $(CFL)
38$(obj)/vgetcpu.o: CFLAGS = $(CFL)
39
40# We also create a special relocatable object that should mirror the symbol
41# table and layout of the linked DSO. With ld -R we can then refer to
42# these symbols in the kernel code rather than hand-coded addresses.
43extra-y += vdso-syms.o
44$(obj)/built-in.o: $(obj)/vdso-syms.o
45$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
46
47SYSCFLAGS_vdso-syms.o = -r -d
48$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
49 $(call if_changed,syscall)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
new file mode 100644
index 000000000000..5b54cdfb2b07
--- /dev/null
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -0,0 +1,121 @@
1/*
2 * Copyright 2006 Andi Kleen, SUSE Labs.
3 * Subject to the GNU Public License, v.2
4 *
5 * Fast user context implementation of clock_gettime and gettimeofday.
6 *
7 * The code should have no internal unresolved relocations.
8 * Check with readelf after changing.
9 * Also alternative() doesn't work.
10 */
11
12#include <linux/kernel.h>
13#include <linux/posix-timers.h>
14#include <linux/time.h>
15#include <linux/string.h>
16#include <asm/vsyscall.h>
17#include <asm/vgtod.h>
18#include <asm/timex.h>
19#include <asm/hpet.h>
20#include <asm/unistd.h>
21#include <asm/io.h>
22#include <asm/vgtod.h>
23#include "vextern.h"
24
25#define gtod vdso_vsyscall_gtod_data
26
27static long vdso_fallback_gettime(long clock, struct timespec *ts)
28{
29 long ret;
30 asm("syscall" : "=a" (ret) :
31 "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
32 return ret;
33}
34
35static inline long vgetns(void)
36{
37 long v;
38 cycles_t (*vread)(void);
39 vread = gtod->clock.vread;
40 v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask;
41 return (v * gtod->clock.mult) >> gtod->clock.shift;
42}
43
44static noinline int do_realtime(struct timespec *ts)
45{
46 unsigned long seq, ns;
47 do {
48 seq = read_seqbegin(&gtod->lock);
49 ts->tv_sec = gtod->wall_time_sec;
50 ts->tv_nsec = gtod->wall_time_nsec;
51 ns = vgetns();
52 } while (unlikely(read_seqretry(&gtod->lock, seq)));
53 timespec_add_ns(ts, ns);
54 return 0;
55}
56
57/* Copy of the version in kernel/time.c which we cannot directly access */
58static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
59{
60 while (nsec >= NSEC_PER_SEC) {
61 nsec -= NSEC_PER_SEC;
62 ++sec;
63 }
64 while (nsec < 0) {
65 nsec += NSEC_PER_SEC;
66 --sec;
67 }
68 ts->tv_sec = sec;
69 ts->tv_nsec = nsec;
70}
71
72static noinline int do_monotonic(struct timespec *ts)
73{
74 unsigned long seq, ns, secs;
75 do {
76 seq = read_seqbegin(&gtod->lock);
77 secs = gtod->wall_time_sec;
78 ns = gtod->wall_time_nsec + vgetns();
79 secs += gtod->wall_to_monotonic.tv_sec;
80 ns += gtod->wall_to_monotonic.tv_nsec;
81 } while (unlikely(read_seqretry(&gtod->lock, seq)));
82 vset_normalized_timespec(ts, secs, ns);
83 return 0;
84}
85
86int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
87{
88 if (likely(gtod->sysctl_enabled && gtod->clock.vread))
89 switch (clock) {
90 case CLOCK_REALTIME:
91 return do_realtime(ts);
92 case CLOCK_MONOTONIC:
93 return do_monotonic(ts);
94 }
95 return vdso_fallback_gettime(clock, ts);
96}
97int clock_gettime(clockid_t, struct timespec *)
98 __attribute__((weak, alias("__vdso_clock_gettime")));
99
100int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
101{
102 long ret;
103 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
104 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
105 offsetof(struct timespec, tv_nsec) ||
106 sizeof(*tv) != sizeof(struct timespec));
107 do_realtime((struct timespec *)tv);
108 tv->tv_usec /= 1000;
109 if (unlikely(tz != NULL)) {
110 /* This relies on gcc inlining the memcpy. We'll notice
111 if it ever fails to do so. */
112 memcpy(tz, &gtod->sys_tz, sizeof(struct timezone));
113 }
114 return 0;
115 }
116 asm("syscall" : "=a" (ret) :
117 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
118 return ret;
119}
120int gettimeofday(struct timeval *, struct timezone *)
121 __attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/x86/vdso/vdso-note.S b/arch/x86/vdso/vdso-note.S
new file mode 100644
index 000000000000..79a071e4357e
--- /dev/null
+++ b/arch/x86/vdso/vdso-note.S
@@ -0,0 +1,12 @@
1/*
2 * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
3 * Here we can supply some information useful to userland.
4 */
5
6#include <linux/uts.h>
7#include <linux/version.h>
8#include <linux/elfnote.h>
9
10ELFNOTE_START(Linux, 0, "a")
11 .long LINUX_VERSION_CODE
12ELFNOTE_END
diff --git a/arch/x86/vdso/vdso-start.S b/arch/x86/vdso/vdso-start.S
new file mode 100644
index 000000000000..2dc2cdb84d67
--- /dev/null
+++ b/arch/x86/vdso/vdso-start.S
@@ -0,0 +1,2 @@
1 .globl vdso_kernel_start
2vdso_kernel_start:
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
new file mode 100644
index 000000000000..4b1620a1529e
--- /dev/null
+++ b/arch/x86/vdso/vdso.S
@@ -0,0 +1,2 @@
1 .section ".vdso","a"
2 .incbin "arch/x86/vdso/vdso.so"
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
new file mode 100644
index 000000000000..b9a60e665d08
--- /dev/null
+++ b/arch/x86/vdso/vdso.lds.S
@@ -0,0 +1,77 @@
1/*
2 * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
3 * object prelinked to its virtual address, and with only one read-only
4 * segment (that fits in one page). This script controls its layout.
5 */
6#include <asm/asm-offsets.h>
7#include "voffset.h"
8
9#define VDSO_PRELINK 0xffffffffff700000
10
11SECTIONS
12{
13 . = VDSO_PRELINK + SIZEOF_HEADERS;
14
15 .hash : { *(.hash) } :text
16 .gnu.hash : { *(.gnu.hash) }
17 .dynsym : { *(.dynsym) }
18 .dynstr : { *(.dynstr) }
19 .gnu.version : { *(.gnu.version) }
20 .gnu.version_d : { *(.gnu.version_d) }
21 .gnu.version_r : { *(.gnu.version_r) }
22
23 /* This linker script is used both with -r and with -shared.
24 For the layouts to match, we need to skip more than enough
25 space for the dynamic symbol table et al. If this amount
26 is insufficient, ld -shared will barf. Just increase it here. */
27 . = VDSO_PRELINK + VDSO_TEXT_OFFSET;
28
29 .text : { *(.text) } :text
30 .text.ptr : { *(.text.ptr) } :text
31 . = VDSO_PRELINK + 0x900;
32 .data : { *(.data) } :text
33 .bss : { *(.bss) } :text
34
35 .altinstructions : { *(.altinstructions) } :text
36 .altinstr_replacement : { *(.altinstr_replacement) } :text
37
38 .note : { *(.note.*) } :text :note
39 .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
40 .eh_frame : { KEEP (*(.eh_frame)) } :text
41 .dynamic : { *(.dynamic) } :text :dynamic
42 .useless : {
43 *(.got.plt) *(.got)
44 *(.gnu.linkonce.d.*)
45 *(.dynbss)
46 *(.gnu.linkonce.b.*)
47 } :text
48}
49
50/*
51 * We must supply the ELF program headers explicitly to get just one
52 * PT_LOAD segment, and set the flags explicitly to make segments read-only.
53 */
54PHDRS
55{
56 text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
57 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
58 note PT_NOTE FLAGS(4); /* PF_R */
59 eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
60}
61
62/*
63 * This controls what symbols we export from the DSO.
64 */
65VERSION
66{
67 LINUX_2.6 {
68 global:
69 clock_gettime;
70 __vdso_clock_gettime;
71 gettimeofday;
72 __vdso_gettimeofday;
73 getcpu;
74 __vdso_getcpu;
75 local: *;
76 };
77}
diff --git a/arch/x86/vdso/vextern.h b/arch/x86/vdso/vextern.h
new file mode 100644
index 000000000000..1683ba2ae3e8
--- /dev/null
+++ b/arch/x86/vdso/vextern.h
@@ -0,0 +1,16 @@
1#ifndef VEXTERN
2#include <asm/vsyscall.h>
3#define VEXTERN(x) \
4 extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
5#endif
6
7#define VMAGIC 0xfeedbabeabcdefabUL
8
9/* Any kernel variables used in the vDSO must be exported in the main
10 kernel's vmlinux.lds.S/vsyscall.h/proper __section and
11 put into vextern.h and be referenced as a pointer with vdso prefix.
12 The main kernel later fills in the values. */
13
14VEXTERN(jiffies)
15VEXTERN(vgetcpu_mode)
16VEXTERN(vsyscall_gtod_data)
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
new file mode 100644
index 000000000000..91f6e85d0fc2
--- /dev/null
+++ b/arch/x86/vdso/vgetcpu.c
@@ -0,0 +1,50 @@
1/*
2 * Copyright 2006 Andi Kleen, SUSE Labs.
3 * Subject to the GNU Public License, v.2
4 *
5 * Fast user context implementation of getcpu()
6 */
7
8#include <linux/kernel.h>
9#include <linux/getcpu.h>
10#include <linux/jiffies.h>
11#include <linux/time.h>
12#include <asm/vsyscall.h>
13#include <asm/vgtod.h>
14#include "vextern.h"
15
16long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
17{
18 unsigned int dummy, p;
19 unsigned long j = 0;
20
21 /* Fast cache - only recompute value once per jiffies and avoid
22 relatively costly rdtscp/cpuid otherwise.
23 This works because the scheduler usually keeps the process
24 on the same CPU and this syscall doesn't guarantee its
25 results anyways.
26 We do this here because otherwise user space would do it on
27 its own in a likely inferior way (no access to jiffies).
28 If you don't like it pass NULL. */
29 if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) {
30 p = tcache->blob[1];
31 } else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
32 /* Load per CPU data from RDTSCP */
33 rdtscp(dummy, dummy, p);
34 } else {
35 /* Load per CPU data from GDT */
36 asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
37 }
38 if (tcache) {
39 tcache->blob[0] = j;
40 tcache->blob[1] = p;
41 }
42 if (cpu)
43 *cpu = p & 0xfff;
44 if (node)
45 *node = p >> 12;
46 return 0;
47}
48
49long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
50 __attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
new file mode 100644
index 000000000000..ff9333e5fb08
--- /dev/null
+++ b/arch/x86/vdso/vma.c
@@ -0,0 +1,140 @@
1/*
2 * Set up the VMAs to tell the VM about the vDSO.
3 * Copyright 2007 Andi Kleen, SUSE Labs.
4 * Subject to the GPL, v.2
5 */
6#include <linux/mm.h>
7#include <linux/err.h>
8#include <linux/sched.h>
9#include <linux/init.h>
10#include <linux/random.h>
11#include <asm/vsyscall.h>
12#include <asm/vgtod.h>
13#include <asm/proto.h>
14#include "voffset.h"
15
16int vdso_enabled = 1;
17
18#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
19#include "vextern.h"
20#undef VEXTERN
21
22extern char vdso_kernel_start[], vdso_start[], vdso_end[];
23extern unsigned short vdso_sync_cpuid;
24
25struct page **vdso_pages;
26
27static inline void *var_ref(void *vbase, char *var, char *name)
28{
29 unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
30 void *p = vbase + offset;
31 if (*(void **)p != (void *)VMAGIC) {
32 printk("VDSO: variable %s broken\n", name);
33 vdso_enabled = 0;
34 }
35 return p;
36}
37
38static int __init init_vdso_vars(void)
39{
40 int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
41 int i;
42 char *vbase;
43
44 vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
45 if (!vdso_pages)
46 goto oom;
47 for (i = 0; i < npages; i++) {
48 struct page *p;
49 p = alloc_page(GFP_KERNEL);
50 if (!p)
51 goto oom;
52 vdso_pages[i] = p;
53 copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
54 }
55
56 vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
57 if (!vbase)
58 goto oom;
59
60 if (memcmp(vbase, "\177ELF", 4)) {
61 printk("VDSO: I'm broken; not ELF\n");
62 vdso_enabled = 0;
63 }
64
65#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
66#define VEXTERN(x) \
67 V(vdso_ ## x) = &__ ## x;
68#include "vextern.h"
69#undef VEXTERN
70 return 0;
71
72 oom:
73 printk("Cannot allocate vdso\n");
74 vdso_enabled = 0;
75 return -ENOMEM;
76}
77__initcall(init_vdso_vars);
78
79struct linux_binprm;
80
81/* Put the vdso above the (randomized) stack with another randomized offset.
82 This way there is no hole in the middle of address space.
83 To save memory make sure it is still in the same PTE as the stack top.
84 This doesn't give that many random bits */
85static unsigned long vdso_addr(unsigned long start, unsigned len)
86{
87 unsigned long addr, end;
88 unsigned offset;
89 end = (start + PMD_SIZE - 1) & PMD_MASK;
90 if (end >= TASK_SIZE64)
91 end = TASK_SIZE64;
92 end -= len;
93 /* This loses some more bits than a modulo, but is cheaper */
94 offset = get_random_int() & (PTRS_PER_PTE - 1);
95 addr = start + (offset << PAGE_SHIFT);
96 if (addr >= end)
97 addr = end;
98 return addr;
99}
100
101/* Setup a VMA at program startup for the vsyscall page.
102 Not called for compat tasks */
103int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
104{
105 struct mm_struct *mm = current->mm;
106 unsigned long addr;
107 int ret;
108 unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
109
110 if (!vdso_enabled)
111 return 0;
112
113 down_write(&mm->mmap_sem);
114 addr = vdso_addr(mm->start_stack, len);
115 addr = get_unmapped_area(NULL, addr, len, 0, 0);
116 if (IS_ERR_VALUE(addr)) {
117 ret = addr;
118 goto up_fail;
119 }
120
121 ret = install_special_mapping(mm, addr, len,
122 VM_READ|VM_EXEC|
123 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
124 VM_ALWAYSDUMP,
125 vdso_pages);
126 if (ret)
127 goto up_fail;
128
129 current->mm->context.vdso = (void *)addr;
130up_fail:
131 up_write(&mm->mmap_sem);
132 return ret;
133}
134
135static __init int vdso_setup(char *s)
136{
137 vdso_enabled = simple_strtoul(s, NULL, 0);
138 return 0;
139}
140__setup("vdso=", vdso_setup);
diff --git a/arch/x86/vdso/voffset.h b/arch/x86/vdso/voffset.h
new file mode 100644
index 000000000000..4af67c79085f
--- /dev/null
+++ b/arch/x86/vdso/voffset.h
@@ -0,0 +1 @@
#define VDSO_TEXT_OFFSET 0x600
diff --git a/arch/x86/vdso/vvar.c b/arch/x86/vdso/vvar.c
new file mode 100644
index 000000000000..6fc22219a472
--- /dev/null
+++ b/arch/x86/vdso/vvar.c
@@ -0,0 +1,12 @@
1/* Define pointer to external vDSO variables.
2 These are part of the vDSO. The kernel fills in the real addresses
3 at boot time. This is done because when the vdso is linked the
4 kernel isn't yet and we don't know the final addresses. */
5#include <linux/kernel.h>
6#include <linux/time.h>
7#include <asm/vsyscall.h>
8#include <asm/timex.h>
9#include <asm/vgtod.h>
10
11#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC;
12#include "vextern.h"