diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 2 | ||||
-rw-r--r-- | arch/x86_64/Makefile | 3 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_binfmt.c | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 9 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 22 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 9 | ||||
-rw-r--r-- | arch/x86_64/vdso/Makefile | 49 | ||||
-rw-r--r-- | arch/x86_64/vdso/vclock_gettime.c | 120 | ||||
-rw-r--r-- | arch/x86_64/vdso/vdso-note.S | 12 | ||||
-rw-r--r-- | arch/x86_64/vdso/vdso-start.S | 2 | ||||
-rw-r--r-- | arch/x86_64/vdso/vdso.S | 2 | ||||
-rw-r--r-- | arch/x86_64/vdso/vdso.lds.S | 77 | ||||
-rw-r--r-- | arch/x86_64/vdso/vextern.h | 16 | ||||
-rw-r--r-- | arch/x86_64/vdso/vgetcpu.c | 50 | ||||
-rw-r--r-- | arch/x86_64/vdso/vma.c | 139 | ||||
-rw-r--r-- | arch/x86_64/vdso/voffset.h | 1 | ||||
-rw-r--r-- | arch/x86_64/vdso/vvar.c | 12 | ||||
-rw-r--r-- | include/asm-x86_64/auxvec.h | 2 | ||||
-rw-r--r-- | include/asm-x86_64/elf.h | 13 | ||||
-rw-r--r-- | include/asm-x86_64/mmu.h | 1 | ||||
-rw-r--r-- | include/asm-x86_64/vgtod.h | 29 | ||||
-rw-r--r-- | include/asm-x86_64/vsyscall.h | 3 |
23 files changed, 554 insertions, 21 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5fbe07706ae9..fb80e9ffea68 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1882,7 +1882,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1882 | usbhid.mousepoll= | 1882 | usbhid.mousepoll= |
1883 | [USBHID] The interval which mice are to be polled at. | 1883 | [USBHID] The interval which mice are to be polled at. |
1884 | 1884 | ||
1885 | vdso= [IA-32,SH] | 1885 | vdso= [IA-32,SH,x86-64] |
1886 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) | 1886 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) |
1887 | vdso=1: enable VDSO (default) | 1887 | vdso=1: enable VDSO (default) |
1888 | vdso=0: disable VDSO mapping | 1888 | vdso=0: disable VDSO mapping |
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 29617ae3926d..128561d3e876 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile | |||
@@ -76,7 +76,8 @@ head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kern | |||
76 | libs-y += arch/x86_64/lib/ | 76 | libs-y += arch/x86_64/lib/ |
77 | core-y += arch/x86_64/kernel/ \ | 77 | core-y += arch/x86_64/kernel/ \ |
78 | arch/x86_64/mm/ \ | 78 | arch/x86_64/mm/ \ |
79 | arch/x86_64/crypto/ | 79 | arch/x86_64/crypto/ \ |
80 | arch/x86_64/vdso/ | ||
80 | core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ | 81 | core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ |
81 | drivers-$(CONFIG_PCI) += arch/x86_64/pci/ | 82 | drivers-$(CONFIG_PCI) += arch/x86_64/pci/ |
82 | drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ | 83 | drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ |
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index ed56a8806eab..b70f3e7cf06c 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c | |||
@@ -38,6 +38,7 @@ | |||
38 | 38 | ||
39 | int sysctl_vsyscall32 = 1; | 39 | int sysctl_vsyscall32 = 1; |
40 | 40 | ||
41 | #undef ARCH_DLINFO | ||
41 | #define ARCH_DLINFO do { \ | 42 | #define ARCH_DLINFO do { \ |
42 | if (sysctl_vsyscall32) { \ | 43 | if (sysctl_vsyscall32) { \ |
43 | NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ | 44 | NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 4a0895bacf51..5405a69a1f72 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <asm/hpet.h> | 44 | #include <asm/hpet.h> |
45 | #include <asm/mpspec.h> | 45 | #include <asm/mpspec.h> |
46 | #include <asm/nmi.h> | 46 | #include <asm/nmi.h> |
47 | #include <asm/vgtod.h> | ||
47 | 48 | ||
48 | static char *timename = NULL; | 49 | static char *timename = NULL; |
49 | 50 | ||
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 5c57ea4591c1..c2d5a840cb1a 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -93,6 +93,9 @@ SECTIONS | |||
93 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) | 93 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) |
94 | { *(.vsyscall_gtod_data) } | 94 | { *(.vsyscall_gtod_data) } |
95 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); | 95 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); |
96 | .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) | ||
97 | { *(.vsyscall_clock) } | ||
98 | vsyscall_clock = VVIRT(.vsyscall_clock); | ||
96 | 99 | ||
97 | 100 | ||
98 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) | 101 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) |
@@ -189,6 +192,12 @@ SECTIONS | |||
189 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } | 192 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } |
190 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } | 193 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } |
191 | 194 | ||
195 | /* vdso blob that is mapped into user space */ | ||
196 | vdso_start = . ; | ||
197 | .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) } | ||
198 | . = ALIGN(4096); | ||
199 | vdso_end = .; | ||
200 | |||
192 | #ifdef CONFIG_BLK_DEV_INITRD | 201 | #ifdef CONFIG_BLK_DEV_INITRD |
193 | . = ALIGN(4096); | 202 | . = ALIGN(4096); |
194 | __initramfs_start = .; | 203 | __initramfs_start = .; |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 57660d58d500..06c34949bfdc 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <asm/segment.h> | 42 | #include <asm/segment.h> |
43 | #include <asm/desc.h> | 43 | #include <asm/desc.h> |
44 | #include <asm/topology.h> | 44 | #include <asm/topology.h> |
45 | #include <asm/vgtod.h> | ||
45 | 46 | ||
46 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 47 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
47 | #define __syscall_clobber "r11","rcx","memory" | 48 | #define __syscall_clobber "r11","rcx","memory" |
@@ -57,26 +58,9 @@ | |||
57 | * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) | 58 | * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) |
58 | * Try to keep this structure as small as possible to avoid cache line ping pongs | 59 | * Try to keep this structure as small as possible to avoid cache line ping pongs |
59 | */ | 60 | */ |
60 | struct vsyscall_gtod_data_t { | ||
61 | seqlock_t lock; | ||
62 | |||
63 | /* open coded 'struct timespec' */ | ||
64 | time_t wall_time_sec; | ||
65 | u32 wall_time_nsec; | ||
66 | |||
67 | int sysctl_enabled; | ||
68 | struct timezone sys_tz; | ||
69 | struct { /* extract of a clocksource struct */ | ||
70 | cycle_t (*vread)(void); | ||
71 | cycle_t cycle_last; | ||
72 | cycle_t mask; | ||
73 | u32 mult; | ||
74 | u32 shift; | ||
75 | } clock; | ||
76 | }; | ||
77 | int __vgetcpu_mode __section_vgetcpu_mode; | 61 | int __vgetcpu_mode __section_vgetcpu_mode; |
78 | 62 | ||
79 | struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = | 63 | struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = |
80 | { | 64 | { |
81 | .lock = SEQLOCK_UNLOCKED, | 65 | .lock = SEQLOCK_UNLOCKED, |
82 | .sysctl_enabled = 1, | 66 | .sysctl_enabled = 1, |
@@ -96,6 +80,8 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
96 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 80 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
97 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 81 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
98 | vsyscall_gtod_data.sys_tz = sys_tz; | 82 | vsyscall_gtod_data.sys_tz = sys_tz; |
83 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | ||
84 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; | ||
99 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 85 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
100 | } | 86 | } |
101 | 87 | ||
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 9a0e98accf04..2f673225a51f 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -774,3 +774,12 @@ void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) | |||
774 | return __alloc_bootmem_core(pgdat->bdata, size, | 774 | return __alloc_bootmem_core(pgdat->bdata, size, |
775 | SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); | 775 | SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); |
776 | } | 776 | } |
777 | |||
778 | const char *arch_vma_name(struct vm_area_struct *vma) | ||
779 | { | ||
780 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) | ||
781 | return "[vdso]"; | ||
782 | if (vma == &gate_vma) | ||
783 | return "[vsyscall]"; | ||
784 | return NULL; | ||
785 | } | ||
diff --git a/arch/x86_64/vdso/Makefile b/arch/x86_64/vdso/Makefile new file mode 100644 index 000000000000..faaa72fb250c --- /dev/null +++ b/arch/x86_64/vdso/Makefile | |||
@@ -0,0 +1,49 @@ | |||
1 | # | ||
2 | # x86-64 vDSO. | ||
3 | # | ||
4 | |||
5 | # files to link into the vdso | ||
6 | # vdso-start.o has to be first | ||
7 | vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o | ||
8 | |||
9 | # files to link into kernel | ||
10 | obj-y := vma.o vdso.o vdso-syms.o | ||
11 | |||
12 | vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) | ||
13 | |||
14 | $(obj)/vdso.o: $(obj)/vdso.so | ||
15 | |||
16 | targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o | ||
17 | |||
18 | # The DSO images are built using a special linker script. | ||
19 | quiet_cmd_syscall = SYSCALL $@ | ||
20 | cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \ | ||
21 | -Wl,-T,$(filter-out FORCE,$^) -o $@ | ||
22 | |||
23 | export CPPFLAGS_vdso.lds += -P -C -U$(ARCH) | ||
24 | |||
25 | vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \ | ||
26 | $(call ld-option, -Wl$(comma)--hash-style=sysv) \ | ||
27 | -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 | ||
28 | SYSCFLAGS_vdso.so = $(vdso-flags) | ||
29 | |||
30 | $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so | ||
31 | |||
32 | $(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE | ||
33 | $(call if_changed,syscall) | ||
34 | |||
35 | CF := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 | ||
36 | |||
37 | $(obj)/vclock_gettime.o: CFLAGS = $(CF) | ||
38 | $(obj)/vgetcpu.o: CFLAGS = $(CF) | ||
39 | |||
40 | # We also create a special relocatable object that should mirror the symbol | ||
41 | # table and layout of the linked DSO. With ld -R we can then refer to | ||
42 | # these symbols in the kernel code rather than hand-coded addresses. | ||
43 | extra-y += vdso-syms.o | ||
44 | $(obj)/built-in.o: $(obj)/vdso-syms.o | ||
45 | $(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o | ||
46 | |||
47 | SYSCFLAGS_vdso-syms.o = -r -d | ||
48 | $(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE | ||
49 | $(call if_changed,syscall) | ||
diff --git a/arch/x86_64/vdso/vclock_gettime.c b/arch/x86_64/vdso/vclock_gettime.c new file mode 100644 index 000000000000..17f6a00de712 --- /dev/null +++ b/arch/x86_64/vdso/vclock_gettime.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * Copyright 2006 Andi Kleen, SUSE Labs. | ||
3 | * Subject to the GNU Public License, v.2 | ||
4 | * | ||
5 | * Fast user context implementation of clock_gettime and gettimeofday. | ||
6 | * | ||
7 | * The code should have no internal unresolved relocations. | ||
8 | * Check with readelf after changing. | ||
9 | * Also alternative() doesn't work. | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/posix-timers.h> | ||
14 | #include <linux/time.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <asm/vsyscall.h> | ||
17 | #include <asm/vgtod.h> | ||
18 | #include <asm/timex.h> | ||
19 | #include <asm/hpet.h> | ||
20 | #include <asm/unistd.h> | ||
21 | #include <asm/io.h> | ||
22 | #include <asm/vgtod.h> | ||
23 | #include "vextern.h" | ||
24 | |||
25 | #define gtod vdso_vsyscall_gtod_data | ||
26 | |||
27 | static long vdso_fallback_gettime(long clock, struct timespec *ts) | ||
28 | { | ||
29 | long ret; | ||
30 | asm("syscall" : "=a" (ret) : | ||
31 | "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); | ||
32 | return ret; | ||
33 | } | ||
34 | |||
35 | static inline long vgetns(void) | ||
36 | { | ||
37 | cycles_t (*vread)(void); | ||
38 | vread = gtod->clock.vread; | ||
39 | return ((vread() - gtod->clock.cycle_last) * gtod->clock.mult) >> | ||
40 | gtod->clock.shift; | ||
41 | } | ||
42 | |||
43 | static noinline int do_realtime(struct timespec *ts) | ||
44 | { | ||
45 | unsigned long seq, ns; | ||
46 | do { | ||
47 | seq = read_seqbegin(&gtod->lock); | ||
48 | ts->tv_sec = gtod->wall_time_sec; | ||
49 | ts->tv_nsec = gtod->wall_time_nsec; | ||
50 | ns = vgetns(); | ||
51 | } while (unlikely(read_seqretry(&gtod->lock, seq))); | ||
52 | timespec_add_ns(ts, ns); | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | /* Copy of the version in kernel/time.c which we cannot directly access */ | ||
57 | static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) | ||
58 | { | ||
59 | while (nsec >= NSEC_PER_SEC) { | ||
60 | nsec -= NSEC_PER_SEC; | ||
61 | ++sec; | ||
62 | } | ||
63 | while (nsec < 0) { | ||
64 | nsec += NSEC_PER_SEC; | ||
65 | --sec; | ||
66 | } | ||
67 | ts->tv_sec = sec; | ||
68 | ts->tv_nsec = nsec; | ||
69 | } | ||
70 | |||
71 | static noinline int do_monotonic(struct timespec *ts) | ||
72 | { | ||
73 | unsigned long seq, ns, secs; | ||
74 | do { | ||
75 | seq = read_seqbegin(&gtod->lock); | ||
76 | secs = gtod->wall_time_sec; | ||
77 | ns = gtod->wall_time_nsec + vgetns(); | ||
78 | secs += gtod->wall_to_monotonic.tv_sec; | ||
79 | ns += gtod->wall_to_monotonic.tv_nsec; | ||
80 | } while (unlikely(read_seqretry(&gtod->lock, seq))); | ||
81 | vset_normalized_timespec(ts, secs, ns); | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | ||
86 | { | ||
87 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) | ||
88 | switch (clock) { | ||
89 | case CLOCK_REALTIME: | ||
90 | return do_realtime(ts); | ||
91 | case CLOCK_MONOTONIC: | ||
92 | return do_monotonic(ts); | ||
93 | } | ||
94 | return vdso_fallback_gettime(clock, ts); | ||
95 | } | ||
96 | int clock_gettime(clockid_t, struct timespec *) | ||
97 | __attribute__((weak, alias("__vdso_clock_gettime"))); | ||
98 | |||
99 | int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | ||
100 | { | ||
101 | long ret; | ||
102 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { | ||
103 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != | ||
104 | offsetof(struct timespec, tv_nsec) || | ||
105 | sizeof(*tv) != sizeof(struct timespec)); | ||
106 | do_realtime((struct timespec *)tv); | ||
107 | tv->tv_usec /= 1000; | ||
108 | if (unlikely(tz != NULL)) { | ||
109 | /* This relies on gcc inlining the memcpy. We'll notice | ||
110 | if it ever fails to do so. */ | ||
111 | memcpy(tz, &gtod->sys_tz, sizeof(struct timezone)); | ||
112 | } | ||
113 | return 0; | ||
114 | } | ||
115 | asm("syscall" : "=a" (ret) : | ||
116 | "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); | ||
117 | return ret; | ||
118 | } | ||
119 | int gettimeofday(struct timeval *, struct timezone *) | ||
120 | __attribute__((weak, alias("__vdso_gettimeofday"))); | ||
diff --git a/arch/x86_64/vdso/vdso-note.S b/arch/x86_64/vdso/vdso-note.S new file mode 100644 index 000000000000..79a071e4357e --- /dev/null +++ b/arch/x86_64/vdso/vdso-note.S | |||
@@ -0,0 +1,12 @@ | |||
1 | /* | ||
2 | * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. | ||
3 | * Here we can supply some information useful to userland. | ||
4 | */ | ||
5 | |||
6 | #include <linux/uts.h> | ||
7 | #include <linux/version.h> | ||
8 | #include <linux/elfnote.h> | ||
9 | |||
10 | ELFNOTE_START(Linux, 0, "a") | ||
11 | .long LINUX_VERSION_CODE | ||
12 | ELFNOTE_END | ||
diff --git a/arch/x86_64/vdso/vdso-start.S b/arch/x86_64/vdso/vdso-start.S new file mode 100644 index 000000000000..2dc2cdb84d67 --- /dev/null +++ b/arch/x86_64/vdso/vdso-start.S | |||
@@ -0,0 +1,2 @@ | |||
1 | .globl vdso_kernel_start | ||
2 | vdso_kernel_start: | ||
diff --git a/arch/x86_64/vdso/vdso.S b/arch/x86_64/vdso/vdso.S new file mode 100644 index 000000000000..92e80c1972a7 --- /dev/null +++ b/arch/x86_64/vdso/vdso.S | |||
@@ -0,0 +1,2 @@ | |||
1 | .section ".vdso","a" | ||
2 | .incbin "arch/x86_64/vdso/vdso.so" | ||
diff --git a/arch/x86_64/vdso/vdso.lds.S b/arch/x86_64/vdso/vdso.lds.S new file mode 100644 index 000000000000..b9a60e665d08 --- /dev/null +++ b/arch/x86_64/vdso/vdso.lds.S | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * Linker script for vsyscall DSO. The vsyscall page is an ELF shared | ||
3 | * object prelinked to its virtual address, and with only one read-only | ||
4 | * segment (that fits in one page). This script controls its layout. | ||
5 | */ | ||
6 | #include <asm/asm-offsets.h> | ||
7 | #include "voffset.h" | ||
8 | |||
9 | #define VDSO_PRELINK 0xffffffffff700000 | ||
10 | |||
11 | SECTIONS | ||
12 | { | ||
13 | . = VDSO_PRELINK + SIZEOF_HEADERS; | ||
14 | |||
15 | .hash : { *(.hash) } :text | ||
16 | .gnu.hash : { *(.gnu.hash) } | ||
17 | .dynsym : { *(.dynsym) } | ||
18 | .dynstr : { *(.dynstr) } | ||
19 | .gnu.version : { *(.gnu.version) } | ||
20 | .gnu.version_d : { *(.gnu.version_d) } | ||
21 | .gnu.version_r : { *(.gnu.version_r) } | ||
22 | |||
23 | /* This linker script is used both with -r and with -shared. | ||
24 | For the layouts to match, we need to skip more than enough | ||
25 | space for the dynamic symbol table et al. If this amount | ||
26 | is insufficient, ld -shared will barf. Just increase it here. */ | ||
27 | . = VDSO_PRELINK + VDSO_TEXT_OFFSET; | ||
28 | |||
29 | .text : { *(.text) } :text | ||
30 | .text.ptr : { *(.text.ptr) } :text | ||
31 | . = VDSO_PRELINK + 0x900; | ||
32 | .data : { *(.data) } :text | ||
33 | .bss : { *(.bss) } :text | ||
34 | |||
35 | .altinstructions : { *(.altinstructions) } :text | ||
36 | .altinstr_replacement : { *(.altinstr_replacement) } :text | ||
37 | |||
38 | .note : { *(.note.*) } :text :note | ||
39 | .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr | ||
40 | .eh_frame : { KEEP (*(.eh_frame)) } :text | ||
41 | .dynamic : { *(.dynamic) } :text :dynamic | ||
42 | .useless : { | ||
43 | *(.got.plt) *(.got) | ||
44 | *(.gnu.linkonce.d.*) | ||
45 | *(.dynbss) | ||
46 | *(.gnu.linkonce.b.*) | ||
47 | } :text | ||
48 | } | ||
49 | |||
50 | /* | ||
51 | * We must supply the ELF program headers explicitly to get just one | ||
52 | * PT_LOAD segment, and set the flags explicitly to make segments read-only. | ||
53 | */ | ||
54 | PHDRS | ||
55 | { | ||
56 | text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ | ||
57 | dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ | ||
58 | note PT_NOTE FLAGS(4); /* PF_R */ | ||
59 | eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * This controls what symbols we export from the DSO. | ||
64 | */ | ||
65 | VERSION | ||
66 | { | ||
67 | LINUX_2.6 { | ||
68 | global: | ||
69 | clock_gettime; | ||
70 | __vdso_clock_gettime; | ||
71 | gettimeofday; | ||
72 | __vdso_gettimeofday; | ||
73 | getcpu; | ||
74 | __vdso_getcpu; | ||
75 | local: *; | ||
76 | }; | ||
77 | } | ||
diff --git a/arch/x86_64/vdso/vextern.h b/arch/x86_64/vdso/vextern.h new file mode 100644 index 000000000000..1683ba2ae3e8 --- /dev/null +++ b/arch/x86_64/vdso/vextern.h | |||
@@ -0,0 +1,16 @@ | |||
1 | #ifndef VEXTERN | ||
2 | #include <asm/vsyscall.h> | ||
3 | #define VEXTERN(x) \ | ||
4 | extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden"))); | ||
5 | #endif | ||
6 | |||
7 | #define VMAGIC 0xfeedbabeabcdefabUL | ||
8 | |||
9 | /* Any kernel variables used in the vDSO must be exported in the main | ||
10 | kernel's vmlinux.lds.S/vsyscall.h/proper __section and | ||
11 | put into vextern.h and be referenced as a pointer with vdso prefix. | ||
12 | The main kernel later fills in the values. */ | ||
13 | |||
14 | VEXTERN(jiffies) | ||
15 | VEXTERN(vgetcpu_mode) | ||
16 | VEXTERN(vsyscall_gtod_data) | ||
diff --git a/arch/x86_64/vdso/vgetcpu.c b/arch/x86_64/vdso/vgetcpu.c new file mode 100644 index 000000000000..91f6e85d0fc2 --- /dev/null +++ b/arch/x86_64/vdso/vgetcpu.c | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright 2006 Andi Kleen, SUSE Labs. | ||
3 | * Subject to the GNU Public License, v.2 | ||
4 | * | ||
5 | * Fast user context implementation of getcpu() | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/getcpu.h> | ||
10 | #include <linux/jiffies.h> | ||
11 | #include <linux/time.h> | ||
12 | #include <asm/vsyscall.h> | ||
13 | #include <asm/vgtod.h> | ||
14 | #include "vextern.h" | ||
15 | |||
16 | long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | ||
17 | { | ||
18 | unsigned int dummy, p; | ||
19 | unsigned long j = 0; | ||
20 | |||
21 | /* Fast cache - only recompute value once per jiffies and avoid | ||
22 | relatively costly rdtscp/cpuid otherwise. | ||
23 | This works because the scheduler usually keeps the process | ||
24 | on the same CPU and this syscall doesn't guarantee its | ||
25 | results anyways. | ||
26 | We do this here because otherwise user space would do it on | ||
27 | its own in a likely inferior way (no access to jiffies). | ||
28 | If you don't like it pass NULL. */ | ||
29 | if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) { | ||
30 | p = tcache->blob[1]; | ||
31 | } else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { | ||
32 | /* Load per CPU data from RDTSCP */ | ||
33 | rdtscp(dummy, dummy, p); | ||
34 | } else { | ||
35 | /* Load per CPU data from GDT */ | ||
36 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
37 | } | ||
38 | if (tcache) { | ||
39 | tcache->blob[0] = j; | ||
40 | tcache->blob[1] = p; | ||
41 | } | ||
42 | if (cpu) | ||
43 | *cpu = p & 0xfff; | ||
44 | if (node) | ||
45 | *node = p >> 12; | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | ||
50 | __attribute__((weak, alias("__vdso_getcpu"))); | ||
diff --git a/arch/x86_64/vdso/vma.c b/arch/x86_64/vdso/vma.c new file mode 100644 index 000000000000..d4cb83a6c066 --- /dev/null +++ b/arch/x86_64/vdso/vma.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * Set up the VMAs to tell the VM about the vDSO. | ||
3 | * Copyright 2007 Andi Kleen, SUSE Labs. | ||
4 | * Subject to the GPL, v.2 | ||
5 | */ | ||
6 | #include <linux/mm.h> | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/random.h> | ||
10 | #include <asm/vsyscall.h> | ||
11 | #include <asm/vgtod.h> | ||
12 | #include <asm/proto.h> | ||
13 | #include "voffset.h" | ||
14 | |||
15 | int vdso_enabled = 1; | ||
16 | |||
17 | #define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x; | ||
18 | #include "vextern.h" | ||
19 | #undef VEXTERN | ||
20 | |||
21 | extern char vdso_kernel_start[], vdso_start[], vdso_end[]; | ||
22 | extern unsigned short vdso_sync_cpuid; | ||
23 | |||
24 | struct page **vdso_pages; | ||
25 | |||
26 | static inline void *var_ref(void *vbase, char *var, char *name) | ||
27 | { | ||
28 | unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET; | ||
29 | void *p = vbase + offset; | ||
30 | if (*(void **)p != (void *)VMAGIC) { | ||
31 | printk("VDSO: variable %s broken\n", name); | ||
32 | vdso_enabled = 0; | ||
33 | } | ||
34 | return p; | ||
35 | } | ||
36 | |||
37 | static int __init init_vdso_vars(void) | ||
38 | { | ||
39 | int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; | ||
40 | int i; | ||
41 | char *vbase; | ||
42 | |||
43 | vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); | ||
44 | if (!vdso_pages) | ||
45 | goto oom; | ||
46 | for (i = 0; i < npages; i++) { | ||
47 | struct page *p; | ||
48 | p = alloc_page(GFP_KERNEL); | ||
49 | if (!p) | ||
50 | goto oom; | ||
51 | vdso_pages[i] = p; | ||
52 | copy_page(page_address(p), vdso_start + i*PAGE_SIZE); | ||
53 | } | ||
54 | |||
55 | vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL); | ||
56 | if (!vbase) | ||
57 | goto oom; | ||
58 | |||
59 | if (memcmp(vbase, "\177ELF", 4)) { | ||
60 | printk("VDSO: I'm broken; not ELF\n"); | ||
61 | vdso_enabled = 0; | ||
62 | } | ||
63 | |||
64 | #define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x) | ||
65 | #define VEXTERN(x) \ | ||
66 | V(vdso_ ## x) = &__ ## x; | ||
67 | #include "vextern.h" | ||
68 | #undef VEXTERN | ||
69 | return 0; | ||
70 | |||
71 | oom: | ||
72 | printk("Cannot allocate vdso\n"); | ||
73 | vdso_enabled = 0; | ||
74 | return -ENOMEM; | ||
75 | } | ||
76 | __initcall(init_vdso_vars); | ||
77 | |||
78 | struct linux_binprm; | ||
79 | |||
80 | /* Put the vdso above the (randomized) stack with another randomized offset. | ||
81 | This way there is no hole in the middle of address space. | ||
82 | To save memory make sure it is still in the same PTE as the stack top. | ||
83 | This doesn't give that many random bits */ | ||
84 | static unsigned long vdso_addr(unsigned long start, unsigned len) | ||
85 | { | ||
86 | unsigned long addr, end; | ||
87 | unsigned offset; | ||
88 | end = (start + PMD_SIZE - 1) & PMD_MASK; | ||
89 | if (end >= TASK_SIZE64) | ||
90 | end = TASK_SIZE64; | ||
91 | end -= len; | ||
92 | /* This loses some more bits than a modulo, but is cheaper */ | ||
93 | offset = get_random_int() & (PTRS_PER_PTE - 1); | ||
94 | addr = start + (offset << PAGE_SHIFT); | ||
95 | if (addr >= end) | ||
96 | addr = end; | ||
97 | return addr; | ||
98 | } | ||
99 | |||
100 | /* Setup a VMA at program startup for the vsyscall page. | ||
101 | Not called for compat tasks */ | ||
102 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
103 | { | ||
104 | struct mm_struct *mm = current->mm; | ||
105 | unsigned long addr; | ||
106 | int ret; | ||
107 | unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE); | ||
108 | |||
109 | if (!vdso_enabled) | ||
110 | return 0; | ||
111 | |||
112 | down_write(&mm->mmap_sem); | ||
113 | addr = vdso_addr(mm->start_stack, len); | ||
114 | addr = get_unmapped_area(NULL, addr, len, 0, 0); | ||
115 | if (IS_ERR_VALUE(addr)) { | ||
116 | ret = addr; | ||
117 | goto up_fail; | ||
118 | } | ||
119 | |||
120 | ret = install_special_mapping(mm, addr, len, | ||
121 | VM_READ|VM_EXEC| | ||
122 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | ||
123 | VM_ALWAYSDUMP, | ||
124 | vdso_pages); | ||
125 | if (ret) | ||
126 | goto up_fail; | ||
127 | |||
128 | current->mm->context.vdso = (void *)addr; | ||
129 | up_fail: | ||
130 | up_write(&mm->mmap_sem); | ||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | static __init int vdso_setup(char *s) | ||
135 | { | ||
136 | vdso_enabled = simple_strtoul(s, NULL, 0); | ||
137 | return 0; | ||
138 | } | ||
139 | __setup("vdso=", vdso_setup); | ||
diff --git a/arch/x86_64/vdso/voffset.h b/arch/x86_64/vdso/voffset.h new file mode 100644 index 000000000000..5304204911f2 --- /dev/null +++ b/arch/x86_64/vdso/voffset.h | |||
@@ -0,0 +1 @@ | |||
#define VDSO_TEXT_OFFSET 0x500 | |||
diff --git a/arch/x86_64/vdso/vvar.c b/arch/x86_64/vdso/vvar.c new file mode 100644 index 000000000000..6fc22219a472 --- /dev/null +++ b/arch/x86_64/vdso/vvar.c | |||
@@ -0,0 +1,12 @@ | |||
1 | /* Define pointer to external vDSO variables. | ||
2 | These are part of the vDSO. The kernel fills in the real addresses | ||
3 | at boot time. This is done because when the vdso is linked the | ||
4 | kernel isn't yet and we don't know the final addresses. */ | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/time.h> | ||
7 | #include <asm/vsyscall.h> | ||
8 | #include <asm/timex.h> | ||
9 | #include <asm/vgtod.h> | ||
10 | |||
11 | #define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC; | ||
12 | #include "vextern.h" | ||
diff --git a/include/asm-x86_64/auxvec.h b/include/asm-x86_64/auxvec.h index 2403c4cfced2..1d5ab0d03950 100644 --- a/include/asm-x86_64/auxvec.h +++ b/include/asm-x86_64/auxvec.h | |||
@@ -1,4 +1,6 @@ | |||
1 | #ifndef __ASM_X86_64_AUXVEC_H | 1 | #ifndef __ASM_X86_64_AUXVEC_H |
2 | #define __ASM_X86_64_AUXVEC_H | 2 | #define __ASM_X86_64_AUXVEC_H |
3 | 3 | ||
4 | #define AT_SYSINFO_EHDR 33 | ||
5 | |||
4 | #endif | 6 | #endif |
diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h index 6d24ea7c4d9d..b4fbe47f6ccd 100644 --- a/include/asm-x86_64/elf.h +++ b/include/asm-x86_64/elf.h | |||
@@ -162,6 +162,19 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); | |||
162 | /* 1GB for 64bit, 8MB for 32bit */ | 162 | /* 1GB for 64bit, 8MB for 32bit */ |
163 | #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) | 163 | #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) |
164 | 164 | ||
165 | |||
166 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 | ||
167 | struct linux_binprm; | ||
168 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, | ||
169 | int executable_stack); | ||
170 | |||
171 | extern int vdso_enabled; | ||
172 | |||
173 | #define ARCH_DLINFO \ | ||
174 | do if (vdso_enabled) { \ | ||
175 | NEW_AUX_ENT(AT_SYSINFO_EHDR,(unsigned long)current->mm->context.vdso);\ | ||
176 | } while (0) | ||
177 | |||
165 | #endif | 178 | #endif |
166 | 179 | ||
167 | #endif | 180 | #endif |
diff --git a/include/asm-x86_64/mmu.h b/include/asm-x86_64/mmu.h index 5dc6ed79859a..d2cd4a9d984d 100644 --- a/include/asm-x86_64/mmu.h +++ b/include/asm-x86_64/mmu.h | |||
@@ -15,6 +15,7 @@ typedef struct { | |||
15 | rwlock_t ldtlock; | 15 | rwlock_t ldtlock; |
16 | int size; | 16 | int size; |
17 | struct semaphore sem; | 17 | struct semaphore sem; |
18 | void *vdso; | ||
18 | } mm_context_t; | 19 | } mm_context_t; |
19 | 20 | ||
20 | #endif | 21 | #endif |
diff --git a/include/asm-x86_64/vgtod.h b/include/asm-x86_64/vgtod.h new file mode 100644 index 000000000000..3301f0929342 --- /dev/null +++ b/include/asm-x86_64/vgtod.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef _ASM_VGTOD_H | ||
2 | #define _ASM_VGTOD_H 1 | ||
3 | |||
4 | #include <asm/vsyscall.h> | ||
5 | #include <linux/clocksource.h> | ||
6 | |||
7 | struct vsyscall_gtod_data { | ||
8 | seqlock_t lock; | ||
9 | |||
10 | /* open coded 'struct timespec' */ | ||
11 | time_t wall_time_sec; | ||
12 | u32 wall_time_nsec; | ||
13 | |||
14 | int sysctl_enabled; | ||
15 | struct timezone sys_tz; | ||
16 | struct { /* extract of a clocksource struct */ | ||
17 | cycle_t (*vread)(void); | ||
18 | cycle_t cycle_last; | ||
19 | cycle_t mask; | ||
20 | u32 mult; | ||
21 | u32 shift; | ||
22 | } clock; | ||
23 | struct timespec wall_to_monotonic; | ||
24 | }; | ||
25 | extern struct vsyscall_gtod_data __vsyscall_gtod_data | ||
26 | __section_vsyscall_gtod_data; | ||
27 | extern struct vsyscall_gtod_data vsyscall_gtod_data; | ||
28 | |||
29 | #endif | ||
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 82b4afe65c91..3b8ceb4af2cf 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h | |||
@@ -22,6 +22,8 @@ enum vsyscall_num { | |||
22 | /* Definitions for CONFIG_GENERIC_TIME definitions */ | 22 | /* Definitions for CONFIG_GENERIC_TIME definitions */ |
23 | #define __section_vsyscall_gtod_data __attribute__ \ | 23 | #define __section_vsyscall_gtod_data __attribute__ \ |
24 | ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) | 24 | ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) |
25 | #define __section_vsyscall_clock __attribute__ \ | ||
26 | ((unused, __section__ (".vsyscall_clock"),aligned(16))) | ||
25 | #define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) | 27 | #define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) |
26 | 28 | ||
27 | #define VGETCPU_RDTSCP 1 | 29 | #define VGETCPU_RDTSCP 1 |
@@ -36,7 +38,6 @@ extern volatile unsigned long __jiffies; | |||
36 | /* kernel space (writeable) */ | 38 | /* kernel space (writeable) */ |
37 | extern int vgetcpu_mode; | 39 | extern int vgetcpu_mode; |
38 | extern struct timezone sys_tz; | 40 | extern struct timezone sys_tz; |
39 | extern struct vsyscall_gtod_data_t vsyscall_gtod_data; | ||
40 | 41 | ||
41 | #endif /* __KERNEL__ */ | 42 | #endif /* __KERNEL__ */ |
42 | 43 | ||