diff options
author | Andi Kleen <ak@suse.de> | 2007-07-21 11:10:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-21 21:37:08 -0400 |
commit | 2aae950b21e4bc789d1fc6668faf67e8748300b7 (patch) | |
tree | 5777768cc2493695ec9f4000c14f3584b3db28fd /arch/x86_64/kernel | |
parent | a586df067afe0580bb02b7a6312ca2afe49bba03 (diff) |
x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu
This implements a new vDSO for x86-64. The concept is similar
to the existing vDSOs on i386 and PPC. x86-64 has had static
vsyscalls before, but these are not flexible enough anymore.
A vDSO is an ELF shared library supplied by the kernel that is mapped into
user address space. The vDSO mapping is randomized for each process
for security reasons.
Doing this was needed for clock_gettime, because clock_gettime
always needs a syscall fallback and having one at a fixed
address would have made buffer overflow exploits too easy to write.
The vdso can be disabled with vdso=0
It currently includes a new gettimeofday implementation and optimized
clock_gettime(). The gettimeofday implementation is slightly faster
than the one in the old vsyscall. clock_gettime is significantly faster
than the syscall for CLOCK_MONOTONIC and CLOCK_REALTIME.
The new calls are generally faster than the old vsyscall.
Advantages over the old x86-64 vsyscalls:
- Extensible
- Randomized
- Cleaner
- Easier to virtualize (the old static address range previously caused
overhead e.g. for Xen because it had to create special page tables for it)
Weak points:
- glibc support still to be written
The VM interface is partly based on Ingo Molnar's i386 version.
Includes compile fix from Joachim Deguara
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r-- | arch/x86_64/kernel/time.c | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 9 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 22 |
3 files changed, 14 insertions, 18 deletions
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 4a0895bacf51..5405a69a1f72 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <asm/hpet.h> | 44 | #include <asm/hpet.h> |
45 | #include <asm/mpspec.h> | 45 | #include <asm/mpspec.h> |
46 | #include <asm/nmi.h> | 46 | #include <asm/nmi.h> |
47 | #include <asm/vgtod.h> | ||
47 | 48 | ||
48 | static char *timename = NULL; | 49 | static char *timename = NULL; |
49 | 50 | ||
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 5c57ea4591c1..c2d5a840cb1a 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -93,6 +93,9 @@ SECTIONS | |||
93 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) | 93 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) |
94 | { *(.vsyscall_gtod_data) } | 94 | { *(.vsyscall_gtod_data) } |
95 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); | 95 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); |
96 | .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) | ||
97 | { *(.vsyscall_clock) } | ||
98 | vsyscall_clock = VVIRT(.vsyscall_clock); | ||
96 | 99 | ||
97 | 100 | ||
98 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) | 101 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) |
@@ -189,6 +192,12 @@ SECTIONS | |||
189 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } | 192 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } |
190 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } | 193 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } |
191 | 194 | ||
195 | /* vdso blob that is mapped into user space */ | ||
196 | vdso_start = . ; | ||
197 | .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) } | ||
198 | . = ALIGN(4096); | ||
199 | vdso_end = .; | ||
200 | |||
192 | #ifdef CONFIG_BLK_DEV_INITRD | 201 | #ifdef CONFIG_BLK_DEV_INITRD |
193 | . = ALIGN(4096); | 202 | . = ALIGN(4096); |
194 | __initramfs_start = .; | 203 | __initramfs_start = .; |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 57660d58d500..06c34949bfdc 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <asm/segment.h> | 42 | #include <asm/segment.h> |
43 | #include <asm/desc.h> | 43 | #include <asm/desc.h> |
44 | #include <asm/topology.h> | 44 | #include <asm/topology.h> |
45 | #include <asm/vgtod.h> | ||
45 | 46 | ||
46 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 47 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
47 | #define __syscall_clobber "r11","rcx","memory" | 48 | #define __syscall_clobber "r11","rcx","memory" |
@@ -57,26 +58,9 @@ | |||
57 | * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) | 58 | * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) |
58 | * Try to keep this structure as small as possible to avoid cache line ping pongs | 59 | * Try to keep this structure as small as possible to avoid cache line ping pongs |
59 | */ | 60 | */ |
60 | struct vsyscall_gtod_data_t { | ||
61 | seqlock_t lock; | ||
62 | |||
63 | /* open coded 'struct timespec' */ | ||
64 | time_t wall_time_sec; | ||
65 | u32 wall_time_nsec; | ||
66 | |||
67 | int sysctl_enabled; | ||
68 | struct timezone sys_tz; | ||
69 | struct { /* extract of a clocksource struct */ | ||
70 | cycle_t (*vread)(void); | ||
71 | cycle_t cycle_last; | ||
72 | cycle_t mask; | ||
73 | u32 mult; | ||
74 | u32 shift; | ||
75 | } clock; | ||
76 | }; | ||
77 | int __vgetcpu_mode __section_vgetcpu_mode; | 61 | int __vgetcpu_mode __section_vgetcpu_mode; |
78 | 62 | ||
79 | struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = | 63 | struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = |
80 | { | 64 | { |
81 | .lock = SEQLOCK_UNLOCKED, | 65 | .lock = SEQLOCK_UNLOCKED, |
82 | .sysctl_enabled = 1, | 66 | .sysctl_enabled = 1, |
@@ -96,6 +80,8 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
96 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 80 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
97 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 81 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
98 | vsyscall_gtod_data.sys_tz = sys_tz; | 82 | vsyscall_gtod_data.sys_tz = sys_tz; |
83 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | ||
84 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; | ||
99 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 85 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
100 | } | 86 | } |
101 | 87 | ||