author     Linus Torvalds <torvalds@linux-foundation.org>   2011-05-26 15:19:31 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-05-26 15:19:31 -0400
commit     14587a2a25447813996e6fb9e48d48627cb75a5d (patch)
tree       fb2a16f31297a8e85a1f6678231d50e2d389a1a0 /arch/x86/kernel
parent     fce637e392a762e4d4f0fc41ac3d3f557187ac21 (diff)
parent     e9d35946c84c44e33e007123d3d595ccbd21d1a4 (diff)
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86: vdso: Remove unused variable
x86-64: Optimize vDSO time()
x86-64: Add time to vDSO
x86-64: Turn off -pg and turn on -foptimize-sibling-calls for vDSO
x86-64: Move vread_tsc into a new file with sensible options
x86-64: Vclock_gettime(CLOCK_MONOTONIC) can't ever see nsec < 0
x86-64: Don't generate cmov in vread_tsc
x86-64: Remove unnecessary barrier in vread_tsc
x86-64: Clean up vdso/kernel shared variables
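To see what this series buys in practice, here is a minimal userspace sketch (not part of the patch set): with time() exported by the vDSO and the vread_tsc path fully optimized, calls like these can complete without entering the kernel. Whether the syscall is actually avoided depends on the libc version and the active clocksource; on 2011-era glibc, link with -lrt for clock_gettime.

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;
	time_t t = time(NULL);			/* vDSO time() after this series */

	clock_gettime(CLOCK_MONOTONIC, &ts);	/* vDSO clock_gettime() */
	printf("time()=%ld monotonic=%ld.%09ld\n",
	       (long)t, (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}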
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile        |  8
-rw-r--r--  arch/x86/kernel/time.c          |  2
-rw-r--r--  arch/x86/kernel/tsc.c           | 19
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S   | 34
-rw-r--r--  arch/x86/kernel/vread_tsc_64.c  | 36
-rw-r--r--  arch/x86/kernel/vsyscall_64.c   | 46
6 files changed, 73 insertions, 72 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 250806472a7e..f5abe3a245b8 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,7 +8,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
-CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
@@ -24,13 +23,16 @@ endif
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o := $(nostackp)
-CFLAGS_tsc.o := $(nostackp)
+CFLAGS_vread_tsc_64.o := $(nostackp)
 CFLAGS_paravirt.o := $(nostackp)
 GCOV_PROFILE_vsyscall_64.o := n
 GCOV_PROFILE_hpet.o := n
 GCOV_PROFILE_tsc.o := n
 GCOV_PROFILE_paravirt.o := n
 
+# vread_tsc_64 is hot and should be fully optimized:
+CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
+
 obj-y := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y += time.o ioport.o ldt.o dumpstack.o
@@ -39,7 +41,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y += probe_roms.o
 obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
-obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
+obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o
 obj-y += bootflag.o e820.o
 obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
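The flag shuffling above is load-bearing: -pg instrumentation makes the compiler emit a call to mcount() at every function entry, and mcount lives in the kernel image, which code executing from the user-mapped vsyscall/vDSO pages cannot reach. A standalone sketch (hypothetical file and names, not from the tree) that makes the effect visible:

/* Compare:  gcc -O2 -S demo.c   vs   gcc -O2 -pg -S demo.c
 * With -pg, demo_read() gains a "call mcount" at entry; a user-mapped
 * kernel function compiled that way would fault trying to reach it. */
static volatile long counter = 42;

long demo_read(void)
{
	return counter;
}

int main(void)
{
	return demo_read() == 42 ? 0 : 1;
}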
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 25a28a245937..00cbb272627f 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9335bf7dd2e7..6cc6922262af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -763,25 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs)
 		ret : clocksource_tsc.cycle_last;
 }
 
-#ifdef CONFIG_X86_64
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
-	cycle_t ret;
-
-	/*
-	 * Surround the RDTSC by barriers, to make sure it's not
-	 * speculated to outside the seqlock critical section and
-	 * does not cause time warps:
-	 */
-	rdtsc_barrier();
-	ret = (cycle_t)vget_cycles();
-	rdtsc_barrier();
-
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-#endif
-
 static void resume_tsc(struct clocksource *cs)
 {
 	clocksource_tsc.cycle_last = 0;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 61682f0ac264..89aed99aafce 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -161,6 +161,12 @@
 
 #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
 #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x	\
+	ADDR(.vsyscall_0) + offset			\
+	: AT(VLOAD(.vsyscall_var_ ## x)) {		\
+		*(.vsyscall_var_ ## x)			\
+	}						\
+	x = VVIRT(.vsyscall_var_ ## x);
 
 	. = ALIGN(4096);
 	__vsyscall_0 = .;
@@ -175,18 +181,6 @@
 		*(.vsyscall_fn)
 	}
 
-	. = ALIGN(L1_CACHE_BYTES);
-	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
-		*(.vsyscall_gtod_data)
-	}
-
-	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
-	.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
-		*(.vsyscall_clock)
-	}
-	vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
 	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
 		*(.vsyscall_1)
 	}
@@ -194,21 +188,14 @@
 		*(.vsyscall_2)
 	}
 
-	.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
-		*(.vgetcpu_mode)
-	}
-	vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
-	. = ALIGN(L1_CACHE_BYTES);
-	.jiffies : AT(VLOAD(.jiffies)) {
-		*(.jiffies)
-	}
-	jiffies = VVIRT(.jiffies);
-
 	.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
 		*(.vsyscall_3)
 	}
 
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+
 	. = __vsyscall_0 + PAGE_SIZE;
 
 #undef VSYSCALL_ADDR
@@ -216,6 +203,7 @@
 #undef VLOAD
 #undef VVIRT_OFFSET
 #undef VVIRT
+#undef EMIT_VVAR
 
 #endif /* CONFIG_X86_64 */
 
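The EMIT_VVAR hook defined and undefined above is one half of a two-sided macro scheme: the shared header expands a single declaration list into output sections when the linker script defines __VVAR_KERNEL_LDS, and into fixed-address accessors when compiled as C. A simplified sketch of the idea follows; the real asm/vvar.h added by this series differs in detail, and the base-address macro, alignment, and offsets here are illustrative assumptions.

/* Sketch of the vvar scheme; details are illustrative, not authoritative. */
#ifdef __VVAR_KERNEL_LDS
/* Linker script: emit one section per variable at a fixed offset. */
#define DECLARE_VVAR(offset, type, name)	EMIT_VVAR(name, offset)
#else
/* C code: the kernel defines each variable in its named section;
 * vsyscall/vDSO readers reach it through a constant virtual address. */
#define DECLARE_VVAR(offset, type, name)			\
	static type const * const vvaraddr_ ## name =		\
		(void *)(VSYSCALL_VARS_BASE + (offset));	/* assumed base */
#define DEFINE_VVAR(type, name)					\
	type name __attribute__((section(".vsyscall_var_" #name)))
#define VVAR(name)	(*vvaraddr_ ## name)
#endif

/* One shared declaration list drives both expansions: */
DECLARE_VVAR(0,   volatile unsigned long,    jiffies)
DECLARE_VVAR(8,   int,                       vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)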
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c
new file mode 100644
index 000000000000..a81aa9e9894c
--- /dev/null
+++ b/arch/x86/kernel/vread_tsc_64.c
@@ -0,0 +1,36 @@
+/* This code runs in userspace. */
+
+#define DISABLE_BRANCH_PROFILING
+#include <asm/vgtod.h>
+
+notrace cycle_t __vsyscall_fn vread_tsc(void)
+{
+	cycle_t ret;
+	u64 last;
+
+	/*
+	 * Empirically, a fence (of type that depends on the CPU)
+	 * before rdtsc is enough to ensure that rdtsc is ordered
+	 * with respect to loads.  The various CPU manuals are unclear
+	 * as to whether rdtsc can be reordered with later loads,
+	 * but no one has ever seen it happen.
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+
+	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a funciton of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead.  I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
+}
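The fence-then-rdtsc idiom above can be reproduced in ordinary userspace code. A self-contained sketch, assuming x86-64 and GCC/Clang inline asm; lfence is the Intel flavor of rdtsc_barrier(), AMD parts use mfence:

#include <stdint.h>
#include <stdio.h>

static inline uint64_t fenced_rdtsc(void)
{
	uint32_t lo, hi;

	/* The fence keeps earlier loads from being reordered past rdtsc,
	 * mirroring the single rdtsc_barrier() in vread_tsc above; the
	 * "memory" clobber stops the compiler from reordering around it. */
	asm volatile("lfence; rdtsc" : "=a" (lo), "=d" (hi) : : "memory");
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t a = fenced_rdtsc();
	uint64_t b = fenced_rdtsc();

	printf("tsc delta: %llu cycles\n", (unsigned long long)(b - a));
	return 0;
}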
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 59be48d0d75c..3e682184d76c 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -49,15 +49,8 @@
 	__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","cx","memory"
 
-/*
- * vsyscall_gtod_data contains data that is :
- * - readonly from vsyscalls
- * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
- * Try to keep this structure as small as possible to avoid cache line ping pongs
- */
-int __vgetcpu_mode __section_vgetcpu_mode;
-
-struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
+DEFINE_VVAR(int, vgetcpu_mode);
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 {
 	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 	.sysctl_enabled = 1,
@@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
  */
 static __always_inline void do_get_tz(struct timezone * tz)
 {
-	*tz = __vsyscall_gtod_data.sys_tz;
+	*tz = VVAR(vsyscall_gtod_data).sys_tz;
 }
 
 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
@@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
 	unsigned long mult, shift, nsec;
 	cycle_t (*vread)(void);
 	do {
-		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+		seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
 
-		vread = __vsyscall_gtod_data.clock.vread;
-		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
+		vread = VVAR(vsyscall_gtod_data).clock.vread;
+		if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
+			     !vread)) {
 			gettimeofday(tv,NULL);
 			return;
 		}
 
 		now = vread();
-		base = __vsyscall_gtod_data.clock.cycle_last;
-		mask = __vsyscall_gtod_data.clock.mask;
-		mult = __vsyscall_gtod_data.clock.mult;
-		shift = __vsyscall_gtod_data.clock.shift;
+		base = VVAR(vsyscall_gtod_data).clock.cycle_last;
+		mask = VVAR(vsyscall_gtod_data).clock.mask;
+		mult = VVAR(vsyscall_gtod_data).clock.mult;
+		shift = VVAR(vsyscall_gtod_data).clock.shift;
 
-		tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
-		nsec = __vsyscall_gtod_data.wall_time_nsec;
-	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+		tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
+		nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
+	} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
 
 	/* calculate interval: */
 	cycle_delta = (now - base) & mask;
@@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t)
 {
 	unsigned seq;
 	time_t result;
-	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
+	if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
 		return time_syscall(t);
 
 	do {
-		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+		seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
 
-		result = __vsyscall_gtod_data.wall_time_sec;
+		result = VVAR(vsyscall_gtod_data).wall_time_sec;
 
-	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+	} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
 
 	if (t)
 		*t = result;
@@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 	   We do this here because otherwise user space would do it on
 	   its own in a likely inferior way (no access to jiffies).
 	   If you don't like it pass NULL. */
-	if (tcache && tcache->blob[0] == (j = __jiffies)) {
+	if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) {
 		p = tcache->blob[1];
-	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+	} else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
 		native_read_tscp(&p);
 	} else {
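The gettimeofday and time readers above lean on the same seqlock pattern: sample the sequence count, copy the data, and retry if the count changed or was odd (a write was in flight). A minimal userspace rendering of that read/retry idiom using C11 atomics; this sketches the pattern only and is not the kernel's seqlock implementation:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;			/* even: stable, odd: write in progress */
static atomic_ulong wall_time_sec;	/* the protected datum */

/* Single writer, standing in for the timer-interrupt side. */
static void writer_update(unsigned long sec)
{
	unsigned s = atomic_load_explicit(&seq, memory_order_relaxed);

	atomic_store_explicit(&seq, s + 1, memory_order_relaxed);	/* odd */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&wall_time_sec, sec, memory_order_relaxed);
	atomic_store_explicit(&seq, s + 2, memory_order_release);	/* even */
}

static unsigned long reader_get(void)
{
	unsigned s1, s2;
	unsigned long v;

	do {
		s1 = atomic_load_explicit(&seq, memory_order_acquire);
		v  = atomic_load_explicit(&wall_time_sec, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);
		s2 = atomic_load_explicit(&seq, memory_order_relaxed);
	} while ((s1 & 1) || s1 != s2);	/* retry on in-flight or torn read */

	return v;
}

int main(void)
{
	writer_update(1306439971);	/* in real use these run concurrently */
	printf("sec = %lu\n", reader_get());
	return 0;
}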