Diffstat (limited to 'arch/x86/vdso/vclock_gettime.c')

-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 256 +++++++++++++++++++-----------
1 file changed, 162 insertions(+), 94 deletions(-)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index eb5d7a56f8d4..16d686171e9a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
  *
  * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
  */
@@ -11,56 +14,55 @@
 /* Disable profiling for userspace code: */
 #define DISABLE_BRANCH_PROFILING
 
-#include <linux/kernel.h>
-#include <linux/posix-timers.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
+#include <uapi/linux/time.h>
 #include <asm/vgtod.h>
-#include <asm/timex.h>
 #include <asm/hpet.h>
+#include <asm/vvar.h>
 #include <asm/unistd.h>
-#include <asm/io.h>
-#include <asm/pvclock.h>
+#include <asm/msr.h>
+#include <linux/math64.h>
+#include <linux/time.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
-notrace static cycle_t vread_tsc(void)
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifdef CONFIG_HPET_TIMER
+static inline u32 read_hpet_counter(const volatile void *addr)
 {
-        cycle_t ret;
-        u64 last;
+        return *(const volatile u32 *) (addr + HPET_COUNTER);
+}
+#endif
 
-        /*
-         * Empirically, a fence (of type that depends on the CPU)
-         * before rdtsc is enough to ensure that rdtsc is ordered
-         * with respect to loads. The various CPU manuals are unclear
-         * as to whether rdtsc can be reordered with later loads,
-         * but no one has ever seen it happen.
-         */
-        rdtsc_barrier();
-        ret = (cycle_t)vget_cycles();
+#ifndef BUILD_VDSO32
 
-        last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+#include <asm/fixmap.h>
+#include <asm/pvclock.h>
 
-        if (likely(ret >= last))
-                return ret;
+static notrace cycle_t vread_hpet(void)
+{
+        return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
+}
 
-        /*
-         * GCC likes to generate cmov here, but this branch is extremely
-         * predictable (it's just a function of time and the likely is
-         * very likely) and there's a data dependence, so force GCC
-         * to generate a branch instead. I don't barrier() because
-         * we don't actually need a barrier, and if this function
-         * ever gets inlined it will generate worse code.
-         */
-        asm volatile ("");
-        return last;
-}
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+        long ret;
+        asm("syscall" : "=a" (ret) :
+            "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+        return ret;
+}
+
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
+{
+        long ret;
+
+        asm("syscall" : "=a" (ret) :
+            "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+        return ret;
+}
 
-static notrace cycle_t vread_hpet(void)
+#ifdef CONFIG_PARAVIRT_CLOCK
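
For reference, the 64-bit fallbacks added above use the raw syscall ABI: the call number goes in %rax, the first two arguments in %rdi and %rsi, and the result comes back in %rax. Below is a minimal user-space sketch of the same path, assuming Linux on x86-64; unlike the vDSO-internal version it also lists %rcx and %r11, which the syscall instruction itself clobbers.

/* user-space analogue of vdso_fallback_gettime(), x86-64 only */
#include <stdio.h>
#include <time.h>
#include <sys/syscall.h>

static long raw_clock_gettime(long clock, struct timespec *ts)
{
        long ret;

        /* nr in %rax ("0" ties it to ret's register), args in %rdi/%rsi */
        asm volatile ("syscall"
                      : "=a" (ret)
                      : "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
                      : "rcx", "r11", "memory");
        return ret;
}

int main(void)
{
        struct timespec ts;

        if (raw_clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
                printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
}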
@@ -124,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode)
                 *mode = VCLOCK_NONE;
 
         /* refer to tsc.c read_tsc() comment for rationale */
-        last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+        last = gtod->cycle_last;
 
         if (likely(ret >= last))
                 return ret;
@@ -133,11 +135,30 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
+#else
+
+extern u8 hpet_page
+        __attribute__((visibility("hidden")));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+        return read_hpet_counter((const void *)(&hpet_page));
+}
+#endif
+
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
         long ret;
-        asm("syscall" : "=a" (ret) :
-            "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
+
+        asm(
+                "mov %%ebx, %%edx \n"
+                "mov %2, %%ebx \n"
+                "call VDSO32_vsyscall \n"
+                "mov %%edx, %%ebx \n"
+                : "=a" (ret)
+                : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+                : "memory", "edx");
         return ret;
 }
 
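The 32-bit fallback added here cannot use the syscall instruction; it enters the kernel through the vDSO's VDSO32_vsyscall stub with the i386 convention (arguments in %ebx, %ecx, ...), and it must save and restore %ebx by hand because PIC code on i386 reserves that register for the GOT pointer. The same sequence as above, restated with comments:

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
        long ret;

        asm(
                "mov %%ebx, %%edx \n"     /* spill %ebx: PIC reserves it for the GOT */
                "mov %2, %%ebx \n"        /* arg 1 (clock) belongs in %ebx */
                "call VDSO32_vsyscall \n" /* kernel entry stub (int $0x80/sysenter) */
                "mov %%edx, %%ebx \n"     /* restore %ebx before returning */
                : "=a" (ret)              /* call number in, return value out: %eax */
                : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) /* arg 2 in %ecx */
                : "memory", "edx");       /* %edx doubles as the %ebx spill slot */
        return ret;
}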
@@ -145,28 +166,79 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
         long ret;
 
-        asm("syscall" : "=a" (ret) :
-            "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+        asm(
+                "mov %%ebx, %%edx \n"
+                "mov %2, %%ebx \n"
+                "call VDSO32_vsyscall \n"
+                "mov %%edx, %%ebx \n"
+                : "=a" (ret)
+                : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+                : "memory", "edx");
         return ret;
 }
 
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+        *mode = VCLOCK_NONE;
+        return 0;
+}
+#endif
+
+#endif
+
+notrace static cycle_t vread_tsc(void)
+{
+        cycle_t ret;
+        u64 last;
+
+        /*
+         * Empirically, a fence (of type that depends on the CPU)
+         * before rdtsc is enough to ensure that rdtsc is ordered
+         * with respect to loads. The various CPU manuals are unclear
+         * as to whether rdtsc can be reordered with later loads,
+         * but no one has ever seen it happen.
+         */
+        rdtsc_barrier();
+        ret = (cycle_t)__native_read_tsc();
+
+        last = gtod->cycle_last;
+
+        if (likely(ret >= last))
+                return ret;
+
+        /*
+         * GCC likes to generate cmov here, but this branch is extremely
+         * predictable (it's just a function of time and the likely is
+         * very likely) and there's a data dependence, so force GCC
+         * to generate a branch instead. I don't barrier() because
+         * we don't actually need a barrier, and if this function
+         * ever gets inlined it will generate worse code.
+         */
+        asm volatile ("");
+        return last;
+}
 
 notrace static inline u64 vgetsns(int *mode)
 {
-        long v;
+        u64 v;
         cycles_t cycles;
-        if (gtod->clock.vclock_mode == VCLOCK_TSC)
+
+        if (gtod->vclock_mode == VCLOCK_TSC)
                 cycles = vread_tsc();
-        else if (gtod->clock.vclock_mode == VCLOCK_HPET)
+#ifdef CONFIG_HPET_TIMER
+        else if (gtod->vclock_mode == VCLOCK_HPET)
                 cycles = vread_hpet();
+#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
-        else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+        else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
                 cycles = vread_pvclock(mode);
 #endif
         else
                 return 0;
-        v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
-        return v * gtod->clock.mult;
+        v = (cycles - gtod->cycle_last) & gtod->mask;
+        return v * gtod->mult;
 }
 
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
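
The mult/shift pair consumed here is the kernel's usual fixed-point clocksource conversion: nanoseconds = ((cycles - cycle_last) & mask) * mult >> shift, with the work split between vgetsns(), which returns the un-shifted product, and its callers, which apply the shift. A standalone sketch of the combined arithmetic with hypothetical constants (the kernel derives the real pair from the calibrated clock frequency):

#include <stdint.h>
#include <stdio.h>

/* hypothetical pair for a ~2.8 GHz TSC: mult ~= (1e9 / freq_hz) << shift */
#define TSC_MULT  5991863u
#define TSC_SHIFT 24

static uint64_t cycles_to_ns(uint64_t cycles, uint64_t cycle_last)
{
        /* same conversion vgetsns() and its callers perform on the gtod fields */
        return ((cycles - cycle_last) * TSC_MULT) >> TSC_SHIFT;
}

int main(void)
{
        /* 2.8e9 cycles at 2.8 GHz should print roughly 1e9 ns */
        printf("%llu ns\n",
               (unsigned long long)cycles_to_ns(2800000000ull, 0));
        return 0;
}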
@@ -176,106 +248,102 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
         u64 ns;
         int mode;
 
-        ts->tv_nsec = 0;
         do {
-                seq = raw_read_seqcount_begin(&gtod->seq);
-                mode = gtod->clock.vclock_mode;
+                seq = gtod_read_begin(gtod);
+                mode = gtod->vclock_mode;
                 ts->tv_sec = gtod->wall_time_sec;
                 ns = gtod->wall_time_snsec;
                 ns += vgetsns(&mode);
-                ns >>= gtod->clock.shift;
-        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+                ns >>= gtod->shift;
+        } while (unlikely(gtod_read_retry(gtod, seq)));
+
+        ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+        ts->tv_nsec = ns;
 
-        timespec_add_ns(ts, ns);
         return mode;
 }
 
-notrace static int do_monotonic(struct timespec *ts)
+notrace static int __always_inline do_monotonic(struct timespec *ts)
 {
         unsigned long seq;
         u64 ns;
         int mode;
 
-        ts->tv_nsec = 0;
         do {
-                seq = raw_read_seqcount_begin(&gtod->seq);
-                mode = gtod->clock.vclock_mode;
+                seq = gtod_read_begin(gtod);
+                mode = gtod->vclock_mode;
                 ts->tv_sec = gtod->monotonic_time_sec;
                 ns = gtod->monotonic_time_snsec;
                 ns += vgetsns(&mode);
-                ns >>= gtod->clock.shift;
-        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-        timespec_add_ns(ts, ns);
+                ns >>= gtod->shift;
+        } while (unlikely(gtod_read_retry(gtod, seq)));
+
+        ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+        ts->tv_nsec = ns;
 
         return mode;
 }
 
-notrace static int do_realtime_coarse(struct timespec *ts)
+notrace static void do_realtime_coarse(struct timespec *ts)
 {
         unsigned long seq;
         do {
-                seq = raw_read_seqcount_begin(&gtod->seq);
-                ts->tv_sec = gtod->wall_time_coarse.tv_sec;
-                ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
-        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-        return 0;
+                seq = gtod_read_begin(gtod);
+                ts->tv_sec = gtod->wall_time_coarse_sec;
+                ts->tv_nsec = gtod->wall_time_coarse_nsec;
+        } while (unlikely(gtod_read_retry(gtod, seq)));
 }
 
-notrace static int do_monotonic_coarse(struct timespec *ts)
+notrace static void do_monotonic_coarse(struct timespec *ts)
 {
         unsigned long seq;
         do {
-                seq = raw_read_seqcount_begin(&gtod->seq);
-                ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
-                ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
-        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
-        return 0;
+                seq = gtod_read_begin(gtod);
+                ts->tv_sec = gtod->monotonic_time_coarse_sec;
+                ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
+        } while (unlikely(gtod_read_retry(gtod, seq)));
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-        int ret = VCLOCK_NONE;
-
         switch (clock) {
         case CLOCK_REALTIME:
-                ret = do_realtime(ts);
+                if (do_realtime(ts) == VCLOCK_NONE)
+                        goto fallback;
                 break;
         case CLOCK_MONOTONIC:
-                ret = do_monotonic(ts);
+                if (do_monotonic(ts) == VCLOCK_NONE)
+                        goto fallback;
                 break;
         case CLOCK_REALTIME_COARSE:
-                return do_realtime_coarse(ts);
+                do_realtime_coarse(ts);
+                break;
         case CLOCK_MONOTONIC_COARSE:
-                return do_monotonic_coarse(ts);
+                do_monotonic_coarse(ts);
+                break;
+        default:
+                goto fallback;
         }
 
-        if (ret == VCLOCK_NONE)
-                return vdso_fallback_gettime(clock, ts);
         return 0;
+fallback:
+        return vdso_fallback_gettime(clock, ts);
 }
 int clock_gettime(clockid_t, struct timespec *)
         __attribute__((weak, alias("__vdso_clock_gettime")));
 
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-        long ret = VCLOCK_NONE;
-
         if (likely(tv != NULL)) {
-                BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
-                             offsetof(struct timespec, tv_nsec) ||
-                             sizeof(*tv) != sizeof(struct timespec));
-                ret = do_realtime((struct timespec *)tv);
+                if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+                        return vdso_fallback_gtod(tv, tz);
                 tv->tv_usec /= 1000;
         }
         if (unlikely(tz != NULL)) {
-                /* Avoid memcpy. Some old compilers fail to inline it */
-                tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
-                tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
+                tz->tz_minuteswest = gtod->tz_minuteswest;
+                tz->tz_dsttime = gtod->tz_dsttime;
         }
 
-        if (ret == VCLOCK_NONE)
-                return vdso_fallback_gtod(tv, tz);
         return 0;
 }
 int gettimeofday(struct timeval *, struct timezone *)
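
Every reader above follows the same lockless seqcount protocol against the vvar page: read the sequence count, snapshot the fields, and retry if a concurrent update changed the count (gtod_read_begin() and gtod_read_retry() are the vDSO-local equivalents of the kernel's raw_read_seqcount helpers). A generic sketch of that reader loop, using C11 atomics in place of the kernel's barriers; the plain payload loads are a simplification of what the kernel does:

#include <stdatomic.h>

struct timedata {
        atomic_uint seq;          /* odd while a writer is mid-update */
        long long wall_sec;
        long long wall_snsec;
};

static void read_snapshot(struct timedata *td, long long *sec, long long *snsec)
{
        unsigned int seq;

        do {
                /* begin: spin until the count is even (no writer active) */
                while ((seq = atomic_load_explicit(&td->seq,
                                                   memory_order_acquire)) & 1)
                        ;
                *sec = td->wall_sec;      /* snapshot the payload */
                *snsec = td->wall_snsec;
                /* order the snapshot before re-reading the count */
                atomic_thread_fence(memory_order_acquire);
                /* retry: a changed count means a writer interleaved */
        } while (atomic_load_explicit(&td->seq, memory_order_relaxed) != seq);
}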
@@ -287,8 +355,8 @@ int gettimeofday(struct timeval *, struct timezone *)
  */
 notrace time_t __vdso_time(time_t *t)
 {
-        /* This is atomic on x86_64 so we don't need any locks. */
-        time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+        /* This is atomic on x86 so we don't need any locks. */
+        time_t result = ACCESS_ONCE(gtod->wall_time_sec);
 
         if (t)
                 *t = result;
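
With the compat layer in place, 32-bit and 64-bit userspace alike reach these functions through the ordinary libc entry points: when the vDSO exports the symbols, libc resolves clock_gettime(), gettimeofday() and time() to __vdso_clock_gettime, __vdso_gettimeofday and __vdso_time, so the fast path issues no syscall at all. A quick check:

#include <stdio.h>
#include <sys/time.h>
#include <time.h>

int main(void)
{
        struct timespec ts;
        struct timeval tv;

        /* both are normally served from the vDSO; the functions above
         * drop to a real syscall only when no usable clocksource mode
         * is available (VCLOCK_NONE) */
        clock_gettime(CLOCK_MONOTONIC, &ts);
        gettimeofday(&tv, NULL);
        printf("monotonic: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        printf("walltime:  %lld.%06ld\n",
               (long long)tv.tv_sec, (long)tv.tv_usec);
        return 0;
}

Running such a program under strace should show no clock_gettime or gettimeofday syscalls on the fast path.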