aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@xensource.com>2007-07-17 21:37:05 -0400
committerJeremy Fitzhardinge <jeremy@goop.org>2007-07-18 11:47:43 -0400
commit15c84731d647c34d1491793fa6be96f5de3432eb (patch)
tree861d2e6864e87a5860e0998395a65eb977e5b721
parente46cdb66c8fc1c8d61cfae0f219ff47ac4b9d531 (diff)
xen: time implementation
Xen maintains a base clock which measures nanoseconds since system boot. This is provided to guests via a shared page which contains a base time in ns, a tsc timestamp at that point and tsc frequency parameters. Guests can compute the current time by reading the tsc and using it to extrapolate the current time from the basetime. The hypervisor makes sure that the frequency parameters are updated regularly, particularly if the tsc changes rate or stops. This is implemented as a clocksource, so the interface to the rest of the kernel is a simple clocksource which simply returns the current time directly in nanoseconds. Xen also provides a simple timer mechanism, which allows a timeout to be set in the future. When that time arrives, a timer event is sent to the guest. There are two timer interfaces: - An old one which also delivers a stream of (unused) ticks at 100Hz, and on the same event, the actual timer events. The 100Hz ticks cause a lot of spurious wakeups, but are basically harmless. - The new timer interface doesn't have the 100Hz ticks, and can also fail if the specified time is in the past. This code presents the Xen timer as a clockevent driver, and uses the new interface by preference. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Chris Wright <chrisw@sous-sol.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/i386/xen/Makefile2
-rw-r--r--arch/i386/xen/enlighten.c6
-rw-r--r--arch/i386/xen/time.c407
3 files changed, 414 insertions, 1 deletions
diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile
index 7a78f27bfb16..bf51cabed0d2 100644
--- a/arch/i386/xen/Makefile
+++ b/arch/i386/xen/Makefile
@@ -1,2 +1,2 @@
1obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ 1obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \
2 events.o 2 events.o time.o
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
index 6417dfdccb4c..25eb3592f11d 100644
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -609,6 +609,12 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
609 .arch_setup = xen_arch_setup, 609 .arch_setup = xen_arch_setup,
610 .init_IRQ = xen_init_IRQ, 610 .init_IRQ = xen_init_IRQ,
611 611
612 .time_init = xen_time_init,
613 .set_wallclock = xen_set_wallclock,
614 .get_wallclock = xen_get_wallclock,
615 .get_cpu_khz = xen_cpu_khz,
616 .sched_clock = xen_clocksource_read,
617
612 .cpuid = xen_cpuid, 618 .cpuid = xen_cpuid,
613 619
614 .set_debugreg = xen_set_debugreg, 620 .set_debugreg = xen_set_debugreg,
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c
new file mode 100644
index 000000000000..b457980ff3c2
--- /dev/null
+++ b/arch/i386/xen/time.c
@@ -0,0 +1,407 @@
1/*
2 * Xen time implementation.
3 *
4 * This is implemented in terms of a clocksource driver which uses
5 * the hypervisor clock as a nanosecond timebase, and a clockevent
6 * driver which uses the hypervisor's timer mechanism.
7 *
8 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
9 */
10#include <linux/kernel.h>
11#include <linux/interrupt.h>
12#include <linux/clocksource.h>
13#include <linux/clockchips.h>
14
15#include <asm/xen/hypervisor.h>
16#include <asm/xen/hypercall.h>
17
18#include <xen/events.h>
19#include <xen/interface/xen.h>
20#include <xen/interface/vcpu.h>
21
22#include "xen-ops.h"
23
/* Fixed-point shift used for the Xen clocksource's mult/shift pair. */
#define XEN_SHIFT 22

/* Slack in nanoseconds: Xen may deliver a timer event up to this early. */
#define TIMER_SLOP 100000
28
29/* These are perodically updated in shared_info, and then copied here. */
30struct shadow_time_info {
31 u64 tsc_timestamp; /* TSC at last update of time vals. */
32 u64 system_timestamp; /* Time, in nanosecs, since boot. */
33 u32 tsc_to_nsec_mul;
34 int tsc_shift;
35 u32 version;
36};
37
38static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
39
40unsigned long xen_cpu_khz(void)
41{
42 u64 cpu_khz = 1000000ULL << 32;
43 const struct vcpu_time_info *info =
44 &HYPERVISOR_shared_info->vcpu_info[0].time;
45
46 do_div(cpu_khz, info->tsc_to_system_mul);
47 if (info->tsc_shift < 0)
48 cpu_khz <<= -info->tsc_shift;
49 else
50 cpu_khz >>= info->tsc_shift;
51
52 return cpu_khz;
53}
54
55/*
56 * Reads a consistent set of time-base values from Xen, into a shadow data
57 * area.
58 */
59static void get_time_values_from_xen(void)
60{
61 struct vcpu_time_info *src;
62 struct shadow_time_info *dst;
63
64 preempt_disable();
65
66 /* src is shared memory with the hypervisor, so we need to
67 make sure we get a consistent snapshot, even in the face of
68 being preempted. */
69 src = &__get_cpu_var(xen_vcpu)->time;
70 dst = &__get_cpu_var(shadow_time);
71
72 do {
73 dst->version = src->version;
74 rmb(); /* fetch version before data */
75 dst->tsc_timestamp = src->tsc_timestamp;
76 dst->system_timestamp = src->system_time;
77 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
78 dst->tsc_shift = src->tsc_shift;
79 rmb(); /* test version after fetching data */
80 } while ((src->version & 1) | (dst->version ^ src->version));
81
82 preempt_enable();
83}
84
85/*
86 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
87 * yielding a 64-bit result.
88 */
89static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
90{
91 u64 product;
92#ifdef __i386__
93 u32 tmp1, tmp2;
94#endif
95
96 if (shift < 0)
97 delta >>= -shift;
98 else
99 delta <<= shift;
100
101#ifdef __i386__
102 __asm__ (
103 "mul %5 ; "
104 "mov %4,%%eax ; "
105 "mov %%edx,%4 ; "
106 "mul %5 ; "
107 "xor %5,%5 ; "
108 "add %4,%%eax ; "
109 "adc %5,%%edx ; "
110 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
111 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
112#elif __x86_64__
113 __asm__ (
114 "mul %%rdx ; shrd $32,%%rdx,%%rax"
115 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
116#else
117#error implement me!
118#endif
119
120 return product;
121}
122
123static u64 get_nsec_offset(struct shadow_time_info *shadow)
124{
125 u64 now, delta;
126 rdtscll(now);
127 delta = now - shadow->tsc_timestamp;
128 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
129}
130
131cycle_t xen_clocksource_read(void)
132{
133 struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
134 cycle_t ret;
135
136 get_time_values_from_xen();
137
138 ret = shadow->system_timestamp + get_nsec_offset(shadow);
139
140 put_cpu_var(shadow_time);
141
142 return ret;
143}
144
145static void xen_read_wallclock(struct timespec *ts)
146{
147 const struct shared_info *s = HYPERVISOR_shared_info;
148 u32 version;
149 u64 delta;
150 struct timespec now;
151
152 /* get wallclock at system boot */
153 do {
154 version = s->wc_version;
155 rmb(); /* fetch version before time */
156 now.tv_sec = s->wc_sec;
157 now.tv_nsec = s->wc_nsec;
158 rmb(); /* fetch time before checking version */
159 } while ((s->wc_version & 1) | (version ^ s->wc_version));
160
161 delta = xen_clocksource_read(); /* time since system boot */
162 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
163
164 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
165 now.tv_sec = delta;
166
167 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
168}
169
170unsigned long xen_get_wallclock(void)
171{
172 struct timespec ts;
173
174 xen_read_wallclock(&ts);
175
176 return ts.tv_sec;
177}
178
/*
 * Setting the wallclock is not implemented (nothing to do for a
 * domU); 'now' is ignored and -1 is always returned.
 */
int xen_set_wallclock(unsigned long now)
{
	(void)now;

	return -1;
}
184
185static struct clocksource xen_clocksource __read_mostly = {
186 .name = "xen",
187 .rating = 400,
188 .read = xen_clocksource_read,
189 .mask = ~0,
190 .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */
191 .shift = XEN_SHIFT,
192 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
193};
194
195/*
196 Xen clockevent implementation
197
198 Xen has two clockevent implementations:
199
200 The old timer_op one works with all released versions of Xen prior
201 to version 3.0.4. This version of the hypervisor provides a
202 single-shot timer with nanosecond resolution. However, sharing the
203 same event channel is a 100Hz tick which is delivered while the
204 vcpu is running. We don't care about or use this tick, but it will
205 cause the core time code to think the timer fired too soon, and
206 will end up resetting it each time. It could be filtered, but
207 doing so has complications when the ktime clocksource is not yet
208 the xen clocksource (ie, at boot time).
209
210 The new vcpu_op-based timer interface allows the tick timer period
211 to be changed or turned off. The tick timer is not useful as a
212 periodic timer because events are only delivered to running vcpus.
213 The one-shot timer can report when a timeout is in the past, so
214 set_next_event is capable of returning -ETIME when appropriate.
215 This interface is used when available.
216*/
217
218
219/*
220 Get a hypervisor absolute time. In theory we could maintain an
221 offset between the kernel's time and the hypervisor's time, and
222 apply that to a kernel's absolute timeout. Unfortunately the
223 hypervisor and kernel times can drift even if the kernel is using
224 the Xen clocksource, because ntp can warp the kernel's clocksource.
225*/
226static s64 get_abs_timeout(unsigned long delta)
227{
228 return xen_clocksource_read() + delta;
229}
230
231static void xen_timerop_set_mode(enum clock_event_mode mode,
232 struct clock_event_device *evt)
233{
234 switch (mode) {
235 case CLOCK_EVT_MODE_PERIODIC:
236 /* unsupported */
237 WARN_ON(1);
238 break;
239
240 case CLOCK_EVT_MODE_ONESHOT:
241 break;
242
243 case CLOCK_EVT_MODE_UNUSED:
244 case CLOCK_EVT_MODE_SHUTDOWN:
245 HYPERVISOR_set_timer_op(0); /* cancel timeout */
246 break;
247 }
248}
249
250static int xen_timerop_set_next_event(unsigned long delta,
251 struct clock_event_device *evt)
252{
253 WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
254
255 if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
256 BUG();
257
258 /* We may have missed the deadline, but there's no real way of
259 knowing for sure. If the event was in the past, then we'll
260 get an immediate interrupt. */
261
262 return 0;
263}
264
265static const struct clock_event_device xen_timerop_clockevent = {
266 .name = "xen",
267 .features = CLOCK_EVT_FEAT_ONESHOT,
268
269 .max_delta_ns = 0xffffffff,
270 .min_delta_ns = TIMER_SLOP,
271
272 .mult = 1,
273 .shift = 0,
274 .rating = 500,
275
276 .set_mode = xen_timerop_set_mode,
277 .set_next_event = xen_timerop_set_next_event,
278};
279
280
281
282static void xen_vcpuop_set_mode(enum clock_event_mode mode,
283 struct clock_event_device *evt)
284{
285 int cpu = smp_processor_id();
286
287 switch (mode) {
288 case CLOCK_EVT_MODE_PERIODIC:
289 WARN_ON(1); /* unsupported */
290 break;
291
292 case CLOCK_EVT_MODE_ONESHOT:
293 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
294 BUG();
295 break;
296
297 case CLOCK_EVT_MODE_UNUSED:
298 case CLOCK_EVT_MODE_SHUTDOWN:
299 if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
300 HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
301 BUG();
302 break;
303 }
304}
305
306static int xen_vcpuop_set_next_event(unsigned long delta,
307 struct clock_event_device *evt)
308{
309 int cpu = smp_processor_id();
310 struct vcpu_set_singleshot_timer single;
311 int ret;
312
313 WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
314
315 single.timeout_abs_ns = get_abs_timeout(delta);
316 single.flags = VCPU_SSHOTTMR_future;
317
318 ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
319
320 BUG_ON(ret != 0 && ret != -ETIME);
321
322 return ret;
323}
324
325static const struct clock_event_device xen_vcpuop_clockevent = {
326 .name = "xen",
327 .features = CLOCK_EVT_FEAT_ONESHOT,
328
329 .max_delta_ns = 0xffffffff,
330 .min_delta_ns = TIMER_SLOP,
331
332 .mult = 1,
333 .shift = 0,
334 .rating = 500,
335
336 .set_mode = xen_vcpuop_set_mode,
337 .set_next_event = xen_vcpuop_set_next_event,
338};
339
340static const struct clock_event_device *xen_clockevent =
341 &xen_timerop_clockevent;
342static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
343
344static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
345{
346 struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
347 irqreturn_t ret;
348
349 ret = IRQ_NONE;
350 if (evt->event_handler) {
351 evt->event_handler(evt);
352 ret = IRQ_HANDLED;
353 }
354
355 return ret;
356}
357
358static void xen_setup_timer(int cpu)
359{
360 const char *name;
361 struct clock_event_device *evt;
362 int irq;
363
364 printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
365
366 name = kasprintf(GFP_KERNEL, "timer%d", cpu);
367 if (!name)
368 name = "<timer kasprintf failed>";
369
370 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
371 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
372 name, NULL);
373
374 evt = &get_cpu_var(xen_clock_events);
375 memcpy(evt, xen_clockevent, sizeof(*evt));
376
377 evt->cpumask = cpumask_of_cpu(cpu);
378 evt->irq = irq;
379 clockevents_register_device(evt);
380
381 put_cpu_var(xen_clock_events);
382}
383
384__init void xen_time_init(void)
385{
386 int cpu = smp_processor_id();
387
388 get_time_values_from_xen();
389
390 clocksource_register(&xen_clocksource);
391
392 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
393 /* Successfully turned off 100hz tick, so we have the
394 vcpuop-based timer interface */
395 printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
396 xen_clockevent = &xen_vcpuop_clockevent;
397 }
398
399 /* Set initial system time with full resolution */
400 xen_read_wallclock(&xtime);
401 set_normalized_timespec(&wall_to_monotonic,
402 -xtime.tv_sec, -xtime.tv_nsec);
403
404 tsc_disable = 0;
405
406 xen_setup_timer(cpu);
407}