-rw-r--r--   arch/i386/xen/Makefile    |   2
-rw-r--r--   arch/i386/xen/enlighten.c |   6
-rw-r--r--   arch/i386/xen/time.c      | 407
3 files changed, 414 insertions(+), 1 deletion(-)
diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile
index 7a78f27bfb16..bf51cabed0d2 100644
--- a/arch/i386/xen/Makefile
+++ b/arch/i386/xen/Makefile
@@ -1,2 +1,2 @@
 obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \
-		events.o
+		events.o time.o
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
index 6417dfdccb4c..25eb3592f11d 100644
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -609,6 +609,12 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
 	.arch_setup = xen_arch_setup,
 	.init_IRQ = xen_init_IRQ,
 
+	.time_init = xen_time_init,
+	.set_wallclock = xen_set_wallclock,
+	.get_wallclock = xen_get_wallclock,
+	.get_cpu_khz = xen_cpu_khz,
+	.sched_clock = xen_clocksource_read,
+
 	.cpuid = xen_cpuid,
 
 	.set_debugreg = xen_set_debugreg,
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c
new file mode 100644
index 000000000000..b457980ff3c2
--- /dev/null
+++ b/arch/i386/xen/time.c
@@ -0,0 +1,407 @@
/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

#define XEN_SHIFT 22

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000

/* These are periodically updated in shared_info, and then copied here. */
struct shadow_time_info {
	u64 tsc_timestamp;	/* TSC at last update of time vals.  */
	u64 system_timestamp;	/* Time, in nanosecs, since boot.    */
	u32 tsc_to_nsec_mul;
	int tsc_shift;
	u32 version;
};

static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);

unsigned long xen_cpu_khz(void)
{
	u64 cpu_khz = 1000000ULL << 32;
	const struct vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	do_div(cpu_khz, info->tsc_to_system_mul);
	if (info->tsc_shift < 0)
		cpu_khz <<= -info->tsc_shift;
	else
		cpu_khz >>= info->tsc_shift;

	return cpu_khz;
}
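
/*
 * Note on the arithmetic above: Xen converts TSC ticks to nanoseconds
 * roughly as ns = ((tsc << tsc_shift) * tsc_to_system_mul) >> 32 (a
 * negative tsc_shift meaning a right shift), i.e. one TSC tick lasts
 * (tsc_to_system_mul * 2^tsc_shift) / 2^32 ns.  Inverting that gives
 *
 *	cpu_khz = (10^6 * 2^32) / (tsc_to_system_mul * 2^tsc_shift)
 *
 * which is what the do_div() plus the shift correction compute.
 */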

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 */
static void get_time_values_from_xen(void)
{
	struct vcpu_time_info *src;
	struct shadow_time_info *dst;

	preempt_disable();

	/* src is shared memory with the hypervisor, so we need to
	   make sure we get a consistent snapshot, even in the face of
	   being preempted. */
	src = &__get_cpu_var(xen_vcpu)->time;
	dst = &__get_cpu_var(shadow_time);

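	/*
	 * Xen sets the low bit of src->version while it is rewriting
	 * these fields and bumps it to an even value when it is done,
	 * so retry the copy while an update is in progress (odd
	 * version) or while the version changed under us.
	 */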
	do {
		dst->version = src->version;
		rmb();		/* fetch version before data */
		dst->tsc_timestamp = src->tsc_timestamp;
		dst->system_timestamp = src->system_time;
		dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
		dst->tsc_shift = src->tsc_shift;
		rmb();		/* test version after fetching data */
	} while ((src->version & 1) | (dst->version ^ src->version));

	preempt_enable();
}

/*
 * Scale a 64-bit delta by shifting it and then multiplying by a 32-bit
 * fraction, yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
	u64 product;
#ifdef __i386__
	u32 tmp1, tmp2;
#endif

	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

#ifdef __i386__
	__asm__ (
		"mul %5       ; "
		"mov %4,%%eax ; "
		"mov %%edx,%4 ; "
		"mul %5       ; "
		"xor %5,%5    ; "
		"add %4,%%eax ; "
		"adc %5,%%edx ; "
		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif __x86_64__
	__asm__ (
		"mul %%rdx ; shrd $32,%%rdx,%%rax"
		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#else
#error implement me!
#endif

	return product;
}
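
/*
 * For clarity (purely illustrative, not used by this driver): the
 * inline assembly above computes ((delta * mul_frac) >> 32) with a
 * full-width intermediate product.  A portable C sketch of the same
 * computation would be:
 *
 *	static u64 scale_delta_portable(u64 delta, u32 mul_frac, int shift)
 *	{
 *		u32 d_lo, d_hi;
 *
 *		if (shift < 0)
 *			delta >>= -shift;
 *		else
 *			delta <<= shift;
 *
 *		d_lo = (u32)delta;
 *		d_hi = (u32)(delta >> 32);
 *
 *		return (u64)d_hi * mul_frac + (((u64)d_lo * mul_frac) >> 32);
 *	}
 */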

static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
	u64 now, delta;
	rdtscll(now);
	delta = now - shadow->tsc_timestamp;
	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

cycle_t xen_clocksource_read(void)
{
	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
	cycle_t ret;

	get_time_values_from_xen();

	ret = shadow->system_timestamp + get_nsec_offset(shadow);

	put_cpu_var(shadow_time);

	return ret;
}

static void xen_read_wallclock(struct timespec *ts)
{
	const struct shared_info *s = HYPERVISOR_shared_info;
	u32 version;
	u64 delta;
	struct timespec now;

	/* get wallclock at system boot */
	do {
		version = s->wc_version;
		rmb();		/* fetch version before time */
		now.tv_sec = s->wc_sec;
		now.tv_nsec = s->wc_nsec;
		rmb();		/* fetch time before checking version */
	} while ((s->wc_version & 1) | (version ^ s->wc_version));

	delta = xen_clocksource_read();	/* time since system boot */
	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;

	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
	now.tv_sec = delta;

	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}

unsigned long xen_get_wallclock(void)
{
	struct timespec ts;

	xen_read_wallclock(&ts);

	return ts.tv_sec;
}

int xen_set_wallclock(unsigned long now)
{
	/* do nothing for domU */
	return -1;
}

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_read,
	.mask = ~0,
	.mult = 1<<XEN_SHIFT,		/* time directly in nanoseconds */
	.shift = XEN_SHIFT,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
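
/*
 * Note on .mult/.shift: the clocksource core converts cycles to ns as
 * ns = (cycles * mult) >> shift.  With mult == 1 << XEN_SHIFT and
 * shift == XEN_SHIFT that reduces to ns = cycles, which is why
 * xen_clocksource_read() can return nanoseconds directly.
 */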

/*
	Xen clockevent implementation

	Xen has two clockevent implementations:

	The old timer_op one works with all released versions of Xen prior
	to version 3.0.4.  This version of the hypervisor provides a
	single-shot timer with nanosecond resolution.  However, sharing the
	same event channel is a 100Hz tick which is delivered while the
	vcpu is running.  We don't care about or use this tick, but it will
	cause the core time code to think the timer fired too soon, and
	will end up resetting it each time.  It could be filtered, but
	doing so has complications when the ktime clocksource is not yet
	the xen clocksource (ie, at boot time).

	The new vcpu_op-based timer interface allows the tick timer period
	to be changed or turned off.  The tick timer is not useful as a
	periodic timer because events are only delivered to running vcpus.
	The one-shot timer can report when a timeout is in the past, so
	set_next_event is capable of returning -ETIME when appropriate.
	This interface is used when available.
*/


/*
	Get a hypervisor absolute time.  In theory we could maintain an
	offset between the kernel's time and the hypervisor's time, and
	apply that to a kernel's absolute timeout.  Unfortunately the
	hypervisor and kernel times can drift even if the kernel is using
	the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

static void xen_timerop_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* unsupported */
		WARN_ON(1);
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		HYPERVISOR_set_timer_op(0);	/* cancel timeout */
		break;
	}
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_timerop_set_mode,
	.set_next_event = xen_timerop_set_next_event,
};
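
/*
 * Note on .mult/.shift here and in xen_vcpuop_clockevent below: the
 * clockevents core converts a requested delta to device units as
 * (delta_ns * mult) >> shift before calling set_next_event, so with
 * mult == 1 and shift == 0 the delta stays in nanoseconds and the
 * min/max_delta_ns limits apply to it unchanged.
 */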

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
				struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		WARN_ON(1);	/* unsupported */
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
		    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;
	}
}

static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	single.timeout_abs_ns = get_abs_timeout(delta);
	single.flags = VCPU_SSHOTTMR_future;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

	BUG_ON(ret != 0 && ret != -ETIME);

	return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_vcpuop_set_mode,
	.set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);

static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	return ret;
}

static void xen_setup_timer(int cpu)
{
	const char *name;
	struct clock_event_device *evt;
	int irq;

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
	if (!name)
		name = "<timer kasprintf failed>";

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				      name, NULL);

	evt = &get_cpu_var(xen_clock_events);
	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of_cpu(cpu);
	evt->irq = irq;
	clockevents_register_device(evt);

	put_cpu_var(xen_clock_events);
}

__init void xen_time_init(void)
{
	int cpu = smp_processor_id();

	get_time_values_from_xen();

	clocksource_register(&xen_clocksource);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off the 100Hz tick, so we have
		   the vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&xtime);
	set_normalized_timespec(&wall_to_monotonic,
				-xtime.tv_sec, -xtime.tv_nsec);

	tsc_disable = 0;

	xen_setup_timer(cpu);
}