diff options
Diffstat (limited to 'arch/x86/xen/time.c')
| -rw-r--r-- | arch/x86/xen/time.c | 593 |
1 files changed, 593 insertions, 0 deletions
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c new file mode 100644 index 000000000000..dfd6db69ead5 --- /dev/null +++ b/arch/x86/xen/time.c | |||
| @@ -0,0 +1,593 @@ | |||
| 1 | /* | ||
| 2 | * Xen time implementation. | ||
| 3 | * | ||
| 4 | * This is implemented in terms of a clocksource driver which uses | ||
| 5 | * the hypervisor clock as a nanosecond timebase, and a clockevent | ||
| 6 | * driver which uses the hypervisor's timer mechanism. | ||
| 7 | * | ||
| 8 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | ||
| 9 | */ | ||
| 10 | #include <linux/kernel.h> | ||
| 11 | #include <linux/interrupt.h> | ||
| 12 | #include <linux/clocksource.h> | ||
| 13 | #include <linux/clockchips.h> | ||
| 14 | #include <linux/kernel_stat.h> | ||
| 15 | |||
| 16 | #include <asm/xen/hypervisor.h> | ||
| 17 | #include <asm/xen/hypercall.h> | ||
| 18 | |||
| 19 | #include <xen/events.h> | ||
| 20 | #include <xen/interface/xen.h> | ||
| 21 | #include <xen/interface/vcpu.h> | ||
| 22 | |||
| 23 | #include "xen-ops.h" | ||
| 24 | |||
| 25 | #define XEN_SHIFT 22 | ||
| 26 | |||
| 27 | /* Xen may fire a timer up to this many ns early */ | ||
| 28 | #define TIMER_SLOP 100000 | ||
| 29 | #define NS_PER_TICK (1000000000LL / HZ) | ||
| 30 | |||
| 31 | static cycle_t xen_clocksource_read(void); | ||
| 32 | |||
| 33 | /* These are periodically updated in shared_info, and then copied here. */ | ||
| 34 | struct shadow_time_info { | ||
| 35 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
| 36 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
| 37 | u32 tsc_to_nsec_mul; | ||
| 38 | int tsc_shift; | ||
| 39 | u32 version; | ||
| 40 | }; | ||
| 41 | |||
| 42 | static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); | ||
| 43 | |||
| 44 | /* runstate info updated by Xen */ | ||
| 45 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | ||
| 46 | |||
| 47 | /* snapshots of runstate info */ | ||
| 48 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot); | ||
| 49 | |||
| 50 | /* unused ns of stolen and blocked time */ | ||
| 51 | static DEFINE_PER_CPU(u64, residual_stolen); | ||
| 52 | static DEFINE_PER_CPU(u64, residual_blocked); | ||
| 53 | |||
| 54 | /* return a consistent snapshot of a 64-bit time/counter value */ | ||
| 55 | static u64 get64(const u64 *p) | ||
| 56 | { | ||
| 57 | u64 ret; | ||
| 58 | |||
| 59 | if (BITS_PER_LONG < 64) { | ||
| 60 | u32 *p32 = (u32 *)p; | ||
| 61 | u32 h, l; | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Read high then low, and then make sure high is | ||
| 65 | * still the same; this will only loop if low wraps | ||
| 66 | * and carries into high. | ||
| 67 | * XXX some clean way to make this endian-proof? | ||
| 68 | */ | ||
| 69 | do { | ||
| 70 | h = p32[1]; | ||
| 71 | barrier(); | ||
| 72 | l = p32[0]; | ||
| 73 | barrier(); | ||
| 74 | } while (p32[1] != h); | ||
| 75 | |||
| 76 | ret = (((u64)h) << 32) | l; | ||
| 77 | } else | ||
| 78 | ret = *p; | ||
| 79 | |||
| 80 | return ret; | ||
| 81 | } | ||
| 82 | |||
| 83 | /* | ||
| 84 | * Runstate accounting | ||
| 85 | */ | ||
| 86 | static void get_runstate_snapshot(struct vcpu_runstate_info *res) | ||
| 87 | { | ||
| 88 | u64 state_time; | ||
| 89 | struct vcpu_runstate_info *state; | ||
| 90 | |||
| 91 | BUG_ON(preemptible()); | ||
| 92 | |||
| 93 | state = &__get_cpu_var(runstate); | ||
| 94 | |||
| 95 | /* | ||
| 96 | * The runstate info is always updated by the hypervisor on | ||
| 97 | * the current CPU, so there's no need to use anything | ||
| 98 | * stronger than a compiler barrier when fetching it. | ||
| 99 | */ | ||
| 100 | do { | ||
| 101 | state_time = get64(&state->state_entry_time); | ||
| 102 | barrier(); | ||
| 103 | *res = *state; | ||
| 104 | barrier(); | ||
| 105 | } while (get64(&state->state_entry_time) != state_time); | ||
| 106 | } | ||
| 107 | |||
| 108 | static void setup_runstate_info(int cpu) | ||
| 109 | { | ||
| 110 | struct vcpu_register_runstate_memory_area area; | ||
| 111 | |||
| 112 | area.addr.v = &per_cpu(runstate, cpu); | ||
| 113 | |||
| 114 | if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, | ||
| 115 | cpu, &area)) | ||
| 116 | BUG(); | ||
| 117 | } | ||
| 118 | |||
| 119 | static void do_stolen_accounting(void) | ||
| 120 | { | ||
| 121 | struct vcpu_runstate_info state; | ||
| 122 | struct vcpu_runstate_info *snap; | ||
| 123 | s64 blocked, runnable, offline, stolen; | ||
| 124 | cputime_t ticks; | ||
| 125 | |||
| 126 | get_runstate_snapshot(&state); | ||
| 127 | |||
| 128 | WARN_ON(state.state != RUNSTATE_running); | ||
| 129 | |||
| 130 | snap = &__get_cpu_var(runstate_snapshot); | ||
| 131 | |||
| 132 | /* work out how much time the VCPU has not been runn*ing* */ | ||
| 133 | blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; | ||
| 134 | runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; | ||
| 135 | offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; | ||
| 136 | |||
| 137 | *snap = state; | ||
| 138 | |||
| 139 | /* Add the appropriate number of ticks of stolen time, | ||
| 140 | including any left-overs from last time. Passing NULL to | ||
| 141 | account_steal_time accounts the time as stolen. */ | ||
| 142 | stolen = runnable + offline + __get_cpu_var(residual_stolen); | ||
| 143 | |||
| 144 | if (stolen < 0) | ||
| 145 | stolen = 0; | ||
| 146 | |||
| 147 | ticks = 0; | ||
| 148 | while (stolen >= NS_PER_TICK) { | ||
| 149 | ticks++; | ||
| 150 | stolen -= NS_PER_TICK; | ||
| 151 | } | ||
| 152 | __get_cpu_var(residual_stolen) = stolen; | ||
| 153 | account_steal_time(NULL, ticks); | ||
| 154 | |||
| 155 | /* Add the appropriate number of ticks of blocked time, | ||
| 156 | including any left-overs from last time. Passing idle to | ||
| 157 | account_steal_time accounts the time as idle/wait. */ | ||
| 158 | blocked += __get_cpu_var(residual_blocked); | ||
| 159 | |||
| 160 | if (blocked < 0) | ||
| 161 | blocked = 0; | ||
| 162 | |||
| 163 | ticks = 0; | ||
| 164 | while (blocked >= NS_PER_TICK) { | ||
| 165 | ticks++; | ||
| 166 | blocked -= NS_PER_TICK; | ||
| 167 | } | ||
| 168 | __get_cpu_var(residual_blocked) = blocked; | ||
| 169 | account_steal_time(idle_task(smp_processor_id()), ticks); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Xen sched_clock implementation. Returns the number of unstolen | ||
| 174 | * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED | ||
| 175 | * states. | ||
| 176 | */ | ||
| 177 | unsigned long long xen_sched_clock(void) | ||
| 178 | { | ||
| 179 | struct vcpu_runstate_info state; | ||
| 180 | cycle_t now; | ||
| 181 | u64 ret; | ||
| 182 | s64 offset; | ||
| 183 | |||
| 184 | /* | ||
| 185 | * Ideally sched_clock should be called on a per-cpu basis | ||
| 186 | * anyway, so preempt should already be disabled, but that's | ||
| 187 | * not current practice at the moment. | ||
| 188 | */ | ||
| 189 | preempt_disable(); | ||
| 190 | |||
| 191 | now = xen_clocksource_read(); | ||
| 192 | |||
| 193 | get_runstate_snapshot(&state); | ||
| 194 | |||
| 195 | WARN_ON(state.state != RUNSTATE_running); | ||
| 196 | |||
| 197 | offset = now - state.state_entry_time; | ||
| 198 | if (offset < 0) | ||
| 199 | offset = 0; | ||
| 200 | |||
| 201 | ret = state.time[RUNSTATE_blocked] + | ||
| 202 | state.time[RUNSTATE_running] + | ||
| 203 | offset; | ||
| 204 | |||
| 205 | preempt_enable(); | ||
| 206 | |||
| 207 | return ret; | ||
| 208 | } | ||
| 209 | |||
| 210 | |||
| 211 | /* Get the CPU speed from Xen */ | ||
| 212 | unsigned long xen_cpu_khz(void) | ||
| 213 | { | ||
| 214 | u64 cpu_khz = 1000000ULL << 32; | ||
| 215 | const struct vcpu_time_info *info = | ||
| 216 | &HYPERVISOR_shared_info->vcpu_info[0].time; | ||
| 217 | |||
| 218 | do_div(cpu_khz, info->tsc_to_system_mul); | ||
| 219 | if (info->tsc_shift < 0) | ||
| 220 | cpu_khz <<= -info->tsc_shift; | ||
| 221 | else | ||
| 222 | cpu_khz >>= info->tsc_shift; | ||
| 223 | |||
| 224 | return cpu_khz; | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 228 | * Reads a consistent set of time-base values from Xen, into a shadow data | ||
| 229 | * area. | ||
| 230 | */ | ||
| 231 | static unsigned get_time_values_from_xen(void) | ||
| 232 | { | ||
| 233 | struct vcpu_time_info *src; | ||
| 234 | struct shadow_time_info *dst; | ||
| 235 | |||
| 236 | /* src is shared memory with the hypervisor, so we need to | ||
| 237 | make sure we get a consistent snapshot, even in the face of | ||
| 238 | being preempted. */ | ||
| 239 | src = &__get_cpu_var(xen_vcpu)->time; | ||
| 240 | dst = &__get_cpu_var(shadow_time); | ||
| 241 | |||
| 242 | do { | ||
| 243 | dst->version = src->version; | ||
| 244 | rmb(); /* fetch version before data */ | ||
| 245 | dst->tsc_timestamp = src->tsc_timestamp; | ||
| 246 | dst->system_timestamp = src->system_time; | ||
| 247 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
| 248 | dst->tsc_shift = src->tsc_shift; | ||
| 249 | rmb(); /* test version after fetching data */ | ||
| 250 | } while ((src->version & 1) | (dst->version ^ src->version)); | ||
| 251 | |||
| 252 | return dst->version; | ||
| 253 | } | ||
| 254 | |||
| 255 | /* | ||
| 256 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | ||
| 257 | * yielding a 64-bit result. | ||
| 258 | */ | ||
| 259 | static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | ||
| 260 | { | ||
| 261 | u64 product; | ||
| 262 | #ifdef __i386__ | ||
| 263 | u32 tmp1, tmp2; | ||
| 264 | #endif | ||
| 265 | |||
| 266 | if (shift < 0) | ||
| 267 | delta >>= -shift; | ||
| 268 | else | ||
| 269 | delta <<= shift; | ||
| 270 | |||
| 271 | #ifdef __i386__ | ||
| 272 | __asm__ ( | ||
| 273 | "mul %5 ; " | ||
| 274 | "mov %4,%%eax ; " | ||
| 275 | "mov %%edx,%4 ; " | ||
| 276 | "mul %5 ; " | ||
| 277 | "xor %5,%5 ; " | ||
| 278 | "add %4,%%eax ; " | ||
| 279 | "adc %5,%%edx ; " | ||
| 280 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | ||
| 281 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | ||
| 282 | #elif __x86_64__ | ||
| 283 | __asm__ ( | ||
| 284 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | ||
| 285 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | ||
| 286 | #else | ||
| 287 | #error implement me! | ||
| 288 | #endif | ||
| 289 | |||
| 290 | return product; | ||
| 291 | } | ||
| 292 | |||
| 293 | static u64 get_nsec_offset(struct shadow_time_info *shadow) | ||
| 294 | { | ||
| 295 | u64 now, delta; | ||
| 296 | now = native_read_tsc(); | ||
| 297 | delta = now - shadow->tsc_timestamp; | ||
| 298 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
| 299 | } | ||
| 300 | |||
| 301 | static cycle_t xen_clocksource_read(void) | ||
| 302 | { | ||
| 303 | struct shadow_time_info *shadow = &get_cpu_var(shadow_time); | ||
| 304 | cycle_t ret; | ||
| 305 | unsigned version; | ||
| 306 | |||
| 307 | do { | ||
| 308 | version = get_time_values_from_xen(); | ||
| 309 | barrier(); | ||
| 310 | ret = shadow->system_timestamp + get_nsec_offset(shadow); | ||
| 311 | barrier(); | ||
| 312 | } while (version != __get_cpu_var(xen_vcpu)->time.version); | ||
| 313 | |||
| 314 | put_cpu_var(shadow_time); | ||
| 315 | |||
| 316 | return ret; | ||
| 317 | } | ||
| 318 | |||
| 319 | static void xen_read_wallclock(struct timespec *ts) | ||
| 320 | { | ||
| 321 | const struct shared_info *s = HYPERVISOR_shared_info; | ||
| 322 | u32 version; | ||
| 323 | u64 delta; | ||
| 324 | struct timespec now; | ||
| 325 | |||
| 326 | /* get wallclock at system boot */ | ||
| 327 | do { | ||
| 328 | version = s->wc_version; | ||
| 329 | rmb(); /* fetch version before time */ | ||
| 330 | now.tv_sec = s->wc_sec; | ||
| 331 | now.tv_nsec = s->wc_nsec; | ||
| 332 | rmb(); /* fetch time before checking version */ | ||
| 333 | } while ((s->wc_version & 1) | (version ^ s->wc_version)); | ||
| 334 | |||
| 335 | delta = xen_clocksource_read(); /* time since system boot */ | ||
| 336 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | ||
| 337 | |||
| 338 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
| 339 | now.tv_sec = delta; | ||
| 340 | |||
| 341 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | ||
| 342 | } | ||
| 343 | |||
| 344 | unsigned long xen_get_wallclock(void) | ||
| 345 | { | ||
| 346 | struct timespec ts; | ||
| 347 | |||
| 348 | xen_read_wallclock(&ts); | ||
| 349 | |||
| 350 | return ts.tv_sec; | ||
| 351 | } | ||
| 352 | |||
/*
 * Setting the wallclock is a no-op for a domU: @now is ignored and -1
 * is always returned.
 */
int xen_set_wallclock(unsigned long now)
{
	return -1;
}
| 358 | |||
| 359 | static struct clocksource xen_clocksource __read_mostly = { | ||
| 360 | .name = "xen", | ||
| 361 | .rating = 400, | ||
| 362 | .read = xen_clocksource_read, | ||
| 363 | .mask = ~0, | ||
| 364 | .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */ | ||
| 365 | .shift = XEN_SHIFT, | ||
| 366 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
| 367 | }; | ||
| 368 | |||
| 369 | /* | ||
| 370 | Xen clockevent implementation | ||
| 371 | |||
| 372 | Xen has two clockevent implementations: | ||
| 373 | |||
| 374 | The old timer_op one works with all released versions of Xen prior | ||
| 375 | to version 3.0.4. This version of the hypervisor provides a | ||
| 376 | single-shot timer with nanosecond resolution. However, sharing the | ||
| 377 | same event channel is a 100Hz tick which is delivered while the | ||
| 378 | vcpu is running. We don't care about or use this tick, but it will | ||
| 379 | cause the core time code to think the timer fired too soon, and | ||
| 380 | will end up resetting it each time. It could be filtered, but | ||
| 381 | doing so has complications when the ktime clocksource is not yet | ||
| 382 | the xen clocksource (ie, at boot time). | ||
| 383 | |||
| 384 | The new vcpu_op-based timer interface allows the tick timer period | ||
| 385 | to be changed or turned off. The tick timer is not useful as a | ||
| 386 | periodic timer because events are only delivered to running vcpus. | ||
| 387 | The one-shot timer can report when a timeout is in the past, so | ||
| 388 | set_next_event is capable of returning -ETIME when appropriate. | ||
| 389 | This interface is used when available. | ||
| 390 | */ | ||
| 391 | |||
| 392 | |||
| 393 | /* | ||
| 394 | Get a hypervisor absolute time. In theory we could maintain an | ||
| 395 | offset between the kernel's time and the hypervisor's time, and | ||
| 396 | apply that to a kernel's absolute timeout. Unfortunately the | ||
| 397 | hypervisor and kernel times can drift even if the kernel is using | ||
| 398 | the Xen clocksource, because ntp can warp the kernel's clocksource. | ||
| 399 | */ | ||
| 400 | static s64 get_abs_timeout(unsigned long delta) | ||
| 401 | { | ||
| 402 | return xen_clocksource_read() + delta; | ||
| 403 | } | ||
| 404 | |||
| 405 | static void xen_timerop_set_mode(enum clock_event_mode mode, | ||
| 406 | struct clock_event_device *evt) | ||
| 407 | { | ||
| 408 | switch (mode) { | ||
| 409 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 410 | /* unsupported */ | ||
| 411 | WARN_ON(1); | ||
| 412 | break; | ||
| 413 | |||
| 414 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 415 | case CLOCK_EVT_MODE_RESUME: | ||
| 416 | break; | ||
| 417 | |||
| 418 | case CLOCK_EVT_MODE_UNUSED: | ||
| 419 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
| 420 | HYPERVISOR_set_timer_op(0); /* cancel timeout */ | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | } | ||
| 424 | |||
| 425 | static int xen_timerop_set_next_event(unsigned long delta, | ||
| 426 | struct clock_event_device *evt) | ||
| 427 | { | ||
| 428 | WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
| 429 | |||
| 430 | if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0) | ||
| 431 | BUG(); | ||
| 432 | |||
| 433 | /* We may have missed the deadline, but there's no real way of | ||
| 434 | knowing for sure. If the event was in the past, then we'll | ||
| 435 | get an immediate interrupt. */ | ||
| 436 | |||
| 437 | return 0; | ||
| 438 | } | ||
| 439 | |||
| 440 | static const struct clock_event_device xen_timerop_clockevent = { | ||
| 441 | .name = "xen", | ||
| 442 | .features = CLOCK_EVT_FEAT_ONESHOT, | ||
| 443 | |||
| 444 | .max_delta_ns = 0xffffffff, | ||
| 445 | .min_delta_ns = TIMER_SLOP, | ||
| 446 | |||
| 447 | .mult = 1, | ||
| 448 | .shift = 0, | ||
| 449 | .rating = 500, | ||
| 450 | |||
| 451 | .set_mode = xen_timerop_set_mode, | ||
| 452 | .set_next_event = xen_timerop_set_next_event, | ||
| 453 | }; | ||
| 454 | |||
| 455 | |||
| 456 | |||
| 457 | static void xen_vcpuop_set_mode(enum clock_event_mode mode, | ||
| 458 | struct clock_event_device *evt) | ||
| 459 | { | ||
| 460 | int cpu = smp_processor_id(); | ||
| 461 | |||
| 462 | switch (mode) { | ||
| 463 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 464 | WARN_ON(1); /* unsupported */ | ||
| 465 | break; | ||
| 466 | |||
| 467 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 468 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) | ||
| 469 | BUG(); | ||
| 470 | break; | ||
| 471 | |||
| 472 | case CLOCK_EVT_MODE_UNUSED: | ||
| 473 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
| 474 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || | ||
| 475 | HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) | ||
| 476 | BUG(); | ||
| 477 | break; | ||
| 478 | case CLOCK_EVT_MODE_RESUME: | ||
| 479 | break; | ||
| 480 | } | ||
| 481 | } | ||
| 482 | |||
| 483 | static int xen_vcpuop_set_next_event(unsigned long delta, | ||
| 484 | struct clock_event_device *evt) | ||
| 485 | { | ||
| 486 | int cpu = smp_processor_id(); | ||
| 487 | struct vcpu_set_singleshot_timer single; | ||
| 488 | int ret; | ||
| 489 | |||
| 490 | WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
| 491 | |||
| 492 | single.timeout_abs_ns = get_abs_timeout(delta); | ||
| 493 | single.flags = VCPU_SSHOTTMR_future; | ||
| 494 | |||
| 495 | ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); | ||
| 496 | |||
| 497 | BUG_ON(ret != 0 && ret != -ETIME); | ||
| 498 | |||
| 499 | return ret; | ||
| 500 | } | ||
| 501 | |||
| 502 | static const struct clock_event_device xen_vcpuop_clockevent = { | ||
| 503 | .name = "xen", | ||
| 504 | .features = CLOCK_EVT_FEAT_ONESHOT, | ||
| 505 | |||
| 506 | .max_delta_ns = 0xffffffff, | ||
| 507 | .min_delta_ns = TIMER_SLOP, | ||
| 508 | |||
| 509 | .mult = 1, | ||
| 510 | .shift = 0, | ||
| 511 | .rating = 500, | ||
| 512 | |||
| 513 | .set_mode = xen_vcpuop_set_mode, | ||
| 514 | .set_next_event = xen_vcpuop_set_next_event, | ||
| 515 | }; | ||
| 516 | |||
| 517 | static const struct clock_event_device *xen_clockevent = | ||
| 518 | &xen_timerop_clockevent; | ||
| 519 | static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events); | ||
| 520 | |||
| 521 | static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) | ||
| 522 | { | ||
| 523 | struct clock_event_device *evt = &__get_cpu_var(xen_clock_events); | ||
| 524 | irqreturn_t ret; | ||
| 525 | |||
| 526 | ret = IRQ_NONE; | ||
| 527 | if (evt->event_handler) { | ||
| 528 | evt->event_handler(evt); | ||
| 529 | ret = IRQ_HANDLED; | ||
| 530 | } | ||
| 531 | |||
| 532 | do_stolen_accounting(); | ||
| 533 | |||
| 534 | return ret; | ||
| 535 | } | ||
| 536 | |||
| 537 | void xen_setup_timer(int cpu) | ||
| 538 | { | ||
| 539 | const char *name; | ||
| 540 | struct clock_event_device *evt; | ||
| 541 | int irq; | ||
| 542 | |||
| 543 | printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); | ||
| 544 | |||
| 545 | name = kasprintf(GFP_KERNEL, "timer%d", cpu); | ||
| 546 | if (!name) | ||
| 547 | name = "<timer kasprintf failed>"; | ||
| 548 | |||
| 549 | irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, | ||
| 550 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
| 551 | name, NULL); | ||
| 552 | |||
| 553 | evt = &per_cpu(xen_clock_events, cpu); | ||
| 554 | memcpy(evt, xen_clockevent, sizeof(*evt)); | ||
| 555 | |||
| 556 | evt->cpumask = cpumask_of_cpu(cpu); | ||
| 557 | evt->irq = irq; | ||
| 558 | |||
| 559 | setup_runstate_info(cpu); | ||
| 560 | } | ||
| 561 | |||
| 562 | void xen_setup_cpu_clockevents(void) | ||
| 563 | { | ||
| 564 | BUG_ON(preemptible()); | ||
| 565 | |||
| 566 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); | ||
| 567 | } | ||
| 568 | |||
| 569 | __init void xen_time_init(void) | ||
| 570 | { | ||
| 571 | int cpu = smp_processor_id(); | ||
| 572 | |||
| 573 | get_time_values_from_xen(); | ||
| 574 | |||
| 575 | clocksource_register(&xen_clocksource); | ||
| 576 | |||
| 577 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { | ||
| 578 | /* Successfully turned off 100Hz tick, so we have the | ||
| 579 | vcpuop-based timer interface */ | ||
| 580 | printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); | ||
| 581 | xen_clockevent = &xen_vcpuop_clockevent; | ||
| 582 | } | ||
| 583 | |||
| 584 | /* Set initial system time with full resolution */ | ||
| 585 | xen_read_wallclock(&xtime); | ||
| 586 | set_normalized_timespec(&wall_to_monotonic, | ||
| 587 | -xtime.tv_sec, -xtime.tv_nsec); | ||
| 588 | |||
| 589 | tsc_disable = 0; | ||
| 590 | |||
| 591 | xen_setup_timer(cpu); | ||
| 592 | xen_setup_cpu_clockevents(); | ||
| 593 | } | ||
