/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

#define XEN_SHIFT 22

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000

/* These are periodically updated in shared_info, and then copied here. */
struct shadow_time_info {
	u64 tsc_timestamp;	/* TSC at last update of time vals.  */
	u64 system_timestamp;	/* Time, in nanosecs, since boot.    */
	u32 tsc_to_nsec_mul;
	int tsc_shift;
	u32 version;
};

static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);

unsigned long xen_cpu_khz(void)
{
	u64 cpu_khz = 1000000ULL << 32;
	const struct vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	do_div(cpu_khz, info->tsc_to_system_mul);
	if (info->tsc_shift < 0)
		cpu_khz <<= -info->tsc_shift;
	else
		cpu_khz >>= info->tsc_shift;

	return cpu_khz;
}

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 */
static void get_time_values_from_xen(void)
{
	struct vcpu_time_info   *src;
	struct shadow_time_info *dst;

	/* src is shared memory with the hypervisor, so we need to
	   make sure we get a consistent snapshot, even in the face of
	   being preempted. */
	preempt_disable();

	src = &__get_cpu_var(xen_vcpu)->time;
	dst = &__get_cpu_var(shadow_time);

	do {
		dst->version = src->version;
		rmb();		/* fetch version before data */
		dst->tsc_timestamp    = src->tsc_timestamp;
		dst->system_timestamp = src->system_time;
		dst->tsc_to_nsec_mul  = src->tsc_to_system_mul;
		dst->tsc_shift        = src->tsc_shift;
		rmb();		/* test version after fetching data */
	} while ((src->version & 1) | (dst->version ^ src->version));

	preempt_enable();
}

/*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
	u64 product;
#ifdef __i386__
	u32 tmp1, tmp2;
#endif

	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

#ifdef __i386__
	__asm__ (
		"mul  %5       ; "
		"mov  %4,%%eax ; "
		"mov  %%edx,%4 ; "
		"mul  %5       ; "
		"xor  %5,%5    ; "
		"add  %4,%%eax ; "
		"adc  %5,%%edx ; "
		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif __x86_64__
	__asm__ (
		"mul %%rdx ; shrd $32,%%rdx,%%rax"
		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#else
#error implement me!
#endif

	return product;
}
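/*
 * For reference, a minimal portable sketch of what the assembly above
 * computes (added commentary, not part of the original file; it assumes
 * a compiler providing the nonstandard __uint128_t type):
 *
 *	product = (u64)(((__uint128_t)delta * mul_frac) >> 32);
 *
 * i.e. a 64x32-bit multiply whose 96-bit result is shifted right by 32,
 * after delta has been pre-scaled by tsc_shift.
 */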
static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
	u64 now, delta;
	rdtscll(now);
	delta = now - shadow->tsc_timestamp;
	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

cycle_t xen_clocksource_read(void)
{
	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
	cycle_t ret;

	get_time_values_from_xen();

	ret = shadow->system_timestamp + get_nsec_offset(shadow);

	put_cpu_var(shadow_time);

	return ret;
}

static void xen_read_wallclock(struct timespec *ts)
{
	const struct shared_info *s = HYPERVISOR_shared_info;
	u32 version;
	u64 delta;
	struct timespec now;

	/* get wallclock at system boot */
	do {
		version = s->wc_version;
		rmb();		/* fetch version before time */
		now.tv_sec  = s->wc_sec;
		now.tv_nsec = s->wc_nsec;
		rmb();		/* fetch time before checking version */
	} while ((s->wc_version & 1) | (version ^ s->wc_version));

	delta = xen_clocksource_read();	/* time since system boot */
	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;

	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
	now.tv_sec = delta;

	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}

unsigned long xen_get_wallclock(void)
{
	struct timespec ts;

	xen_read_wallclock(&ts);

	return ts.tv_sec;
}

int xen_set_wallclock(unsigned long now)
{
	/* do nothing for domU */
	return -1;
}

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_read,
	.mask = ~0,
	.mult = 1<<XEN_SHIFT,		/* time directly in nanoseconds */
	.shift = XEN_SHIFT,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/* Xen clockevent implementation */

static void xen_timerop_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* unsupported */
		WARN_ON(1);
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		HYPERVISOR_set_timer_op(0);	/* cancel timeout */
		break;
	}
}

static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_timerop_set_mode,
	.set_next_event = xen_timerop_set_next_event,
};

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
				struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		WARN_ON(1);	/* unsupported */
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
		    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;
	}
}

static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	single.timeout_abs_ns = get_abs_timeout(delta);
	single.flags = VCPU_SSHOTTMR_future;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

	BUG_ON(ret != 0 && ret != -ETIME);

	return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_vcpuop_set_mode,
	.set_next_event = xen_vcpuop_set_next_event,
};
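/*
 * Note (added commentary, not part of the original file): both
 * clockevent devices above use .mult = 1 and .shift = 0, so the
 * generic clockevents core's nanoseconds-to-device-cycles conversion,
 * (delta * mult) >> shift, degenerates to the identity -- the delta
 * handed to set_next_event() is already in nanoseconds, matching the
 * hypervisor's nanosecond timebase.  .min_delta_ns = TIMER_SLOP
 * likewise reflects the "timer may fire early" caveat at the top of
 * the file.
 */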
static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);

static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	return ret;
}

static void xen_setup_timer(int cpu)
{
	const char *name;
	struct clock_event_device *evt;
	int irq;

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
	if (!name)
		name = "<timer kasprintf failed>";

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				      name, NULL);

	evt = &get_cpu_var(xen_clock_events);
	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of_cpu(cpu);
	evt->irq = irq;
	clockevents_register_device(evt);

	put_cpu_var(xen_clock_events);
}

__init void xen_time_init(void)
{
	int cpu = smp_processor_id();

	get_time_values_from_xen();

	clocksource_register(&xen_clocksource);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&xtime);
	set_normalized_timespec(&wall_to_monotonic,
				-xtime.tv_sec, -xtime.tv_nsec);

	tsc_disable = 0;

	xen_setup_timer(cpu);
}
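/*
 * Worked example (added commentary, not part of the original file; the
 * numbers are invented for illustration).  tsc_to_system_mul is a 32.32
 * fixed-point factor converting pre-shifted TSC cycles to nanoseconds:
 *
 *	ns = ((tsc << tsc_shift) * tsc_to_system_mul) >> 32
 *
 * With tsc_shift == 0 and tsc_to_system_mul == 0x40000000 (2^30), each
 * TSC cycle is 2^30 / 2^32 = 0.25ns, i.e. a 4GHz TSC.  Feeding those
 * values through xen_cpu_khz() gives
 *
 *	(1000000ULL << 32) / 0x40000000 = 4000000 kHz = 4GHz
 *
 * as expected.
 */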