diff options
| -rw-r--r-- | arch/x86/xen/suspend.c | 4 | ||||
| -rw-r--r-- | arch/x86/xen/time.c | 90 | ||||
| -rw-r--r-- | arch/x86/xen/xen-ops.h | 2 | ||||
| -rw-r--r-- | include/xen/interface/vcpu.h | 42 |
4 files changed, 137 insertions, 1 deletions
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d6b1680693a9..800ed36ecfba 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
| @@ -16,6 +16,8 @@ | |||
| 16 | 16 | ||
| 17 | void xen_arch_pre_suspend(void) | 17 | void xen_arch_pre_suspend(void) |
| 18 | { | 18 | { |
| 19 | xen_save_time_memory_area(); | ||
| 20 | |||
| 19 | if (xen_pv_domain()) | 21 | if (xen_pv_domain()) |
| 20 | xen_pv_pre_suspend(); | 22 | xen_pv_pre_suspend(); |
| 21 | } | 23 | } |
| @@ -26,6 +28,8 @@ void xen_arch_post_suspend(int cancelled) | |||
| 26 | xen_pv_post_suspend(cancelled); | 28 | xen_pv_post_suspend(cancelled); |
| 27 | else | 29 | else |
| 28 | xen_hvm_post_suspend(cancelled); | 30 | xen_hvm_post_suspend(cancelled); |
| 31 | |||
| 32 | xen_restore_time_memory_area(); | ||
| 29 | } | 33 | } |
| 30 | 34 | ||
| 31 | static void xen_vcpu_notify_restore(void *data) | 35 | static void xen_vcpu_notify_restore(void *data) |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c96e61fd70e7..c2041043c606 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
| @@ -370,6 +370,92 @@ static const struct pv_time_ops xen_time_ops __initconst = { | |||
| 370 | .steal_clock = xen_steal_clock, | 370 | .steal_clock = xen_steal_clock, |
| 371 | }; | 371 | }; |
| 372 | 372 | ||
| 373 | static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; | ||
| 374 | |||
| 375 | void xen_save_time_memory_area(void) | ||
| 376 | { | ||
| 377 | struct vcpu_register_time_memory_area t; | ||
| 378 | int ret; | ||
| 379 | |||
| 380 | if (!xen_clock) | ||
| 381 | return; | ||
| 382 | |||
| 383 | t.addr.v = NULL; | ||
| 384 | |||
| 385 | ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); | ||
| 386 | if (ret != 0) | ||
| 387 | pr_notice("Cannot save secondary vcpu_time_info (err %d)", | ||
| 388 | ret); | ||
| 389 | else | ||
| 390 | clear_page(xen_clock); | ||
| 391 | } | ||
| 392 | |||
| 393 | void xen_restore_time_memory_area(void) | ||
| 394 | { | ||
| 395 | struct vcpu_register_time_memory_area t; | ||
| 396 | int ret; | ||
| 397 | |||
| 398 | if (!xen_clock) | ||
| 399 | return; | ||
| 400 | |||
| 401 | t.addr.v = &xen_clock->pvti; | ||
| 402 | |||
| 403 | ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); | ||
| 404 | |||
| 405 | /* | ||
| 406 | * We don't disable VCLOCK_PVCLOCK entirely if it fails to register the | ||
| 407 | * secondary time info with Xen or if we migrated to a host without the | ||
| 408 | * necessary flags. In both of these cases what happens is that a | ||
| 409 | * process sees either a zeroed out pvti or no PVCLOCK_TSC_STABLE_BIT | ||
| 410 | * bit set. Userspace checks the latter and if 0, it discards the data | ||
| 411 | * in pvti and falls back to a system call for a reliable timestamp. | ||
| 412 | */ | ||
| 413 | if (ret != 0) | ||
| 414 | pr_notice("Cannot restore secondary vcpu_time_info (err %d)", | ||
| 415 | ret); | ||
| 416 | } | ||
| 417 | |||
| 418 | static void xen_setup_vsyscall_time_info(void) | ||
| 419 | { | ||
| 420 | struct vcpu_register_time_memory_area t; | ||
| 421 | struct pvclock_vsyscall_time_info *ti; | ||
| 422 | int ret; | ||
| 423 | |||
| 424 | ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL); | ||
| 425 | if (!ti) | ||
| 426 | return; | ||
| 427 | |||
| 428 | t.addr.v = &ti->pvti; | ||
| 429 | |||
| 430 | ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); | ||
| 431 | if (ret) { | ||
| 432 | pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret); | ||
| 433 | free_page((unsigned long)ti); | ||
| 434 | return; | ||
| 435 | } | ||
| 436 | |||
| 437 | /* | ||
| 438 | * If primary time info had this bit set, secondary should too since | ||
| 439 | * it's the same data in both, just in different memory regions. But we | ||
| 440 | * still check it in case the hypervisor is buggy. | ||
| 441 | */ | ||
| 442 | if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) { | ||
| 443 | t.addr.v = NULL; | ||
| 444 | ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, | ||
| 445 | 0, &t); | ||
| 446 | if (!ret) | ||
| 447 | free_page((unsigned long)ti); | ||
| 448 | |||
| 449 | pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n"); | ||
| 450 | return; | ||
| 451 | } | ||
| 452 | |||
| 453 | xen_clock = ti; | ||
| 454 | pvclock_set_pvti_cpu0_va(xen_clock); | ||
| 455 | |||
| 456 | xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK; | ||
| 457 | } | ||
| 458 | |||
| 373 | static void __init xen_time_init(void) | 459 | static void __init xen_time_init(void) |
| 374 | { | 460 | { |
| 375 | struct pvclock_vcpu_time_info *pvti; | 461 | struct pvclock_vcpu_time_info *pvti; |
| @@ -401,8 +487,10 @@ static void __init xen_time_init(void) | |||
| 401 | * bit is supported hence speeding up Xen clocksource. | 487 | * bit is supported hence speeding up Xen clocksource. |
| 402 | */ | 488 | */ |
| 403 | pvti = &__this_cpu_read(xen_vcpu)->time; | 489 | pvti = &__this_cpu_read(xen_vcpu)->time; |
| 404 | if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) | 490 | if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) { |
| 405 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | 491 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); |
| 492 | xen_setup_vsyscall_time_info(); | ||
| 493 | } | ||
| 406 | 494 | ||
| 407 | xen_setup_runstate_info(cpu); | 495 | xen_setup_runstate_info(cpu); |
| 408 | xen_setup_timer(cpu); | 496 | xen_setup_timer(cpu); |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c8a6d224f7ed..f96dbedb33d4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
| @@ -69,6 +69,8 @@ void xen_setup_runstate_info(int cpu); | |||
| 69 | void xen_teardown_timer(int cpu); | 69 | void xen_teardown_timer(int cpu); |
| 70 | u64 xen_clocksource_read(void); | 70 | u64 xen_clocksource_read(void); |
| 71 | void xen_setup_cpu_clockevents(void); | 71 | void xen_setup_cpu_clockevents(void); |
| 72 | void xen_save_time_memory_area(void); | ||
| 73 | void xen_restore_time_memory_area(void); | ||
| 72 | void __init xen_init_time_ops(void); | 74 | void __init xen_init_time_ops(void); |
| 73 | void __init xen_hvm_init_time_ops(void); | 75 | void __init xen_hvm_init_time_ops(void); |
| 74 | 76 | ||
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index 98188c87f5c1..504c71601511 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h | |||
| @@ -178,4 +178,46 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); | |||
| 178 | 178 | ||
| 179 | /* Send an NMI to the specified VCPU. @extra_arg == NULL. */ | 179 | /* Send an NMI to the specified VCPU. @extra_arg == NULL. */ |
| 180 | #define VCPUOP_send_nmi 11 | 180 | #define VCPUOP_send_nmi 11 |
| 181 | |||
| 182 | /* | ||
| 183 | * Get the physical ID information for a pinned vcpu's underlying physical | ||
| 184 | * processor. The physical ID information is architecture-specific. | ||
| 185 | * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. | ||
| 186 | * This command returns -EINVAL if it is not a valid operation for this VCPU. | ||
| 187 | */ | ||
| 188 | #define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ | ||
| 189 | struct vcpu_get_physid { | ||
| 190 | uint64_t phys_id; | ||
| 191 | }; | ||
| 192 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_get_physid); | ||
| 193 | #define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) | ||
| 194 | #define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Register a memory location to get a secondary copy of the vcpu time | ||
| 198 | * parameters. The master copy still exists as part of the vcpu shared | ||
| 199 | * memory area, and this secondary copy is updated whenever the master copy | ||
| 200 | * is updated (and using the same versioning scheme for synchronisation). | ||
| 201 | * | ||
| 202 | * The intent is that this copy may be mapped (RO) into userspace so | ||
| 203 | * that usermode can compute system time using the time info and the | ||
| 204 | * tsc. Usermode will see an array of vcpu_time_info structures, one | ||
| 205 | * for each vcpu, and choose the right one by an existing mechanism | ||
| 206 | * which allows it to get the current vcpu number (such as via a | ||
| 207 | * segment limit). It can then apply the normal algorithm to compute | ||
| 208 | * system time from the tsc. | ||
| 209 | * | ||
| 210 | * @extra_arg == pointer to vcpu_register_time_memory_area structure. | ||
| 211 | */ | ||
| 212 | #define VCPUOP_register_vcpu_time_memory_area 13 | ||
| 213 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info); | ||
| 214 | struct vcpu_register_time_memory_area { | ||
| 215 | union { | ||
| 216 | GUEST_HANDLE(vcpu_time_info) h; | ||
| 217 | struct pvclock_vcpu_time_info *v; | ||
| 218 | uint64_t p; | ||
| 219 | } addr; | ||
| 220 | }; | ||
| 221 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area); | ||
| 222 | |||
| 181 | #endif /* __XEN_PUBLIC_VCPU_H__ */ | 223 | #endif /* __XEN_PUBLIC_VCPU_H__ */ |
