author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-13 18:31:08 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-13 18:31:08 -0500
commit	66cdd0ceaf65a18996f561b770eedde1d123b019 (patch)
tree	4892eaa422d366fce5d1e866ff1fe0988af95569 /arch/x86/kernel
parent	896ea17d3da5f44b2625c9cda9874d7dfe447393 (diff)
parent	58b7825bc324da55415034a9f6ca5d716b8fd898 (diff)
Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
"Considerable KVM/PPC work, x86 kvmclock vsyscall support,
IA32_TSC_ADJUST MSR emulation, amongst others."
Fix up trivial conflict in kernel/sched/core.c due to cross-cpu
migration notifier added next to rq migration call-back.
* tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits)
KVM: emulator: fix real mode segment checks in address linearization
VMX: remove unneeded enable_unrestricted_guest check
KVM: VMX: fix DPL during entry to protected mode
x86/kexec: crash_vmclear_local_vmcss needs __rcu
kvm: Fix irqfd resampler list walk
KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump
x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
KVM: MMU: optimize for set_spte
KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
KVM: PPC: bookehv: Add guest computation mode for irq delivery
KVM: PPC: Make EPCR a valid field for booke64 and bookehv
KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
KVM: PPC: e500: Add emulation helper for getting instruction ea
KVM: PPC: bookehv64: Add support for interrupt handling
KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
KVM: PPC: booke: Fix get_tb() compile error on 64-bit
KVM: PPC: e500: Silence bogus GCC warning in tlb code
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/crash.c    |  32
-rw-r--r--	arch/x86/kernel/kvm.c      |  20
-rw-r--r--	arch/x86/kernel/kvmclock.c |  88
-rw-r--r--	arch/x86/kernel/pvclock.c  | 143
4 files changed, 214 insertions(+), 69 deletions(-)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 13ad89971d4..74467feb4dc 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
@@ -30,6 +31,27 @@
 
 int in_crash_kexec;
 
+/*
+ * This is used to VMCLEAR all VMCSs loaded on the
+ * processor. And when loading kvm_intel module, the
+ * callback function pointer will be assigned.
+ *
+ * protected by rcu.
+ */
+crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
+EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+
+static inline void cpu_crash_vmclear_loaded_vmcss(void)
+{
+	crash_vmclear_fn *do_vmclear_operation = NULL;
+
+	rcu_read_lock();
+	do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
+	if (do_vmclear_operation)
+		do_vmclear_operation();
+	rcu_read_unlock();
+}
+
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 #endif
 	crash_save_cpu(regs, cpu);
 
+	/*
+	 * VMCLEAR VMCSs loaded on all cpus if needed.
+	 */
+	cpu_crash_vmclear_loaded_vmcss();
+
 	/* Disable VMX or SVM if needed.
 	 *
 	 * We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
 	kdump_nmi_shootdown_cpus();
 
+	/*
+	 * VMCLEAR VMCSs loaded on this cpu if needed.
+	 */
+	cpu_crash_vmclear_loaded_vmcss();
+
 	/* Booting kdump kernel with VMX or SVM enabled won't work,
 	 * because (among other limitations) we can't disable paging
 	 * with the virt flags.
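The callback pointer published above is filled in by the kvm_intel module. A minimal sketch of that consumer side, modeled on the vmx.c change merged in this same pull ("x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary"); the two wrapper function names here are illustrative, not the actual vmx.c symbols:

/* Sketch of the kvm_intel side (hypothetical wrappers; the real code
 * lives in module init/exit of arch/x86/kvm/vmx.c). Requires
 * <asm/kexec.h> for crash_vmclear_fn and the extern pointer. */
static void crash_vmclear_local_loaded_vmcss(void)
{
	/* VMCLEAR every VMCS on this cpu's loaded-VMCS list. */
}

static void __init vmx_register_crash_vmclear(void)
{
	/* Publish the callback; crash-path readers see NULL or a valid fn. */
	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
			   crash_vmclear_local_loaded_vmcss);
}

static void __exit vmx_unregister_crash_vmclear(void)
{
	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
	synchronize_rcu();	/* wait out readers in the crash path */
}

Publishing with rcu_assign_pointer() and tearing down with synchronize_rcu() is what lets the NMI-time reader in cpu_crash_vmclear_loaded_vmcss() safely see either NULL or a callback whose module is still live.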
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4180a874c76..08b973f6403 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -42,6 +42,7 @@
 #include <asm/apic.h>
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
+#include <asm/kvm_guest.h>
 
 static int kvmapf = 1;
 
@@ -62,6 +63,15 @@ static int parse_no_stealacc(char *arg)
 
 early_param("no-steal-acc", parse_no_stealacc);
 
+static int kvmclock_vsyscall = 1;
+static int parse_no_kvmclock_vsyscall(char *arg)
+{
+	kvmclock_vsyscall = 0;
+	return 0;
+}
+
+early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
+
 static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
 static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
 static int has_steal_clock = 0;
@@ -110,11 +120,6 @@ void kvm_async_pf_task_wait(u32 token)
 	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
 	struct kvm_task_sleep_node n, *e;
 	DEFINE_WAIT(wait);
-	int cpu, idle;
-
-	cpu = get_cpu();
-	idle = idle_cpu(cpu);
-	put_cpu();
 
 	spin_lock(&b->lock);
 	e = _find_apf_task(b, token);
@@ -128,7 +133,7 @@ void kvm_async_pf_task_wait(u32 token)
 
 	n.token = token;
 	n.cpu = smp_processor_id();
-	n.halted = idle || preempt_count() > 1;
+	n.halted = is_idle_task(current) || preempt_count() > 1;
 	init_waitqueue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
 	spin_unlock(&b->lock);
@@ -471,6 +476,9 @@ void __init kvm_guest_init(void)
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		apic_set_eoi_write(kvm_guest_apic_eoi_write);
 
+	if (kvmclock_vsyscall)
+		kvm_setup_vsyscall_timeinfo();
+
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	register_cpu_notifier(&kvm_cpu_notifier);
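The new <asm/kvm_guest.h> include exists only to carry the prototype for kvm_setup_vsyscall_timeinfo(), which is defined in kvmclock.c below. The header itself is added elsewhere in this series and falls outside this diffstat's arch/x86/kernel filter; approximately:

/* arch/x86/include/asm/kvm_guest.h -- sketched from the same series,
 * not part of the diff shown on this page */
#ifndef _ASM_X86_KVM_GUEST_H
#define _ASM_X86_KVM_GUEST_H

int kvm_setup_vsyscall_timeinfo(void);

#endif /* _ASM_X86_KVM_GUEST_H */

Guests that want the old behavior can boot with no-kvmclock-vsyscall on the kernel command line, which clears kvmclock_vsyscall and skips the call in kvm_guest_init().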
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f1b42b3a186..220a360010f 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -23,6 +23,7 @@
 #include <asm/apic.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/memblock.h>
 
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_vsyscall_time_info *hv_clock;
 static struct pvclock_wall_clock wall_clock;
 
 /*
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void)
 	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
 	int low, high;
+	int cpu;
 
 	low = (int)__pa_symbol(&wall_clock);
 	high = ((u64)__pa_symbol(&wall_clock) >> 32);
 
 	native_write_msr(msr_kvm_wall_clock, low, high);
 
-	vcpu_time = &get_cpu_var(hv_clock);
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	vcpu_time = &hv_clock[cpu].pvti;
 	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
-	put_cpu_var(hv_clock);
+
+	preempt_enable();
 
 	return ts.tv_sec;
 }
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void)
 {
 	struct pvclock_vcpu_time_info *src;
 	cycle_t ret;
+	int cpu;
 
 	preempt_disable_notrace();
-	src = &__get_cpu_var(hv_clock);
+	cpu = smp_processor_id();
+	src = &hv_clock[cpu].pvti;
 	ret = pvclock_clocksource_read(src);
 	preempt_enable_notrace();
 	return ret;
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
 static unsigned long kvm_get_tsc_khz(void)
 {
 	struct pvclock_vcpu_time_info *src;
-	src = &per_cpu(hv_clock, 0);
-	return pvclock_tsc_khz(src);
+	int cpu;
+	unsigned long tsc_khz;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	src = &hv_clock[cpu].pvti;
+	tsc_khz = pvclock_tsc_khz(src);
+	preempt_enable();
+	return tsc_khz;
 }
 
 static void kvm_get_preset_lpj(void)
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void)
 {
 	bool ret = false;
 	struct pvclock_vcpu_time_info *src;
+	int cpu = smp_processor_id();
 
-	src = &__get_cpu_var(hv_clock);
+	if (!hv_clock)
+		return ret;
+
+	src = &hv_clock[cpu].pvti;
 	if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
-		__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
+		src->flags &= ~PVCLOCK_GUEST_STOPPED;
 		ret = true;
 	}
 
@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high, ret;
+	struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
 
-	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
-	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+	low = (int)__pa(src) | 1;
+	high = ((u64)__pa(src) >> 32);
 	ret = native_write_msr_safe(msr_kvm_system_time, low, high);
 	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
 	       cpu, high, low, txt);
@@ -197,6 +217,8 @@ static void kvm_shutdown(void)
 
 void __init kvmclock_init(void)
 {
+	unsigned long mem;
+
 	if (!kvm_para_available())
 		return;
 
@@ -209,8 +231,18 @@ void __init kvmclock_init(void)
 	printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
 		msr_kvm_system_time, msr_kvm_wall_clock);
 
-	if (kvm_register_clock("boot clock"))
+	mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
+			     PAGE_SIZE);
+	if (!mem)
+		return;
+	hv_clock = __va(mem);
+
+	if (kvm_register_clock("boot clock")) {
+		hv_clock = NULL;
+		memblock_free(mem,
+			sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 		return;
+	}
 	pv_time_ops.sched_clock = kvm_clock_read;
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
 	x86_platform.get_wallclock = kvm_get_wallclock;
@@ -233,3 +265,37 @@ void __init kvmclock_init(void)
 	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
 		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
 }
+
+int __init kvm_setup_vsyscall_timeinfo(void)
+{
+#ifdef CONFIG_X86_64
+	int cpu;
+	int ret;
+	u8 flags;
+	struct pvclock_vcpu_time_info *vcpu_time;
+	unsigned int size;
+
+	size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	vcpu_time = &hv_clock[cpu].pvti;
+	flags = pvclock_read_flags(vcpu_time);
+
+	if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
+		preempt_enable();
+		return 1;
+	}
+
+	if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
+		preempt_enable();
+		return ret;
+	}
+
+	preempt_enable();
+
+	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
+#endif
+	return 0;
+}
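kvm_setup_vsyscall_timeinfo() only switches kvm_clock.archdata.vclock_mode to VCLOCK_PVCLOCK when the hypervisor reports PVCLOCK_TSC_STABLE_BIT; that mode is what lets the vdso's clock_gettime() read the per-cpu pvti without entering the kernel. A hedged sketch of that reader, modeled on the vread_pvclock() added to arch/x86/vdso/vclock_gettime.c in the same series (details approximate, not part of this diff):

/* Sketch: lockless userspace read of the per-cpu pvti. Retries if the
 * version seqcount is odd or changed, or if the task migrated mid-read
 * (cpu number or migrate_count changed). */
static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u32 version, migrate_count;
	u8 flags;
	unsigned cpu, cpu1;

	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		pvti = get_pvti(cpu);	/* fixmap lookup; sketched after the pvclock.c diff below */
		migrate_count = pvti->migrate_count;
		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version ||
			  pvti->migrate_count != migrate_count));

	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;	/* fall back to a syscall */

	return ret;
}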
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 42eb3300dfc..85c39590c1a 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -17,23 +17,13 @@
 
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <linux/bootmem.h>
+#include <asm/fixmap.h>
 #include <asm/pvclock.h>
 
-/*
- * These are perodically updated
- *    xen: magic shared_info page
- *    kvm: gpa registered via msr
- * and then copied here.
- */
-struct pvclock_shadow_time {
-	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
-	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
-	u32 tsc_to_nsec_mul;
-	int tsc_shift;
-	u32 version;
-	u8  flags;
-};
-
 static u8 valid_flags __read_mostly = 0;
 
 void pvclock_set_flags(u8 flags)
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags)
 	valid_flags = flags;
 }
 
-static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
-{
-	u64 delta = native_read_tsc() - shadow->tsc_timestamp;
-	return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
-				   shadow->tsc_shift);
-}
-
-/*
- * Reads a consistent set of time-base values from hypervisor,
- * into a shadow data area.
- */
-static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
-					struct pvclock_vcpu_time_info *src)
-{
-	do {
-		dst->version = src->version;
-		rmb();		/* fetch version before data */
-		dst->tsc_timestamp     = src->tsc_timestamp;
-		dst->system_timestamp  = src->system_time;
-		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-		dst->tsc_shift         = src->tsc_shift;
-		dst->flags             = src->flags;
-		rmb();		/* test version after fetching data */
-	} while ((src->version & 1) || (dst->version != src->version));
-
-	return dst->version;
-}
-
 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
 {
 	u64 pv_tsc_khz = 1000000ULL << 32;
@@ -88,23 +50,32 @@ void pvclock_resume(void)
 	atomic64_set(&last_value, 0);
 }
 
+u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
+{
+	unsigned version;
+	cycle_t ret;
+	u8 flags;
+
+	do {
+		version = __pvclock_read_cycles(src, &ret, &flags);
+	} while ((src->version & 1) || version != src->version);
+
+	return flags & valid_flags;
+}
+
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 {
-	struct pvclock_shadow_time shadow;
 	unsigned version;
-	cycle_t ret, offset;
+	cycle_t ret;
 	u64 last;
+	u8 flags;
 
 	do {
-		version = pvclock_get_time_values(&shadow, src);
-		barrier();
-		offset = pvclock_get_nsec_offset(&shadow);
-		ret = shadow.system_timestamp + offset;
-		barrier();
-	} while (version != src->version);
+		version = __pvclock_read_cycles(src, &ret, &flags);
+	} while ((src->version & 1) || version != src->version);
 
 	if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
-		(shadow.flags & PVCLOCK_TSC_STABLE_BIT))
+		(flags & PVCLOCK_TSC_STABLE_BIT))
 		return ret;
 
 	/*
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 
 	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
+
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+	if (!pvclock_vdso_info) {
+		BUG();
+		return NULL;
+	}
+
+	return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
+#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+				void *v)
+{
+	struct task_migration_notifier *mn = v;
+	struct pvclock_vsyscall_time_info *pvti;
+
+	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+	/* this is NULL when pvclock vsyscall is not initialized */
+	if (unlikely(pvti == NULL))
+		return NOTIFY_DONE;
+
+	pvti->migrate_count++;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+	.notifier_call = pvclock_task_migrate,
+};
+
+/*
+ * Initialize the generic pvclock vsyscall state.  This will allocate
+ * a/some page(s) for the per-vcpu pvclock information, set up a
+ * fixmap mapping for the page(s)
+ */
+
+int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
+				 int size)
+{
+	int idx;
+
+	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+
+	pvclock_vdso_info = i;
+
+	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
+		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
+			     __pa_symbol(i) + (idx*PAGE_SIZE),
+			     PAGE_KERNEL_VVAR);
+	}
+
+
+	register_task_migration_notifier(&pvclock_migrate);
+
+	return 0;
+}
+#endif
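pvclock_init_vsyscall() maps the contiguous pvti array into the PVCLOCK_FIXMAP_BEGIN..PVCLOCK_FIXMAP_END range with user-readable (VVAR) protections, which is why kvmclock.c above switched from a per-cpu variable to one page-aligned memblock allocation. A sketch of how the vdso side locates a given cpu's entry, modeled on the get_pvti() helper from the same series (names approximate, not part of this diff):

/* Sketch: translate a cpu number into a (fixmap page, offset) pair
 * within the array mapped by pvclock_init_vsyscall() above. */
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)

static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN + idx);

	return &pvti_base[offset];
}

Because userspace cannot disable preemption, the task-migration notifier above bumps migrate_count whenever a task leaves a cpu; the vdso reader rechecks both the cpu number and migrate_count after reading and retries if either changed.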
