aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-21 20:16:21 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-21 20:16:21 -0400
commit98edb6ca4174f17a64890a02f44c211c8b44fb3c (patch)
tree033bc5f7da410046d28dd1cefcd2d63cda33d25b /arch/x86/kernel
parenta8251096b427283c47e7d8f9568be6b388dd68ec (diff)
parent8fbf065d625617bbbf6b72d5f78f84ad13c8b547 (diff)
Merge branch 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (269 commits) KVM: x86: Add missing locking to arch specific vcpu ioctls KVM: PPC: Add missing vcpu_load()/vcpu_put() in vcpu ioctls KVM: MMU: Segregate shadow pages with different cr0.wp KVM: x86: Check LMA bit before set_efer KVM: Don't allow lmsw to clear cr0.pe KVM: Add cpuid.txt file KVM: x86: Tell the guest we'll warn it about tsc stability x86, paravirt: don't compute pvclock adjustments if we trust the tsc x86: KVM guest: Try using new kvm clock msrs KVM: x86: export paravirtual cpuid flags in KVM_GET_SUPPORTED_CPUID KVM: x86: add new KVMCLOCK cpuid feature KVM: x86: change msr numbers for kvmclock x86, paravirt: Add a global synchronization point for pvclock x86, paravirt: Enable pvclock flags in vcpu_time_info structure KVM: x86: Inject #GP with the right rip on efer writes KVM: SVM: Don't allow nested guest to VMMCALL into host KVM: x86: Fix exception reinjection forced to true KVM: Fix wallclock version writing race KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots KVM: VMX: enable VMXON check with SMX enabled (Intel TXT) ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/kvmclock.c56
-rw-r--r--arch/x86/kernel/pvclock.c37
-rw-r--r--arch/x86/kernel/tboot.c1
3 files changed, 73 insertions, 21 deletions
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index feaeb0d3aa4f..eb9b76c716c2 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,6 +29,8 @@
29#define KVM_SCALE 22 29#define KVM_SCALE 22
30 30
31static int kvmclock = 1; 31static int kvmclock = 1;
32static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
33static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
32 34
33static int parse_no_kvmclock(char *arg) 35static int parse_no_kvmclock(char *arg)
34{ 36{
@@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void)
54 56
55 low = (int)__pa_symbol(&wall_clock); 57 low = (int)__pa_symbol(&wall_clock);
56 high = ((u64)__pa_symbol(&wall_clock) >> 32); 58 high = ((u64)__pa_symbol(&wall_clock) >> 32);
57 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 59
60 native_write_msr(msr_kvm_wall_clock, low, high);
58 61
59 vcpu_time = &get_cpu_var(hv_clock); 62 vcpu_time = &get_cpu_var(hv_clock);
60 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); 63 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
@@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt)
130 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 133 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
131 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 134 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
132 cpu, high, low, txt); 135 cpu, high, low, txt);
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 136
137 return native_write_msr_safe(msr_kvm_system_time, low, high);
134} 138}
135 139
136#ifdef CONFIG_X86_LOCAL_APIC 140#ifdef CONFIG_X86_LOCAL_APIC
@@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void)
165#ifdef CONFIG_KEXEC 169#ifdef CONFIG_KEXEC
166static void kvm_crash_shutdown(struct pt_regs *regs) 170static void kvm_crash_shutdown(struct pt_regs *regs)
167{ 171{
168 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); 172 native_write_msr(msr_kvm_system_time, 0, 0);
169 native_machine_crash_shutdown(regs); 173 native_machine_crash_shutdown(regs);
170} 174}
171#endif 175#endif
172 176
173static void kvm_shutdown(void) 177static void kvm_shutdown(void)
174{ 178{
175 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); 179 native_write_msr(msr_kvm_system_time, 0, 0);
176 native_machine_shutdown(); 180 native_machine_shutdown();
177} 181}
178 182
@@ -181,27 +185,37 @@ void __init kvmclock_init(void)
181 if (!kvm_para_available()) 185 if (!kvm_para_available())
182 return; 186 return;
183 187
184 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 188 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
185 if (kvm_register_clock("boot clock")) 189 msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
186 return; 190 msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
187 pv_time_ops.sched_clock = kvm_clock_read; 191 } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)))
188 x86_platform.calibrate_tsc = kvm_get_tsc_khz; 192 return;
189 x86_platform.get_wallclock = kvm_get_wallclock; 193
190 x86_platform.set_wallclock = kvm_set_wallclock; 194 printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
195 msr_kvm_system_time, msr_kvm_wall_clock);
196
197 if (kvm_register_clock("boot clock"))
198 return;
199 pv_time_ops.sched_clock = kvm_clock_read;
200 x86_platform.calibrate_tsc = kvm_get_tsc_khz;
201 x86_platform.get_wallclock = kvm_get_wallclock;
202 x86_platform.set_wallclock = kvm_set_wallclock;
191#ifdef CONFIG_X86_LOCAL_APIC 203#ifdef CONFIG_X86_LOCAL_APIC
192 x86_cpuinit.setup_percpu_clockev = 204 x86_cpuinit.setup_percpu_clockev =
193 kvm_setup_secondary_clock; 205 kvm_setup_secondary_clock;
194#endif 206#endif
195#ifdef CONFIG_SMP 207#ifdef CONFIG_SMP
196 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; 208 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
197#endif 209#endif
198 machine_ops.shutdown = kvm_shutdown; 210 machine_ops.shutdown = kvm_shutdown;
199#ifdef CONFIG_KEXEC 211#ifdef CONFIG_KEXEC
200 machine_ops.crash_shutdown = kvm_crash_shutdown; 212 machine_ops.crash_shutdown = kvm_crash_shutdown;
201#endif 213#endif
202 kvm_get_preset_lpj(); 214 kvm_get_preset_lpj();
203 clocksource_register(&kvm_clock); 215 clocksource_register(&kvm_clock);
204 pv_info.paravirt_enabled = 1; 216 pv_info.paravirt_enabled = 1;
205 pv_info.name = "KVM"; 217 pv_info.name = "KVM";
206 } 218
219 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
220 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
207} 221}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 03801f2f761f..239427ca02af 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -31,8 +31,16 @@ struct pvclock_shadow_time {
31 u32 tsc_to_nsec_mul; 31 u32 tsc_to_nsec_mul;
32 int tsc_shift; 32 int tsc_shift;
33 u32 version; 33 u32 version;
34 u8 flags;
34}; 35};
35 36
37static u8 valid_flags __read_mostly = 0;
38
39void pvclock_set_flags(u8 flags)
40{
41 valid_flags = flags;
42}
43
36/* 44/*
37 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 45 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
38 * yielding a 64-bit result. 46 * yielding a 64-bit result.
@@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
91 dst->system_timestamp = src->system_time; 99 dst->system_timestamp = src->system_time;
92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul; 100 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
93 dst->tsc_shift = src->tsc_shift; 101 dst->tsc_shift = src->tsc_shift;
102 dst->flags = src->flags;
94 rmb(); /* test version after fetching data */ 103 rmb(); /* test version after fetching data */
95 } while ((src->version & 1) || (dst->version != src->version)); 104 } while ((src->version & 1) || (dst->version != src->version));
96 105
@@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
109 return pv_tsc_khz; 118 return pv_tsc_khz;
110} 119}
111 120
121static atomic64_t last_value = ATOMIC64_INIT(0);
122
112cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 123cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
113{ 124{
114 struct pvclock_shadow_time shadow; 125 struct pvclock_shadow_time shadow;
115 unsigned version; 126 unsigned version;
116 cycle_t ret, offset; 127 cycle_t ret, offset;
128 u64 last;
117 129
118 do { 130 do {
119 version = pvclock_get_time_values(&shadow, src); 131 version = pvclock_get_time_values(&shadow, src);
@@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
123 barrier(); 135 barrier();
124 } while (version != src->version); 136 } while (version != src->version);
125 137
138 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
139 (shadow.flags & PVCLOCK_TSC_STABLE_BIT))
140 return ret;
141
142 /*
143 * Assumption here is that last_value, a global accumulator, always goes
144 * forward. If we are less than that, we should not be much smaller.
145 * We assume there is an error marging we're inside, and then the correction
146 * does not sacrifice accuracy.
147 *
148 * For reads: global may have changed between test and return,
149 * but this means someone else updated poked the clock at a later time.
150 * We just need to make sure we are not seeing a backwards event.
151 *
152 * For updates: last_value = ret is not enough, since two vcpus could be
153 * updating at the same time, and one of them could be slightly behind,
154 * making the assumption that last_value always go forward fail to hold.
155 */
156 last = atomic64_read(&last_value);
157 do {
158 if (ret < last)
159 return last;
160 last = atomic64_cmpxchg(&last_value, last, ret);
161 } while (unlikely(last != ret));
162
126 return ret; 163 return ret;
127} 164}
128 165
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index cc2c60474fd0..c2f1b26141e2 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -46,6 +46,7 @@
46 46
47/* Global pointer to shared data; NULL means no measured launch. */ 47/* Global pointer to shared data; NULL means no measured launch. */
48struct tboot *tboot __read_mostly; 48struct tboot *tboot __read_mostly;
49EXPORT_SYMBOL(tboot);
49 50
50/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ 51/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
51#define AP_WAIT_TIMEOUT 1 52#define AP_WAIT_TIMEOUT 1