Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c  1680
1 file changed, 1286 insertions, 394 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d261527c..c4f35b545c1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -37,11 +37,15 @@
37#include <linux/iommu.h> 37#include <linux/iommu.h>
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
40#include <linux/user-return-notifier.h>
41#include <linux/srcu.h>
42#include <linux/slab.h>
40#include <trace/events/kvm.h> 43#include <trace/events/kvm.h>
41#undef TRACE_INCLUDE_FILE 44#undef TRACE_INCLUDE_FILE
42#define CREATE_TRACE_POINTS 45#define CREATE_TRACE_POINTS
43#include "trace.h" 46#include "trace.h"
44 47
48#include <asm/debugreg.h>
45#include <asm/uaccess.h> 49#include <asm/uaccess.h>
46#include <asm/msr.h> 50#include <asm/msr.h>
47#include <asm/desc.h> 51#include <asm/desc.h>
@@ -87,6 +91,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
87int ignore_msrs = 0; 91int ignore_msrs = 0;
88module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); 92module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
89 93
94#define KVM_NR_SHARED_MSRS 16
95
96struct kvm_shared_msrs_global {
97 int nr;
98 u32 msrs[KVM_NR_SHARED_MSRS];
99};
100
101struct kvm_shared_msrs {
102 struct user_return_notifier urn;
103 bool registered;
104 struct kvm_shared_msr_values {
105 u64 host;
106 u64 curr;
107 } values[KVM_NR_SHARED_MSRS];
108};
109
110static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
111static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
112
90struct kvm_stats_debugfs_item debugfs_entries[] = { 113struct kvm_stats_debugfs_item debugfs_entries[] = {
91 { "pf_fixed", VCPU_STAT(pf_fixed) }, 114 { "pf_fixed", VCPU_STAT(pf_fixed) },
92 { "pf_guest", VCPU_STAT(pf_guest) }, 115 { "pf_guest", VCPU_STAT(pf_guest) },
@@ -123,6 +146,83 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
123 { NULL } 146 { NULL }
124}; 147};
125 148
149static void kvm_on_user_return(struct user_return_notifier *urn)
150{
151 unsigned slot;
152 struct kvm_shared_msrs *locals
153 = container_of(urn, struct kvm_shared_msrs, urn);
154 struct kvm_shared_msr_values *values;
155
156 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
157 values = &locals->values[slot];
158 if (values->host != values->curr) {
159 wrmsrl(shared_msrs_global.msrs[slot], values->host);
160 values->curr = values->host;
161 }
162 }
163 locals->registered = false;
164 user_return_notifier_unregister(urn);
165}
166
167static void shared_msr_update(unsigned slot, u32 msr)
168{
169 struct kvm_shared_msrs *smsr;
170 u64 value;
171
172 smsr = &__get_cpu_var(shared_msrs);
173 /* only read, and nobody should modify it at this time,
174 * so don't need lock */
175 if (slot >= shared_msrs_global.nr) {
176 printk(KERN_ERR "kvm: invalid MSR slot!");
177 return;
178 }
179 rdmsrl_safe(msr, &value);
180 smsr->values[slot].host = value;
181 smsr->values[slot].curr = value;
182}
183
184void kvm_define_shared_msr(unsigned slot, u32 msr)
185{
186 if (slot >= shared_msrs_global.nr)
187 shared_msrs_global.nr = slot + 1;
188 shared_msrs_global.msrs[slot] = msr;
189 /* we need ensured the shared_msr_global have been updated */
190 smp_wmb();
191}
192EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
193
194static void kvm_shared_msr_cpu_online(void)
195{
196 unsigned i;
197
198 for (i = 0; i < shared_msrs_global.nr; ++i)
199 shared_msr_update(i, shared_msrs_global.msrs[i]);
200}
201
202void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
203{
204 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
205
206 if (((value ^ smsr->values[slot].curr) & mask) == 0)
207 return;
208 smsr->values[slot].curr = value;
209 wrmsrl(shared_msrs_global.msrs[slot], value);
210 if (!smsr->registered) {
211 smsr->urn.on_user_return = kvm_on_user_return;
212 user_return_notifier_register(&smsr->urn);
213 smsr->registered = true;
214 }
215}
216EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
217
218static void drop_user_return_notifiers(void *ignore)
219{
220 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
221
222 if (smsr->registered)
223 kvm_on_user_return(&smsr->urn);
224}
225
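The block above introduces the shared-MSR machinery: kvm_define_shared_msr() registers an MSR slot at module init, kvm_set_shared_msr() writes the guest value only when it actually changes, and the host value is restored lazily from a user-return notifier rather than on every exit to the host kernel. A standalone model of that lazy-restore policy (plain userspace C, not kernel code; fake_wrmsr() stands in for the real wrmsrl()):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct shared_msr {
        uint64_t host;
        uint64_t curr;
        bool registered;
};

static void fake_wrmsr(const char *why, uint64_t val)
{
        printf("%-12s wrmsr(0x%016llx)\n", why, (unsigned long long)val);
}

static void set_shared_msr(struct shared_msr *m, uint64_t value, uint64_t mask)
{
        if (((value ^ m->curr) & mask) == 0)
                return;                         /* unchanged under the mask: skip the MSR write */
        m->curr = value;
        fake_wrmsr("guest value", value);
        m->registered = true;                   /* models user_return_notifier_register() */
}

static void on_user_return(struct shared_msr *m)
{
        if (m->curr != m->host) {
                fake_wrmsr("host value", m->host);
                m->curr = m->host;
        }
        m->registered = false;                  /* models user_return_notifier_unregister() */
}

int main(void)
{
        struct shared_msr star = { .host = 0x1000, .curr = 0x1000 };

        set_shared_msr(&star, 0x2000, ~0ULL);   /* first guest entry: one write */
        set_shared_msr(&star, 0x2000, ~0ULL);   /* later entries, same value: no write */
        on_user_return(&star);                  /* restore exactly once, on return to userspace */
        return 0;
}

Running it prints one write for the first guest load, none for the repeat, and a single restore when control heads back to userspace.
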
126unsigned long segment_base(u16 selector) 226unsigned long segment_base(u16 selector)
127{ 227{
128 struct descriptor_table gdt; 228 struct descriptor_table gdt;
@@ -170,12 +270,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
170} 270}
171EXPORT_SYMBOL_GPL(kvm_set_apic_base); 271EXPORT_SYMBOL_GPL(kvm_set_apic_base);
172 272
273#define EXCPT_BENIGN 0
274#define EXCPT_CONTRIBUTORY 1
275#define EXCPT_PF 2
276
277static int exception_class(int vector)
278{
279 switch (vector) {
280 case PF_VECTOR:
281 return EXCPT_PF;
282 case DE_VECTOR:
283 case TS_VECTOR:
284 case NP_VECTOR:
285 case SS_VECTOR:
286 case GP_VECTOR:
287 return EXCPT_CONTRIBUTORY;
288 default:
289 break;
290 }
291 return EXCPT_BENIGN;
292}
293
294static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
295 unsigned nr, bool has_error, u32 error_code)
296{
297 u32 prev_nr;
298 int class1, class2;
299
300 if (!vcpu->arch.exception.pending) {
301 queue:
302 vcpu->arch.exception.pending = true;
303 vcpu->arch.exception.has_error_code = has_error;
304 vcpu->arch.exception.nr = nr;
305 vcpu->arch.exception.error_code = error_code;
306 return;
307 }
308
309 /* to check exception */
310 prev_nr = vcpu->arch.exception.nr;
311 if (prev_nr == DF_VECTOR) {
312 /* triple fault -> shutdown */
313 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
314 return;
315 }
316 class1 = exception_class(prev_nr);
317 class2 = exception_class(nr);
318 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
319 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
320 /* generate double fault per SDM Table 5-5 */
321 vcpu->arch.exception.pending = true;
322 vcpu->arch.exception.has_error_code = true;
323 vcpu->arch.exception.nr = DF_VECTOR;
324 vcpu->arch.exception.error_code = 0;
325 } else
326 /* replace previous exception with a new one in a hope
327 that instruction re-execution will regenerate lost
328 exception */
329 goto queue;
330}
331
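kvm_multiple_exception() centralizes the logic that is deleted from kvm_inject_page_fault() further down: a second exception raised while one is pending is either requeued, promoted to #DF, or escalated to a triple fault according to the benign/contributory/page-fault classes of SDM Table 5-5. A standalone model of that decision, with the vector numbers spelled out since the *_VECTOR constants are defined elsewhere:

#include <stdio.h>

enum { EXCPT_BENIGN, EXCPT_CONTRIBUTORY, EXCPT_PF };

static int exception_class(int vector)
{
        switch (vector) {
        case 14:                                        /* #PF */
                return EXCPT_PF;
        case 0: case 10: case 11: case 12: case 13:     /* #DE #TS #NP #SS #GP */
                return EXCPT_CONTRIBUTORY;
        default:
                return EXCPT_BENIGN;
        }
}

/* What ends up injected when 'second' is raised while 'first' is still pending:
 * the second vector itself, 8 for a double fault, or -1 for a triple fault. */
static int merge(int first, int second)
{
        int class1 = exception_class(first);
        int class2 = exception_class(second);

        if (first == 8)                                 /* #DF already pending -> shutdown */
                return -1;
        if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) ||
            (class1 == EXCPT_PF && class2 != EXCPT_BENIGN))
                return 8;                               /* promote to #DF */
        return second;                                  /* requeue the new exception */
}

int main(void)
{
        printf("#PF then #GP -> %d\n", merge(14, 13));  /* 8: double fault */
        printf("#GP then #GP -> %d\n", merge(13, 13));  /* 8: double fault */
        printf("#GP then #PF -> %d\n", merge(13, 14));  /* 14: page fault handled serially */
        printf("#DB then #GP -> %d\n", merge(1, 13));   /* 13: benign first, just requeue */
        printf("#DF then #GP -> %d\n", merge(8, 13));   /* -1: triple fault */
        return 0;
}
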
173void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 332void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
174{ 333{
175 WARN_ON(vcpu->arch.exception.pending); 334 kvm_multiple_exception(vcpu, nr, false, 0);
176 vcpu->arch.exception.pending = true;
177 vcpu->arch.exception.has_error_code = false;
178 vcpu->arch.exception.nr = nr;
179} 335}
180EXPORT_SYMBOL_GPL(kvm_queue_exception); 336EXPORT_SYMBOL_GPL(kvm_queue_exception);
181 337
@@ -183,25 +339,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
183 u32 error_code) 339 u32 error_code)
184{ 340{
185 ++vcpu->stat.pf_guest; 341 ++vcpu->stat.pf_guest;
186
187 if (vcpu->arch.exception.pending) {
188 switch(vcpu->arch.exception.nr) {
189 case DF_VECTOR:
190 /* triple fault -> shutdown */
191 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
192 return;
193 case PF_VECTOR:
194 vcpu->arch.exception.nr = DF_VECTOR;
195 vcpu->arch.exception.error_code = 0;
196 return;
197 default:
198 /* replace previous exception with a new one in a hope
199 that instruction re-execution will regenerate lost
200 exception */
201 vcpu->arch.exception.pending = false;
202 break;
203 }
204 }
205 vcpu->arch.cr2 = addr; 342 vcpu->arch.cr2 = addr;
206 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 343 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
207} 344}
@@ -214,11 +351,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
214 351
215void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 352void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
216{ 353{
217 WARN_ON(vcpu->arch.exception.pending); 354 kvm_multiple_exception(vcpu, nr, true, error_code);
218 vcpu->arch.exception.pending = true;
219 vcpu->arch.exception.has_error_code = true;
220 vcpu->arch.exception.nr = nr;
221 vcpu->arch.exception.error_code = error_code;
222} 355}
223EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 356EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
224 357
@@ -296,41 +429,38 @@ out:
296 429
297void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 430void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
298{ 431{
299 if (cr0 & CR0_RESERVED_BITS) { 432 cr0 |= X86_CR0_ET;
300 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 433
301 cr0, vcpu->arch.cr0); 434#ifdef CONFIG_X86_64
435 if (cr0 & 0xffffffff00000000UL) {
302 kvm_inject_gp(vcpu, 0); 436 kvm_inject_gp(vcpu, 0);
303 return; 437 return;
304 } 438 }
439#endif
440
441 cr0 &= ~CR0_RESERVED_BITS;
305 442
306 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 443 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
307 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
308 kvm_inject_gp(vcpu, 0); 444 kvm_inject_gp(vcpu, 0);
309 return; 445 return;
310 } 446 }
311 447
312 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 448 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
313 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
314 "and a clear PE flag\n");
315 kvm_inject_gp(vcpu, 0); 449 kvm_inject_gp(vcpu, 0);
316 return; 450 return;
317 } 451 }
318 452
319 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 453 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
320#ifdef CONFIG_X86_64 454#ifdef CONFIG_X86_64
321 if ((vcpu->arch.shadow_efer & EFER_LME)) { 455 if ((vcpu->arch.efer & EFER_LME)) {
322 int cs_db, cs_l; 456 int cs_db, cs_l;
323 457
324 if (!is_pae(vcpu)) { 458 if (!is_pae(vcpu)) {
325 printk(KERN_DEBUG "set_cr0: #GP, start paging "
326 "in long mode while PAE is disabled\n");
327 kvm_inject_gp(vcpu, 0); 459 kvm_inject_gp(vcpu, 0);
328 return; 460 return;
329 } 461 }
330 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 462 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
331 if (cs_l) { 463 if (cs_l) {
332 printk(KERN_DEBUG "set_cr0: #GP, start paging "
333 "in long mode while CS.L == 1\n");
334 kvm_inject_gp(vcpu, 0); 464 kvm_inject_gp(vcpu, 0);
335 return; 465 return;
336 466
@@ -338,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
338 } else 468 } else
339#endif 469#endif
340 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 470 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
341 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
342 "reserved bits\n");
343 kvm_inject_gp(vcpu, 0); 471 kvm_inject_gp(vcpu, 0);
344 return; 472 return;
345 } 473 }
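Taken together, the rewritten kvm_set_cr0() forces ET on, rejects writes that set any of the reserved upper 32 bits, NW without CD, or PG without PE, and only then applies the long-mode and PDPTR constraints. A standalone restatement of the unconditional checks (64-bit case; the EFER.LME/PAE/CS.L and PDPTR paths are omitted because they need vCPU state):

#include <stdio.h>
#include <stdint.h>

#define X86_CR0_PE (1ul << 0)
#define X86_CR0_ET (1ul << 4)
#define X86_CR0_NW (1ul << 29)
#define X86_CR0_CD (1ul << 30)
#define X86_CR0_PG (1ul << 31)

static int cr0_write_ok(uint64_t cr0)
{
        cr0 |= X86_CR0_ET;                      /* ET is hard-wired to 1 */
        if (cr0 & 0xffffffff00000000ULL)        /* upper 32 bits are reserved */
                return 0;
        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
                return 0;                       /* NW without CD is invalid */
        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
                return 0;                       /* paging requires protected mode */
        return 1;
}

int main(void)
{
        printf("%d\n", cr0_write_ok(X86_CR0_PE | X86_CR0_PG));  /* 1 */
        printf("%d\n", cr0_write_ok(X86_CR0_PG));               /* 0: PG without PE */
        printf("%d\n", cr0_write_ok(X86_CR0_NW));               /* 0: NW without CD */
        return 0;
}
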
@@ -356,38 +484,33 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
356 484
357void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 485void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
358{ 486{
359 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 487 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f));
360} 488}
361EXPORT_SYMBOL_GPL(kvm_lmsw); 489EXPORT_SYMBOL_GPL(kvm_lmsw);
362 490
363void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 491void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
364{ 492{
365 unsigned long old_cr4 = vcpu->arch.cr4; 493 unsigned long old_cr4 = kvm_read_cr4(vcpu);
366 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 494 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
367 495
368 if (cr4 & CR4_RESERVED_BITS) { 496 if (cr4 & CR4_RESERVED_BITS) {
369 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
370 kvm_inject_gp(vcpu, 0); 497 kvm_inject_gp(vcpu, 0);
371 return; 498 return;
372 } 499 }
373 500
374 if (is_long_mode(vcpu)) { 501 if (is_long_mode(vcpu)) {
375 if (!(cr4 & X86_CR4_PAE)) { 502 if (!(cr4 & X86_CR4_PAE)) {
376 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
377 "in long mode\n");
378 kvm_inject_gp(vcpu, 0); 503 kvm_inject_gp(vcpu, 0);
379 return; 504 return;
380 } 505 }
381 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 506 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
382 && ((cr4 ^ old_cr4) & pdptr_bits) 507 && ((cr4 ^ old_cr4) & pdptr_bits)
383 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 508 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
384 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
385 kvm_inject_gp(vcpu, 0); 509 kvm_inject_gp(vcpu, 0);
386 return; 510 return;
387 } 511 }
388 512
389 if (cr4 & X86_CR4_VMXE) { 513 if (cr4 & X86_CR4_VMXE) {
390 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
391 kvm_inject_gp(vcpu, 0); 514 kvm_inject_gp(vcpu, 0);
392 return; 515 return;
393 } 516 }
@@ -408,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
408 531
409 if (is_long_mode(vcpu)) { 532 if (is_long_mode(vcpu)) {
410 if (cr3 & CR3_L_MODE_RESERVED_BITS) { 533 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
411 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
412 kvm_inject_gp(vcpu, 0); 534 kvm_inject_gp(vcpu, 0);
413 return; 535 return;
414 } 536 }
415 } else { 537 } else {
416 if (is_pae(vcpu)) { 538 if (is_pae(vcpu)) {
417 if (cr3 & CR3_PAE_RESERVED_BITS) { 539 if (cr3 & CR3_PAE_RESERVED_BITS) {
418 printk(KERN_DEBUG
419 "set_cr3: #GP, reserved bits\n");
420 kvm_inject_gp(vcpu, 0); 540 kvm_inject_gp(vcpu, 0);
421 return; 541 return;
422 } 542 }
423 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { 543 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
424 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
425 "reserved bits\n");
426 kvm_inject_gp(vcpu, 0); 544 kvm_inject_gp(vcpu, 0);
427 return; 545 return;
428 } 546 }
@@ -454,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
454void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 572void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
455{ 573{
456 if (cr8 & CR8_RESERVED_BITS) { 574 if (cr8 & CR8_RESERVED_BITS) {
457 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
458 kvm_inject_gp(vcpu, 0); 575 kvm_inject_gp(vcpu, 0);
459 return; 576 return;
460 } 577 }
@@ -484,16 +601,21 @@ static inline u32 bit(int bitno)
484 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 601 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
485 * 602 *
486 * This list is modified at module load time to reflect the 603 * This list is modified at module load time to reflect the
487 * capabilities of the host cpu. 604 * capabilities of the host cpu. This capabilities test skips MSRs that are
605 * kvm-specific. Those are put in the beginning of the list.
488 */ 606 */
607
608#define KVM_SAVE_MSRS_BEGIN 5
489static u32 msrs_to_save[] = { 609static u32 msrs_to_save[] = {
610 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
611 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
612 HV_X64_MSR_APIC_ASSIST_PAGE,
490 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 613 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
491 MSR_K6_STAR, 614 MSR_K6_STAR,
492#ifdef CONFIG_X86_64 615#ifdef CONFIG_X86_64
493 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 616 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
494#endif 617#endif
495 MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 618 MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
496 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
497}; 619};
498 620
499static unsigned num_msrs_to_save; 621static unsigned num_msrs_to_save;
@@ -505,15 +627,12 @@ static u32 emulated_msrs[] = {
505static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 627static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
506{ 628{
507 if (efer & efer_reserved_bits) { 629 if (efer & efer_reserved_bits) {
508 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
509 efer);
510 kvm_inject_gp(vcpu, 0); 630 kvm_inject_gp(vcpu, 0);
511 return; 631 return;
512 } 632 }
513 633
514 if (is_paging(vcpu) 634 if (is_paging(vcpu)
515 && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { 635 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
516 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
517 kvm_inject_gp(vcpu, 0); 636 kvm_inject_gp(vcpu, 0);
518 return; 637 return;
519 } 638 }
@@ -523,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
523 642
524 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 643 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
525 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 644 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
526 printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
527 kvm_inject_gp(vcpu, 0); 645 kvm_inject_gp(vcpu, 0);
528 return; 646 return;
529 } 647 }
@@ -534,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
534 652
535 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 653 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
536 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 654 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
537 printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
538 kvm_inject_gp(vcpu, 0); 655 kvm_inject_gp(vcpu, 0);
539 return; 656 return;
540 } 657 }
@@ -543,9 +660,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
543 kvm_x86_ops->set_efer(vcpu, efer); 660 kvm_x86_ops->set_efer(vcpu, efer);
544 661
545 efer &= ~EFER_LMA; 662 efer &= ~EFER_LMA;
546 efer |= vcpu->arch.shadow_efer & EFER_LMA; 663 efer |= vcpu->arch.efer & EFER_LMA;
547 664
548 vcpu->arch.shadow_efer = efer; 665 vcpu->arch.efer = efer;
549 666
550 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 667 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
551 kvm_mmu_reset_context(vcpu); 668 kvm_mmu_reset_context(vcpu);
@@ -580,7 +697,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
580{ 697{
581 static int version; 698 static int version;
582 struct pvclock_wall_clock wc; 699 struct pvclock_wall_clock wc;
583 struct timespec now, sys, boot; 700 struct timespec boot;
584 701
585 if (!wall_clock) 702 if (!wall_clock)
586 return; 703 return;
@@ -595,9 +712,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
595 * wall clock specified here. guest system time equals host 712 * wall clock specified here. guest system time equals host
596 * system time for us, thus we must fill in host boot time here. 713 * system time for us, thus we must fill in host boot time here.
597 */ 714 */
598 now = current_kernel_time(); 715 getboottime(&boot);
599 ktime_get_ts(&sys);
600 boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
601 716
602 wc.sec = boot.tv_sec; 717 wc.sec = boot.tv_sec;
603 wc.nsec = boot.tv_nsec; 718 wc.nsec = boot.tv_nsec;
@@ -672,12 +787,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
672 local_irq_save(flags); 787 local_irq_save(flags);
673 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); 788 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
674 ktime_get_ts(&ts); 789 ktime_get_ts(&ts);
790 monotonic_to_bootbased(&ts);
675 local_irq_restore(flags); 791 local_irq_restore(flags);
676 792
677 /* With all the info we got, fill in the values */ 793 /* With all the info we got, fill in the values */
678 794
679 vcpu->hv_clock.system_time = ts.tv_nsec + 795 vcpu->hv_clock.system_time = ts.tv_nsec +
680 (NSEC_PER_SEC * (u64)ts.tv_sec); 796 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
797
681 /* 798 /*
682 * The interface expects us to write an even number signaling that the 799 * The interface expects us to write an even number signaling that the
683 * update is finished. Since the guest won't see the intermediate 800 * update is finished. Since the guest won't see the intermediate
@@ -823,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
823 if (msr >= MSR_IA32_MC0_CTL && 940 if (msr >= MSR_IA32_MC0_CTL &&
824 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 941 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
825 u32 offset = msr - MSR_IA32_MC0_CTL; 942 u32 offset = msr - MSR_IA32_MC0_CTL;
826 /* only 0 or all 1s can be written to IA32_MCi_CTL */ 943 /* only 0 or all 1s can be written to IA32_MCi_CTL
944 * some Linux kernels though clear bit 10 in bank 4 to
945 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
946 * this to avoid an uncatched #GP in the guest
947 */
827 if ((offset & 0x3) == 0 && 948 if ((offset & 0x3) == 0 &&
828 data != 0 && data != ~(u64)0) 949 data != 0 && (data | (1 << 10)) != ~(u64)0)
829 return -1; 950 return -1;
830 vcpu->arch.mce_banks[offset] = data; 951 vcpu->arch.mce_banks[offset] = data;
831 break; 952 break;
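The relaxed IA32_MCi_CTL check above exists because Linux guests applying the K8 GART erratum workaround write all-ones with bit 10 cleared to MC4_CTL, which the strict "0 or all 1s" rule would turn into an unexpected #GP. A standalone restatement of the new predicate with a few hypothetical values:

#include <stdio.h>
#include <stdint.h>

static int mci_ctl_write_ok(uint64_t data)
{
        /* 0 and all-ones are architecturally valid; all-ones with bit 10
         * cleared is additionally tolerated for the K8 GART erratum. */
        return data == 0 || (data | (1ULL << 10)) == ~(uint64_t)0;
}

int main(void)
{
        printf("%d\n", mci_ctl_write_ok(0));                        /* 1 */
        printf("%d\n", mci_ctl_write_ok(~(uint64_t)0));             /* 1 */
        printf("%d\n", mci_ctl_write_ok(~(uint64_t)0 & ~(1ULL << 10))); /* 1: K8 workaround value */
        printf("%d\n", mci_ctl_write_ok(0x1));                      /* 0: rejected, #GP in the guest */
        return 0;
}
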
@@ -835,6 +956,132 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
835 return 0; 956 return 0;
836} 957}
837 958
959static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
960{
961 struct kvm *kvm = vcpu->kvm;
962 int lm = is_long_mode(vcpu);
963 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
964 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
965 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
966 : kvm->arch.xen_hvm_config.blob_size_32;
967 u32 page_num = data & ~PAGE_MASK;
968 u64 page_addr = data & PAGE_MASK;
969 u8 *page;
970 int r;
971
972 r = -E2BIG;
973 if (page_num >= blob_size)
974 goto out;
975 r = -ENOMEM;
976 page = kzalloc(PAGE_SIZE, GFP_KERNEL);
977 if (!page)
978 goto out;
979 r = -EFAULT;
980 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
981 goto out_free;
982 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
983 goto out_free;
984 r = 0;
985out_free:
986 kfree(page);
987out:
988 return r;
989}
990
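xen_hvm_config() is the kernel half of the KVM_XEN_HVM_CONFIG capability added later in this patch: userspace registers a magic MSR plus the location of a hypercall blob, and a guest wrmsr of (page_gpa | page_number) to that MSR makes KVM copy the matching blob page into guest memory. A hedged userspace sketch of the registration side; the MSR number and the blob buffer are illustrative, the VM fd comes from KVM_CREATE_VM, and error handling is omitted:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* blob: the hypercall page image the guest should receive, 'pages' pages long. */
static int register_xen_hvm_blob(int vm_fd, void *blob, unsigned int pages)
{
        struct kvm_xen_hvm_config cfg;

        memset(&cfg, 0, sizeof(cfg));
        cfg.msr = 0x40000000;                   /* illustrative: the MSR the guest will write */
        cfg.blob_addr_32 = (uintptr_t)blob;     /* used while the guest runs in 32-bit mode */
        cfg.blob_addr_64 = (uintptr_t)blob;     /* used while the guest runs in long mode */
        cfg.blob_size_32 = pages;               /* sizes are in pages, checked against page_num */
        cfg.blob_size_64 = pages;
        return ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);
}
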
991static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
992{
993 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
994}
995
996static bool kvm_hv_msr_partition_wide(u32 msr)
997{
998 bool r = false;
999 switch (msr) {
1000 case HV_X64_MSR_GUEST_OS_ID:
1001 case HV_X64_MSR_HYPERCALL:
1002 r = true;
1003 break;
1004 }
1005
1006 return r;
1007}
1008
1009static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1010{
1011 struct kvm *kvm = vcpu->kvm;
1012
1013 switch (msr) {
1014 case HV_X64_MSR_GUEST_OS_ID:
1015 kvm->arch.hv_guest_os_id = data;
1016 /* setting guest os id to zero disables hypercall page */
1017 if (!kvm->arch.hv_guest_os_id)
1018 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1019 break;
1020 case HV_X64_MSR_HYPERCALL: {
1021 u64 gfn;
1022 unsigned long addr;
1023 u8 instructions[4];
1024
1025 /* if guest os id is not set hypercall should remain disabled */
1026 if (!kvm->arch.hv_guest_os_id)
1027 break;
1028 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1029 kvm->arch.hv_hypercall = data;
1030 break;
1031 }
1032 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1033 addr = gfn_to_hva(kvm, gfn);
1034 if (kvm_is_error_hva(addr))
1035 return 1;
1036 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1037 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
1038 if (copy_to_user((void __user *)addr, instructions, 4))
1039 return 1;
1040 kvm->arch.hv_hypercall = data;
1041 break;
1042 }
1043 default:
1044 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1045 "data 0x%llx\n", msr, data);
1046 return 1;
1047 }
1048 return 0;
1049}
1050
1051static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1052{
1053 switch (msr) {
1054 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1055 unsigned long addr;
1056
1057 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1058 vcpu->arch.hv_vapic = data;
1059 break;
1060 }
1061 addr = gfn_to_hva(vcpu->kvm, data >>
1062 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1063 if (kvm_is_error_hva(addr))
1064 return 1;
1065 if (clear_user((void __user *)addr, PAGE_SIZE))
1066 return 1;
1067 vcpu->arch.hv_vapic = data;
1068 break;
1069 }
1070 case HV_X64_MSR_EOI:
1071 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1072 case HV_X64_MSR_ICR:
1073 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1074 case HV_X64_MSR_TPR:
1075 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1076 default:
1077 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1078 "data 0x%llx\n", msr, data);
1079 return 1;
1080 }
1081
1082 return 0;
1083}
1084
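For the HV_X64_MSR_HYPERCALL case above, the guest-visible result is a page whose first bytes are the vendor hypercall instruction emitted by kvm_x86_ops->patch_hypercall() with a near return appended. A standalone sketch of what that page starts with; the VMCALL encoding is the VMX case and is an assumption here, since patch_hypercall() lives in the vendor modules, not in this file:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
        uint8_t page_start[4];
        const uint8_t vmcall[3] = { 0x0f, 0x01, 0xc1 }; /* VMCALL; AMD would emit VMMCALL (0f 01 d9) */

        memcpy(page_start, vmcall, sizeof(vmcall));     /* what patch_hypercall() is expected to write */
        page_start[3] = 0xc3;                           /* 'ret' appended by set_msr_hyperv_pw() */

        for (unsigned int i = 0; i < sizeof(page_start); i++)
                printf("%02x ", page_start[i]);
        printf("\n");
        return 0;
}
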
838int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1085int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
839{ 1086{
840 switch (msr) { 1087 switch (msr) {
@@ -949,7 +1196,19 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
949 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1196 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
950 "0x%x data 0x%llx\n", msr, data); 1197 "0x%x data 0x%llx\n", msr, data);
951 break; 1198 break;
1199 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1200 if (kvm_hv_msr_partition_wide(msr)) {
1201 int r;
1202 mutex_lock(&vcpu->kvm->lock);
1203 r = set_msr_hyperv_pw(vcpu, msr, data);
1204 mutex_unlock(&vcpu->kvm->lock);
1205 return r;
1206 } else
1207 return set_msr_hyperv(vcpu, msr, data);
1208 break;
952 default: 1209 default:
1210 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1211 return xen_hvm_config(vcpu, data);
953 if (!ignore_msrs) { 1212 if (!ignore_msrs) {
954 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", 1213 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
955 msr, data); 1214 msr, data);
@@ -1046,6 +1305,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1046 return 0; 1305 return 0;
1047} 1306}
1048 1307
1308static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1309{
1310 u64 data = 0;
1311 struct kvm *kvm = vcpu->kvm;
1312
1313 switch (msr) {
1314 case HV_X64_MSR_GUEST_OS_ID:
1315 data = kvm->arch.hv_guest_os_id;
1316 break;
1317 case HV_X64_MSR_HYPERCALL:
1318 data = kvm->arch.hv_hypercall;
1319 break;
1320 default:
1321 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1322 return 1;
1323 }
1324
1325 *pdata = data;
1326 return 0;
1327}
1328
1329static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1330{
1331 u64 data = 0;
1332
1333 switch (msr) {
1334 case HV_X64_MSR_VP_INDEX: {
1335 int r;
1336 struct kvm_vcpu *v;
1337 kvm_for_each_vcpu(r, v, vcpu->kvm)
1338 if (v == vcpu)
1339 data = r;
1340 break;
1341 }
1342 case HV_X64_MSR_EOI:
1343 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1344 case HV_X64_MSR_ICR:
1345 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1346 case HV_X64_MSR_TPR:
1347 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1348 default:
1349 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1350 return 1;
1351 }
1352 *pdata = data;
1353 return 0;
1354}
1355
1049int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1356int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1050{ 1357{
1051 u64 data; 1358 u64 data;
@@ -1097,7 +1404,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1097 data |= (((uint64_t)4ULL) << 40); 1404 data |= (((uint64_t)4ULL) << 40);
1098 break; 1405 break;
1099 case MSR_EFER: 1406 case MSR_EFER:
1100 data = vcpu->arch.shadow_efer; 1407 data = vcpu->arch.efer;
1101 break; 1408 break;
1102 case MSR_KVM_WALL_CLOCK: 1409 case MSR_KVM_WALL_CLOCK:
1103 data = vcpu->kvm->arch.wall_clock; 1410 data = vcpu->kvm->arch.wall_clock;
@@ -1112,6 +1419,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1112 case MSR_IA32_MCG_STATUS: 1419 case MSR_IA32_MCG_STATUS:
1113 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1420 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1114 return get_msr_mce(vcpu, msr, pdata); 1421 return get_msr_mce(vcpu, msr, pdata);
1422 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1423 if (kvm_hv_msr_partition_wide(msr)) {
1424 int r;
1425 mutex_lock(&vcpu->kvm->lock);
1426 r = get_msr_hyperv_pw(vcpu, msr, pdata);
1427 mutex_unlock(&vcpu->kvm->lock);
1428 return r;
1429 } else
1430 return get_msr_hyperv(vcpu, msr, pdata);
1431 break;
1115 default: 1432 default:
1116 if (!ignore_msrs) { 1433 if (!ignore_msrs) {
1117 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1434 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1137,15 +1454,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1137 int (*do_msr)(struct kvm_vcpu *vcpu, 1454 int (*do_msr)(struct kvm_vcpu *vcpu,
1138 unsigned index, u64 *data)) 1455 unsigned index, u64 *data))
1139{ 1456{
1140 int i; 1457 int i, idx;
1141 1458
1142 vcpu_load(vcpu); 1459 vcpu_load(vcpu);
1143 1460
1144 down_read(&vcpu->kvm->slots_lock); 1461 idx = srcu_read_lock(&vcpu->kvm->srcu);
1145 for (i = 0; i < msrs->nmsrs; ++i) 1462 for (i = 0; i < msrs->nmsrs; ++i)
1146 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1463 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1147 break; 1464 break;
1148 up_read(&vcpu->kvm->slots_lock); 1465 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1149 1466
1150 vcpu_put(vcpu); 1467 vcpu_put(vcpu);
1151 1468
@@ -1224,6 +1541,14 @@ int kvm_dev_ioctl_check_extension(long ext)
1224 case KVM_CAP_PIT2: 1541 case KVM_CAP_PIT2:
1225 case KVM_CAP_PIT_STATE2: 1542 case KVM_CAP_PIT_STATE2:
1226 case KVM_CAP_SET_IDENTITY_MAP_ADDR: 1543 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
1544 case KVM_CAP_XEN_HVM:
1545 case KVM_CAP_ADJUST_CLOCK:
1546 case KVM_CAP_VCPU_EVENTS:
1547 case KVM_CAP_HYPERV:
1548 case KVM_CAP_HYPERV_VAPIC:
1549 case KVM_CAP_HYPERV_SPIN:
1550 case KVM_CAP_PCI_SEGMENT:
1551 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1227 r = 1; 1552 r = 1;
1228 break; 1553 break;
1229 case KVM_CAP_COALESCED_MMIO: 1554 case KVM_CAP_COALESCED_MMIO:
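Each KVM_CAP_* added above is how userspace discovers the new ioctls before using them. A minimal probe, assuming only /dev/kvm and a <linux/kvm.h> recent enough to define these capabilities:

#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR);

        if (kvm < 0)
                return 1;
        if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_VCPU_EVENTS) > 0)
                printf("KVM_GET/SET_VCPU_EVENTS available\n");
        if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_ADJUST_CLOCK) > 0)
                printf("KVM_GET/SET_CLOCK available\n");
        return 0;
}
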
@@ -1238,8 +1563,8 @@ int kvm_dev_ioctl_check_extension(long ext)
1238 case KVM_CAP_NR_MEMSLOTS: 1563 case KVM_CAP_NR_MEMSLOTS:
1239 r = KVM_MEMORY_SLOTS; 1564 r = KVM_MEMORY_SLOTS;
1240 break; 1565 break;
1241 case KVM_CAP_PV_MMU: 1566 case KVM_CAP_PV_MMU: /* obsolete */
1242 r = !tdp_enabled; 1567 r = 0;
1243 break; 1568 break;
1244 case KVM_CAP_IOMMU: 1569 case KVM_CAP_IOMMU:
1245 r = iommu_found(); 1570 r = iommu_found();
@@ -1326,13 +1651,19 @@ out:
1326void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1651void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1327{ 1652{
1328 kvm_x86_ops->vcpu_load(vcpu, cpu); 1653 kvm_x86_ops->vcpu_load(vcpu, cpu);
1654 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
1655 unsigned long khz = cpufreq_quick_get(cpu);
1656 if (!khz)
1657 khz = tsc_khz;
1658 per_cpu(cpu_tsc_khz, cpu) = khz;
1659 }
1329 kvm_request_guest_time_update(vcpu); 1660 kvm_request_guest_time_update(vcpu);
1330} 1661}
1331 1662
1332void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1663void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1333{ 1664{
1334 kvm_x86_ops->vcpu_put(vcpu);
1335 kvm_put_guest_fpu(vcpu); 1665 kvm_put_guest_fpu(vcpu);
1666 kvm_x86_ops->vcpu_put(vcpu);
1336} 1667}
1337 1668
1338static int is_efer_nx(void) 1669static int is_efer_nx(void)
@@ -1381,6 +1712,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1381 if (copy_from_user(cpuid_entries, entries, 1712 if (copy_from_user(cpuid_entries, entries,
1382 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 1713 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
1383 goto out_free; 1714 goto out_free;
1715 vcpu_load(vcpu);
1384 for (i = 0; i < cpuid->nent; i++) { 1716 for (i = 0; i < cpuid->nent; i++) {
1385 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; 1717 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
1386 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; 1718 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
@@ -1397,6 +1729,8 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1397 cpuid_fix_nx_cap(vcpu); 1729 cpuid_fix_nx_cap(vcpu);
1398 r = 0; 1730 r = 0;
1399 kvm_apic_set_version(vcpu); 1731 kvm_apic_set_version(vcpu);
1732 kvm_x86_ops->cpuid_update(vcpu);
1733 vcpu_put(vcpu);
1400 1734
1401out_free: 1735out_free:
1402 vfree(cpuid_entries); 1736 vfree(cpuid_entries);
@@ -1417,8 +1751,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1417 if (copy_from_user(&vcpu->arch.cpuid_entries, entries, 1751 if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
1418 cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1752 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
1419 goto out; 1753 goto out;
1754 vcpu_load(vcpu);
1420 vcpu->arch.cpuid_nent = cpuid->nent; 1755 vcpu->arch.cpuid_nent = cpuid->nent;
1421 kvm_apic_set_version(vcpu); 1756 kvm_apic_set_version(vcpu);
1757 kvm_x86_ops->cpuid_update(vcpu);
1758 vcpu_put(vcpu);
1422 return 0; 1759 return 0;
1423 1760
1424out: 1761out:
@@ -1461,12 +1798,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1461 u32 index, int *nent, int maxnent) 1798 u32 index, int *nent, int maxnent)
1462{ 1799{
1463 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1800 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1464 unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
1465#ifdef CONFIG_X86_64 1801#ifdef CONFIG_X86_64
1802 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
1803 ? F(GBPAGES) : 0;
1466 unsigned f_lm = F(LM); 1804 unsigned f_lm = F(LM);
1467#else 1805#else
1806 unsigned f_gbpages = 0;
1468 unsigned f_lm = 0; 1807 unsigned f_lm = 0;
1469#endif 1808#endif
1809 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
1470 1810
1471 /* cpuid 1.edx */ 1811 /* cpuid 1.edx */
1472 const u32 kvm_supported_word0_x86_features = 1812 const u32 kvm_supported_word0_x86_features =
@@ -1486,7 +1826,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1486 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1826 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1487 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1827 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1488 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1828 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1489 F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | 1829 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
1490 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1830 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1491 /* cpuid 1.ecx */ 1831 /* cpuid 1.ecx */
1492 const u32 kvm_supported_word4_x86_features = 1832 const u32 kvm_supported_word4_x86_features =
@@ -1733,7 +2073,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1733 return 0; 2073 return 0;
1734 if (mce->status & MCI_STATUS_UC) { 2074 if (mce->status & MCI_STATUS_UC) {
1735 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || 2075 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
1736 !(vcpu->arch.cr4 & X86_CR4_MCE)) { 2076 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
1737 printk(KERN_DEBUG "kvm: set_mce: " 2077 printk(KERN_DEBUG "kvm: set_mce: "
1738 "injects mce exception while " 2078 "injects mce exception while "
1739 "previous one is in progress!\n"); 2079 "previous one is in progress!\n");
@@ -1759,6 +2099,65 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1759 return 0; 2099 return 0;
1760} 2100}
1761 2101
2102static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2103 struct kvm_vcpu_events *events)
2104{
2105 vcpu_load(vcpu);
2106
2107 events->exception.injected = vcpu->arch.exception.pending;
2108 events->exception.nr = vcpu->arch.exception.nr;
2109 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2110 events->exception.error_code = vcpu->arch.exception.error_code;
2111
2112 events->interrupt.injected = vcpu->arch.interrupt.pending;
2113 events->interrupt.nr = vcpu->arch.interrupt.nr;
2114 events->interrupt.soft = vcpu->arch.interrupt.soft;
2115
2116 events->nmi.injected = vcpu->arch.nmi_injected;
2117 events->nmi.pending = vcpu->arch.nmi_pending;
2118 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2119
2120 events->sipi_vector = vcpu->arch.sipi_vector;
2121
2122 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2123 | KVM_VCPUEVENT_VALID_SIPI_VECTOR);
2124
2125 vcpu_put(vcpu);
2126}
2127
2128static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2129 struct kvm_vcpu_events *events)
2130{
2131 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2132 | KVM_VCPUEVENT_VALID_SIPI_VECTOR))
2133 return -EINVAL;
2134
2135 vcpu_load(vcpu);
2136
2137 vcpu->arch.exception.pending = events->exception.injected;
2138 vcpu->arch.exception.nr = events->exception.nr;
2139 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2140 vcpu->arch.exception.error_code = events->exception.error_code;
2141
2142 vcpu->arch.interrupt.pending = events->interrupt.injected;
2143 vcpu->arch.interrupt.nr = events->interrupt.nr;
2144 vcpu->arch.interrupt.soft = events->interrupt.soft;
2145 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
2146 kvm_pic_clear_isr_ack(vcpu->kvm);
2147
2148 vcpu->arch.nmi_injected = events->nmi.injected;
2149 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2150 vcpu->arch.nmi_pending = events->nmi.pending;
2151 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2152
2153 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2154 vcpu->arch.sipi_vector = events->sipi_vector;
2155
2156 vcpu_put(vcpu);
2157
2158 return 0;
2159}
2160
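The two handlers above back the new KVM_GET_VCPU_EVENTS / KVM_SET_VCPU_EVENTS ioctls used to save, restore or reset pending exception, interrupt and NMI state. Because the set path always writes the exception and interrupt fields and only honors nmi.pending and sipi_vector when the matching KVM_VCPUEVENT_VALID_* flag is set, a get-modify-set round trip is the safe pattern. A hedged userspace sketch (vcpu_fd is an fd from KVM_CREATE_VCPU; error handling is trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Drop a pending-but-not-yet-injected NMI on the given vCPU. */
static int clear_pending_nmi(int vcpu_fd)
{
        struct kvm_vcpu_events events;

        memset(&events, 0, sizeof(events));
        if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
                return -1;
        events.nmi.pending = 0;
        events.flags = KVM_VCPUEVENT_VALID_NMI_PENDING; /* only re-latch the NMI-pending field */
        return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}
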
1762long kvm_arch_vcpu_ioctl(struct file *filp, 2161long kvm_arch_vcpu_ioctl(struct file *filp,
1763 unsigned int ioctl, unsigned long arg) 2162 unsigned int ioctl, unsigned long arg)
1764{ 2163{
@@ -1769,6 +2168,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1769 2168
1770 switch (ioctl) { 2169 switch (ioctl) {
1771 case KVM_GET_LAPIC: { 2170 case KVM_GET_LAPIC: {
2171 r = -EINVAL;
2172 if (!vcpu->arch.apic)
2173 goto out;
1772 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2174 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
1773 2175
1774 r = -ENOMEM; 2176 r = -ENOMEM;
@@ -1784,6 +2186,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1784 break; 2186 break;
1785 } 2187 }
1786 case KVM_SET_LAPIC: { 2188 case KVM_SET_LAPIC: {
2189 r = -EINVAL;
2190 if (!vcpu->arch.apic)
2191 goto out;
1787 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2192 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
1788 r = -ENOMEM; 2193 r = -ENOMEM;
1789 if (!lapic) 2194 if (!lapic)
@@ -1910,6 +2315,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1910 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2315 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
1911 break; 2316 break;
1912 } 2317 }
2318 case KVM_GET_VCPU_EVENTS: {
2319 struct kvm_vcpu_events events;
2320
2321 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
2322
2323 r = -EFAULT;
2324 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
2325 break;
2326 r = 0;
2327 break;
2328 }
2329 case KVM_SET_VCPU_EVENTS: {
2330 struct kvm_vcpu_events events;
2331
2332 r = -EFAULT;
2333 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
2334 break;
2335
2336 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2337 break;
2338 }
1913 default: 2339 default:
1914 r = -EINVAL; 2340 r = -EINVAL;
1915 } 2341 }
@@ -1941,14 +2367,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
1941 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 2367 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
1942 return -EINVAL; 2368 return -EINVAL;
1943 2369
1944 down_write(&kvm->slots_lock); 2370 mutex_lock(&kvm->slots_lock);
1945 spin_lock(&kvm->mmu_lock); 2371 spin_lock(&kvm->mmu_lock);
1946 2372
1947 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 2373 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
1948 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 2374 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
1949 2375
1950 spin_unlock(&kvm->mmu_lock); 2376 spin_unlock(&kvm->mmu_lock);
1951 up_write(&kvm->slots_lock); 2377 mutex_unlock(&kvm->slots_lock);
1952 return 0; 2378 return 0;
1953} 2379}
1954 2380
@@ -1957,13 +2383,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
1957 return kvm->arch.n_alloc_mmu_pages; 2383 return kvm->arch.n_alloc_mmu_pages;
1958} 2384}
1959 2385
2386gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2387{
2388 int i;
2389 struct kvm_mem_alias *alias;
2390 struct kvm_mem_aliases *aliases;
2391
2392 aliases = rcu_dereference(kvm->arch.aliases);
2393
2394 for (i = 0; i < aliases->naliases; ++i) {
2395 alias = &aliases->aliases[i];
2396 if (alias->flags & KVM_ALIAS_INVALID)
2397 continue;
2398 if (gfn >= alias->base_gfn
2399 && gfn < alias->base_gfn + alias->npages)
2400 return alias->target_gfn + gfn - alias->base_gfn;
2401 }
2402 return gfn;
2403}
2404
1960gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2405gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
1961{ 2406{
1962 int i; 2407 int i;
1963 struct kvm_mem_alias *alias; 2408 struct kvm_mem_alias *alias;
2409 struct kvm_mem_aliases *aliases;
1964 2410
1965 for (i = 0; i < kvm->arch.naliases; ++i) { 2411 aliases = rcu_dereference(kvm->arch.aliases);
1966 alias = &kvm->arch.aliases[i]; 2412
2413 for (i = 0; i < aliases->naliases; ++i) {
2414 alias = &aliases->aliases[i];
1967 if (gfn >= alias->base_gfn 2415 if (gfn >= alias->base_gfn
1968 && gfn < alias->base_gfn + alias->npages) 2416 && gfn < alias->base_gfn + alias->npages)
1969 return alias->target_gfn + gfn - alias->base_gfn; 2417 return alias->target_gfn + gfn - alias->base_gfn;
@@ -1981,6 +2429,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1981{ 2429{
1982 int r, n; 2430 int r, n;
1983 struct kvm_mem_alias *p; 2431 struct kvm_mem_alias *p;
2432 struct kvm_mem_aliases *aliases, *old_aliases;
1984 2433
1985 r = -EINVAL; 2434 r = -EINVAL;
1986 /* General sanity checks */ 2435 /* General sanity checks */
@@ -1997,26 +2446,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1997 < alias->target_phys_addr) 2446 < alias->target_phys_addr)
1998 goto out; 2447 goto out;
1999 2448
2000 down_write(&kvm->slots_lock); 2449 r = -ENOMEM;
2001 spin_lock(&kvm->mmu_lock); 2450 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2451 if (!aliases)
2452 goto out;
2002 2453
2003 p = &kvm->arch.aliases[alias->slot]; 2454 mutex_lock(&kvm->slots_lock);
2455
2456 /* invalidate any gfn reference in case of deletion/shrinking */
2457 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2458 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
2459 old_aliases = kvm->arch.aliases;
2460 rcu_assign_pointer(kvm->arch.aliases, aliases);
2461 synchronize_srcu_expedited(&kvm->srcu);
2462 kvm_mmu_zap_all(kvm);
2463 kfree(old_aliases);
2464
2465 r = -ENOMEM;
2466 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2467 if (!aliases)
2468 goto out_unlock;
2469
2470 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2471
2472 p = &aliases->aliases[alias->slot];
2004 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2473 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
2005 p->npages = alias->memory_size >> PAGE_SHIFT; 2474 p->npages = alias->memory_size >> PAGE_SHIFT;
2006 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2475 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
2476 p->flags &= ~(KVM_ALIAS_INVALID);
2007 2477
2008 for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2478 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
2009 if (kvm->arch.aliases[n - 1].npages) 2479 if (aliases->aliases[n - 1].npages)
2010 break; 2480 break;
2011 kvm->arch.naliases = n; 2481 aliases->naliases = n;
2012
2013 spin_unlock(&kvm->mmu_lock);
2014 kvm_mmu_zap_all(kvm);
2015
2016 up_write(&kvm->slots_lock);
2017 2482
2018 return 0; 2483 old_aliases = kvm->arch.aliases;
2484 rcu_assign_pointer(kvm->arch.aliases, aliases);
2485 synchronize_srcu_expedited(&kvm->srcu);
2486 kfree(old_aliases);
2487 r = 0;
2019 2488
2489out_unlock:
2490 mutex_unlock(&kvm->slots_lock);
2020out: 2491out:
2021 return r; 2492 return r;
2022} 2493}
@@ -2038,9 +2509,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2038 sizeof(struct kvm_pic_state)); 2509 sizeof(struct kvm_pic_state));
2039 break; 2510 break;
2040 case KVM_IRQCHIP_IOAPIC: 2511 case KVM_IRQCHIP_IOAPIC:
2041 memcpy(&chip->chip.ioapic, 2512 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
2042 ioapic_irqchip(kvm),
2043 sizeof(struct kvm_ioapic_state));
2044 break; 2513 break;
2045 default: 2514 default:
2046 r = -EINVAL; 2515 r = -EINVAL;
@@ -2056,25 +2525,21 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2056 r = 0; 2525 r = 0;
2057 switch (chip->chip_id) { 2526 switch (chip->chip_id) {
2058 case KVM_IRQCHIP_PIC_MASTER: 2527 case KVM_IRQCHIP_PIC_MASTER:
2059 spin_lock(&pic_irqchip(kvm)->lock); 2528 raw_spin_lock(&pic_irqchip(kvm)->lock);
2060 memcpy(&pic_irqchip(kvm)->pics[0], 2529 memcpy(&pic_irqchip(kvm)->pics[0],
2061 &chip->chip.pic, 2530 &chip->chip.pic,
2062 sizeof(struct kvm_pic_state)); 2531 sizeof(struct kvm_pic_state));
2063 spin_unlock(&pic_irqchip(kvm)->lock); 2532 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2064 break; 2533 break;
2065 case KVM_IRQCHIP_PIC_SLAVE: 2534 case KVM_IRQCHIP_PIC_SLAVE:
2066 spin_lock(&pic_irqchip(kvm)->lock); 2535 raw_spin_lock(&pic_irqchip(kvm)->lock);
2067 memcpy(&pic_irqchip(kvm)->pics[1], 2536 memcpy(&pic_irqchip(kvm)->pics[1],
2068 &chip->chip.pic, 2537 &chip->chip.pic,
2069 sizeof(struct kvm_pic_state)); 2538 sizeof(struct kvm_pic_state));
2070 spin_unlock(&pic_irqchip(kvm)->lock); 2539 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2071 break; 2540 break;
2072 case KVM_IRQCHIP_IOAPIC: 2541 case KVM_IRQCHIP_IOAPIC:
2073 mutex_lock(&kvm->irq_lock); 2542 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
2074 memcpy(ioapic_irqchip(kvm),
2075 &chip->chip.ioapic,
2076 sizeof(struct kvm_ioapic_state));
2077 mutex_unlock(&kvm->irq_lock);
2078 break; 2543 break;
2079 default: 2544 default:
2080 r = -EINVAL; 2545 r = -EINVAL;
@@ -2151,29 +2616,63 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2151int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2616int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2152 struct kvm_dirty_log *log) 2617 struct kvm_dirty_log *log)
2153{ 2618{
2154 int r; 2619 int r, i;
2155 int n;
2156 struct kvm_memory_slot *memslot; 2620 struct kvm_memory_slot *memslot;
2157 int is_dirty = 0; 2621 unsigned long n;
2622 unsigned long is_dirty = 0;
2623 unsigned long *dirty_bitmap = NULL;
2158 2624
2159 down_write(&kvm->slots_lock); 2625 mutex_lock(&kvm->slots_lock);
2160 2626
2161 r = kvm_get_dirty_log(kvm, log, &is_dirty); 2627 r = -EINVAL;
2162 if (r) 2628 if (log->slot >= KVM_MEMORY_SLOTS)
2629 goto out;
2630
2631 memslot = &kvm->memslots->memslots[log->slot];
2632 r = -ENOENT;
2633 if (!memslot->dirty_bitmap)
2634 goto out;
2635
2636 n = kvm_dirty_bitmap_bytes(memslot);
2637
2638 r = -ENOMEM;
2639 dirty_bitmap = vmalloc(n);
2640 if (!dirty_bitmap)
2163 goto out; 2641 goto out;
2642 memset(dirty_bitmap, 0, n);
2643
2644 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
2645 is_dirty = memslot->dirty_bitmap[i];
2164 2646
2165 /* If nothing is dirty, don't bother messing with page tables. */ 2647 /* If nothing is dirty, don't bother messing with page tables. */
2166 if (is_dirty) { 2648 if (is_dirty) {
2649 struct kvm_memslots *slots, *old_slots;
2650
2167 spin_lock(&kvm->mmu_lock); 2651 spin_lock(&kvm->mmu_lock);
2168 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2652 kvm_mmu_slot_remove_write_access(kvm, log->slot);
2169 spin_unlock(&kvm->mmu_lock); 2653 spin_unlock(&kvm->mmu_lock);
2170 memslot = &kvm->memslots[log->slot]; 2654
2171 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2655 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
2172 memset(memslot->dirty_bitmap, 0, n); 2656 if (!slots)
2657 goto out_free;
2658
2659 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
2660 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
2661
2662 old_slots = kvm->memslots;
2663 rcu_assign_pointer(kvm->memslots, slots);
2664 synchronize_srcu_expedited(&kvm->srcu);
2665 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
2666 kfree(old_slots);
2173 } 2667 }
2668
2174 r = 0; 2669 r = 0;
2670 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
2671 r = -EFAULT;
2672out_free:
2673 vfree(dirty_bitmap);
2175out: 2674out:
2176 up_write(&kvm->slots_lock); 2675 mutex_unlock(&kvm->slots_lock);
2177 return r; 2676 return r;
2178} 2677}
2179 2678
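kvm_vm_ioctl_get_dirty_log() now swaps in a fresh bitmap through an RCU-published memslots copy (rcu_assign_pointer() plus synchronize_srcu_expedited()) instead of clearing the bitmap in place under slots_lock, so concurrent faults mark pages dirty in the new bitmap while userspace consumes the old one. The userspace contract is unchanged; a hedged sketch of a caller, where vm_fd and the slot's size in pages are assumed to be known to the VMM:

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Fetch and return the dirty bitmap for one memory slot; the caller frees it.
 * npages must match the slot size registered with KVM_SET_USER_MEMORY_REGION. */
static unsigned long *fetch_dirty_log(int vm_fd, unsigned int slot, unsigned long npages)
{
        struct kvm_dirty_log log;
        size_t bytes = ((npages + 63) / 64) * 8;        /* one bit per page, rounded up */
        unsigned long *bitmap = calloc(1, bytes);

        if (!bitmap)
                return NULL;
        memset(&log, 0, sizeof(log));
        log.slot = slot;
        log.dirty_bitmap = bitmap;
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                free(bitmap);
                return NULL;
        }
        return bitmap;
}
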
@@ -2182,7 +2681,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2182{ 2681{
2183 struct kvm *kvm = filp->private_data; 2682 struct kvm *kvm = filp->private_data;
2184 void __user *argp = (void __user *)arg; 2683 void __user *argp = (void __user *)arg;
2185 int r = -EINVAL; 2684 int r = -ENOTTY;
2186 /* 2685 /*
2187 * This union makes it completely explicit to gcc-3.x 2686 * This union makes it completely explicit to gcc-3.x
2188 * that these two variables' stack usage should be 2687 * that these two variables' stack usage should be
@@ -2244,25 +2743,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
2244 if (r) 2743 if (r)
2245 goto out; 2744 goto out;
2246 break; 2745 break;
2247 case KVM_CREATE_IRQCHIP: 2746 case KVM_CREATE_IRQCHIP: {
2747 struct kvm_pic *vpic;
2748
2749 mutex_lock(&kvm->lock);
2750 r = -EEXIST;
2751 if (kvm->arch.vpic)
2752 goto create_irqchip_unlock;
2248 r = -ENOMEM; 2753 r = -ENOMEM;
2249 kvm->arch.vpic = kvm_create_pic(kvm); 2754 vpic = kvm_create_pic(kvm);
2250 if (kvm->arch.vpic) { 2755 if (vpic) {
2251 r = kvm_ioapic_init(kvm); 2756 r = kvm_ioapic_init(kvm);
2252 if (r) { 2757 if (r) {
2253 kfree(kvm->arch.vpic); 2758 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
2254 kvm->arch.vpic = NULL; 2759 &vpic->dev);
2255 goto out; 2760 kfree(vpic);
2761 goto create_irqchip_unlock;
2256 } 2762 }
2257 } else 2763 } else
2258 goto out; 2764 goto create_irqchip_unlock;
2765 smp_wmb();
2766 kvm->arch.vpic = vpic;
2767 smp_wmb();
2259 r = kvm_setup_default_irq_routing(kvm); 2768 r = kvm_setup_default_irq_routing(kvm);
2260 if (r) { 2769 if (r) {
2261 kfree(kvm->arch.vpic); 2770 mutex_lock(&kvm->irq_lock);
2262 kfree(kvm->arch.vioapic); 2771 kvm_ioapic_destroy(kvm);
2263 goto out; 2772 kvm_destroy_pic(kvm);
2773 mutex_unlock(&kvm->irq_lock);
2264 } 2774 }
2775 create_irqchip_unlock:
2776 mutex_unlock(&kvm->lock);
2265 break; 2777 break;
2778 }
2266 case KVM_CREATE_PIT: 2779 case KVM_CREATE_PIT:
2267 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; 2780 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
2268 goto create_pit; 2781 goto create_pit;
@@ -2272,7 +2785,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2272 sizeof(struct kvm_pit_config))) 2785 sizeof(struct kvm_pit_config)))
2273 goto out; 2786 goto out;
2274 create_pit: 2787 create_pit:
2275 down_write(&kvm->slots_lock); 2788 mutex_lock(&kvm->slots_lock);
2276 r = -EEXIST; 2789 r = -EEXIST;
2277 if (kvm->arch.vpit) 2790 if (kvm->arch.vpit)
2278 goto create_pit_unlock; 2791 goto create_pit_unlock;
@@ -2281,7 +2794,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2281 if (kvm->arch.vpit) 2794 if (kvm->arch.vpit)
2282 r = 0; 2795 r = 0;
2283 create_pit_unlock: 2796 create_pit_unlock:
2284 up_write(&kvm->slots_lock); 2797 mutex_unlock(&kvm->slots_lock);
2285 break; 2798 break;
2286 case KVM_IRQ_LINE_STATUS: 2799 case KVM_IRQ_LINE_STATUS:
2287 case KVM_IRQ_LINE: { 2800 case KVM_IRQ_LINE: {
@@ -2292,10 +2805,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2292 goto out; 2805 goto out;
2293 if (irqchip_in_kernel(kvm)) { 2806 if (irqchip_in_kernel(kvm)) {
2294 __s32 status; 2807 __s32 status;
2295 mutex_lock(&kvm->irq_lock);
2296 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2808 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
2297 irq_event.irq, irq_event.level); 2809 irq_event.irq, irq_event.level);
2298 mutex_unlock(&kvm->irq_lock);
2299 if (ioctl == KVM_IRQ_LINE_STATUS) { 2810 if (ioctl == KVM_IRQ_LINE_STATUS) {
2300 irq_event.status = status; 2811 irq_event.status = status;
2301 if (copy_to_user(argp, &irq_event, 2812 if (copy_to_user(argp, &irq_event,
@@ -2421,6 +2932,55 @@ long kvm_arch_vm_ioctl(struct file *filp,
2421 r = 0; 2932 r = 0;
2422 break; 2933 break;
2423 } 2934 }
2935 case KVM_XEN_HVM_CONFIG: {
2936 r = -EFAULT;
2937 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
2938 sizeof(struct kvm_xen_hvm_config)))
2939 goto out;
2940 r = -EINVAL;
2941 if (kvm->arch.xen_hvm_config.flags)
2942 goto out;
2943 r = 0;
2944 break;
2945 }
2946 case KVM_SET_CLOCK: {
2947 struct timespec now;
2948 struct kvm_clock_data user_ns;
2949 u64 now_ns;
2950 s64 delta;
2951
2952 r = -EFAULT;
2953 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
2954 goto out;
2955
2956 r = -EINVAL;
2957 if (user_ns.flags)
2958 goto out;
2959
2960 r = 0;
2961 ktime_get_ts(&now);
2962 now_ns = timespec_to_ns(&now);
2963 delta = user_ns.clock - now_ns;
2964 kvm->arch.kvmclock_offset = delta;
2965 break;
2966 }
2967 case KVM_GET_CLOCK: {
2968 struct timespec now;
2969 struct kvm_clock_data user_ns;
2970 u64 now_ns;
2971
2972 ktime_get_ts(&now);
2973 now_ns = timespec_to_ns(&now);
2974 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
2975 user_ns.flags = 0;
2976
2977 r = -EFAULT;
2978 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
2979 goto out;
2980 r = 0;
2981 break;
2982 }
2983
2424 default: 2984 default:
2425 ; 2985 ;
2426 } 2986 }
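KVM_GET_CLOCK / KVM_SET_CLOCK expose the per-VM kvmclock_offset introduced earlier, which is what lets a migrated guest resume with a continuous kvmclock rather than jumping to the destination host's monotonic clock. A hedged sketch of the migration use; src_vm_fd and dst_vm_fd are assumed to be the two VM fds and error handling is trimmed:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read the guest clock on the source VM and impose it on the destination VM. */
static int migrate_kvmclock(int src_vm_fd, int dst_vm_fd)
{
        struct kvm_clock_data data;

        if (ioctl(src_vm_fd, KVM_GET_CLOCK, &data) < 0)
                return -1;
        data.flags = 0;         /* data.clock is in nanoseconds; flags must be zero */
        return ioctl(dst_vm_fd, KVM_SET_CLOCK, &data);
}
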
@@ -2433,7 +2993,8 @@ static void kvm_init_msr_list(void)
2433 u32 dummy[2]; 2993 u32 dummy[2];
2434 unsigned i, j; 2994 unsigned i, j;
2435 2995
2436 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { 2996 /* skip the first msrs in the list. KVM-specific */
2997 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
2437 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) 2998 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
2438 continue; 2999 continue;
2439 if (j < i) 3000 if (j < i)
@@ -2450,7 +3011,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
2450 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3011 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
2451 return 0; 3012 return 0;
2452 3013
2453 return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); 3014 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2454} 3015}
2455 3016
2456static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3017static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2459,17 +3020,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2459 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3020 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
2460 return 0; 3021 return 0;
2461 3022
2462 return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); 3023 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2463} 3024}
2464 3025
2465static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3026gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
2466 struct kvm_vcpu *vcpu) 3027{
3028 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3029 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3030}
3031
3032 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3033{
3034 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3035 access |= PFERR_FETCH_MASK;
3036 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3037}
3038
3039gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3040{
3041 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3042 access |= PFERR_WRITE_MASK;
3043 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3044}
3045
3046/* uses this to access any guest's mapped memory without checking CPL */
3047gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3048{
3049 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
3050}
3051
3052static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3053 struct kvm_vcpu *vcpu, u32 access,
3054 u32 *error)
2467{ 3055{
2468 void *data = val; 3056 void *data = val;
2469 int r = X86EMUL_CONTINUE; 3057 int r = X86EMUL_CONTINUE;
2470 3058
2471 while (bytes) { 3059 while (bytes) {
2472 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3060 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
2473 unsigned offset = addr & (PAGE_SIZE-1); 3061 unsigned offset = addr & (PAGE_SIZE-1);
2474 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); 3062 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
2475 int ret; 3063 int ret;
@@ -2492,14 +3080,37 @@ out:
2492 return r; 3080 return r;
2493} 3081}
2494 3082
3083/* used for instruction fetching */
3084static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
3085 struct kvm_vcpu *vcpu, u32 *error)
3086{
3087 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3088 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3089 access | PFERR_FETCH_MASK, error);
3090}
3091
3092static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
3093 struct kvm_vcpu *vcpu, u32 *error)
3094{
3095 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3096 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3097 error);
3098}
3099
3100static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3101 struct kvm_vcpu *vcpu, u32 *error)
3102{
3103 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3104}
3105
2495static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3106static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
2496 struct kvm_vcpu *vcpu) 3107 struct kvm_vcpu *vcpu, u32 *error)
2497{ 3108{
2498 void *data = val; 3109 void *data = val;
2499 int r = X86EMUL_CONTINUE; 3110 int r = X86EMUL_CONTINUE;
2500 3111
2501 while (bytes) { 3112 while (bytes) {
2502 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3113 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
2503 unsigned offset = addr & (PAGE_SIZE-1); 3114 unsigned offset = addr & (PAGE_SIZE-1);
2504 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3115 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
2505 int ret; 3116 int ret;
@@ -2529,6 +3140,7 @@ static int emulator_read_emulated(unsigned long addr,
2529 struct kvm_vcpu *vcpu) 3140 struct kvm_vcpu *vcpu)
2530{ 3141{
2531 gpa_t gpa; 3142 gpa_t gpa;
3143 u32 error_code;
2532 3144
2533 if (vcpu->mmio_read_completed) { 3145 if (vcpu->mmio_read_completed) {
2534 memcpy(val, vcpu->mmio_data, bytes); 3146 memcpy(val, vcpu->mmio_data, bytes);
@@ -2538,17 +3150,20 @@ static int emulator_read_emulated(unsigned long addr,
2538 return X86EMUL_CONTINUE; 3150 return X86EMUL_CONTINUE;
2539 } 3151 }
2540 3152
2541 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3153 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
3154
3155 if (gpa == UNMAPPED_GVA) {
3156 kvm_inject_page_fault(vcpu, addr, error_code);
3157 return X86EMUL_PROPAGATE_FAULT;
3158 }
2542 3159
2543 /* For APIC access vmexit */ 3160 /* For APIC access vmexit */
2544 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3161 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
2545 goto mmio; 3162 goto mmio;
2546 3163
2547 if (kvm_read_guest_virt(addr, val, bytes, vcpu) 3164 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
2548 == X86EMUL_CONTINUE) 3165 == X86EMUL_CONTINUE)
2549 return X86EMUL_CONTINUE; 3166 return X86EMUL_CONTINUE;
2550 if (gpa == UNMAPPED_GVA)
2551 return X86EMUL_PROPAGATE_FAULT;
2552 3167
2553mmio: 3168mmio:
2554 /* 3169 /*
@@ -2587,11 +3202,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
2587 struct kvm_vcpu *vcpu) 3202 struct kvm_vcpu *vcpu)
2588{ 3203{
2589 gpa_t gpa; 3204 gpa_t gpa;
3205 u32 error_code;
2590 3206
2591 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3207 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
2592 3208
2593 if (gpa == UNMAPPED_GVA) { 3209 if (gpa == UNMAPPED_GVA) {
2594 kvm_inject_page_fault(vcpu, addr, 2); 3210 kvm_inject_page_fault(vcpu, addr, error_code);
2595 return X86EMUL_PROPAGATE_FAULT; 3211 return X86EMUL_PROPAGATE_FAULT;
2596 } 3212 }
2597 3213
@@ -2655,7 +3271,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
2655 char *kaddr; 3271 char *kaddr;
2656 u64 val; 3272 u64 val;
2657 3273
2658 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3274 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
2659 3275
2660 if (gpa == UNMAPPED_GVA || 3276 if (gpa == UNMAPPED_GVA ||
2661 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3277 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -2692,35 +3308,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
2692 3308
2693int emulate_clts(struct kvm_vcpu *vcpu) 3309int emulate_clts(struct kvm_vcpu *vcpu)
2694{ 3310{
2695 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 3311 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
3312 kvm_x86_ops->fpu_activate(vcpu);
2696 return X86EMUL_CONTINUE; 3313 return X86EMUL_CONTINUE;
2697} 3314}
2698 3315
2699int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3316int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
2700{ 3317{
2701 struct kvm_vcpu *vcpu = ctxt->vcpu; 3318 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest);
2702
2703 switch (dr) {
2704 case 0 ... 3:
2705 *dest = kvm_x86_ops->get_dr(vcpu, dr);
2706 return X86EMUL_CONTINUE;
2707 default:
2708 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
2709 return X86EMUL_UNHANDLEABLE;
2710 }
2711} 3319}
2712 3320
2713int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3321int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
2714{ 3322{
2715 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3323 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
2716 int exception;
2717 3324
2718 kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); 3325 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask);
2719 if (exception) {
2720 /* FIXME: better handling */
2721 return X86EMUL_UNHANDLEABLE;
2722 }
2723 return X86EMUL_CONTINUE;
2724} 3326}
2725 3327
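emulator_set_dr() now just masks the operand and defers to the backend: outside 64-bit mode only the low 32 bits of a debug-register write are meaningful, hence ~0U versus ~0ULL. The width selection on its own, with a mode enum local to this example:

    #include <stdint.h>
    #include <stdio.h>

    enum emul_mode { MODE_PROT32, MODE_PROT64 };

    /* Truncate a debug-register operand to the width of the current mode. */
    static uint64_t dr_operand(enum emul_mode mode, uint64_t value)
    {
        uint64_t mask = (mode == MODE_PROT64) ? ~0ULL : ~0U;

        return value & mask;
    }

    int main(void)
    {
        /* prints 0x34567890: the upper half is dropped in 32-bit mode */
        printf("%#llx\n",
               (unsigned long long)dr_operand(MODE_PROT32, 0x1234567890ULL));
        return 0;
    }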
2726void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3328void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -2734,7 +3336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2734 3336
2735 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3337 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
2736 3338
2737 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); 3339 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
2738 3340
2739 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 3341 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
2740 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 3342 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2742,7 +3344,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2742EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3344EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
2743 3345
2744static struct x86_emulate_ops emulate_ops = { 3346static struct x86_emulate_ops emulate_ops = {
2745 .read_std = kvm_read_guest_virt, 3347 .read_std = kvm_read_guest_virt_system,
3348 .fetch = kvm_fetch_guest_virt,
2746 .read_emulated = emulator_read_emulated, 3349 .read_emulated = emulator_read_emulated,
2747 .write_emulated = emulator_write_emulated, 3350 .write_emulated = emulator_write_emulated,
2748 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3351 .cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -2757,13 +3360,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
2757} 3360}
2758 3361
2759int emulate_instruction(struct kvm_vcpu *vcpu, 3362int emulate_instruction(struct kvm_vcpu *vcpu,
2760 struct kvm_run *run,
2761 unsigned long cr2, 3363 unsigned long cr2,
2762 u16 error_code, 3364 u16 error_code,
2763 int emulation_type) 3365 int emulation_type)
2764{ 3366{
2765 int r, shadow_mask; 3367 int r, shadow_mask;
2766 struct decode_cache *c; 3368 struct decode_cache *c;
3369 struct kvm_run *run = vcpu->run;
2767 3370
2768 kvm_clear_exception_queue(vcpu); 3371 kvm_clear_exception_queue(vcpu);
2769 vcpu->arch.mmio_fault_cr2 = cr2; 3372 vcpu->arch.mmio_fault_cr2 = cr2;
@@ -2783,10 +3386,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2783 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3386 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
2784 3387
2785 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3388 vcpu->arch.emulate_ctxt.vcpu = vcpu;
2786 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); 3389 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
2787 vcpu->arch.emulate_ctxt.mode = 3390 vcpu->arch.emulate_ctxt.mode =
3391 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
2788 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3392 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
2789 ? X86EMUL_MODE_REAL : cs_l 3393 ? X86EMUL_MODE_VM86 : cs_l
2790 ? X86EMUL_MODE_PROT64 : cs_db 3394 ? X86EMUL_MODE_PROT64 : cs_db
2791 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3395 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
2792 3396
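The mode selection above gains a distinct VM86 case: real mode while CR0.PE is clear, VM86 when EFLAGS.VM is set inside protected mode, and otherwise 64-, 32- or 16-bit protected mode from the CS.L and CS.D bits. The same decision chain as a stand-alone sketch (enum and parameter names are local to the example, not the emulator's types):

    #include <stdbool.h>
    #include <stdio.h>

    enum emul_mode { MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64 };

    static enum emul_mode pick_mode(bool protmode, bool eflags_vm,
                                    bool cs_l, bool cs_db)
    {
        if (!protmode)
            return MODE_REAL;       /* CR0.PE clear */
        if (eflags_vm)
            return MODE_VM86;       /* virtual-8086 inside protected mode */
        if (cs_l)
            return MODE_PROT64;     /* 64-bit code segment */
        return cs_db ? MODE_PROT32 : MODE_PROT16;
    }

    int main(void)
    {
        printf("%d\n", pick_mode(true, false, false, true));  /* MODE_PROT32 */
        printf("%d\n", pick_mode(true, true, false, false));  /* MODE_VM86   */
        return 0;
    }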
@@ -2861,7 +3465,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2861 return EMULATE_DO_MMIO; 3465 return EMULATE_DO_MMIO;
2862 } 3466 }
2863 3467
2864 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 3468 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
2865 3469
2866 if (vcpu->mmio_is_write) { 3470 if (vcpu->mmio_is_write) {
2867 vcpu->mmio_needed = 0; 3471 vcpu->mmio_needed = 0;
@@ -2878,12 +3482,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
2878 gva_t q = vcpu->arch.pio.guest_gva; 3482 gva_t q = vcpu->arch.pio.guest_gva;
2879 unsigned bytes; 3483 unsigned bytes;
2880 int ret; 3484 int ret;
3485 u32 error_code;
2881 3486
2882 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 3487 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
2883 if (vcpu->arch.pio.in) 3488 if (vcpu->arch.pio.in)
2884 ret = kvm_write_guest_virt(q, p, bytes, vcpu); 3489 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
2885 else 3490 else
2886 ret = kvm_read_guest_virt(q, p, bytes, vcpu); 3491 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3492
3493 if (ret == X86EMUL_PROPAGATE_FAULT)
3494 kvm_inject_page_fault(vcpu, q, error_code);
3495
2887 return ret; 3496 return ret;
2888} 3497}
2889 3498
@@ -2904,7 +3513,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
2904 if (io->in) { 3513 if (io->in) {
2905 r = pio_copy_data(vcpu); 3514 r = pio_copy_data(vcpu);
2906 if (r) 3515 if (r)
2907 return r; 3516 goto out;
2908 } 3517 }
2909 3518
2910 delta = 1; 3519 delta = 1;
@@ -2931,7 +3540,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
2931 kvm_register_write(vcpu, VCPU_REGS_RSI, val); 3540 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
2932 } 3541 }
2933 } 3542 }
2934 3543out:
2935 io->count -= io->cur_count; 3544 io->count -= io->cur_count;
2936 io->cur_count = 0; 3545 io->cur_count = 0;
2937 3546
@@ -2944,11 +3553,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
2944 int r; 3553 int r;
2945 3554
2946 if (vcpu->arch.pio.in) 3555 if (vcpu->arch.pio.in)
2947 r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3556 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
2948 vcpu->arch.pio.size, pd); 3557 vcpu->arch.pio.size, pd);
2949 else 3558 else
2950 r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3559 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
2951 vcpu->arch.pio.size, pd); 3560 vcpu->arch.pio.port, vcpu->arch.pio.size,
3561 pd);
2952 return r; 3562 return r;
2953} 3563}
2954 3564
@@ -2959,7 +3569,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
2959 int i, r = 0; 3569 int i, r = 0;
2960 3570
2961 for (i = 0; i < io->cur_count; i++) { 3571 for (i = 0; i < io->cur_count; i++) {
2962 if (kvm_io_bus_write(&vcpu->kvm->pio_bus, 3572 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
2963 io->port, io->size, pd)) { 3573 io->port, io->size, pd)) {
2964 r = -EOPNOTSUPP; 3574 r = -EOPNOTSUPP;
2965 break; 3575 break;
@@ -2969,11 +3579,12 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
2969 return r; 3579 return r;
2970} 3580}
2971 3581
2972int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 3582int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
2973 int size, unsigned port)
2974{ 3583{
2975 unsigned long val; 3584 unsigned long val;
2976 3585
3586 trace_kvm_pio(!in, port, size, 1);
3587
2977 vcpu->run->exit_reason = KVM_EXIT_IO; 3588 vcpu->run->exit_reason = KVM_EXIT_IO;
2978 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3589 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
2979 vcpu->run->io.size = vcpu->arch.pio.size = size; 3590 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -2985,11 +3596,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2985 vcpu->arch.pio.down = 0; 3596 vcpu->arch.pio.down = 0;
2986 vcpu->arch.pio.rep = 0; 3597 vcpu->arch.pio.rep = 0;
2987 3598
2988 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, 3599 if (!vcpu->arch.pio.in) {
2989 size, 1); 3600 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
2990 3601 memcpy(vcpu->arch.pio_data, &val, 4);
2991 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3602 }
2992 memcpy(vcpu->arch.pio_data, &val, 4);
2993 3603
2994 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3604 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
2995 complete_pio(vcpu); 3605 complete_pio(vcpu);
@@ -2999,13 +3609,15 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2999} 3609}
3000EXPORT_SYMBOL_GPL(kvm_emulate_pio); 3610EXPORT_SYMBOL_GPL(kvm_emulate_pio);
3001 3611
3002int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 3612int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3003 int size, unsigned long count, int down, 3613 int size, unsigned long count, int down,
3004 gva_t address, int rep, unsigned port) 3614 gva_t address, int rep, unsigned port)
3005{ 3615{
3006 unsigned now, in_page; 3616 unsigned now, in_page;
3007 int ret = 0; 3617 int ret = 0;
3008 3618
3619 trace_kvm_pio(!in, port, size, count);
3620
3009 vcpu->run->exit_reason = KVM_EXIT_IO; 3621 vcpu->run->exit_reason = KVM_EXIT_IO;
3010 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3622 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3011 vcpu->run->io.size = vcpu->arch.pio.size = size; 3623 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3017,9 +3629,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
3017 vcpu->arch.pio.down = down; 3629 vcpu->arch.pio.down = down;
3018 vcpu->arch.pio.rep = rep; 3630 vcpu->arch.pio.rep = rep;
3019 3631
3020 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
3021 size, count);
3022
3023 if (!count) { 3632 if (!count) {
3024 kvm_x86_ops->skip_emulated_instruction(vcpu); 3633 kvm_x86_ops->skip_emulated_instruction(vcpu);
3025 return 1; 3634 return 1;
@@ -3051,10 +3660,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
3051 if (!vcpu->arch.pio.in) { 3660 if (!vcpu->arch.pio.in) {
3052 /* string PIO write */ 3661 /* string PIO write */
3053 ret = pio_copy_data(vcpu); 3662 ret = pio_copy_data(vcpu);
3054 if (ret == X86EMUL_PROPAGATE_FAULT) { 3663 if (ret == X86EMUL_PROPAGATE_FAULT)
3055 kvm_inject_gp(vcpu, 0);
3056 return 1; 3664 return 1;
3057 }
3058 if (ret == 0 && !pio_string_write(vcpu)) { 3665 if (ret == 0 && !pio_string_write(vcpu)) {
3059 complete_pio(vcpu); 3666 complete_pio(vcpu);
3060 if (vcpu->arch.pio.count == 0) 3667 if (vcpu->arch.pio.count == 0)
@@ -3072,9 +3679,6 @@ static void bounce_off(void *info)
3072 /* nothing */ 3679 /* nothing */
3073} 3680}
3074 3681
3075static unsigned int ref_freq;
3076static unsigned long tsc_khz_ref;
3077
3078static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 3682static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
3079 void *data) 3683 void *data)
3080{ 3684{
@@ -3083,14 +3687,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
3083 struct kvm_vcpu *vcpu; 3687 struct kvm_vcpu *vcpu;
3084 int i, send_ipi = 0; 3688 int i, send_ipi = 0;
3085 3689
3086 if (!ref_freq)
3087 ref_freq = freq->old;
3088
3089 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) 3690 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
3090 return 0; 3691 return 0;
3091 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) 3692 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
3092 return 0; 3693 return 0;
3093 per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); 3694 per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
3094 3695
3095 spin_lock(&kvm_lock); 3696 spin_lock(&kvm_lock);
3096 list_for_each_entry(kvm, &vm_list, vm_list) { 3697 list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3127,9 +3728,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
3127 .notifier_call = kvmclock_cpufreq_notifier 3728 .notifier_call = kvmclock_cpufreq_notifier
3128}; 3729};
3129 3730
3731static void kvm_timer_init(void)
3732{
3733 int cpu;
3734
3735 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3736 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
3737 CPUFREQ_TRANSITION_NOTIFIER);
3738 for_each_online_cpu(cpu) {
3739 unsigned long khz = cpufreq_get(cpu);
3740 if (!khz)
3741 khz = tsc_khz;
3742 per_cpu(cpu_tsc_khz, cpu) = khz;
3743 }
3744 } else {
3745 for_each_possible_cpu(cpu)
3746 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
3747 }
3748}
3749
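kvm_timer_init() folds the old per-CPU setup into one place: with a constant TSC every CPU is seeded with tsc_khz; otherwise a cpufreq transition notifier is registered and each online CPU starts from its current cpufreq rate, falling back to tsc_khz when cpufreq reports nothing. A sketch of just the seeding decision; has_constant_tsc(), cpufreq_khz_for() and NR_CPUS_DEMO are made-up stand-ins for the real feature test and cpufreq query:

    #include <stdio.h>

    #define NR_CPUS_DEMO 4
    static unsigned long tsc_khz_demo = 2400000;    /* assumed boot TSC rate */

    /* Hypothetical stand-ins for the feature test and the cpufreq query. */
    static int has_constant_tsc(void) { return 0; }
    static unsigned long cpufreq_khz_for(int cpu) { return cpu == 3 ? 0 : 1600000; }

    int main(void)
    {
        unsigned long cpu_tsc_khz[NR_CPUS_DEMO];
        int cpu;

        for (cpu = 0; cpu < NR_CPUS_DEMO; cpu++) {
            unsigned long khz;

            if (has_constant_tsc()) {
                khz = tsc_khz_demo;             /* one rate fits all CPUs */
            } else {
                khz = cpufreq_khz_for(cpu);     /* current cpufreq rate */
                if (!khz)
                    khz = tsc_khz_demo;         /* fallback, as in kvm_timer_init() */
            }
            cpu_tsc_khz[cpu] = khz;
            printf("cpu%d: %lu kHz\n", cpu, cpu_tsc_khz[cpu]);
        }
        return 0;
    }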
3130int kvm_arch_init(void *opaque) 3750int kvm_arch_init(void *opaque)
3131{ 3751{
3132 int r, cpu; 3752 int r;
3133 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; 3753 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
3134 3754
3135 if (kvm_x86_ops) { 3755 if (kvm_x86_ops) {
@@ -3161,13 +3781,7 @@ int kvm_arch_init(void *opaque)
3161 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 3781 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
3162 PT_DIRTY_MASK, PT64_NX_MASK, 0); 3782 PT_DIRTY_MASK, PT64_NX_MASK, 0);
3163 3783
3164 for_each_possible_cpu(cpu) 3784 kvm_timer_init();
3165 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
3166 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3167 tsc_khz_ref = tsc_khz;
3168 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
3169 CPUFREQ_TRANSITION_NOTIFIER);
3170 }
3171 3785
3172 return 0; 3786 return 0;
3173 3787
@@ -3206,11 +3820,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
3206 return a0 | ((gpa_t)a1 << 32); 3820 return a0 | ((gpa_t)a1 << 32);
3207} 3821}
3208 3822
3823int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
3824{
3825 u64 param, ingpa, outgpa, ret;
3826 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
3827 bool fast, longmode;
3828 int cs_db, cs_l;
3829
3830 /*
3831 * hypercall generates a #UD from non-zero CPL or real mode,
3832 * per the Hyper-V spec
3833 */
3834 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
3835 kvm_queue_exception(vcpu, UD_VECTOR);
3836 return 0;
3837 }
3838
3839 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3840 longmode = is_long_mode(vcpu) && cs_l == 1;
3841
3842 if (!longmode) {
3843 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
3844 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
3845 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
3846 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
3847 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
3848 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
3849 }
3850#ifdef CONFIG_X86_64
3851 else {
3852 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
3853 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
3854 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
3855 }
3856#endif
3857
3858 code = param & 0xffff;
3859 fast = (param >> 16) & 0x1;
3860 rep_cnt = (param >> 32) & 0xfff;
3861 rep_idx = (param >> 48) & 0xfff;
3862
3863 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
3864
3865 switch (code) {
3866 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
3867 kvm_vcpu_on_spin(vcpu);
3868 break;
3869 default:
3870 res = HV_STATUS_INVALID_HYPERCALL_CODE;
3871 break;
3872 }
3873
3874 ret = res | (((u64)rep_done & 0xfff) << 32);
3875 if (longmode) {
3876 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
3877 } else {
3878 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
3879 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
3880 }
3881
3882 return 1;
3883}
3884
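kvm_hv_hypercall() unpacks the hypercall input value exactly as shown above: call code in bits 0-15, the fast-call flag in bit 16, rep count in bits 32-43 and rep start index in bits 48-59, with the result carrying the status in the low word and the completed-rep count back in bits 32-43. A self-contained round-trip of that bit layout (struct and function names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct hv_call {
        uint16_t code;
        bool     fast;
        uint16_t rep_cnt;   /* 12 bits */
        uint16_t rep_idx;   /* 12 bits */
    };

    static struct hv_call hv_decode(uint64_t param)
    {
        struct hv_call c = {
            .code    = param & 0xffff,
            .fast    = (param >> 16) & 0x1,
            .rep_cnt = (param >> 32) & 0xfff,
            .rep_idx = (param >> 48) & 0xfff,
        };
        return c;
    }

    static uint64_t hv_result(uint16_t status, uint16_t rep_done)
    {
        return (uint64_t)status | (((uint64_t)rep_done & 0xfff) << 32);
    }

    int main(void)
    {
        struct hv_call c = hv_decode(0x0020000A00010008ULL);

        printf("code=%#x fast=%d rep_cnt=%u rep_idx=%u\n",
               c.code, c.fast, c.rep_cnt, c.rep_idx);
        printf("result=%#llx\n", (unsigned long long)hv_result(0, c.rep_cnt));
        return 0;
    }

Compiled stand-alone, the decode of 0x0020000A00010008 yields code 8, fast=1, rep_cnt 10 and rep_idx 32, matching the shifts in the hunk.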
3209int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 3885int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
3210{ 3886{
3211 unsigned long nr, a0, a1, a2, a3, ret; 3887 unsigned long nr, a0, a1, a2, a3, ret;
3212 int r = 1; 3888 int r = 1;
3213 3889
3890 if (kvm_hv_hypercall_enabled(vcpu->kvm))
3891 return kvm_hv_hypercall(vcpu);
3892
3214 nr = kvm_register_read(vcpu, VCPU_REGS_RAX); 3893 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
3215 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); 3894 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
3216 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); 3895 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -3253,10 +3932,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
3253int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 3932int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3254{ 3933{
3255 char instruction[3]; 3934 char instruction[3];
3256 int ret = 0;
3257 unsigned long rip = kvm_rip_read(vcpu); 3935 unsigned long rip = kvm_rip_read(vcpu);
3258 3936
3259
3260 /* 3937 /*
3261 * Blow out the MMU to ensure that no other VCPU has an active mapping 3938 * Blow out the MMU to ensure that no other VCPU has an active mapping
3262 * to ensure that the updated hypercall appears atomically across all 3939 * to ensure that the updated hypercall appears atomically across all
@@ -3265,11 +3942,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3265 kvm_mmu_zap_all(vcpu->kvm); 3942 kvm_mmu_zap_all(vcpu->kvm);
3266 3943
3267 kvm_x86_ops->patch_hypercall(vcpu, instruction); 3944 kvm_x86_ops->patch_hypercall(vcpu, instruction);
3268 if (emulator_write_emulated(rip, instruction, 3, vcpu)
3269 != X86EMUL_CONTINUE)
3270 ret = -EFAULT;
3271 3945
3272 return ret; 3946 return emulator_write_emulated(rip, instruction, 3, vcpu);
3273} 3947}
3274 3948
3275static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3949static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -3295,17 +3969,16 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
3295 unsigned long *rflags) 3969 unsigned long *rflags)
3296{ 3970{
3297 kvm_lmsw(vcpu, msw); 3971 kvm_lmsw(vcpu, msw);
3298 *rflags = kvm_x86_ops->get_rflags(vcpu); 3972 *rflags = kvm_get_rflags(vcpu);
3299} 3973}
3300 3974
3301unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 3975unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3302{ 3976{
3303 unsigned long value; 3977 unsigned long value;
3304 3978
3305 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
3306 switch (cr) { 3979 switch (cr) {
3307 case 0: 3980 case 0:
3308 value = vcpu->arch.cr0; 3981 value = kvm_read_cr0(vcpu);
3309 break; 3982 break;
3310 case 2: 3983 case 2:
3311 value = vcpu->arch.cr2; 3984 value = vcpu->arch.cr2;
@@ -3314,7 +3987,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3314 value = vcpu->arch.cr3; 3987 value = vcpu->arch.cr3;
3315 break; 3988 break;
3316 case 4: 3989 case 4:
3317 value = vcpu->arch.cr4; 3990 value = kvm_read_cr4(vcpu);
3318 break; 3991 break;
3319 case 8: 3992 case 8:
3320 value = kvm_get_cr8(vcpu); 3993 value = kvm_get_cr8(vcpu);
@@ -3332,8 +4005,8 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3332{ 4005{
3333 switch (cr) { 4006 switch (cr) {
3334 case 0: 4007 case 0:
3335 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 4008 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3336 *rflags = kvm_x86_ops->get_rflags(vcpu); 4009 *rflags = kvm_get_rflags(vcpu);
3337 break; 4010 break;
3338 case 2: 4011 case 2:
3339 vcpu->arch.cr2 = val; 4012 vcpu->arch.cr2 = val;
@@ -3342,7 +4015,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3342 kvm_set_cr3(vcpu, val); 4015 kvm_set_cr3(vcpu, val);
3343 break; 4016 break;
3344 case 4: 4017 case 4:
3345 kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 4018 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3346 break; 4019 break;
3347 case 8: 4020 case 8:
3348 kvm_set_cr8(vcpu, val & 0xfUL); 4021 kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3409,6 +4082,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
3409 } 4082 }
3410 return best; 4083 return best;
3411} 4084}
4085EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
3412 4086
3413int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) 4087int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
3414{ 4088{
@@ -3453,18 +4127,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
3453 * 4127 *
3454 * No need to exit to userspace if we already have an interrupt queued. 4128 * No need to exit to userspace if we already have an interrupt queued.
3455 */ 4129 */
3456static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, 4130static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
3457 struct kvm_run *kvm_run)
3458{ 4131{
3459 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && 4132 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
3460 kvm_run->request_interrupt_window && 4133 vcpu->run->request_interrupt_window &&
3461 kvm_arch_interrupt_allowed(vcpu)); 4134 kvm_arch_interrupt_allowed(vcpu));
3462} 4135}
3463 4136
3464static void post_kvm_run_save(struct kvm_vcpu *vcpu, 4137static void post_kvm_run_save(struct kvm_vcpu *vcpu)
3465 struct kvm_run *kvm_run)
3466{ 4138{
3467 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; 4139 struct kvm_run *kvm_run = vcpu->run;
4140
4141 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
3468 kvm_run->cr8 = kvm_get_cr8(vcpu); 4142 kvm_run->cr8 = kvm_get_cr8(vcpu);
3469 kvm_run->apic_base = kvm_get_apic_base(vcpu); 4143 kvm_run->apic_base = kvm_get_apic_base(vcpu);
3470 if (irqchip_in_kernel(vcpu->kvm)) 4144 if (irqchip_in_kernel(vcpu->kvm))
@@ -3492,14 +4166,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
3492static void vapic_exit(struct kvm_vcpu *vcpu) 4166static void vapic_exit(struct kvm_vcpu *vcpu)
3493{ 4167{
3494 struct kvm_lapic *apic = vcpu->arch.apic; 4168 struct kvm_lapic *apic = vcpu->arch.apic;
4169 int idx;
3495 4170
3496 if (!apic || !apic->vapic_addr) 4171 if (!apic || !apic->vapic_addr)
3497 return; 4172 return;
3498 4173
3499 down_read(&vcpu->kvm->slots_lock); 4174 idx = srcu_read_lock(&vcpu->kvm->srcu);
3500 kvm_release_page_dirty(apic->vapic_page); 4175 kvm_release_page_dirty(apic->vapic_page);
3501 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4176 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
3502 up_read(&vcpu->kvm->slots_lock); 4177 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3503} 4178}
3504 4179
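vapic_exit() (and the run loop further down) replace the old slots_lock readers with SRCU read-side critical sections: srcu_read_lock() hands back an index that must be passed to the matching srcu_read_unlock(), which is why vcpu->srcu_idx is now carried across the guest-entry window. A bare-bones module sketch of that lock/unlock pairing, offered only to illustrate the SRCU API, not KVM's actual structures:

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/srcu.h>

    static struct srcu_struct demo_srcu;
    static int demo_value = 42;

    static int __init srcu_demo_init(void)
    {
        int idx;

        init_srcu_struct(&demo_srcu);

        /* Read side: the index returned by srcu_read_lock() must be
         * handed back to the matching srcu_read_unlock(). */
        idx = srcu_read_lock(&demo_srcu);
        pr_info("srcu demo: value %d\n", demo_value);
        srcu_read_unlock(&demo_srcu, idx);

        /* A writer would publish its update and then wait here. */
        synchronize_srcu(&demo_srcu);
        return 0;
    }

    static void __exit srcu_demo_exit(void)
    {
        cleanup_srcu_struct(&demo_srcu);
    }

    module_init(srcu_demo_init);
    module_exit(srcu_demo_exit);
    MODULE_LICENSE("GPL");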
3505static void update_cr8_intercept(struct kvm_vcpu *vcpu) 4180static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -3525,7 +4200,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
3525 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); 4200 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
3526} 4201}
3527 4202
3528static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4203static void inject_pending_event(struct kvm_vcpu *vcpu)
3529{ 4204{
3530 /* try to reinject previous events if any */ 4205 /* try to reinject previous events if any */
3531 if (vcpu->arch.exception.pending) { 4206 if (vcpu->arch.exception.pending) {
@@ -3561,11 +4236,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3561 } 4236 }
3562} 4237}
3563 4238
3564static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4239static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3565{ 4240{
3566 int r; 4241 int r;
3567 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 4242 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
3568 kvm_run->request_interrupt_window; 4243 vcpu->run->request_interrupt_window;
3569 4244
3570 if (vcpu->requests) 4245 if (vcpu->requests)
3571 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 4246 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3586,21 +4261,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3586 kvm_x86_ops->tlb_flush(vcpu); 4261 kvm_x86_ops->tlb_flush(vcpu);
3587 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 4262 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
3588 &vcpu->requests)) { 4263 &vcpu->requests)) {
3589 kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; 4264 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
3590 r = 0; 4265 r = 0;
3591 goto out; 4266 goto out;
3592 } 4267 }
3593 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { 4268 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
3594 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 4269 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
3595 r = 0; 4270 r = 0;
3596 goto out; 4271 goto out;
3597 } 4272 }
4273 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
4274 vcpu->fpu_active = 0;
4275 kvm_x86_ops->fpu_deactivate(vcpu);
4276 }
3598 } 4277 }
3599 4278
3600 preempt_disable(); 4279 preempt_disable();
3601 4280
3602 kvm_x86_ops->prepare_guest_switch(vcpu); 4281 kvm_x86_ops->prepare_guest_switch(vcpu);
3603 kvm_load_guest_fpu(vcpu); 4282 if (vcpu->fpu_active)
4283 kvm_load_guest_fpu(vcpu);
3604 4284
3605 local_irq_disable(); 4285 local_irq_disable();
3606 4286
@@ -3615,7 +4295,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3615 goto out; 4295 goto out;
3616 } 4296 }
3617 4297
3618 inject_pending_event(vcpu, kvm_run); 4298 inject_pending_event(vcpu);
3619 4299
3620 /* enable NMI/IRQ window open exits if needed */ 4300 /* enable NMI/IRQ window open exits if needed */
3621 if (vcpu->arch.nmi_pending) 4301 if (vcpu->arch.nmi_pending)
@@ -3628,7 +4308,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3628 kvm_lapic_sync_to_vapic(vcpu); 4308 kvm_lapic_sync_to_vapic(vcpu);
3629 } 4309 }
3630 4310
3631 up_read(&vcpu->kvm->slots_lock); 4311 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3632 4312
3633 kvm_guest_enter(); 4313 kvm_guest_enter();
3634 4314
@@ -3641,16 +4321,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3641 } 4321 }
3642 4322
3643 trace_kvm_entry(vcpu->vcpu_id); 4323 trace_kvm_entry(vcpu->vcpu_id);
3644 kvm_x86_ops->run(vcpu, kvm_run); 4324 kvm_x86_ops->run(vcpu);
3645 4325
3646 if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { 4326 /*
3647 set_debugreg(current->thread.debugreg0, 0); 4327 * If the guest has used debug registers, at least dr7
3648 set_debugreg(current->thread.debugreg1, 1); 4328 * will be disabled while returning to the host.
3649 set_debugreg(current->thread.debugreg2, 2); 4329 * If we don't have active breakpoints in the host, we don't
3650 set_debugreg(current->thread.debugreg3, 3); 4330 * care about the messed up debug address registers. But if
3651 set_debugreg(current->thread.debugreg6, 6); 4331 * we have some of them active, restore the old state.
3652 set_debugreg(current->thread.debugreg7, 7); 4332 */
3653 } 4333 if (hw_breakpoint_active())
4334 hw_breakpoint_restore();
3654 4335
3655 set_bit(KVM_REQ_KICK, &vcpu->requests); 4336 set_bit(KVM_REQ_KICK, &vcpu->requests);
3656 local_irq_enable(); 4337 local_irq_enable();
@@ -3669,7 +4350,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3669 4350
3670 preempt_enable(); 4351 preempt_enable();
3671 4352
3672 down_read(&vcpu->kvm->slots_lock); 4353 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3673 4354
3674 /* 4355 /*
3675 * Profile KVM exit RIPs: 4356 * Profile KVM exit RIPs:
@@ -3682,15 +4363,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3682 4363
3683 kvm_lapic_sync_from_vapic(vcpu); 4364 kvm_lapic_sync_from_vapic(vcpu);
3684 4365
3685 r = kvm_x86_ops->handle_exit(kvm_run, vcpu); 4366 r = kvm_x86_ops->handle_exit(vcpu);
3686out: 4367out:
3687 return r; 4368 return r;
3688} 4369}
3689 4370
3690 4371
3691static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4372static int __vcpu_run(struct kvm_vcpu *vcpu)
3692{ 4373{
3693 int r; 4374 int r;
4375 struct kvm *kvm = vcpu->kvm;
3694 4376
3695 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { 4377 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
3696 pr_debug("vcpu %d received sipi with vector # %x\n", 4378 pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -3702,17 +4384,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3702 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4384 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3703 } 4385 }
3704 4386
3705 down_read(&vcpu->kvm->slots_lock); 4387 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3706 vapic_enter(vcpu); 4388 vapic_enter(vcpu);
3707 4389
3708 r = 1; 4390 r = 1;
3709 while (r > 0) { 4391 while (r > 0) {
3710 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 4392 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
3711 r = vcpu_enter_guest(vcpu, kvm_run); 4393 r = vcpu_enter_guest(vcpu);
3712 else { 4394 else {
3713 up_read(&vcpu->kvm->slots_lock); 4395 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
3714 kvm_vcpu_block(vcpu); 4396 kvm_vcpu_block(vcpu);
3715 down_read(&vcpu->kvm->slots_lock); 4397 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3716 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4398 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
3717 { 4399 {
3718 switch(vcpu->arch.mp_state) { 4400 switch(vcpu->arch.mp_state) {
@@ -3736,25 +4418,25 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3736 if (kvm_cpu_has_pending_timer(vcpu)) 4418 if (kvm_cpu_has_pending_timer(vcpu))
3737 kvm_inject_pending_timer_irqs(vcpu); 4419 kvm_inject_pending_timer_irqs(vcpu);
3738 4420
3739 if (dm_request_for_irq_injection(vcpu, kvm_run)) { 4421 if (dm_request_for_irq_injection(vcpu)) {
3740 r = -EINTR; 4422 r = -EINTR;
3741 kvm_run->exit_reason = KVM_EXIT_INTR; 4423 vcpu->run->exit_reason = KVM_EXIT_INTR;
3742 ++vcpu->stat.request_irq_exits; 4424 ++vcpu->stat.request_irq_exits;
3743 } 4425 }
3744 if (signal_pending(current)) { 4426 if (signal_pending(current)) {
3745 r = -EINTR; 4427 r = -EINTR;
3746 kvm_run->exit_reason = KVM_EXIT_INTR; 4428 vcpu->run->exit_reason = KVM_EXIT_INTR;
3747 ++vcpu->stat.signal_exits; 4429 ++vcpu->stat.signal_exits;
3748 } 4430 }
3749 if (need_resched()) { 4431 if (need_resched()) {
3750 up_read(&vcpu->kvm->slots_lock); 4432 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
3751 kvm_resched(vcpu); 4433 kvm_resched(vcpu);
3752 down_read(&vcpu->kvm->slots_lock); 4434 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3753 } 4435 }
3754 } 4436 }
3755 4437
3756 up_read(&vcpu->kvm->slots_lock); 4438 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
3757 post_kvm_run_save(vcpu, kvm_run); 4439 post_kvm_run_save(vcpu);
3758 4440
3759 vapic_exit(vcpu); 4441 vapic_exit(vcpu);
3760 4442
@@ -3783,21 +4465,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3783 kvm_set_cr8(vcpu, kvm_run->cr8); 4465 kvm_set_cr8(vcpu, kvm_run->cr8);
3784 4466
3785 if (vcpu->arch.pio.cur_count) { 4467 if (vcpu->arch.pio.cur_count) {
4468 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3786 r = complete_pio(vcpu); 4469 r = complete_pio(vcpu);
4470 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3787 if (r) 4471 if (r)
3788 goto out; 4472 goto out;
3789 } 4473 }
3790#if CONFIG_HAS_IOMEM
3791 if (vcpu->mmio_needed) { 4474 if (vcpu->mmio_needed) {
3792 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 4475 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
3793 vcpu->mmio_read_completed = 1; 4476 vcpu->mmio_read_completed = 1;
3794 vcpu->mmio_needed = 0; 4477 vcpu->mmio_needed = 0;
3795 4478
3796 down_read(&vcpu->kvm->slots_lock); 4479 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3797 r = emulate_instruction(vcpu, kvm_run, 4480 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
3798 vcpu->arch.mmio_fault_cr2, 0,
3799 EMULTYPE_NO_DECODE); 4481 EMULTYPE_NO_DECODE);
3800 up_read(&vcpu->kvm->slots_lock); 4482 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3801 if (r == EMULATE_DO_MMIO) { 4483 if (r == EMULATE_DO_MMIO) {
3802 /* 4484 /*
3803 * Read-modify-write. Back to userspace. 4485 * Read-modify-write. Back to userspace.
@@ -3806,12 +4488,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3806 goto out; 4488 goto out;
3807 } 4489 }
3808 } 4490 }
3809#endif
3810 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) 4491 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
3811 kvm_register_write(vcpu, VCPU_REGS_RAX, 4492 kvm_register_write(vcpu, VCPU_REGS_RAX,
3812 kvm_run->hypercall.ret); 4493 kvm_run->hypercall.ret);
3813 4494
3814 r = __vcpu_run(vcpu, kvm_run); 4495 r = __vcpu_run(vcpu);
3815 4496
3816out: 4497out:
3817 if (vcpu->sigset_active) 4498 if (vcpu->sigset_active)
@@ -3845,13 +4526,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3845#endif 4526#endif
3846 4527
3847 regs->rip = kvm_rip_read(vcpu); 4528 regs->rip = kvm_rip_read(vcpu);
3848 regs->rflags = kvm_x86_ops->get_rflags(vcpu); 4529 regs->rflags = kvm_get_rflags(vcpu);
3849
3850 /*
3851 * Don't leak debug flags in case they were set for guest debugging
3852 */
3853 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3854 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
3855 4530
3856 vcpu_put(vcpu); 4531 vcpu_put(vcpu);
3857 4532
@@ -3879,12 +4554,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3879 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); 4554 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
3880 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); 4555 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
3881 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); 4556 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
3882
3883#endif 4557#endif
3884 4558
3885 kvm_rip_write(vcpu, regs->rip); 4559 kvm_rip_write(vcpu, regs->rip);
3886 kvm_x86_ops->set_rflags(vcpu, regs->rflags); 4560 kvm_set_rflags(vcpu, regs->rflags);
3887
3888 4561
3889 vcpu->arch.exception.pending = false; 4562 vcpu->arch.exception.pending = false;
3890 4563
@@ -3933,13 +4606,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3933 sregs->gdt.limit = dt.limit; 4606 sregs->gdt.limit = dt.limit;
3934 sregs->gdt.base = dt.base; 4607 sregs->gdt.base = dt.base;
3935 4608
3936 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 4609 sregs->cr0 = kvm_read_cr0(vcpu);
3937 sregs->cr0 = vcpu->arch.cr0;
3938 sregs->cr2 = vcpu->arch.cr2; 4610 sregs->cr2 = vcpu->arch.cr2;
3939 sregs->cr3 = vcpu->arch.cr3; 4611 sregs->cr3 = vcpu->arch.cr3;
3940 sregs->cr4 = vcpu->arch.cr4; 4612 sregs->cr4 = kvm_read_cr4(vcpu);
3941 sregs->cr8 = kvm_get_cr8(vcpu); 4613 sregs->cr8 = kvm_get_cr8(vcpu);
3942 sregs->efer = vcpu->arch.shadow_efer; 4614 sregs->efer = vcpu->arch.efer;
3943 sregs->apic_base = kvm_get_apic_base(vcpu); 4615 sregs->apic_base = kvm_get_apic_base(vcpu);
3944 4616
3945 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); 4617 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
@@ -4027,14 +4699,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4027{ 4699{
4028 struct descriptor_table dtable; 4700 struct descriptor_table dtable;
4029 u16 index = selector >> 3; 4701 u16 index = selector >> 3;
4702 int ret;
4703 u32 err;
4704 gva_t addr;
4030 4705
4031 get_segment_descriptor_dtable(vcpu, selector, &dtable); 4706 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4032 4707
4033 if (dtable.limit < index * 8 + 7) { 4708 if (dtable.limit < index * 8 + 7) {
4034 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4709 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4035 return 1; 4710 return X86EMUL_PROPAGATE_FAULT;
4036 } 4711 }
4037 return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4712 addr = dtable.base + index * 8;
4713 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4714 vcpu, &err);
4715 if (ret == X86EMUL_PROPAGATE_FAULT)
4716 kvm_inject_page_fault(vcpu, addr, err);
4717
4718 return ret;
4038} 4719}
4039 4720
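load_guest_segment_descriptor() turns a selector into a table offset: the descriptor index is selector >> 3, each entry is 8 bytes, and the table limit must cover index * 8 + 7, otherwise a #GP with the selector's error code (selector & 0xfffc) is queued; a fault on the actual read is forwarded as a page fault. The address/limit arithmetic by itself, with names local to the example:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Locate a descriptor in its table; false means the limit check fails (#GP). */
    static bool descriptor_addr(uint64_t table_base, uint32_t table_limit,
                                uint16_t selector, uint64_t *addr)
    {
        uint32_t index = selector >> 3;      /* bits 3..15 select the entry */

        if (table_limit < index * 8 + 7)     /* the 8-byte entry must fit entirely */
            return false;                    /* #GP(selector & 0xfffc) */

        *addr = table_base + index * 8;
        return true;
    }

    int main(void)
    {
        uint64_t addr;

        if (descriptor_addr(0xfffe0000u, 0xff, 0x23, &addr))
            printf("descriptor at %#llx\n", (unsigned long long)addr); /* 0xfffe0020 */
        return 0;
    }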
4040/* allowed just for 8 bytes segments */ 4721/* allowed just for 8 bytes segments */
@@ -4048,15 +4729,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4048 4729
4049 if (dtable.limit < index * 8 + 7) 4730 if (dtable.limit < index * 8 + 7)
4050 return 1; 4731 return 1;
4051 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4732 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4733}
4734
4735static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4736 struct desc_struct *seg_desc)
4737{
4738 u32 base_addr = get_desc_base(seg_desc);
4739
4740 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4052} 4741}
4053 4742
4054static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, 4743static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4055 struct desc_struct *seg_desc) 4744 struct desc_struct *seg_desc)
4056{ 4745{
4057 u32 base_addr = get_desc_base(seg_desc); 4746 u32 base_addr = get_desc_base(seg_desc);
4058 4747
4059 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); 4748 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4060} 4749}
4061 4750
4062static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 4751static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -4067,18 +4756,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4067 return kvm_seg.selector; 4756 return kvm_seg.selector;
4068} 4757}
4069 4758
4070static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
4071 u16 selector,
4072 struct kvm_segment *kvm_seg)
4073{
4074 struct desc_struct seg_desc;
4075
4076 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
4077 return 1;
4078 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
4079 return 0;
4080}
4081
4082static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) 4759static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4083{ 4760{
4084 struct kvm_segment segvar = { 4761 struct kvm_segment segvar = {
@@ -4096,34 +4773,122 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
4096 .unusable = 0, 4773 .unusable = 0,
4097 }; 4774 };
4098 kvm_x86_ops->set_segment(vcpu, &segvar, seg); 4775 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4099 return 0; 4776 return X86EMUL_CONTINUE;
4100} 4777}
4101 4778
4102static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) 4779static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4103{ 4780{
4104 return (seg != VCPU_SREG_LDTR) && 4781 return (seg != VCPU_SREG_LDTR) &&
4105 (seg != VCPU_SREG_TR) && 4782 (seg != VCPU_SREG_TR) &&
4106 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM); 4783 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4107} 4784}
4108 4785
4109int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4786int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4110 int type_bits, int seg)
4111{ 4787{
4112 struct kvm_segment kvm_seg; 4788 struct kvm_segment kvm_seg;
4789 struct desc_struct seg_desc;
4790 u8 dpl, rpl, cpl;
4791 unsigned err_vec = GP_VECTOR;
4792 u32 err_code = 0;
4793 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4794 int ret;
4113 4795
4114 if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) 4796 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
4115 return kvm_load_realmode_segment(vcpu, selector, seg); 4797 return kvm_load_realmode_segment(vcpu, selector, seg);
4116 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
4117 return 1;
4118 kvm_seg.type |= type_bits;
4119 4798
4120 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && 4799 /* NULL selector is not valid for TR, CS and SS */
4121 seg != VCPU_SREG_LDTR) 4800 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
4122 if (!kvm_seg.s) 4801 && null_selector)
4123 kvm_seg.unusable = 1; 4802 goto exception;
4803
4804 /* TR should be in GDT only */
4805 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
4806 goto exception;
4807
4808 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4809 if (ret)
4810 return ret;
4811
4812 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4813
4814 if (null_selector) { /* for NULL selector skip all following checks */
4815 kvm_seg.unusable = 1;
4816 goto load;
4817 }
4818
4819 err_code = selector & 0xfffc;
4820 err_vec = GP_VECTOR;
4124 4821
4822 /* can't load a system descriptor into a segment register */
4823 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4824 goto exception;
4825
4826 if (!kvm_seg.present) {
4827 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4828 goto exception;
4829 }
4830
4831 rpl = selector & 3;
4832 dpl = kvm_seg.dpl;
4833 cpl = kvm_x86_ops->get_cpl(vcpu);
4834
4835 switch (seg) {
4836 case VCPU_SREG_SS:
4837 /*
4838 * segment is not a writable data segment, or the segment
4839 * selector's RPL != CPL, or the segment descriptor's DPL != CPL
4840 */
4841 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4842 goto exception;
4843 break;
4844 case VCPU_SREG_CS:
4845 if (!(kvm_seg.type & 8))
4846 goto exception;
4847
4848 if (kvm_seg.type & 4) {
4849 /* conforming */
4850 if (dpl > cpl)
4851 goto exception;
4852 } else {
4853 /* nonconforming */
4854 if (rpl > cpl || dpl != cpl)
4855 goto exception;
4856 }
4857 /* CS(RPL) <- CPL */
4858 selector = (selector & 0xfffc) | cpl;
4859 break;
4860 case VCPU_SREG_TR:
4861 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4862 goto exception;
4863 break;
4864 case VCPU_SREG_LDTR:
4865 if (kvm_seg.s || kvm_seg.type != 2)
4866 goto exception;
4867 break;
4868 default: /* DS, ES, FS, or GS */
4869 /*
4870 * segment is not a data or readable code segment or
4871 * ((segment is a data or nonconforming code segment)
4872 * and (both RPL and CPL > DPL))
4873 */
4874 if ((kvm_seg.type & 0xa) == 0x8 ||
4875 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4876 goto exception;
4877 break;
4878 }
4879
4880 if (!kvm_seg.unusable && kvm_seg.s) {
4881 /* mark segment as accessed */
4882 kvm_seg.type |= 1;
4883 seg_desc.type |= 1;
4884 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4885 }
4886load:
4125 kvm_set_segment(vcpu, &kvm_seg, seg); 4887 kvm_set_segment(vcpu, &kvm_seg, seg);
4126 return 0; 4888 return X86EMUL_CONTINUE;
4889exception:
4890 kvm_queue_exception_e(vcpu, err_vec, err_code);
4891 return X86EMUL_PROPAGATE_FAULT;
4127} 4892}
4128 4893
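kvm_load_segment_descriptor() now performs the protected-mode checks itself: null selectors are rejected for CS/SS/TR, system descriptors cannot be loaded into the ordinary segment registers, and the RPL/DPL/CPL relations differ per register (SS wants a writable data segment with RPL == DPL == CPL; CS wants a code segment, conforming or not; plain data segments fault only when both RPL and CPL exceed DPL). A reduced sketch of just the SS and data-segment rules, with made-up names and only the fields the checks need:

    #include <stdbool.h>
    #include <stdio.h>

    struct seg { unsigned type; bool s; bool present; unsigned dpl; };

    /* SS: writable data segment ((type & 0xa) == 0x2) and RPL == DPL == CPL. */
    static bool ss_load_ok(struct seg d, unsigned rpl, unsigned cpl)
    {
        if (!d.s || !d.present)
            return false;
        return rpl == cpl && d.dpl == cpl && (d.type & 0xa) == 0x2;
    }

    /* DS/ES/FS/GS: not execute-only code; data or nonconforming code
     * faults only when both RPL and CPL exceed DPL. */
    static bool data_load_ok(struct seg d, unsigned rpl, unsigned cpl)
    {
        if (!d.s || !d.present)
            return false;
        if ((d.type & 0xa) == 0x8)           /* execute-only code segment */
            return false;
        if ((d.type & 0xc) != 0xc &&         /* not a conforming code segment */
            rpl > d.dpl && cpl > d.dpl)
            return false;
        return true;
    }

    int main(void)
    {
        struct seg stack  = { .type = 0x3, .s = true, .present = true, .dpl = 3 };
        struct seg rodata = { .type = 0x1, .s = true, .present = true, .dpl = 0 };

        printf("SS @ CPL3: %d\n", ss_load_ok(stack, 3, 3));     /* 1 */
        printf("DS @ CPL3: %d\n", data_load_ok(rodata, 3, 3));  /* 0: DPL 0 < RPL,CPL */
        return 0;
    }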
4129static void save_state_to_tss32(struct kvm_vcpu *vcpu, 4894static void save_state_to_tss32(struct kvm_vcpu *vcpu,
@@ -4131,7 +4896,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4131{ 4896{
4132 tss->cr3 = vcpu->arch.cr3; 4897 tss->cr3 = vcpu->arch.cr3;
4133 tss->eip = kvm_rip_read(vcpu); 4898 tss->eip = kvm_rip_read(vcpu);
4134 tss->eflags = kvm_x86_ops->get_rflags(vcpu); 4899 tss->eflags = kvm_get_rflags(vcpu);
4135 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4900 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4136 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4901 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4137 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4902 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4149,13 +4914,21 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4149 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4914 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4150} 4915}
4151 4916
4917static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4918{
4919 struct kvm_segment kvm_seg;
4920 kvm_get_segment(vcpu, &kvm_seg, seg);
4921 kvm_seg.selector = sel;
4922 kvm_set_segment(vcpu, &kvm_seg, seg);
4923}
4924
4152static int load_state_from_tss32(struct kvm_vcpu *vcpu, 4925static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4153 struct tss_segment_32 *tss) 4926 struct tss_segment_32 *tss)
4154{ 4927{
4155 kvm_set_cr3(vcpu, tss->cr3); 4928 kvm_set_cr3(vcpu, tss->cr3);
4156 4929
4157 kvm_rip_write(vcpu, tss->eip); 4930 kvm_rip_write(vcpu, tss->eip);
4158 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); 4931 kvm_set_rflags(vcpu, tss->eflags | 2);
4159 4932
4160 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); 4933 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
4161 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); 4934 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
@@ -4166,25 +4939,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4166 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); 4939 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
4167 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); 4940 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
4168 4941
4169 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) 4942 /*
4943 * SDM says that segment selectors are loaded before segment
4944 * descriptors
4945 */
4946 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
4947 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
4948 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
4949 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
4950 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
4951 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
4952 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
4953
4954 /*
4955 * Now load the segment descriptors. If a fault happens at this stage,
4956 * it is handled in the context of the new task
4957 */
4958 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
4170 return 1; 4959 return 1;
4171 4960
4172 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 4961 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4173 return 1; 4962 return 1;
4174 4963
4175 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 4964 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4176 return 1; 4965 return 1;
4177 4966
4178 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 4967 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4179 return 1; 4968 return 1;
4180 4969
4181 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 4970 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4182 return 1; 4971 return 1;
4183 4972
4184 if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) 4973 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
4185 return 1; 4974 return 1;
4186 4975
4187 if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) 4976 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
4188 return 1; 4977 return 1;
4189 return 0; 4978 return 0;
4190} 4979}
@@ -4193,7 +4982,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
4193 struct tss_segment_16 *tss) 4982 struct tss_segment_16 *tss)
4194{ 4983{
4195 tss->ip = kvm_rip_read(vcpu); 4984 tss->ip = kvm_rip_read(vcpu);
4196 tss->flag = kvm_x86_ops->get_rflags(vcpu); 4985 tss->flag = kvm_get_rflags(vcpu);
4197 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4986 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4198 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4987 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4199 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4988 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4208,14 +4997,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
4208 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); 4997 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
4209 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); 4998 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
4210 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4999 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4211 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
4212} 5000}
4213 5001
4214static int load_state_from_tss16(struct kvm_vcpu *vcpu, 5002static int load_state_from_tss16(struct kvm_vcpu *vcpu,
4215 struct tss_segment_16 *tss) 5003 struct tss_segment_16 *tss)
4216{ 5004{
4217 kvm_rip_write(vcpu, tss->ip); 5005 kvm_rip_write(vcpu, tss->ip);
4218 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); 5006 kvm_set_rflags(vcpu, tss->flag | 2);
4219 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); 5007 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
4220 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); 5008 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
4221 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); 5009 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
@@ -4225,19 +5013,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
4225 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); 5013 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
4226 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); 5014 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
4227 5015
4228 if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) 5016 /*
5017 * SDM says that segment selectors are loaded before segment
5018 * descriptors
5019 */
5020 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5021 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5022 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5023 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5024 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5025
5026 /*
5027 * Now load the segment descriptors. If a fault happens at this stage,
5028 * it is handled in the context of the new task
5029 */
5030 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
4229 return 1; 5031 return 1;
4230 5032
4231 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 5033 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4232 return 1; 5034 return 1;
4233 5035
4234 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 5036 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4235 return 1; 5037 return 1;
4236 5038
4237 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 5039 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4238 return 1; 5040 return 1;
4239 5041
4240 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 5042 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4241 return 1; 5043 return 1;
4242 return 0; 5044 return 0;
4243} 5045}
@@ -4259,7 +5061,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4259 sizeof tss_segment_16)) 5061 sizeof tss_segment_16))
4260 goto out; 5062 goto out;
4261 5063
4262 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5064 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4263 &tss_segment_16, sizeof tss_segment_16)) 5065 &tss_segment_16, sizeof tss_segment_16))
4264 goto out; 5066 goto out;
4265 5067
@@ -4267,7 +5069,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4267 tss_segment_16.prev_task_link = old_tss_sel; 5069 tss_segment_16.prev_task_link = old_tss_sel;
4268 5070
4269 if (kvm_write_guest(vcpu->kvm, 5071 if (kvm_write_guest(vcpu->kvm,
4270 get_tss_base_addr(vcpu, nseg_desc), 5072 get_tss_base_addr_write(vcpu, nseg_desc),
4271 &tss_segment_16.prev_task_link, 5073 &tss_segment_16.prev_task_link,
4272 sizeof tss_segment_16.prev_task_link)) 5074 sizeof tss_segment_16.prev_task_link))
4273 goto out; 5075 goto out;
@@ -4298,7 +5100,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4298 sizeof tss_segment_32)) 5100 sizeof tss_segment_32))
4299 goto out; 5101 goto out;
4300 5102
4301 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5103 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4302 &tss_segment_32, sizeof tss_segment_32)) 5104 &tss_segment_32, sizeof tss_segment_32))
4303 goto out; 5105 goto out;
4304 5106
@@ -4306,7 +5108,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4306 tss_segment_32.prev_task_link = old_tss_sel; 5108 tss_segment_32.prev_task_link = old_tss_sel;
4307 5109
4308 if (kvm_write_guest(vcpu->kvm, 5110 if (kvm_write_guest(vcpu->kvm,
4309 get_tss_base_addr(vcpu, nseg_desc), 5111 get_tss_base_addr_write(vcpu, nseg_desc),
4310 &tss_segment_32.prev_task_link, 5112 &tss_segment_32.prev_task_link,
4311 sizeof tss_segment_32.prev_task_link)) 5113 sizeof tss_segment_32.prev_task_link))
4312 goto out; 5114 goto out;
@@ -4328,8 +5130,9 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4328 int ret = 0; 5130 int ret = 0;
4329 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5131 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
4330 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5132 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5133 u32 desc_limit;
4331 5134
4332 old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); 5135 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
4333 5136
4334 /* FIXME: Handle errors. Failure to read either TSS or their 5137 /* FIXME: Handle errors. Failure to read either TSS or their
4335 * descriptors should generate a pagefault. 5138 * descriptors should generate a pagefault.
@@ -4350,7 +5153,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4350 } 5153 }
4351 } 5154 }
4352 5155
4353 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { 5156 desc_limit = get_desc_limit(&nseg_desc);
5157 if (!nseg_desc.p ||
5158 ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
5159 desc_limit < 0x2b)) {
4354 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); 5160 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
4355 return 1; 5161 return 1;
4356 } 5162 }
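
The strengthened check validates the new TSS descriptor's limit against what the hardware actually requires: a 32-bit TSS (system-descriptor type with bit 3 set, i.e. 9 or 11) spans 0x68 bytes, so its limit must be at least 0x67, while a 16-bit TSS only needs 0x2c bytes, hence the 0x2b floor. The same predicate as a standalone function, with hypothetical names:

#include <stdbool.h>
#include <stdint.h>

/* Returns true if the new TSS descriptor is unusable and the task switch
 * should raise #TS.  'type' is the 4-bit system descriptor type; bit 3
 * distinguishes 32-bit TSS types (9/11) from 16-bit ones (1/3). */
static bool tss_descriptor_invalid(bool present, uint8_t type, uint32_t limit)
{
        if (!present)
                return true;
        if (type & 8)                   /* 32-bit TSS: at least 104 bytes */
                return limit < 0x67;
        return limit < 0x2b;            /* 16-bit TSS: at least 44 bytes */
}
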
@@ -4361,8 +5167,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4361 } 5167 }
4362 5168
4363 if (reason == TASK_SWITCH_IRET) { 5169 if (reason == TASK_SWITCH_IRET) {
4364 u32 eflags = kvm_x86_ops->get_rflags(vcpu); 5170 u32 eflags = kvm_get_rflags(vcpu);
4365 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); 5171 kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
4366 } 5172 }
4367 5173
4368 /* set back link to prev task only if NT bit is set in eflags 5174 /* set back link to prev task only if NT bit is set in eflags
@@ -4370,11 +5176,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4370 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) 5176 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
4371 old_tss_sel = 0xffff; 5177 old_tss_sel = 0xffff;
4372 5178
4373 /* set back link to prev task only if NT bit is set in eflags
4374 note that old_tss_sel is not used afetr this point */
4375 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
4376 old_tss_sel = 0xffff;
4377
4378 if (nseg_desc.type & 8) 5179 if (nseg_desc.type & 8)
4379 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, 5180 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
4380 old_tss_base, &nseg_desc); 5181 old_tss_base, &nseg_desc);
@@ -4383,8 +5184,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4383 old_tss_base, &nseg_desc); 5184 old_tss_base, &nseg_desc);
4384 5185
4385 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { 5186 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
4386 u32 eflags = kvm_x86_ops->get_rflags(vcpu); 5187 u32 eflags = kvm_get_rflags(vcpu);
4387 kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); 5188 kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
4388 } 5189 }
4389 5190
4390 if (reason != TASK_SWITCH_IRET) { 5191 if (reason != TASK_SWITCH_IRET) {
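
The RFLAGS accesses above now go through kvm_get_rflags()/kvm_set_rflags() (defined near the bottom of this diff) rather than calling into kvm_x86_ops directly; the NT handling itself is unchanged: an IRET-initiated switch clears NT in the outgoing context, and a CALL or task-gate switch sets NT in the incoming one so a later IRET can follow the back link. A compact standalone model of that rule:

#include <stdint.h>

#define EFLAGS_NT (1u << 14)            /* nested task flag */

enum switch_reason { SWITCH_CALL, SWITCH_IRET, SWITCH_JMP, SWITCH_GATE };

/* IRET leaves the nested task: clear NT in the flags saved for the old task. */
static uint32_t adjust_outgoing_flags(uint32_t eflags, enum switch_reason r)
{
        return r == SWITCH_IRET ? eflags & ~EFLAGS_NT : eflags;
}

/* CALL/gate switches nest into the new task: set NT in the new context. */
static uint32_t adjust_incoming_flags(uint32_t eflags, enum switch_reason r)
{
        if (r == SWITCH_CALL || r == SWITCH_GATE)
                eflags |= EFLAGS_NT;
        return eflags;
}
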
@@ -4393,7 +5194,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4393 &nseg_desc); 5194 &nseg_desc);
4394 } 5195 }
4395 5196
4396 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); 5197 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
4397 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 5198 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
4398 tr_seg.type = 11; 5199 tr_seg.type = 11;
4399 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 5200 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
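
Reading CR0 through kvm_read_cr0() instead of vcpu->arch.cr0 matters because some CR0 bits (TS in particular) may be owned by the guest while it runs, so the cached copy can be stale until those bits are pulled back from hardware. A minimal sketch of that read-through-cache idea, assuming a hypothetical read_cr0_bits_from_hw() helper:

#define CR0_TS_BIT (1ul << 3)   /* CR0.TS */

struct cr0_cache {
        unsigned long value;    /* last value known to the hypervisor */
        unsigned long avail;    /* mask of bits known to be current */
};

/* Assumed helper that pulls selected CR0 bits back from hardware state. */
extern unsigned long read_cr0_bits_from_hw(unsigned long mask);

static unsigned long cached_read_cr0(struct cr0_cache *c, unsigned long mask)
{
        unsigned long stale = mask & ~c->avail;

        if (stale) {            /* refresh only the bits the caller needs */
                c->value &= ~stale;
                c->value |= read_cr0_bits_from_hw(stale) & stale;
                c->avail |= stale;
        }
        return c->value & mask;
}

Setting TS after the task switch, as above, would then read the cached value first, e.g. cached_read_cr0(c, ~0ul) | CR0_TS_BIT.
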
@@ -4424,20 +5225,20 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4424 5225
4425 kvm_set_cr8(vcpu, sregs->cr8); 5226 kvm_set_cr8(vcpu, sregs->cr8);
4426 5227
4427 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 5228 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
4428 kvm_x86_ops->set_efer(vcpu, sregs->efer); 5229 kvm_x86_ops->set_efer(vcpu, sregs->efer);
4429 kvm_set_apic_base(vcpu, sregs->apic_base); 5230 kvm_set_apic_base(vcpu, sregs->apic_base);
4430 5231
4431 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 5232 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
4432
4433 mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
4434 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 5233 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
4435 vcpu->arch.cr0 = sregs->cr0; 5234 vcpu->arch.cr0 = sregs->cr0;
4436 5235
4437 mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; 5236 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
4438 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5237 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
4439 if (!is_long_mode(vcpu) && is_pae(vcpu)) 5238 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
4440 load_pdptrs(vcpu, vcpu->arch.cr3); 5239 load_pdptrs(vcpu, vcpu->arch.cr3);
5240 mmu_reset_needed = 1;
5241 }
4441 5242
4442 if (mmu_reset_needed) 5243 if (mmu_reset_needed)
4443 kvm_mmu_reset_context(vcpu); 5244 kvm_mmu_reset_context(vcpu);
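
Besides switching the comparisons to the cached CR0/CR4 readers, the change makes a PDPTR reload for a PAE guest force an MMU reset as well, since fresh PDPTRs invalidate the current paging context. Summarized as a predicate (parameter names are illustrative):

#include <stdbool.h>

/* Sketch: whether a set-sregs style update must rebuild the paging context.
 * Old vs. new values are the cached state and what userspace supplied. */
static bool needs_mmu_reset(unsigned long old_cr0, unsigned long new_cr0,
                            unsigned long old_cr4, unsigned long new_cr4,
                            unsigned long old_efer, unsigned long new_efer,
                            bool reloaded_pdptrs)
{
        return old_cr0 != new_cr0 ||
               old_cr4 != new_cr4 ||
               old_efer != new_efer ||
               reloaded_pdptrs;         /* PAE PDPTRs were just re-read */
}
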
@@ -4467,7 +5268,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4467 /* Older userspace won't unhalt the vcpu on reset. */ 5268 /* Older userspace won't unhalt the vcpu on reset. */
4468 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && 5269 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
4469 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 5270 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
4470 !(vcpu->arch.cr0 & X86_CR0_PE)) 5271 !is_protmode(vcpu))
4471 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5272 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4472 5273
4473 vcpu_put(vcpu); 5274 vcpu_put(vcpu);
@@ -4478,12 +5279,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4478int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 5279int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4479 struct kvm_guest_debug *dbg) 5280 struct kvm_guest_debug *dbg)
4480{ 5281{
5282 unsigned long rflags;
4481 int i, r; 5283 int i, r;
4482 5284
4483 vcpu_load(vcpu); 5285 vcpu_load(vcpu);
4484 5286
4485 if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == 5287 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
4486 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { 5288 r = -EBUSY;
5289 if (vcpu->arch.exception.pending)
5290 goto unlock_out;
5291 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
5292 kvm_queue_exception(vcpu, DB_VECTOR);
5293 else
5294 kvm_queue_exception(vcpu, BP_VECTOR);
5295 }
5296
5297 /*
5298 * Read rflags as long as potentially injected trace flags are still
5299 * filtered out.
5300 */
5301 rflags = kvm_get_rflags(vcpu);
5302
5303 vcpu->guest_debug = dbg->control;
5304 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
5305 vcpu->guest_debug = 0;
5306
5307 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
4487 for (i = 0; i < KVM_NR_DB_REGS; ++i) 5308 for (i = 0; i < KVM_NR_DB_REGS; ++i)
4488 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; 5309 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
4489 vcpu->arch.switch_db_regs = 5310 vcpu->arch.switch_db_regs =
@@ -4494,13 +5315,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4494 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 5315 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
4495 } 5316 }
4496 5317
4497 r = kvm_x86_ops->set_guest_debug(vcpu, dbg); 5318 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5319 vcpu->arch.singlestep_cs =
5320 get_segment_selector(vcpu, VCPU_SREG_CS);
5321 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
5322 }
5323
5324 /*
5325 * Trigger an rflags update that will inject or remove the trace
5326 * flags.
5327 */
5328 kvm_set_rflags(vcpu, rflags);
4498 5329
4499 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 5330 kvm_x86_ops->set_guest_debug(vcpu, dbg);
4500 kvm_queue_exception(vcpu, DB_VECTOR);
4501 else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
4502 kvm_queue_exception(vcpu, BP_VECTOR);
4503 5331
5332 r = 0;
5333
5334unlock_out:
4504 vcpu_put(vcpu); 5335 vcpu_put(vcpu);
4505 5336
4506 return r; 5337 return r;
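
The reworked ioctl has a deliberate ordering: inject a requested #DB/#BP first (refusing with -EBUSY if an exception is already queued), sample RFLAGS while the single-step trace bits are still filtered out, update the debug state including the CS:RIP at which single-stepping was armed, and finally write RFLAGS back so TF/RF are injected or dropped to match the new settings. A condensed sketch of that flow; every helper and type name here is a stand-in:

#include <errno.h>
#include <stdbool.h>

#define DB_VECTOR 1
#define BP_VECTOR 3

struct vcpu;                                            /* opaque; illustrative */
struct dbg_request { bool inject_db, inject_bp; };      /* illustrative subset */

extern bool exception_already_pending(struct vcpu *v);          /* assumed */
extern void queue_exception(struct vcpu *v, int vector);        /* assumed */
extern unsigned long get_rflags_filtered(struct vcpu *v);       /* assumed */
extern void set_rflags_filtered(struct vcpu *v, unsigned long rflags);
extern void update_debug_state(struct vcpu *v, const struct dbg_request *dbg);

static int set_guest_debug_sketch(struct vcpu *v, const struct dbg_request *dbg)
{
        unsigned long rflags;

        if (dbg->inject_db || dbg->inject_bp) {
                if (exception_already_pending(v))
                        return -EBUSY;  /* don't clobber a queued event */
                queue_exception(v, dbg->inject_db ? DB_VECTOR : BP_VECTOR);
        }

        /* Sample RFLAGS while single-step TF/RF are still filtered out. */
        rflags = get_rflags_filtered(v);

        /* New guest_debug flags, debug registers, and the CS:RIP at which
         * single-stepping was armed. */
        update_debug_state(v, dbg);

        /* Writing RFLAGS back injects or removes TF/RF as appropriate. */
        set_rflags_filtered(v, rflags);
        return 0;
}
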
@@ -4535,11 +5366,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4535{ 5366{
4536 unsigned long vaddr = tr->linear_address; 5367 unsigned long vaddr = tr->linear_address;
4537 gpa_t gpa; 5368 gpa_t gpa;
5369 int idx;
4538 5370
4539 vcpu_load(vcpu); 5371 vcpu_load(vcpu);
4540 down_read(&vcpu->kvm->slots_lock); 5372 idx = srcu_read_lock(&vcpu->kvm->srcu);
4541 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); 5373 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
4542 up_read(&vcpu->kvm->slots_lock); 5374 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4543 tr->physical_address = gpa; 5375 tr->physical_address = gpa;
4544 tr->valid = gpa != UNMAPPED_GVA; 5376 tr->valid = gpa != UNMAPPED_GVA;
4545 tr->writeable = 1; 5377 tr->writeable = 1;
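
The translation ioctl now brackets the page walk with an SRCU read-side critical section instead of taking slots_lock, matching the conversion of the memslot array to SRCU protection. The reader pattern against the real SRCU API looks like this; my_srcu and the body are placeholders:

#include <linux/srcu.h>

/* General SRCU reader pattern (kernel context); my_srcu stands in for the
 * per-VM srcu struct and the body for the memslot/MMU walk. */
static void srcu_reader_example(struct srcu_struct *my_srcu)
{
        int idx;

        idx = srcu_read_lock(my_srcu);  /* readers never block the updater */
        /* ... dereference SRCU-protected data, e.g. the memslot array ... */
        srcu_read_unlock(my_srcu, idx);
}
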
@@ -4620,14 +5452,14 @@ EXPORT_SYMBOL_GPL(fx_init);
4620 5452
4621void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5453void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
4622{ 5454{
4623 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) 5455 if (vcpu->guest_fpu_loaded)
4624 return; 5456 return;
4625 5457
4626 vcpu->guest_fpu_loaded = 1; 5458 vcpu->guest_fpu_loaded = 1;
4627 kvm_fx_save(&vcpu->arch.host_fx_image); 5459 kvm_fx_save(&vcpu->arch.host_fx_image);
4628 kvm_fx_restore(&vcpu->arch.guest_fx_image); 5460 kvm_fx_restore(&vcpu->arch.guest_fx_image);
5461 trace_kvm_fpu(1);
4629} 5462}
4630EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
4631 5463
4632void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5464void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4633{ 5465{
@@ -4638,8 +5470,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4638 kvm_fx_save(&vcpu->arch.guest_fx_image); 5470 kvm_fx_save(&vcpu->arch.guest_fx_image);
4639 kvm_fx_restore(&vcpu->arch.host_fx_image); 5471 kvm_fx_restore(&vcpu->arch.host_fx_image);
4640 ++vcpu->stat.fpu_reload; 5472 ++vcpu->stat.fpu_reload;
5473 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
5474 trace_kvm_fpu(0);
4641} 5475}
4642EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
4643 5476
4644void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 5477void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
4645{ 5478{
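
kvm_load_guest_fpu()/kvm_put_guest_fpu() now key purely off guest_fpu_loaded, raise a deactivate-FPU request so the next guest entry can re-arm the #NM trap (lazy FPU switching), and emit trace_kvm_fpu events. A minimal standalone model of that lazy load/put state machine, with the save/restore and request hooks left as assumptions:

#include <stdbool.h>

struct fpu_state;                               /* opaque; illustrative */
extern void fpu_save(struct fpu_state *s);      /* assumed hooks */
extern void fpu_restore(struct fpu_state *s);
extern void request_fpu_deactivation(void);     /* e.g. sets a request bit
                                                 * checked before guest entry */

struct lazy_fpu {
        bool guest_loaded;
        struct fpu_state *host, *guest;
};

static void lazy_fpu_load_guest(struct lazy_fpu *f)
{
        if (f->guest_loaded)            /* already live, nothing to do */
                return;
        f->guest_loaded = true;
        fpu_save(f->host);
        fpu_restore(f->guest);
}

static void lazy_fpu_put_guest(struct lazy_fpu *f)
{
        if (!f->guest_loaded)
                return;
        f->guest_loaded = false;
        fpu_save(f->guest);
        fpu_restore(f->host);
        request_fpu_deactivation();     /* re-arm the trap on next guest entry */
}
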
@@ -4701,14 +5534,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
4701 return kvm_x86_ops->vcpu_reset(vcpu); 5534 return kvm_x86_ops->vcpu_reset(vcpu);
4702} 5535}
4703 5536
4704void kvm_arch_hardware_enable(void *garbage) 5537int kvm_arch_hardware_enable(void *garbage)
4705{ 5538{
4706 kvm_x86_ops->hardware_enable(garbage); 5539 /*
 5540	 * Since this may be called from a hotplug notification,
5541 * we can't get the CPU frequency directly.
5542 */
5543 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5544 int cpu = raw_smp_processor_id();
5545 per_cpu(cpu_tsc_khz, cpu) = 0;
5546 }
5547
5548 kvm_shared_msr_cpu_online();
5549
5550 return kvm_x86_ops->hardware_enable(garbage);
4707} 5551}
4708 5552
4709void kvm_arch_hardware_disable(void *garbage) 5553void kvm_arch_hardware_disable(void *garbage)
4710{ 5554{
4711 kvm_x86_ops->hardware_disable(garbage); 5555 kvm_x86_ops->hardware_disable(garbage);
5556 drop_user_return_notifiers(garbage);
4712} 5557}
4713 5558
4714int kvm_arch_hardware_setup(void) 5559int kvm_arch_hardware_setup(void)
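
kvm_arch_hardware_enable() now returns an int so a failure can abort bringing a CPU into service, zeroes the per-CPU TSC frequency when the TSC is not constant (it will be re-measured, since the frequency cannot be read directly from the hotplug path), and re-initializes the shared user-return MSRs for the new CPU. A sketch of how a CPU-online callback might consume that return value; the callback and its registration are assumptions, not code from this patch:

#include <linux/kernel.h>

extern int kvm_arch_hardware_enable(void *garbage);

/* Hypothetical CPU-online callback: if per-arch enabling fails, report it
 * and propagate the error so the CPU is not used for virtualization. */
static int kvm_cpu_online_sketch(unsigned int cpu)
{
        int r = kvm_arch_hardware_enable(NULL);

        if (r)
                pr_err("kvm: enabling virtualization on CPU%u failed (%d)\n",
                       cpu, r);
        return r;
}
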
@@ -4762,12 +5607,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
4762 GFP_KERNEL); 5607 GFP_KERNEL);
4763 if (!vcpu->arch.mce_banks) { 5608 if (!vcpu->arch.mce_banks) {
4764 r = -ENOMEM; 5609 r = -ENOMEM;
4765 goto fail_mmu_destroy; 5610 goto fail_free_lapic;
4766 } 5611 }
4767 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; 5612 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
4768 5613
4769 return 0; 5614 return 0;
4770 5615fail_free_lapic:
5616 kvm_free_lapic(vcpu);
4771fail_mmu_destroy: 5617fail_mmu_destroy:
4772 kvm_mmu_destroy(vcpu); 5618 kvm_mmu_destroy(vcpu);
4773fail_free_pio_data: 5619fail_free_pio_data:
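
The error path gains a fail_free_lapic label so the local APIC allocated earlier in kvm_arch_vcpu_init() is freed when the MCE bank allocation fails, keeping the unwind labels in reverse order of setup. The idiom in isolation:

#include <stdlib.h>

/* Generic sketch of the unwinding idiom used above: each label undoes
 * exactly the steps that succeeded before the failure point. */
static int init_three_resources(void **a, void **b, void **c)
{
        *a = malloc(32);
        if (!*a)
                goto fail;
        *b = malloc(32);
        if (!*b)
                goto fail_free_a;
        *c = malloc(32);
        if (!*c)
                goto fail_free_b;
        return 0;

fail_free_b:
        free(*b);
fail_free_a:
        free(*a);
fail:
        return -1;
}
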
@@ -4778,10 +5624,13 @@ fail:
4778 5624
4779void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5625void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
4780{ 5626{
5627 int idx;
5628
5629 kfree(vcpu->arch.mce_banks);
4781 kvm_free_lapic(vcpu); 5630 kvm_free_lapic(vcpu);
4782 down_read(&vcpu->kvm->slots_lock); 5631 idx = srcu_read_lock(&vcpu->kvm->srcu);
4783 kvm_mmu_destroy(vcpu); 5632 kvm_mmu_destroy(vcpu);
4784 up_read(&vcpu->kvm->slots_lock); 5633 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4785 free_page((unsigned long)vcpu->arch.pio_data); 5634 free_page((unsigned long)vcpu->arch.pio_data);
4786} 5635}
4787 5636
@@ -4792,6 +5641,12 @@ struct kvm *kvm_arch_create_vm(void)
4792 if (!kvm) 5641 if (!kvm)
4793 return ERR_PTR(-ENOMEM); 5642 return ERR_PTR(-ENOMEM);
4794 5643
5644 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
5645 if (!kvm->arch.aliases) {
5646 kfree(kvm);
5647 return ERR_PTR(-ENOMEM);
5648 }
5649
4795 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5650 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4796 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 5651 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
4797 5652
@@ -4848,16 +5703,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
4848 put_page(kvm->arch.apic_access_page); 5703 put_page(kvm->arch.apic_access_page);
4849 if (kvm->arch.ept_identity_pagetable) 5704 if (kvm->arch.ept_identity_pagetable)
4850 put_page(kvm->arch.ept_identity_pagetable); 5705 put_page(kvm->arch.ept_identity_pagetable);
5706 cleanup_srcu_struct(&kvm->srcu);
5707 kfree(kvm->arch.aliases);
4851 kfree(kvm); 5708 kfree(kvm);
4852} 5709}
4853 5710
4854int kvm_arch_set_memory_region(struct kvm *kvm, 5711int kvm_arch_prepare_memory_region(struct kvm *kvm,
4855 struct kvm_userspace_memory_region *mem, 5712 struct kvm_memory_slot *memslot,
4856 struct kvm_memory_slot old, 5713 struct kvm_memory_slot old,
5714 struct kvm_userspace_memory_region *mem,
4857 int user_alloc) 5715 int user_alloc)
4858{ 5716{
4859 int npages = mem->memory_size >> PAGE_SHIFT; 5717 int npages = memslot->npages;
4860 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
4861 5718
4862 /*To keep backward compatibility with older userspace, 5719 /*To keep backward compatibility with older userspace,
4863	 *x86 needs to handle !user_alloc case. 5720	 *x86 needs to handle !user_alloc case.
@@ -4877,26 +5734,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
4877 if (IS_ERR((void *)userspace_addr)) 5734 if (IS_ERR((void *)userspace_addr))
4878 return PTR_ERR((void *)userspace_addr); 5735 return PTR_ERR((void *)userspace_addr);
4879 5736
4880 /* set userspace_addr atomically for kvm_hva_to_rmapp */
4881 spin_lock(&kvm->mmu_lock);
4882 memslot->userspace_addr = userspace_addr; 5737 memslot->userspace_addr = userspace_addr;
4883 spin_unlock(&kvm->mmu_lock);
4884 } else {
4885 if (!old.user_alloc && old.rmap) {
4886 int ret;
4887
4888 down_write(&current->mm->mmap_sem);
4889 ret = do_munmap(current->mm, old.userspace_addr,
4890 old.npages * PAGE_SIZE);
4891 up_write(&current->mm->mmap_sem);
4892 if (ret < 0)
4893 printk(KERN_WARNING
4894 "kvm_vm_ioctl_set_memory_region: "
4895 "failed to munmap memory\n");
4896 }
4897 } 5738 }
4898 } 5739 }
4899 5740
5741
5742 return 0;
5743}
5744
5745void kvm_arch_commit_memory_region(struct kvm *kvm,
5746 struct kvm_userspace_memory_region *mem,
5747 struct kvm_memory_slot old,
5748 int user_alloc)
5749{
5750
5751 int npages = mem->memory_size >> PAGE_SHIFT;
5752
5753 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
5754 int ret;
5755
5756 down_write(&current->mm->mmap_sem);
5757 ret = do_munmap(current->mm, old.userspace_addr,
5758 old.npages * PAGE_SIZE);
5759 up_write(&current->mm->mmap_sem);
5760 if (ret < 0)
5761 printk(KERN_WARNING
5762 "kvm_vm_ioctl_set_memory_region: "
5763 "failed to munmap memory\n");
5764 }
5765
4900 spin_lock(&kvm->mmu_lock); 5766 spin_lock(&kvm->mmu_lock);
4901 if (!kvm->arch.n_requested_mmu_pages) { 5767 if (!kvm->arch.n_requested_mmu_pages) {
4902 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 5768 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
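
kvm_arch_set_memory_region() is split in two: a prepare step that may fail and does the fallible work (the userspace mmap for !user_alloc slots), and a commit step that runs after the new memslot array is visible and therefore must not fail, which is where the munmap of the old region and the MMU page accounting now live. The two-phase shape in isolation, with placeholder operations:

struct slot_update;                             /* opaque; illustrative */

extern int  reserve_resources(struct slot_update *u);      /* may fail */
extern void publish_new_slots(struct slot_update *u);      /* switch readers */
extern void release_old_resources(struct slot_update *u);  /* must not fail */

/* Two-phase update: nothing visible changes until prepare has succeeded,
 * and everything done after publication is failure-free by construction. */
static int apply_slot_update(struct slot_update *u)
{
        int r = reserve_resources(u);           /* "prepare": mmap, allocations */

        if (r)
                return r;                       /* old state untouched */

        publish_new_slots(u);                   /* e.g. pointer swap + sync */
        release_old_resources(u);               /* "commit": munmap, mmu fixup */
        return 0;
}
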
@@ -4905,8 +5771,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
4905 5771
4906 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 5772 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
4907 spin_unlock(&kvm->mmu_lock); 5773 spin_unlock(&kvm->mmu_lock);
4908
4909 return 0;
4910} 5774}
4911 5775
4912void kvm_arch_flush_shadow(struct kvm *kvm) 5776void kvm_arch_flush_shadow(struct kvm *kvm)
@@ -4946,8 +5810,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
4946 return kvm_x86_ops->interrupt_allowed(vcpu); 5810 return kvm_x86_ops->interrupt_allowed(vcpu);
4947} 5811}
4948 5812
5813unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
5814{
5815 unsigned long rflags;
5816
5817 rflags = kvm_x86_ops->get_rflags(vcpu);
5818 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5819 rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
5820 return rflags;
5821}
5822EXPORT_SYMBOL_GPL(kvm_get_rflags);
5823
5824void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
5825{
5826 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
5827 vcpu->arch.singlestep_cs ==
5828 get_segment_selector(vcpu, VCPU_SREG_CS) &&
5829 vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
5830 rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
5831 kvm_x86_ops->set_rflags(vcpu, rflags);
5832}
5833EXPORT_SYMBOL_GPL(kvm_set_rflags);
5834
4949EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 5835EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
4950EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 5836EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
4951EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); 5837EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
4952EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); 5838EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
4953EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); 5839EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
5840EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
5841EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
5842EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
5843EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
5844EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
5845EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
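
The new kvm_get_rflags()/kvm_set_rflags() wrappers added above hide the TF and RF bits that KVM injects for guest single-stepping: reads mask them out so neither the guest image nor userspace save/restore ever sees them, and writes re-inject them only while the vCPU still sits at the CS:RIP where single-stepping was armed. A standalone model of that filtering, with hypothetical state:

#include <stdbool.h>
#include <stdint.h>

#define EFLAGS_TF (1u << 8)     /* trap flag */
#define EFLAGS_RF (1u << 16)    /* resume flag */

struct sstep_state {
        bool active;            /* single-step debugging armed */
        uint16_t cs;            /* CS at arming time */
        uint64_t rip;           /* RIP at arming time */
};

/* Reads: hide the injected trace bits from the caller. */
static uint64_t filter_rflags_read(const struct sstep_state *s, uint64_t hw_rflags)
{
        if (s->active)
                hw_rflags &= ~(uint64_t)(EFLAGS_TF | EFLAGS_RF);
        return hw_rflags;
}

/* Writes: re-inject them only while still sitting at the arming point. */
static uint64_t filter_rflags_write(const struct sstep_state *s, uint64_t rflags,
                                    uint16_t cur_cs, uint64_t cur_rip)
{
        if (s->active && s->cs == cur_cs && s->rip == cur_rip)
                rflags |= EFLAGS_TF | EFLAGS_RF;
        return rflags;
}
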