Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c | 1180
1 file changed, 652 insertions, 528 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05d571f6f196..25f19078b321 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6,6 +6,7 @@
6 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc. 7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008 8 * Copyright IBM Corporation, 2008
9 * Copyright 2010 Red Hat, Inc. and/or its affilates.
9 * 10 *
10 * Authors: 11 * Authors:
11 * Avi Kivity <avi@qumranet.com> 12 * Avi Kivity <avi@qumranet.com>
@@ -41,17 +42,19 @@
41#include <linux/srcu.h> 42#include <linux/srcu.h>
42#include <linux/slab.h> 43#include <linux/slab.h>
43#include <linux/perf_event.h> 44#include <linux/perf_event.h>
45#include <linux/uaccess.h>
44#include <trace/events/kvm.h> 46#include <trace/events/kvm.h>
45 47
46#define CREATE_TRACE_POINTS 48#define CREATE_TRACE_POINTS
47#include "trace.h" 49#include "trace.h"
48 50
49#include <asm/debugreg.h> 51#include <asm/debugreg.h>
50#include <asm/uaccess.h>
51#include <asm/msr.h> 52#include <asm/msr.h>
52#include <asm/desc.h> 53#include <asm/desc.h>
53#include <asm/mtrr.h> 54#include <asm/mtrr.h>
54#include <asm/mce.h> 55#include <asm/mce.h>
56#include <asm/i387.h>
57#include <asm/xcr.h>
55 58
56#define MAX_IO_MSRS 256 59#define MAX_IO_MSRS 256
57#define CR0_RESERVED_BITS \ 60#define CR0_RESERVED_BITS \
@@ -62,6 +65,7 @@
62 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 65 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
63 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 66 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
64 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ 67 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
68 | X86_CR4_OSXSAVE \
65 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 69 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
66 70
67#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 71#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -147,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
147 { NULL } 151 { NULL }
148}; 152};
149 153
154u64 __read_mostly host_xcr0;
155
156static inline u32 bit(int bitno)
157{
158 return 1 << (bitno & 31);
159}
160
150static void kvm_on_user_return(struct user_return_notifier *urn) 161static void kvm_on_user_return(struct user_return_notifier *urn)
151{ 162{
152 unsigned slot; 163 unsigned slot;
@@ -285,7 +296,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
285 prev_nr = vcpu->arch.exception.nr; 296 prev_nr = vcpu->arch.exception.nr;
286 if (prev_nr == DF_VECTOR) { 297 if (prev_nr == DF_VECTOR) {
287 /* triple fault -> shutdown */ 298 /* triple fault -> shutdown */
288 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 299 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
289 return; 300 return;
290 } 301 }
291 class1 = exception_class(prev_nr); 302 class1 = exception_class(prev_nr);
@@ -414,121 +425,163 @@ out:
414 return changed; 425 return changed;
415} 426}
416 427
417void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 428int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
418{ 429{
430 unsigned long old_cr0 = kvm_read_cr0(vcpu);
431 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
432 X86_CR0_CD | X86_CR0_NW;
433
419 cr0 |= X86_CR0_ET; 434 cr0 |= X86_CR0_ET;
420 435
421#ifdef CONFIG_X86_64 436#ifdef CONFIG_X86_64
422 if (cr0 & 0xffffffff00000000UL) { 437 if (cr0 & 0xffffffff00000000UL)
423 kvm_inject_gp(vcpu, 0); 438 return 1;
424 return;
425 }
426#endif 439#endif
427 440
428 cr0 &= ~CR0_RESERVED_BITS; 441 cr0 &= ~CR0_RESERVED_BITS;
429 442
430 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 443 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
431 kvm_inject_gp(vcpu, 0); 444 return 1;
432 return;
433 }
434 445
435 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 446 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
436 kvm_inject_gp(vcpu, 0); 447 return 1;
437 return;
438 }
439 448
440 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 449 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
441#ifdef CONFIG_X86_64 450#ifdef CONFIG_X86_64
442 if ((vcpu->arch.efer & EFER_LME)) { 451 if ((vcpu->arch.efer & EFER_LME)) {
443 int cs_db, cs_l; 452 int cs_db, cs_l;
444 453
445 if (!is_pae(vcpu)) { 454 if (!is_pae(vcpu))
446 kvm_inject_gp(vcpu, 0); 455 return 1;
447 return;
448 }
449 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 456 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
450 if (cs_l) { 457 if (cs_l)
451 kvm_inject_gp(vcpu, 0); 458 return 1;
452 return;
453
454 }
455 } else 459 } else
456#endif 460#endif
457 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 461 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3))
458 kvm_inject_gp(vcpu, 0); 462 return 1;
459 return;
460 }
461
462 } 463 }
463 464
464 kvm_x86_ops->set_cr0(vcpu, cr0); 465 kvm_x86_ops->set_cr0(vcpu, cr0);
465 466
466 kvm_mmu_reset_context(vcpu); 467 if ((cr0 ^ old_cr0) & update_bits)
467 return; 468 kvm_mmu_reset_context(vcpu);
469 return 0;
468} 470}
469EXPORT_SYMBOL_GPL(kvm_set_cr0); 471EXPORT_SYMBOL_GPL(kvm_set_cr0);
470 472
471void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 473void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
472{ 474{
473 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); 475 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
474} 476}
475EXPORT_SYMBOL_GPL(kvm_lmsw); 477EXPORT_SYMBOL_GPL(kvm_lmsw);
476 478
477void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 479int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
478{ 480{
479 unsigned long old_cr4 = kvm_read_cr4(vcpu); 481 u64 xcr0;
480 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
481 482
482 if (cr4 & CR4_RESERVED_BITS) { 483 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
484 if (index != XCR_XFEATURE_ENABLED_MASK)
485 return 1;
486 xcr0 = xcr;
487 if (kvm_x86_ops->get_cpl(vcpu) != 0)
488 return 1;
489 if (!(xcr0 & XSTATE_FP))
490 return 1;
491 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
492 return 1;
493 if (xcr0 & ~host_xcr0)
494 return 1;
495 vcpu->arch.xcr0 = xcr0;
496 vcpu->guest_xcr0_loaded = 0;
497 return 0;
498}
499
500int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
501{
502 if (__kvm_set_xcr(vcpu, index, xcr)) {
483 kvm_inject_gp(vcpu, 0); 503 kvm_inject_gp(vcpu, 0);
504 return 1;
505 }
506 return 0;
507}
508EXPORT_SYMBOL_GPL(kvm_set_xcr);
509
510static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
511{
512 struct kvm_cpuid_entry2 *best;
513
514 best = kvm_find_cpuid_entry(vcpu, 1, 0);
515 return best && (best->ecx & bit(X86_FEATURE_XSAVE));
516}
517
518static void update_cpuid(struct kvm_vcpu *vcpu)
519{
520 struct kvm_cpuid_entry2 *best;
521
522 best = kvm_find_cpuid_entry(vcpu, 1, 0);
523 if (!best)
484 return; 524 return;
525
526 /* Update OSXSAVE bit */
527 if (cpu_has_xsave && best->function == 0x1) {
528 best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
529 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
530 best->ecx |= bit(X86_FEATURE_OSXSAVE);
485 } 531 }
532}
533
534int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
535{
536 unsigned long old_cr4 = kvm_read_cr4(vcpu);
537 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
538
539 if (cr4 & CR4_RESERVED_BITS)
540 return 1;
541
542 if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
543 return 1;
486 544
487 if (is_long_mode(vcpu)) { 545 if (is_long_mode(vcpu)) {
488 if (!(cr4 & X86_CR4_PAE)) { 546 if (!(cr4 & X86_CR4_PAE))
489 kvm_inject_gp(vcpu, 0); 547 return 1;
490 return;
491 }
492 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 548 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
493 && ((cr4 ^ old_cr4) & pdptr_bits) 549 && ((cr4 ^ old_cr4) & pdptr_bits)
494 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 550 && !load_pdptrs(vcpu, vcpu->arch.cr3))
495 kvm_inject_gp(vcpu, 0); 551 return 1;
496 return; 552
497 } 553 if (cr4 & X86_CR4_VMXE)
554 return 1;
498 555
499 if (cr4 & X86_CR4_VMXE) {
500 kvm_inject_gp(vcpu, 0);
501 return;
502 }
503 kvm_x86_ops->set_cr4(vcpu, cr4); 556 kvm_x86_ops->set_cr4(vcpu, cr4);
504 vcpu->arch.cr4 = cr4; 557
505 kvm_mmu_reset_context(vcpu); 558 if ((cr4 ^ old_cr4) & pdptr_bits)
559 kvm_mmu_reset_context(vcpu);
560
561 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
562 update_cpuid(vcpu);
563
564 return 0;
506} 565}
507EXPORT_SYMBOL_GPL(kvm_set_cr4); 566EXPORT_SYMBOL_GPL(kvm_set_cr4);
508 567
509void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 568int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
510{ 569{
511 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 570 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
512 kvm_mmu_sync_roots(vcpu); 571 kvm_mmu_sync_roots(vcpu);
513 kvm_mmu_flush_tlb(vcpu); 572 kvm_mmu_flush_tlb(vcpu);
514 return; 573 return 0;
515 } 574 }
516 575
517 if (is_long_mode(vcpu)) { 576 if (is_long_mode(vcpu)) {
518 if (cr3 & CR3_L_MODE_RESERVED_BITS) { 577 if (cr3 & CR3_L_MODE_RESERVED_BITS)
519 kvm_inject_gp(vcpu, 0); 578 return 1;
520 return;
521 }
522 } else { 579 } else {
523 if (is_pae(vcpu)) { 580 if (is_pae(vcpu)) {
524 if (cr3 & CR3_PAE_RESERVED_BITS) { 581 if (cr3 & CR3_PAE_RESERVED_BITS)
525 kvm_inject_gp(vcpu, 0); 582 return 1;
526 return; 583 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3))
527 } 584 return 1;
528 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
529 kvm_inject_gp(vcpu, 0);
530 return;
531 }
532 } 585 }
533 /* 586 /*
534 * We don't check reserved bits in nonpae mode, because 587 * We don't check reserved bits in nonpae mode, because
@@ -546,24 +599,28 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
546 * to debug) behavior on the guest side. 599 * to debug) behavior on the guest side.
547 */ 600 */
548 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) 601 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
549 kvm_inject_gp(vcpu, 0); 602 return 1;
550 else { 603 vcpu->arch.cr3 = cr3;
551 vcpu->arch.cr3 = cr3; 604 vcpu->arch.mmu.new_cr3(vcpu);
552 vcpu->arch.mmu.new_cr3(vcpu); 605 return 0;
553 }
554} 606}
555EXPORT_SYMBOL_GPL(kvm_set_cr3); 607EXPORT_SYMBOL_GPL(kvm_set_cr3);
556 608
557void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 609int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
558{ 610{
559 if (cr8 & CR8_RESERVED_BITS) { 611 if (cr8 & CR8_RESERVED_BITS)
560 kvm_inject_gp(vcpu, 0); 612 return 1;
561 return;
562 }
563 if (irqchip_in_kernel(vcpu->kvm)) 613 if (irqchip_in_kernel(vcpu->kvm))
564 kvm_lapic_set_tpr(vcpu, cr8); 614 kvm_lapic_set_tpr(vcpu, cr8);
565 else 615 else
566 vcpu->arch.cr8 = cr8; 616 vcpu->arch.cr8 = cr8;
617 return 0;
618}
619
620void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
621{
622 if (__kvm_set_cr8(vcpu, cr8))
623 kvm_inject_gp(vcpu, 0);
567} 624}
568EXPORT_SYMBOL_GPL(kvm_set_cr8); 625EXPORT_SYMBOL_GPL(kvm_set_cr8);
569 626
@@ -576,7 +633,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
576} 633}
577EXPORT_SYMBOL_GPL(kvm_get_cr8); 634EXPORT_SYMBOL_GPL(kvm_get_cr8);
578 635
579int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 636static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
580{ 637{
581 switch (dr) { 638 switch (dr) {
582 case 0 ... 3: 639 case 0 ... 3:
@@ -585,29 +642,21 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
585 vcpu->arch.eff_db[dr] = val; 642 vcpu->arch.eff_db[dr] = val;
586 break; 643 break;
587 case 4: 644 case 4:
588 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 645 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
589 kvm_queue_exception(vcpu, UD_VECTOR); 646 return 1; /* #UD */
590 return 1;
591 }
592 /* fall through */ 647 /* fall through */
593 case 6: 648 case 6:
594 if (val & 0xffffffff00000000ULL) { 649 if (val & 0xffffffff00000000ULL)
595 kvm_inject_gp(vcpu, 0); 650 return -1; /* #GP */
596 return 1;
597 }
598 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 651 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
599 break; 652 break;
600 case 5: 653 case 5:
601 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 654 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
602 kvm_queue_exception(vcpu, UD_VECTOR); 655 return 1; /* #UD */
603 return 1;
604 }
605 /* fall through */ 656 /* fall through */
606 default: /* 7 */ 657 default: /* 7 */
607 if (val & 0xffffffff00000000ULL) { 658 if (val & 0xffffffff00000000ULL)
608 kvm_inject_gp(vcpu, 0); 659 return -1; /* #GP */
609 return 1;
610 }
611 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 660 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
612 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 661 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
613 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); 662 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
@@ -618,28 +667,37 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
618 667
619 return 0; 668 return 0;
620} 669}
670
671int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
672{
673 int res;
674
675 res = __kvm_set_dr(vcpu, dr, val);
676 if (res > 0)
677 kvm_queue_exception(vcpu, UD_VECTOR);
678 else if (res < 0)
679 kvm_inject_gp(vcpu, 0);
680
681 return res;
682}
621EXPORT_SYMBOL_GPL(kvm_set_dr); 683EXPORT_SYMBOL_GPL(kvm_set_dr);
622 684
623int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 685static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
624{ 686{
625 switch (dr) { 687 switch (dr) {
626 case 0 ... 3: 688 case 0 ... 3:
627 *val = vcpu->arch.db[dr]; 689 *val = vcpu->arch.db[dr];
628 break; 690 break;
629 case 4: 691 case 4:
630 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 692 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
631 kvm_queue_exception(vcpu, UD_VECTOR);
632 return 1; 693 return 1;
633 }
634 /* fall through */ 694 /* fall through */
635 case 6: 695 case 6:
636 *val = vcpu->arch.dr6; 696 *val = vcpu->arch.dr6;
637 break; 697 break;
638 case 5: 698 case 5:
639 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 699 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
640 kvm_queue_exception(vcpu, UD_VECTOR);
641 return 1; 700 return 1;
642 }
643 /* fall through */ 701 /* fall through */
644 default: /* 7 */ 702 default: /* 7 */
645 *val = vcpu->arch.dr7; 703 *val = vcpu->arch.dr7;
@@ -648,12 +706,16 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
648 706
649 return 0; 707 return 0;
650} 708}
651EXPORT_SYMBOL_GPL(kvm_get_dr);
652 709
653static inline u32 bit(int bitno) 710int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
654{ 711{
655 return 1 << (bitno & 31); 712 if (_kvm_get_dr(vcpu, dr, val)) {
713 kvm_queue_exception(vcpu, UD_VECTOR);
714 return 1;
715 }
716 return 0;
656} 717}
718EXPORT_SYMBOL_GPL(kvm_get_dr);
657 719
658/* 720/*
659 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 721 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -671,7 +733,7 @@ static u32 msrs_to_save[] = {
671 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 733 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
672 HV_X64_MSR_APIC_ASSIST_PAGE, 734 HV_X64_MSR_APIC_ASSIST_PAGE,
673 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 735 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
674 MSR_K6_STAR, 736 MSR_STAR,
675#ifdef CONFIG_X86_64 737#ifdef CONFIG_X86_64
676 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 738 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
677#endif 739#endif
@@ -682,10 +744,14 @@ static unsigned num_msrs_to_save;
682 744
683static u32 emulated_msrs[] = { 745static u32 emulated_msrs[] = {
684 MSR_IA32_MISC_ENABLE, 746 MSR_IA32_MISC_ENABLE,
747 MSR_IA32_MCG_STATUS,
748 MSR_IA32_MCG_CTL,
685}; 749};
686 750
687static int set_efer(struct kvm_vcpu *vcpu, u64 efer) 751static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
688{ 752{
753 u64 old_efer = vcpu->arch.efer;
754
689 if (efer & efer_reserved_bits) 755 if (efer & efer_reserved_bits)
690 return 1; 756 return 1;
691 757
@@ -714,11 +780,13 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
714 780
715 kvm_x86_ops->set_efer(vcpu, efer); 781 kvm_x86_ops->set_efer(vcpu, efer);
716 782
717 vcpu->arch.efer = efer;
718
719 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 783 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
720 kvm_mmu_reset_context(vcpu); 784 kvm_mmu_reset_context(vcpu);
721 785
786 /* Update reserved bits */
787 if ((efer ^ old_efer) & EFER_NX)
788 kvm_mmu_reset_context(vcpu);
789
722 return 0; 790 return 0;
723} 791}
724 792
@@ -882,7 +950,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v)
882 950
883 if (!vcpu->time_page) 951 if (!vcpu->time_page)
884 return 0; 952 return 0;
885 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); 953 kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
886 return 1; 954 return 1;
887} 955}
888 956
@@ -1524,16 +1592,12 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1524{ 1592{
1525 int i, idx; 1593 int i, idx;
1526 1594
1527 vcpu_load(vcpu);
1528
1529 idx = srcu_read_lock(&vcpu->kvm->srcu); 1595 idx = srcu_read_lock(&vcpu->kvm->srcu);
1530 for (i = 0; i < msrs->nmsrs; ++i) 1596 for (i = 0; i < msrs->nmsrs; ++i)
1531 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1597 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1532 break; 1598 break;
1533 srcu_read_unlock(&vcpu->kvm->srcu, idx); 1599 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1534 1600
1535 vcpu_put(vcpu);
1536
1537 return i; 1601 return i;
1538} 1602}
1539 1603
@@ -1562,7 +1626,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
1562 1626
1563 r = -ENOMEM; 1627 r = -ENOMEM;
1564 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; 1628 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
1565 entries = vmalloc(size); 1629 entries = kmalloc(size, GFP_KERNEL);
1566 if (!entries) 1630 if (!entries)
1567 goto out; 1631 goto out;
1568 1632
@@ -1581,7 +1645,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
1581 r = n; 1645 r = n;
1582 1646
1583out_free: 1647out_free:
1584 vfree(entries); 1648 kfree(entries);
1585out: 1649out:
1586 return r; 1650 return r;
1587} 1651}
@@ -1618,6 +1682,7 @@ int kvm_dev_ioctl_check_extension(long ext)
1618 case KVM_CAP_PCI_SEGMENT: 1682 case KVM_CAP_PCI_SEGMENT:
1619 case KVM_CAP_DEBUGREGS: 1683 case KVM_CAP_DEBUGREGS:
1620 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1684 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1685 case KVM_CAP_XSAVE:
1621 r = 1; 1686 r = 1;
1622 break; 1687 break;
1623 case KVM_CAP_COALESCED_MMIO: 1688 case KVM_CAP_COALESCED_MMIO:
@@ -1641,6 +1706,9 @@ int kvm_dev_ioctl_check_extension(long ext)
1641 case KVM_CAP_MCE: 1706 case KVM_CAP_MCE:
1642 r = KVM_MAX_MCE_BANKS; 1707 r = KVM_MAX_MCE_BANKS;
1643 break; 1708 break;
1709 case KVM_CAP_XCRS:
1710 r = cpu_has_xsave;
1711 break;
1644 default: 1712 default:
1645 r = 0; 1713 r = 0;
1646 break; 1714 break;
@@ -1717,8 +1785,28 @@ out:
1717 return r; 1785 return r;
1718} 1786}
1719 1787
1788static void wbinvd_ipi(void *garbage)
1789{
1790 wbinvd();
1791}
1792
1793static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
1794{
1795 return vcpu->kvm->arch.iommu_domain &&
1796 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
1797}
1798
1720void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1799void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1721{ 1800{
1801 /* Address WBINVD may be executed by guest */
1802 if (need_emulate_wbinvd(vcpu)) {
1803 if (kvm_x86_ops->has_wbinvd_exit())
1804 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
1805 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
1806 smp_call_function_single(vcpu->cpu,
1807 wbinvd_ipi, NULL, 1);
1808 }
1809
1722 kvm_x86_ops->vcpu_load(vcpu, cpu); 1810 kvm_x86_ops->vcpu_load(vcpu, cpu);
1723 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { 1811 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
1724 unsigned long khz = cpufreq_quick_get(cpu); 1812 unsigned long khz = cpufreq_quick_get(cpu);
@@ -1731,8 +1819,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1731 1819
1732void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1820void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1733{ 1821{
1734 kvm_put_guest_fpu(vcpu);
1735 kvm_x86_ops->vcpu_put(vcpu); 1822 kvm_x86_ops->vcpu_put(vcpu);
1823 kvm_put_guest_fpu(vcpu);
1736} 1824}
1737 1825
1738static int is_efer_nx(void) 1826static int is_efer_nx(void)
@@ -1781,7 +1869,6 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1781 if (copy_from_user(cpuid_entries, entries, 1869 if (copy_from_user(cpuid_entries, entries,
1782 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 1870 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
1783 goto out_free; 1871 goto out_free;
1784 vcpu_load(vcpu);
1785 for (i = 0; i < cpuid->nent; i++) { 1872 for (i = 0; i < cpuid->nent; i++) {
1786 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; 1873 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
1787 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; 1874 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
@@ -1799,7 +1886,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1799 r = 0; 1886 r = 0;
1800 kvm_apic_set_version(vcpu); 1887 kvm_apic_set_version(vcpu);
1801 kvm_x86_ops->cpuid_update(vcpu); 1888 kvm_x86_ops->cpuid_update(vcpu);
1802 vcpu_put(vcpu); 1889 update_cpuid(vcpu);
1803 1890
1804out_free: 1891out_free:
1805 vfree(cpuid_entries); 1892 vfree(cpuid_entries);
@@ -1820,11 +1907,10 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1820 if (copy_from_user(&vcpu->arch.cpuid_entries, entries, 1907 if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
1821 cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1908 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
1822 goto out; 1909 goto out;
1823 vcpu_load(vcpu);
1824 vcpu->arch.cpuid_nent = cpuid->nent; 1910 vcpu->arch.cpuid_nent = cpuid->nent;
1825 kvm_apic_set_version(vcpu); 1911 kvm_apic_set_version(vcpu);
1826 kvm_x86_ops->cpuid_update(vcpu); 1912 kvm_x86_ops->cpuid_update(vcpu);
1827 vcpu_put(vcpu); 1913 update_cpuid(vcpu);
1828 return 0; 1914 return 0;
1829 1915
1830out: 1916out:
@@ -1837,7 +1923,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1837{ 1923{
1838 int r; 1924 int r;
1839 1925
1840 vcpu_load(vcpu);
1841 r = -E2BIG; 1926 r = -E2BIG;
1842 if (cpuid->nent < vcpu->arch.cpuid_nent) 1927 if (cpuid->nent < vcpu->arch.cpuid_nent)
1843 goto out; 1928 goto out;
@@ -1849,7 +1934,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1849 1934
1850out: 1935out:
1851 cpuid->nent = vcpu->arch.cpuid_nent; 1936 cpuid->nent = vcpu->arch.cpuid_nent;
1852 vcpu_put(vcpu);
1853 return r; 1937 return r;
1854} 1938}
1855 1939
@@ -1901,13 +1985,13 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1901 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1985 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1902 /* cpuid 1.ecx */ 1986 /* cpuid 1.ecx */
1903 const u32 kvm_supported_word4_x86_features = 1987 const u32 kvm_supported_word4_x86_features =
1904 F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | 1988 F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
1905 0 /* DS-CPL, VMX, SMX, EST */ | 1989 0 /* DS-CPL, VMX, SMX, EST */ |
1906 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 1990 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
1907 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1908 0 /* Reserved, DCA */ | F(XMM4_1) | 1992 0 /* Reserved, DCA */ | F(XMM4_1) |
1909 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
1910 0 /* Reserved, XSAVE, OSXSAVE */; 1994 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX);
1911 /* cpuid 0x80000001.ecx */ 1995 /* cpuid 0x80000001.ecx */
1912 const u32 kvm_supported_word6_x86_features = 1996 const u32 kvm_supported_word6_x86_features =
1913 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | 1997 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
@@ -1922,7 +2006,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1922 2006
1923 switch (function) { 2007 switch (function) {
1924 case 0: 2008 case 0:
1925 entry->eax = min(entry->eax, (u32)0xb); 2009 entry->eax = min(entry->eax, (u32)0xd);
1926 break; 2010 break;
1927 case 1: 2011 case 1:
1928 entry->edx &= kvm_supported_word0_x86_features; 2012 entry->edx &= kvm_supported_word0_x86_features;
@@ -1980,6 +2064,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1980 } 2064 }
1981 break; 2065 break;
1982 } 2066 }
2067 case 0xd: {
2068 int i;
2069
2070 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2071 for (i = 1; *nent < maxnent; ++i) {
2072 if (entry[i - 1].eax == 0 && i != 2)
2073 break;
2074 do_cpuid_1_ent(&entry[i], function, i);
2075 entry[i].flags |=
2076 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2077 ++*nent;
2078 }
2079 break;
2080 }
1983 case KVM_CPUID_SIGNATURE: { 2081 case KVM_CPUID_SIGNATURE: {
1984 char signature[12] = "KVMKVMKVM\0\0"; 2082 char signature[12] = "KVMKVMKVM\0\0";
1985 u32 *sigptr = (u32 *)signature; 2083 u32 *sigptr = (u32 *)signature;
@@ -2081,9 +2179,7 @@ out:
2081static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2179static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2082 struct kvm_lapic_state *s) 2180 struct kvm_lapic_state *s)
2083{ 2181{
2084 vcpu_load(vcpu);
2085 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); 2182 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2086 vcpu_put(vcpu);
2087 2183
2088 return 0; 2184 return 0;
2089} 2185}
@@ -2091,11 +2187,9 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2091static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, 2187static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2092 struct kvm_lapic_state *s) 2188 struct kvm_lapic_state *s)
2093{ 2189{
2094 vcpu_load(vcpu);
2095 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); 2190 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
2096 kvm_apic_post_state_restore(vcpu); 2191 kvm_apic_post_state_restore(vcpu);
2097 update_cr8_intercept(vcpu); 2192 update_cr8_intercept(vcpu);
2098 vcpu_put(vcpu);
2099 2193
2100 return 0; 2194 return 0;
2101} 2195}
@@ -2107,20 +2201,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2107 return -EINVAL; 2201 return -EINVAL;
2108 if (irqchip_in_kernel(vcpu->kvm)) 2202 if (irqchip_in_kernel(vcpu->kvm))
2109 return -ENXIO; 2203 return -ENXIO;
2110 vcpu_load(vcpu);
2111 2204
2112 kvm_queue_interrupt(vcpu, irq->irq, false); 2205 kvm_queue_interrupt(vcpu, irq->irq, false);
2113 2206
2114 vcpu_put(vcpu);
2115
2116 return 0; 2207 return 0;
2117} 2208}
2118 2209
2119static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) 2210static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2120{ 2211{
2121 vcpu_load(vcpu);
2122 kvm_inject_nmi(vcpu); 2212 kvm_inject_nmi(vcpu);
2123 vcpu_put(vcpu);
2124 2213
2125 return 0; 2214 return 0;
2126} 2215}
@@ -2140,7 +2229,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2140 int r; 2229 int r;
2141 unsigned bank_num = mcg_cap & 0xff, bank; 2230 unsigned bank_num = mcg_cap & 0xff, bank;
2142 2231
2143 vcpu_load(vcpu);
2144 r = -EINVAL; 2232 r = -EINVAL;
2145 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2233 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2146 goto out; 2234 goto out;
@@ -2155,7 +2243,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2155 for (bank = 0; bank < bank_num; bank++) 2243 for (bank = 0; bank < bank_num; bank++)
2156 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2244 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2157out: 2245out:
2158 vcpu_put(vcpu);
2159 return r; 2246 return r;
2160} 2247}
2161 2248
@@ -2188,7 +2275,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2188 printk(KERN_DEBUG "kvm: set_mce: " 2275 printk(KERN_DEBUG "kvm: set_mce: "
2189 "injects mce exception while " 2276 "injects mce exception while "
2190 "previous one is in progress!\n"); 2277 "previous one is in progress!\n");
2191 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 2278 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2192 return 0; 2279 return 0;
2193 } 2280 }
2194 if (banks[1] & MCI_STATUS_VAL) 2281 if (banks[1] & MCI_STATUS_VAL)
@@ -2213,8 +2300,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2213static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 2300static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2214 struct kvm_vcpu_events *events) 2301 struct kvm_vcpu_events *events)
2215{ 2302{
2216 vcpu_load(vcpu);
2217
2218 events->exception.injected = 2303 events->exception.injected =
2219 vcpu->arch.exception.pending && 2304 vcpu->arch.exception.pending &&
2220 !kvm_exception_is_soft(vcpu->arch.exception.nr); 2305 !kvm_exception_is_soft(vcpu->arch.exception.nr);
@@ -2239,8 +2324,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2239 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2324 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2240 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2325 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2241 | KVM_VCPUEVENT_VALID_SHADOW); 2326 | KVM_VCPUEVENT_VALID_SHADOW);
2242
2243 vcpu_put(vcpu);
2244} 2327}
2245 2328
2246static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, 2329static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
@@ -2251,8 +2334,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2251 | KVM_VCPUEVENT_VALID_SHADOW)) 2334 | KVM_VCPUEVENT_VALID_SHADOW))
2252 return -EINVAL; 2335 return -EINVAL;
2253 2336
2254 vcpu_load(vcpu);
2255
2256 vcpu->arch.exception.pending = events->exception.injected; 2337 vcpu->arch.exception.pending = events->exception.injected;
2257 vcpu->arch.exception.nr = events->exception.nr; 2338 vcpu->arch.exception.nr = events->exception.nr;
2258 vcpu->arch.exception.has_error_code = events->exception.has_error_code; 2339 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -2275,22 +2356,16 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2275 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) 2356 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2276 vcpu->arch.sipi_vector = events->sipi_vector; 2357 vcpu->arch.sipi_vector = events->sipi_vector;
2277 2358
2278 vcpu_put(vcpu);
2279
2280 return 0; 2359 return 0;
2281} 2360}
2282 2361
2283static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2362static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2284 struct kvm_debugregs *dbgregs) 2363 struct kvm_debugregs *dbgregs)
2285{ 2364{
2286 vcpu_load(vcpu);
2287
2288 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2365 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2289 dbgregs->dr6 = vcpu->arch.dr6; 2366 dbgregs->dr6 = vcpu->arch.dr6;
2290 dbgregs->dr7 = vcpu->arch.dr7; 2367 dbgregs->dr7 = vcpu->arch.dr7;
2291 dbgregs->flags = 0; 2368 dbgregs->flags = 0;
2292
2293 vcpu_put(vcpu);
2294} 2369}
2295 2370
2296static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, 2371static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
@@ -2299,40 +2374,113 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2299 if (dbgregs->flags) 2374 if (dbgregs->flags)
2300 return -EINVAL; 2375 return -EINVAL;
2301 2376
2302 vcpu_load(vcpu);
2303
2304 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2377 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2305 vcpu->arch.dr6 = dbgregs->dr6; 2378 vcpu->arch.dr6 = dbgregs->dr6;
2306 vcpu->arch.dr7 = dbgregs->dr7; 2379 vcpu->arch.dr7 = dbgregs->dr7;
2307 2380
2308 vcpu_put(vcpu); 2381 return 0;
2382}
2383
2384static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2385 struct kvm_xsave *guest_xsave)
2386{
2387 if (cpu_has_xsave)
2388 memcpy(guest_xsave->region,
2389 &vcpu->arch.guest_fpu.state->xsave,
2390 sizeof(struct xsave_struct));
2391 else {
2392 memcpy(guest_xsave->region,
2393 &vcpu->arch.guest_fpu.state->fxsave,
2394 sizeof(struct i387_fxsave_struct));
2395 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2396 XSTATE_FPSSE;
2397 }
2398}
2399
2400static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
2401 struct kvm_xsave *guest_xsave)
2402{
2403 u64 xstate_bv =
2404 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
2309 2405
2406 if (cpu_has_xsave)
2407 memcpy(&vcpu->arch.guest_fpu.state->xsave,
2408 guest_xsave->region, sizeof(struct xsave_struct));
2409 else {
2410 if (xstate_bv & ~XSTATE_FPSSE)
2411 return -EINVAL;
2412 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
2413 guest_xsave->region, sizeof(struct i387_fxsave_struct));
2414 }
2310 return 0; 2415 return 0;
2311} 2416}
2312 2417
2418static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
2419 struct kvm_xcrs *guest_xcrs)
2420{
2421 if (!cpu_has_xsave) {
2422 guest_xcrs->nr_xcrs = 0;
2423 return;
2424 }
2425
2426 guest_xcrs->nr_xcrs = 1;
2427 guest_xcrs->flags = 0;
2428 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
2429 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
2430}
2431
2432static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2433 struct kvm_xcrs *guest_xcrs)
2434{
2435 int i, r = 0;
2436
2437 if (!cpu_has_xsave)
2438 return -EINVAL;
2439
2440 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
2441 return -EINVAL;
2442
2443 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
2444 /* Only support XCR0 currently */
2445 if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
2446 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
2447 guest_xcrs->xcrs[0].value);
2448 break;
2449 }
2450 if (r)
2451 r = -EINVAL;
2452 return r;
2453}
2454
2313long kvm_arch_vcpu_ioctl(struct file *filp, 2455long kvm_arch_vcpu_ioctl(struct file *filp,
2314 unsigned int ioctl, unsigned long arg) 2456 unsigned int ioctl, unsigned long arg)
2315{ 2457{
2316 struct kvm_vcpu *vcpu = filp->private_data; 2458 struct kvm_vcpu *vcpu = filp->private_data;
2317 void __user *argp = (void __user *)arg; 2459 void __user *argp = (void __user *)arg;
2318 int r; 2460 int r;
2319 struct kvm_lapic_state *lapic = NULL; 2461 union {
2462 struct kvm_lapic_state *lapic;
2463 struct kvm_xsave *xsave;
2464 struct kvm_xcrs *xcrs;
2465 void *buffer;
2466 } u;
2320 2467
2468 u.buffer = NULL;
2321 switch (ioctl) { 2469 switch (ioctl) {
2322 case KVM_GET_LAPIC: { 2470 case KVM_GET_LAPIC: {
2323 r = -EINVAL; 2471 r = -EINVAL;
2324 if (!vcpu->arch.apic) 2472 if (!vcpu->arch.apic)
2325 goto out; 2473 goto out;
2326 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2474 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2327 2475
2328 r = -ENOMEM; 2476 r = -ENOMEM;
2329 if (!lapic) 2477 if (!u.lapic)
2330 goto out; 2478 goto out;
2331 r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); 2479 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
2332 if (r) 2480 if (r)
2333 goto out; 2481 goto out;
2334 r = -EFAULT; 2482 r = -EFAULT;
2335 if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) 2483 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
2336 goto out; 2484 goto out;
2337 r = 0; 2485 r = 0;
2338 break; 2486 break;
@@ -2341,14 +2489,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2341 r = -EINVAL; 2489 r = -EINVAL;
2342 if (!vcpu->arch.apic) 2490 if (!vcpu->arch.apic)
2343 goto out; 2491 goto out;
2344 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2492 u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2345 r = -ENOMEM; 2493 r = -ENOMEM;
2346 if (!lapic) 2494 if (!u.lapic)
2347 goto out; 2495 goto out;
2348 r = -EFAULT; 2496 r = -EFAULT;
2349 if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) 2497 if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
2350 goto out; 2498 goto out;
2351 r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); 2499 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
2352 if (r) 2500 if (r)
2353 goto out; 2501 goto out;
2354 r = 0; 2502 r = 0;
@@ -2464,9 +2612,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2464 r = -EFAULT; 2612 r = -EFAULT;
2465 if (copy_from_user(&mce, argp, sizeof mce)) 2613 if (copy_from_user(&mce, argp, sizeof mce))
2466 goto out; 2614 goto out;
2467 vcpu_load(vcpu);
2468 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2615 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
2469 vcpu_put(vcpu);
2470 break; 2616 break;
2471 } 2617 }
2472 case KVM_GET_VCPU_EVENTS: { 2618 case KVM_GET_VCPU_EVENTS: {
@@ -2513,11 +2659,67 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2513 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); 2659 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
2514 break; 2660 break;
2515 } 2661 }
2662 case KVM_GET_XSAVE: {
2663 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
2664 r = -ENOMEM;
2665 if (!u.xsave)
2666 break;
2667
2668 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
2669
2670 r = -EFAULT;
2671 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
2672 break;
2673 r = 0;
2674 break;
2675 }
2676 case KVM_SET_XSAVE: {
2677 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
2678 r = -ENOMEM;
2679 if (!u.xsave)
2680 break;
2681
2682 r = -EFAULT;
2683 if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
2684 break;
2685
2686 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
2687 break;
2688 }
2689 case KVM_GET_XCRS: {
2690 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
2691 r = -ENOMEM;
2692 if (!u.xcrs)
2693 break;
2694
2695 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
2696
2697 r = -EFAULT;
2698 if (copy_to_user(argp, u.xcrs,
2699 sizeof(struct kvm_xcrs)))
2700 break;
2701 r = 0;
2702 break;
2703 }
2704 case KVM_SET_XCRS: {
2705 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
2706 r = -ENOMEM;
2707 if (!u.xcrs)
2708 break;
2709
2710 r = -EFAULT;
2711 if (copy_from_user(u.xcrs, argp,
2712 sizeof(struct kvm_xcrs)))
2713 break;
2714
2715 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
2716 break;
2717 }
2516 default: 2718 default:
2517 r = -EINVAL; 2719 r = -EINVAL;
2518 } 2720 }
2519out: 2721out:
2520 kfree(lapic); 2722 kfree(u.buffer);
2521 return r; 2723 return r;
2522} 2724}
2523 2725
@@ -2560,115 +2762,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
2560 return kvm->arch.n_alloc_mmu_pages; 2762 return kvm->arch.n_alloc_mmu_pages;
2561} 2763}
2562 2764
2563gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2564{
2565 int i;
2566 struct kvm_mem_alias *alias;
2567 struct kvm_mem_aliases *aliases;
2568
2569 aliases = kvm_aliases(kvm);
2570
2571 for (i = 0; i < aliases->naliases; ++i) {
2572 alias = &aliases->aliases[i];
2573 if (alias->flags & KVM_ALIAS_INVALID)
2574 continue;
2575 if (gfn >= alias->base_gfn
2576 && gfn < alias->base_gfn + alias->npages)
2577 return alias->target_gfn + gfn - alias->base_gfn;
2578 }
2579 return gfn;
2580}
2581
2582gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2583{
2584 int i;
2585 struct kvm_mem_alias *alias;
2586 struct kvm_mem_aliases *aliases;
2587
2588 aliases = kvm_aliases(kvm);
2589
2590 for (i = 0; i < aliases->naliases; ++i) {
2591 alias = &aliases->aliases[i];
2592 if (gfn >= alias->base_gfn
2593 && gfn < alias->base_gfn + alias->npages)
2594 return alias->target_gfn + gfn - alias->base_gfn;
2595 }
2596 return gfn;
2597}
2598
2599/*
2600 * Set a new alias region. Aliases map a portion of physical memory into
2601 * another portion. This is useful for memory windows, for example the PC
2602 * VGA region.
2603 */
2604static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2605 struct kvm_memory_alias *alias)
2606{
2607 int r, n;
2608 struct kvm_mem_alias *p;
2609 struct kvm_mem_aliases *aliases, *old_aliases;
2610
2611 r = -EINVAL;
2612 /* General sanity checks */
2613 if (alias->memory_size & (PAGE_SIZE - 1))
2614 goto out;
2615 if (alias->guest_phys_addr & (PAGE_SIZE - 1))
2616 goto out;
2617 if (alias->slot >= KVM_ALIAS_SLOTS)
2618 goto out;
2619 if (alias->guest_phys_addr + alias->memory_size
2620 < alias->guest_phys_addr)
2621 goto out;
2622 if (alias->target_phys_addr + alias->memory_size
2623 < alias->target_phys_addr)
2624 goto out;
2625
2626 r = -ENOMEM;
2627 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2628 if (!aliases)
2629 goto out;
2630
2631 mutex_lock(&kvm->slots_lock);
2632
2633 /* invalidate any gfn reference in case of deletion/shrinking */
2634 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2635 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
2636 old_aliases = kvm->arch.aliases;
2637 rcu_assign_pointer(kvm->arch.aliases, aliases);
2638 synchronize_srcu_expedited(&kvm->srcu);
2639 kvm_mmu_zap_all(kvm);
2640 kfree(old_aliases);
2641
2642 r = -ENOMEM;
2643 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2644 if (!aliases)
2645 goto out_unlock;
2646
2647 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2648
2649 p = &aliases->aliases[alias->slot];
2650 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
2651 p->npages = alias->memory_size >> PAGE_SHIFT;
2652 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
2653 p->flags &= ~(KVM_ALIAS_INVALID);
2654
2655 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
2656 if (aliases->aliases[n - 1].npages)
2657 break;
2658 aliases->naliases = n;
2659
2660 old_aliases = kvm->arch.aliases;
2661 rcu_assign_pointer(kvm->arch.aliases, aliases);
2662 synchronize_srcu_expedited(&kvm->srcu);
2663 kfree(old_aliases);
2664 r = 0;
2665
2666out_unlock:
2667 mutex_unlock(&kvm->slots_lock);
2668out:
2669 return r;
2670}
2671
2672static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 2765static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2673{ 2766{
2674 int r; 2767 int r;
@@ -2797,7 +2890,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2797 struct kvm_memory_slot *memslot; 2890 struct kvm_memory_slot *memslot;
2798 unsigned long n; 2891 unsigned long n;
2799 unsigned long is_dirty = 0; 2892 unsigned long is_dirty = 0;
2800 unsigned long *dirty_bitmap = NULL;
2801 2893
2802 mutex_lock(&kvm->slots_lock); 2894 mutex_lock(&kvm->slots_lock);
2803 2895
@@ -2812,27 +2904,30 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2812 2904
2813 n = kvm_dirty_bitmap_bytes(memslot); 2905 n = kvm_dirty_bitmap_bytes(memslot);
2814 2906
2815 r = -ENOMEM;
2816 dirty_bitmap = vmalloc(n);
2817 if (!dirty_bitmap)
2818 goto out;
2819 memset(dirty_bitmap, 0, n);
2820
2821 for (i = 0; !is_dirty && i < n/sizeof(long); i++) 2907 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
2822 is_dirty = memslot->dirty_bitmap[i]; 2908 is_dirty = memslot->dirty_bitmap[i];
2823 2909
2824 /* If nothing is dirty, don't bother messing with page tables. */ 2910 /* If nothing is dirty, don't bother messing with page tables. */
2825 if (is_dirty) { 2911 if (is_dirty) {
2826 struct kvm_memslots *slots, *old_slots; 2912 struct kvm_memslots *slots, *old_slots;
2913 unsigned long *dirty_bitmap;
2827 2914
2828 spin_lock(&kvm->mmu_lock); 2915 spin_lock(&kvm->mmu_lock);
2829 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2916 kvm_mmu_slot_remove_write_access(kvm, log->slot);
2830 spin_unlock(&kvm->mmu_lock); 2917 spin_unlock(&kvm->mmu_lock);
2831 2918
2832 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 2919 r = -ENOMEM;
2833 if (!slots) 2920 dirty_bitmap = vmalloc(n);
2834 goto out_free; 2921 if (!dirty_bitmap)
2922 goto out;
2923 memset(dirty_bitmap, 0, n);
2835 2924
2925 r = -ENOMEM;
2926 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
2927 if (!slots) {
2928 vfree(dirty_bitmap);
2929 goto out;
2930 }
2836 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 2931 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
2837 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; 2932 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
2838 2933
@@ -2841,13 +2936,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2841 synchronize_srcu_expedited(&kvm->srcu); 2936 synchronize_srcu_expedited(&kvm->srcu);
2842 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; 2937 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
2843 kfree(old_slots); 2938 kfree(old_slots);
2939
2940 r = -EFAULT;
2941 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
2942 vfree(dirty_bitmap);
2943 goto out;
2944 }
2945 vfree(dirty_bitmap);
2946 } else {
2947 r = -EFAULT;
2948 if (clear_user(log->dirty_bitmap, n))
2949 goto out;
2844 } 2950 }
2845 2951
2846 r = 0; 2952 r = 0;
2847 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
2848 r = -EFAULT;
2849out_free:
2850 vfree(dirty_bitmap);
2851out: 2953out:
2852 mutex_unlock(&kvm->slots_lock); 2954 mutex_unlock(&kvm->slots_lock);
2853 return r; 2955 return r;
@@ -2867,7 +2969,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
2867 union { 2969 union {
2868 struct kvm_pit_state ps; 2970 struct kvm_pit_state ps;
2869 struct kvm_pit_state2 ps2; 2971 struct kvm_pit_state2 ps2;
2870 struct kvm_memory_alias alias;
2871 struct kvm_pit_config pit_config; 2972 struct kvm_pit_config pit_config;
2872 } u; 2973 } u;
2873 2974
@@ -2888,22 +2989,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
2888 goto out; 2989 goto out;
2889 break; 2990 break;
2890 } 2991 }
2891 case KVM_SET_MEMORY_REGION: {
2892 struct kvm_memory_region kvm_mem;
2893 struct kvm_userspace_memory_region kvm_userspace_mem;
2894
2895 r = -EFAULT;
2896 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
2897 goto out;
2898 kvm_userspace_mem.slot = kvm_mem.slot;
2899 kvm_userspace_mem.flags = kvm_mem.flags;
2900 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
2901 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
2902 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
2903 if (r)
2904 goto out;
2905 break;
2906 }
2907 case KVM_SET_NR_MMU_PAGES: 2992 case KVM_SET_NR_MMU_PAGES:
2908 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); 2993 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
2909 if (r) 2994 if (r)
@@ -2912,14 +2997,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
2912 case KVM_GET_NR_MMU_PAGES: 2997 case KVM_GET_NR_MMU_PAGES:
2913 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); 2998 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
2914 break; 2999 break;
2915 case KVM_SET_MEMORY_ALIAS:
2916 r = -EFAULT;
2917 if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
2918 goto out;
2919 r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
2920 if (r)
2921 goto out;
2922 break;
2923 case KVM_CREATE_IRQCHIP: { 3000 case KVM_CREATE_IRQCHIP: {
2924 struct kvm_pic *vpic; 3001 struct kvm_pic *vpic;
2925 3002
@@ -3259,7 +3336,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3259 } 3336 }
3260 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); 3337 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3261 if (ret < 0) { 3338 if (ret < 0) {
3262 r = X86EMUL_UNHANDLEABLE; 3339 r = X86EMUL_IO_NEEDED;
3263 goto out; 3340 goto out;
3264 } 3341 }
3265 3342
@@ -3315,7 +3392,7 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val,
3315 } 3392 }
3316 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); 3393 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
3317 if (ret < 0) { 3394 if (ret < 0) {
3318 r = X86EMUL_UNHANDLEABLE; 3395 r = X86EMUL_IO_NEEDED;
3319 goto out; 3396 goto out;
3320 } 3397 }
3321 3398
@@ -3330,10 +3407,10 @@ out:
3330static int emulator_read_emulated(unsigned long addr, 3407static int emulator_read_emulated(unsigned long addr,
3331 void *val, 3408 void *val,
3332 unsigned int bytes, 3409 unsigned int bytes,
3410 unsigned int *error_code,
3333 struct kvm_vcpu *vcpu) 3411 struct kvm_vcpu *vcpu)
3334{ 3412{
3335 gpa_t gpa; 3413 gpa_t gpa;
3336 u32 error_code;
3337 3414
3338 if (vcpu->mmio_read_completed) { 3415 if (vcpu->mmio_read_completed) {
3339 memcpy(val, vcpu->mmio_data, bytes); 3416 memcpy(val, vcpu->mmio_data, bytes);
@@ -3343,12 +3420,10 @@ static int emulator_read_emulated(unsigned long addr,
3343 return X86EMUL_CONTINUE; 3420 return X86EMUL_CONTINUE;
3344 } 3421 }
3345 3422
3346 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); 3423 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code);
3347 3424
3348 if (gpa == UNMAPPED_GVA) { 3425 if (gpa == UNMAPPED_GVA)
3349 kvm_inject_page_fault(vcpu, addr, error_code);
3350 return X86EMUL_PROPAGATE_FAULT; 3426 return X86EMUL_PROPAGATE_FAULT;
3351 }
3352 3427
3353 /* For APIC access vmexit */ 3428 /* For APIC access vmexit */
3354 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3429 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -3370,11 +3445,12 @@ mmio:
3370 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 3445 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
3371 3446
3372 vcpu->mmio_needed = 1; 3447 vcpu->mmio_needed = 1;
3373 vcpu->mmio_phys_addr = gpa; 3448 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3374 vcpu->mmio_size = bytes; 3449 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3375 vcpu->mmio_is_write = 0; 3450 vcpu->run->mmio.len = vcpu->mmio_size = bytes;
3451 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
3376 3452
3377 return X86EMUL_UNHANDLEABLE; 3453 return X86EMUL_IO_NEEDED;
3378} 3454}
3379 3455
3380int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 3456int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -3392,17 +3468,15 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
3392static int emulator_write_emulated_onepage(unsigned long addr, 3468static int emulator_write_emulated_onepage(unsigned long addr,
3393 const void *val, 3469 const void *val,
3394 unsigned int bytes, 3470 unsigned int bytes,
3471 unsigned int *error_code,
3395 struct kvm_vcpu *vcpu) 3472 struct kvm_vcpu *vcpu)
3396{ 3473{
3397 gpa_t gpa; 3474 gpa_t gpa;
3398 u32 error_code;
3399 3475
3400 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); 3476 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code);
3401 3477
3402 if (gpa == UNMAPPED_GVA) { 3478 if (gpa == UNMAPPED_GVA)
3403 kvm_inject_page_fault(vcpu, addr, error_code);
3404 return X86EMUL_PROPAGATE_FAULT; 3479 return X86EMUL_PROPAGATE_FAULT;
3405 }
3406 3480
3407 /* For APIC access vmexit */ 3481 /* For APIC access vmexit */
3408 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3482 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -3420,10 +3494,11 @@ mmio:
3420 return X86EMUL_CONTINUE; 3494 return X86EMUL_CONTINUE;
3421 3495
3422 vcpu->mmio_needed = 1; 3496 vcpu->mmio_needed = 1;
3423 vcpu->mmio_phys_addr = gpa; 3497 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3424 vcpu->mmio_size = bytes; 3498 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3425 vcpu->mmio_is_write = 1; 3499 vcpu->run->mmio.len = vcpu->mmio_size = bytes;
3426 memcpy(vcpu->mmio_data, val, bytes); 3500 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
3501 memcpy(vcpu->run->mmio.data, val, bytes);
3427 3502
3428 return X86EMUL_CONTINUE; 3503 return X86EMUL_CONTINUE;
3429} 3504}
@@ -3431,6 +3506,7 @@ mmio:
3431int emulator_write_emulated(unsigned long addr, 3506int emulator_write_emulated(unsigned long addr,
3432 const void *val, 3507 const void *val,
3433 unsigned int bytes, 3508 unsigned int bytes,
3509 unsigned int *error_code,
3434 struct kvm_vcpu *vcpu) 3510 struct kvm_vcpu *vcpu)
3435{ 3511{
3436 /* Crossing a page boundary? */ 3512 /* Crossing a page boundary? */
@@ -3438,16 +3514,17 @@ int emulator_write_emulated(unsigned long addr,
3438 int rc, now; 3514 int rc, now;
3439 3515
3440 now = -addr & ~PAGE_MASK; 3516 now = -addr & ~PAGE_MASK;
3441 rc = emulator_write_emulated_onepage(addr, val, now, vcpu); 3517 rc = emulator_write_emulated_onepage(addr, val, now, error_code,
3518 vcpu);
3442 if (rc != X86EMUL_CONTINUE) 3519 if (rc != X86EMUL_CONTINUE)
3443 return rc; 3520 return rc;
3444 addr += now; 3521 addr += now;
3445 val += now; 3522 val += now;
3446 bytes -= now; 3523 bytes -= now;
3447 } 3524 }
3448 return emulator_write_emulated_onepage(addr, val, bytes, vcpu); 3525 return emulator_write_emulated_onepage(addr, val, bytes, error_code,
3526 vcpu);
3449} 3527}
3450EXPORT_SYMBOL_GPL(emulator_write_emulated);
3451 3528
3452#define CMPXCHG_TYPE(t, ptr, old, new) \ 3529#define CMPXCHG_TYPE(t, ptr, old, new) \
3453 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) 3530 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
@@ -3463,6 +3540,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
3463 const void *old, 3540 const void *old,
3464 const void *new, 3541 const void *new,
3465 unsigned int bytes, 3542 unsigned int bytes,
3543 unsigned int *error_code,
3466 struct kvm_vcpu *vcpu) 3544 struct kvm_vcpu *vcpu)
3467{ 3545{
3468 gpa_t gpa; 3546 gpa_t gpa;
@@ -3484,6 +3562,10 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
3484 goto emul_write; 3562 goto emul_write;
3485 3563
3486 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3564 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3565 if (is_error_page(page)) {
3566 kvm_release_page_clean(page);
3567 goto emul_write;
3568 }
3487 3569
3488 kaddr = kmap_atomic(page, KM_USER0); 3570 kaddr = kmap_atomic(page, KM_USER0);
3489 kaddr += offset_in_page(gpa); 3571 kaddr += offset_in_page(gpa);
@@ -3516,7 +3598,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
3516emul_write: 3598emul_write:
3517 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3599 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3518 3600
3519 return emulator_write_emulated(addr, new, bytes, vcpu); 3601 return emulator_write_emulated(addr, new, bytes, error_code, vcpu);
3520} 3602}
3521 3603
3522static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) 3604static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3604,42 +3686,38 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
3604 return X86EMUL_CONTINUE; 3686 return X86EMUL_CONTINUE;
3605} 3687}
3606 3688
3607int emulate_clts(struct kvm_vcpu *vcpu) 3689int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
3608{ 3690{
3609 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); 3691 if (!need_emulate_wbinvd(vcpu))
3610 kvm_x86_ops->fpu_activate(vcpu); 3692 return X86EMUL_CONTINUE;
3693
3694 if (kvm_x86_ops->has_wbinvd_exit()) {
3695 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
3696 wbinvd_ipi, NULL, 1);
3697 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
3698 }
3699 wbinvd();
3611 return X86EMUL_CONTINUE; 3700 return X86EMUL_CONTINUE;
3612} 3701}
3702EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
3613 3703
3614int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3704int emulate_clts(struct kvm_vcpu *vcpu)
3615{ 3705{
3616 return kvm_get_dr(ctxt->vcpu, dr, dest); 3706 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
3707 kvm_x86_ops->fpu_activate(vcpu);
3708 return X86EMUL_CONTINUE;
3617} 3709}
3618 3710
3619int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3711int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
3620{ 3712{
3621 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3713 return _kvm_get_dr(vcpu, dr, dest);
3622
3623 return kvm_set_dr(ctxt->vcpu, dr, value & mask);
3624} 3714}
3625 3715
3626void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3716int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
3627{ 3717{
3628 u8 opcodes[4];
3629 unsigned long rip = kvm_rip_read(vcpu);
3630 unsigned long rip_linear;
3631
3632 if (!printk_ratelimit())
3633 return;
3634 3718
3635 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3719 return __kvm_set_dr(vcpu, dr, value);
3636
3637 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
3638
3639 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
3640 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
3641} 3720}
3642EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3643 3721
3644static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3722static u64 mk_cr_64(u64 curr_cr, u32 new_val)
3645{ 3723{
@@ -3674,27 +3752,32 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
3674 return value; 3752 return value;
3675} 3753}
3676 3754
3677static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) 3755static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
3678{ 3756{
3757 int res = 0;
3758
3679 switch (cr) { 3759 switch (cr) {
3680 case 0: 3760 case 0:
3681 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 3761 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3682 break; 3762 break;
3683 case 2: 3763 case 2:
3684 vcpu->arch.cr2 = val; 3764 vcpu->arch.cr2 = val;
3685 break; 3765 break;
3686 case 3: 3766 case 3:
3687 kvm_set_cr3(vcpu, val); 3767 res = kvm_set_cr3(vcpu, val);
3688 break; 3768 break;
3689 case 4: 3769 case 4:
3690 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 3770 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3691 break; 3771 break;
3692 case 8: 3772 case 8:
3693 kvm_set_cr8(vcpu, val & 0xfUL); 3773 res = __kvm_set_cr8(vcpu, val & 0xfUL);
3694 break; 3774 break;
3695 default: 3775 default:
3696 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3776 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3777 res = -1;
3697 } 3778 }
3779
3780 return res;
3698} 3781}
3699 3782
3700static int emulator_get_cpl(struct kvm_vcpu *vcpu) 3783static int emulator_get_cpl(struct kvm_vcpu *vcpu)
@@ -3707,6 +3790,12 @@ static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
3707 kvm_x86_ops->get_gdt(vcpu, dt); 3790 kvm_x86_ops->get_gdt(vcpu, dt);
3708} 3791}
3709 3792
3793static unsigned long emulator_get_cached_segment_base(int seg,
3794 struct kvm_vcpu *vcpu)
3795{
3796 return get_segment_base(vcpu, seg);
3797}
3798
3710static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, 3799static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
3711 struct kvm_vcpu *vcpu) 3800 struct kvm_vcpu *vcpu)
3712{ 3801{
@@ -3779,11 +3868,6 @@ static void emulator_set_segment_selector(u16 sel, int seg,
3779 kvm_set_segment(vcpu, &kvm_seg, seg); 3868 kvm_set_segment(vcpu, &kvm_seg, seg);
3780} 3869}
3781 3870
3782static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
3783{
3784 kvm_x86_ops->set_rflags(vcpu, rflags);
3785}
3786
3787static struct x86_emulate_ops emulate_ops = { 3871static struct x86_emulate_ops emulate_ops = {
3788 .read_std = kvm_read_guest_virt_system, 3872 .read_std = kvm_read_guest_virt_system,
3789 .write_std = kvm_write_guest_virt_system, 3873 .write_std = kvm_write_guest_virt_system,
@@ -3797,11 +3881,15 @@ static struct x86_emulate_ops emulate_ops = {
3797 .set_cached_descriptor = emulator_set_cached_descriptor, 3881 .set_cached_descriptor = emulator_set_cached_descriptor,
3798 .get_segment_selector = emulator_get_segment_selector, 3882 .get_segment_selector = emulator_get_segment_selector,
3799 .set_segment_selector = emulator_set_segment_selector, 3883 .set_segment_selector = emulator_set_segment_selector,
3884 .get_cached_segment_base = emulator_get_cached_segment_base,
3800 .get_gdt = emulator_get_gdt, 3885 .get_gdt = emulator_get_gdt,
3801 .get_cr = emulator_get_cr, 3886 .get_cr = emulator_get_cr,
3802 .set_cr = emulator_set_cr, 3887 .set_cr = emulator_set_cr,
3803 .cpl = emulator_get_cpl, 3888 .cpl = emulator_get_cpl,
3804 .set_rflags = emulator_set_rflags, 3889 .get_dr = emulator_get_dr,
3890 .set_dr = emulator_set_dr,
3891 .set_msr = kvm_set_msr,
3892 .get_msr = kvm_get_msr,
3805}; 3893};
3806 3894
3807static void cache_all_regs(struct kvm_vcpu *vcpu) 3895static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -3812,14 +3900,75 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
3812 vcpu->arch.regs_dirty = ~0; 3900 vcpu->arch.regs_dirty = ~0;
3813} 3901}
3814 3902
3903static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
3904{
3905 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
3906 /*
 3907	 * An sti; sti sequence only disables interrupts for the first
 3908	 * instruction. So, if the last instruction, be it emulated or
 3909	 * not, left the system with the INT_STI flag enabled, it
 3910	 * means that the last instruction was an sti. We should not
 3911	 * leave the flag on in this case. The same goes for mov ss.
3912 */
3913 if (!(int_shadow & mask))
3914 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
3915}
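
toggle_interruptibility() above refuses to re-arm an interrupt shadow that the previous instruction had already set, which is what keeps a back-to-back sti; sti from extending the no-interrupt window. Below is a minimal, standalone model of that rule; shadow, SHADOW_STI and run_instruction are invented names, not the kernel's.

/* Model of the interrupt-shadow rule: a shadow covers exactly one
 * instruction and is not renewed by a second, back-to-back sti. */
#include <stdio.h>

#define SHADOW_STI 0x1

static unsigned int shadow;	/* pending interrupt shadow, expires every insn */

static void run_instruction(int is_sti)
{
	unsigned int prev = shadow;

	shadow = 0;				/* a shadow only protects the next insn */
	if (is_sti && !(prev & SHADOW_STI))	/* mirrors the !(int_shadow & mask) test */
		shadow = SHADOW_STI;
}

int main(void)
{
	run_instruction(1);
	printf("after 1st sti: shadow=%#x\n", shadow);	/* 0x1: next insn protected */
	run_instruction(1);
	printf("after 2nd sti: shadow=%#x\n", shadow);	/* 0x0: window not extended */
	return 0;
}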
3916
3917static void inject_emulated_exception(struct kvm_vcpu *vcpu)
3918{
3919 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
3920 if (ctxt->exception == PF_VECTOR)
3921 kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code);
3922 else if (ctxt->error_code_valid)
3923 kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
3924 else
3925 kvm_queue_exception(vcpu, ctxt->exception);
3926}
3927
3928static int handle_emulation_failure(struct kvm_vcpu *vcpu)
3929{
3930 ++vcpu->stat.insn_emulation_fail;
3931 trace_kvm_emulate_insn_failed(vcpu);
3932 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3933 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3934 vcpu->run->internal.ndata = 0;
3935 kvm_queue_exception(vcpu, UD_VECTOR);
3936 return EMULATE_FAIL;
3937}
3938
3939static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
3940{
3941 gpa_t gpa;
3942
3943 if (tdp_enabled)
3944 return false;
3945
3946 /*
 3947	 * If emulation was due to access to a shadowed page table
 3948	 * and it failed, try to unshadow the page and re-enter the
 3949	 * guest to let the CPU execute the instruction.
3950 */
3951 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
3952 return true;
3953
3954 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
3955
3956 if (gpa == UNMAPPED_GVA)
3957 return true; /* let cpu generate fault */
3958
3959 if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
3960 return true;
3961
3962 return false;
3963}
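
reexecute_instruction() above is essentially a short decision list: with TDP there are no shadow pages to unprotect, a successfully unprotected page or an unmapped/regular-RAM address can simply be retried in the guest, and only a real MMIO address is left to the emulation-failure path. The sketch below restates that order with placeholder booleans in place of the MMU queries; it is illustrative, not the kernel helper.

/* Decision order of reexecute_instruction(), restated with placeholder
 * booleans instead of the real MMU queries. Illustrative only. */
#include <stdbool.h>
#include <stdio.h>

static bool should_reexecute(bool tdp_enabled, bool unprotected,
			     bool gpa_unmapped, bool backed_by_ram)
{
	if (tdp_enabled)
		return false;	/* no shadow pages to unprotect */
	if (unprotected)
		return true;	/* write-protected page table unshadowed: retry */
	if (gpa_unmapped)
		return true;	/* let the CPU raise the fault itself */
	if (backed_by_ram)
		return true;	/* ordinary memory: retry in the guest */
	return false;		/* MMIO: emulation really is required */
}

int main(void)
{
	printf("%d\n", should_reexecute(false, false, false, false));	/* 0: MMIO */
	printf("%d\n", should_reexecute(false, true, false, false));	/* 1: retry */
	return 0;
}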
3964
3815int emulate_instruction(struct kvm_vcpu *vcpu, 3965int emulate_instruction(struct kvm_vcpu *vcpu,
3816 unsigned long cr2, 3966 unsigned long cr2,
3817 u16 error_code, 3967 u16 error_code,
3818 int emulation_type) 3968 int emulation_type)
3819{ 3969{
3820 int r, shadow_mask; 3970 int r;
3821 struct decode_cache *c; 3971 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
3822 struct kvm_run *run = vcpu->run;
3823 3972
3824 kvm_clear_exception_queue(vcpu); 3973 kvm_clear_exception_queue(vcpu);
3825 vcpu->arch.mmio_fault_cr2 = cr2; 3974 vcpu->arch.mmio_fault_cr2 = cr2;
@@ -3831,8 +3980,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3831 */ 3980 */
3832 cache_all_regs(vcpu); 3981 cache_all_regs(vcpu);
3833 3982
3834 vcpu->mmio_is_write = 0;
3835
3836 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3983 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
3837 int cs_db, cs_l; 3984 int cs_db, cs_l;
3838 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3985 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
@@ -3846,13 +3993,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3846 ? X86EMUL_MODE_VM86 : cs_l 3993 ? X86EMUL_MODE_VM86 : cs_l
3847 ? X86EMUL_MODE_PROT64 : cs_db 3994 ? X86EMUL_MODE_PROT64 : cs_db
3848 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3995 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3996 memset(c, 0, sizeof(struct decode_cache));
3997 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
3998 vcpu->arch.emulate_ctxt.interruptibility = 0;
3999 vcpu->arch.emulate_ctxt.exception = -1;
3849 4000
3850 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 4001 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3851 trace_kvm_emulate_insn_start(vcpu); 4002 trace_kvm_emulate_insn_start(vcpu);
3852 4003
3853 /* Only allow emulation of specific instructions on #UD 4004 /* Only allow emulation of specific instructions on #UD
3854	 * (namely VMMCALL, sysenter, sysexit, syscall) */ 4005	 * (namely VMMCALL, sysenter, sysexit, syscall) */
3855 c = &vcpu->arch.emulate_ctxt.decode;
3856 if (emulation_type & EMULTYPE_TRAP_UD) { 4006 if (emulation_type & EMULTYPE_TRAP_UD) {
3857 if (!c->twobyte) 4007 if (!c->twobyte)
3858 return EMULATE_FAIL; 4008 return EMULATE_FAIL;
@@ -3880,11 +4030,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3880 4030
3881 ++vcpu->stat.insn_emulation; 4031 ++vcpu->stat.insn_emulation;
3882 if (r) { 4032 if (r) {
3883 ++vcpu->stat.insn_emulation_fail; 4033 if (reexecute_instruction(vcpu, cr2))
3884 trace_kvm_emulate_insn_failed(vcpu);
3885 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3886 return EMULATE_DONE; 4034 return EMULATE_DONE;
3887 return EMULATE_FAIL; 4035 if (emulation_type & EMULTYPE_SKIP)
4036 return EMULATE_FAIL;
4037 return handle_emulation_failure(vcpu);
3888 } 4038 }
3889 } 4039 }
3890 4040
@@ -3893,48 +4043,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3893 return EMULATE_DONE; 4043 return EMULATE_DONE;
3894 } 4044 }
3895 4045
 4046	/* This is needed for the VMware backdoor interface to work since it
 4047	   changes register values during the I/O operation */
4048 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
4049
3896restart: 4050restart:
3897 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 4051 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3898 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
3899 4052
3900 if (r == 0) 4053 if (r) { /* emulation failed */
3901 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); 4054 if (reexecute_instruction(vcpu, cr2))
4055 return EMULATE_DONE;
3902 4056
3903 if (vcpu->arch.pio.count) { 4057 return handle_emulation_failure(vcpu);
3904 if (!vcpu->arch.pio.in)
3905 vcpu->arch.pio.count = 0;
3906 return EMULATE_DO_MMIO;
3907 } 4058 }
3908 4059
3909 if (r || vcpu->mmio_is_write) { 4060 toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
3910 run->exit_reason = KVM_EXIT_MMIO; 4061 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
3911 run->mmio.phys_addr = vcpu->mmio_phys_addr; 4062 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
3912 memcpy(run->mmio.data, vcpu->mmio_data, 8); 4063 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
3913 run->mmio.len = vcpu->mmio_size; 4064
3914 run->mmio.is_write = vcpu->mmio_is_write; 4065 if (vcpu->arch.emulate_ctxt.exception >= 0) {
4066 inject_emulated_exception(vcpu);
4067 return EMULATE_DONE;
3915 } 4068 }
3916 4069
3917 if (r) { 4070 if (vcpu->arch.pio.count) {
3918 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 4071 if (!vcpu->arch.pio.in)
3919 goto done; 4072 vcpu->arch.pio.count = 0;
3920 if (!vcpu->mmio_needed) {
3921 ++vcpu->stat.insn_emulation_fail;
3922 trace_kvm_emulate_insn_failed(vcpu);
3923 kvm_report_emulation_failure(vcpu, "mmio");
3924 return EMULATE_FAIL;
3925 }
3926 return EMULATE_DO_MMIO; 4073 return EMULATE_DO_MMIO;
3927 } 4074 }
3928 4075
3929 if (vcpu->mmio_is_write) { 4076 if (vcpu->mmio_needed) {
3930 vcpu->mmio_needed = 0; 4077 if (vcpu->mmio_is_write)
4078 vcpu->mmio_needed = 0;
3931 return EMULATE_DO_MMIO; 4079 return EMULATE_DO_MMIO;
3932 } 4080 }
3933 4081
3934done:
3935 if (vcpu->arch.exception.pending)
3936 vcpu->arch.emulate_ctxt.restart = false;
3937
3938 if (vcpu->arch.emulate_ctxt.restart) 4082 if (vcpu->arch.emulate_ctxt.restart)
3939 goto restart; 4083 goto restart;
3940 4084
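
Once x86_emulate_insn() succeeds, the ordering of the checks above is the point: architectural state (regs, rip, rflags, interrupt shadow) is committed first, a pending emulated exception then ends the emulation, outstanding PIO and MMIO are each handed to userspace, and only after all that is a string-instruction restart taken. A compilable restatement of that dispatch follows; the enum values and parameter names are invented for illustration.

/* Restatement of the result dispatch at the end of emulate_instruction().
 * Enum values and parameter names are invented, not the kernel's. */
#include <stdio.h>

enum emu_result { EMU_DONE, EMU_DO_MMIO, EMU_RESTART };

static enum emu_result finish_emulation(int exception_pending, int pio_count,
					int mmio_needed, int restart)
{
	/* regs, rip, rflags and the interrupt shadow are committed before this */
	if (exception_pending)
		return EMU_DONE;	/* injected exception ends the emulation */
	if (pio_count)
		return EMU_DO_MMIO;	/* userspace completes the port I/O */
	if (mmio_needed)
		return EMU_DO_MMIO;	/* userspace completes the MMIO access */
	if (restart)
		return EMU_RESTART;	/* string instruction: emulate next iteration */
	return EMU_DONE;
}

int main(void)
{
	printf("%d\n", finish_emulation(0, 0, 1, 0));	/* 1: EMU_DO_MMIO */
	return 0;
}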
@@ -4108,6 +4252,9 @@ int kvm_arch_init(void *opaque)
4108 4252
4109 perf_register_guest_info_callbacks(&kvm_guest_cbs); 4253 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4110 4254
4255 if (cpu_has_xsave)
4256 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
4257
4111 return 0; 4258 return 0;
4112 4259
4113out: 4260out:
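
kvm_arch_init() above snapshots the host's XCR0 via xgetbv(XCR_XFEATURE_ENABLED_MASK) when the CPU has XSAVE. The same register can be read from userspace with the xgetbv instruction, as the hedged sketch below shows; it assumes an x86 CPU with XSAVE and CR4.OSXSAVE enabled and raises #UD otherwise, so treat it as an illustration rather than production code.

/* Reads XCR0 (the XFEATURE_ENABLED_MASK register) from userspace. */
#include <stdio.h>

static unsigned long long read_xcr0(void)
{
	unsigned int eax, edx;

	/* xgetbv with ecx = 0 selects XCR0 */
	__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
	return ((unsigned long long)edx << 32) | eax;
}

int main(void)
{
	printf("host XCR0 = %#llx\n", read_xcr0());	/* bit 0 = x87, bit 1 = SSE, ... */
	return 0;
}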
@@ -4270,7 +4417,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
4270 4417
4271 kvm_x86_ops->patch_hypercall(vcpu, instruction); 4418 kvm_x86_ops->patch_hypercall(vcpu, instruction);
4272 4419
4273 return emulator_write_emulated(rip, instruction, 3, vcpu); 4420 return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
4274} 4421}
4275 4422
4276void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4423void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
@@ -4506,59 +4653,78 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
4506 } 4653 }
4507} 4654}
4508 4655
4656static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
4657{
4658 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
4659 !vcpu->guest_xcr0_loaded) {
4660 /* kvm_set_xcr() also depends on this */
4661 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
4662 vcpu->guest_xcr0_loaded = 1;
4663 }
4664}
4665
4666static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
4667{
4668 if (vcpu->guest_xcr0_loaded) {
4669 if (vcpu->arch.xcr0 != host_xcr0)
4670 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
4671 vcpu->guest_xcr0_loaded = 0;
4672 }
4673}
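
kvm_load_guest_xcr0()/kvm_put_guest_xcr0() above form a small lazy-switch state machine: the guest value is written only when the guest has CR4.OSXSAVE set and it is not already loaded, and the host value is restored only when it actually differs. The userspace model below captures that bookkeeping with a printf standing in for xsetbv; all names are invented for illustration.

/* Userspace model of the lazy XCR0 switch: the (fake) register write is
 * skipped whenever the state is already what it should be. */
#include <stdio.h>

static const unsigned long long host_xcr0 = 0x1;
static int guest_xcr0_loaded;

static void fake_xsetbv(unsigned long long val)
{
	printf("xsetbv(XCR0, %#llx)\n", val);
}

static void load_guest_xcr0(unsigned long long guest_xcr0, int guest_osxsave)
{
	if (guest_osxsave && !guest_xcr0_loaded) {
		fake_xsetbv(guest_xcr0);
		guest_xcr0_loaded = 1;
	}
}

static void put_guest_xcr0(unsigned long long guest_xcr0)
{
	if (guest_xcr0_loaded) {
		if (guest_xcr0 != host_xcr0)
			fake_xsetbv(host_xcr0);	/* restore only if it changed */
		guest_xcr0_loaded = 0;
	}
}

int main(void)
{
	load_guest_xcr0(0x7, 1);	/* guest enables more xstate bits: write 0x7 */
	put_guest_xcr0(0x7);		/* differs from host: write 0x1 back */
	load_guest_xcr0(0x1, 1);	/* write happens again (no diff check on load) */
	put_guest_xcr0(0x1);		/* equal to host: no write on the way out */
	return 0;
}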
4674
4509static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 4675static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
4510{ 4676{
4511 int r; 4677 int r;
4512 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 4678 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
4513 vcpu->run->request_interrupt_window; 4679 vcpu->run->request_interrupt_window;
4514 4680
4515 if (vcpu->requests)
4516 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
4517 kvm_mmu_unload(vcpu);
4518
4519 r = kvm_mmu_reload(vcpu);
4520 if (unlikely(r))
4521 goto out;
4522
4523 if (vcpu->requests) { 4681 if (vcpu->requests) {
4524 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 4682 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
4683 kvm_mmu_unload(vcpu);
4684 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
4525 __kvm_migrate_timers(vcpu); 4685 __kvm_migrate_timers(vcpu);
4526 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) 4686 if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
4527 kvm_write_guest_time(vcpu); 4687 kvm_write_guest_time(vcpu);
4528 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) 4688 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
4529 kvm_mmu_sync_roots(vcpu); 4689 kvm_mmu_sync_roots(vcpu);
4530 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 4690 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
4531 kvm_x86_ops->tlb_flush(vcpu); 4691 kvm_x86_ops->tlb_flush(vcpu);
4532 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 4692 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
4533 &vcpu->requests)) {
4534 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; 4693 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
4535 r = 0; 4694 r = 0;
4536 goto out; 4695 goto out;
4537 } 4696 }
4538 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { 4697 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
4539 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; 4698 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
4540 r = 0; 4699 r = 0;
4541 goto out; 4700 goto out;
4542 } 4701 }
4543 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { 4702 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
4544 vcpu->fpu_active = 0; 4703 vcpu->fpu_active = 0;
4545 kvm_x86_ops->fpu_deactivate(vcpu); 4704 kvm_x86_ops->fpu_deactivate(vcpu);
4546 } 4705 }
4547 } 4706 }
4548 4707
4708 r = kvm_mmu_reload(vcpu);
4709 if (unlikely(r))
4710 goto out;
4711
4549 preempt_disable(); 4712 preempt_disable();
4550 4713
4551 kvm_x86_ops->prepare_guest_switch(vcpu); 4714 kvm_x86_ops->prepare_guest_switch(vcpu);
4552 if (vcpu->fpu_active) 4715 if (vcpu->fpu_active)
4553 kvm_load_guest_fpu(vcpu); 4716 kvm_load_guest_fpu(vcpu);
4717 kvm_load_guest_xcr0(vcpu);
4554 4718
4555 local_irq_disable(); 4719 atomic_set(&vcpu->guest_mode, 1);
4720 smp_wmb();
4556 4721
4557 clear_bit(KVM_REQ_KICK, &vcpu->requests); 4722 local_irq_disable();
4558 smp_mb__after_clear_bit();
4559 4723
4560 if (vcpu->requests || need_resched() || signal_pending(current)) { 4724 if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
4561 set_bit(KVM_REQ_KICK, &vcpu->requests); 4725 || need_resched() || signal_pending(current)) {
4726 atomic_set(&vcpu->guest_mode, 0);
4727 smp_wmb();
4562 local_irq_enable(); 4728 local_irq_enable();
4563 preempt_enable(); 4729 preempt_enable();
4564 r = 1; 4730 r = 1;
@@ -4603,7 +4769,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
4603 if (hw_breakpoint_active()) 4769 if (hw_breakpoint_active())
4604 hw_breakpoint_restore(); 4770 hw_breakpoint_restore();
4605 4771
4606 set_bit(KVM_REQ_KICK, &vcpu->requests); 4772 atomic_set(&vcpu->guest_mode, 0);
4773 smp_wmb();
4607 local_irq_enable(); 4774 local_irq_enable();
4608 4775
4609 ++vcpu->stat.exits; 4776 ++vcpu->stat.exits;
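
The KVM_REQ_KICK bit is gone: the entry path above arms an atomic guest_mode flag, re-checks it with interrupts off, and kvm_vcpu_kick() (at the end of this file) clears it with atomic_xchg() so a reschedule IPI is sent at most once and only when the vCPU was really headed into guest mode. The single-threaded C11 model below plays both roles just to show the decision logic; the names and the printf "IPI" are stand-ins, not kernel code.

/* Single-threaded model of the guest_mode handshake: try_enter_guest() arms
 * the flag and re-checks it, kick() clears it with an exchange and only
 * "sends an IPI" if it was still set. Names are invented. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int guest_mode;
static atomic_bool requests;

static void kick(void)
{
	/* the exchange returns the old value: IPI only if the vCPU was entering/in guest mode */
	if (atomic_exchange(&guest_mode, 0))
		printf("send reschedule IPI\n");
}

static bool try_enter_guest(void)
{
	atomic_store(&guest_mode, 1);
	/* interrupts would be disabled here, then the state is re-checked */
	if (!atomic_load(&guest_mode) || atomic_load(&requests)) {
		atomic_store(&guest_mode, 0);	/* bail out before VM entry */
		printf("abort entry\n");
		return false;
	}
	printf("enter guest\n");
	atomic_store(&guest_mode, 0);		/* VM exit */
	return true;
}

int main(void)
{
	try_enter_guest();		/* no kick, no request: enters */

	atomic_store(&guest_mode, 1);
	kick();				/* clears the flag, prints the IPI */
	kick();				/* already clear: no second IPI */

	atomic_store(&requests, true);
	try_enter_guest();		/* a request snuck in: entry aborted */
	return 0;
}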
@@ -4665,7 +4832,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
4665 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4832 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4666 kvm_vcpu_block(vcpu); 4833 kvm_vcpu_block(vcpu);
4667 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4834 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4668 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4835 if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
4669 { 4836 {
4670 switch(vcpu->arch.mp_state) { 4837 switch(vcpu->arch.mp_state) {
4671 case KVM_MP_STATE_HALTED: 4838 case KVM_MP_STATE_HALTED:
@@ -4717,8 +4884,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4717 int r; 4884 int r;
4718 sigset_t sigsaved; 4885 sigset_t sigsaved;
4719 4886
4720 vcpu_load(vcpu);
4721
4722 if (vcpu->sigset_active) 4887 if (vcpu->sigset_active)
4723 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 4888 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
4724 4889
@@ -4743,7 +4908,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4743 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4908 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4744 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); 4909 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
4745 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4910 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4746 if (r == EMULATE_DO_MMIO) { 4911 if (r != EMULATE_DONE) {
4747 r = 0; 4912 r = 0;
4748 goto out; 4913 goto out;
4749 } 4914 }
@@ -4759,14 +4924,11 @@ out:
4759 if (vcpu->sigset_active) 4924 if (vcpu->sigset_active)
4760 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4925 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
4761 4926
4762 vcpu_put(vcpu);
4763 return r; 4927 return r;
4764} 4928}
4765 4929
4766int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4930int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4767{ 4931{
4768 vcpu_load(vcpu);
4769
4770 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4932 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4771 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); 4933 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4772 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4934 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -4789,15 +4951,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4789 regs->rip = kvm_rip_read(vcpu); 4951 regs->rip = kvm_rip_read(vcpu);
4790 regs->rflags = kvm_get_rflags(vcpu); 4952 regs->rflags = kvm_get_rflags(vcpu);
4791 4953
4792 vcpu_put(vcpu);
4793
4794 return 0; 4954 return 0;
4795} 4955}
4796 4956
4797int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4957int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4798{ 4958{
4799 vcpu_load(vcpu);
4800
4801 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); 4959 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
4802 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); 4960 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
4803 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); 4961 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
@@ -4822,8 +4980,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4822 4980
4823 vcpu->arch.exception.pending = false; 4981 vcpu->arch.exception.pending = false;
4824 4982
4825 vcpu_put(vcpu);
4826
4827 return 0; 4983 return 0;
4828} 4984}
4829 4985
@@ -4842,8 +4998,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4842{ 4998{
4843 struct desc_ptr dt; 4999 struct desc_ptr dt;
4844 5000
4845 vcpu_load(vcpu);
4846
4847 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 5001 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
4848 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 5002 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
4849 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); 5003 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -4875,32 +5029,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4875 set_bit(vcpu->arch.interrupt.nr, 5029 set_bit(vcpu->arch.interrupt.nr,
4876 (unsigned long *)sregs->interrupt_bitmap); 5030 (unsigned long *)sregs->interrupt_bitmap);
4877 5031
4878 vcpu_put(vcpu);
4879
4880 return 0; 5032 return 0;
4881} 5033}
4882 5034
4883int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 5035int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4884 struct kvm_mp_state *mp_state) 5036 struct kvm_mp_state *mp_state)
4885{ 5037{
4886 vcpu_load(vcpu);
4887 mp_state->mp_state = vcpu->arch.mp_state; 5038 mp_state->mp_state = vcpu->arch.mp_state;
4888 vcpu_put(vcpu);
4889 return 0; 5039 return 0;
4890} 5040}
4891 5041
4892int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 5042int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4893 struct kvm_mp_state *mp_state) 5043 struct kvm_mp_state *mp_state)
4894{ 5044{
4895 vcpu_load(vcpu);
4896 vcpu->arch.mp_state = mp_state->mp_state; 5045 vcpu->arch.mp_state = mp_state->mp_state;
4897 vcpu_put(vcpu);
4898 return 0; 5046 return 0;
4899} 5047}
4900 5048
4901int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 5049int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
4902 bool has_error_code, u32 error_code) 5050 bool has_error_code, u32 error_code)
4903{ 5051{
5052 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4904 int cs_db, cs_l, ret; 5053 int cs_db, cs_l, ret;
4905 cache_all_regs(vcpu); 5054 cache_all_regs(vcpu);
4906 5055
@@ -4915,6 +5064,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
4915 ? X86EMUL_MODE_VM86 : cs_l 5064 ? X86EMUL_MODE_VM86 : cs_l
4916 ? X86EMUL_MODE_PROT64 : cs_db 5065 ? X86EMUL_MODE_PROT64 : cs_db
4917 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 5066 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
5067 memset(c, 0, sizeof(struct decode_cache));
5068 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
4918 5069
4919 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, 5070 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
4920 tss_selector, reason, has_error_code, 5071 tss_selector, reason, has_error_code,
@@ -4923,6 +5074,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
4923 if (ret) 5074 if (ret)
4924 return EMULATE_FAIL; 5075 return EMULATE_FAIL;
4925 5076
5077 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
5078 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
4926 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 5079 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
4927 return EMULATE_DONE; 5080 return EMULATE_DONE;
4928} 5081}
@@ -4935,8 +5088,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4935 int pending_vec, max_bits; 5088 int pending_vec, max_bits;
4936 struct desc_ptr dt; 5089 struct desc_ptr dt;
4937 5090
4938 vcpu_load(vcpu);
4939
4940 dt.size = sregs->idt.limit; 5091 dt.size = sregs->idt.limit;
4941 dt.address = sregs->idt.base; 5092 dt.address = sregs->idt.base;
4942 kvm_x86_ops->set_idt(vcpu, &dt); 5093 kvm_x86_ops->set_idt(vcpu, &dt);
@@ -4996,8 +5147,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4996 !is_protmode(vcpu)) 5147 !is_protmode(vcpu))
4997 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5148 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4998 5149
4999 vcpu_put(vcpu);
5000
5001 return 0; 5150 return 0;
5002} 5151}
5003 5152
@@ -5007,12 +5156,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5007 unsigned long rflags; 5156 unsigned long rflags;
5008 int i, r; 5157 int i, r;
5009 5158
5010 vcpu_load(vcpu);
5011
5012 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { 5159 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
5013 r = -EBUSY; 5160 r = -EBUSY;
5014 if (vcpu->arch.exception.pending) 5161 if (vcpu->arch.exception.pending)
5015 goto unlock_out; 5162 goto out;
5016 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 5163 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
5017 kvm_queue_exception(vcpu, DB_VECTOR); 5164 kvm_queue_exception(vcpu, DB_VECTOR);
5018 else 5165 else
@@ -5054,34 +5201,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5054 5201
5055 r = 0; 5202 r = 0;
5056 5203
5057unlock_out: 5204out:
5058 vcpu_put(vcpu);
5059 5205
5060 return r; 5206 return r;
5061} 5207}
5062 5208
5063/* 5209/*
5064 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
5065 * we have asm/x86/processor.h
5066 */
5067struct fxsave {
5068 u16 cwd;
5069 u16 swd;
5070 u16 twd;
5071 u16 fop;
5072 u64 rip;
5073 u64 rdp;
5074 u32 mxcsr;
5075 u32 mxcsr_mask;
5076 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
5077#ifdef CONFIG_X86_64
5078 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
5079#else
5080 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
5081#endif
5082};
5083
5084/*
5085 * Translate a guest virtual address to a guest physical address. 5210 * Translate a guest virtual address to a guest physical address.
5086 */ 5211 */
5087int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 5212int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -5091,7 +5216,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
5091 gpa_t gpa; 5216 gpa_t gpa;
5092 int idx; 5217 int idx;
5093 5218
5094 vcpu_load(vcpu);
5095 idx = srcu_read_lock(&vcpu->kvm->srcu); 5219 idx = srcu_read_lock(&vcpu->kvm->srcu);
5096 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); 5220 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
5097 srcu_read_unlock(&vcpu->kvm->srcu, idx); 5221 srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -5099,16 +5223,14 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
5099 tr->valid = gpa != UNMAPPED_GVA; 5223 tr->valid = gpa != UNMAPPED_GVA;
5100 tr->writeable = 1; 5224 tr->writeable = 1;
5101 tr->usermode = 0; 5225 tr->usermode = 0;
5102 vcpu_put(vcpu);
5103 5226
5104 return 0; 5227 return 0;
5105} 5228}
5106 5229
5107int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5230int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5108{ 5231{
5109 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5232 struct i387_fxsave_struct *fxsave =
5110 5233 &vcpu->arch.guest_fpu.state->fxsave;
5111 vcpu_load(vcpu);
5112 5234
5113 memcpy(fpu->fpr, fxsave->st_space, 128); 5235 memcpy(fpu->fpr, fxsave->st_space, 128);
5114 fpu->fcw = fxsave->cwd; 5236 fpu->fcw = fxsave->cwd;
@@ -5119,16 +5241,13 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5119 fpu->last_dp = fxsave->rdp; 5241 fpu->last_dp = fxsave->rdp;
5120 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); 5242 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
5121 5243
5122 vcpu_put(vcpu);
5123
5124 return 0; 5244 return 0;
5125} 5245}
5126 5246
5127int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5247int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5128{ 5248{
5129 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5249 struct i387_fxsave_struct *fxsave =
5130 5250 &vcpu->arch.guest_fpu.state->fxsave;
5131 vcpu_load(vcpu);
5132 5251
5133 memcpy(fxsave->st_space, fpu->fpr, 128); 5252 memcpy(fxsave->st_space, fpu->fpr, 128);
5134 fxsave->cwd = fpu->fcw; 5253 fxsave->cwd = fpu->fcw;
@@ -5139,61 +5258,63 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5139 fxsave->rdp = fpu->last_dp; 5258 fxsave->rdp = fpu->last_dp;
5140 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); 5259 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
5141 5260
5142 vcpu_put(vcpu);
5143
5144 return 0; 5261 return 0;
5145} 5262}
5146 5263
5147void fx_init(struct kvm_vcpu *vcpu) 5264int fx_init(struct kvm_vcpu *vcpu)
5148{ 5265{
5149 unsigned after_mxcsr_mask; 5266 int err;
5267
5268 err = fpu_alloc(&vcpu->arch.guest_fpu);
5269 if (err)
5270 return err;
5271
5272 fpu_finit(&vcpu->arch.guest_fpu);
5150 5273
5151 /* 5274 /*
5152 * Touch the fpu the first time in non atomic context as if 5275 * Ensure guest xcr0 is valid for loading
5153 * this is the first fpu instruction the exception handler
5154 * will fire before the instruction returns and it'll have to
5155 * allocate ram with GFP_KERNEL.
5156 */ 5276 */
5157 if (!used_math()) 5277 vcpu->arch.xcr0 = XSTATE_FP;
5158 kvm_fx_save(&vcpu->arch.host_fx_image);
5159
5160 /* Initialize guest FPU by resetting ours and saving into guest's */
5161 preempt_disable();
5162 kvm_fx_save(&vcpu->arch.host_fx_image);
5163 kvm_fx_finit();
5164 kvm_fx_save(&vcpu->arch.guest_fx_image);
5165 kvm_fx_restore(&vcpu->arch.host_fx_image);
5166 preempt_enable();
5167 5278
5168 vcpu->arch.cr0 |= X86_CR0_ET; 5279 vcpu->arch.cr0 |= X86_CR0_ET;
5169 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); 5280
5170 vcpu->arch.guest_fx_image.mxcsr = 0x1f80; 5281 return 0;
5171 memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
5172 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
5173} 5282}
5174EXPORT_SYMBOL_GPL(fx_init); 5283EXPORT_SYMBOL_GPL(fx_init);
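
fx_init() now gets the guest FPU area from fpu_alloc()/fpu_finit() and the explicit 16-byte alignment BUG_ON further down is dropped; the fxsave instruction itself still requires a 512-byte save area aligned to 16 bytes. The userspace sketch below shows that constraint directly. It is x86-only and purely illustrative.

/* fxsave needs a 512-byte save area aligned to 16 bytes. Illustration only. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

int main(void)
{
	void *buf;

	if (posix_memalign(&buf, 16, 512)) {	/* 16-byte alignment, 512 bytes */
		perror("posix_memalign");
		return 1;
	}

	__asm__ volatile("fxsave (%0)" :: "r"(buf) : "memory");

	/* mxcsr sits at offset 24 of the fxsave image; 0x1f80 is its reset value */
	printf("mxcsr = %#x\n", *(uint32_t *)((char *)buf + 24));
	free(buf);
	return 0;
}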
5175 5284
5285static void fx_free(struct kvm_vcpu *vcpu)
5286{
5287 fpu_free(&vcpu->arch.guest_fpu);
5288}
5289
5176void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5290void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
5177{ 5291{
5178 if (vcpu->guest_fpu_loaded) 5292 if (vcpu->guest_fpu_loaded)
5179 return; 5293 return;
5180 5294
5295 /*
5296 * Restore all possible states in the guest,
 5297	 * and assume the host will use all available bits.
 5298	 * The guest xcr0 will be loaded later.
5299 */
5300 kvm_put_guest_xcr0(vcpu);
5181 vcpu->guest_fpu_loaded = 1; 5301 vcpu->guest_fpu_loaded = 1;
5182 kvm_fx_save(&vcpu->arch.host_fx_image); 5302 unlazy_fpu(current);
5183 kvm_fx_restore(&vcpu->arch.guest_fx_image); 5303 fpu_restore_checking(&vcpu->arch.guest_fpu);
5184 trace_kvm_fpu(1); 5304 trace_kvm_fpu(1);
5185} 5305}
5186 5306
5187void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5307void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
5188{ 5308{
5309 kvm_put_guest_xcr0(vcpu);
5310
5189 if (!vcpu->guest_fpu_loaded) 5311 if (!vcpu->guest_fpu_loaded)
5190 return; 5312 return;
5191 5313
5192 vcpu->guest_fpu_loaded = 0; 5314 vcpu->guest_fpu_loaded = 0;
5193 kvm_fx_save(&vcpu->arch.guest_fx_image); 5315 fpu_save_init(&vcpu->arch.guest_fpu);
5194 kvm_fx_restore(&vcpu->arch.host_fx_image);
5195 ++vcpu->stat.fpu_reload; 5316 ++vcpu->stat.fpu_reload;
5196 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); 5317 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
5197 trace_kvm_fpu(0); 5318 trace_kvm_fpu(0);
5198} 5319}
5199 5320
@@ -5204,6 +5325,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
5204 vcpu->arch.time_page = NULL; 5325 vcpu->arch.time_page = NULL;
5205 } 5326 }
5206 5327
5328 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
5329 fx_free(vcpu);
5207 kvm_x86_ops->vcpu_free(vcpu); 5330 kvm_x86_ops->vcpu_free(vcpu);
5208} 5331}
5209 5332
@@ -5217,9 +5340,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
5217{ 5340{
5218 int r; 5341 int r;
5219 5342
5220 /* We do fxsave: this must be aligned. */
5221 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
5222
5223 vcpu->arch.mtrr_state.have_fixed = 1; 5343 vcpu->arch.mtrr_state.have_fixed = 1;
5224 vcpu_load(vcpu); 5344 vcpu_load(vcpu);
5225 r = kvm_arch_vcpu_reset(vcpu); 5345 r = kvm_arch_vcpu_reset(vcpu);
@@ -5241,6 +5361,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
5241 kvm_mmu_unload(vcpu); 5361 kvm_mmu_unload(vcpu);
5242 vcpu_put(vcpu); 5362 vcpu_put(vcpu);
5243 5363
5364 fx_free(vcpu);
5244 kvm_x86_ops->vcpu_free(vcpu); 5365 kvm_x86_ops->vcpu_free(vcpu);
5245} 5366}
5246 5367
@@ -5334,7 +5455,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5334 } 5455 }
5335 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; 5456 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
5336 5457
5458 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
5459 goto fail_free_mce_banks;
5460
5337 return 0; 5461 return 0;
5462fail_free_mce_banks:
5463 kfree(vcpu->arch.mce_banks);
5338fail_free_lapic: 5464fail_free_lapic:
5339 kvm_free_lapic(vcpu); 5465 kvm_free_lapic(vcpu);
5340fail_mmu_destroy: 5466fail_mmu_destroy:
@@ -5364,12 +5490,6 @@ struct kvm *kvm_arch_create_vm(void)
5364 if (!kvm) 5490 if (!kvm)
5365 return ERR_PTR(-ENOMEM); 5491 return ERR_PTR(-ENOMEM);
5366 5492
5367 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
5368 if (!kvm->arch.aliases) {
5369 kfree(kvm);
5370 return ERR_PTR(-ENOMEM);
5371 }
5372
5373 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5493 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
5374 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 5494 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
5375 5495
@@ -5412,12 +5532,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
5412void kvm_arch_sync_events(struct kvm *kvm) 5532void kvm_arch_sync_events(struct kvm *kvm)
5413{ 5533{
5414 kvm_free_all_assigned_devices(kvm); 5534 kvm_free_all_assigned_devices(kvm);
5535 kvm_free_pit(kvm);
5415} 5536}
5416 5537
5417void kvm_arch_destroy_vm(struct kvm *kvm) 5538void kvm_arch_destroy_vm(struct kvm *kvm)
5418{ 5539{
5419 kvm_iommu_unmap_guest(kvm); 5540 kvm_iommu_unmap_guest(kvm);
5420 kvm_free_pit(kvm);
5421 kfree(kvm->arch.vpic); 5541 kfree(kvm->arch.vpic);
5422 kfree(kvm->arch.vioapic); 5542 kfree(kvm->arch.vioapic);
5423 kvm_free_vcpus(kvm); 5543 kvm_free_vcpus(kvm);
@@ -5427,7 +5547,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
5427 if (kvm->arch.ept_identity_pagetable) 5547 if (kvm->arch.ept_identity_pagetable)
5428 put_page(kvm->arch.ept_identity_pagetable); 5548 put_page(kvm->arch.ept_identity_pagetable);
5429 cleanup_srcu_struct(&kvm->srcu); 5549 cleanup_srcu_struct(&kvm->srcu);
5430 kfree(kvm->arch.aliases);
5431 kfree(kvm); 5550 kfree(kvm);
5432} 5551}
5433 5552
@@ -5438,6 +5557,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
5438 int user_alloc) 5557 int user_alloc)
5439{ 5558{
5440 int npages = memslot->npages; 5559 int npages = memslot->npages;
5560 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
5561
5562 /* Prevent internal slot pages from being moved by fork()/COW. */
5563 if (memslot->id >= KVM_MEMORY_SLOTS)
5564 map_flags = MAP_SHARED | MAP_ANONYMOUS;
5441 5565
5442 /*To keep backward compatibility with older userspace, 5566 /*To keep backward compatibility with older userspace,
5443	 *x86 needs to handle the !user_alloc case. 5567	 *x86 needs to handle the !user_alloc case.
@@ -5450,7 +5574,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
5450 userspace_addr = do_mmap(NULL, 0, 5574 userspace_addr = do_mmap(NULL, 0,
5451 npages * PAGE_SIZE, 5575 npages * PAGE_SIZE,
5452 PROT_READ | PROT_WRITE, 5576 PROT_READ | PROT_WRITE,
5453 MAP_PRIVATE | MAP_ANONYMOUS, 5577 map_flags,
5454 0); 5578 0);
5455 up_write(&current->mm->mmap_sem); 5579 up_write(&current->mm->mmap_sem);
5456 5580
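
The internal slots are now mapped MAP_SHARED | MAP_ANONYMOUS so a fork() in userspace cannot leave KVM looking at COW-duplicated pages. The small POSIX program below, written only for illustration, shows the observable difference: a child's write is visible to the parent for the shared mapping but not for the private one.

/* Shows why MAP_SHARED matters across fork(): a write in the child is
 * COW-copied away for MAP_PRIVATE anonymous memory, but shared for
 * MAP_SHARED. Illustration only. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

static void try_flags(int flags, const char *name)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       flags | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return;
	}
	strcpy(p, "parent");
	if (fork() == 0) {		/* the child overwrites the page */
		strcpy(p, "child");
		_exit(0);
	}
	wait(NULL);
	printf("%-11s parent now sees \"%s\"\n", name, p);
	munmap(p, 4096);
}

int main(void)
{
	try_flags(MAP_PRIVATE, "MAP_PRIVATE");	/* parent still sees "parent" */
	try_flags(MAP_SHARED,  "MAP_SHARED");	/* parent sees "child" */
	return 0;
}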
@@ -5523,7 +5647,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
5523 5647
5524 me = get_cpu(); 5648 me = get_cpu();
5525 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) 5649 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
5526 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) 5650 if (atomic_xchg(&vcpu->guest_mode, 0))
5527 smp_send_reschedule(cpu); 5651 smp_send_reschedule(cpu);
5528 put_cpu(); 5652 put_cpu();
5529} 5653}