Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--	arch/x86/kvm/x86.c	184
1 file changed, 120 insertions, 64 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c243b81e3c74..f71500af1f81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
 	kvm_x86_ops->set_efer(vcpu, efer);
 
-	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
-
 	/* Update reserved bits */
 	if ((efer ^ old_efer) & EFER_NX)
 		kvm_mmu_reset_context(vcpu);
@@ -1881,6 +1879,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u64 data = msr_info->data;
 
 	switch (msr) {
+	case MSR_AMD64_NB_CFG:
+	case MSR_IA32_UCODE_REV:
+	case MSR_IA32_UCODE_WRITE:
+	case MSR_VM_HSAVE_PA:
+	case MSR_AMD64_PATCH_LOADER:
+	case MSR_AMD64_BU_CFG2:
+		break;
+
 	case MSR_EFER:
 		return set_efer(vcpu, data);
 	case MSR_K7_HWCR:
@@ -1900,8 +1906,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		}
 		break;
-	case MSR_AMD64_NB_CFG:
-		break;
 	case MSR_IA32_DEBUGCTLMSR:
 		if (!data) {
 			/* We support the non-activated case already */
@@ -1914,11 +1918,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
 			    __func__, data);
 		break;
-	case MSR_IA32_UCODE_REV:
-	case MSR_IA32_UCODE_WRITE:
-	case MSR_VM_HSAVE_PA:
-	case MSR_AMD64_PATCH_LOADER:
-		break;
 	case 0x200 ... 0x2ff:
 		return set_msr_mtrr(vcpu, msr, data);
 	case MSR_IA32_APICBASE:
@@ -2253,6 +2252,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_K8_INT_PENDING_MSG:
 	case MSR_AMD64_NB_CFG:
 	case MSR_FAM10H_MMIO_CONF_BASE:
+	case MSR_AMD64_BU_CFG2:
 		data = 0;
 		break;
 	case MSR_P6_PERFCTR0:
@@ -2520,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_NR_MEMSLOTS:
-		r = KVM_MEMORY_SLOTS;
+		r = KVM_USER_MEM_SLOTS;
 		break;
 	case KVM_CAP_PV_MMU: /* obsolete */
 		r = 0;
@@ -3272,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 		return -EINVAL;
 
 	mutex_lock(&kvm->slots_lock);
-	spin_lock(&kvm->mmu_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-	spin_unlock(&kvm->mmu_lock);
 	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
@@ -3437,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	mutex_lock(&kvm->slots_lock);
 
 	r = -EINVAL;
-	if (log->slot >= KVM_MEMORY_SLOTS)
+	if (log->slot >= KVM_USER_MEM_SLOTS)
 		goto out;
 
 	memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4493,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
 	kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
 	*selector = var.selector;
 
-	if (var.unusable)
+	if (var.unusable) {
+		memset(desc, 0, sizeof(*desc));
 		return false;
+	}
 
 	if (var.g)
 		var.limit >>= 12;
@@ -4755,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 	return r;
 }
 
-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+				  bool write_fault_to_shadow_pgtable)
 {
-	gpa_t gpa;
+	gpa_t gpa = cr2;
 	pfn_t pfn;
 
-	if (tdp_enabled)
-		return false;
-
-	/*
-	 * if emulation was due to access to shadowed page table
-	 * and it failed try to unshadow page and re-enter the
-	 * guest to let CPU execute the instruction.
-	 */
-	if (kvm_mmu_unprotect_page_virt(vcpu, gva))
-		return true;
-
-	gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
-
-	if (gpa == UNMAPPED_GVA)
-		return true; /* let cpu generate fault */
+	if (!vcpu->arch.mmu.direct_map) {
+		/*
+		 * Write permission should be allowed since only
+		 * write access need to be emulated.
+		 */
+		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+
+		/*
+		 * If the mapping is invalid in guest, let cpu retry
+		 * it to generate fault.
+		 */
+		if (gpa == UNMAPPED_GVA)
+			return true;
+	}
 
 	/*
 	 * Do not retry the unhandleable instruction if it faults on the
@@ -4783,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	 * instruction -> ...
 	 */
 	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-	if (!is_error_noslot_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+
+	/*
+	 * If the instruction failed on the error pfn, it can not be fixed,
+	 * report the error to userspace.
+	 */
+	if (is_error_noslot_pfn(pfn))
+		return false;
+
+	kvm_release_pfn_clean(pfn);
+
+	/* The instructions are well-emulated on direct mmu. */
+	if (vcpu->arch.mmu.direct_map) {
+		unsigned int indirect_shadow_pages;
+
+		spin_lock(&vcpu->kvm->mmu_lock);
+		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
+		spin_unlock(&vcpu->kvm->mmu_lock);
+
+		if (indirect_shadow_pages)
+			kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
 		return true;
 	}
 
-	return false;
+	/*
+	 * if emulation was due to access to shadowed page table
+	 * and it failed try to unshadow page and re-enter the
+	 * guest to let CPU execute the instruction.
+	 */
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
+	/*
+	 * If the access faults on its page table, it can not
+	 * be fixed by unprotecting shadow page and it should
+	 * be reported to userspace.
+	 */
+	return !write_fault_to_shadow_pgtable;
 }
 
 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -4830,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	if (!vcpu->arch.mmu.direct_map)
 		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
-	kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
 
 	return true;
 }
@@ -4847,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	int r;
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	bool writeback = true;
+	bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
 
+	/*
+	 * Clear write_fault_to_shadow_pgtable here to ensure it is
+	 * never reused.
+	 */
+	vcpu->arch.write_fault_to_shadow_pgtable = false;
 	kvm_clear_exception_queue(vcpu);
 
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
@@ -4866,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		if (r != EMULATION_OK) {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
-			if (reexecute_instruction(vcpu, cr2))
+			if (reexecute_instruction(vcpu, cr2,
+						  write_fault_to_spt))
 				return EMULATE_DONE;
 			if (emulation_type & EMULTYPE_SKIP)
 				return EMULATE_FAIL;
@@ -4896,7 +4934,7 @@ restart:
 		return EMULATE_DONE;
 
 	if (r == EMULATION_FAILED) {
-		if (reexecute_instruction(vcpu, cr2))
+		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
 			return EMULATE_DONE;
 
 		return handle_emulation_failure(vcpu);
@@ -5539,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 			vcpu->arch.nmi_injected = true;
 			kvm_x86_ops->set_nmi(vcpu);
 		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
+	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
 					    false);
@@ -5607,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+	u64 eoi_exit_bitmap[4];
+
+	memset(eoi_exit_bitmap, 0, 32);
+
+	kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -5660,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_handle_pmu_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_deliver_pmi(vcpu);
+		if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
+			update_eoi_exitmap(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5668,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		/* enable NMI/IRQ window open exits if needed */
 		if (vcpu->arch.nmi_pending)
 			kvm_x86_ops->enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
 			kvm_x86_ops->enable_irq_window(vcpu);
 
 		if (kvm_lapic_enabled(vcpu)) {
+			/*
+			 * Update architecture specific hints for APIC
+			 * virtual interrupt delivery.
+			 */
+			if (kvm_x86_ops->hwapic_irr_update)
+				kvm_x86_ops->hwapic_irr_update(vcpu,
+					kvm_lapic_find_highest_irr(vcpu));
 			update_cr8_intercept(vcpu);
 			kvm_lapic_sync_to_vapic(vcpu);
 		}
@@ -6851,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_memory_slot old,
 				struct kvm_userspace_memory_region *mem,
-				int user_alloc)
+				bool user_alloc)
 {
 	int npages = memslot->npages;
-	int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-
-	/* Prevent internal slot pages from being moved by fork()/COW. */
-	if (memslot->id >= KVM_MEMORY_SLOTS)
-		map_flags = MAP_SHARED | MAP_ANONYMOUS;
 
-	/*To keep backward compatibility with older userspace,
-	 *x86 needs to handle !user_alloc case.
+	/*
+	 * Only private memory slots need to be mapped here since
+	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
 	 */
-	if (!user_alloc) {
-		if (npages && !old.npages) {
-			unsigned long userspace_addr;
+	if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+		unsigned long userspace_addr;
 
-			userspace_addr = vm_mmap(NULL, 0,
-						 npages * PAGE_SIZE,
-						 PROT_READ | PROT_WRITE,
-						 map_flags,
-						 0);
+		/*
+		 * MAP_SHARED to prevent internal slot pages from being moved
+		 * by fork()/COW.
+		 */
+		userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+					 PROT_READ | PROT_WRITE,
+					 MAP_SHARED | MAP_ANONYMOUS, 0);
 
-			if (IS_ERR((void *)userspace_addr))
-				return PTR_ERR((void *)userspace_addr);
+		if (IS_ERR((void *)userspace_addr))
+			return PTR_ERR((void *)userspace_addr);
 
-			memslot->userspace_addr = userspace_addr;
-		}
+		memslot->userspace_addr = userspace_addr;
 	}
 
-
 	return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
-				int user_alloc)
+				bool user_alloc)
 {
 
 	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-	if (!user_alloc && !old.user_alloc && old.npages && !npages) {
+	if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
 		int ret;
 
 		ret = vm_munmap(old.userspace_addr,
@@ -6906,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	if (!kvm->arch.n_requested_mmu_pages)
 		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
-	spin_lock(&kvm->mmu_lock);
 	if (nr_mmu_pages)
 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
-	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
-	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * Write protect all pages for dirty logging.
+	 * Existing largepage mappings are destroyed here and new ones will
+	 * not be created until the end of the logging.
+	 */
+	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	/*
 	 * If memory slot is created, or moved, we need to clear all
 	 * mmio sptes.