Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--	arch/x86/kvm/x86.c	168
1 file changed, 111 insertions(+), 57 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 37040079cd6b..f71500af1f81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
 	kvm_x86_ops->set_efer(vcpu, efer);
 
-	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
-
 	/* Update reserved bits */
 	if ((efer ^ old_efer) & EFER_NX)
 		kvm_mmu_reset_context(vcpu);
@@ -2522,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_NR_MEMSLOTS:
-		r = KVM_MEMORY_SLOTS;
+		r = KVM_USER_MEM_SLOTS;
 		break;
 	case KVM_CAP_PV_MMU:	/* obsolete */
 		r = 0;
@@ -3274,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 		return -EINVAL;
 
 	mutex_lock(&kvm->slots_lock);
-	spin_lock(&kvm->mmu_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-	spin_unlock(&kvm->mmu_lock);
 	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
@@ -3439,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	mutex_lock(&kvm->slots_lock);
 
 	r = -EINVAL;
-	if (log->slot >= KVM_MEMORY_SLOTS)
+	if (log->slot >= KVM_USER_MEM_SLOTS)
 		goto out;
 
 	memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4495,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
 	kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
 	*selector = var.selector;
 
-	if (var.unusable)
+	if (var.unusable) {
+		memset(desc, 0, sizeof(*desc));
 		return false;
+	}
 
 	if (var.g)
 		var.limit >>= 12;
@@ -4757,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 	return r;
 }
 
-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+				  bool write_fault_to_shadow_pgtable)
 {
-	gpa_t gpa;
+	gpa_t gpa = cr2;
 	pfn_t pfn;
 
-	if (tdp_enabled)
-		return false;
-
-	/*
-	 * if emulation was due to access to shadowed page table
-	 * and it failed try to unshadow page and re-enter the
-	 * guest to let CPU execute the instruction.
-	 */
-	if (kvm_mmu_unprotect_page_virt(vcpu, gva))
-		return true;
-
-	gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
+	if (!vcpu->arch.mmu.direct_map) {
+		/*
+		 * Write permission should be allowed since only
+		 * write access need to be emulated.
+		 */
+		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
-	if (gpa == UNMAPPED_GVA)
-		return true; /* let cpu generate fault */
+		/*
+		 * If the mapping is invalid in guest, let cpu retry
+		 * it to generate fault.
+		 */
+		if (gpa == UNMAPPED_GVA)
+			return true;
+	}
 
 	/*
 	 * Do not retry the unhandleable instruction if it faults on the
@@ -4785,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	 * instruction -> ...
 	 */
 	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-	if (!is_error_noslot_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+
+	/*
+	 * If the instruction failed on the error pfn, it can not be fixed,
+	 * report the error to userspace.
+	 */
+	if (is_error_noslot_pfn(pfn))
+		return false;
+
+	kvm_release_pfn_clean(pfn);
+
+	/* The instructions are well-emulated on direct mmu. */
+	if (vcpu->arch.mmu.direct_map) {
+		unsigned int indirect_shadow_pages;
+
+		spin_lock(&vcpu->kvm->mmu_lock);
+		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
+		spin_unlock(&vcpu->kvm->mmu_lock);
+
+		if (indirect_shadow_pages)
+			kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
 		return true;
 	}
 
-	return false;
+	/*
+	 * if emulation was due to access to shadowed page table
+	 * and it failed try to unshadow page and re-enter the
+	 * guest to let CPU execute the instruction.
+	 */
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
+	/*
+	 * If the access faults on its page table, it can not
+	 * be fixed by unprotecting shadow page and it should
+	 * be reported to userspace.
+	 */
+	return !write_fault_to_shadow_pgtable;
 }
 
 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
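A minimal user-space sketch of the decision flow the rewritten reexecute_instruction() implements in the two hunks above. The struct and helper names here are illustrative only (they are not KVM code), and the inputs are a deliberately simplified model of what the function inspects.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified view of the state reexecute_instruction() looks at. */
struct reexec_state {
	bool direct_map;         /* vcpu->arch.mmu.direct_map (TDP in use) */
	bool gva_mapped;         /* shadow MMU path: gpa != UNMAPPED_GVA */
	bool pfn_valid;          /* !is_error_noslot_pfn(pfn) */
	bool write_fault_to_spt; /* the write faulted on the guest page table itself */
};

/* true = re-enter the guest and retry, false = report emulation failure */
static bool should_reenter_guest(const struct reexec_state *s)
{
	if (!s->direct_map && !s->gva_mapped)
		return true;		/* let the CPU regenerate the fault */
	if (!s->pfn_valid)
		return false;		/* error pfn cannot be fixed */
	if (s->direct_map)
		return true;		/* unprotect shadow pages (if any) and retry */
	/*
	 * Shadow MMU: unprotect the page, but give up if the write
	 * faulted on the guest's own page table.
	 */
	return !s->write_fault_to_spt;
}

int main(void)
{
	struct reexec_state s = { .direct_map = true, .pfn_valid = true };

	printf("retry=%d\n", should_reenter_guest(&s));
	return 0;
}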
@@ -4832,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	if (!vcpu->arch.mmu.direct_map)
 		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
-	kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
 
 	return true;
 }
@@ -4849,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	int r;
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	bool writeback = true;
+	bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
 
+	/*
+	 * Clear write_fault_to_shadow_pgtable here to ensure it is
+	 * never reused.
+	 */
+	vcpu->arch.write_fault_to_shadow_pgtable = false;
 	kvm_clear_exception_queue(vcpu);
 
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
@@ -4868,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		if (r != EMULATION_OK) {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
-			if (reexecute_instruction(vcpu, cr2))
+			if (reexecute_instruction(vcpu, cr2,
+						  write_fault_to_spt))
 				return EMULATE_DONE;
 			if (emulation_type & EMULTYPE_SKIP)
 				return EMULATE_FAIL;
@@ -4898,7 +4934,7 @@ restart:
 		return EMULATE_DONE;
 
 	if (r == EMULATION_FAILED) {
-		if (reexecute_instruction(vcpu, cr2))
+		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
 			return EMULATE_DONE;
 
 		return handle_emulation_failure(vcpu);
@@ -5541,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 			vcpu->arch.nmi_injected = true;
 			kvm_x86_ops->set_nmi(vcpu);
 		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
+	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
 					    false);
@@ -5609,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+	u64 eoi_exit_bitmap[4];
+
+	memset(eoi_exit_bitmap, 0, 32);
+
+	kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
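One reading of the sizing in the update_eoi_exitmap() hunk above: the bitmap is four u64 words, i.e. 32 bytes or 256 bits, one bit per possible interrupt vector, which is why the memset() clears exactly 32 bytes. A standalone sanity check of that arithmetic (not KVM code):

#include <stdint.h>
#include <string.h>

int main(void)
{
	uint64_t eoi_exit_bitmap[4];

	/* 4 * 8 bytes = 32 bytes = 256 bits, one per x86 interrupt vector */
	_Static_assert(sizeof(eoi_exit_bitmap) == 32, "must cover 256 vectors");
	memset(eoi_exit_bitmap, 0, sizeof(eoi_exit_bitmap));
	return 0;
}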
@@ -5662,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_handle_pmu_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_deliver_pmi(vcpu);
+		if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
+			update_eoi_exitmap(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5670,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		/* enable NMI/IRQ window open exits if needed */
 		if (vcpu->arch.nmi_pending)
 			kvm_x86_ops->enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
 			kvm_x86_ops->enable_irq_window(vcpu);
 
 		if (kvm_lapic_enabled(vcpu)) {
+			/*
+			 * Update architecture specific hints for APIC
+			 * virtual interrupt delivery.
+			 */
+			if (kvm_x86_ops->hwapic_irr_update)
+				kvm_x86_ops->hwapic_irr_update(vcpu,
+					kvm_lapic_find_highest_irr(vcpu));
 			update_cr8_intercept(vcpu);
 			kvm_lapic_sync_to_vapic(vcpu);
 		}
@@ -6853,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_memory_slot old,
 				struct kvm_userspace_memory_region *mem,
-				int user_alloc)
+				bool user_alloc)
 {
 	int npages = memslot->npages;
-	int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
 
-	/* Prevent internal slot pages from being moved by fork()/COW. */
-	if (memslot->id >= KVM_MEMORY_SLOTS)
-		map_flags = MAP_SHARED | MAP_ANONYMOUS;
-
-	/*To keep backward compatibility with older userspace,
-	 *x86 needs to handle !user_alloc case.
+	/*
+	 * Only private memory slots need to be mapped here since
+	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
 	 */
-	if (!user_alloc) {
-		if (npages && !old.npages) {
-			unsigned long userspace_addr;
+	if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+		unsigned long userspace_addr;
 
-			userspace_addr = vm_mmap(NULL, 0,
-						 npages * PAGE_SIZE,
-						 PROT_READ | PROT_WRITE,
-						 map_flags,
-						 0);
+		/*
+		 * MAP_SHARED to prevent internal slot pages from being moved
+		 * by fork()/COW.
+		 */
+		userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+					 PROT_READ | PROT_WRITE,
+					 MAP_SHARED | MAP_ANONYMOUS, 0);
 
-			if (IS_ERR((void *)userspace_addr))
-				return PTR_ERR((void *)userspace_addr);
+		if (IS_ERR((void *)userspace_addr))
+			return PTR_ERR((void *)userspace_addr);
 
-			memslot->userspace_addr = userspace_addr;
-		}
+		memslot->userspace_addr = userspace_addr;
 	}
 
-
 	return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
-				int user_alloc)
+				bool user_alloc)
 {
 
 	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-	if (!user_alloc && !old.user_alloc && old.npages && !npages) {
+	if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
 		int ret;
 
 		ret = vm_munmap(old.userspace_addr,
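The common thread in the two memory-region hunks above is that slot ids at or above KVM_USER_MEM_SLOTS now denote KVM-internal (private) slots, so the old !user_alloc checks become an id/slot-number test. A minimal illustration, with an assumed value for the constant (the real definition lives in the arch headers) and a hypothetical helper name:

#include <stdbool.h>

#define KVM_USER_MEM_SLOTS 125	/* assumed here for illustration only */

/*
 * Slots created by userspace via KVM_SET_USER_MEMORY_REGION get ids below
 * the limit; ids at or above it are internal slots set up by KVM itself
 * (e.g. the TSS and identity-map pages on x86).
 */
static bool is_private_memslot(unsigned int id)
{
	return id >= KVM_USER_MEM_SLOTS;
}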
@@ -6908,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	if (!kvm->arch.n_requested_mmu_pages)
 		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
-	spin_lock(&kvm->mmu_lock);
 	if (nr_mmu_pages)
 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
-	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
-	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * Write protect all pages for dirty logging.
+	 * Existing largepage mappings are destroyed here and new ones will
+	 * not be created until the end of the logging.
+	 */
+	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	/*
 	 * If memory slot is created, or moved, we need to clear all
 	 * mmio sptes.