Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 168
1 file changed, 111 insertions, 57 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 37040079cd6b..f71500af1f81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)

         kvm_x86_ops->set_efer(vcpu, efer);

-        vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
-
         /* Update reserved bits */
         if ((efer ^ old_efer) & EFER_NX)
                 kvm_mmu_reset_context(vcpu);
@@ -2522,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
                 r = KVM_MAX_VCPUS;
                 break;
         case KVM_CAP_NR_MEMSLOTS:
-                r = KVM_MEMORY_SLOTS;
+                r = KVM_USER_MEM_SLOTS;
                 break;
         case KVM_CAP_PV_MMU:   /* obsolete */
                 r = 0;
@@ -3274,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
                 return -EINVAL;

         mutex_lock(&kvm->slots_lock);
-        spin_lock(&kvm->mmu_lock);

         kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
         kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;

-        spin_unlock(&kvm->mmu_lock);
         mutex_unlock(&kvm->slots_lock);
         return 0;
 }
@@ -3439,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
         mutex_lock(&kvm->slots_lock);

         r = -EINVAL;
-        if (log->slot >= KVM_MEMORY_SLOTS)
+        if (log->slot >= KVM_USER_MEM_SLOTS)
                 goto out;

         memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4495,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
         kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
         *selector = var.selector;

-        if (var.unusable)
+        if (var.unusable) {
+                memset(desc, 0, sizeof(*desc));
                 return false;
+        }

         if (var.g)
                 var.limit >>= 12;
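
Note on the hunk above: emulator_get_segment() now zeroes the caller's descriptor before returning false, presumably so that a caller which does not check the return value never consumes an uninitialized descriptor for an unusable segment. A minimal standalone C sketch of the same pattern (the struct layouts and names below are illustrative stand-ins, not the kernel's):

/* Sketch of the "zero the output before bailing out" pattern. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct desc_struct { unsigned int a, b; };          /* stand-in descriptor */
struct seg_var     { bool unusable; unsigned int limit; };

static bool get_segment(const struct seg_var *var, struct desc_struct *desc)
{
        if (var->unusable) {
                memset(desc, 0, sizeof(*desc));     /* the new behaviour */
                return false;
        }
        desc->a = var->limit;                       /* abridged normal path */
        return true;
}

int main(void)
{
        struct seg_var var = { .unusable = true };
        struct desc_struct desc = { 0xdead, 0xbeef };

        get_segment(&var, &desc);
        printf("desc after unusable segment: %x %x\n", desc.a, desc.b); /* 0 0 */
        return 0;
}
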
@@ -4757,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
         return r;
 }

-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+                                  bool write_fault_to_shadow_pgtable)
 {
-        gpa_t gpa;
+        gpa_t gpa = cr2;
         pfn_t pfn;

-        if (tdp_enabled)
-                return false;
-
-        /*
-         * if emulation was due to access to shadowed page table
-         * and it failed try to unshadow page and re-enter the
-         * guest to let CPU execute the instruction.
-         */
-        if (kvm_mmu_unprotect_page_virt(vcpu, gva))
-                return true;
-
-        gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
+        if (!vcpu->arch.mmu.direct_map) {
+                /*
+                 * Write permission should be allowed since only
+                 * write access need to be emulated.
+                 */
+                gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

-        if (gpa == UNMAPPED_GVA)
-                return true; /* let cpu generate fault */
+                /*
+                 * If the mapping is invalid in guest, let cpu retry
+                 * it to generate fault.
+                 */
+                if (gpa == UNMAPPED_GVA)
+                        return true;
+        }

         /*
          * Do not retry the unhandleable instruction if it faults on the
@@ -4785,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
          * instruction -> ...
          */
         pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-        if (!is_error_noslot_pfn(pfn)) {
-                kvm_release_pfn_clean(pfn);
+
+        /*
+         * If the instruction failed on the error pfn, it can not be fixed,
+         * report the error to userspace.
+         */
+        if (is_error_noslot_pfn(pfn))
+                return false;
+
+        kvm_release_pfn_clean(pfn);
+
+        /* The instructions are well-emulated on direct mmu. */
+        if (vcpu->arch.mmu.direct_map) {
+                unsigned int indirect_shadow_pages;
+
+                spin_lock(&vcpu->kvm->mmu_lock);
+                indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
+                spin_unlock(&vcpu->kvm->mmu_lock);
+
+                if (indirect_shadow_pages)
+                        kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
                 return true;
         }

-        return false;
+        /*
+         * if emulation was due to access to shadowed page table
+         * and it failed try to unshadow page and re-enter the
+         * guest to let CPU execute the instruction.
+         */
+        kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
+        /*
+         * If the access faults on its page table, it can not
+         * be fixed by unprotecting shadow page and it should
+         * be reported to userspace.
+         */
+        return !write_fault_to_shadow_pgtable;
 }

 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
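
Taken together, the two hunks above rework reexecute_instruction() to decide between retrying the guest instruction and reporting the emulation failure: an unmapped guest address or a direct-map (TDP) MMU leads to a retry, an error pfn is reported, and on a shadow MMU the retry is skipped when the write faulted on the guest page table itself. A rough userspace model of that decision, with illustrative names and none of the real KVM state:

/* Control-flow model only; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

static bool should_reexecute(bool direct_map, bool unmapped_gva,
                             bool error_pfn,
                             bool write_fault_to_shadow_pgtable)
{
        if (!direct_map && unmapped_gva)
                return true;    /* let the CPU regenerate the fault */
        if (error_pfn)
                return false;   /* cannot be fixed, report to userspace */
        if (direct_map)
                return true;    /* unprotect if needed, then retry */
        /* shadow MMU: retrying only helps if the fault was not on the page table */
        return !write_fault_to_shadow_pgtable;
}

int main(void)
{
        printf("%d\n", should_reexecute(true, false, false, false));  /* 1: retry */
        printf("%d\n", should_reexecute(false, false, false, true));  /* 0: report */
        return 0;
}
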
@@ -4832,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
         if (!vcpu->arch.mmu.direct_map)
                 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

-        kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+        kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

         return true;
 }
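
gpa_to_gfn() performs the same page shift as the open-coded gpa >> PAGE_SHIFT it replaces; the helper only makes the conversion from a guest physical address to a guest frame number explicit. A tiny standalone check, assuming the x86 4 KiB page size (PAGE_SHIFT == 12):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static uint64_t gpa_to_gfn(uint64_t gpa) { return gpa >> PAGE_SHIFT; }

int main(void)
{
        uint64_t gpa = 0x12345678;      /* arbitrary guest physical address */

        assert(gpa_to_gfn(gpa) == (gpa >> PAGE_SHIFT));
        printf("gpa 0x%llx -> gfn 0x%llx (page offset dropped)\n",
               (unsigned long long)gpa, (unsigned long long)gpa_to_gfn(gpa));
        return 0;
}
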
@@ -4849,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
         int r;
         struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
         bool writeback = true;
+        bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;

+        /*
+         * Clear write_fault_to_shadow_pgtable here to ensure it is
+         * never reused.
+         */
+        vcpu->arch.write_fault_to_shadow_pgtable = false;
         kvm_clear_exception_queue(vcpu);

         if (!(emulation_type & EMULTYPE_NO_DECODE)) {
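
The new write_fault_to_spt local snapshots the per-vcpu write_fault_to_shadow_pgtable flag, and the flag is cleared right away, so a value left over from an earlier fault cannot influence a later emulation. An illustrative sketch of this snapshot-and-clear pattern (simplified types, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct vcpu_state { bool write_fault_to_shadow_pgtable; };

static int emulate(struct vcpu_state *v)
{
        bool write_fault_to_spt = v->write_fault_to_shadow_pgtable;

        v->write_fault_to_shadow_pgtable = false;   /* never reused */
        /* ... decode/execute would go here; the local carries the old value ... */
        return write_fault_to_spt ? 1 : 0;
}

int main(void)
{
        struct vcpu_state v = { .write_fault_to_shadow_pgtable = true };

        printf("first call sees %d, flag now %d\n",
               emulate(&v), v.write_fault_to_shadow_pgtable);
        printf("second call sees %d\n", emulate(&v));
        return 0;
}
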
@@ -4868,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                 if (r != EMULATION_OK) {
                         if (emulation_type & EMULTYPE_TRAP_UD)
                                 return EMULATE_FAIL;
-                        if (reexecute_instruction(vcpu, cr2))
+                        if (reexecute_instruction(vcpu, cr2,
+                                                  write_fault_to_spt))
                                 return EMULATE_DONE;
                         if (emulation_type & EMULTYPE_SKIP)
                                 return EMULATE_FAIL;
@@ -4898,7 +4934,7 @@ restart:
                 return EMULATE_DONE;

         if (r == EMULATION_FAILED) {
-                if (reexecute_instruction(vcpu, cr2))
+                if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
                         return EMULATE_DONE;

                 return handle_emulation_failure(vcpu);
@@ -5541,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
                         vcpu->arch.nmi_injected = true;
                         kvm_x86_ops->set_nmi(vcpu);
                 }
-        } else if (kvm_cpu_has_interrupt(vcpu)) {
+        } else if (kvm_cpu_has_injectable_intr(vcpu)) {
                 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
                         kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
                                             false);
@@ -5609,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }

+static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+        u64 eoi_exit_bitmap[4];
+
+        memset(eoi_exit_bitmap, 0, 32);
+
+        kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+        kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
         int r;
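
The EOI exit bitmap added above holds one bit per interrupt vector, so the 256 possible vectors fit in four u64 words and the 32-byte memset clears exactly the whole array. A standalone check of that sizing in plain C (no kernel headers):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        uint64_t eoi_exit_bitmap[4];
        unsigned int vec = 0x30;

        memset(eoi_exit_bitmap, 0, 32);   /* same as sizeof(eoi_exit_bitmap) */
        printf("bitmap bytes: %zu, vectors covered: %zu\n",
               sizeof(eoi_exit_bitmap), sizeof(eoi_exit_bitmap) * 8);

        /* mark vector 0x30 as needing an EOI exit, the way a bitmap user would */
        eoi_exit_bitmap[vec / 64] |= 1ULL << (vec % 64);
        printf("word 0 after setting vector 0x%x: 0x%llx\n",
               vec, (unsigned long long)eoi_exit_bitmap[0]);
        return 0;
}
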
@@ -5662,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                         kvm_handle_pmu_event(vcpu);
                 if (kvm_check_request(KVM_REQ_PMI, vcpu))
                         kvm_deliver_pmi(vcpu);
+                if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
+                        update_eoi_exitmap(vcpu);
         }

         if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5670,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                 /* enable NMI/IRQ window open exits if needed */
                 if (vcpu->arch.nmi_pending)
                         kvm_x86_ops->enable_nmi_window(vcpu);
-                else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+                else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
                         kvm_x86_ops->enable_irq_window(vcpu);

                 if (kvm_lapic_enabled(vcpu)) {
+                        /*
+                         * Update architecture specific hints for APIC
+                         * virtual interrupt delivery.
+                         */
+                        if (kvm_x86_ops->hwapic_irr_update)
+                                kvm_x86_ops->hwapic_irr_update(vcpu,
+                                        kvm_lapic_find_highest_irr(vcpu));
                         update_cr8_intercept(vcpu);
                         kvm_lapic_sync_to_vapic(vcpu);
                 }
@@ -6853,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                 struct kvm_memory_slot *memslot,
                                 struct kvm_memory_slot old,
                                 struct kvm_userspace_memory_region *mem,
-                                int user_alloc)
+                                bool user_alloc)
 {
         int npages = memslot->npages;
-        int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;

-        /* Prevent internal slot pages from being moved by fork()/COW. */
-        if (memslot->id >= KVM_MEMORY_SLOTS)
-                map_flags = MAP_SHARED | MAP_ANONYMOUS;
-
-        /*To keep backward compatibility with older userspace,
-         *x86 needs to handle !user_alloc case.
+        /*
+         * Only private memory slots need to be mapped here since
+         * KVM_SET_MEMORY_REGION ioctl is no longer supported.
          */
-        if (!user_alloc) {
-                if (npages && !old.npages) {
-                        unsigned long userspace_addr;
+        if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+                unsigned long userspace_addr;

-                        userspace_addr = vm_mmap(NULL, 0,
-                                                 npages * PAGE_SIZE,
-                                                 PROT_READ | PROT_WRITE,
-                                                 map_flags,
-                                                 0);
+                /*
+                 * MAP_SHARED to prevent internal slot pages from being moved
+                 * by fork()/COW.
+                 */
+                userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+                                         PROT_READ | PROT_WRITE,
+                                         MAP_SHARED | MAP_ANONYMOUS, 0);

                 if (IS_ERR((void *)userspace_addr))
                         return PTR_ERR((void *)userspace_addr);

                 memslot->userspace_addr = userspace_addr;
-                }
         }

-
         return 0;
 }

 void kvm_arch_commit_memory_region(struct kvm *kvm,
                                 struct kvm_userspace_memory_region *mem,
                                 struct kvm_memory_slot old,
-                                int user_alloc)
+                                bool user_alloc)
 {

         int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;

-        if (!user_alloc && !old.user_alloc && old.npages && !npages) {
+        if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
                 int ret;

                 ret = vm_munmap(old.userspace_addr,
@@ -6908,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
         if (!kvm->arch.n_requested_mmu_pages)
                 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);

-        spin_lock(&kvm->mmu_lock);
         if (nr_mmu_pages)
                 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
-        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
-        spin_unlock(&kvm->mmu_lock);
+        /*
+         * Write protect all pages for dirty logging.
+         * Existing largepage mappings are destroyed here and new ones will
+         * not be created until the end of the logging.
+         */
+        if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+                kvm_mmu_slot_remove_write_access(kvm, mem->slot);
         /*
          * If memory slot is created, or moved, we need to clear all
          * mmio sptes.
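
With the last hunk, write protection for dirty logging is applied only when the updated slot still has pages and was registered with KVM_MEM_LOG_DIRTY_PAGES. A hedged sketch of that condition; the flag value matches the KVM UAPI header, while the struct is trimmed to just what the check needs:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)   /* value from the KVM UAPI headers */

struct mem_region { uint64_t npages; uint32_t flags; };

static bool needs_write_protect(const struct mem_region *mem)
{
        return mem->npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES);
}

int main(void)
{
        struct mem_region logging = { .npages = 256, .flags = KVM_MEM_LOG_DIRTY_PAGES };
        struct mem_region plain   = { .npages = 256, .flags = 0 };

        printf("logging slot: %d, plain slot: %d\n",
               needs_write_protect(&logging), needs_write_protect(&plain));
        return 0;
}
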