author	Linus Torvalds <torvalds@linux-foundation.org>	2018-09-08 18:52:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-09-08 18:52:45 -0400
commit	f8f65382c98a28e3c2b20df9dd4231dca5a11682 (patch)
tree	a65cb40ea7c78d459721e67103b9f7310cdc8ff7
parent	0f3aa48ad4c307fb72b8eb43add26c8f314d396a (diff)
parent	bdf7ffc89922a52a4f08a12f7421ea24bb7626a0 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix a VFP corruption in 32-bit guest

   - Add missing cache invalidation for CoW pages

   - Two small cleanups

  s390:
   - Fallout from the hugetlbfs support: pfmf interpretation and locking

   - VSIE: fix keywrapping for nested guests

  PPC:
   - Fix a bug where pages might not get marked dirty, causing guest
     memory corruption on migration

   - Fix a bug causing reads from guest memory to use the wrong guest
     real address for very large HPT guests (>256G of memory), leading
     to failures in instruction emulation.

  x86:
   - Fix out of bound access from malicious pv ipi hypercalls
     (introduced in rc1)

   - Fix delivery of pending interrupts when entering a nested guest,
     preventing arbitrarily late injection

   - Sanitize kvm_stat output after destroying a guest

   - Fix infinite loop when emulating a nested guest page fault and
     improve the surrounding emulation code

   - Two minor cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  KVM: LAPIC: Fix pv ipis out-of-bounds access
  KVM: nVMX: Fix loss of pending IRQ/NMI before entering L2
  arm64: KVM: Remove pgd_lock
  KVM: Remove obsolete kvm_unmap_hva notifier backend
  arm64: KVM: Only force FPEXC32_EL2.EN if trapping FPSIMD
  KVM: arm/arm64: Clean dcache to PoC when changing PTE due to CoW
  KVM: s390: Properly lock mm context allow_gmap_hpage_1m setting
  KVM: s390: vsie: copy wrapping keys to right place
  KVM: s390: Fix pfmf and conditional skey emulation
  tools/kvm_stat: re-animate display of dead guests
  tools/kvm_stat: indicate dead guests as such
  tools/kvm_stat: handle guest removals more gracefully
  tools/kvm_stat: don't reset stats when setting PID filter for debugfs
  tools/kvm_stat: fix updates for dead guests
  tools/kvm_stat: fix handling of invalid paths in debugfs provider
  tools/kvm_stat: fix python3 issues
  KVM: x86: Unexport x86_emulate_instruction()
  KVM: x86: Rename emulate_instruction() to kvm_emulate_instruction()
  KVM: x86: Do not re-{try,execute} after failed emulation in L2
  KVM: x86: Default to not allowing emulation retry in kvm_mmu_page_fault
  ...
-rw-r--r--	arch/arm/include/asm/kvm_host.h	1
-rw-r--r--	arch/arm64/include/asm/kvm_host.h	4
-rw-r--r--	arch/arm64/kvm/hyp/switch.c	9
-rw-r--r--	arch/mips/include/asm/kvm_host.h	1
-rw-r--r--	arch/mips/kvm/mmu.c	10
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_hv.c	2
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_radix.c	6
-rw-r--r--	arch/s390/include/asm/mmu.h	8
-rw-r--r--	arch/s390/kvm/kvm-s390.c	2
-rw-r--r--	arch/s390/kvm/priv.c	30
-rw-r--r--	arch/s390/kvm/vsie.c	3
-rw-r--r--	arch/x86/include/asm/kvm_host.h	22
-rw-r--r--	arch/x86/kvm/lapic.c	27
-rw-r--r--	arch/x86/kvm/mmu.c	26
-rw-r--r--	arch/x86/kvm/svm.c	19
-rw-r--r--	arch/x86/kvm/vmx.c	43
-rw-r--r--	arch/x86/kvm/x86.c	28
-rw-r--r--	arch/x86/kvm/x86.h	2
-rwxr-xr-x	tools/kvm/kvm_stat/kvm_stat	59
-rw-r--r--	virt/kvm/arm/mmu.c	21
-rw-r--r--	virt/kvm/arm/trace.h	15
21 files changed, 204 insertions(+), 134 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 79906cecb091..3ad482d2f1eb 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -223,7 +223,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 				      struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f26055f2306e..3d6d7336f871 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -61,8 +61,7 @@ struct kvm_arch {
 	u64    vmid_gen;
 	u32    vmid;
 
-	/* 1-level 2nd stage table and lock */
-	spinlock_t pgd_lock;
+	/* 1-level 2nd stage table, protected by kvm->mmu_lock */
 	pgd_t *pgd;
 
 	/* VTTBR value associated with above pgd and vmid */
@@ -357,7 +356,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 				      struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d496ef579859..ca46153d7915 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -98,8 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
 	val &= ~CPACR_EL1_ZEN;
-	if (!update_fp_enabled(vcpu))
+	if (!update_fp_enabled(vcpu)) {
 		val &= ~CPACR_EL1_FPEN;
+		__activate_traps_fpsimd32(vcpu);
+	}
 
 	write_sysreg(val, cpacr_el1);
 
@@ -114,8 +116,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 
 	val = CPTR_EL2_DEFAULT;
 	val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
-	if (!update_fp_enabled(vcpu))
+	if (!update_fp_enabled(vcpu)) {
 		val |= CPTR_EL2_TFP;
+		__activate_traps_fpsimd32(vcpu);
+	}
 
 	write_sysreg(val, cptr_el2);
 }
@@ -129,7 +133,6 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
 		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
 
-	__activate_traps_fpsimd32(vcpu);
 	if (has_vhe())
 		activate_traps_vhe(vcpu);
 	else
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index a9af1d2dcd69..2c1c53d12179 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -931,7 +931,6 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
 				bool write);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index ee64db032793..d8dcdb350405 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -512,16 +512,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
 	return 1;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	unsigned long end = hva + PAGE_SIZE;
-
-	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
-
-	kvm_mips_callbacks->flush_shadow_all(kvm);
-	return 0;
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3c0e8fb2b773..68e14afecac8 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -358,7 +358,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	unsigned long pp, key;
 	unsigned long v, orig_v, gr;
 	__be64 *hptep;
-	int index;
+	long int index;
 	int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
 
 	if (kvm_is_radix(vcpu->kvm))
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 0af1c0aea1fe..fd6e8c13685f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -725,10 +725,10 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 						 gpa, shift);
 		kvmppc_radix_tlbie_page(kvm, gpa, shift);
 		if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
-			unsigned long npages = 1;
+			unsigned long psize = PAGE_SIZE;
 			if (shift)
-				npages = 1ul << (shift - PAGE_SHIFT);
-			kvmppc_update_dirty_map(memslot, gfn, npages);
+				psize = 1ul << shift;
+			kvmppc_update_dirty_map(memslot, gfn, psize);
 		}
 	}
 	return 0;
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index f31a15044c24..a8418e1379eb 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,7 +16,13 @@ typedef struct {
 	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
-	/* The mmu context allocates 4K page tables. */
+	/*
+	 * The following bitfields need a down_write on the mm
+	 * semaphore when they are written to. As they are only
+	 * written once, they can be read without a lock.
+	 *
+	 * The mmu context allocates 4K page tables.
+	 */
 	unsigned int alloc_pgste:1;
 	/* The mmu context uses extended page tables. */
 	unsigned int has_pgste:1;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 91ad4a9425c0..f69333fd2fa3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -695,7 +695,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		r = -EINVAL;
 	else {
 		r = 0;
+		down_write(&kvm->mm->mmap_sem);
 		kvm->mm->context.allow_gmap_hpage_1m = 1;
+		up_write(&kvm->mm->mmap_sem);
 		/*
 		 * We might have to create fake 4k page
 		 * tables. To avoid that the hardware works on
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d68f10441a16..8679bd74d337 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -280,9 +280,11 @@ retry:
 			goto retry;
 		}
 	}
-	if (rc)
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	up_read(&current->mm->mmap_sem);
+	if (rc == -EFAULT)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (rc < 0)
+		return rc;
 	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
 	vcpu->run->s.regs.gprs[reg1] |= key;
 	return 0;
@@ -324,9 +326,11 @@ retry:
 			goto retry;
 		}
 	}
-	if (rc < 0)
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	up_read(&current->mm->mmap_sem);
+	if (rc == -EFAULT)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (rc < 0)
+		return rc;
 	kvm_s390_set_psw_cc(vcpu, rc);
 	return 0;
 }
@@ -390,12 +394,12 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 					      FAULT_FLAG_WRITE, &unlocked);
 			rc = !rc ? -EAGAIN : rc;
 		}
+		up_read(&current->mm->mmap_sem);
 		if (rc == -EFAULT)
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
-		up_read(&current->mm->mmap_sem);
-		if (rc >= 0)
-			start += PAGE_SIZE;
+		if (rc < 0)
+			return rc;
+		start += PAGE_SIZE;
 	}
 
 	if (m3 & (SSKE_MC | SSKE_MR)) {
@@ -1002,13 +1006,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 							FAULT_FLAG_WRITE, &unlocked);
 				rc = !rc ? -EAGAIN : rc;
 			}
+			up_read(&current->mm->mmap_sem);
 			if (rc == -EFAULT)
 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
-			up_read(&current->mm->mmap_sem);
-			if (rc >= 0)
-				start += PAGE_SIZE;
+			if (rc == -EAGAIN)
+				continue;
+			if (rc < 0)
+				return rc;
 		}
+		start += PAGE_SIZE;
 	}
 	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 63844b95c22c..a2b28cd1e3fe 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -173,7 +173,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		return set_validity_icpt(scb_s, 0x0039U);
 
 	/* copy only the wrapping keys */
-	if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56))
+	if (read_guest_real(vcpu, crycb_addr + 72,
+			    vsie_page->crycb.dea_wrapping_key_mask, 56))
 		return set_validity_icpt(scb_s, 0x0035U);
 
 	scb_s->ecb3 |= ecb3_flags;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 00ddb0c9e612..8e90488c3d56 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1237,19 +1237,12 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	    (1 << 0)
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
-#define EMULTYPE_RETRY		    (1 << 3)
-#define EMULTYPE_NO_REEXECUTE	    (1 << 4)
-#define EMULTYPE_NO_UD_ON_FAIL	    (1 << 5)
-#define EMULTYPE_VMWARE		    (1 << 6)
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
-			    int emulation_type, void *insn, int insn_len);
-
-static inline int emulate_instruction(struct kvm_vcpu *vcpu,
-			int emulation_type)
-{
-	return x86_emulate_instruction(vcpu, 0,
-			emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
-}
+#define EMULTYPE_ALLOW_RETRY	    (1 << 3)
+#define EMULTYPE_NO_UD_ON_FAIL	    (1 << 4)
+#define EMULTYPE_VMWARE		    (1 << 5)
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+					void *insn, int insn_len);
 
 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
@@ -1450,7 +1443,6 @@ asmlinkage void kvm_spurious_fault(void);
 	____kvm_handle_fault_on_reboot(insn, "")
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@ -1463,7 +1455,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
-		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit);
 
 u64 kvm_get_arch_capabilities(void);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0cefba28c864..17c0472c5b34 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 }
 
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
-		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit)
 {
 	int i;
@@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
+	if (min > map->max_apic_id)
+		goto out;
 	/* Bits above cluster_size are masked in the caller. */
-	for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
-		vcpu = map->phys_map[min + i]->vcpu;
-		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+	for_each_set_bit(i, &ipi_bitmap_low,
+		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+		if (map->phys_map[min + i]) {
+			vcpu = map->phys_map[min + i]->vcpu;
+			count += kvm_apic_set_irq(vcpu, &irq, NULL);
+		}
 	}
 
 	min += cluster_size;
-	for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
-		vcpu = map->phys_map[min + i]->vcpu;
-		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+
+	if (min > map->max_apic_id)
+		goto out;
+
+	for_each_set_bit(i, &ipi_bitmap_high,
+		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+		if (map->phys_map[min + i]) {
+			vcpu = map->phys_map[min + i]->vcpu;
+			count += kvm_apic_set_irq(vcpu, &irq, NULL);
+		}
 	}
 
+out:
 	rcu_read_unlock();
 	return count;
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a282321329b5..e24ea7067373 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
@@ -5217,7 +5212,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 		       void *insn, int insn_len)
 {
-	int r, emulation_type = EMULTYPE_RETRY;
+	int r, emulation_type = 0;
 	enum emulation_result er;
 	bool direct = vcpu->arch.mmu.direct_map;
 
@@ -5230,10 +5225,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	r = RET_PF_INVALID;
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
 		r = handle_mmio_page_fault(vcpu, cr2, direct);
-		if (r == RET_PF_EMULATE) {
-			emulation_type = 0;
+		if (r == RET_PF_EMULATE)
 			goto emulate;
-		}
 	}
 
 	if (r == RET_PF_INVALID) {
@@ -5260,8 +5253,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 		return 1;
 	}
 
-	if (mmio_info_in_cache(vcpu, cr2, direct))
-		emulation_type = 0;
+	/*
+	 * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
+	 * optimistically try to just unprotect the page and let the processor
+	 * re-execute the instruction that caused the page fault. Do not allow
+	 * retrying MMIO emulation, as it's not only pointless but could also
+	 * cause us to enter an infinite loop because the processor will keep
+	 * faulting on the non-existent MMIO address. Retrying an instruction
+	 * from a nested guest is also pointless and dangerous as we are only
+	 * explicitly shadowing L1's page tables, i.e. unprotecting something
+	 * for L1 isn't going to magically fix whatever issue cause L2 to fail.
+	 */
+	if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+		emulation_type = EMULTYPE_ALLOW_RETRY;
 emulate:
 	/*
 	 * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6276140044d0..89c4c5aa15f1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	}
 
 	if (!svm->next_rip) {
-		if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
+		if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
 				EMULATE_DONE)
 			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
@@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm)
 
 	WARN_ON_ONCE(!enable_vmware_backdoor);
 
-	er = emulate_instruction(vcpu,
+	er = kvm_emulate_instruction(vcpu,
 		EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
 	if (er == EMULATE_USER_EXIT)
 		return 0;
@@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm)
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
 	if (string)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+		return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm)
 static int invlpg_interception(struct vcpu_svm *svm)
 {
 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
-		return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+		return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
 	kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
 	return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3869,13 +3869,13 @@ static int invlpg_interception(struct vcpu_svm *svm)
 
 static int emulate_on_interception(struct vcpu_svm *svm)
 {
-	return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
 static int rsm_interception(struct vcpu_svm *svm)
 {
-	return x86_emulate_instruction(&svm->vcpu, 0, 0,
-				       rsm_ins_bytes, 2) == EMULATE_DONE;
+	return kvm_emulate_instruction_from_buffer(&svm->vcpu,
+						   rsm_ins_bytes, 2) == EMULATE_DONE;
 }
 
 static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
 		ret = avic_unaccel_trap_write(svm);
 	} else {
 		/* Handling Fault */
-		ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+		ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
 	}
 
 	return ret;
@@ -6747,7 +6747,7 @@ e_free:
 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 {
 	unsigned long vaddr, vaddr_end, next_vaddr;
-	unsigned long dst_vaddr, dst_vaddr_end;
+	unsigned long dst_vaddr;
 	struct page **src_p, **dst_p;
 	struct kvm_sev_dbg debug;
 	unsigned long n;
@@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 	size = debug.len;
 	vaddr_end = vaddr + size;
 	dst_vaddr = debug.dst_uaddr;
-	dst_vaddr_end = dst_vaddr + size;
 
 	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
 		int len, s_off, d_off;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d26f3c4985b..533a327372c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	 * Cause the #SS fault with 0 error code in VM86 mode.
 	 */
 	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
-		if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+		if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
 			if (vcpu->arch.halt_request) {
 				vcpu->arch.halt_request = 0;
 				return kvm_vcpu_halt(vcpu);
@@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
 	if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
 		WARN_ON_ONCE(!enable_vmware_backdoor);
-		er = emulate_instruction(vcpu,
+		er = kvm_emulate_instruction(vcpu,
 			EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
 		if (er == EMULATE_USER_EXIT)
 			return 0;
@@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
 	++vcpu->stat.io_exits;
 
 	if (string)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+		return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = exit_qualification >> 16;
 	size = (exit_qualification & 7) + 1;
@@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 static int handle_desc(struct kvm_vcpu *vcpu)
 {
 	WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_cr(struct kvm_vcpu *vcpu)
@@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 
 static int handle_invd(struct kvm_vcpu *vcpu)
 {
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
 			return kvm_skip_emulated_instruction(vcpu);
 		}
 	}
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 		if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
 			return kvm_skip_emulated_instruction(vcpu);
 		else
-			return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
-						       NULL, 0) == EMULATE_DONE;
+			return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
+								EMULATE_DONE;
 	}
 
 	return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		if (kvm_test_request(KVM_REQ_EVENT, vcpu))
 			return 1;
 
-		err = emulate_instruction(vcpu, 0);
+		err = kvm_emulate_instruction(vcpu, 0);
 
 		if (err == EMULATE_USER_EXIT) {
 			++vcpu->stat.mmio_exits;
@@ -12537,8 +12537,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	bool from_vmentry = !!exit_qual;
 	u32 dummy_exit_qual;
+	u32 vmcs01_cpu_exec_ctrl;
 	int r = 0;
 
+	vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+
 	enter_guest_mode(vcpu);
 
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -12575,6 +12578,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 	}
 
 	/*
+	 * If L1 had a pending IRQ/NMI until it executed
+	 * VMLAUNCH/VMRESUME which wasn't delivered because it was
+	 * disallowed (e.g. interrupts disabled), L0 needs to
+	 * evaluate if this pending event should cause an exit from L2
+	 * to L1 or delivered directly to L2 (e.g. In case L1 don't
+	 * intercept EXTERNAL_INTERRUPT).
+	 *
+	 * Usually this would be handled by L0 requesting a
+	 * IRQ/NMI window by setting VMCS accordingly. However,
+	 * this setting was done on VMCS01 and now VMCS02 is active
+	 * instead. Thus, we force L0 to perform pending event
+	 * evaluation by requesting a KVM_REQ_EVENT.
+	 */
+	if (vmcs01_cpu_exec_ctrl &
+		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	}
+
+	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
 	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
 	 * returned as far as L1 is concerned. It will only return (and set
@@ -13988,9 +14010,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	    check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
 		return -EINVAL;
 
-	if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
-		vmx->nested.nested_run_pending = 1;
-
 	vmx->nested.dirty_vmcs12 = true;
 	ret = enter_vmx_non_root_mode(vcpu, NULL);
 	if (ret)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 506bd2b4b8bb..542f6315444d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
 		emul_type = 0;
 	}
 
-	er = emulate_instruction(vcpu, emul_type);
+	er = kvm_emulate_instruction(vcpu, emul_type);
 	if (er == EMULATE_USER_EXIT)
 		return 0;
 	if (er != EMULATE_DONE)
@@ -5870,7 +5870,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	gpa_t gpa = cr2;
 	kvm_pfn_t pfn;
 
-	if (emulation_type & EMULTYPE_NO_REEXECUTE)
+	if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+		return false;
+
+	if (WARN_ON_ONCE(is_guest_mode(vcpu)))
 		return false;
 
 	if (!vcpu->arch.mmu.direct_map) {
@@ -5958,7 +5961,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	 */
 	vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
 
-	if (!(emulation_type & EMULTYPE_RETRY))
+	if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+		return false;
+
+	if (WARN_ON_ONCE(is_guest_mode(vcpu)))
 		return false;
 
 	if (x86_page_table_writing_insn(ctxt))
@@ -6276,7 +6282,19 @@ restart:
 
 	return r;
 }
-EXPORT_SYMBOL_GPL(x86_emulate_instruction);
+
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
+{
+	return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+					void *insn, int insn_len)
+{
+	return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
 
 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
 			    unsigned short port)
@@ -7734,7 +7752,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
 	int r;
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	if (r != EMULATE_DONE)
 		return 0;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 257f27620bc2..67b9568613f3 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 					  int page_num);
 bool kvm_vector_hashing_enabled(void);
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+			    int emulation_type, void *insn, int insn_len);
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 56c4b3f8a01b..439b8a27488d 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -759,12 +759,18 @@ class DebugfsProvider(Provider):
             if len(vms) == 0:
                 self.do_read = False
 
-            self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
+            self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms))
 
         else:
             self.paths = []
             self.do_read = True
-        self.reset()
+
+    def _verify_paths(self):
+        """Remove invalid paths"""
+        for path in self.paths:
+            if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)):
+                self.paths.remove(path)
+                continue
 
     def read(self, reset=0, by_guest=0):
         """Returns a dict with format:'file name / field -> current value'.
@@ -780,6 +786,7 @@ class DebugfsProvider(Provider):
         # If no debugfs filtering support is available, then don't read.
         if not self.do_read:
             return results
+        self._verify_paths()
 
         paths = self.paths
         if self._pid == 0:
@@ -1096,15 +1103,16 @@ class Tui(object):
         pid = self.stats.pid_filter
         self.screen.erase()
         gname = self.get_gname_from_pid(pid)
+        self._gname = gname
         if gname:
             gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                    if len(gname) > MAX_GUEST_NAME_LEN
                                    else gname))
         if pid > 0:
-            self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
-                               .format(pid, gname), curses.A_BOLD)
+            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
         else:
-            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+            self._headline = 'kvm statistics - summary'
+        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
         if self.stats.fields_filter:
             regex = self.stats.fields_filter
             if len(regex) > MAX_REGEX_LEN:
@@ -1162,6 +1170,19 @@ class Tui(object):
 
             return sorted_items
 
+        if not self._is_running_guest(self.stats.pid_filter):
+            if self._gname:
+                try: # ...to identify the guest by name in case it's back
+                    pids = self.get_pid_from_gname(self._gname)
+                    if len(pids) == 1:
+                        self._refresh_header(pids[0])
+                        self._update_pid(pids[0])
+                        return
+                except:
+                    pass
+            self._display_guest_dead()
+            # leave final data on screen
+            return
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
@@ -1184,6 +1205,7 @@ class Tui(object):
         # print events
         tavg = 0
         tcur = 0
+        guest_removed = False
         for key, values in get_sorted_events(self, stats):
             if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                 break
@@ -1191,7 +1213,10 @@ class Tui(object):
                 key = self.get_gname_from_pid(key)
                 if not key:
                     continue
-            cur = int(round(values.delta / sleeptime)) if values.delta else ''
+            cur = int(round(values.delta / sleeptime)) if values.delta else 0
+            if cur < 0:
+                guest_removed = True
+                continue
             if key[0] != ' ':
                 if values.delta:
                     tcur += values.delta
@@ -1204,13 +1229,21 @@ class Tui(object):
                                values.value * 100 / float(ltotal), cur))
             row += 1
         if row == 3:
-            self.screen.addstr(4, 1, 'No matching events reported yet')
+            if guest_removed:
+                self.screen.addstr(4, 1, 'Guest removed, updating...')
+            else:
+                self.screen.addstr(4, 1, 'No matching events reported yet')
         if row > 4:
             tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d %8s' %
                                ('Total', total, tavg), curses.A_BOLD)
         self.screen.refresh()
 
+    def _display_guest_dead(self):
+        marker = ' Guest is DEAD '
+        y = min(len(self._headline), 80 - len(marker))
+        self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)
+
     def _show_msg(self, text):
         """Display message centered text and exit on key press"""
         hint = 'Press any key to continue'
@@ -1219,10 +1252,10 @@ class Tui(object):
         (x, term_width) = self.screen.getmaxyx()
         row = 2
         for line in text:
-            start = (term_width - len(line)) / 2
+            start = (term_width - len(line)) // 2
             self.screen.addstr(row, start, line)
             row += 1
-        self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
+        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
                            curses.A_STANDOUT)
         self.screen.getkey()
 
@@ -1319,6 +1352,12 @@ class Tui(object):
             msg = '"' + str(val) + '": Invalid value'
         self._refresh_header()
 
+    def _is_running_guest(self, pid):
+        """Check if pid is still a running process."""
+        if not pid:
+            return True
+        return os.path.isdir(os.path.join('/proc/', str(pid)))
+
     def _show_vm_selection_by_guest(self):
         """Draws guest selection mask.
 
@@ -1346,7 +1385,7 @@ class Tui(object):
             if not guest or guest == '0':
                 break
             if guest.isdigit():
-                if not os.path.isdir(os.path.join('/proc/', guest)):
+                if not self._is_running_guest(guest):
                     msg = '"' + guest + '": Not a running process'
                     continue
                 pid = int(guest)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 91aaf73b00df..ed162a6c57c5 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1817,18 +1817,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat
 	return 0;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	unsigned long end = hva + PAGE_SIZE;
-
-	if (!kvm->arch.pgd)
-		return 0;
-
-	trace_kvm_unmap_hva(hva);
-	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
-	return 0;
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end)
 {
@@ -1860,13 +1848,20 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
 	unsigned long end = hva + PAGE_SIZE;
+	kvm_pfn_t pfn = pte_pfn(pte);
 	pte_t stage2_pte;
 
 	if (!kvm->arch.pgd)
 		return;
 
 	trace_kvm_set_spte_hva(hva);
-	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
+
+	/*
+	 * We've moved a page around, probably through CoW, so let's treat it
+	 * just like a translation fault and clean the cache to the PoC.
+	 */
+	clean_dcache_guest_page(pfn, PAGE_SIZE);
+	stage2_pte = pfn_pte(pfn, PAGE_S2);
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index e53b596f483b..57b3edebbb40 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -134,21 +134,6 @@ TRACE_EVENT(kvm_mmio_emulate,
 		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
 );
 
-TRACE_EVENT(kvm_unmap_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva	)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
-);
-
 TRACE_EVENT(kvm_unmap_hva_range,
 	TP_PROTO(unsigned long start, unsigned long end),
 	TP_ARGS(start, end),