 arch/x86/kvm/emulate.c     | 14 +++++++++++++-
 arch/x86/kvm/paging_tmpl.h | 20 +++++++++++++++++++-
 arch/x86/kvm/vmx.c         | 13 +++++++++++++
 include/linux/kvm_host.h   |  1 +
 virt/kvm/async_pf.c        |  5 ++++-
 virt/kvm/kvm_main.c        | 14 +++++++++-----
 6 files changed, 59 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2bc1e81045b0..ddc3f3d2afdb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
+static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
+{
+	int rc;
+
+	rc = em_ret_far(ctxt);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rsp_increment(ctxt, ctxt->src.val);
+	return X86EMUL_CONTINUE;
+}
+
 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
 	/* Save real source value, then compare EAX against destination. */
@@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = {
 	G(ByteOp, group11), G(0, group11),
 	/* 0xC8 - 0xCF */
 	I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
-	N, I(ImplicitOps | Stack, em_ret_far),
+	I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
+	I(ImplicitOps | Stack, em_ret_far),
 	D(ImplicitOps), DI(SrcImmByte, intn),
 	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
 	/* 0xD0 - 0xD7 */
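
The new em_ret_far_imm() handler fills the opcode-table slot for 0xCA (far
return with a 16-bit immediate), which was previously undefined (N): it pops
CS:IP exactly as a plain far return does, then discards imm16 additional bytes
of callee-cleaned stack. A minimal, runnable userspace sketch of those
semantics follows; the types and names are illustrative stand-ins, not the
emulator's real ones.

#include <stdint.h>
#include <stdio.h>

struct cpu_state {
	uint64_t rsp;            /* stack pointer (index into stack[]) */
	uint16_t cs;             /* code segment */
	uint64_t rip;            /* instruction pointer */
	uint8_t  stack[64];
};

/* Pop one 16-bit word in a 16-bit stack model. */
static uint16_t pop16(struct cpu_state *c)
{
	uint16_t v = c->stack[c->rsp] | (c->stack[c->rsp + 1] << 8);
	c->rsp += 2;
	return v;
}

static void ret_far_imm(struct cpu_state *c, uint16_t imm)
{
	c->rip = pop16(c);       /* em_ret_far: pop the return IP ... */
	c->cs  = pop16(c);       /* ... then the return CS */
	c->rsp += imm;           /* rsp_increment(ctxt, ctxt->src.val) */
}

int main(void)
{
	struct cpu_state c = { .rsp = 0 };
	/* Stack at entry: IP=0x1234, CS=0x5678, then 4 argument bytes. */
	c.stack[0] = 0x34; c.stack[1] = 0x12;
	c.stack[2] = 0x78; c.stack[3] = 0x56;

	ret_far_imm(&c, 4);
	printf("rip=%#llx cs=%#x rsp=%llu\n",
	       (unsigned long long)c.rip, c.cs,
	       (unsigned long long)c.rsp);  /* rip=0x1234 cs=0x5678 rsp=8 */
	return 0;
}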
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 043330159179..ad75d77999d0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -99,6 +99,7 @@ struct guest_walker {
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
 	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+	bool pte_writable[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 		if (pte == orig_pte)
 			continue;
 
+		/*
+		 * If the slot is read-only, simply do not process the accessed
+		 * and dirty bits.  This is the correct thing to do if the slot
+		 * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+		 * are only supported if the accessed and dirty bits are already
+		 * set in the ROM (so that MMIO writes are never needed).
+		 *
+		 * Note that NPT does not allow this at all and faults, since
+		 * it always wants nested page table entries for the guest
+		 * page tables to be writable.  And EPT works but will simply
+		 * overwrite the read-only memory to set the accessed and dirty
+		 * bits.
+		 */
+		if (unlikely(!walker->pte_writable[level - 1]))
+			continue;
+
 		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
 		if (ret)
 			return ret;
@@ -309,7 +326,8 @@ retry_walk:
 			goto error;
 		real_gfn = gpa_to_gfn(real_gfn);
 
-		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+		host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+					    &walker->pte_writable[walker->level - 1]);
 		if (unlikely(kvm_is_error_hva(host_addr)))
 			goto error;
 
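
Taken together, the paging_tmpl.h changes record, per paging level, whether the
memslot backing each guest PTE is host-writable, and make
update_accessed_dirty_bits() skip the accessed/dirty cmpxchg when it is not. A
compilable userspace sketch of that skip, assuming simplified stand-in types
rather than KVM's FNAME() machinery:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PT_MAX_FULL_LEVELS 4

/* Per-level writability, as the new guest_walker field records it. */
struct walker_sketch {
	bool pte_writable[PT_MAX_FULL_LEVELS];
};

/* Stand-in for FNAME(cmpxchg_gpte): set A/D bits with an atomic cmpxchg. */
static bool set_ad_bits(uint64_t *ptep, uint64_t old, uint64_t new)
{
	return __sync_bool_compare_and_swap(ptep, old, new);
}

static void update_ad_bits(struct walker_sketch *w, int level,
			   uint64_t *ptep, uint64_t want)
{
	uint64_t orig = *ptep;

	if (want == orig)
		return;
	/* Read-only slot: leave the guest PTE untouched. */
	if (!w->pte_writable[level - 1])
		return;
	set_ad_bits(ptep, orig, want);
}

int main(void)
{
	struct walker_sketch w = { .pte_writable = { true, false } };
	uint64_t pte_l1 = 0x1, pte_l2 = 0x1;

	update_ad_bits(&w, 1, &pte_l1, 0x21);   /* writable slot: updated */
	update_ad_bits(&w, 2, &pte_l2, 0x21);   /* read-only slot: skipped */
	printf("l1=%#llx l2=%#llx\n",           /* l1=0x21 l2=0x1 */
	       (unsigned long long)pte_l1, (unsigned long long)pte_l2);
	return 0;
}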
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f1da43ff2a2..a1216de9ffda 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5339,6 +5339,15 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
+	/*
+	 * EPT violation happened while executing iret from NMI,
+	 * "blocked by NMI" bit has to be set before next VM entry.
+	 * There are errata that may cause this bit to not be set:
+	 * AAK134, BY25.
+	 */
+	if (exit_qualification & INTR_INFO_UNBLOCK_NMI)
+		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
+
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	trace_kvm_page_fault(gpa, exit_qualification);
 
@@ -7766,6 +7775,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
 		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
 		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+		__clear_bit(VCPU_EXREG_PDPTR,
+				(unsigned long *)&vcpu->arch.regs_avail);
+		__clear_bit(VCPU_EXREG_PDPTR,
+				(unsigned long *)&vcpu->arch.regs_dirty);
 	}
 
 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
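
The prepare_vmcs02() change clears VCPU_EXREG_PDPTR from regs_avail ("the
cached value is current") and regs_dirty ("the cached value needs writeback"):
the PDPTRs were just written into the VMCS straight from vmcs12, so any cached
copy in the vcpu is stale and must be re-read. A runnable userspace model of
that register-cache invalidation; all names below are illustrative stand-ins
for the real regs_avail/regs_dirty bookkeeping.

#include <stdio.h>

enum { REG_PDPTR = 0, NR_REGS = 8 };

struct reg_cache {
	unsigned long avail;          /* bit set: cached value is current */
	unsigned long dirty;          /* bit set: cache must be written back */
	unsigned long val[NR_REGS];
};

static unsigned long hw_value = 0x1000;   /* stand-in for the VMCS field */

static unsigned long read_hw(int reg)
{
	(void)reg;
	return hw_value;
}

static void cache_invalidate(struct reg_cache *c, int reg)
{
	c->avail &= ~(1UL << reg);    /* like __clear_bit(..., regs_avail) */
	c->dirty &= ~(1UL << reg);    /* like __clear_bit(..., regs_dirty) */
}

static unsigned long cache_read(struct reg_cache *c, int reg)
{
	if (!(c->avail & (1UL << reg))) {
		c->val[reg] = read_hw(reg);   /* re-read, e.g. vmcs_read64() */
		c->avail |= 1UL << reg;
	}
	return c->val[reg];
}

int main(void)
{
	struct reg_cache c = { 0 };

	printf("pdptr=%#lx\n", cache_read(&c, REG_PDPTR));   /* 0x1000 */
	hw_value = 0x2000;                 /* VMCS updated behind the cache */
	cache_invalidate(&c, REG_PDPTR);   /* what prepare_vmcs02() now does */
	printf("pdptr=%#lx\n", cache_read(&c, REG_PDPTR));   /* 0x2000 */
	return 0;
}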
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ca645a01d37a..0fbbc7aa02cb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -533,6 +533,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
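
A hypothetical caller of the newly exported gfn_to_hva_prot(), sketched in
kernel style to show the intended usage; probe_guest_page() is invented for
illustration and is not part of this patch.

/* Translate a gfn and learn whether the backing memslot is writable. */
static int probe_guest_page(struct kvm *kvm, gfn_t gfn)
{
	bool writable;
	unsigned long hva = gfn_to_hva_prot(kvm, gfn, &writable);

	if (kvm_is_error_hva(hva))
		return -EFAULT;

	/* Reads are always fine; writes only if the slot allows them. */
	return writable ? 0 : -EROFS;
}

Callers that only read may pass NULL for writable, as kvm_read_guest_page()
does in the kvm_main.c hunks below.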
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index ea475cd03511..8a39dda7a325 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -101,8 +101,11 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 					   typeof(*work), queue);
 		cancel_work_sync(&work->work);
 		list_del(&work->queue);
-		if (!work->done) /* work was canceled */
+		if (!work->done) { /* work was canceled */
+			mmdrop(work->mm);
+			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
+		}
 	}
 
 	spin_lock(&vcpu->async_pf.lock);
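
The cancellation path now drops the two references a queued async-pf work item
holds, on the mm and on the kvm, before freeing the item; previously both were
leaked when a work item was canceled before it ran. A runnable userspace model
of that pairing, with get()/put() standing in for the reference-taking at
queue time and for mmdrop()/kvm_put_kvm():

#include <assert.h>

struct obj { int refs; };

static void get(struct obj *o) { o->refs++; }
static void put(struct obj *o) { assert(o->refs > 0); o->refs--; }

struct work { struct obj *mm, *kvm; int done; };

static void queue_work_item(struct work *w, struct obj *mm, struct obj *kvm)
{
	get(mm); get(kvm);       /* references taken at queue time */
	w->mm = mm; w->kvm = kvm; w->done = 0;
}

static void cancel_work_item(struct work *w)
{
	if (!w->done) {          /* work was canceled before running */
		put(w->mm);      /* these two puts were missing before */
		put(w->kvm);
	}
	/* ...then free the item itself */
}

int main(void)
{
	struct obj mm = { 1 }, kvm = { 1 };
	struct work w;

	queue_work_item(&w, &mm, &kvm);
	cancel_work_item(&w);
	assert(mm.refs == 1 && kvm.refs == 1);   /* balanced again */
	return 0;
}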
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bf040c4e02b3..979bff485fb0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
  */
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	if (writable)
+		*writable = !memslot_is_readonly(slot);
+
 	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }
 
@@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 	int r;
 	unsigned long addr;
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	int offset = offset_in_page(gpa);
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	pagefault_disable();
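
gfn_to_hva_prot() reports a slot as read-only when KVM_MEM_READONLY is set on
the memslot (that is what memslot_is_readonly() checks). A small userspace
model of that check; the struct is simplified, but the flag value matches the
KVM UAPI:

#include <stdbool.h>
#include <stdio.h>

#define KVM_MEM_READONLY (1UL << 1)   /* from <linux/kvm.h> */

struct memslot_sketch { unsigned long flags; };

static bool slot_is_readonly(const struct memslot_sketch *s)
{
	return s->flags & KVM_MEM_READONLY;
}

int main(void)
{
	struct memslot_sketch rom = { .flags = KVM_MEM_READONLY };
	struct memslot_sketch ram = { .flags = 0 };

	printf("ROM slot writable: %d\n", !slot_is_readonly(&rom));  /* 0 */
	printf("RAM slot writable: %d\n", !slot_is_readonly(&ram));  /* 1 */
	return 0;
}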