aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> 2011-09-22 05:02:48 -0400
committer: Avi Kivity <avi@redhat.com> 2011-12-27 04:16:50 -0500
commit: 1cb3f3ae5a3855ba430430706da4201ace1d6ec4 (patch)
tree: 1ff844904958cf669c86650da4601eb392f14a91 /arch/x86
parent: d5ae7ce835cc89556dc18e2070e754f026402efa (diff)
KVM: x86: retry non-page-table writing instructions
If the emulation is caused by a #PF and the instruction is not a page-table-writing instruction, it means the VM-EXIT was caused by a write-protected shadow page; we can zap the shadow page and retry this instruction directly.
The idea is from Avi.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/kvm_emulate.h 1
-rw-r--r-- arch/x86/include/asm/kvm_host.h 5
-rw-r--r-- arch/x86/kvm/emulate.c 5
-rw-r--r-- arch/x86/kvm/mmu.c 25
-rw-r--r-- arch/x86/kvm/x86.c 47
5 files changed, 77 insertions, 6 deletions
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index a026507893e9..9a4acf41709c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -364,6 +364,7 @@ enum x86_intercept {
364#endif 364#endif
365 365
366int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); 366int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
367bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
367#define EMULATION_FAILED -1 368#define EMULATION_FAILED -1
368#define EMULATION_OK 0 369#define EMULATION_OK 0
369#define EMULATION_RESTART 1 370#define EMULATION_RESTART 1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4973f4dab98..4ceefa9567ed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -444,6 +444,9 @@ struct kvm_vcpu_arch {
444 444
445 cpumask_var_t wbinvd_dirty_mask; 445 cpumask_var_t wbinvd_dirty_mask;
446 446
447 unsigned long last_retry_eip;
448 unsigned long last_retry_addr;
449
447 struct { 450 struct {
448 bool halted; 451 bool halted;
449 gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; 452 gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
@@ -692,6 +695,7 @@ enum emulation_result {
692#define EMULTYPE_NO_DECODE (1 << 0) 695#define EMULTYPE_NO_DECODE (1 << 0)
693#define EMULTYPE_TRAP_UD (1 << 1) 696#define EMULTYPE_TRAP_UD (1 << 1)
694#define EMULTYPE_SKIP (1 << 2) 697#define EMULTYPE_SKIP (1 << 2)
698#define EMULTYPE_RETRY (1 << 3)
695int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, 699int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
696 int emulation_type, void *insn, int insn_len); 700 int emulation_type, void *insn, int insn_len);
697 701
@@ -756,6 +760,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
756void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 760void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
757 const u8 *new, int bytes, 761 const u8 *new, int bytes,
758 bool guest_initiated); 762 bool guest_initiated);
763int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
759int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 764int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
760void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 765void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
761int kvm_mmu_load(struct kvm_vcpu *vcpu); 766int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a10950a37928..8547958e3582 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3702,6 +3702,11 @@ done:
3702 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; 3702 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
3703} 3703}
3704 3704
3705bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
3706{
3707 return ctxt->d & PageTable;
3708}
3709
3705static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) 3710static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
3706{ 3711{
3707 /* The second termination condition only applies for REPE 3712 /* The second termination condition only applies for REPE
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 232c5a30ddc8..7a22eb81b4ca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1998,7 +1998,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
1998 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; 1998 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
1999} 1999}
2000 2000
2001static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) 2001int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2002{ 2002{
2003 struct kvm_mmu_page *sp; 2003 struct kvm_mmu_page *sp;
2004 struct hlist_node *node; 2004 struct hlist_node *node;
@@ -2007,7 +2007,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2007 2007
2008 pgprintk("%s: looking for gfn %llx\n", __func__, gfn); 2008 pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
2009 r = 0; 2009 r = 0;
2010 2010 spin_lock(&kvm->mmu_lock);
2011 for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { 2011 for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
2012 pgprintk("%s: gfn %llx role %x\n", __func__, gfn, 2012 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
2013 sp->role.word); 2013 sp->role.word);
@@ -2015,8 +2015,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2015 kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); 2015 kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
2016 } 2016 }
2017 kvm_mmu_commit_zap_page(kvm, &invalid_list); 2017 kvm_mmu_commit_zap_page(kvm, &invalid_list);
2018 spin_unlock(&kvm->mmu_lock);
2019
2018 return r; 2020 return r;
2019} 2021}
2022EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
2020 2023
2021static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) 2024static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
2022{ 2025{
@@ -3698,9 +3701,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
3698 3701
3699 gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); 3702 gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
3700 3703
3701 spin_lock(&vcpu->kvm->mmu_lock);
3702 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3704 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3703 spin_unlock(&vcpu->kvm->mmu_lock); 3705
3704 return r; 3706 return r;
3705} 3707}
3706EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); 3708EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
@@ -3721,10 +3723,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
3721 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 3723 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
3722} 3724}
3723 3725
3726static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
3727{
3728 if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
3729 return vcpu_match_mmio_gpa(vcpu, addr);
3730
3731 return vcpu_match_mmio_gva(vcpu, addr);
3732}
3733
3724int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, 3734int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
3725 void *insn, int insn_len) 3735 void *insn, int insn_len)
3726{ 3736{
3727 int r; 3737 int r, emulation_type = EMULTYPE_RETRY;
3728 enum emulation_result er; 3738 enum emulation_result er;
3729 3739
3730 r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); 3740 r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
@@ -3736,7 +3746,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
3736 goto out; 3746 goto out;
3737 } 3747 }
3738 3748
3739 er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); 3749 if (is_mmio_page_fault(vcpu, cr2))
3750 emulation_type = 0;
3751
3752 er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
3740 3753
3741 switch (er) { 3754 switch (er) {
3742 case EMULATE_DONE: 3755 case EMULATE_DONE:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e24edbc7f2ec..7ba1ab73fd03 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4836,6 +4836,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4836 return false; 4836 return false;
4837} 4837}
4838 4838
4839static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4840 unsigned long cr2, int emulation_type)
4841{
4842 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4843 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
4844
4845 last_retry_eip = vcpu->arch.last_retry_eip;
4846 last_retry_addr = vcpu->arch.last_retry_addr;
4847
4848 /*
4849 * If the emulation is caused by #PF and it is non-page_table
4850 * writing instruction, it means the VM-EXIT is caused by shadow
4851 * page protected, we can zap the shadow page and retry this
4852 * instruction directly.
4853 *
4854 * Note: if the guest uses a non-page-table modifying instruction
4855 * on the PDE that points to the instruction, then we will unmap
4856 * the instruction and go to an infinite loop. So, we cache the
4857 * last retried eip and the last fault address, if we meet the eip
4858 * and the address again, we can break out of the potential infinite
4859 * loop.
4860 */
4861 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
4862
4863 if (!(emulation_type & EMULTYPE_RETRY))
4864 return false;
4865
4866 if (x86_page_table_writing_insn(ctxt))
4867 return false;
4868
4869 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
4870 return false;
4871
4872 vcpu->arch.last_retry_eip = ctxt->eip;
4873 vcpu->arch.last_retry_addr = cr2;
4874
4875 if (!vcpu->arch.mmu.direct_map)
4876 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4877
4878 kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4879
4880 return true;
4881}
4882
4839int x86_emulate_instruction(struct kvm_vcpu *vcpu, 4883int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4840 unsigned long cr2, 4884 unsigned long cr2,
4841 int emulation_type, 4885 int emulation_type,
@@ -4877,6 +4921,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4877 return EMULATE_DONE; 4921 return EMULATE_DONE;
4878 } 4922 }
4879 4923
4924 if (retry_instruction(ctxt, cr2, emulation_type))
4925 return EMULATE_DONE;
4926
4880 /* this is needed for vmware backdoor interface to work since it 4927 /* this is needed for vmware backdoor interface to work since it
4881 changes registers values during IO operation */ 4928 changes registers values during IO operation */
4882 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { 4929 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {