author     Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>   2011-09-22 05:02:48 -0400
committer  Avi Kivity <avi@redhat.com>                     2011-12-27 04:16:50 -0500
commit     1cb3f3ae5a3855ba430430706da4201ace1d6ec4 (patch)
tree       1ff844904958cf669c86650da4601eb392f14a91 /arch/x86
parent     d5ae7ce835cc89556dc18e2070e754f026402efa (diff)
KVM: x86: retry non-page-table writing instructions
If the emulation is caused by a #PF and the faulting instruction is not a page-table-writing
instruction, the VM-EXIT was caused by write protection on a shadow page; we can simply zap
the shadow page and retry the instruction directly.

The idea is from Avi.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h |  1
-rw-r--r--  arch/x86/include/asm/kvm_host.h    |  5
-rw-r--r--  arch/x86/kvm/emulate.c             |  5
-rw-r--r--  arch/x86/kvm/mmu.c                 | 25
-rw-r--r--  arch/x86/kvm/x86.c                 | 47
5 files changed, 77 insertions, 6 deletions
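
The heart of the change is the retry decision made by the new retry_instruction() helper in
arch/x86/kvm/x86.c (full hunk below). As a reading aid, that decision can be modeled by a small
stand-alone C program. This is only an illustrative sketch: struct retry_state, check_retry()
and the values used in main() are invented for the example and do not exist in the kernel.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified state mirroring the two fields the patch adds to
 * struct kvm_vcpu_arch (last_retry_eip / last_retry_addr). */
struct retry_state {
	unsigned long last_retry_eip;
	unsigned long last_retry_addr;
};

/* Model of retry_instruction(): return true when it is enough to unprotect
 * the shadow page and re-run the guest instruction instead of emulating it. */
static bool check_retry(struct retry_state *s, bool retry_allowed,
			bool writes_page_table, unsigned long eip,
			unsigned long cr2)
{
	unsigned long prev_eip = s->last_retry_eip;
	unsigned long prev_addr = s->last_retry_addr;

	/* Forget the cached values; they are re-armed only when we retry. */
	s->last_retry_eip = s->last_retry_addr = 0;

	if (!retry_allowed)		/* caller did not pass EMULTYPE_RETRY */
		return false;
	if (writes_page_table)		/* the write really targets a PTE/PDE */
		return false;
	if (eip == prev_eip && prev_addr == cr2)
		return false;		/* same fault twice: avoid an infinite loop */

	s->last_retry_eip = eip;
	s->last_retry_addr = cr2;
	return true;			/* caller unprotects the page and retries */
}

int main(void)
{
	struct retry_state s = { 0, 0 };

	/* First fault at eip 0x1000 on address 0x2000: retrying is fine. */
	printf("%d\n", check_retry(&s, true, false, 0x1000, 0x2000));	/* 1 */
	/* The exact same fault again: fall back to full emulation. */
	printf("%d\n", check_retry(&s, true, false, 0x1000, 0x2000));	/* 0 */
	return 0;
}

The cached eip/address pair is what keeps the second occurrence of the exact same fault from
looping forever, as the comment in the real helper explains.
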
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index a026507893e9..9a4acf41709c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -364,6 +364,7 @@ enum x86_intercept {
 #endif
 
 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4973f4dab98..4ceefa9567ed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -444,6 +444,9 @@ struct kvm_vcpu_arch {
 
 	cpumask_var_t wbinvd_dirty_mask;
 
+	unsigned long last_retry_eip;
+	unsigned long last_retry_addr;
+
 	struct {
 		bool halted;
 		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
@@ -692,6 +695,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	(1 << 0)
 #define EMULTYPE_TRAP_UD	(1 << 1)
 #define EMULTYPE_SKIP		(1 << 2)
+#define EMULTYPE_RETRY		(1 << 3)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -756,6 +760,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes,
 		       bool guest_initiated);
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a10950a37928..8547958e3582 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3702,6 +3702,11 @@ done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
+{
+	return ctxt->d & PageTable;
+}
+
 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
 {
 	/* The second termination condition only applies for REPE
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 232c5a30ddc8..7a22eb81b4ca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1998,7 +1998,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
 }
 
-static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node;
@@ -2007,7 +2007,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
 	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 	r = 0;
-
+	spin_lock(&kvm->mmu_lock);
 	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
 		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 			 sp->role.word);
@@ -2015,8 +2015,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 	}
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
+
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
@@ -3698,9 +3701,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 
 	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
-	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	spin_unlock(&vcpu->kvm->mmu_lock);
+
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
@@ -3721,10 +3723,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 }
 
+static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
+{
+	if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		       void *insn, int insn_len)
 {
-	int r;
+	int r, emulation_type = EMULTYPE_RETRY;
 	enum emulation_result er;
 
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
@@ -3736,7 +3746,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		goto out;
 	}
 
-	er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
+	if (is_mmio_page_fault(vcpu, cr2))
+		emulation_type = 0;
+
+	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
 	switch (er) {
 	case EMULATE_DONE:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e24edbc7f2ec..7ba1ab73fd03 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4836,6 +4836,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	return false;
 }
 
+static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
+			      unsigned long cr2, int emulation_type)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+
+	last_retry_eip = vcpu->arch.last_retry_eip;
+	last_retry_addr = vcpu->arch.last_retry_addr;
+
+	/*
+	 * If the emulation is caused by #PF and it is non-page_table
+	 * writing instruction, it means the VM-EXIT is caused by shadow
+	 * page protected, we can zap the shadow page and retry this
+	 * instruction directly.
+	 *
+	 * Note: if the guest uses a non-page-table modifying instruction
+	 * on the PDE that points to the instruction, then we will unmap
+	 * the instruction and go to an infinite loop. So, we cache the
+	 * last retried eip and the last fault address, if we meet the eip
+	 * and the address again, we can break out of the potential infinite
+	 * loop.
+	 */
+	vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
+
+	if (!(emulation_type & EMULTYPE_RETRY))
+		return false;
+
+	if (x86_page_table_writing_insn(ctxt))
+		return false;
+
+	if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
+		return false;
+
+	vcpu->arch.last_retry_eip = ctxt->eip;
+	vcpu->arch.last_retry_addr = cr2;
+
+	if (!vcpu->arch.mmu.direct_map)
+		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	return true;
+}
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4877,6 +4921,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DONE;
 	}
 
+	if (retry_instruction(ctxt, cr2, emulation_type))
+		return EMULATE_DONE;
+
 	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {