diff options
author | Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> | 2013-01-13 10:49:07 -0500 |
---|---|---|
committer | Marcelo Tosatti <mtosatti@redhat.com> | 2013-01-21 19:58:33 -0500 |
commit | 93c05d3ef25275829d421a255271595ac219a518 (patch) | |
tree | b987097751977ca63bd9a9ef53956cd0d14127c3 /arch | |
parent | 95b3cf69bdf8b27a02d878e24ca353cebb4e009e (diff) |
KVM: x86: improve reexecute_instruction
The current reexecute_instruction cannot reliably detect failed instruction
emulation. It allows the guest to retry all instructions except when the
access hits an error pfn.
For example, some cases are nested write protection: the page we want to
write is itself used as a PDE in the page-table chain that maps it. In this
case, we should stop the emulation and report the case to userspace.
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 7 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 27 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 22 |
3 files changed, 45 insertions, 11 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f75e1feb6ec5..77d56a4ba89c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -497,6 +497,13 @@ struct kvm_vcpu_arch { | |||
497 | u64 msr_val; | 497 | u64 msr_val; |
498 | struct gfn_to_hva_cache data; | 498 | struct gfn_to_hva_cache data; |
499 | } pv_eoi; | 499 | } pv_eoi; |
500 | |||
501 | /* | ||
502 | * Indicate whether the access faults on its page table in guest | ||
503 | * which is set when fix page fault and used to detect unhandeable | ||
504 | * instruction. | ||
505 | */ | ||
506 | bool write_fault_to_shadow_pgtable; | ||
500 | }; | 507 | }; |
501 | 508 | ||
502 | struct kvm_lpage_info { | 509 | struct kvm_lpage_info { |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3d1a35237dbf..ca69dcccbe31 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -497,26 +497,34 @@ out_gpte_changed: | |||
497 | * created when kvm establishes shadow page table that stop kvm using large | 497 | * created when kvm establishes shadow page table that stop kvm using large |
498 | * page size. Do it early can avoid unnecessary #PF and emulation. | 498 | * page size. Do it early can avoid unnecessary #PF and emulation. |
499 | * | 499 | * |
500 | * @write_fault_to_shadow_pgtable will return true if the fault gfn is | ||
501 | * currently used as its page table. | ||
502 | * | ||
500 | * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok | 503 | * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok |
501 | * since the PDPT is always shadowed, that means, we can not use large page | 504 | * since the PDPT is always shadowed, that means, we can not use large page |
502 | * size to map the gfn which is used as PDPT. | 505 | * size to map the gfn which is used as PDPT. |
503 | */ | 506 | */ |
504 | static bool | 507 | static bool |
505 | FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, | 508 | FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, |
506 | struct guest_walker *walker, int user_fault) | 509 | struct guest_walker *walker, int user_fault, |
510 | bool *write_fault_to_shadow_pgtable) | ||
507 | { | 511 | { |
508 | int level; | 512 | int level; |
509 | gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); | 513 | gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); |
514 | bool self_changed = false; | ||
510 | 515 | ||
511 | if (!(walker->pte_access & ACC_WRITE_MASK || | 516 | if (!(walker->pte_access & ACC_WRITE_MASK || |
512 | (!is_write_protection(vcpu) && !user_fault))) | 517 | (!is_write_protection(vcpu) && !user_fault))) |
513 | return false; | 518 | return false; |
514 | 519 | ||
515 | for (level = walker->level; level <= walker->max_level; level++) | 520 | for (level = walker->level; level <= walker->max_level; level++) { |
516 | if (!((walker->gfn ^ walker->table_gfn[level - 1]) & mask)) | 521 | gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1]; |
517 | return true; | 522 | |
523 | self_changed |= !(gfn & mask); | ||
524 | *write_fault_to_shadow_pgtable |= !gfn; | ||
525 | } | ||
518 | 526 | ||
519 | return false; | 527 | return self_changed; |
520 | } | 528 | } |
521 | 529 | ||
522 | /* | 530 | /* |
@@ -544,7 +552,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
544 | int level = PT_PAGE_TABLE_LEVEL; | 552 | int level = PT_PAGE_TABLE_LEVEL; |
545 | int force_pt_level; | 553 | int force_pt_level; |
546 | unsigned long mmu_seq; | 554 | unsigned long mmu_seq; |
547 | bool map_writable; | 555 | bool map_writable, is_self_change_mapping; |
548 | 556 | ||
549 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 557 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
550 | 558 | ||
@@ -572,9 +580,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
572 | return 0; | 580 | return 0; |
573 | } | 581 | } |
574 | 582 | ||
583 | vcpu->arch.write_fault_to_shadow_pgtable = false; | ||
584 | |||
585 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, | ||
586 | &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); | ||
587 | |||
575 | if (walker.level >= PT_DIRECTORY_LEVEL) | 588 | if (walker.level >= PT_DIRECTORY_LEVEL) |
576 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) | 589 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) |
577 | || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault); | 590 | || is_self_change_mapping; |
578 | else | 591 | else |
579 | force_pt_level = 1; | 592 | force_pt_level = 1; |
580 | if (!force_pt_level) { | 593 | if (!force_pt_level) { |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6f9cab071eca..e00dd0515a84 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -4751,7 +4751,8 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
4751 | return r; | 4751 | return r; |
4752 | } | 4752 | } |
4753 | 4753 | ||
4754 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2) | 4754 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, |
4755 | bool write_fault_to_shadow_pgtable) | ||
4755 | { | 4756 | { |
4756 | gpa_t gpa = cr2; | 4757 | gpa_t gpa = cr2; |
4757 | pfn_t pfn; | 4758 | pfn_t pfn; |
@@ -4808,7 +4809,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2) | |||
4808 | * guest to let CPU execute the instruction. | 4809 | * guest to let CPU execute the instruction. |
4809 | */ | 4810 | */ |
4810 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); | 4811 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); |
4811 | return true; | 4812 | |
4813 | /* | ||
4814 | * If the access faults on its page table, it can not | ||
4815 | * be fixed by unprotecting shadow page and it should | ||
4816 | * be reported to userspace. | ||
4817 | */ | ||
4818 | return !write_fault_to_shadow_pgtable; | ||
4812 | } | 4819 | } |
4813 | 4820 | ||
4814 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | 4821 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, |
@@ -4867,7 +4874,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4867 | int r; | 4874 | int r; |
4868 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4875 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4869 | bool writeback = true; | 4876 | bool writeback = true; |
4877 | bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; | ||
4870 | 4878 | ||
4879 | /* | ||
4880 | * Clear write_fault_to_shadow_pgtable here to ensure it is | ||
4881 | * never reused. | ||
4882 | */ | ||
4883 | vcpu->arch.write_fault_to_shadow_pgtable = false; | ||
4871 | kvm_clear_exception_queue(vcpu); | 4884 | kvm_clear_exception_queue(vcpu); |
4872 | 4885 | ||
4873 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4886 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
@@ -4886,7 +4899,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4886 | if (r != EMULATION_OK) { | 4899 | if (r != EMULATION_OK) { |
4887 | if (emulation_type & EMULTYPE_TRAP_UD) | 4900 | if (emulation_type & EMULTYPE_TRAP_UD) |
4888 | return EMULATE_FAIL; | 4901 | return EMULATE_FAIL; |
4889 | if (reexecute_instruction(vcpu, cr2)) | 4902 | if (reexecute_instruction(vcpu, cr2, |
4903 | write_fault_to_spt)) | ||
4890 | return EMULATE_DONE; | 4904 | return EMULATE_DONE; |
4891 | if (emulation_type & EMULTYPE_SKIP) | 4905 | if (emulation_type & EMULTYPE_SKIP) |
4892 | return EMULATE_FAIL; | 4906 | return EMULATE_FAIL; |
@@ -4916,7 +4930,7 @@ restart: | |||
4916 | return EMULATE_DONE; | 4930 | return EMULATE_DONE; |
4917 | 4931 | ||
4918 | if (r == EMULATION_FAILED) { | 4932 | if (r == EMULATION_FAILED) { |
4919 | if (reexecute_instruction(vcpu, cr2)) | 4933 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) |
4920 | return EMULATE_DONE; | 4934 | return EMULATE_DONE; |
4921 | 4935 | ||
4922 | return handle_emulation_failure(vcpu); | 4936 | return handle_emulation_failure(vcpu); |