diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2013-09-09 07:52:33 -0400 |
---|---|---|
committer | Gleb Natapov <gleb@redhat.com> | 2013-09-17 05:52:31 -0400 |
commit | ba6a3541545542721ce821d1e7e5ce35752e6fdf (patch) | |
tree | b53ff602454f383dc8148d38d7d870972d7866db | |
parent | 3261107ebfd8f6bba57cfcdb89385779fd149a00 (diff) |
KVM: mmu: allow page tables to be in read-only slots
Page tables in a read-only memory slot currently cause a triple fault,
because the page walker uses gfn_to_hva, which fails on such a slot.
OVMF uses such a page table; however, real hardware seems to be fine with
that as long as the accessed/dirty bits are already set. Save whether the
slot is read-only, and later skip updating the accessed and dirty bits if it is.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 20 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 1 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 14 |
3 files changed, 29 insertions, 6 deletions
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 043330159179..ad75d77999d0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -99,6 +99,7 @@ struct guest_walker { | |||
99 | pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; | 99 | pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; |
100 | gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; | 100 | gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; |
101 | pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; | 101 | pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; |
102 | bool pte_writable[PT_MAX_FULL_LEVELS]; | ||
102 | unsigned pt_access; | 103 | unsigned pt_access; |
103 | unsigned pte_access; | 104 | unsigned pte_access; |
104 | gfn_t gfn; | 105 | gfn_t gfn; |
@@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | |||
235 | if (pte == orig_pte) | 236 | if (pte == orig_pte) |
236 | continue; | 237 | continue; |
237 | 238 | ||
239 | /* | ||
240 | * If the slot is read-only, simply do not process the accessed | ||
241 | * and dirty bits. This is the correct thing to do if the slot | ||
242 | * is ROM, and page tables in read-as-ROM/write-as-MMIO slots | ||
243 | * are only supported if the accessed and dirty bits are already | ||
244 | * set in the ROM (so that MMIO writes are never needed). | ||
245 | * | ||
246 | * Note that NPT does not allow this at all and faults, since | ||
247 | * it always wants nested page table entries for the guest | ||
248 | * page tables to be writable. And EPT works but will simply | ||
249 | * overwrite the read-only memory to set the accessed and dirty | ||
250 | * bits. | ||
251 | */ | ||
252 | if (unlikely(!walker->pte_writable[level - 1])) | ||
253 | continue; | ||
254 | |||
238 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); | 255 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); |
239 | if (ret) | 256 | if (ret) |
240 | return ret; | 257 | return ret; |
@@ -309,7 +326,8 @@ retry_walk: | |||
309 | goto error; | 326 | goto error; |
310 | real_gfn = gpa_to_gfn(real_gfn); | 327 | real_gfn = gpa_to_gfn(real_gfn); |
311 | 328 | ||
312 | host_addr = gfn_to_hva(vcpu->kvm, real_gfn); | 329 | host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, |
330 | &walker->pte_writable[walker->level - 1]); | ||
313 | if (unlikely(kvm_is_error_hva(host_addr))) | 331 | if (unlikely(kvm_is_error_hva(host_addr))) |
314 | goto error; | 332 | goto error; |
315 | 333 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca645a01d37a..0fbbc7aa02cb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -533,6 +533,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | |||
533 | 533 | ||
534 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | 534 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
535 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); | 535 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); |
536 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); | ||
536 | unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); | 537 | unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); |
537 | void kvm_release_page_clean(struct page *page); | 538 | void kvm_release_page_clean(struct page *page); |
538 | void kvm_release_page_dirty(struct page *page); | 539 | void kvm_release_page_dirty(struct page *page); |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bf040c4e02b3..979bff485fb0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
1058 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 1058 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
1059 | 1059 | ||
1060 | /* | 1060 | /* |
1061 | * The hva returned by this function is only allowed to be read. | 1061 | * If writable is set to false, the hva returned by this function is only |
1062 | * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). | 1062 | * allowed to be read. |
1063 | */ | 1063 | */ |
1064 | static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) | 1064 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) |
1065 | { | 1065 | { |
1066 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1067 | if (writable) | ||
1068 | *writable = !memslot_is_readonly(slot); | ||
1069 | |||
1066 | return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); | 1070 | return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); |
1067 | } | 1071 | } |
1068 | 1072 | ||
@@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
1430 | int r; | 1434 | int r; |
1431 | unsigned long addr; | 1435 | unsigned long addr; |
1432 | 1436 | ||
1433 | addr = gfn_to_hva_read(kvm, gfn); | 1437 | addr = gfn_to_hva_prot(kvm, gfn, NULL); |
1434 | if (kvm_is_error_hva(addr)) | 1438 | if (kvm_is_error_hva(addr)) |
1435 | return -EFAULT; | 1439 | return -EFAULT; |
1436 | r = kvm_read_hva(data, (void __user *)addr + offset, len); | 1440 | r = kvm_read_hva(data, (void __user *)addr + offset, len); |
@@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
1468 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1472 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1469 | int offset = offset_in_page(gpa); | 1473 | int offset = offset_in_page(gpa); |
1470 | 1474 | ||
1471 | addr = gfn_to_hva_read(kvm, gfn); | 1475 | addr = gfn_to_hva_prot(kvm, gfn, NULL); |
1472 | if (kvm_is_error_hva(addr)) | 1476 | if (kvm_is_error_hva(addr)) |
1473 | return -EFAULT; | 1477 | return -EFAULT; |
1474 | pagefault_disable(); | 1478 | pagefault_disable(); |