author    Avi Kivity <avi@qumranet.com>  2007-03-08 10:13:32 -0500
committer Avi Kivity <avi@qumranet.com>  2007-03-18 04:49:09 -0400
commit    ac1b714e78c8f0b252f8d8872e6ce6f898a123b3 (patch)
tree      6068b184778a7f43c92b0518eaefcf617309779b
parent    f5b42c3324494ea3f9bf795e2a7e4d3cbb06c607 (diff)
KVM: MMU: Fix guest writes to nonpae pde
KVM shadow page tables are always in pae mode, regardless of the guest
setting.  This means that a guest pde (mapping 4MB of memory) is mapped
to two shadow pdes (mapping 2MB each).

When the guest writes to a pte or pde, we intercept the write and emulate
it.  We also remove any shadowed mappings corresponding to the write.
Since the mmu did not account for the doubling in the number of pdes, it
removed the wrong entry, resulting in a mismatch between shadow page
tables and guest page tables, followed shortly by guest memory
corruption.

This patch fixes the problem by detecting the special case of writing to
a non-pae pde and adjusting the address and number of shadow pdes zapped
accordingly.

Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Avi Kivity <avi@qumranet.com>
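For illustration only, here is a minimal userspace sketch of the offset
arithmetic the commit message describes; the variable names (nonpae_guest,
pde_level, npte) are hypothetical stand-ins for the kernel's
role.glevels / PT32_ROOT_LEVEL checks, not actual KVM code.  The byte
offset of the written guest entry is scaled once because 4-byte non-pae
guest entries become 8-byte pae shadow entries, and once more at the pde
level because each guest 4MB pde is shadowed by two 2MB pdes:

/*
 * Hypothetical sketch (not KVM code): the offset scaling performed
 * when a non-pae guest writes a pde.  All names are illustrative.
 */
#include <stdio.h>

int main(void)
{
        unsigned offset = 12;      /* byte offset of the written guest entry */
        unsigned page_offset = offset;
        int nonpae_guest = 1;      /* guest uses 32-bit (non-pae) paging */
        int pde_level = 1;         /* the write hit a page directory entry */
        int npte = 1;              /* number of shadow entries to zap */

        if (nonpae_guest) {
                page_offset <<= 1; /* 4-byte guest entry -> 8-byte shadow entry */
                if (pde_level) {
                        /* one guest 4MB pde is shadowed by two 2MB pdes */
                        page_offset <<= 1;
                        npte = 2;
                }
        }
        printf("zap %d shadow pde(s) starting at byte %u\n",
               npte, page_offset);
        return 0;
}

For a guest pde at byte offset 12 this prints "zap 2 shadow pde(s)
starting at byte 48": the offset is quadrupled by the two shifts, matching
the page_offset and npte computation in the patch below.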
 drivers/kvm/mmu.c | 46 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 12 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index a1a93368f314..2cb48937be44 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1093,22 +1093,40 @@ out:
 	return r;
 }
 
+static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *page,
+				  u64 *spte)
+{
+	u64 pte;
+	struct kvm_mmu_page *child;
+
+	pte = *spte;
+	if (is_present_pte(pte)) {
+		if (page->role.level == PT_PAGE_TABLE_LEVEL)
+			rmap_remove(vcpu, spte);
+		else {
+			child = page_header(pte & PT64_BASE_ADDR_MASK);
+			mmu_page_remove_parent_pte(vcpu, child, spte);
+		}
+	}
+	*spte = 0;
+}
+
 void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *page;
-	struct kvm_mmu_page *child;
 	struct hlist_node *node, *n;
 	struct hlist_head *bucket;
 	unsigned index;
 	u64 *spte;
-	u64 pte;
 	unsigned offset = offset_in_page(gpa);
 	unsigned pte_size;
 	unsigned page_offset;
 	unsigned misaligned;
 	int level;
 	int flooded = 0;
+	int npte;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
 	if (gfn == vcpu->last_pt_write_gfn) {
@@ -1144,22 +1162,26 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 		}
 		page_offset = offset;
 		level = page->role.level;
+		npte = 1;
 		if (page->role.glevels == PT32_ROOT_LEVEL) {
 			page_offset <<= 1;	/* 32->64 */
+			/*
+			 * A 32-bit pde maps 4MB while the shadow pdes map
+			 * only 2MB.  So we need to double the offset again
+			 * and zap two pdes instead of one.
+			 */
+			if (level == PT32_ROOT_LEVEL) {
+				page_offset <<= 1;
+				npte = 2;
+			}
 			page_offset &= ~PAGE_MASK;
 		}
 		spte = __va(page->page_hpa);
 		spte += page_offset / sizeof(*spte);
-		pte = *spte;
-		if (is_present_pte(pte)) {
-			if (level == PT_PAGE_TABLE_LEVEL)
-				rmap_remove(vcpu, spte);
-			else {
-				child = page_header(pte & PT64_BASE_ADDR_MASK);
-				mmu_page_remove_parent_pte(vcpu, child, spte);
-			}
-		}
-		*spte = 0;
+		while (npte--) {
+			mmu_pre_write_zap_pte(vcpu, page, spte);
+			++spte;
+		}
 	}
 }
 