diff options
author | Avi Kivity <avi@qumranet.com> | 2007-03-08 10:13:32 -0500 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2007-03-18 04:49:09 -0400 |
commit | ac1b714e78c8f0b252f8d8872e6ce6f898a123b3 (patch) | |
tree | 6068b184778a7f43c92b0518eaefcf617309779b /drivers/kvm | |
parent | f5b42c3324494ea3f9bf795e2a7e4d3cbb06c607 (diff) |
KVM: MMU: Fix guest writes to nonpae pde
KVM shadow page tables are always in pae mode, regardless of the guest
setting. This means that a guest pde (mapping 4MB of memory) is mapped
to two shadow pdes (mapping 2MB each).
When the guest writes to a pte or pde, we intercept the write and emulate it.
We also remove any shadowed mappings corresponding to the write. Since the
mmu did not account for the doubling in the number of pdes, it removed the
wrong entry, resulting in a mismatch between shadow page tables and guest
page tables, followed shortly by guest memory corruption.
This patch fixes the problem by detecting the special case of writing to
a non-pae pde and adjusting the address and number of shadow pdes zapped
accordingly.
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm')
-rw-r--r-- | drivers/kvm/mmu.c | 46 |
1 files changed, 34 insertions, 12 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index a1a93368f314..2cb48937be44 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
@@ -1093,22 +1093,40 @@ out: | |||
1093 | return r; | 1093 | return r; |
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, | ||
1097 | struct kvm_mmu_page *page, | ||
1098 | u64 *spte) | ||
1099 | { | ||
1100 | u64 pte; | ||
1101 | struct kvm_mmu_page *child; | ||
1102 | |||
1103 | pte = *spte; | ||
1104 | if (is_present_pte(pte)) { | ||
1105 | if (page->role.level == PT_PAGE_TABLE_LEVEL) | ||
1106 | rmap_remove(vcpu, spte); | ||
1107 | else { | ||
1108 | child = page_header(pte & PT64_BASE_ADDR_MASK); | ||
1109 | mmu_page_remove_parent_pte(vcpu, child, spte); | ||
1110 | } | ||
1111 | } | ||
1112 | *spte = 0; | ||
1113 | } | ||
1114 | |||
1096 | void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) | 1115 | void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) |
1097 | { | 1116 | { |
1098 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1117 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1099 | struct kvm_mmu_page *page; | 1118 | struct kvm_mmu_page *page; |
1100 | struct kvm_mmu_page *child; | ||
1101 | struct hlist_node *node, *n; | 1119 | struct hlist_node *node, *n; |
1102 | struct hlist_head *bucket; | 1120 | struct hlist_head *bucket; |
1103 | unsigned index; | 1121 | unsigned index; |
1104 | u64 *spte; | 1122 | u64 *spte; |
1105 | u64 pte; | ||
1106 | unsigned offset = offset_in_page(gpa); | 1123 | unsigned offset = offset_in_page(gpa); |
1107 | unsigned pte_size; | 1124 | unsigned pte_size; |
1108 | unsigned page_offset; | 1125 | unsigned page_offset; |
1109 | unsigned misaligned; | 1126 | unsigned misaligned; |
1110 | int level; | 1127 | int level; |
1111 | int flooded = 0; | 1128 | int flooded = 0; |
1129 | int npte; | ||
1112 | 1130 | ||
1113 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); | 1131 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); |
1114 | if (gfn == vcpu->last_pt_write_gfn) { | 1132 | if (gfn == vcpu->last_pt_write_gfn) { |
@@ -1144,22 +1162,26 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) | |||
1144 | } | 1162 | } |
1145 | page_offset = offset; | 1163 | page_offset = offset; |
1146 | level = page->role.level; | 1164 | level = page->role.level; |
1165 | npte = 1; | ||
1147 | if (page->role.glevels == PT32_ROOT_LEVEL) { | 1166 | if (page->role.glevels == PT32_ROOT_LEVEL) { |
1148 | page_offset <<= 1; /* 32->64 */ | 1167 | page_offset <<= 1; /* 32->64 */ |
1168 | /* | ||
1169 | * A 32-bit pde maps 4MB while the shadow pdes map | ||
1170 | * only 2MB. So we need to double the offset again | ||
1171 | * and zap two pdes instead of one. | ||
1172 | */ | ||
1173 | if (level == PT32_ROOT_LEVEL) { | ||
1174 | page_offset <<= 1; | ||
1175 | npte = 2; | ||
1176 | } | ||
1149 | page_offset &= ~PAGE_MASK; | 1177 | page_offset &= ~PAGE_MASK; |
1150 | } | 1178 | } |
1151 | spte = __va(page->page_hpa); | 1179 | spte = __va(page->page_hpa); |
1152 | spte += page_offset / sizeof(*spte); | 1180 | spte += page_offset / sizeof(*spte); |
1153 | pte = *spte; | 1181 | while (npte--) { |
1154 | if (is_present_pte(pte)) { | 1182 | mmu_pre_write_zap_pte(vcpu, page, spte); |
1155 | if (level == PT_PAGE_TABLE_LEVEL) | 1183 | ++spte; |
1156 | rmap_remove(vcpu, spte); | ||
1157 | else { | ||
1158 | child = page_header(pte & PT64_BASE_ADDR_MASK); | ||
1159 | mmu_page_remove_parent_pte(vcpu, child, spte); | ||
1160 | } | ||
1161 | } | 1184 | } |
1162 | *spte = 0; | ||
1163 | } | 1185 | } |
1164 | } | 1186 | } |
1165 | 1187 | ||