diff options
author | Avi Kivity <avi@qumranet.com> | 2007-01-05 19:36:38 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2007-01-06 02:55:24 -0500 |
commit | cd4a4e5374110444dc38831af517e51ff5a053c3 (patch) | |
tree | ed82756435e7909c7698d112195994c2e8062328 | |
parent | 399badf315bd4dd571b4b3b7cf666d9a2af40229 (diff) |
[PATCH] KVM: MMU: Implement simple reverse mapping
Keep in each host page frame's page->private a pointer to the shadow pte which
maps it. If there are multiple shadow ptes mapping the page, set bit 0 of
page->private, and use the rest as a pointer to a linked list of all such
mappings.
Reverse mappings are needed because when we cache shadow page tables, we
must protect the guest page tables from being modified by the guest, as that
would invalidate the cached ptes.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/kvm/kvm.h | 1 | ||||
-rw-r--r-- | drivers/kvm/kvm_main.c | 1 | ||||
-rw-r--r-- | drivers/kvm/mmu.c | 152 | ||||
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 1 |
4 files changed, 142 insertions, 13 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index e8fe1039e3b5..b65511ed4388 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h | |||
@@ -236,6 +236,7 @@ struct kvm { | |||
236 | struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; | 236 | struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; |
237 | int memory_config_version; | 237 | int memory_config_version; |
238 | int busy; | 238 | int busy; |
239 | unsigned long rmap_overflow; | ||
239 | }; | 240 | }; |
240 | 241 | ||
241 | struct kvm_stat { | 242 | struct kvm_stat { |
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index bc88c334664b..f2a6b6f0e929 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
@@ -638,6 +638,7 @@ raced: | |||
638 | | __GFP_ZERO); | 638 | | __GFP_ZERO); |
639 | if (!new.phys_mem[i]) | 639 | if (!new.phys_mem[i]) |
640 | goto out_free; | 640 | goto out_free; |
641 | new.phys_mem[i]->private = 0; | ||
641 | } | 642 | } |
642 | } | 643 | } |
643 | 644 | ||
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 790423c5f23d..0f27beb6c5df 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "kvm.h" | 27 | #include "kvm.h" |
28 | 28 | ||
29 | #define pgprintk(x...) do { } while (0) | 29 | #define pgprintk(x...) do { } while (0) |
30 | #define rmap_printk(x...) do { } while (0) | ||
30 | 31 | ||
31 | #define ASSERT(x) \ | 32 | #define ASSERT(x) \ |
32 | if (!(x)) { \ | 33 | if (!(x)) { \ |
@@ -125,6 +126,13 @@ | |||
125 | #define PT_DIRECTORY_LEVEL 2 | 126 | #define PT_DIRECTORY_LEVEL 2 |
126 | #define PT_PAGE_TABLE_LEVEL 1 | 127 | #define PT_PAGE_TABLE_LEVEL 1 |
127 | 128 | ||
129 | #define RMAP_EXT 4 | ||
130 | |||
131 | struct kvm_rmap_desc { | ||
132 | u64 *shadow_ptes[RMAP_EXT]; | ||
133 | struct kvm_rmap_desc *more; | ||
134 | }; | ||
135 | |||
128 | static int is_write_protection(struct kvm_vcpu *vcpu) | 136 | static int is_write_protection(struct kvm_vcpu *vcpu) |
129 | { | 137 | { |
130 | return vcpu->cr0 & CR0_WP_MASK; | 138 | return vcpu->cr0 & CR0_WP_MASK; |
@@ -150,6 +158,120 @@ static int is_io_pte(unsigned long pte) | |||
150 | return pte & PT_SHADOW_IO_MARK; | 158 | return pte & PT_SHADOW_IO_MARK; |
151 | } | 159 | } |
152 | 160 | ||
161 | static int is_rmap_pte(u64 pte) | ||
162 | { | ||
163 | return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK)) | ||
164 | == (PT_WRITABLE_MASK | PT_PRESENT_MASK); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Reverse mapping data structures: | ||
169 | * | ||
170 | * If page->private bit zero is zero, then page->private points to the | ||
171 | * shadow page table entry that points to page_address(page). | ||
172 | * | ||
173 | * If page->private bit zero is one, then (page->private & ~1) points | ||
174 | * to a struct kvm_rmap_desc containing more mappings. | ||
175 | */ | ||
176 | static void rmap_add(struct kvm *kvm, u64 *spte) | ||
177 | { | ||
178 | struct page *page; | ||
179 | struct kvm_rmap_desc *desc; | ||
180 | int i; | ||
181 | |||
182 | if (!is_rmap_pte(*spte)) | ||
183 | return; | ||
184 | page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); | ||
185 | if (!page->private) { | ||
186 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); | ||
187 | page->private = (unsigned long)spte; | ||
188 | } else if (!(page->private & 1)) { | ||
189 | rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); | ||
190 | desc = kzalloc(sizeof *desc, GFP_NOWAIT); | ||
191 | if (!desc) | ||
192 | BUG(); /* FIXME: return error */ | ||
193 | desc->shadow_ptes[0] = (u64 *)page->private; | ||
194 | desc->shadow_ptes[1] = spte; | ||
195 | page->private = (unsigned long)desc | 1; | ||
196 | } else { | ||
197 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); | ||
198 | desc = (struct kvm_rmap_desc *)(page->private & ~1ul); | ||
199 | while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) | ||
200 | desc = desc->more; | ||
201 | if (desc->shadow_ptes[RMAP_EXT-1]) { | ||
202 | desc->more = kzalloc(sizeof *desc->more, GFP_NOWAIT); | ||
203 | if (!desc->more) | ||
204 | BUG(); /* FIXME: return error */ | ||
205 | desc = desc->more; | ||
206 | } | ||
207 | for (i = 0; desc->shadow_ptes[i]; ++i) | ||
208 | ; | ||
209 | desc->shadow_ptes[i] = spte; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | static void rmap_desc_remove_entry(struct page *page, | ||
214 | struct kvm_rmap_desc *desc, | ||
215 | int i, | ||
216 | struct kvm_rmap_desc *prev_desc) | ||
217 | { | ||
218 | int j; | ||
219 | |||
220 | for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j) | ||
221 | ; | ||
222 | desc->shadow_ptes[i] = desc->shadow_ptes[j]; | ||
223 | desc->shadow_ptes[j] = 0; | ||
224 | if (j != 0) | ||
225 | return; | ||
226 | if (!prev_desc && !desc->more) | ||
227 | page->private = (unsigned long)desc->shadow_ptes[0]; | ||
228 | else | ||
229 | if (prev_desc) | ||
230 | prev_desc->more = desc->more; | ||
231 | else | ||
232 | page->private = (unsigned long)desc->more | 1; | ||
233 | kfree(desc); | ||
234 | } | ||
235 | |||
236 | static void rmap_remove(struct kvm *kvm, u64 *spte) | ||
237 | { | ||
238 | struct page *page; | ||
239 | struct kvm_rmap_desc *desc; | ||
240 | struct kvm_rmap_desc *prev_desc; | ||
241 | int i; | ||
242 | |||
243 | if (!is_rmap_pte(*spte)) | ||
244 | return; | ||
245 | page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); | ||
246 | if (!page->private) { | ||
247 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | ||
248 | BUG(); | ||
249 | } else if (!(page->private & 1)) { | ||
250 | rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); | ||
251 | if ((u64 *)page->private != spte) { | ||
252 | printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", | ||
253 | spte, *spte); | ||
254 | BUG(); | ||
255 | } | ||
256 | page->private = 0; | ||
257 | } else { | ||
258 | rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); | ||
259 | desc = (struct kvm_rmap_desc *)(page->private & ~1ul); | ||
260 | prev_desc = NULL; | ||
261 | while (desc) { | ||
262 | for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) | ||
263 | if (desc->shadow_ptes[i] == spte) { | ||
264 | rmap_desc_remove_entry(page, desc, i, | ||
265 | prev_desc); | ||
266 | return; | ||
267 | } | ||
268 | prev_desc = desc; | ||
269 | desc = desc->more; | ||
270 | } | ||
271 | BUG(); | ||
272 | } | ||
273 | } | ||
274 | |||
153 | static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) | 275 | static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) |
154 | { | 276 | { |
155 | struct kvm_mmu_page *page_head = page_header(page_hpa); | 277 | struct kvm_mmu_page *page_head = page_header(page_hpa); |
@@ -229,27 +351,27 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) | |||
229 | static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa, | 351 | static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa, |
230 | int level) | 352 | int level) |
231 | { | 353 | { |
354 | u64 *pos; | ||
355 | u64 *end; | ||
356 | |||
232 | ASSERT(vcpu); | 357 | ASSERT(vcpu); |
233 | ASSERT(VALID_PAGE(page_hpa)); | 358 | ASSERT(VALID_PAGE(page_hpa)); |
234 | ASSERT(level <= PT64_ROOT_LEVEL && level > 0); | 359 | ASSERT(level <= PT64_ROOT_LEVEL && level > 0); |
235 | 360 | ||
236 | if (level == 1) | 361 | for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE; |
237 | memset(__va(page_hpa), 0, PAGE_SIZE); | 362 | pos != end; pos++) { |
238 | else { | 363 | u64 current_ent = *pos; |
239 | u64 *pos; | ||
240 | u64 *end; | ||
241 | 364 | ||
242 | for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE; | 365 | if (is_present_pte(current_ent)) { |
243 | pos != end; pos++) { | 366 | if (level != 1) |
244 | u64 current_ent = *pos; | ||
245 | |||
246 | *pos = 0; | ||
247 | if (is_present_pte(current_ent)) | ||
248 | release_pt_page_64(vcpu, | 367 | release_pt_page_64(vcpu, |
249 | current_ent & | 368 | current_ent & |
250 | PT64_BASE_ADDR_MASK, | 369 | PT64_BASE_ADDR_MASK, |
251 | level - 1); | 370 | level - 1); |
371 | else | ||
372 | rmap_remove(vcpu->kvm, pos); | ||
252 | } | 373 | } |
374 | *pos = 0; | ||
253 | } | 375 | } |
254 | kvm_mmu_free_page(vcpu, page_hpa); | 376 | kvm_mmu_free_page(vcpu, page_hpa); |
255 | } | 377 | } |
@@ -275,6 +397,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
275 | page_header_update_slot(vcpu->kvm, table, v); | 397 | page_header_update_slot(vcpu->kvm, table, v); |
276 | table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | | 398 | table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
277 | PT_USER_MASK; | 399 | PT_USER_MASK; |
400 | rmap_add(vcpu->kvm, &table[index]); | ||
278 | return 0; | 401 | return 0; |
279 | } | 402 | } |
280 | 403 | ||
@@ -437,6 +560,7 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, | |||
437 | } else { | 560 | } else { |
438 | *shadow_pte |= paddr; | 561 | *shadow_pte |= paddr; |
439 | page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); | 562 | page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); |
563 | rmap_add(vcpu->kvm, shadow_pte); | ||
440 | } | 564 | } |
441 | } | 565 | } |
442 | 566 | ||
@@ -489,6 +613,7 @@ static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr) | |||
489 | u64 *table = __va(page_addr); | 613 | u64 *table = __va(page_addr); |
490 | 614 | ||
491 | if (level == PT_PAGE_TABLE_LEVEL ) { | 615 | if (level == PT_PAGE_TABLE_LEVEL ) { |
616 | rmap_remove(vcpu->kvm, &table[index]); | ||
492 | table[index] = 0; | 617 | table[index] = 0; |
493 | return; | 618 | return; |
494 | } | 619 | } |
@@ -679,8 +804,9 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
679 | pt = __va(page->page_hpa); | 804 | pt = __va(page->page_hpa); |
680 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 805 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
681 | /* avoid RMW */ | 806 | /* avoid RMW */ |
682 | if (pt[i] & PT_WRITABLE_MASK) | 807 | if (pt[i] & PT_WRITABLE_MASK) { |
808 | rmap_remove(kvm, &pt[i]); | ||
683 | pt[i] &= ~PT_WRITABLE_MASK; | 809 | pt[i] &= ~PT_WRITABLE_MASK; |
684 | 810 | } | |
685 | } | 811 | } |
686 | } | 812 | } |
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 09bb9b4ed12d..8c48528a6e89 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h | |||
@@ -261,6 +261,7 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, | |||
261 | mark_page_dirty(vcpu->kvm, gfn); | 261 | mark_page_dirty(vcpu->kvm, gfn); |
262 | *shadow_ent |= PT_WRITABLE_MASK; | 262 | *shadow_ent |= PT_WRITABLE_MASK; |
263 | *guest_ent |= PT_DIRTY_MASK; | 263 | *guest_ent |= PT_DIRTY_MASK; |
264 | rmap_add(vcpu->kvm, shadow_ent); | ||
264 | 265 | ||
265 | return 1; | 266 | return 1; |
266 | } | 267 | } |