aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAvi Kivity <avi@qumranet.com>2007-01-05 19:36:38 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2007-01-06 02:55:24 -0500
commitcd4a4e5374110444dc38831af517e51ff5a053c3 (patch)
treeed82756435e7909c7698d112195994c2e8062328
parent399badf315bd4dd571b4b3b7cf666d9a2af40229 (diff)
[PATCH] KVM: MMU: Implement simple reverse mapping
Keep in each host page frame's page->private a pointer to the shadow pte which maps it. If there are multiple shadow ptes mapping the page, set bit 0 of page->private, and use the rest as a pointer to a linked list of all such mappings. Reverse mappings are needed because when we cache shadow page tables, we must protect the guest page tables from being modified by the guest, as that would invalidate the cached ptes. Signed-off-by: Avi Kivity <avi@qumranet.com> Acked-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/kvm/kvm.h1
-rw-r--r--drivers/kvm/kvm_main.c1
-rw-r--r--drivers/kvm/mmu.c152
-rw-r--r--drivers/kvm/paging_tmpl.h1
4 files changed, 142 insertions, 13 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index e8fe1039e3b5..b65511ed4388 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -236,6 +236,7 @@ struct kvm {
236 struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; 236 struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
237 int memory_config_version; 237 int memory_config_version;
238 int busy; 238 int busy;
239 unsigned long rmap_overflow;
239}; 240};
240 241
241struct kvm_stat { 242struct kvm_stat {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index bc88c334664b..f2a6b6f0e929 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -638,6 +638,7 @@ raced:
638 | __GFP_ZERO); 638 | __GFP_ZERO);
639 if (!new.phys_mem[i]) 639 if (!new.phys_mem[i])
640 goto out_free; 640 goto out_free;
641 new.phys_mem[i]->private = 0;
641 } 642 }
642 } 643 }
643 644
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 790423c5f23d..0f27beb6c5df 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -27,6 +27,7 @@
27#include "kvm.h" 27#include "kvm.h"
28 28
29#define pgprintk(x...) do { } while (0) 29#define pgprintk(x...) do { } while (0)
30#define rmap_printk(x...) do { } while (0)
30 31
31#define ASSERT(x) \ 32#define ASSERT(x) \
32 if (!(x)) { \ 33 if (!(x)) { \
@@ -125,6 +126,13 @@
125#define PT_DIRECTORY_LEVEL 2 126#define PT_DIRECTORY_LEVEL 2
126#define PT_PAGE_TABLE_LEVEL 1 127#define PT_PAGE_TABLE_LEVEL 1
127 128
/* Number of shadow pte pointers stored in one struct kvm_rmap_desc. */
129#define RMAP_EXT 4
130
/*
 * One link in the chain of reverse-mapping descriptors hanging off
 * page->private (with bit 0 set) when a page has more than one mapping.
 *
 * Slots in shadow_ptes are filled from index 0 upwards; the code that
 * scans a descriptor stops at the first NULL slot, and removal moves the
 * last used slot into the hole so the used slots stay contiguous.
 */
131struct kvm_rmap_desc {
132 u64 *shadow_ptes[RMAP_EXT]; /* shadow ptes mapping the page; NULL = unused slot */
133 struct kvm_rmap_desc *more; /* next descriptor in the chain, or NULL */
134};
135
128static int is_write_protection(struct kvm_vcpu *vcpu) 136static int is_write_protection(struct kvm_vcpu *vcpu)
129{ 137{
130 return vcpu->cr0 & CR0_WP_MASK; 138 return vcpu->cr0 & CR0_WP_MASK;
@@ -150,6 +158,120 @@ static int is_io_pte(unsigned long pte)
150 return pte & PT_SHADOW_IO_MARK; 158 return pte & PT_SHADOW_IO_MARK;
151} 159}
152 160
161static int is_rmap_pte(u64 pte)
162{
163 return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
164 == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
165}
166
167/*
168 * Reverse mapping data structures:
169 *
170 * If page->private bit zero is zero, then page->private points to the
171 * shadow page table entry that points to page_address(page).
172 *
173 * If page->private bit zero is one, (then page->private & ~1) points
174 * to a struct kvm_rmap_desc containing more mappings.
175 */
176static void rmap_add(struct kvm *kvm, u64 *spte)
177{
178 struct page *page;
179 struct kvm_rmap_desc *desc;
180 int i;
181
182 if (!is_rmap_pte(*spte))
183 return;
184 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
185 if (!page->private) {
186 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
187 page->private = (unsigned long)spte;
188 } else if (!(page->private & 1)) {
189 rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
190 desc = kzalloc(sizeof *desc, GFP_NOWAIT);
191 if (!desc)
192 BUG(); /* FIXME: return error */
193 desc->shadow_ptes[0] = (u64 *)page->private;
194 desc->shadow_ptes[1] = spte;
195 page->private = (unsigned long)desc | 1;
196 } else {
197 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
198 desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
199 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
200 desc = desc->more;
201 if (desc->shadow_ptes[RMAP_EXT-1]) {
202 desc->more = kzalloc(sizeof *desc->more, GFP_NOWAIT);
203 if (!desc->more)
204 BUG(); /* FIXME: return error */
205 desc = desc->more;
206 }
207 for (i = 0; desc->shadow_ptes[i]; ++i)
208 ;
209 desc->shadow_ptes[i] = spte;
210 }
211}
212
213static void rmap_desc_remove_entry(struct page *page,
214 struct kvm_rmap_desc *desc,
215 int i,
216 struct kvm_rmap_desc *prev_desc)
217{
218 int j;
219
220 for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
221 ;
222 desc->shadow_ptes[i] = desc->shadow_ptes[j];
223 desc->shadow_ptes[j] = 0;
224 if (j != 0)
225 return;
226 if (!prev_desc && !desc->more)
227 page->private = (unsigned long)desc->shadow_ptes[0];
228 else
229 if (prev_desc)
230 prev_desc->more = desc->more;
231 else
232 page->private = (unsigned long)desc->more | 1;
233 kfree(desc);
234}
235
236static void rmap_remove(struct kvm *kvm, u64 *spte)
237{
238 struct page *page;
239 struct kvm_rmap_desc *desc;
240 struct kvm_rmap_desc *prev_desc;
241 int i;
242
243 if (!is_rmap_pte(*spte))
244 return;
245 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
246 if (!page->private) {
247 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
248 BUG();
249 } else if (!(page->private & 1)) {
250 rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
251 if ((u64 *)page->private != spte) {
252 printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
253 spte, *spte);
254 BUG();
255 }
256 page->private = 0;
257 } else {
258 rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
259 desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
260 prev_desc = NULL;
261 while (desc) {
262 for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
263 if (desc->shadow_ptes[i] == spte) {
264 rmap_desc_remove_entry(page, desc, i,
265 prev_desc);
266 return;
267 }
268 prev_desc = desc;
269 desc = desc->more;
270 }
271 BUG();
272 }
273}
274
153static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) 275static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
154{ 276{
155 struct kvm_mmu_page *page_head = page_header(page_hpa); 277 struct kvm_mmu_page *page_head = page_header(page_hpa);
@@ -229,27 +351,27 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
229static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa, 351static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
230 int level) 352 int level)
231{ 353{
354 u64 *pos;
355 u64 *end;
356
232 ASSERT(vcpu); 357 ASSERT(vcpu);
233 ASSERT(VALID_PAGE(page_hpa)); 358 ASSERT(VALID_PAGE(page_hpa));
234 ASSERT(level <= PT64_ROOT_LEVEL && level > 0); 359 ASSERT(level <= PT64_ROOT_LEVEL && level > 0);
235 360
236 if (level == 1) 361 for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
237 memset(__va(page_hpa), 0, PAGE_SIZE); 362 pos != end; pos++) {
238 else { 363 u64 current_ent = *pos;
239 u64 *pos;
240 u64 *end;
241 364
242 for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE; 365 if (is_present_pte(current_ent)) {
243 pos != end; pos++) { 366 if (level != 1)
244 u64 current_ent = *pos;
245
246 *pos = 0;
247 if (is_present_pte(current_ent))
248 release_pt_page_64(vcpu, 367 release_pt_page_64(vcpu,
249 current_ent & 368 current_ent &
250 PT64_BASE_ADDR_MASK, 369 PT64_BASE_ADDR_MASK,
251 level - 1); 370 level - 1);
371 else
372 rmap_remove(vcpu->kvm, pos);
252 } 373 }
374 *pos = 0;
253 } 375 }
254 kvm_mmu_free_page(vcpu, page_hpa); 376 kvm_mmu_free_page(vcpu, page_hpa);
255} 377}
@@ -275,6 +397,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
275 page_header_update_slot(vcpu->kvm, table, v); 397 page_header_update_slot(vcpu->kvm, table, v);
276 table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | 398 table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
277 PT_USER_MASK; 399 PT_USER_MASK;
400 rmap_add(vcpu->kvm, &table[index]);
278 return 0; 401 return 0;
279 } 402 }
280 403
@@ -437,6 +560,7 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
437 } else { 560 } else {
438 *shadow_pte |= paddr; 561 *shadow_pte |= paddr;
439 page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); 562 page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
563 rmap_add(vcpu->kvm, shadow_pte);
440 } 564 }
441} 565}
442 566
@@ -489,6 +613,7 @@ static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
489 u64 *table = __va(page_addr); 613 u64 *table = __va(page_addr);
490 614
491 if (level == PT_PAGE_TABLE_LEVEL ) { 615 if (level == PT_PAGE_TABLE_LEVEL ) {
616 rmap_remove(vcpu->kvm, &table[index]);
492 table[index] = 0; 617 table[index] = 0;
493 return; 618 return;
494 } 619 }
@@ -679,8 +804,9 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
679 pt = __va(page->page_hpa); 804 pt = __va(page->page_hpa);
680 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 805 for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
681 /* avoid RMW */ 806 /* avoid RMW */
682 if (pt[i] & PT_WRITABLE_MASK) 807 if (pt[i] & PT_WRITABLE_MASK) {
808 rmap_remove(kvm, &pt[i]);
683 pt[i] &= ~PT_WRITABLE_MASK; 809 pt[i] &= ~PT_WRITABLE_MASK;
684 810 }
685 } 811 }
686} 812}
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 09bb9b4ed12d..8c48528a6e89 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -261,6 +261,7 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
261 mark_page_dirty(vcpu->kvm, gfn); 261 mark_page_dirty(vcpu->kvm, gfn);
262 *shadow_ent |= PT_WRITABLE_MASK; 262 *shadow_ent |= PT_WRITABLE_MASK;
263 *guest_ent |= PT_DIRTY_MASK; 263 *guest_ent |= PT_DIRTY_MASK;
264 rmap_add(vcpu->kvm, shadow_ent);
264 265
265 return 1; 266 return 1;
266} 267}