path: root/drivers/kvm/paging_tmpl.h
author     Avi Kivity <avi@qumranet.com>          2007-01-05 19:36:43 -0500
committer  Linus Torvalds <torvalds@woody.osdl.org>  2007-01-06 02:55:24 -0500
commit     cea0f0e7ea54753c3265dc77f605a6dad1912cfc (patch)
tree       e0a3e64b45fe83f1f0ae89556e1f6fcf92f07185 /drivers/kvm/paging_tmpl.h
parent     25c0de2cc6c26cb99553c2444936a7951c120c09 (diff)
[PATCH] KVM: MMU: Shadow page table caching
Define a hashtable for caching shadow page tables.  Look up the cache on
context switch (cr3 change) or during page faults.

The key to the cache is a combination of
- the guest page table frame number
- the number of paging levels in the guest
  * we can cache real mode, 32-bit mode, pae, and long mode page tables
    simultaneously.  this is useful for smp bootup.
- the guest page table table
  * some kernels use a page as both a page table and a page directory.  this
    allows multiple shadow pages to exist for that page, one per level
- the "quadrant"
  * 32-bit mode page tables span 4MB, whereas a shadow page table spans 2MB.
    similarly, a 32-bit page directory spans 4GB, while a shadow page directory
    spans 1GB.  the quadrant allows caching up to 4 shadow page tables for one
    guest page in one level.
- a "metaphysical" bit
  * for real mode, and for pse pages, there is no guest page table, so set the
    bit to avoid write protecting the page.

Signed-off-by: Avi Kivity <avi@qumranet.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
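For illustration, here is a minimal user-space sketch of how a cache key of the
kind described above might be assembled and hashed.  It is not the code this
patch adds (kvm_mmu_get_page/kvm_mmu_lookup_page in mmu.c); the names
spt_cache_key, spt_hash, NR_SPT_BUCKETS and quadrant_32bit_pt, the mixing
constants, and the bucket count are all hypothetical, chosen only to show the
key fields and the quadrant arithmetic for a 32-bit guest page table.

/*
 * Illustrative sketch only -- all identifiers below are hypothetical and do
 * not exist in the KVM sources touched by this patch.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_SPT_BUCKETS 256                /* assumed table size, power of two */

struct spt_cache_key {
	uint64_t gfn;                     /* guest page table frame number */
	unsigned level;                   /* paging level this shadow page maps */
	unsigned quadrant;                /* portion of the guest table mirrored */
	unsigned metaphysical;            /* no guest page table (real mode, PSE) */
};

/* Fold the key fields into a bucket index. */
static unsigned spt_hash(const struct spt_cache_key *key)
{
	uint64_t h = key->gfn;

	h ^= (uint64_t)key->level << 52;
	h ^= (uint64_t)key->quadrant << 56;
	h ^= (uint64_t)key->metaphysical << 60;
	h ^= h >> 33;                     /* mix high bits into the low bits */
	return (unsigned)(h & (NR_SPT_BUCKETS - 1));
}

/*
 * A 32-bit guest page table covers 4MB, while a shadow page table (64-bit
 * format, 512 entries) covers 2MB, so bit 21 of the guest virtual address
 * selects which half of the guest table a given shadow page mirrors.
 */
static unsigned quadrant_32bit_pt(uint32_t gva)
{
	return (gva >> 21) & 1;
}

int main(void)
{
	struct spt_cache_key key = {
		.gfn = 0x1234,
		.level = 1,
		.quadrant = quadrant_32bit_pt(0x00200000),
		.metaphysical = 0,
	};

	printf("bucket %u, quadrant %u\n", spt_hash(&key), key.quadrant);
	return 0;
}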
Diffstat (limited to 'drivers/kvm/paging_tmpl.h')
-rw-r--r--  drivers/kvm/paging_tmpl.h  62
1 file changed, 53 insertions(+), 9 deletions(-)
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 11cac9ddf26a..f7cce443ca6f 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -32,6 +32,11 @@
 	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
 	#define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK
+	#ifdef CONFIG_X86_64
+	#define PT_MAX_FULL_LEVELS 4
+	#else
+	#define PT_MAX_FULL_LEVELS 2
+	#endif
 #elif PTTYPE == 32
 	#define pt_element_t u32
 	#define guest_walker guest_walker32
@@ -42,6 +47,7 @@
 	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
 	#define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK
+	#define PT_MAX_FULL_LEVELS 2
 #else
 	#error Invalid PTTYPE value
 #endif
@@ -52,7 +58,7 @@
  */
 struct guest_walker {
 	int level;
-	gfn_t table_gfn;
+	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t *table;
 	pt_element_t *ptep;
 	pt_element_t inherited_ar;
@@ -68,7 +74,9 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
 	struct kvm_memory_slot *slot;
 	pt_element_t *ptep;
 	pt_element_t root;
+	gfn_t table_gfn;
 
+	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
 	walker->level = vcpu->mmu.root_level;
 	walker->table = NULL;
 	root = vcpu->cr3;
@@ -81,8 +89,11 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
 		--walker->level;
 	}
 #endif
-	walker->table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-	slot = gfn_to_memslot(vcpu->kvm, walker->table_gfn);
+	table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+	walker->table_gfn[walker->level - 1] = table_gfn;
+	pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+		 walker->level - 1, table_gfn);
+	slot = gfn_to_memslot(vcpu->kvm, table_gfn);
 	hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
 	walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
 
@@ -111,12 +122,15 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
 
 		if (walker->level != 3 || is_long_mode(vcpu))
 			walker->inherited_ar &= walker->table[index];
-		walker->table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
+		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
 		paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
 		kunmap_atomic(walker->table, KM_USER0);
 		walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
 					    KM_USER0);
 		--walker->level;
+		walker->table_gfn[walker->level - 1 ] = table_gfn;
+		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+			 walker->level - 1, table_gfn);
 	}
 	walker->ptep = ptep;
 }
@@ -181,6 +195,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index;
 		struct kvm_mmu_page *shadow_page;
 		u64 shadow_pte;
+		int metaphysical;
+		gfn_t table_gfn;
 
 		if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
 			if (level == PT_PAGE_TABLE_LEVEL)
@@ -205,7 +221,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			return shadow_ent;
 		}
 
-		shadow_page = kvm_mmu_alloc_page(vcpu, shadow_ent);
+		if (level - 1 == PT_PAGE_TABLE_LEVEL
+		    && walker->level == PT_DIRECTORY_LEVEL) {
+			metaphysical = 1;
+			table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
+				>> PAGE_SHIFT;
+		} else {
+			metaphysical = 0;
+			table_gfn = walker->table_gfn[level - 2];
+		}
+		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+					       metaphysical, shadow_ent);
 		if (!shadow_page)
 			return ERR_PTR(-ENOMEM);
 		shadow_addr = shadow_page->page_hpa;
@@ -227,7 +253,8 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
 			       u64 *shadow_ent,
 			       struct guest_walker *walker,
 			       gva_t addr,
-			       int user)
+			       int user,
+			       int *write_pt)
 {
 	pt_element_t *guest_ent;
 	int writable_shadow;
@@ -264,6 +291,12 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
 	}
 
 	gfn = (*guest_ent & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+	if (kvm_mmu_lookup_page(vcpu, gfn)) {
+		pgprintk("%s: found shadow page for %lx, marking ro\n",
+			 __FUNCTION__, gfn);
+		*write_pt = 1;
+		return 0;
+	}
 	mark_page_dirty(vcpu->kvm, gfn);
 	*shadow_ent |= PT_WRITABLE_MASK;
 	*guest_ent |= PT_DIRTY_MASK;
@@ -294,7 +327,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	struct guest_walker walker;
 	u64 *shadow_pte;
 	int fixed;
+	int write_pt = 0;
 
+	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
@@ -302,6 +337,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		FNAME(walk_addr)(&walker, vcpu, addr);
 		shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
 		if (IS_ERR(shadow_pte)) {  /* must be -ENOMEM */
+			printk("%s: oom\n", __FUNCTION__);
 			nonpaging_flush(vcpu);
 			FNAME(release_walker)(&walker);
 			continue;
@@ -313,20 +349,27 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 * The page is not mapped by the guest.  Let the guest handle it.
 	 */
 	if (!shadow_pte) {
+		pgprintk("%s: not mapped\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, error_code);
 		FNAME(release_walker)(&walker);
 		return 0;
 	}
 
+	pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
+		 shadow_pte, *shadow_pte);
+
 	/*
 	 * Update the shadow pte.
 	 */
 	if (write_fault)
 		fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr,
-					    user_fault);
+					    user_fault, &write_pt);
 	else
 		fixed = fix_read_pf(shadow_pte);
 
+	pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__,
+		 shadow_pte, *shadow_pte);
+
 	FNAME(release_walker)(&walker);
 
 	/*
@@ -344,14 +387,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	/*
 	 * pte not present, guest page fault.
 	 */
-	if (pte_present && !fixed) {
+	if (pte_present && !fixed && !write_pt) {
 		inject_page_fault(vcpu, addr, error_code);
 		return 0;
 	}
 
 	++kvm_stat.pf_fixed;
 
-	return 0;
+	return write_pt;
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -395,3 +438,4 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 #undef PT_PTE_COPY_MASK
 #undef PT_NON_PTE_COPY_MASK
 #undef PT_DIR_BASE_ADDR_MASK
+#undef PT_MAX_FULL_LEVELS