Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r--	arch/x86/kvm/paging_tmpl.h	202
1 file changed, 164 insertions(+), 38 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 51ef9097960d..cd7a833a3b52 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -7,7 +7,7 @@
  * MMU support
  *
  * Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *  Yaniv Kamay  <yaniv@qumranet.com>
@@ -67,6 +67,7 @@ struct guest_walker {
 	int level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t ptes[PT_MAX_FULL_LEVELS];
+	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
@@ -104,7 +105,7 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 
 	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
 #if PTTYPE == 64
-	if (is_nx(vcpu))
+	if (vcpu->arch.mmu.nx)
 		access &= ~(gpte >> PT64_NX_SHIFT);
 #endif
 	return access;
@@ -113,26 +114,32 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 /*
  * Fetch a guest pte for a guest virtual address
  */
-static int FNAME(walk_addr)(struct guest_walker *walker,
-			    struct kvm_vcpu *vcpu, gva_t addr,
-			    int write_fault, int user_fault, int fetch_fault)
+static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+				    gva_t addr, u32 access)
 {
 	pt_element_t pte;
 	gfn_t table_gfn;
 	unsigned index, pt_access, uninitialized_var(pte_access);
 	gpa_t pte_gpa;
 	bool eperm, present, rsvd_fault;
+	int offset, write_fault, user_fault, fetch_fault;
+
+	write_fault = access & PFERR_WRITE_MASK;
+	user_fault = access & PFERR_USER_MASK;
+	fetch_fault = access & PFERR_FETCH_MASK;
 
 	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
 				     fetch_fault);
 walk:
 	present = true;
 	eperm = rsvd_fault = false;
-	walker->level = vcpu->arch.mmu.root_level;
-	pte = vcpu->arch.cr3;
+	walker->level = mmu->root_level;
+	pte = mmu->get_cr3(vcpu);
+
 #if PTTYPE == 64
-	if (!is_long_mode(vcpu)) {
-		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
+	if (walker->level == PT32E_ROOT_LEVEL) {
+		pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
 		if (!is_present_gpte(pte)) {
 			present = false;
@@ -142,7 +149,7 @@ walk:
 	}
 #endif
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
-	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
 	pt_access = ACC_ALL;
 
@@ -150,12 +157,14 @@ walk:
 		index = PT_INDEX(addr, walker->level);
 
 		table_gfn = gpte_to_gfn(pte);
-		pte_gpa = gfn_to_gpa(table_gfn);
-		pte_gpa += index * sizeof(pt_element_t);
+		offset = index * sizeof(pt_element_t);
+		pte_gpa = gfn_to_gpa(table_gfn) + offset;
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
 
-		if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) {
+		if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
+					    offset, sizeof(pte),
+					    PFERR_USER_MASK|PFERR_WRITE_MASK)) {
 			present = false;
 			break;
 		}
@@ -167,7 +176,7 @@ walk:
 			break;
 		}
 
-		if (is_rsvd_bits_set(vcpu, pte, walker->level)) {
+		if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
 			rsvd_fault = true;
 			break;
 		}
@@ -204,17 +213,28 @@ walk:
 		     (PTTYPE == 64 || is_pse(vcpu))) ||
 		    ((walker->level == PT_PDPE_LEVEL) &&
 			is_large_pte(pte) &&
-			is_long_mode(vcpu))) {
+			mmu->root_level == PT64_ROOT_LEVEL)) {
 			int lvl = walker->level;
+			gpa_t real_gpa;
+			gfn_t gfn;
+			u32 ac;
 
-			walker->gfn = gpte_to_gfn_lvl(pte, lvl);
-			walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
-					>> PAGE_SHIFT;
+			gfn = gpte_to_gfn_lvl(pte, lvl);
+			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
 
 			if (PTTYPE == 32 &&
 			    walker->level == PT_DIRECTORY_LEVEL &&
 			    is_cpuid_PSE36())
-				walker->gfn += pse36_gfn_delta(pte);
+				gfn += pse36_gfn_delta(pte);
+
+			ac = write_fault | fetch_fault | user_fault;
+
+			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
+						      ac);
+			if (real_gpa == UNMAPPED_GVA)
+				return 0;
+
+			walker->gfn = real_gpa >> PAGE_SHIFT;
 
 			break;
 		}
@@ -249,18 +269,36 @@ error:
 	walker->error_code = 0;
 	if (present)
 		walker->error_code |= PFERR_PRESENT_MASK;
-	if (write_fault)
-		walker->error_code |= PFERR_WRITE_MASK;
-	if (user_fault)
-		walker->error_code |= PFERR_USER_MASK;
-	if (fetch_fault && is_nx(vcpu))
+
+	walker->error_code |= write_fault | user_fault;
+
+	if (fetch_fault && mmu->nx)
 		walker->error_code |= PFERR_FETCH_MASK;
 	if (rsvd_fault)
 		walker->error_code |= PFERR_RSVD_MASK;
+
+	vcpu->arch.fault.address = addr;
+	vcpu->arch.fault.error_code = walker->error_code;
+
 	trace_kvm_mmu_walker_error(walker->error_code);
 	return 0;
 }
 
+static int FNAME(walk_addr)(struct guest_walker *walker,
+			    struct kvm_vcpu *vcpu, gva_t addr, u32 access)
+{
+	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
+					access);
+}
+
+static int FNAME(walk_addr_nested)(struct guest_walker *walker,
+				   struct kvm_vcpu *vcpu, gva_t addr,
+				   u32 access)
+{
+	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
+					addr, access);
+}
+
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			      u64 *spte, const void *pte)
 {
@@ -302,14 +340,87 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
 				struct guest_walker *gw, int level)
 {
-	int r;
 	pt_element_t curr_pte;
-
-	r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1],
+	gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
+	u64 mask;
+	int r, index;
+
+	if (level == PT_PAGE_TABLE_LEVEL) {
+		mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
+		base_gpa = pte_gpa & ~mask;
+		index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
+
+		r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
+				gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
+		curr_pte = gw->prefetch_ptes[index];
+	} else
+		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa,
 				  &curr_pte, sizeof(curr_pte));
+
 	return r || curr_pte != gw->ptes[level - 1];
 }
 
+static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
+				u64 *sptep)
+{
+	struct kvm_mmu_page *sp;
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	pt_element_t *gptep = gw->prefetch_ptes;
+	u64 *spte;
+	int i;
+
+	sp = page_header(__pa(sptep));
+
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		return;
+
+	if (sp->role.direct)
+		return __direct_pte_prefetch(vcpu, sp, sptep);
+
+	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
+	spte = sp->spt + i;
+
+	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
+		pt_element_t gpte;
+		unsigned pte_access;
+		gfn_t gfn;
+		pfn_t pfn;
+		bool dirty;
+
+		if (spte == sptep)
+			continue;
+
+		if (*spte != shadow_trap_nonpresent_pte)
+			continue;
+
+		gpte = gptep[i];
+
+		if (!is_present_gpte(gpte) ||
+		      is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
+			if (!sp->unsync)
+				__set_spte(spte, shadow_notrap_nonpresent_pte);
+			continue;
+		}
+
+		if (!(gpte & PT_ACCESSED_MASK))
+			continue;
+
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		gfn = gpte_to_gfn(gpte);
+		dirty = is_dirty_gpte(gpte);
+		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+				      (pte_access & ACC_WRITE_MASK) && dirty);
+		if (is_error_pfn(pfn)) {
+			kvm_release_pfn_clean(pfn);
+			break;
+		}
+
+		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+			     dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
+			     pfn, true, true);
+	}
+}
+
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
@@ -391,6 +502,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
 		     user_fault, write_fault, dirty, ptwrite, it.level,
 		     gw->gfn, pfn, false, true);
+	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
 	return it.sptep;
 
@@ -420,7 +532,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 {
 	int write_fault = error_code & PFERR_WRITE_MASK;
 	int user_fault = error_code & PFERR_USER_MASK;
-	int fetch_fault = error_code & PFERR_FETCH_MASK;
 	struct guest_walker walker;
 	u64 *sptep;
 	int write_pt = 0;
@@ -430,7 +541,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	unsigned long mmu_seq;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
-	kvm_mmu_audit(vcpu, "pre page fault");
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -439,15 +549,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	/*
 	 * Look up the guest pte for the faulting address.
 	 */
-	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
-			     fetch_fault);
+	r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
 
 	/*
 	 * The page is not mapped by the guest. Let the guest handle it.
 	 */
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
-		inject_page_fault(vcpu, addr, walker.error_code);
+		inject_page_fault(vcpu);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
 		return 0;
 	}
@@ -468,6 +577,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
+
+	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 	kvm_mmu_free_some_pages(vcpu);
 	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 			     level, &write_pt, pfn);
@@ -479,7 +590,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
 
 	++vcpu->stat.pf_fixed;
-	kvm_mmu_audit(vcpu, "post page fault (fixed)");
+	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return write_pt;
@@ -556,10 +667,25 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
 	gpa_t gpa = UNMAPPED_GVA;
 	int r;
 
-	r = FNAME(walk_addr)(&walker, vcpu, vaddr,
-			     !!(access & PFERR_WRITE_MASK),
-			     !!(access & PFERR_USER_MASK),
-			     !!(access & PFERR_FETCH_MASK));
+	r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
+
+	if (r) {
+		gpa = gfn_to_gpa(walker.gfn);
+		gpa |= vaddr & ~PAGE_MASK;
+	} else if (error)
+		*error = walker.error_code;
+
+	return gpa;
+}
+
+static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
+				      u32 access, u32 *error)
+{
+	struct guest_walker walker;
+	gpa_t gpa = UNMAPPED_GVA;
+	int r;
+
+	r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
 
 	if (r) {
 		gpa = gfn_to_gpa(walker.gfn);
@@ -638,7 +764,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return -EINVAL;
 
 	gfn = gpte_to_gfn(gpte);
-	if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)
+	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
 	      || gfn != sp->gfns[i] || !is_present_gpte(gpte)
 	      || !(gpte & PT_ACCESSED_MASK)) {
 		u64 nonpresent;