aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAvi Kivity <avi@qumranet.com>2007-01-26 03:56:41 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-01-26 16:50:57 -0500
commit7993ba43db1c07245ada067791f91dbf018095ac (patch)
tree444617c9b709ef38b3eadcc6a61ed03bf25d6de7
parent6f00e68f210c0407dd666743ce61ae543cfd509d (diff)
[PATCH] KVM: MMU: Perform access checks in walk_addr()
Check pte permission bits in walk_addr(), instead of scattering the checks all over the code. This has the following benefits: 1. We no longer set the accessed bit for accesses which fail permission checks. 2. Setting the accessed bit is simplified. 3. Under some circumstances, we used to pretend a page fault was fixed when it would actually fail the access checks. This caused an unnecessary vmexit. 4. The error code for guest page faults is now correct. The fix helps netbsd further along booting, and allows kvm to pass the new mmu testsuite. Signed-off-by: Avi Kivity <avi@qumranet.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/kvm/mmu.c10
-rw-r--r--drivers/kvm/paging_tmpl.h68
2 files changed, 38 insertions, 40 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index c6f972914f08..a05d0609d918 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -992,16 +992,6 @@ static inline int fix_read_pf(u64 *shadow_ent)
992 return 0; 992 return 0;
993} 993}
994 994
995static int may_access(u64 pte, int write, int user)
996{
997
998 if (user && !(pte & PT_USER_MASK))
999 return 0;
1000 if (write && !(pte & PT_WRITABLE_MASK))
1001 return 0;
1002 return 1;
1003}
1004
1005static void paging_free(struct kvm_vcpu *vcpu) 995static void paging_free(struct kvm_vcpu *vcpu)
1006{ 996{
1007 nonpaging_free(vcpu); 997 nonpaging_free(vcpu);
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 6bc41950fbb3..afcd2a8f45bb 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -63,13 +63,15 @@ struct guest_walker {
63 pt_element_t *ptep; 63 pt_element_t *ptep;
64 pt_element_t inherited_ar; 64 pt_element_t inherited_ar;
65 gfn_t gfn; 65 gfn_t gfn;
66 u32 error_code;
66}; 67};
67 68
68/* 69/*
69 * Fetch a guest pte for a guest virtual address 70 * Fetch a guest pte for a guest virtual address
70 */ 71 */
71static void FNAME(walk_addr)(struct guest_walker *walker, 72static int FNAME(walk_addr)(struct guest_walker *walker,
72 struct kvm_vcpu *vcpu, gva_t addr) 73 struct kvm_vcpu *vcpu, gva_t addr,
74 int write_fault, int user_fault)
73{ 75{
74 hpa_t hpa; 76 hpa_t hpa;
75 struct kvm_memory_slot *slot; 77 struct kvm_memory_slot *slot;
@@ -86,7 +88,7 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
86 walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; 88 walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
87 root = *walker->ptep; 89 root = *walker->ptep;
88 if (!(root & PT_PRESENT_MASK)) 90 if (!(root & PT_PRESENT_MASK))
89 return; 91 goto not_present;
90 --walker->level; 92 --walker->level;
91 } 93 }
92#endif 94#endif
@@ -111,11 +113,18 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
111 ASSERT(((unsigned long)walker->table & PAGE_MASK) == 113 ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
112 ((unsigned long)ptep & PAGE_MASK)); 114 ((unsigned long)ptep & PAGE_MASK));
113 115
114 if (is_present_pte(*ptep) && !(*ptep & PT_ACCESSED_MASK))
115 *ptep |= PT_ACCESSED_MASK;
116
117 if (!is_present_pte(*ptep)) 116 if (!is_present_pte(*ptep))
118 break; 117 goto not_present;
118
119 if (write_fault && !is_writeble_pte(*ptep))
120 if (user_fault || is_write_protection(vcpu))
121 goto access_error;
122
123 if (user_fault && !(*ptep & PT_USER_MASK))
124 goto access_error;
125
126 if (!(*ptep & PT_ACCESSED_MASK))
127 *ptep |= PT_ACCESSED_MASK; /* avoid rmw */
119 128
120 if (walker->level == PT_PAGE_TABLE_LEVEL) { 129 if (walker->level == PT_PAGE_TABLE_LEVEL) {
121 walker->gfn = (*ptep & PT_BASE_ADDR_MASK) 130 walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
@@ -146,6 +155,21 @@ static void FNAME(walk_addr)(struct guest_walker *walker,
146 } 155 }
147 walker->ptep = ptep; 156 walker->ptep = ptep;
148 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); 157 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
158 return 1;
159
160not_present:
161 walker->error_code = 0;
162 goto err;
163
164access_error:
165 walker->error_code = PFERR_PRESENT_MASK;
166
167err:
168 if (write_fault)
169 walker->error_code |= PFERR_WRITE_MASK;
170 if (user_fault)
171 walker->error_code |= PFERR_USER_MASK;
172 return 0;
149} 173}
150 174
151static void FNAME(release_walker)(struct guest_walker *walker) 175static void FNAME(release_walker)(struct guest_walker *walker)
@@ -347,7 +371,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
347 u32 error_code) 371 u32 error_code)
348{ 372{
349 int write_fault = error_code & PFERR_WRITE_MASK; 373 int write_fault = error_code & PFERR_WRITE_MASK;
350 int pte_present = error_code & PFERR_PRESENT_MASK;
351 int user_fault = error_code & PFERR_USER_MASK; 374 int user_fault = error_code & PFERR_USER_MASK;
352 struct guest_walker walker; 375 struct guest_walker walker;
353 u64 *shadow_pte; 376 u64 *shadow_pte;
@@ -365,19 +388,19 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
365 /* 388 /*
366 * Look up the shadow pte for the faulting address. 389 * Look up the shadow pte for the faulting address.
367 */ 390 */
368 FNAME(walk_addr)(&walker, vcpu, addr); 391 r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault);
369 shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
370 392
371 /* 393 /*
372 * The page is not mapped by the guest. Let the guest handle it. 394 * The page is not mapped by the guest. Let the guest handle it.
373 */ 395 */
374 if (!shadow_pte) { 396 if (!r) {
375 pgprintk("%s: not mapped\n", __FUNCTION__); 397 pgprintk("%s: guest page fault\n", __FUNCTION__);
376 inject_page_fault(vcpu, addr, error_code); 398 inject_page_fault(vcpu, addr, walker.error_code);
377 FNAME(release_walker)(&walker); 399 FNAME(release_walker)(&walker);
378 return 0; 400 return 0;
379 } 401 }
380 402
403 shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
381 pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, 404 pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
382 shadow_pte, *shadow_pte); 405 shadow_pte, *shadow_pte);
383 406
@@ -399,22 +422,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
399 * mmio: emulate if accessible, otherwise its a guest fault. 422 * mmio: emulate if accessible, otherwise its a guest fault.
400 */ 423 */
401 if (is_io_pte(*shadow_pte)) { 424 if (is_io_pte(*shadow_pte)) {
402 if (may_access(*shadow_pte, write_fault, user_fault)) 425 return 1;
403 return 1;
404 pgprintk("%s: io work, no access\n", __FUNCTION__);
405 inject_page_fault(vcpu, addr,
406 error_code | PFERR_PRESENT_MASK);
407 kvm_mmu_audit(vcpu, "post page fault (io)");
408 return 0;
409 }
410
411 /*
412 * pte not present, guest page fault.
413 */
414 if (pte_present && !fixed && !write_pt) {
415 inject_page_fault(vcpu, addr, error_code);
416 kvm_mmu_audit(vcpu, "post page fault (guest)");
417 return 0;
418 } 426 }
419 427
420 ++kvm_stat.pf_fixed; 428 ++kvm_stat.pf_fixed;
@@ -429,7 +437,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
429 pt_element_t guest_pte; 437 pt_element_t guest_pte;
430 gpa_t gpa; 438 gpa_t gpa;
431 439
432 FNAME(walk_addr)(&walker, vcpu, vaddr); 440 FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0);
433 guest_pte = *walker.ptep; 441 guest_pte = *walker.ptep;
434 FNAME(release_walker)(&walker); 442 FNAME(release_walker)(&walker);
435 443