Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r--  arch/x86/kvm/paging_tmpl.h  86
1 file changed, 49 insertions(+), 37 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ecc0856268c4..156fe10288ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
 
-	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
+	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
 	walker->level = vcpu->arch.mmu.root_level;
 	pte = vcpu->arch.cr3;
@@ -155,7 +155,7 @@ walk:
 		pte_gpa += index * sizeof(pt_element_t);
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
-		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
 			 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
@@ -222,7 +222,7 @@ walk:
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __FUNCTION__, (u64)pte, pt_access, pte_access);
+		 __func__, (u64)pte, pt_access, pte_access);
 	return 1;
 
 not_present:
@@ -243,31 +243,30 @@ err:
 }
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
-			      u64 *spte, const void *pte, int bytes,
-			      int offset_in_pte)
+			      u64 *spte, const void *pte)
 {
 	pt_element_t gpte;
 	unsigned pte_access;
-	struct page *npage;
+	pfn_t pfn;
+	int largepage = vcpu->arch.update_pte.largepage;
 
 	gpte = *(const pt_element_t *)pte;
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
-		if (!offset_in_pte && !is_present_pte(gpte))
+		if (!is_present_pte(gpte))
 			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
 		return;
 	}
-	if (bytes < sizeof(pt_element_t))
-		return;
-	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
+	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
 	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
 		return;
-	npage = vcpu->arch.update_pte.page;
-	if (!npage)
+	pfn = vcpu->arch.update_pte.pfn;
+	if (is_error_pfn(pfn))
 		return;
-	get_page(npage);
+	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage);
+		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+		     pfn, true);
 }
 
 /*
@@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
  */
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *walker,
-			 int user_fault, int write_fault, int *ptwrite,
-			 struct page *page)
+			 int user_fault, int write_fault, int largepage,
+			 int *ptwrite, pfn_t pfn)
 {
 	hpa_t shadow_addr;
 	int level;
@@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
 		if (level == PT_PAGE_TABLE_LEVEL)
 			break;
-		if (is_shadow_present_pte(*shadow_ent)) {
+
+		if (largepage && level == PT_DIRECTORY_LEVEL)
+			break;
+
+		if (is_shadow_present_pte(*shadow_ent)
+		    && !is_large_pte(*shadow_ent)) {
 			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
 			continue;
 		}
 
+		if (is_large_pte(*shadow_ent))
+			rmap_remove(vcpu->kvm, shadow_ent);
+
 		if (level - 1 == PT_PAGE_TABLE_LEVEL
 		    && walker->level == PT_DIRECTORY_LEVEL) {
 			metaphysical = 1;
@@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 						  walker->pte_gpa[level - 2],
 						  &curr_pte, sizeof(curr_pte));
 			if (r || curr_pte != walker->ptes[level - 2]) {
-				kvm_release_page_clean(page);
+				kvm_release_pfn_clean(pfn);
 				return NULL;
 			}
 		}
@@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
 		     user_fault, write_fault,
 		     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
-		     ptwrite, walker->gfn, page);
+		     ptwrite, largepage, walker->gfn, pfn, false);
 
 	return shadow_ent;
 }
@@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	u64 *shadow_pte;
 	int write_pt = 0;
 	int r;
-	struct page *page;
+	pfn_t pfn;
+	int largepage = 0;
 
-	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
+	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 	kvm_mmu_audit(vcpu, "pre page fault");
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		return r;
 
-	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
@@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 * The page is not mapped by the guest. Let the guest handle it.
 	 */
 	if (!r) {
-		pgprintk("%s: guest page fault\n", __FUNCTION__);
+		pgprintk("%s: guest page fault\n", __func__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 
 	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, walker.gfn);
+	if (walker.level == PT_DIRECTORY_LEVEL) {
+		gfn_t large_gfn;
+		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
+		if (is_largepage_backed(vcpu, large_gfn)) {
+			walker.gfn = large_gfn;
+			largepage = 1;
+		}
+	}
+	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 	up_read(&current->mm->mmap_sem);
 
+	/* mmio */
+	if (is_error_pfn(pfn)) {
+		pgprintk("gfn %x is mmio\n", walker.gfn);
+		kvm_release_pfn_clean(pfn);
+		return 1;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  &write_pt, page);
-	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
+				  largepage, &write_pt, pfn);
+
+	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 		 shadow_pte, *shadow_pte, write_pt);
 
 	if (!write_pt)
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
 
-	/*
-	 * mmio: emulate if accessible, otherwise its a guest fault.
-	 */
-	if (shadow_pte && is_io_pte(*shadow_pte)) {
-		spin_unlock(&vcpu->kvm->mmu_lock);
-		up_read(&vcpu->kvm->slots_lock);
-		return 1;
-	}
-
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	up_read(&vcpu->kvm->slots_lock);
 
 	return write_pt;
 }
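
Note: the large-page path in the final hunk rounds the faulting gfn down to its huge-page boundary with walker.gfn & ~(KVM_PAGES_PER_HPAGE-1) before resolving the pfn. A minimal, self-contained sketch of that alignment step, assuming 4 KiB base pages and 2 MiB huge pages (512 gfns per huge page); the names below are illustrative, not the kernel's:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for KVM_PAGES_PER_HPAGE: 2 MiB / 4 KiB = 512 gfns. */
#define PAGES_PER_HPAGE 512ULL

/* Round a guest frame number down to the first gfn of its huge page,
 * mirroring: large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); */
static uint64_t hpage_align_gfn(uint64_t gfn)
{
	return gfn & ~(PAGES_PER_HPAGE - 1);
}

int main(void)
{
	uint64_t gfn = 0x12345;	/* an arbitrary faulting gfn */

	printf("gfn 0x%llx aligns to large_gfn 0x%llx\n",
	       (unsigned long long)gfn,
	       (unsigned long long)hpage_align_gfn(gfn));
	return 0;
}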