Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r--	arch/x86/kvm/paging_tmpl.h | 249
1 file changed, 168 insertions, 81 deletions
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4a814bff21f2..613ec9aa674a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,11 +25,11 @@
 #if PTTYPE == 64
 #define pt_element_t u64
 #define guest_walker guest_walker64
+#define shadow_walker shadow_walker64
 #define FNAME(name) paging##64_##name
 #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
 #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
 #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
-#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
 #define PT_LEVEL_BITS PT64_LEVEL_BITS
 #ifdef CONFIG_X86_64
@@ -42,11 +42,11 @@
 #elif PTTYPE == 32
 #define pt_element_t u32
 #define guest_walker guest_walker32
+#define shadow_walker shadow_walker32
 #define FNAME(name) paging##32_##name
 #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
 #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
 #define PT_INDEX(addr, level) PT32_INDEX(addr, level)
-#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
 #define PT_LEVEL_BITS PT32_LEVEL_BITS
 #define PT_MAX_FULL_LEVELS 2
@@ -73,6 +73,17 @@ struct guest_walker {
 	u32 error_code;
 };
 
+struct shadow_walker {
+	struct kvm_shadow_walk walker;
+	struct guest_walker *guest_walker;
+	int user_fault;
+	int write_fault;
+	int largepage;
+	int *ptwrite;
+	pfn_t pfn;
+	u64 *sptep;
+};
+
 static gfn_t gpte_to_gfn(pt_element_t gpte)
 {
 	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -91,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
 	pt_element_t *table;
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(kvm, table_gfn);
-	up_read(&current->mm->mmap_sem);
 
 	table = kmap_atomic(page, KM_USER0);
-
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
-
 	kunmap_atomic(table, KM_USER0);
 
 	kvm_release_page_dirty(page);
@@ -274,86 +281,89 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
-			 struct guest_walker *walker,
-			 int user_fault, int write_fault, int largepage,
-			 int *ptwrite, pfn_t pfn)
+static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
+				    struct kvm_vcpu *vcpu, u64 addr,
+				    u64 *sptep, int level)
 {
-	hpa_t shadow_addr;
-	int level;
-	u64 *shadow_ent;
-	unsigned access = walker->pt_access;
-
-	if (!is_present_pte(walker->ptes[walker->level - 1]))
-		return NULL;
-
-	shadow_addr = vcpu->arch.mmu.root_hpa;
-	level = vcpu->arch.mmu.shadow_root_level;
-	if (level == PT32E_ROOT_LEVEL) {
-		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
-		shadow_addr &= PT64_BASE_ADDR_MASK;
-		--level;
+	struct shadow_walker *sw =
+		container_of(_sw, struct shadow_walker, walker);
+	struct guest_walker *gw = sw->guest_walker;
+	unsigned access = gw->pt_access;
+	struct kvm_mmu_page *shadow_page;
+	u64 spte;
+	int metaphysical;
+	gfn_t table_gfn;
+	int r;
+	pt_element_t curr_pte;
+
+	if (level == PT_PAGE_TABLE_LEVEL
+	    || (sw->largepage && level == PT_DIRECTORY_LEVEL)) {
+		mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
+			     sw->user_fault, sw->write_fault,
+			     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+			     sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
+			     false);
+		sw->sptep = sptep;
+		return 1;
 	}
 
-	for (; ; level--) {
-		u32 index = SHADOW_PT_INDEX(addr, level);
-		struct kvm_mmu_page *shadow_page;
-		u64 shadow_pte;
-		int metaphysical;
-		gfn_t table_gfn;
-
-		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
-		if (level == PT_PAGE_TABLE_LEVEL)
-			break;
-
-		if (largepage && level == PT_DIRECTORY_LEVEL)
-			break;
+	if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
+		return 0;
 
-		if (is_shadow_present_pte(*shadow_ent)
-		    && !is_large_pte(*shadow_ent)) {
-			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
-			continue;
-		}
+	if (is_large_pte(*sptep)) {
+		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+		kvm_flush_remote_tlbs(vcpu->kvm);
+		rmap_remove(vcpu->kvm, sptep);
+	}
 
-		if (is_large_pte(*shadow_ent))
-			rmap_remove(vcpu->kvm, shadow_ent);
-
-		if (level - 1 == PT_PAGE_TABLE_LEVEL
-		    && walker->level == PT_DIRECTORY_LEVEL) {
-			metaphysical = 1;
-			if (!is_dirty_pte(walker->ptes[level - 1]))
-				access &= ~ACC_WRITE_MASK;
-			table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
-		} else {
-			metaphysical = 0;
-			table_gfn = walker->table_gfn[level - 2];
-		}
-		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
-					       metaphysical, access,
-					       shadow_ent);
-		if (!metaphysical) {
-			int r;
-			pt_element_t curr_pte;
-			r = kvm_read_guest_atomic(vcpu->kvm,
-						  walker->pte_gpa[level - 2],
-						  &curr_pte, sizeof(curr_pte));
-			if (r || curr_pte != walker->ptes[level - 2]) {
-				kvm_release_pfn_clean(pfn);
-				return NULL;
-			}
+	if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) {
+		metaphysical = 1;
+		if (!is_dirty_pte(gw->ptes[level - 1]))
+			access &= ~ACC_WRITE_MASK;
+		table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+	} else {
+		metaphysical = 0;
+		table_gfn = gw->table_gfn[level - 2];
+	}
+	shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
+				       metaphysical, access, sptep);
+	if (!metaphysical) {
+		r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
+					  &curr_pte, sizeof(curr_pte));
+		if (r || curr_pte != gw->ptes[level - 2]) {
+			kvm_release_pfn_clean(sw->pfn);
+			sw->sptep = NULL;
+			return 1;
 		}
-		shadow_addr = __pa(shadow_page->spt);
-		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
-			| PT_WRITABLE_MASK | PT_USER_MASK;
-		set_shadow_pte(shadow_ent, shadow_pte);
 	}
 
-	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
-		     user_fault, write_fault,
-		     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
-		     ptwrite, largepage, walker->gfn, pfn, false);
+	spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
+		| PT_WRITABLE_MASK | PT_USER_MASK;
+	*sptep = spte;
+	return 0;
+}
+
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+			 struct guest_walker *guest_walker,
+			 int user_fault, int write_fault, int largepage,
+			 int *ptwrite, pfn_t pfn)
+{
+	struct shadow_walker walker = {
+		.walker = { .entry = FNAME(shadow_walk_entry), },
+		.guest_walker = guest_walker,
+		.user_fault = user_fault,
+		.write_fault = write_fault,
+		.largepage = largepage,
+		.ptwrite = ptwrite,
+		.pfn = pfn,
+	};
+
+	if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1]))
+		return NULL;
+
+	walk_shadow(&walker.walker, vcpu, addr);
 
-	return shadow_ent;
+	return walker.sptep;
 }
 
 /*
@@ -407,7 +417,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		return 0;
 	}
 
-	down_read(&current->mm->mmap_sem);
 	if (walker.level == PT_DIRECTORY_LEVEL) {
 		gfn_t large_gfn;
 		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
@@ -417,9 +426,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		}
 	}
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
-	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
@@ -453,6 +461,31 @@ out_unlock:
 	return 0;
 }
 
+static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
+				      struct kvm_vcpu *vcpu, u64 addr,
+				      u64 *sptep, int level)
+{
+
+	if (level == PT_PAGE_TABLE_LEVEL) {
+		if (is_shadow_present_pte(*sptep))
+			rmap_remove(vcpu->kvm, sptep);
+		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+		return 1;
+	}
+	if (!is_shadow_present_pte(*sptep))
+		return 1;
+	return 0;
+}
+
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	struct shadow_walker walker = {
+		.walker = { .entry = FNAME(shadow_invlpg_entry), },
+	};
+
+	walk_shadow(&walker.walker, vcpu, gva);
+}
+
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
 	struct guest_walker walker;
@@ -499,12 +532,66 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 	}
 }
 
+/*
+ * Using the cached information from sp->gfns is safe because:
+ * - The spte has a reference to the struct page, so the pfn for a given gfn
+ *   can't change unless all sptes pointing to it are nuked first.
+ * - Alias changes zap the entire shadow cache.
+ */
+static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	int i, offset, nr_present;
+
+	offset = nr_present = 0;
+
+	if (PTTYPE == 32)
+		offset = sp->role.quadrant << PT64_LEVEL_BITS;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+		unsigned pte_access;
+		pt_element_t gpte;
+		gpa_t pte_gpa;
+		gfn_t gfn = sp->gfns[i];
+
+		if (!is_shadow_present_pte(sp->spt[i]))
+			continue;
+
+		pte_gpa = gfn_to_gpa(sp->gfn);
+		pte_gpa += (i+offset) * sizeof(pt_element_t);
+
+		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
+					  sizeof(pt_element_t)))
+			return -EINVAL;
+
+		if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
+		    !(gpte & PT_ACCESSED_MASK)) {
+			u64 nonpresent;
+
+			rmap_remove(vcpu->kvm, &sp->spt[i]);
+			if (is_present_pte(gpte))
+				nonpresent = shadow_trap_nonpresent_pte;
+			else
+				nonpresent = shadow_notrap_nonpresent_pte;
+			set_shadow_pte(&sp->spt[i], nonpresent);
+			continue;
+		}
+
+		nr_present++;
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
+			 is_dirty_pte(gpte), 0, gfn,
+			 spte_to_pfn(sp->spt[i]), true, false);
+	}
+
+	return !nr_present;
+}
+
 #undef pt_element_t
 #undef guest_walker
+#undef shadow_walker
 #undef FNAME
 #undef PT_BASE_ADDR_MASK
 #undef PT_INDEX
-#undef SHADOW_PT_INDEX
 #undef PT_LEVEL_MASK
 #undef PT_DIR_BASE_ADDR_MASK
 #undef PT_LEVEL_BITS
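
The structural change in this diff is that the open-coded level-by-level loop in FNAME(fetch) becomes a per-level callback (FNAME(shadow_walk_entry), plus FNAME(shadow_invlpg_entry) for invlpg) that the generic walk_shadow() helper invokes at each level of the shadow page table, with per-walk state carried in struct shadow_walker, which embeds struct kvm_shadow_walk and is recovered with container_of(). walk_shadow() itself lives in the generic MMU code (mmu.c) and is not part of this diff. Below is a minimal userspace sketch of that callback-walker pattern, assuming simplified stand-in types and a fake indexing scheme; it is illustrative only, not the kernel's real API.

/* Illustrative model of the walker pattern introduced above.
 * All names and types here are simplified stand-ins, not kernel code. */
#include <stddef.h>
#include <stdio.h>

struct shadow_walk {                        /* stands in for kvm_shadow_walk */
	/* called once per level; returning nonzero stops the walk */
	int (*entry)(struct shadow_walk *walker, unsigned long addr,
		     unsigned long *sptep, int level);
};

struct fetch_walker {                       /* stands in for struct shadow_walker */
	struct shadow_walk walker;          /* embedded, recovered via container_of */
	unsigned long result;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Descend from the root level toward the leaf, handing each level's
 * entry to the callback and stopping as soon as it returns nonzero. */
static void walk_shadow(struct shadow_walk *walker, unsigned long addr,
			unsigned long *table, int root_level)
{
	for (int level = root_level; level >= 1; level--) {
		unsigned long *sptep = &table[level - 1]; /* fake indexing */
		if (walker->entry(walker, addr, sptep, level))
			return;
	}
}

static int fetch_entry(struct shadow_walk *w, unsigned long addr,
		       unsigned long *sptep, int level)
{
	struct fetch_walker *fw = container_of(w, struct fetch_walker, walker);

	(void)addr;
	if (level == 1) {           /* leaf reached: record the spte and stop */
		fw->result = *sptep;
		return 1;
	}
	return 0;                   /* keep descending */
}

int main(void)
{
	unsigned long table[4] = { 0x1000, 0x2000, 0x3000, 0x4000 };
	struct fetch_walker fw = { .walker = { .entry = fetch_entry } };

	walk_shadow(&fw.walker, 0xdeadbeef, table, 4);
	printf("leaf spte: %#lx\n", fw.result);
	return 0;
}

Returning nonzero from the entry callback is how shadow_walk_entry in the patch terminates at the leaf (or at the directory level for a large page) and how its guest-pte mismatch path bails out of the walk early.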