Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--  arch/x86/kvm/mmu.c  807
1 file changed, 497 insertions, 310 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b1ed0a1a5913..0dcc95e09876 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -7,6 +7,7 @@
7 * MMU support 7 * MMU support
8 * 8 *
9 * Copyright (C) 2006 Qumranet, Inc. 9 * Copyright (C) 2006 Qumranet, Inc.
10 * Copyright 2010 Red Hat, Inc. and/or its affilates.
10 * 11 *
11 * Authors: 12 * Authors:
12 * Yaniv Kamay <yaniv@qumranet.com> 13 * Yaniv Kamay <yaniv@qumranet.com>
@@ -32,6 +33,7 @@
32#include <linux/compiler.h> 33#include <linux/compiler.h>
33#include <linux/srcu.h> 34#include <linux/srcu.h>
34#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/uaccess.h>
35 37
36#include <asm/page.h> 38#include <asm/page.h>
37#include <asm/cmpxchg.h> 39#include <asm/cmpxchg.h>
@@ -90,8 +92,6 @@ module_param(oos_shadow, bool, 0644);
90#define PT_FIRST_AVAIL_BITS_SHIFT 9 92#define PT_FIRST_AVAIL_BITS_SHIFT 9
91#define PT64_SECOND_AVAIL_BITS_SHIFT 52 93#define PT64_SECOND_AVAIL_BITS_SHIFT 52
92 94
93#define VALID_PAGE(x) ((x) != INVALID_PAGE)
94
95#define PT64_LEVEL_BITS 9 95#define PT64_LEVEL_BITS 9
96 96
97#define PT64_LEVEL_SHIFT(level) \ 97#define PT64_LEVEL_SHIFT(level) \
@@ -173,7 +173,7 @@ struct kvm_shadow_walk_iterator {
173 shadow_walk_okay(&(_walker)); \ 173 shadow_walk_okay(&(_walker)); \
174 shadow_walk_next(&(_walker))) 174 shadow_walk_next(&(_walker)))
175 175
176typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); 176typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
177 177
178static struct kmem_cache *pte_chain_cache; 178static struct kmem_cache *pte_chain_cache;
179static struct kmem_cache *rmap_desc_cache; 179static struct kmem_cache *rmap_desc_cache;
@@ -288,6 +288,35 @@ static void __set_spte(u64 *sptep, u64 spte)
288#endif 288#endif
289} 289}
290 290
291static u64 __xchg_spte(u64 *sptep, u64 new_spte)
292{
293#ifdef CONFIG_X86_64
294 return xchg(sptep, new_spte);
295#else
296 u64 old_spte;
297
298 do {
299 old_spte = *sptep;
300 } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte);
301
302 return old_spte;
303#endif
304}
305
306static void update_spte(u64 *sptep, u64 new_spte)
307{
308 u64 old_spte;
309
310 if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) ||
311 !is_rmap_spte(*sptep))
312 __set_spte(sptep, new_spte);
313 else {
314 old_spte = __xchg_spte(sptep, new_spte);
315 if (old_spte & shadow_accessed_mask)
316 mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
317 }
318}
319
291static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 320static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
292 struct kmem_cache *base_cache, int min) 321 struct kmem_cache *base_cache, int min)
293{ 322{
@@ -304,10 +333,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
304 return 0; 333 return 0;
305} 334}
306 335
307static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) 336static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
337 struct kmem_cache *cache)
308{ 338{
309 while (mc->nobjs) 339 while (mc->nobjs)
310 kfree(mc->objects[--mc->nobjs]); 340 kmem_cache_free(cache, mc->objects[--mc->nobjs]);
311} 341}
312 342
313static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, 343static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
@@ -355,10 +385,11 @@ out:
355 385
356static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) 386static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
357{ 387{
358 mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache); 388 mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache);
359 mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache); 389 mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache);
360 mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); 390 mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
361 mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); 391 mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
392 mmu_page_header_cache);
362} 393}
363 394
364static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, 395static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
@@ -379,7 +410,7 @@ static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
379 410
380static void mmu_free_pte_chain(struct kvm_pte_chain *pc) 411static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
381{ 412{
382 kfree(pc); 413 kmem_cache_free(pte_chain_cache, pc);
383} 414}
384 415
385static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) 416static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
@@ -390,7 +421,23 @@ static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
390 421
391static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) 422static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
392{ 423{
393 kfree(rd); 424 kmem_cache_free(rmap_desc_cache, rd);
425}
426
427static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
428{
429 if (!sp->role.direct)
430 return sp->gfns[index];
431
432 return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS));
433}
434
435static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
436{
437 if (sp->role.direct)
438 BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
439 else
440 sp->gfns[index] = gfn;
394} 441}
395 442
396/* 443/*
@@ -403,8 +450,8 @@ static int *slot_largepage_idx(gfn_t gfn,
403{ 450{
404 unsigned long idx; 451 unsigned long idx;
405 452
406 idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - 453 idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
407 (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); 454 (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
408 return &slot->lpage_info[level - 2][idx].write_count; 455 return &slot->lpage_info[level - 2][idx].write_count;
409} 456}
410 457
@@ -414,9 +461,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
414 int *write_count; 461 int *write_count;
415 int i; 462 int i;
416 463
417 gfn = unalias_gfn(kvm, gfn); 464 slot = gfn_to_memslot(kvm, gfn);
418
419 slot = gfn_to_memslot_unaliased(kvm, gfn);
420 for (i = PT_DIRECTORY_LEVEL; 465 for (i = PT_DIRECTORY_LEVEL;
421 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 466 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
422 write_count = slot_largepage_idx(gfn, slot, i); 467 write_count = slot_largepage_idx(gfn, slot, i);
@@ -430,8 +475,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
430 int *write_count; 475 int *write_count;
431 int i; 476 int i;
432 477
433 gfn = unalias_gfn(kvm, gfn); 478 slot = gfn_to_memslot(kvm, gfn);
434 slot = gfn_to_memslot_unaliased(kvm, gfn);
435 for (i = PT_DIRECTORY_LEVEL; 479 for (i = PT_DIRECTORY_LEVEL;
436 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 480 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
437 write_count = slot_largepage_idx(gfn, slot, i); 481 write_count = slot_largepage_idx(gfn, slot, i);
@@ -447,8 +491,7 @@ static int has_wrprotected_page(struct kvm *kvm,
447 struct kvm_memory_slot *slot; 491 struct kvm_memory_slot *slot;
448 int *largepage_idx; 492 int *largepage_idx;
449 493
450 gfn = unalias_gfn(kvm, gfn); 494 slot = gfn_to_memslot(kvm, gfn);
451 slot = gfn_to_memslot_unaliased(kvm, gfn);
452 if (slot) { 495 if (slot) {
453 largepage_idx = slot_largepage_idx(gfn, slot, level); 496 largepage_idx = slot_largepage_idx(gfn, slot, level);
454 return *largepage_idx; 497 return *largepage_idx;
@@ -501,7 +544,6 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
501 544
502/* 545/*
503 * Take gfn and return the reverse mapping to it. 546 * Take gfn and return the reverse mapping to it.
504 * Note: gfn must be unaliased before this function get called
505 */ 547 */
506 548
507static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) 549static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
@@ -513,8 +555,8 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
513 if (likely(level == PT_PAGE_TABLE_LEVEL)) 555 if (likely(level == PT_PAGE_TABLE_LEVEL))
514 return &slot->rmap[gfn - slot->base_gfn]; 556 return &slot->rmap[gfn - slot->base_gfn];
515 557
516 idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - 558 idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
517 (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); 559 (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
518 560
519 return &slot->lpage_info[level - 2][idx].rmap_pde; 561 return &slot->lpage_info[level - 2][idx].rmap_pde;
520} 562}
@@ -541,9 +583,8 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
541 583
542 if (!is_rmap_spte(*spte)) 584 if (!is_rmap_spte(*spte))
543 return count; 585 return count;
544 gfn = unalias_gfn(vcpu->kvm, gfn);
545 sp = page_header(__pa(spte)); 586 sp = page_header(__pa(spte));
546 sp->gfns[spte - sp->spt] = gfn; 587 kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
547 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 588 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
548 if (!*rmapp) { 589 if (!*rmapp) {
549 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); 590 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
@@ -600,19 +641,13 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
600 struct kvm_rmap_desc *desc; 641 struct kvm_rmap_desc *desc;
601 struct kvm_rmap_desc *prev_desc; 642 struct kvm_rmap_desc *prev_desc;
602 struct kvm_mmu_page *sp; 643 struct kvm_mmu_page *sp;
603 pfn_t pfn; 644 gfn_t gfn;
604 unsigned long *rmapp; 645 unsigned long *rmapp;
605 int i; 646 int i;
606 647
607 if (!is_rmap_spte(*spte))
608 return;
609 sp = page_header(__pa(spte)); 648 sp = page_header(__pa(spte));
610 pfn = spte_to_pfn(*spte); 649 gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
611 if (*spte & shadow_accessed_mask) 650 rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
612 kvm_set_pfn_accessed(pfn);
613 if (is_writable_pte(*spte))
614 kvm_set_pfn_dirty(pfn);
615 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
616 if (!*rmapp) { 651 if (!*rmapp) {
617 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 652 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
618 BUG(); 653 BUG();
@@ -644,6 +679,32 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
644 } 679 }
645} 680}
646 681
682static void set_spte_track_bits(u64 *sptep, u64 new_spte)
683{
684 pfn_t pfn;
685 u64 old_spte = *sptep;
686
687 if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) ||
688 old_spte & shadow_accessed_mask) {
689 __set_spte(sptep, new_spte);
690 } else
691 old_spte = __xchg_spte(sptep, new_spte);
692
693 if (!is_rmap_spte(old_spte))
694 return;
695 pfn = spte_to_pfn(old_spte);
696 if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
697 kvm_set_pfn_accessed(pfn);
698 if (is_writable_pte(old_spte))
699 kvm_set_pfn_dirty(pfn);
700}
701
702static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
703{
704 set_spte_track_bits(sptep, new_spte);
705 rmap_remove(kvm, sptep);
706}
707
647static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) 708static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
648{ 709{
649 struct kvm_rmap_desc *desc; 710 struct kvm_rmap_desc *desc;
@@ -676,7 +737,6 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
676 u64 *spte; 737 u64 *spte;
677 int i, write_protected = 0; 738 int i, write_protected = 0;
678 739
679 gfn = unalias_gfn(kvm, gfn);
680 rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); 740 rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL);
681 741
682 spte = rmap_next(kvm, rmapp, NULL); 742 spte = rmap_next(kvm, rmapp, NULL);
@@ -685,7 +745,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
685 BUG_ON(!(*spte & PT_PRESENT_MASK)); 745 BUG_ON(!(*spte & PT_PRESENT_MASK));
686 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 746 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
687 if (is_writable_pte(*spte)) { 747 if (is_writable_pte(*spte)) {
688 __set_spte(spte, *spte & ~PT_WRITABLE_MASK); 748 update_spte(spte, *spte & ~PT_WRITABLE_MASK);
689 write_protected = 1; 749 write_protected = 1;
690 } 750 }
691 spte = rmap_next(kvm, rmapp, spte); 751 spte = rmap_next(kvm, rmapp, spte);
@@ -709,9 +769,9 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
709 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); 769 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
710 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); 770 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
711 if (is_writable_pte(*spte)) { 771 if (is_writable_pte(*spte)) {
712 rmap_remove(kvm, spte); 772 drop_spte(kvm, spte,
773 shadow_trap_nonpresent_pte);
713 --kvm->stat.lpages; 774 --kvm->stat.lpages;
714 __set_spte(spte, shadow_trap_nonpresent_pte);
715 spte = NULL; 775 spte = NULL;
716 write_protected = 1; 776 write_protected = 1;
717 } 777 }
@@ -731,8 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
731 while ((spte = rmap_next(kvm, rmapp, NULL))) { 791 while ((spte = rmap_next(kvm, rmapp, NULL))) {
732 BUG_ON(!(*spte & PT_PRESENT_MASK)); 792 BUG_ON(!(*spte & PT_PRESENT_MASK));
733 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); 793 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
734 rmap_remove(kvm, spte); 794 drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
735 __set_spte(spte, shadow_trap_nonpresent_pte);
736 need_tlb_flush = 1; 795 need_tlb_flush = 1;
737 } 796 }
738 return need_tlb_flush; 797 return need_tlb_flush;
@@ -754,8 +813,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
754 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); 813 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
755 need_flush = 1; 814 need_flush = 1;
756 if (pte_write(*ptep)) { 815 if (pte_write(*ptep)) {
757 rmap_remove(kvm, spte); 816 drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
758 __set_spte(spte, shadow_trap_nonpresent_pte);
759 spte = rmap_next(kvm, rmapp, NULL); 817 spte = rmap_next(kvm, rmapp, NULL);
760 } else { 818 } else {
761 new_spte = *spte &~ (PT64_BASE_ADDR_MASK); 819 new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
@@ -763,9 +821,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
763 821
764 new_spte &= ~PT_WRITABLE_MASK; 822 new_spte &= ~PT_WRITABLE_MASK;
765 new_spte &= ~SPTE_HOST_WRITEABLE; 823 new_spte &= ~SPTE_HOST_WRITEABLE;
766 if (is_writable_pte(*spte)) 824 new_spte &= ~shadow_accessed_mask;
767 kvm_set_pfn_dirty(spte_to_pfn(*spte)); 825 set_spte_track_bits(spte, new_spte);
768 __set_spte(spte, new_spte);
769 spte = rmap_next(kvm, rmapp, spte); 826 spte = rmap_next(kvm, rmapp, spte);
770 } 827 }
771 } 828 }
@@ -799,8 +856,12 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
799 ret = handler(kvm, &memslot->rmap[gfn_offset], data); 856 ret = handler(kvm, &memslot->rmap[gfn_offset], data);
800 857
801 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 858 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
802 int idx = gfn_offset; 859 unsigned long idx;
803 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 860 int sh;
861
862 sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j);
863 idx = ((memslot->base_gfn+gfn_offset) >> sh) -
864 (memslot->base_gfn >> sh);
804 ret |= handler(kvm, 865 ret |= handler(kvm,
805 &memslot->lpage_info[j][idx].rmap_pde, 866 &memslot->lpage_info[j][idx].rmap_pde,
806 data); 867 data);
@@ -863,7 +924,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
863 924
864 sp = page_header(__pa(spte)); 925 sp = page_header(__pa(spte));
865 926
866 gfn = unalias_gfn(vcpu->kvm, gfn);
867 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 927 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
868 928
869 kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); 929 kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
@@ -894,10 +954,12 @@ static int is_empty_shadow_page(u64 *spt)
894static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) 954static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
895{ 955{
896 ASSERT(is_empty_shadow_page(sp->spt)); 956 ASSERT(is_empty_shadow_page(sp->spt));
957 hlist_del(&sp->hash_link);
897 list_del(&sp->link); 958 list_del(&sp->link);
898 __free_page(virt_to_page(sp->spt)); 959 __free_page(virt_to_page(sp->spt));
899 __free_page(virt_to_page(sp->gfns)); 960 if (!sp->role.direct)
900 kfree(sp); 961 __free_page(virt_to_page(sp->gfns));
962 kmem_cache_free(mmu_page_header_cache, sp);
901 ++kvm->arch.n_free_mmu_pages; 963 ++kvm->arch.n_free_mmu_pages;
902} 964}
903 965
@@ -907,13 +969,15 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
907} 969}
908 970
909static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 971static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
910 u64 *parent_pte) 972 u64 *parent_pte, int direct)
911{ 973{
912 struct kvm_mmu_page *sp; 974 struct kvm_mmu_page *sp;
913 975
914 sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); 976 sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
915 sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 977 sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
916 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 978 if (!direct)
979 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache,
980 PAGE_SIZE);
917 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 981 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
918 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 982 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
919 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 983 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
@@ -998,7 +1062,6 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
998 BUG(); 1062 BUG();
999} 1063}
1000 1064
1001
1002static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) 1065static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
1003{ 1066{
1004 struct kvm_pte_chain *pte_chain; 1067 struct kvm_pte_chain *pte_chain;
@@ -1008,63 +1071,37 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
1008 1071
1009 if (!sp->multimapped && sp->parent_pte) { 1072 if (!sp->multimapped && sp->parent_pte) {
1010 parent_sp = page_header(__pa(sp->parent_pte)); 1073 parent_sp = page_header(__pa(sp->parent_pte));
1011 fn(parent_sp); 1074 fn(parent_sp, sp->parent_pte);
1012 mmu_parent_walk(parent_sp, fn);
1013 return; 1075 return;
1014 } 1076 }
1077
1015 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) 1078 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
1016 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { 1079 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
1017 if (!pte_chain->parent_ptes[i]) 1080 u64 *spte = pte_chain->parent_ptes[i];
1081
1082 if (!spte)
1018 break; 1083 break;
1019 parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); 1084 parent_sp = page_header(__pa(spte));
1020 fn(parent_sp); 1085 fn(parent_sp, spte);
1021 mmu_parent_walk(parent_sp, fn);
1022 } 1086 }
1023} 1087}
1024 1088
1025static void kvm_mmu_update_unsync_bitmap(u64 *spte) 1089static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte);
1090static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
1026{ 1091{
1027 unsigned int index; 1092 mmu_parent_walk(sp, mark_unsync);
1028 struct kvm_mmu_page *sp = page_header(__pa(spte));
1029
1030 index = spte - sp->spt;
1031 if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
1032 sp->unsync_children++;
1033 WARN_ON(!sp->unsync_children);
1034} 1093}
1035 1094
1036static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) 1095static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte)
1037{ 1096{
1038 struct kvm_pte_chain *pte_chain; 1097 unsigned int index;
1039 struct hlist_node *node;
1040 int i;
1041 1098
1042 if (!sp->parent_pte) 1099 index = spte - sp->spt;
1100 if (__test_and_set_bit(index, sp->unsync_child_bitmap))
1043 return; 1101 return;
1044 1102 if (sp->unsync_children++)
1045 if (!sp->multimapped) {
1046 kvm_mmu_update_unsync_bitmap(sp->parent_pte);
1047 return; 1103 return;
1048 } 1104 kvm_mmu_mark_parents_unsync(sp);
1049
1050 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
1051 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
1052 if (!pte_chain->parent_ptes[i])
1053 break;
1054 kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]);
1055 }
1056}
1057
1058static int unsync_walk_fn(struct kvm_mmu_page *sp)
1059{
1060 kvm_mmu_update_parents_unsync(sp);
1061 return 1;
1062}
1063
1064static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
1065{
1066 mmu_parent_walk(sp, unsync_walk_fn);
1067 kvm_mmu_update_parents_unsync(sp);
1068} 1105}
1069 1106
1070static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, 1107static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
@@ -1077,7 +1114,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
1077} 1114}
1078 1115
1079static int nonpaging_sync_page(struct kvm_vcpu *vcpu, 1116static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
1080 struct kvm_mmu_page *sp) 1117 struct kvm_mmu_page *sp, bool clear_unsync)
1081{ 1118{
1082 return 1; 1119 return 1;
1083} 1120}
@@ -1123,35 +1160,40 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
1123 int i, ret, nr_unsync_leaf = 0; 1160 int i, ret, nr_unsync_leaf = 0;
1124 1161
1125 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1162 for_each_unsync_children(sp->unsync_child_bitmap, i) {
1163 struct kvm_mmu_page *child;
1126 u64 ent = sp->spt[i]; 1164 u64 ent = sp->spt[i];
1127 1165
1128 if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { 1166 if (!is_shadow_present_pte(ent) || is_large_pte(ent))
1129 struct kvm_mmu_page *child; 1167 goto clear_child_bitmap;
1130 child = page_header(ent & PT64_BASE_ADDR_MASK); 1168
1131 1169 child = page_header(ent & PT64_BASE_ADDR_MASK);
1132 if (child->unsync_children) { 1170
1133 if (mmu_pages_add(pvec, child, i)) 1171 if (child->unsync_children) {
1134 return -ENOSPC; 1172 if (mmu_pages_add(pvec, child, i))
1135 1173 return -ENOSPC;
1136 ret = __mmu_unsync_walk(child, pvec); 1174
1137 if (!ret) 1175 ret = __mmu_unsync_walk(child, pvec);
1138 __clear_bit(i, sp->unsync_child_bitmap); 1176 if (!ret)
1139 else if (ret > 0) 1177 goto clear_child_bitmap;
1140 nr_unsync_leaf += ret; 1178 else if (ret > 0)
1141 else 1179 nr_unsync_leaf += ret;
1142 return ret; 1180 else
1143 } 1181 return ret;
1182 } else if (child->unsync) {
1183 nr_unsync_leaf++;
1184 if (mmu_pages_add(pvec, child, i))
1185 return -ENOSPC;
1186 } else
1187 goto clear_child_bitmap;
1144 1188
1145 if (child->unsync) { 1189 continue;
1146 nr_unsync_leaf++; 1190
1147 if (mmu_pages_add(pvec, child, i)) 1191clear_child_bitmap:
1148 return -ENOSPC; 1192 __clear_bit(i, sp->unsync_child_bitmap);
1149 } 1193 sp->unsync_children--;
1150 } 1194 WARN_ON((int)sp->unsync_children < 0);
1151 } 1195 }
1152 1196
1153 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
1154 sp->unsync_children = 0;
1155 1197
1156 return nr_unsync_leaf; 1198 return nr_unsync_leaf;
1157} 1199}
@@ -1166,26 +1208,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
1166 return __mmu_unsync_walk(sp, pvec); 1208 return __mmu_unsync_walk(sp, pvec);
1167} 1209}
1168 1210
1169static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1170{
1171 unsigned index;
1172 struct hlist_head *bucket;
1173 struct kvm_mmu_page *sp;
1174 struct hlist_node *node;
1175
1176 pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
1177 index = kvm_page_table_hashfn(gfn);
1178 bucket = &kvm->arch.mmu_page_hash[index];
1179 hlist_for_each_entry(sp, node, bucket, hash_link)
1180 if (sp->gfn == gfn && !sp->role.direct
1181 && !sp->role.invalid) {
1182 pgprintk("%s: found role %x\n",
1183 __func__, sp->role.word);
1184 return sp;
1185 }
1186 return NULL;
1187}
1188
1189static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1211static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1190{ 1212{
1191 WARN_ON(!sp->unsync); 1213 WARN_ON(!sp->unsync);
@@ -1194,20 +1216,36 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1194 --kvm->stat.mmu_unsync; 1216 --kvm->stat.mmu_unsync;
1195} 1217}
1196 1218
1197static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); 1219static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
1220 struct list_head *invalid_list);
1221static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1222 struct list_head *invalid_list);
1223
1224#define for_each_gfn_sp(kvm, sp, gfn, pos) \
1225 hlist_for_each_entry(sp, pos, \
1226 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
1227 if ((sp)->gfn != (gfn)) {} else
1228
1229#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \
1230 hlist_for_each_entry(sp, pos, \
1231 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
1232 if ((sp)->gfn != (gfn) || (sp)->role.direct || \
1233 (sp)->role.invalid) {} else
1198 1234
1199static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1235/* @sp->gfn should be write-protected at the call site */
1236static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1237 struct list_head *invalid_list, bool clear_unsync)
1200{ 1238{
1201 if (sp->role.cr4_pae != !!is_pae(vcpu)) { 1239 if (sp->role.cr4_pae != !!is_pae(vcpu)) {
1202 kvm_mmu_zap_page(vcpu->kvm, sp); 1240 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
1203 return 1; 1241 return 1;
1204 } 1242 }
1205 1243
1206 if (rmap_write_protect(vcpu->kvm, sp->gfn)) 1244 if (clear_unsync)
1207 kvm_flush_remote_tlbs(vcpu->kvm); 1245 kvm_unlink_unsync_page(vcpu->kvm, sp);
1208 kvm_unlink_unsync_page(vcpu->kvm, sp); 1246
1209 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1247 if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) {
1210 kvm_mmu_zap_page(vcpu->kvm, sp); 1248 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
1211 return 1; 1249 return 1;
1212 } 1250 }
1213 1251
@@ -1215,6 +1253,52 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1215 return 0; 1253 return 0;
1216} 1254}
1217 1255
1256static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
1257 struct kvm_mmu_page *sp)
1258{
1259 LIST_HEAD(invalid_list);
1260 int ret;
1261
1262 ret = __kvm_sync_page(vcpu, sp, &invalid_list, false);
1263 if (ret)
1264 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
1265
1266 return ret;
1267}
1268
1269static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1270 struct list_head *invalid_list)
1271{
1272 return __kvm_sync_page(vcpu, sp, invalid_list, true);
1273}
1274
1275/* @gfn should be write-protected at the call site */
1276static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
1277{
1278 struct kvm_mmu_page *s;
1279 struct hlist_node *node;
1280 LIST_HEAD(invalid_list);
1281 bool flush = false;
1282
1283 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
1284 if (!s->unsync)
1285 continue;
1286
1287 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
1288 if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
1289 (vcpu->arch.mmu.sync_page(vcpu, s, true))) {
1290 kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
1291 continue;
1292 }
1293 kvm_unlink_unsync_page(vcpu->kvm, s);
1294 flush = true;
1295 }
1296
1297 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
1298 if (flush)
1299 kvm_mmu_flush_tlb(vcpu);
1300}
1301
1218struct mmu_page_path { 1302struct mmu_page_path {
1219 struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; 1303 struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
1220 unsigned int idx[PT64_ROOT_LEVEL-1]; 1304 unsigned int idx[PT64_ROOT_LEVEL-1];
@@ -1281,6 +1365,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
1281 struct kvm_mmu_page *sp; 1365 struct kvm_mmu_page *sp;
1282 struct mmu_page_path parents; 1366 struct mmu_page_path parents;
1283 struct kvm_mmu_pages pages; 1367 struct kvm_mmu_pages pages;
1368 LIST_HEAD(invalid_list);
1284 1369
1285 kvm_mmu_pages_init(parent, &parents, &pages); 1370 kvm_mmu_pages_init(parent, &parents, &pages);
1286 while (mmu_unsync_walk(parent, &pages)) { 1371 while (mmu_unsync_walk(parent, &pages)) {
@@ -1293,9 +1378,10 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
1293 kvm_flush_remote_tlbs(vcpu->kvm); 1378 kvm_flush_remote_tlbs(vcpu->kvm);
1294 1379
1295 for_each_sp(pages, sp, parents, i) { 1380 for_each_sp(pages, sp, parents, i) {
1296 kvm_sync_page(vcpu, sp); 1381 kvm_sync_page(vcpu, sp, &invalid_list);
1297 mmu_pages_clear_parents(&parents); 1382 mmu_pages_clear_parents(&parents);
1298 } 1383 }
1384 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
1299 cond_resched_lock(&vcpu->kvm->mmu_lock); 1385 cond_resched_lock(&vcpu->kvm->mmu_lock);
1300 kvm_mmu_pages_init(parent, &parents, &pages); 1386 kvm_mmu_pages_init(parent, &parents, &pages);
1301 } 1387 }
@@ -1310,11 +1396,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1310 u64 *parent_pte) 1396 u64 *parent_pte)
1311{ 1397{
1312 union kvm_mmu_page_role role; 1398 union kvm_mmu_page_role role;
1313 unsigned index;
1314 unsigned quadrant; 1399 unsigned quadrant;
1315 struct hlist_head *bucket;
1316 struct kvm_mmu_page *sp; 1400 struct kvm_mmu_page *sp;
1317 struct hlist_node *node, *tmp; 1401 struct hlist_node *node;
1402 bool need_sync = false;
1318 1403
1319 role = vcpu->arch.mmu.base_role; 1404 role = vcpu->arch.mmu.base_role;
1320 role.level = level; 1405 role.level = level;
@@ -1322,40 +1407,45 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1322 if (role.direct) 1407 if (role.direct)
1323 role.cr4_pae = 0; 1408 role.cr4_pae = 0;
1324 role.access = access; 1409 role.access = access;
1325 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1410 if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
1326 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 1411 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
1327 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 1412 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
1328 role.quadrant = quadrant; 1413 role.quadrant = quadrant;
1329 } 1414 }
1330 index = kvm_page_table_hashfn(gfn); 1415 for_each_gfn_sp(vcpu->kvm, sp, gfn, node) {
1331 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1416 if (!need_sync && sp->unsync)
1332 hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) 1417 need_sync = true;
1333 if (sp->gfn == gfn) {
1334 if (sp->unsync)
1335 if (kvm_sync_page(vcpu, sp))
1336 continue;
1337 1418
1338 if (sp->role.word != role.word) 1419 if (sp->role.word != role.word)
1339 continue; 1420 continue;
1340 1421
1341 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1422 if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
1342 if (sp->unsync_children) { 1423 break;
1343 set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); 1424
1344 kvm_mmu_mark_parents_unsync(sp); 1425 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
1345 } 1426 if (sp->unsync_children) {
1346 trace_kvm_mmu_get_page(sp, false); 1427 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
1347 return sp; 1428 kvm_mmu_mark_parents_unsync(sp);
1348 } 1429 } else if (sp->unsync)
1430 kvm_mmu_mark_parents_unsync(sp);
1431
1432 trace_kvm_mmu_get_page(sp, false);
1433 return sp;
1434 }
1349 ++vcpu->kvm->stat.mmu_cache_miss; 1435 ++vcpu->kvm->stat.mmu_cache_miss;
1350 sp = kvm_mmu_alloc_page(vcpu, parent_pte); 1436 sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
1351 if (!sp) 1437 if (!sp)
1352 return sp; 1438 return sp;
1353 sp->gfn = gfn; 1439 sp->gfn = gfn;
1354 sp->role = role; 1440 sp->role = role;
1355 hlist_add_head(&sp->hash_link, bucket); 1441 hlist_add_head(&sp->hash_link,
1442 &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
1356 if (!direct) { 1443 if (!direct) {
1357 if (rmap_write_protect(vcpu->kvm, gfn)) 1444 if (rmap_write_protect(vcpu->kvm, gfn))
1358 kvm_flush_remote_tlbs(vcpu->kvm); 1445 kvm_flush_remote_tlbs(vcpu->kvm);
1446 if (level > PT_PAGE_TABLE_LEVEL && need_sync)
1447 kvm_sync_pages(vcpu, gfn);
1448
1359 account_shadowed(vcpu->kvm, gfn); 1449 account_shadowed(vcpu->kvm, gfn);
1360 } 1450 }
1361 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) 1451 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1402,6 +1492,47 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
1402 --iterator->level; 1492 --iterator->level;
1403} 1493}
1404 1494
1495static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
1496{
1497 u64 spte;
1498
1499 spte = __pa(sp->spt)
1500 | PT_PRESENT_MASK | PT_ACCESSED_MASK
1501 | PT_WRITABLE_MASK | PT_USER_MASK;
1502 __set_spte(sptep, spte);
1503}
1504
1505static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
1506{
1507 if (is_large_pte(*sptep)) {
1508 drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
1509 kvm_flush_remote_tlbs(vcpu->kvm);
1510 }
1511}
1512
1513static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1514 unsigned direct_access)
1515{
1516 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
1517 struct kvm_mmu_page *child;
1518
1519 /*
1520 * For the direct sp, if the guest pte's dirty bit
1521 * changed form clean to dirty, it will corrupt the
1522 * sp's access: allow writable in the read-only sp,
1523 * so we should update the spte at this point to get
1524 * a new sp with the correct access.
1525 */
1526 child = page_header(*sptep & PT64_BASE_ADDR_MASK);
1527 if (child->role.access == direct_access)
1528 return;
1529
1530 mmu_page_remove_parent_pte(child, sptep);
1531 __set_spte(sptep, shadow_trap_nonpresent_pte);
1532 kvm_flush_remote_tlbs(vcpu->kvm);
1533 }
1534}
1535
1405static void kvm_mmu_page_unlink_children(struct kvm *kvm, 1536static void kvm_mmu_page_unlink_children(struct kvm *kvm,
1406 struct kvm_mmu_page *sp) 1537 struct kvm_mmu_page *sp)
1407{ 1538{
@@ -1422,7 +1553,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
1422 } else { 1553 } else {
1423 if (is_large_pte(ent)) 1554 if (is_large_pte(ent))
1424 --kvm->stat.lpages; 1555 --kvm->stat.lpages;
1425 rmap_remove(kvm, &pt[i]); 1556 drop_spte(kvm, &pt[i],
1557 shadow_trap_nonpresent_pte);
1426 } 1558 }
1427 } 1559 }
1428 pt[i] = shadow_trap_nonpresent_pte; 1560 pt[i] = shadow_trap_nonpresent_pte;
@@ -1464,7 +1596,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
1464} 1596}
1465 1597
1466static int mmu_zap_unsync_children(struct kvm *kvm, 1598static int mmu_zap_unsync_children(struct kvm *kvm,
1467 struct kvm_mmu_page *parent) 1599 struct kvm_mmu_page *parent,
1600 struct list_head *invalid_list)
1468{ 1601{
1469 int i, zapped = 0; 1602 int i, zapped = 0;
1470 struct mmu_page_path parents; 1603 struct mmu_page_path parents;
@@ -1478,7 +1611,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
1478 struct kvm_mmu_page *sp; 1611 struct kvm_mmu_page *sp;
1479 1612
1480 for_each_sp(pages, sp, parents, i) { 1613 for_each_sp(pages, sp, parents, i) {
1481 kvm_mmu_zap_page(kvm, sp); 1614 kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
1482 mmu_pages_clear_parents(&parents); 1615 mmu_pages_clear_parents(&parents);
1483 zapped++; 1616 zapped++;
1484 } 1617 }
@@ -1488,32 +1621,52 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
1488 return zapped; 1621 return zapped;
1489} 1622}
1490 1623
1491static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1624static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
1625 struct list_head *invalid_list)
1492{ 1626{
1493 int ret; 1627 int ret;
1494 1628
1495 trace_kvm_mmu_zap_page(sp); 1629 trace_kvm_mmu_prepare_zap_page(sp);
1496 ++kvm->stat.mmu_shadow_zapped; 1630 ++kvm->stat.mmu_shadow_zapped;
1497 ret = mmu_zap_unsync_children(kvm, sp); 1631 ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
1498 kvm_mmu_page_unlink_children(kvm, sp); 1632 kvm_mmu_page_unlink_children(kvm, sp);
1499 kvm_mmu_unlink_parents(kvm, sp); 1633 kvm_mmu_unlink_parents(kvm, sp);
1500 kvm_flush_remote_tlbs(kvm);
1501 if (!sp->role.invalid && !sp->role.direct) 1634 if (!sp->role.invalid && !sp->role.direct)
1502 unaccount_shadowed(kvm, sp->gfn); 1635 unaccount_shadowed(kvm, sp->gfn);
1503 if (sp->unsync) 1636 if (sp->unsync)
1504 kvm_unlink_unsync_page(kvm, sp); 1637 kvm_unlink_unsync_page(kvm, sp);
1505 if (!sp->root_count) { 1638 if (!sp->root_count) {
1506 hlist_del(&sp->hash_link); 1639 /* Count self */
1507 kvm_mmu_free_page(kvm, sp); 1640 ret++;
1641 list_move(&sp->link, invalid_list);
1508 } else { 1642 } else {
1509 sp->role.invalid = 1;
1510 list_move(&sp->link, &kvm->arch.active_mmu_pages); 1643 list_move(&sp->link, &kvm->arch.active_mmu_pages);
1511 kvm_reload_remote_mmus(kvm); 1644 kvm_reload_remote_mmus(kvm);
1512 } 1645 }
1646
1647 sp->role.invalid = 1;
1513 kvm_mmu_reset_last_pte_updated(kvm); 1648 kvm_mmu_reset_last_pte_updated(kvm);
1514 return ret; 1649 return ret;
1515} 1650}
1516 1651
1652static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1653 struct list_head *invalid_list)
1654{
1655 struct kvm_mmu_page *sp;
1656
1657 if (list_empty(invalid_list))
1658 return;
1659
1660 kvm_flush_remote_tlbs(kvm);
1661
1662 do {
1663 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
1664 WARN_ON(!sp->role.invalid || sp->root_count);
1665 kvm_mmu_free_page(kvm, sp);
1666 } while (!list_empty(invalid_list));
1667
1668}
1669
1517/* 1670/*
1518 * Changing the number of mmu pages allocated to the vm 1671 * Changing the number of mmu pages allocated to the vm
1519 * Note: if kvm_nr_mmu_pages is too small, you will get dead lock 1672 * Note: if kvm_nr_mmu_pages is too small, you will get dead lock
@@ -1521,6 +1674,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1521void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) 1674void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1522{ 1675{
1523 int used_pages; 1676 int used_pages;
1677 LIST_HEAD(invalid_list);
1524 1678
1525 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; 1679 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
1526 used_pages = max(0, used_pages); 1680 used_pages = max(0, used_pages);
@@ -1538,9 +1692,10 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1538 1692
1539 page = container_of(kvm->arch.active_mmu_pages.prev, 1693 page = container_of(kvm->arch.active_mmu_pages.prev,
1540 struct kvm_mmu_page, link); 1694 struct kvm_mmu_page, link);
1541 used_pages -= kvm_mmu_zap_page(kvm, page); 1695 used_pages -= kvm_mmu_prepare_zap_page(kvm, page,
1542 used_pages--; 1696 &invalid_list);
1543 } 1697 }
1698 kvm_mmu_commit_zap_page(kvm, &invalid_list);
1544 kvm_nr_mmu_pages = used_pages; 1699 kvm_nr_mmu_pages = used_pages;
1545 kvm->arch.n_free_mmu_pages = 0; 1700 kvm->arch.n_free_mmu_pages = 0;
1546 } 1701 }
@@ -1553,47 +1708,36 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1553 1708
1554static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) 1709static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1555{ 1710{
1556 unsigned index;
1557 struct hlist_head *bucket;
1558 struct kvm_mmu_page *sp; 1711 struct kvm_mmu_page *sp;
1559 struct hlist_node *node, *n; 1712 struct hlist_node *node;
1713 LIST_HEAD(invalid_list);
1560 int r; 1714 int r;
1561 1715
1562 pgprintk("%s: looking for gfn %lx\n", __func__, gfn); 1716 pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
1563 r = 0; 1717 r = 0;
1564 index = kvm_page_table_hashfn(gfn); 1718
1565 bucket = &kvm->arch.mmu_page_hash[index]; 1719 for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
1566restart: 1720 pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
1567 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 1721 sp->role.word);
1568 if (sp->gfn == gfn && !sp->role.direct) { 1722 r = 1;
1569 pgprintk("%s: gfn %lx role %x\n", __func__, gfn, 1723 kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
1570 sp->role.word); 1724 }
1571 r = 1; 1725 kvm_mmu_commit_zap_page(kvm, &invalid_list);
1572 if (kvm_mmu_zap_page(kvm, sp))
1573 goto restart;
1574 }
1575 return r; 1726 return r;
1576} 1727}
1577 1728
1578static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) 1729static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1579{ 1730{
1580 unsigned index;
1581 struct hlist_head *bucket;
1582 struct kvm_mmu_page *sp; 1731 struct kvm_mmu_page *sp;
1583 struct hlist_node *node, *nn; 1732 struct hlist_node *node;
1733 LIST_HEAD(invalid_list);
1584 1734
1585 index = kvm_page_table_hashfn(gfn); 1735 for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
1586 bucket = &kvm->arch.mmu_page_hash[index]; 1736 pgprintk("%s: zap %lx %x\n",
1587restart: 1737 __func__, gfn, sp->role.word);
1588 hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { 1738 kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
1589 if (sp->gfn == gfn && !sp->role.direct
1590 && !sp->role.invalid) {
1591 pgprintk("%s: zap %lx %x\n",
1592 __func__, gfn, sp->role.word);
1593 if (kvm_mmu_zap_page(kvm, sp))
1594 goto restart;
1595 }
1596 } 1739 }
1740 kvm_mmu_commit_zap_page(kvm, &invalid_list);
1597} 1741}
1598 1742
1599static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) 1743static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
@@ -1723,47 +1867,51 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
1723} 1867}
1724EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); 1868EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
1725 1869
1726static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1870static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1727{ 1871{
1728 unsigned index;
1729 struct hlist_head *bucket;
1730 struct kvm_mmu_page *s;
1731 struct hlist_node *node, *n;
1732
1733 index = kvm_page_table_hashfn(sp->gfn);
1734 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1735 /* don't unsync if pagetable is shadowed with multiple roles */
1736 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
1737 if (s->gfn != sp->gfn || s->role.direct)
1738 continue;
1739 if (s->role.word != sp->role.word)
1740 return 1;
1741 }
1742 trace_kvm_mmu_unsync_page(sp); 1872 trace_kvm_mmu_unsync_page(sp);
1743 ++vcpu->kvm->stat.mmu_unsync; 1873 ++vcpu->kvm->stat.mmu_unsync;
1744 sp->unsync = 1; 1874 sp->unsync = 1;
1745 1875
1746 kvm_mmu_mark_parents_unsync(sp); 1876 kvm_mmu_mark_parents_unsync(sp);
1747
1748 mmu_convert_notrap(sp); 1877 mmu_convert_notrap(sp);
1749 return 0; 1878}
1879
1880static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
1881{
1882 struct kvm_mmu_page *s;
1883 struct hlist_node *node;
1884
1885 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
1886 if (s->unsync)
1887 continue;
1888 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
1889 __kvm_unsync_page(vcpu, s);
1890 }
1750} 1891}
1751 1892
1752static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, 1893static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
1753 bool can_unsync) 1894 bool can_unsync)
1754{ 1895{
1755 struct kvm_mmu_page *shadow; 1896 struct kvm_mmu_page *s;
1897 struct hlist_node *node;
1898 bool need_unsync = false;
1756 1899
1757 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); 1900 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
1758 if (shadow) { 1901 if (!can_unsync)
1759 if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
1760 return 1; 1902 return 1;
1761 if (shadow->unsync) 1903
1762 return 0; 1904 if (s->role.level != PT_PAGE_TABLE_LEVEL)
1763 if (can_unsync && oos_shadow) 1905 return 1;
1764 return kvm_unsync_page(vcpu, shadow); 1906
1765 return 1; 1907 if (!need_unsync && !s->unsync) {
1908 if (!oos_shadow)
1909 return 1;
1910 need_unsync = true;
1911 }
1766 } 1912 }
1913 if (need_unsync)
1914 kvm_unsync_pages(vcpu, gfn);
1767 return 0; 1915 return 0;
1768} 1916}
1769 1917
@@ -1804,13 +1952,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1804 spte |= (u64)pfn << PAGE_SHIFT; 1952 spte |= (u64)pfn << PAGE_SHIFT;
1805 1953
1806 if ((pte_access & ACC_WRITE_MASK) 1954 if ((pte_access & ACC_WRITE_MASK)
1807 || (write_fault && !is_write_protection(vcpu) && !user_fault)) { 1955 || (!tdp_enabled && write_fault && !is_write_protection(vcpu)
1956 && !user_fault)) {
1808 1957
1809 if (level > PT_PAGE_TABLE_LEVEL && 1958 if (level > PT_PAGE_TABLE_LEVEL &&
1810 has_wrprotected_page(vcpu->kvm, gfn, level)) { 1959 has_wrprotected_page(vcpu->kvm, gfn, level)) {
1811 ret = 1; 1960 ret = 1;
1812 spte = shadow_trap_nonpresent_pte; 1961 drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
1813 goto set_pte; 1962 goto done;
1814 } 1963 }
1815 1964
1816 spte |= PT_WRITABLE_MASK; 1965 spte |= PT_WRITABLE_MASK;
@@ -1841,7 +1990,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1841 mark_page_dirty(vcpu->kvm, gfn); 1990 mark_page_dirty(vcpu->kvm, gfn);
1842 1991
1843set_pte: 1992set_pte:
1844 __set_spte(sptep, spte); 1993 if (is_writable_pte(*sptep) && !is_writable_pte(spte))
1994 kvm_set_pfn_dirty(pfn);
1995 update_spte(sptep, spte);
1996done:
1845 return ret; 1997 return ret;
1846} 1998}
1847 1999
@@ -1853,7 +2005,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1853 bool reset_host_protection) 2005 bool reset_host_protection)
1854{ 2006{
1855 int was_rmapped = 0; 2007 int was_rmapped = 0;
1856 int was_writable = is_writable_pte(*sptep);
1857 int rmap_count; 2008 int rmap_count;
1858 2009
1859 pgprintk("%s: spte %llx access %x write_fault %d" 2010 pgprintk("%s: spte %llx access %x write_fault %d"
@@ -1878,8 +2029,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1878 } else if (pfn != spte_to_pfn(*sptep)) { 2029 } else if (pfn != spte_to_pfn(*sptep)) {
1879 pgprintk("hfn old %lx new %lx\n", 2030 pgprintk("hfn old %lx new %lx\n",
1880 spte_to_pfn(*sptep), pfn); 2031 spte_to_pfn(*sptep), pfn);
1881 rmap_remove(vcpu->kvm, sptep); 2032 drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
1882 __set_spte(sptep, shadow_trap_nonpresent_pte);
1883 kvm_flush_remote_tlbs(vcpu->kvm); 2033 kvm_flush_remote_tlbs(vcpu->kvm);
1884 } else 2034 } else
1885 was_rmapped = 1; 2035 was_rmapped = 1;
@@ -1890,7 +2040,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1890 reset_host_protection)) { 2040 reset_host_protection)) {
1891 if (write_fault) 2041 if (write_fault)
1892 *ptwrite = 1; 2042 *ptwrite = 1;
1893 kvm_x86_ops->tlb_flush(vcpu); 2043 kvm_mmu_flush_tlb(vcpu);
1894 } 2044 }
1895 2045
1896 pgprintk("%s: setting spte %llx\n", __func__, *sptep); 2046 pgprintk("%s: setting spte %llx\n", __func__, *sptep);
@@ -1904,15 +2054,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1904 page_header_update_slot(vcpu->kvm, sptep, gfn); 2054 page_header_update_slot(vcpu->kvm, sptep, gfn);
1905 if (!was_rmapped) { 2055 if (!was_rmapped) {
1906 rmap_count = rmap_add(vcpu, sptep, gfn); 2056 rmap_count = rmap_add(vcpu, sptep, gfn);
1907 kvm_release_pfn_clean(pfn);
1908 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 2057 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1909 rmap_recycle(vcpu, sptep, gfn); 2058 rmap_recycle(vcpu, sptep, gfn);
1910 } else {
1911 if (was_writable)
1912 kvm_release_pfn_dirty(pfn);
1913 else
1914 kvm_release_pfn_clean(pfn);
1915 } 2059 }
2060 kvm_release_pfn_clean(pfn);
1916 if (speculative) { 2061 if (speculative) {
1917 vcpu->arch.last_pte_updated = sptep; 2062 vcpu->arch.last_pte_updated = sptep;
1918 vcpu->arch.last_pte_gfn = gfn; 2063 vcpu->arch.last_pte_gfn = gfn;
@@ -1941,7 +2086,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1941 } 2086 }
1942 2087
1943 if (*iterator.sptep == shadow_trap_nonpresent_pte) { 2088 if (*iterator.sptep == shadow_trap_nonpresent_pte) {
1944 pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; 2089 u64 base_addr = iterator.addr;
2090
2091 base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
2092 pseudo_gfn = base_addr >> PAGE_SHIFT;
1945 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, 2093 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
1946 iterator.level - 1, 2094 iterator.level - 1,
1947 1, ACC_ALL, iterator.sptep); 2095 1, ACC_ALL, iterator.sptep);
@@ -1960,6 +2108,29 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1960 return pt_write; 2108 return pt_write;
1961} 2109}
1962 2110
2111static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
2112{
2113 char buf[1];
2114 void __user *hva;
2115 int r;
2116
2117 /* Touch the page, so send SIGBUS */
2118 hva = (void __user *)gfn_to_hva(kvm, gfn);
2119 r = copy_from_user(buf, hva, 1);
2120}
2121
2122static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
2123{
2124 kvm_release_pfn_clean(pfn);
2125 if (is_hwpoison_pfn(pfn)) {
2126 kvm_send_hwpoison_signal(kvm, gfn);
2127 return 0;
2128 } else if (is_fault_pfn(pfn))
2129 return -EFAULT;
2130
2131 return 1;
2132}
2133
1963static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) 2134static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1964{ 2135{
1965 int r; 2136 int r;
@@ -1983,10 +2154,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1983 pfn = gfn_to_pfn(vcpu->kvm, gfn); 2154 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1984 2155
1985 /* mmio */ 2156 /* mmio */
1986 if (is_error_pfn(pfn)) { 2157 if (is_error_pfn(pfn))
1987 kvm_release_pfn_clean(pfn); 2158 return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
1988 return 1;
1989 }
1990 2159
1991 spin_lock(&vcpu->kvm->mmu_lock); 2160 spin_lock(&vcpu->kvm->mmu_lock);
1992 if (mmu_notifier_retry(vcpu, mmu_seq)) 2161 if (mmu_notifier_retry(vcpu, mmu_seq))
@@ -2009,6 +2178,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
2009{ 2178{
2010 int i; 2179 int i;
2011 struct kvm_mmu_page *sp; 2180 struct kvm_mmu_page *sp;
2181 LIST_HEAD(invalid_list);
2012 2182
2013 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 2183 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2014 return; 2184 return;
@@ -2018,8 +2188,10 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
2018 2188
2019 sp = page_header(root); 2189 sp = page_header(root);
2020 --sp->root_count; 2190 --sp->root_count;
2021 if (!sp->root_count && sp->role.invalid) 2191 if (!sp->root_count && sp->role.invalid) {
2022 kvm_mmu_zap_page(vcpu->kvm, sp); 2192 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
2193 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
2194 }
2023 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 2195 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
2024 spin_unlock(&vcpu->kvm->mmu_lock); 2196 spin_unlock(&vcpu->kvm->mmu_lock);
2025 return; 2197 return;
@@ -2032,10 +2204,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
2032 sp = page_header(root); 2204 sp = page_header(root);
2033 --sp->root_count; 2205 --sp->root_count;
2034 if (!sp->root_count && sp->role.invalid) 2206 if (!sp->root_count && sp->role.invalid)
2035 kvm_mmu_zap_page(vcpu->kvm, sp); 2207 kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
2208 &invalid_list);
2036 } 2209 }
2037 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 2210 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
2038 } 2211 }
2212 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
2039 spin_unlock(&vcpu->kvm->mmu_lock); 2213 spin_unlock(&vcpu->kvm->mmu_lock);
2040 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 2214 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
2041} 2215}
@@ -2045,7 +2219,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
2045 int ret = 0; 2219 int ret = 0;
2046 2220
2047 if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { 2221 if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
2048 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 2222 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2049 ret = 1; 2223 ret = 1;
2050 } 2224 }
2051 2225
@@ -2073,6 +2247,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
2073 root_gfn = 0; 2247 root_gfn = 0;
2074 } 2248 }
2075 spin_lock(&vcpu->kvm->mmu_lock); 2249 spin_lock(&vcpu->kvm->mmu_lock);
2250 kvm_mmu_free_some_pages(vcpu);
2076 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 2251 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
2077 PT64_ROOT_LEVEL, direct, 2252 PT64_ROOT_LEVEL, direct,
2078 ACC_ALL, NULL); 2253 ACC_ALL, NULL);
@@ -2103,6 +2278,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
2103 root_gfn = i << 30; 2278 root_gfn = i << 30;
2104 } 2279 }
2105 spin_lock(&vcpu->kvm->mmu_lock); 2280 spin_lock(&vcpu->kvm->mmu_lock);
2281 kvm_mmu_free_some_pages(vcpu);
2106 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 2282 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
2107 PT32_ROOT_LEVEL, direct, 2283 PT32_ROOT_LEVEL, direct,
2108 ACC_ALL, NULL); 2284 ACC_ALL, NULL);
@@ -2198,10 +2374,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
2198 mmu_seq = vcpu->kvm->mmu_notifier_seq; 2374 mmu_seq = vcpu->kvm->mmu_notifier_seq;
2199 smp_rmb(); 2375 smp_rmb();
2200 pfn = gfn_to_pfn(vcpu->kvm, gfn); 2376 pfn = gfn_to_pfn(vcpu->kvm, gfn);
2201 if (is_error_pfn(pfn)) { 2377 if (is_error_pfn(pfn))
2202 kvm_release_pfn_clean(pfn); 2378 return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
2203 return 1;
2204 }
2205 spin_lock(&vcpu->kvm->mmu_lock); 2379 spin_lock(&vcpu->kvm->mmu_lock);
2206 if (mmu_notifier_retry(vcpu, mmu_seq)) 2380 if (mmu_notifier_retry(vcpu, mmu_seq))
2207 goto out_unlock; 2381 goto out_unlock;
@@ -2243,7 +2417,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
2243void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 2417void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
2244{ 2418{
2245 ++vcpu->stat.tlb_flush; 2419 ++vcpu->stat.tlb_flush;
2246 kvm_x86_ops->tlb_flush(vcpu); 2420 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2247} 2421}
2248 2422
2249static void paging_new_cr3(struct kvm_vcpu *vcpu) 2423static void paging_new_cr3(struct kvm_vcpu *vcpu)
@@ -2457,10 +2631,9 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
2457static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 2631static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
2458{ 2632{
2459 ASSERT(vcpu); 2633 ASSERT(vcpu);
2460 if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) { 2634 if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
2635 /* mmu.free() should set root_hpa = INVALID_PAGE */
2461 vcpu->arch.mmu.free(vcpu); 2636 vcpu->arch.mmu.free(vcpu);
2462 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
2463 }
2464} 2637}
2465 2638
2466int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 2639int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -2477,9 +2650,6 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
2477 r = mmu_topup_memory_caches(vcpu); 2650 r = mmu_topup_memory_caches(vcpu);
2478 if (r) 2651 if (r)
2479 goto out; 2652 goto out;
2480 spin_lock(&vcpu->kvm->mmu_lock);
2481 kvm_mmu_free_some_pages(vcpu);
2482 spin_unlock(&vcpu->kvm->mmu_lock);
2483 r = mmu_alloc_roots(vcpu); 2653 r = mmu_alloc_roots(vcpu);
2484 spin_lock(&vcpu->kvm->mmu_lock); 2654 spin_lock(&vcpu->kvm->mmu_lock);
2485 mmu_sync_roots(vcpu); 2655 mmu_sync_roots(vcpu);
@@ -2508,7 +2678,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
2508 pte = *spte; 2678 pte = *spte;
2509 if (is_shadow_present_pte(pte)) { 2679 if (is_shadow_present_pte(pte)) {
2510 if (is_last_spte(pte, sp->role.level)) 2680 if (is_last_spte(pte, sp->role.level))
2511 rmap_remove(vcpu->kvm, spte); 2681 drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte);
2512 else { 2682 else {
2513 child = page_header(pte & PT64_BASE_ADDR_MASK); 2683 child = page_header(pte & PT64_BASE_ADDR_MASK);
2514 mmu_page_remove_parent_pte(child, spte); 2684 mmu_page_remove_parent_pte(child, spte);
@@ -2529,6 +2699,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
2529 return; 2699 return;
2530 } 2700 }
2531 2701
2702 if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
2703 return;
2704
2532 ++vcpu->kvm->stat.mmu_pte_updated; 2705 ++vcpu->kvm->stat.mmu_pte_updated;
2533 if (!sp->role.cr4_pae) 2706 if (!sp->role.cr4_pae)
2534 paging32_update_pte(vcpu, sp, spte, new); 2707 paging32_update_pte(vcpu, sp, spte, new);
@@ -2549,11 +2722,15 @@ static bool need_remote_flush(u64 old, u64 new)
2549 return (old & ~new & PT64_PERM_MASK) != 0; 2722 return (old & ~new & PT64_PERM_MASK) != 0;
2550} 2723}
2551 2724
2552static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new) 2725static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
2726 bool remote_flush, bool local_flush)
2553{ 2727{
2554 if (need_remote_flush(old, new)) 2728 if (zap_page)
2729 return;
2730
2731 if (remote_flush)
2555 kvm_flush_remote_tlbs(vcpu->kvm); 2732 kvm_flush_remote_tlbs(vcpu->kvm);
2556 else 2733 else if (local_flush)
2557 kvm_mmu_flush_tlb(vcpu); 2734 kvm_mmu_flush_tlb(vcpu);
2558} 2735}
2559 2736
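The rewritten mmu_pte_write_flush_tlb() takes three flags collected during the write scan: nothing is flushed here when a page was zapped (the zap commit path issues its own flush), a remote flush supersedes a local one, and a local flush is the cheap fallback. The same decision, restated as a small standalone program:

#include <stdbool.h>
#include <stdio.h>

/* Restates the decision made by mmu_pte_write_flush_tlb() above, with prints for the actions. */
static void pte_write_flush(bool zap_page, bool remote_flush, bool local_flush)
{
	if (zap_page)
		return;                      /* the zap commit path flushes on its own */
	if (remote_flush)
		puts("flush all vcpus");     /* kvm_flush_remote_tlbs() */
	else if (local_flush)
		puts("flush this vcpu");     /* kvm_mmu_flush_tlb() */
}

int main(void)
{
	pte_write_flush(false, true, true);  /* a remote flush covers the local one */
	pte_write_flush(false, false, true); /* only this vcpu touched a spte */
	pte_write_flush(true, true, true);   /* zapped: nothing to do here */
	return 0;
}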
@@ -2603,10 +2780,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2603 bool guest_initiated) 2780 bool guest_initiated)
2604{ 2781{
2605 gfn_t gfn = gpa >> PAGE_SHIFT; 2782 gfn_t gfn = gpa >> PAGE_SHIFT;
2783 union kvm_mmu_page_role mask = { .word = 0 };
2606 struct kvm_mmu_page *sp; 2784 struct kvm_mmu_page *sp;
2607 struct hlist_node *node, *n; 2785 struct hlist_node *node;
2608 struct hlist_head *bucket; 2786 LIST_HEAD(invalid_list);
2609 unsigned index;
2610 u64 entry, gentry; 2787 u64 entry, gentry;
2611 u64 *spte; 2788 u64 *spte;
2612 unsigned offset = offset_in_page(gpa); 2789 unsigned offset = offset_in_page(gpa);
@@ -2619,6 +2796,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2619 int npte; 2796 int npte;
2620 int r; 2797 int r;
2621 int invlpg_counter; 2798 int invlpg_counter;
2799 bool remote_flush, local_flush, zap_page;
2800
2801 zap_page = remote_flush = local_flush = false;
2622 2802
2623 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); 2803 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
2624 2804
@@ -2674,13 +2854,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2674 vcpu->arch.last_pte_updated = NULL; 2854 vcpu->arch.last_pte_updated = NULL;
2675 } 2855 }
2676 } 2856 }
2677 index = kvm_page_table_hashfn(gfn);
2678 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
2679 2857
2680restart: 2858 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
2681 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 2859 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
2682 if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
2683 continue;
2684 pte_size = sp->role.cr4_pae ? 8 : 4; 2860 pte_size = sp->role.cr4_pae ? 8 : 4;
2685 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2861 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
2686 misaligned |= bytes < 4; 2862 misaligned |= bytes < 4;
@@ -2697,8 +2873,8 @@ restart:
2697 */ 2873 */
2698 pgprintk("misaligned: gpa %llx bytes %d role %x\n", 2874 pgprintk("misaligned: gpa %llx bytes %d role %x\n",
2699 gpa, bytes, sp->role.word); 2875 gpa, bytes, sp->role.word);
2700 if (kvm_mmu_zap_page(vcpu->kvm, sp)) 2876 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
2701 goto restart; 2877 &invalid_list);
2702 ++vcpu->kvm->stat.mmu_flooded; 2878 ++vcpu->kvm->stat.mmu_flooded;
2703 continue; 2879 continue;
2704 } 2880 }
@@ -2722,16 +2898,22 @@ restart:
2722 if (quadrant != sp->role.quadrant) 2898 if (quadrant != sp->role.quadrant)
2723 continue; 2899 continue;
2724 } 2900 }
2901 local_flush = true;
2725 spte = &sp->spt[page_offset / sizeof(*spte)]; 2902 spte = &sp->spt[page_offset / sizeof(*spte)];
2726 while (npte--) { 2903 while (npte--) {
2727 entry = *spte; 2904 entry = *spte;
2728 mmu_pte_write_zap_pte(vcpu, sp, spte); 2905 mmu_pte_write_zap_pte(vcpu, sp, spte);
2729 if (gentry) 2906 if (gentry &&
2907 !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
2908 & mask.word))
2730 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 2909 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
2731 mmu_pte_write_flush_tlb(vcpu, entry, *spte); 2910 if (!remote_flush && need_remote_flush(entry, *spte))
2911 remote_flush = true;
2732 ++spte; 2912 ++spte;
2733 } 2913 }
2734 } 2914 }
2915 mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush);
2916 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
2735 kvm_mmu_audit(vcpu, "post pte write"); 2917 kvm_mmu_audit(vcpu, "post pte write");
2736 spin_unlock(&vcpu->kvm->mmu_lock); 2918 spin_unlock(&vcpu->kvm->mmu_lock);
2737 if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { 2919 if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
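Across this function the old pattern of zapping a page immediately and jumping back to restart is replaced by two phases: kvm_mmu_prepare_zap_page() unlinks the page and queues it on the local invalid_list, and a single kvm_mmu_commit_zap_page() after the loop frees the batch and flushes remote TLBs once. The new base_role mask also restricts the prefetched pte update to shadow pages whose cr0_wp/cr4_pae/nxe role bits match the current mmu. Below is a self-contained sketch of the prepare/commit idea over a plain linked list; the names and list layout are invented for illustration:

#include <stdio.h>
#include <stdlib.h>

struct page {
	int id;
	struct page *next;        /* active list link */
	struct page *zap_next;    /* pending-free link */
};

/* Phase 1: unlink from the active list, queue on the caller's batch. */
static int prepare_zap(struct page **active, struct page *victim,
		       struct page **invalid_list)
{
	for (struct page **pp = active; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;
			victim->zap_next = *invalid_list;
			*invalid_list = victim;
			return 1;                  /* one page queued */
		}
	}
	return 0;
}

/* Phase 2: one flush-equivalent, then free everything that was queued. */
static void commit_zap(struct page **invalid_list)
{
	if (!*invalid_list)
		return;
	puts("flush remote TLBs once");            /* kvm_flush_remote_tlbs() analogue */
	while (*invalid_list) {
		struct page *p = *invalid_list;
		*invalid_list = p->zap_next;
		printf("free page %d\n", p->id);
		free(p);
	}
}

int main(void)
{
	struct page *active = NULL, *invalid_list = NULL;
	for (int i = 0; i < 3; i++) {
		struct page *p = calloc(1, sizeof(*p));
		p->id = i;
		p->next = active;
		active = p;
	}
	prepare_zap(&active, active, &invalid_list);       /* queue the head */
	prepare_zap(&active, active, &invalid_list);       /* and the new head */
	commit_zap(&invalid_list);                         /* one commit for the batch */
	while (active) { struct page *p = active; active = p->next; free(p); }
	return 0;
}

Batching this way means the hash walk never has pages freed out from under it, which is why the restart label disappears from this loop.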
@@ -2759,15 +2941,21 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
2759 2941
2760void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 2942void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
2761{ 2943{
2762 while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES && 2944 int free_pages;
2945 LIST_HEAD(invalid_list);
2946
2947 free_pages = vcpu->kvm->arch.n_free_mmu_pages;
2948 while (free_pages < KVM_REFILL_PAGES &&
2763 !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { 2949 !list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
2764 struct kvm_mmu_page *sp; 2950 struct kvm_mmu_page *sp;
2765 2951
2766 sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, 2952 sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
2767 struct kvm_mmu_page, link); 2953 struct kvm_mmu_page, link);
2768 kvm_mmu_zap_page(vcpu->kvm, sp); 2954 free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
2955 &invalid_list);
2769 ++vcpu->kvm->stat.mmu_recycled; 2956 ++vcpu->kvm->stat.mmu_recycled;
2770 } 2957 }
2958 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
2771} 2959}
2772 2960
2773int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) 2961int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
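__kvm_mmu_free_some_pages() keeps a local free_pages count and adds the value returned by kvm_mmu_prepare_zap_page() to it, presumably because pages sitting on invalid_list are not reflected in n_free_mmu_pages until the commit actually frees them. A small model of accounting against a local counter while deferring the real free (hypothetical names and numbers):

#define REFILL_TARGET 8                   /* stand-in for KVM_REFILL_PAGES */

struct cache { int n_free; int n_active; };

/* Returns how many pages one eviction will liberate once the batch is committed. */
static int prepare_evict_oldest(struct cache *c) { c->n_active--; return 1; }
static void commit_evictions(struct cache *c, int queued) { c->n_free += queued; }

static void free_some_pages(struct cache *c)
{
	int free_pages = c->n_free;       /* local copy: the shared count lags the queue */
	int queued = 0;

	while (free_pages < REFILL_TARGET && c->n_active > 0) {
		int got = prepare_evict_oldest(c);
		free_pages += got;        /* count it now, free it at commit time */
		queued += got;
	}
	commit_evictions(c, queued);
}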
@@ -2795,11 +2983,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
2795 return 1; 2983 return 1;
2796 case EMULATE_DO_MMIO: 2984 case EMULATE_DO_MMIO:
2797 ++vcpu->stat.mmio_exits; 2985 ++vcpu->stat.mmio_exits;
2798 return 0; 2986 /* fall through */
2799 case EMULATE_FAIL: 2987 case EMULATE_FAIL:
2800 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2801 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2802 vcpu->run->internal.ndata = 0;
2803 return 0; 2988 return 0;
2804 default: 2989 default:
2805 BUG(); 2990 BUG();
@@ -2896,7 +3081,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2896 pt = sp->spt; 3081 pt = sp->spt;
2897 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 3082 for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
2898 /* avoid RMW */ 3083 /* avoid RMW */
2899 if (pt[i] & PT_WRITABLE_MASK) 3084 if (is_writable_pte(pt[i]))
2900 pt[i] &= ~PT_WRITABLE_MASK; 3085 pt[i] &= ~PT_WRITABLE_MASK;
2901 } 3086 }
2902 kvm_flush_remote_tlbs(kvm); 3087 kvm_flush_remote_tlbs(kvm);
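The open-coded pt[i] & PT_WRITABLE_MASK test becomes is_writable_pte(), the same predicate used in the audit code further down. The helper is defined elsewhere in mmu.c; its assumed shape is simply:

#define PT_WRITABLE_MASK (1ULL << 1)      /* bit 1 is the x86 PTE writable bit */

static int is_writable_pte(unsigned long pte)
{
	return pte & PT_WRITABLE_MASK;
}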
@@ -2905,25 +3090,26 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2905void kvm_mmu_zap_all(struct kvm *kvm) 3090void kvm_mmu_zap_all(struct kvm *kvm)
2906{ 3091{
2907 struct kvm_mmu_page *sp, *node; 3092 struct kvm_mmu_page *sp, *node;
3093 LIST_HEAD(invalid_list);
2908 3094
2909 spin_lock(&kvm->mmu_lock); 3095 spin_lock(&kvm->mmu_lock);
2910restart: 3096restart:
2911 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) 3097 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
2912 if (kvm_mmu_zap_page(kvm, sp)) 3098 if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
2913 goto restart; 3099 goto restart;
2914 3100
3101 kvm_mmu_commit_zap_page(kvm, &invalid_list);
2915 spin_unlock(&kvm->mmu_lock); 3102 spin_unlock(&kvm->mmu_lock);
2916
2917 kvm_flush_remote_tlbs(kvm);
2918} 3103}
2919 3104
2920static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) 3105static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
3106 struct list_head *invalid_list)
2921{ 3107{
2922 struct kvm_mmu_page *page; 3108 struct kvm_mmu_page *page;
2923 3109
2924 page = container_of(kvm->arch.active_mmu_pages.prev, 3110 page = container_of(kvm->arch.active_mmu_pages.prev,
2925 struct kvm_mmu_page, link); 3111 struct kvm_mmu_page, link);
2926 return kvm_mmu_zap_page(kvm, page) + 1; 3112 return kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
2927} 3113}
2928 3114
2929static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 3115static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
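kvm_mmu_zap_all() keeps its restart label because preparing one page for zap can drop other pages from the active list as well, but the free and the remote TLB flush now happen once in kvm_mmu_commit_zap_page(), which is presumably why the trailing kvm_flush_remote_tlbs() call is removed. The overall shape, reusing the prepare/commit helpers sketched earlier (illustrative only):

struct page;                                 /* same illustrative type as the earlier sketch */
int  prepare_zap(struct page **active, struct page *victim, struct page **invalid_list);
void commit_zap(struct page **invalid_list);
struct page *page_next(struct page *p);      /* hypothetical accessor, keeps the type opaque */

void zap_all(struct page **active, struct page **invalid_list)
{
restart:
	for (struct page *p = *active; p; p = page_next(p))
		if (prepare_zap(active, p, invalid_list))
			goto restart;            /* preparing one page may reshape the list */
	commit_zap(invalid_list);                /* one flush + free for everything queued */
}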
@@ -2936,6 +3122,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
2936 3122
2937 list_for_each_entry(kvm, &vm_list, vm_list) { 3123 list_for_each_entry(kvm, &vm_list, vm_list) {
2938 int npages, idx, freed_pages; 3124 int npages, idx, freed_pages;
3125 LIST_HEAD(invalid_list);
2939 3126
2940 idx = srcu_read_lock(&kvm->srcu); 3127 idx = srcu_read_lock(&kvm->srcu);
2941 spin_lock(&kvm->mmu_lock); 3128 spin_lock(&kvm->mmu_lock);
@@ -2943,12 +3130,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
2943 kvm->arch.n_free_mmu_pages; 3130 kvm->arch.n_free_mmu_pages;
2944 cache_count += npages; 3131 cache_count += npages;
2945 if (!kvm_freed && nr_to_scan > 0 && npages > 0) { 3132 if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
2946 freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); 3133 freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm,
3134 &invalid_list);
2947 cache_count -= freed_pages; 3135 cache_count -= freed_pages;
2948 kvm_freed = kvm; 3136 kvm_freed = kvm;
2949 } 3137 }
2950 nr_to_scan--; 3138 nr_to_scan--;
2951 3139
3140 kvm_mmu_commit_zap_page(kvm, &invalid_list);
2952 spin_unlock(&kvm->mmu_lock); 3141 spin_unlock(&kvm->mmu_lock);
2953 srcu_read_unlock(&kvm->srcu, idx); 3142 srcu_read_unlock(&kvm->srcu, idx);
2954 } 3143 }
@@ -3074,7 +3263,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
3074 3263
3075static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) 3264static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
3076{ 3265{
3077 kvm_set_cr3(vcpu, vcpu->arch.cr3); 3266 (void)kvm_set_cr3(vcpu, vcpu->arch.cr3);
3078 return 1; 3267 return 1;
3079} 3268}
3080 3269
@@ -3331,9 +3520,9 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
3331 struct kvm_mmu_page *rev_sp; 3520 struct kvm_mmu_page *rev_sp;
3332 gfn_t gfn; 3521 gfn_t gfn;
3333 3522
3334 if (*sptep & PT_WRITABLE_MASK) { 3523 if (is_writable_pte(*sptep)) {
3335 rev_sp = page_header(__pa(sptep)); 3524 rev_sp = page_header(__pa(sptep));
3336 gfn = rev_sp->gfns[sptep - rev_sp->spt]; 3525 gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
3337 3526
3338 if (!gfn_to_memslot(kvm, gfn)) { 3527 if (!gfn_to_memslot(kvm, gfn)) {
3339 if (!printk_ratelimit()) 3528 if (!printk_ratelimit())
@@ -3347,8 +3536,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
3347 return; 3536 return;
3348 } 3537 }
3349 3538
3350 rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], 3539 rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
3351 rev_sp->role.level);
3352 if (!*rmapp) { 3540 if (!*rmapp) {
3353 if (!printk_ratelimit()) 3541 if (!printk_ratelimit())
3354 return; 3542 return;
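The audit helper now reads the gfn through kvm_mmu_page_get_gfn() instead of indexing sp->gfns directly, and then reuses that gfn for the rmap lookup. The accessor itself is defined elsewhere in mmu.c; an assumed, simplified shape (the real version presumably also accounts for the page level when the mapping is direct):

#include <stdbool.h>

/* Assumed, simplified shape of the accessor; names here are illustrative. */
struct sp_view {
	bool direct;
	unsigned long gfn;           /* base gfn covered by this shadow page */
	unsigned long *gfns;         /* per-entry gfns; absent for direct pages */
};

static unsigned long sp_get_gfn(struct sp_view *sp, int index)
{
	if (sp->direct)
		return sp->gfn + index;  /* simplification: the real code also scales by page level */
	return sp->gfns[index];
}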
@@ -3381,7 +3569,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
3381 3569
3382 if (!(ent & PT_PRESENT_MASK)) 3570 if (!(ent & PT_PRESENT_MASK))
3383 continue; 3571 continue;
3384 if (!(ent & PT_WRITABLE_MASK)) 3572 if (!is_writable_pte(ent))
3385 continue; 3573 continue;
3386 inspect_spte_has_rmap(vcpu->kvm, &pt[i]); 3574 inspect_spte_has_rmap(vcpu->kvm, &pt[i]);
3387 } 3575 }
@@ -3409,13 +3597,12 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
3409 if (sp->unsync) 3597 if (sp->unsync)
3410 continue; 3598 continue;
3411 3599
3412 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3600 slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
3413 slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
3414 rmapp = &slot->rmap[gfn - slot->base_gfn]; 3601 rmapp = &slot->rmap[gfn - slot->base_gfn];
3415 3602
3416 spte = rmap_next(vcpu->kvm, rmapp, NULL); 3603 spte = rmap_next(vcpu->kvm, rmapp, NULL);
3417 while (spte) { 3604 while (spte) {
3418 if (*spte & PT_WRITABLE_MASK) 3605 if (is_writable_pte(*spte))
3419 printk(KERN_ERR "%s: (%s) shadow page has " 3606 printk(KERN_ERR "%s: (%s) shadow page has "
3420 "writable mappings: gfn %lx role %x\n", 3607 "writable mappings: gfn %lx role %x\n",
3421 __func__, audit_msg, sp->gfn, 3608 __func__, audit_msg, sp->gfn,