diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-05 15:07:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-05 15:07:39 -0400 |
commit | 19d031e052bc213cdcbee70696d476136994b8c1 (patch) | |
tree | 906e78a07d15e0154692f82489387e3ad7f3f94b /arch/x86/kvm/mmu.c | |
parent | e8d809c61325a2f799dc753b0ac72ace6958b92c (diff) | |
parent | 3da0dd433dc399a8c0124d0614d82a09b6a49bce (diff) |
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: add support for change_pte mmu notifiers
  KVM: MMU: add SPTE_HOST_WRITEABLE flag to the shadow ptes
  KVM: MMU: dont hold pagecount reference for mapped sptes pages
  KVM: Prevent overflow in KVM_GET_SUPPORTED_CPUID
  KVM: VMX: flush TLB with INVEPT on cpu migration
  KVM: fix LAPIC timer period overflow
  KVM: s390: fix memsize >= 4G
  KVM: SVM: Handle tsc in svm_get_msr/svm_set_msr correctly
  KVM: SVM: Fix tsc offset adjustment when running nested
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu.c | 84 |
1 files changed, 66 insertions, 18 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae9f453..685a4ffac8e6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c | |||
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644); | |||
156 | #define CREATE_TRACE_POINTS | 156 | #define CREATE_TRACE_POINTS |
157 | #include "mmutrace.h" | 157 | #include "mmutrace.h" |
158 | 158 | ||
159 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | ||
160 | |||
159 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 161 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
160 | 162 | ||
161 | struct kvm_rmap_desc { | 163 | struct kvm_rmap_desc { |
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
634 | if (*spte & shadow_accessed_mask) | 636 | if (*spte & shadow_accessed_mask) |
635 | kvm_set_pfn_accessed(pfn); | 637 | kvm_set_pfn_accessed(pfn); |
636 | if (is_writeble_pte(*spte)) | 638 | if (is_writeble_pte(*spte)) |
637 | kvm_release_pfn_dirty(pfn); | 639 | kvm_set_pfn_dirty(pfn); |
638 | else | ||
639 | kvm_release_pfn_clean(pfn); | ||
640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | 640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); |
641 | if (!*rmapp) { | 641 | if (!*rmapp) { |
642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
@@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
748 | return write_protected; | 748 | return write_protected; |
749 | } | 749 | } |
750 | 750 | ||
751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) |
752 | { | 752 | { |
753 | u64 *spte; | 753 | u64 *spte; |
754 | int need_tlb_flush = 0; | 754 | int need_tlb_flush = 0; |
@@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | |||
763 | return need_tlb_flush; | 763 | return need_tlb_flush; |
764 | } | 764 | } |
765 | 765 | ||
766 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | 766 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) |
767 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | 767 | { |
768 | int need_flush = 0; | ||
769 | u64 *spte, new_spte; | ||
770 | pte_t *ptep = (pte_t *)data; | ||
771 | pfn_t new_pfn; | ||
772 | |||
773 | WARN_ON(pte_huge(*ptep)); | ||
774 | new_pfn = pte_pfn(*ptep); | ||
775 | spte = rmap_next(kvm, rmapp, NULL); | ||
776 | while (spte) { | ||
777 | BUG_ON(!is_shadow_present_pte(*spte)); | ||
778 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | ||
779 | need_flush = 1; | ||
780 | if (pte_write(*ptep)) { | ||
781 | rmap_remove(kvm, spte); | ||
782 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
783 | spte = rmap_next(kvm, rmapp, NULL); | ||
784 | } else { | ||
785 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | ||
786 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | ||
787 | |||
788 | new_spte &= ~PT_WRITABLE_MASK; | ||
789 | new_spte &= ~SPTE_HOST_WRITEABLE; | ||
790 | if (is_writeble_pte(*spte)) | ||
791 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | ||
792 | __set_spte(spte, new_spte); | ||
793 | spte = rmap_next(kvm, rmapp, spte); | ||
794 | } | ||
795 | } | ||
796 | if (need_flush) | ||
797 | kvm_flush_remote_tlbs(kvm); | ||
798 | |||
799 | return 0; | ||
800 | } | ||
801 | |||
802 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data, | ||
803 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
804 | u64 data)) | ||
768 | { | 805 | { |
769 | int i, j; | 806 | int i, j; |
770 | int retval = 0; | 807 | int retval = 0; |
@@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
786 | if (hva >= start && hva < end) { | 823 | if (hva >= start && hva < end) { |
787 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 824 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
788 | 825 | ||
789 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | 826 | retval |= handler(kvm, &memslot->rmap[gfn_offset], |
827 | data); | ||
790 | 828 | ||
791 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 829 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
792 | int idx = gfn_offset; | 830 | int idx = gfn_offset; |
793 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 831 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); |
794 | retval |= handler(kvm, | 832 | retval |= handler(kvm, |
795 | &memslot->lpage_info[j][idx].rmap_pde); | 833 | &memslot->lpage_info[j][idx].rmap_pde, |
834 | data); | ||
796 | } | 835 | } |
797 | } | 836 | } |
798 | } | 837 | } |
@@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
802 | 841 | ||
803 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 842 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
804 | { | 843 | { |
805 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | 844 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); |
806 | } | 845 | } |
807 | 846 | ||
808 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | 847 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) |
848 | { | ||
849 | kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp); | ||
850 | } | ||
851 | |||
852 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) | ||
809 | { | 853 | { |
810 | u64 *spte; | 854 | u64 *spte; |
811 | int young = 0; | 855 | int young = 0; |
@@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
841 | gfn = unalias_gfn(vcpu->kvm, gfn); | 885 | gfn = unalias_gfn(vcpu->kvm, gfn); |
842 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 886 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
843 | 887 | ||
844 | kvm_unmap_rmapp(vcpu->kvm, rmapp); | 888 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); |
845 | kvm_flush_remote_tlbs(vcpu->kvm); | 889 | kvm_flush_remote_tlbs(vcpu->kvm); |
846 | } | 890 | } |
847 | 891 | ||
848 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 892 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) |
849 | { | 893 | { |
850 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | 894 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); |
851 | } | 895 | } |
852 | 896 | ||
853 | #ifdef MMU_DEBUG | 897 | #ifdef MMU_DEBUG |
@@ -1756,7 +1800,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1756 | unsigned pte_access, int user_fault, | 1800 | unsigned pte_access, int user_fault, |
1757 | int write_fault, int dirty, int level, | 1801 | int write_fault, int dirty, int level, |
1758 | gfn_t gfn, pfn_t pfn, bool speculative, | 1802 | gfn_t gfn, pfn_t pfn, bool speculative, |
1759 | bool can_unsync) | 1803 | bool can_unsync, bool reset_host_protection) |
1760 | { | 1804 | { |
1761 | u64 spte; | 1805 | u64 spte; |
1762 | int ret = 0; | 1806 | int ret = 0; |
@@ -1783,6 +1827,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1783 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, | 1827 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1784 | kvm_is_mmio_pfn(pfn)); | 1828 | kvm_is_mmio_pfn(pfn)); |
1785 | 1829 | ||
1830 | if (reset_host_protection) | ||
1831 | spte |= SPTE_HOST_WRITEABLE; | ||
1832 | |||
1786 | spte |= (u64)pfn << PAGE_SHIFT; | 1833 | spte |= (u64)pfn << PAGE_SHIFT; |
1787 | 1834 | ||
1788 | if ((pte_access & ACC_WRITE_MASK) | 1835 | if ((pte_access & ACC_WRITE_MASK) |
@@ -1828,7 +1875,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1828 | unsigned pt_access, unsigned pte_access, | 1875 | unsigned pt_access, unsigned pte_access, |
1829 | int user_fault, int write_fault, int dirty, | 1876 | int user_fault, int write_fault, int dirty, |
1830 | int *ptwrite, int level, gfn_t gfn, | 1877 | int *ptwrite, int level, gfn_t gfn, |
1831 | pfn_t pfn, bool speculative) | 1878 | pfn_t pfn, bool speculative, |
1879 | bool reset_host_protection) | ||
1832 | { | 1880 | { |
1833 | int was_rmapped = 0; | 1881 | int was_rmapped = 0; |
1834 | int was_writeble = is_writeble_pte(*sptep); | 1882 | int was_writeble = is_writeble_pte(*sptep); |
@@ -1860,7 +1908,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1860 | } | 1908 | } |
1861 | 1909 | ||
1862 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 1910 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
1863 | dirty, level, gfn, pfn, speculative, true)) { | 1911 | dirty, level, gfn, pfn, speculative, true, |
1912 | reset_host_protection)) { | ||
1864 | if (write_fault) | 1913 | if (write_fault) |
1865 | *ptwrite = 1; | 1914 | *ptwrite = 1; |
1866 | kvm_x86_ops->tlb_flush(vcpu); | 1915 | kvm_x86_ops->tlb_flush(vcpu); |
@@ -1877,8 +1926,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1877 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 1926 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
1878 | if (!was_rmapped) { | 1927 | if (!was_rmapped) { |
1879 | rmap_count = rmap_add(vcpu, sptep, gfn); | 1928 | rmap_count = rmap_add(vcpu, sptep, gfn); |
1880 | if (!is_rmap_spte(*sptep)) | 1929 | kvm_release_pfn_clean(pfn); |
1881 | kvm_release_pfn_clean(pfn); | ||
1882 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 1930 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1883 | rmap_recycle(vcpu, sptep, gfn); | 1931 | rmap_recycle(vcpu, sptep, gfn); |
1884 | } else { | 1932 | } else { |
@@ -1909,7 +1957,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1909 | if (iterator.level == level) { | 1957 | if (iterator.level == level) { |
1910 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 1958 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
1911 | 0, write, 1, &pt_write, | 1959 | 0, write, 1, &pt_write, |
1912 | level, gfn, pfn, false); | 1960 | level, gfn, pfn, false, true); |
1913 | ++vcpu->stat.pf_fixed; | 1961 | ++vcpu->stat.pf_fixed; |
1914 | break; | 1962 | break; |
1915 | } | 1963 | } |