author		Joerg Roedel <joerg.roedel@amd.com>	2009-07-27 10:30:44 -0400
committer	Avi Kivity <avi@redhat.com>	2009-09-10 01:33:18 -0400
commit		852e3c19ac64b7c3912e8efe42d3ce090ebc0161 (patch)
tree		cc4fd27e59002191dc09ccf6a6e4006c66c3df38 /arch/x86/kvm/mmu.c
parent		d25797b24c0ff2efc2b2fabaebb0ec0cafc0d3e3 (diff)
KVM: MMU: make direct mapping paths aware of mapping levels
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--	arch/x86/kvm/mmu.c	83
1 files changed, 49 insertions, 34 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c707936b2414..110c224ed1fb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -257,7 +257,7 @@ static int is_last_spte(u64 pte, int level)
 {
 	if (level == PT_PAGE_TABLE_LEVEL)
 		return 1;
-	if (level == PT_DIRECTORY_LEVEL && is_large_pte(pte))
+	if (is_large_pte(pte))
 		return 1;
 	return 0;
 }
@@ -753,7 +753,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
 {
-	int i;
+	int i, j;
 	int retval = 0;
 
 	/*
@@ -772,11 +772,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-			int idx = gfn_offset /
-				  KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL);
+
 			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
-			retval |= handler(kvm,
-					  &memslot->lpage_info[0][idx].rmap_pde);
+
+			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
+				int idx = gfn_offset;
+				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
+				retval |= handler(kvm,
+					&memslot->lpage_info[j][idx].rmap_pde);
+			}
 		}
 	}
 
@@ -814,12 +818,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 
 #define RMAP_RECYCLE_THRESHOLD 1000
 
-static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
 	unsigned long *rmapp;
+	struct kvm_mmu_page *sp;
+
+	sp = page_header(__pa(spte));
 
 	gfn = unalias_gfn(vcpu->kvm, gfn);
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
 	kvm_unmap_rmapp(vcpu->kvm, rmapp);
 	kvm_flush_remote_tlbs(vcpu->kvm);
@@ -1734,7 +1741,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
		    unsigned pte_access, int user_fault,
-		    int write_fault, int dirty, int largepage,
+		    int write_fault, int dirty, int level,
		    gfn_t gfn, pfn_t pfn, bool speculative,
		    bool can_unsync)
 {
@@ -1757,7 +1764,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
		spte |= shadow_nx_mask;
	if (pte_access & ACC_USER_MASK)
		spte |= shadow_user_mask;
-	if (largepage)
+	if (level > PT_PAGE_TABLE_LEVEL)
		spte |= PT_PAGE_SIZE_MASK;
	if (tdp_enabled)
		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
@@ -1768,7 +1775,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
	if ((pte_access & ACC_WRITE_MASK)
	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
 
-		if (largepage && has_wrprotected_page(vcpu->kvm, gfn, 1)) {
+		if (level > PT_PAGE_TABLE_LEVEL &&
+		    has_wrprotected_page(vcpu->kvm, gfn, level)) {
			ret = 1;
			spte = shadow_trap_nonpresent_pte;
			goto set_pte;
@@ -1806,7 +1814,7 @@ set_pte:
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
			 unsigned pt_access, unsigned pte_access,
			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, gfn_t gfn,
+			 int *ptwrite, int level, gfn_t gfn,
			 pfn_t pfn, bool speculative)
 {
	int was_rmapped = 0;
@@ -1823,7 +1831,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
	 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
	 * the parent of the now unreachable PTE.
	 */
-	if (largepage && !is_large_pte(*sptep)) {
+	if (level > PT_PAGE_TABLE_LEVEL &&
+	    !is_large_pte(*sptep)) {
		struct kvm_mmu_page *child;
		u64 pte = *sptep;
 
@@ -1836,8 +1845,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
		} else
			was_rmapped = 1;
	}
+
	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative, true)) {
+		      dirty, level, gfn, pfn, speculative, true)) {
		if (write_fault)
			*ptwrite = 1;
		kvm_x86_ops->tlb_flush(vcpu);
@@ -1857,7 +1867,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
		if (!is_rmap_spte(*sptep))
			kvm_release_pfn_clean(pfn);
		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
-			rmap_recycle(vcpu, gfn, largepage);
+			rmap_recycle(vcpu, sptep, gfn);
	} else {
		if (was_writeble)
			kvm_release_pfn_dirty(pfn);
@@ -1875,7 +1885,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int largepage, gfn_t gfn, pfn_t pfn)
+			int level, gfn_t gfn, pfn_t pfn)
 {
	struct kvm_shadow_walk_iterator iterator;
	struct kvm_mmu_page *sp;
@@ -1883,11 +1893,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
	gfn_t pseudo_gfn;
 
	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
-		if (iterator.level == PT_PAGE_TABLE_LEVEL
-		    || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+		if (iterator.level == level) {
			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
				     0, write, 1, &pt_write,
-				     largepage, gfn, pfn, false);
+				     level, gfn, pfn, false);
			++vcpu->stat.pf_fixed;
			break;
		}
@@ -1915,14 +1924,20 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
	int r;
-	int largepage = 0;
+	int level;
	pfn_t pfn;
	unsigned long mmu_seq;
 
-	if (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL) {
-		gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
-		largepage = 1;
-	}
+	level = mapping_level(vcpu, gfn);
+
+	/*
+	 * This path builds a PAE pagetable - so we can map 2mb pages at
+	 * maximum. Therefore check if the level is larger than that.
+	 */
+	if (level > PT_DIRECTORY_LEVEL)
+		level = PT_DIRECTORY_LEVEL;
+
+	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
@@ -1938,7 +1953,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, largepage, gfn, pfn);
+	r = __direct_map(vcpu, v, write, level, gfn, pfn);
	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2114,7 +2129,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 {
	pfn_t pfn;
	int r;
-	int largepage = 0;
+	int level;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	unsigned long mmu_seq;
 
@@ -2125,10 +2140,10 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
	if (r)
		return r;
 
-	if (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL) {
-		gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
-		largepage = 1;
-	}
+	level = mapping_level(vcpu, gfn);
+
+	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, gfn);
@@ -2141,7 +2156,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn);
+			 level, gfn, pfn);
	spin_unlock(&vcpu->kvm->mmu_lock);
 
	return r;
@@ -2448,7 +2463,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
				  const void *new)
 {
	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
-		if (!vcpu->arch.update_pte.largepage ||
+		if (vcpu->arch.update_pte.level == PT_PAGE_TABLE_LEVEL ||
		    sp->role.glevels == PT32_ROOT_LEVEL) {
			++vcpu->kvm->stat.mmu_pde_zapped;
			return;
@@ -2498,7 +2513,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
	u64 gpte = 0;
	pfn_t pfn;
 
-	vcpu->arch.update_pte.largepage = 0;
+	vcpu->arch.update_pte.level = PT_PAGE_TABLE_LEVEL;
 
	if (bytes != 4 && bytes != 8)
		return;
@@ -2530,7 +2545,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
	if (is_large_pte(gpte) &&
	    (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL)) {
		gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
-		vcpu->arch.update_pte.largepage = 1;
+		vcpu->arch.update_pte.level = PT_DIRECTORY_LEVEL;
	}
	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();