Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--	arch/x86/kvm/mmu.c	152
1 files changed, 73 insertions, 79 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 818b92ad82cf..19a8906bcaa2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,6 +18,7 @@
  */
 
 #include "mmu.h"
+#include "x86.h"
 #include "kvm_cache_regs.h"
 
 #include <linux/kvm_host.h>
@@ -29,6 +30,8 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -136,16 +139,6 @@ module_param(oos_shadow, bool, 0644);
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
 			| PT64_NX_MASK)
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
-
-#define PT_PDPE_LEVEL 3
-#define PT_DIRECTORY_LEVEL 2
-#define PT_PAGE_TABLE_LEVEL 1
-
 #define RMAP_EXT 4
 
 #define ACC_EXEC_MASK 1
@@ -153,6 +146,9 @@ module_param(oos_shadow, bool, 0644);
 #define ACC_USER_MASK PT_USER_MASK
 #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+#include <trace/events/kvm.h>
+
+#undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
@@ -229,7 +225,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.cr0 & X86_CR0_WP;
+	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
 static int is_cpuid_PSE36(void)
@@ -239,7 +235,7 @@ static int is_cpuid_PSE36(void)
 
 static int is_nx(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.shadow_efer & EFER_NX;
+	return vcpu->arch.efer & EFER_NX;
 }
 
 static int is_shadow_present_pte(u64 pte)
@@ -253,7 +249,7 @@ static int is_large_pte(u64 pte)
 	return pte & PT_PAGE_SIZE_MASK;
 }
 
-static int is_writeble_pte(unsigned long pte)
+static int is_writable_pte(unsigned long pte)
 {
 	return pte & PT_WRITABLE_MASK;
 }
@@ -470,24 +466,10 @@ static int has_wrprotected_page(struct kvm *kvm,
 
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
-	unsigned long page_size = PAGE_SIZE;
-	struct vm_area_struct *vma;
-	unsigned long addr;
+	unsigned long page_size;
 	int i, ret = 0;
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr))
-		return page_size;
-
-	down_read(&current->mm->mmap_sem);
-	vma = find_vma(current->mm, addr);
-	if (!vma)
-		goto out;
-
-	page_size = vma_kernel_pagesize(vma);
-
-out:
-	up_read(&current->mm->mmap_sem);
+	page_size = kvm_host_page_size(kvm, gfn);
 
 	for (i = PT_PAGE_TABLE_LEVEL;
 	     i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -503,8 +485,7 @@ out:
 static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 {
 	struct kvm_memory_slot *slot;
-	int host_level;
-	int level = PT_PAGE_TABLE_LEVEL;
+	int host_level, level, max_level;
 
 	slot = gfn_to_memslot(vcpu->kvm, large_gfn);
 	if (slot && slot->dirty_bitmap)
@@ -515,11 +496,12 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 	if (host_level == PT_PAGE_TABLE_LEVEL)
 		return host_level;
 
-	for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) {
+	max_level = kvm_x86_ops->get_lpage_level() < host_level ?
+		kvm_x86_ops->get_lpage_level() : host_level;
 
+	for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
 		if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
 			break;
-	}
 
 	return level - 1;
 }
@@ -635,7 +617,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	pfn = spte_to_pfn(*spte);
 	if (*spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
-	if (is_writeble_pte(*spte))
+	if (is_writable_pte(*spte))
 		kvm_set_pfn_dirty(pfn);
 	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
 	if (!*rmapp) {
@@ -664,6 +646,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 			prev_desc = desc;
 			desc = desc->more;
 		}
+		pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
 		BUG();
 	}
 }
@@ -710,7 +693,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 		BUG_ON(!spte);
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-		if (is_writeble_pte(*spte)) {
+		if (is_writable_pte(*spte)) {
 			__set_spte(spte, *spte & ~PT_WRITABLE_MASK);
 			write_protected = 1;
 		}
@@ -734,7 +717,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
 		pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
-		if (is_writeble_pte(*spte)) {
+		if (is_writable_pte(*spte)) {
 			rmap_remove(kvm, spte);
 			--kvm->stat.lpages;
 			__set_spte(spte, shadow_trap_nonpresent_pte);
@@ -789,7 +772,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 			new_spte &= ~PT_WRITABLE_MASK;
 			new_spte &= ~SPTE_HOST_WRITEABLE;
-			if (is_writeble_pte(*spte))
+			if (is_writable_pte(*spte))
 				kvm_set_pfn_dirty(spte_to_pfn(*spte));
 			__set_spte(spte, new_spte);
 			spte = rmap_next(kvm, rmapp, spte);
@@ -807,35 +790,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  unsigned long data))
 {
 	int i, j;
+	int ret;
 	int retval = 0;
+	struct kvm_memslots *slots;
 
-	/*
-	 * If mmap_sem isn't taken, we can look the memslots with only
-	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
-	 */
-	for (i = 0; i < kvm->nmemslots; i++) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	slots = rcu_dereference(kvm->memslots);
+
+	for (i = 0; i < slots->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
-		/* mmu_lock protects userspace_addr */
-		if (!start)
-			continue;
-
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-			retval |= handler(kvm, &memslot->rmap[gfn_offset],
-					  data);
+			ret = handler(kvm, &memslot->rmap[gfn_offset], data);
 
 			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
 				int idx = gfn_offset;
 				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
-				retval |= handler(kvm,
+				ret |= handler(kvm,
 					&memslot->lpage_info[j][idx].rmap_pde,
 					data);
 			}
+			trace_kvm_age_page(hva, memslot, ret);
+			retval |= ret;
 		}
 	}
 
@@ -858,9 +838,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	u64 *spte;
 	int young = 0;
 
-	/* always return old for EPT */
+	/*
+	 * Emulate the accessed bit for EPT, by checking if this page has
+	 * an EPT mapping, and clearing it if it does. On the next access,
+	 * a new EPT mapping will be established.
+	 * This has some overhead, but not as much as the cost of swapping
+	 * out actively used pages or breaking up actively used hugepages.
+	 */
 	if (!shadow_accessed_mask)
-		return 0;
+		return kvm_unmap_rmapp(kvm, rmapp, data);
 
 	spte = rmap_next(kvm, rmapp, NULL);
 	while (spte) {
@@ -1504,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 		for_each_sp(pages, sp, parents, i) {
 			kvm_mmu_zap_page(kvm, sp);
 			mmu_pages_clear_parents(&parents);
+			zapped++;
 		}
-		zapped += pages.nr;
 		kvm_mmu_pages_init(parent, &parents, &pages);
 	}
 
@@ -1556,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
 	 */
 
 	if (used_pages > kvm_nr_mmu_pages) {
-		while (used_pages > kvm_nr_mmu_pages) {
+		while (used_pages > kvm_nr_mmu_pages &&
+			!list_empty(&kvm->arch.active_mmu_pages)) {
 			struct kvm_mmu_page *page;
 
 			page = container_of(kvm->arch.active_mmu_pages.prev,
 					    struct kvm_mmu_page, link);
-			kvm_mmu_zap_page(kvm, page);
+			used_pages -= kvm_mmu_zap_page(kvm, page);
 			used_pages--;
 		}
+		kvm_nr_mmu_pages = used_pages;
 		kvm->arch.n_free_mmu_pages = 0;
 	}
 	else
@@ -1610,14 +1598,15 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 		    && !sp->role.invalid) {
 			pgprintk("%s: zap %lx %x\n",
 				 __func__, gfn, sp->role.word);
-			kvm_mmu_zap_page(kvm, sp);
+			if (kvm_mmu_zap_page(kvm, sp))
+				nn = bucket->first;
 		}
 	}
 }
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 {
-	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+	int slot = memslot_id(kvm, gfn);
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
 	__set_bit(slot, sp->slot_bitmap);
@@ -1641,7 +1630,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct page *page;
 
-	gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+	gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
 	if (gpa == UNMAPPED_GVA)
 		return NULL;
@@ -1854,7 +1843,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 * is responsibility of mmu_get_page / kvm_sync_page.
 		 * Same reasoning can be applied to dirty page accounting.
 		 */
-		if (!can_unsync && is_writeble_pte(*sptep))
+		if (!can_unsync && is_writable_pte(*sptep))
 			goto set_pte;
 
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@@ -1862,7 +1851,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 				 __func__, gfn);
 			ret = 1;
 			pte_access &= ~ACC_WRITE_MASK;
-			if (is_writeble_pte(spte))
+			if (is_writable_pte(spte))
 				spte &= ~PT_WRITABLE_MASK;
 		}
 	}
@@ -1883,7 +1872,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 bool reset_host_protection)
 {
 	int was_rmapped = 0;
-	int was_writeble = is_writeble_pte(*sptep);
+	int was_writable = is_writable_pte(*sptep);
 	int rmap_count;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
@@ -1934,7 +1923,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
 			rmap_recycle(vcpu, sptep, gfn);
 	} else {
-		if (was_writeble)
+		if (was_writable)
 			kvm_release_pfn_dirty(pfn);
 		else
 			kvm_release_pfn_clean(pfn);
@@ -2164,8 +2153,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+				  u32 access, u32 *error)
 {
+	if (error)
+		*error = 0;
 	return vaddr;
 }
 
@@ -2749,7 +2741,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	if (tdp_enabled)
 		return 0;
 
-	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -2789,7 +2781,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 	if (r)
 		goto out;
 
-	er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+	er = emulate_instruction(vcpu, cr2, error_code, 0);
 
 	switch (er) {
 	case EMULATE_DONE:
@@ -2800,6 +2792,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 	case EMULATE_FAIL:
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
 		return 0;
 	default:
 		BUG();
@@ -2848,16 +2841,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 	 */
 	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
 	if (!page)
-		goto error_1;
+		return -ENOMEM;
+
 	vcpu->arch.mmu.pae_root = page_address(page);
 	for (i = 0; i < 4; ++i)
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 
 	return 0;
-
-error_1:
-	free_mmu_pages(vcpu);
-	return -ENOMEM;
 }
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
@@ -2937,10 +2927,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
-		int npages;
+		int npages, idx;
 
-		if (!down_read_trylock(&kvm->slots_lock))
-			continue;
+		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 		npages = kvm->arch.n_alloc_mmu_pages -
 			 kvm->arch.n_free_mmu_pages;
@@ -2953,7 +2942,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 			nr_to_scan--;
 
 		spin_unlock(&kvm->mmu_lock);
-		up_read(&kvm->slots_lock);
+		srcu_read_unlock(&kvm->srcu, idx);
 	}
 	if (kvm_freed)
 		list_move_tail(&kvm_freed->vm_list, &vm_list);
@@ -3020,9 +3009,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	int i;
 	unsigned int nr_mmu_pages;
 	unsigned int nr_pages = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->nmemslots; i++)
-		nr_pages += kvm->memslots[i].npages;
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++)
+		nr_pages += slots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3247,7 +3238,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 		if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
 			audit_mappings_page(vcpu, ent, va, level - 1);
 		else {
-			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
+			gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
 			gfn_t gfn = gpa >> PAGE_SHIFT;
 			pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
 			hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
@@ -3292,10 +3283,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
 	int nmaps = 0;
-	int i, j, k;
+	int i, j, k, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+		struct kvm_memory_slot *m = &slots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
@@ -3318,6 +3311,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 	return nmaps;
 }
 
3323 | 3317 | ||