diff options
Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r-- | drivers/kvm/mmu.c | 89 |
1 files changed, 68 insertions, 21 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index feb5ac986c5d..069ce83f018e 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
@@ -156,6 +156,16 @@ static struct kmem_cache *pte_chain_cache; | |||
156 | static struct kmem_cache *rmap_desc_cache; | 156 | static struct kmem_cache *rmap_desc_cache; |
157 | static struct kmem_cache *mmu_page_header_cache; | 157 | static struct kmem_cache *mmu_page_header_cache; |
158 | 158 | ||
/*
 * The two shadow pte values this file treats as "not present".
 * shadow_trap_nonpresent_pte is the value written wherever this patch
 * clears or initializes a shadow entry (it replaces the literal 0 that
 * the old code stored).
 * NOTE(review): the trap/notrap names suggest whether a guest access through
 * the entry is meant to fault into kvm or not; that distinction is not
 * visible in this diff -- confirm against the callers that pick the values.
 */
159 | static u64 __read_mostly shadow_trap_nonpresent_pte; | ||
160 | static u64 __read_mostly shadow_notrap_nonpresent_pte; | ||
161 | |||
/*
 * kvm_mmu_set_nonpresent_ptes - set both "not present" shadow pte values.
 * @trap_pte:   value stored in shadow_trap_nonpresent_pte
 * @notrap_pte: value stored in shadow_notrap_nonpresent_pte
 *
 * Plain assignment of the two file-scope variables; no validation is done
 * on the values. Exported GPL-only so code outside this file can call it.
 */
162 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | ||
163 | { | ||
164 | shadow_trap_nonpresent_pte = trap_pte; | ||
165 | shadow_notrap_nonpresent_pte = notrap_pte; | ||
166 | } | ||
167 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); | ||
168 | |||
159 | static int is_write_protection(struct kvm_vcpu *vcpu) | 169 | static int is_write_protection(struct kvm_vcpu *vcpu) |
160 | { | 170 | { |
161 | return vcpu->cr0 & X86_CR0_WP; | 171 | return vcpu->cr0 & X86_CR0_WP; |
@@ -176,6 +186,13 @@ static int is_present_pte(unsigned long pte) | |||
176 | return pte & PT_PRESENT_MASK; | 186 | return pte & PT_PRESENT_MASK; |
177 | } | 187 | } |
178 | 188 | ||
/*
 * is_shadow_present_pte - test whether a shadow pte is a present entry.
 * @pte: shadow pte value to test (passed by value; the masking below does
 *       not modify the caller's copy)
 *
 * The PT_SHADOW_IO_MARK bit is cleared before comparing, so an entry that
 * differs from a nonpresent value only by that bit still counts as not
 * present.
 *
 * Returns nonzero if the masked value differs from both
 * shadow_trap_nonpresent_pte and shadow_notrap_nonpresent_pte, 0 otherwise.
 */
189 | static int is_shadow_present_pte(u64 pte) | ||
190 | { | ||
191 | pte &= ~PT_SHADOW_IO_MARK; | ||
192 | return pte != shadow_trap_nonpresent_pte | ||
193 | && pte != shadow_notrap_nonpresent_pte; | ||
194 | } | ||
195 | |||
179 | static int is_writeble_pte(unsigned long pte) | 196 | static int is_writeble_pte(unsigned long pte) |
180 | { | 197 | { |
181 | return pte & PT_WRITABLE_MASK; | 198 | return pte & PT_WRITABLE_MASK; |
@@ -450,7 +467,7 @@ static int is_empty_shadow_page(u64 *spt) | |||
450 | u64 *end; | 467 | u64 *end; |
451 | 468 | ||
452 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) | 469 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) |
453 | if (*pos != 0) { | 470 | if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) { |
454 | printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, | 471 | printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, |
455 | pos, *pos); | 472 | pos, *pos); |
456 | return 0; | 473 | return 0; |
@@ -632,6 +649,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
632 | page->gfn = gfn; | 649 | page->gfn = gfn; |
633 | page->role = role; | 650 | page->role = role; |
634 | hlist_add_head(&page->hash_link, bucket); | 651 | hlist_add_head(&page->hash_link, bucket); |
652 | vcpu->mmu.prefetch_page(vcpu, page); | ||
635 | if (!metaphysical) | 653 | if (!metaphysical) |
636 | rmap_write_protect(vcpu, gfn); | 654 | rmap_write_protect(vcpu, gfn); |
637 | return page; | 655 | return page; |
@@ -648,9 +666,9 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
648 | 666 | ||
649 | if (page->role.level == PT_PAGE_TABLE_LEVEL) { | 667 | if (page->role.level == PT_PAGE_TABLE_LEVEL) { |
650 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 668 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
651 | if (pt[i] & PT_PRESENT_MASK) | 669 | if (is_shadow_present_pte(pt[i])) |
652 | rmap_remove(&pt[i]); | 670 | rmap_remove(&pt[i]); |
653 | pt[i] = 0; | 671 | pt[i] = shadow_trap_nonpresent_pte; |
654 | } | 672 | } |
655 | kvm_flush_remote_tlbs(kvm); | 673 | kvm_flush_remote_tlbs(kvm); |
656 | return; | 674 | return; |
@@ -659,8 +677,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
659 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 677 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
660 | ent = pt[i]; | 678 | ent = pt[i]; |
661 | 679 | ||
662 | pt[i] = 0; | 680 | pt[i] = shadow_trap_nonpresent_pte; |
663 | if (!(ent & PT_PRESENT_MASK)) | 681 | if (!is_shadow_present_pte(ent)) |
664 | continue; | 682 | continue; |
665 | ent &= PT64_BASE_ADDR_MASK; | 683 | ent &= PT64_BASE_ADDR_MASK; |
666 | mmu_page_remove_parent_pte(page_header(ent), &pt[i]); | 684 | mmu_page_remove_parent_pte(page_header(ent), &pt[i]); |
@@ -691,7 +709,7 @@ static void kvm_mmu_zap_page(struct kvm *kvm, | |||
691 | } | 709 | } |
692 | BUG_ON(!parent_pte); | 710 | BUG_ON(!parent_pte); |
693 | kvm_mmu_put_page(page, parent_pte); | 711 | kvm_mmu_put_page(page, parent_pte); |
694 | set_shadow_pte(parent_pte, 0); | 712 | set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte); |
695 | } | 713 | } |
696 | kvm_mmu_page_unlink_children(kvm, page); | 714 | kvm_mmu_page_unlink_children(kvm, page); |
697 | if (!page->root_count) { | 715 | if (!page->root_count) { |
@@ -798,7 +816,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
798 | 816 | ||
799 | if (level == 1) { | 817 | if (level == 1) { |
800 | pte = table[index]; | 818 | pte = table[index]; |
801 | if (is_present_pte(pte) && is_writeble_pte(pte)) | 819 | if (is_shadow_present_pte(pte) && is_writeble_pte(pte)) |
802 | return 0; | 820 | return 0; |
803 | mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); | 821 | mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); |
804 | page_header_update_slot(vcpu->kvm, table, v); | 822 | page_header_update_slot(vcpu->kvm, table, v); |
@@ -808,7 +826,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
808 | return 0; | 826 | return 0; |
809 | } | 827 | } |
810 | 828 | ||
811 | if (table[index] == 0) { | 829 | if (table[index] == shadow_trap_nonpresent_pte) { |
812 | struct kvm_mmu_page *new_table; | 830 | struct kvm_mmu_page *new_table; |
813 | gfn_t pseudo_gfn; | 831 | gfn_t pseudo_gfn; |
814 | 832 | ||
@@ -829,6 +847,15 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
829 | } | 847 | } |
830 | } | 848 | } |
831 | 849 | ||
/*
 * nonpaging_prefetch_page - prefetch_page callback for the nonpaging context.
 * @vcpu: not used by this implementation
 * @sp:   shadow page whose spt[] array is filled
 *
 * Sets all PT64_ENT_PER_PAGE entries of sp->spt to
 * shadow_trap_nonpresent_pte. Installed as context->prefetch_page by
 * nonpaging_init_context() and invoked through vcpu->mmu.prefetch_page from
 * kvm_mmu_get_page().
 */
850 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | ||
851 | struct kvm_mmu_page *sp) | ||
852 | { | ||
853 | int i; | ||
854 | |||
855 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | ||
856 | sp->spt[i] = shadow_trap_nonpresent_pte; | ||
857 | } | ||
858 | |||
832 | static void mmu_free_roots(struct kvm_vcpu *vcpu) | 859 | static void mmu_free_roots(struct kvm_vcpu *vcpu) |
833 | { | 860 | { |
834 | int i; | 861 | int i; |
@@ -943,6 +970,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
943 | context->page_fault = nonpaging_page_fault; | 970 | context->page_fault = nonpaging_page_fault; |
944 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 971 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
945 | context->free = nonpaging_free; | 972 | context->free = nonpaging_free; |
973 | context->prefetch_page = nonpaging_prefetch_page; | ||
946 | context->root_level = 0; | 974 | context->root_level = 0; |
947 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 975 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
948 | context->root_hpa = INVALID_PAGE; | 976 | context->root_hpa = INVALID_PAGE; |
@@ -989,6 +1017,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | |||
989 | context->new_cr3 = paging_new_cr3; | 1017 | context->new_cr3 = paging_new_cr3; |
990 | context->page_fault = paging64_page_fault; | 1018 | context->page_fault = paging64_page_fault; |
991 | context->gva_to_gpa = paging64_gva_to_gpa; | 1019 | context->gva_to_gpa = paging64_gva_to_gpa; |
1020 | context->prefetch_page = paging64_prefetch_page; | ||
992 | context->free = paging_free; | 1021 | context->free = paging_free; |
993 | context->root_level = level; | 1022 | context->root_level = level; |
994 | context->shadow_root_level = level; | 1023 | context->shadow_root_level = level; |
@@ -1009,6 +1038,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
1009 | context->page_fault = paging32_page_fault; | 1038 | context->page_fault = paging32_page_fault; |
1010 | context->gva_to_gpa = paging32_gva_to_gpa; | 1039 | context->gva_to_gpa = paging32_gva_to_gpa; |
1011 | context->free = paging_free; | 1040 | context->free = paging_free; |
1041 | context->prefetch_page = paging32_prefetch_page; | ||
1012 | context->root_level = PT32_ROOT_LEVEL; | 1042 | context->root_level = PT32_ROOT_LEVEL; |
1013 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 1043 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
1014 | context->root_hpa = INVALID_PAGE; | 1044 | context->root_hpa = INVALID_PAGE; |
@@ -1081,7 +1111,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
1081 | struct kvm_mmu_page *child; | 1111 | struct kvm_mmu_page *child; |
1082 | 1112 | ||
1083 | pte = *spte; | 1113 | pte = *spte; |
1084 | if (is_present_pte(pte)) { | 1114 | if (is_shadow_present_pte(pte)) { |
1085 | if (page->role.level == PT_PAGE_TABLE_LEVEL) | 1115 | if (page->role.level == PT_PAGE_TABLE_LEVEL) |
1086 | rmap_remove(spte); | 1116 | rmap_remove(spte); |
1087 | else { | 1117 | else { |
@@ -1089,22 +1119,25 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
1089 | mmu_page_remove_parent_pte(child, spte); | 1119 | mmu_page_remove_parent_pte(child, spte); |
1090 | } | 1120 | } |
1091 | } | 1121 | } |
1092 | set_shadow_pte(spte, 0); | 1122 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); |
1093 | kvm_flush_remote_tlbs(vcpu->kvm); | 1123 | kvm_flush_remote_tlbs(vcpu->kvm); |
1094 | } | 1124 | } |
1095 | 1125 | ||
/*
 * mmu_pte_write_new_pte - forward a guest pte write to the matching
 * update_pte routine.
 *
 * Does nothing unless page->role.level is PT_PAGE_TABLE_LEVEL. Otherwise
 * calls paging32_update_pte() when page->role.glevels is PT32_ROOT_LEVEL and
 * paging64_update_pte() in every other case.
 *
 * The right-hand (new) column of this diff adds an offset_in_pte parameter,
 * which is passed through unchanged to the update_pte routine; the caller in
 * kvm_mmu_pte_write() supplies page_offset & (pte_size - 1) for it.
 */
1096 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 1126 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
1097 | struct kvm_mmu_page *page, | 1127 | struct kvm_mmu_page *page, |
1098 | u64 *spte, | 1128 | u64 *spte, |
1099 | const void *new, int bytes) | 1129 | const void *new, int bytes, |
1130 | int offset_in_pte) | ||
1100 | { | 1131 | { |
1101 | if (page->role.level != PT_PAGE_TABLE_LEVEL) | 1132 | if (page->role.level != PT_PAGE_TABLE_LEVEL) |
1102 | return; | 1133 | return; |
1103 | 1134 | ||
1104 | if (page->role.glevels == PT32_ROOT_LEVEL) | 1135 | if (page->role.glevels == PT32_ROOT_LEVEL) |
1105 | paging32_update_pte(vcpu, page, spte, new, bytes); | 1136 | paging32_update_pte(vcpu, page, spte, new, bytes, |
1137 | offset_in_pte); | ||
1106 | else | 1138 | else |
1107 | paging64_update_pte(vcpu, page, spte, new, bytes); | 1139 | paging64_update_pte(vcpu, page, spte, new, bytes, |
1140 | offset_in_pte); | ||
1108 | } | 1141 | } |
1109 | 1142 | ||
1110 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 1143 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -1126,6 +1159,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1126 | int npte; | 1159 | int npte; |
1127 | 1160 | ||
1128 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); | 1161 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); |
1162 | kvm_mmu_audit(vcpu, "pre pte write"); | ||
1129 | if (gfn == vcpu->last_pt_write_gfn) { | 1163 | if (gfn == vcpu->last_pt_write_gfn) { |
1130 | ++vcpu->last_pt_write_count; | 1164 | ++vcpu->last_pt_write_count; |
1131 | if (vcpu->last_pt_write_count >= 3) | 1165 | if (vcpu->last_pt_write_count >= 3) |
@@ -1181,10 +1215,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1181 | spte = &page->spt[page_offset / sizeof(*spte)]; | 1215 | spte = &page->spt[page_offset / sizeof(*spte)]; |
1182 | while (npte--) { | 1216 | while (npte--) { |
1183 | mmu_pte_write_zap_pte(vcpu, page, spte); | 1217 | mmu_pte_write_zap_pte(vcpu, page, spte); |
1184 | mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); | 1218 | mmu_pte_write_new_pte(vcpu, page, spte, new, bytes, |
1219 | page_offset & (pte_size - 1)); | ||
1185 | ++spte; | 1220 | ++spte; |
1186 | } | 1221 | } |
1187 | } | 1222 | } |
1223 | kvm_mmu_audit(vcpu, "post pte write"); | ||
1188 | } | 1224 | } |
1189 | 1225 | ||
1190 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | 1226 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -1359,22 +1395,33 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
1359 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { | 1395 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { |
1360 | u64 ent = pt[i]; | 1396 | u64 ent = pt[i]; |
1361 | 1397 | ||
1362 | if (!(ent & PT_PRESENT_MASK)) | 1398 | if (ent == shadow_trap_nonpresent_pte) |
1363 | continue; | 1399 | continue; |
1364 | 1400 | ||
1365 | va = canonicalize(va); | 1401 | va = canonicalize(va); |
1366 | if (level > 1) | 1402 | if (level > 1) { |
1403 | if (ent == shadow_notrap_nonpresent_pte) | ||
1404 | printk(KERN_ERR "audit: (%s) nontrapping pte" | ||
1405 | " in nonleaf level: levels %d gva %lx" | ||
1406 | " level %d pte %llx\n", audit_msg, | ||
1407 | vcpu->mmu.root_level, va, level, ent); | ||
1408 | |||
1367 | audit_mappings_page(vcpu, ent, va, level - 1); | 1409 | audit_mappings_page(vcpu, ent, va, level - 1); |
1368 | else { | 1410 | } else { |
1369 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va); | 1411 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va); |
1370 | hpa_t hpa = gpa_to_hpa(vcpu, gpa); | 1412 | hpa_t hpa = gpa_to_hpa(vcpu, gpa); |
1371 | 1413 | ||
1372 | if ((ent & PT_PRESENT_MASK) | 1414 | if (is_shadow_present_pte(ent) |
1373 | && (ent & PT64_BASE_ADDR_MASK) != hpa) | 1415 | && (ent & PT64_BASE_ADDR_MASK) != hpa) |
1374 | printk(KERN_ERR "audit error: (%s) levels %d" | 1416 | printk(KERN_ERR "xx audit error: (%s) levels %d" |
1375 | " gva %lx gpa %llx hpa %llx ent %llx\n", | 1417 | " gva %lx gpa %llx hpa %llx ent %llx %d\n", |
1376 | audit_msg, vcpu->mmu.root_level, | 1418 | audit_msg, vcpu->mmu.root_level, |
1377 | va, gpa, hpa, ent); | 1419 | va, gpa, hpa, ent, is_shadow_present_pte(ent)); |
1420 | else if (ent == shadow_notrap_nonpresent_pte | ||
1421 | && !is_error_hpa(hpa)) | ||
1422 | printk(KERN_ERR "audit: (%s) notrap shadow," | ||
1423 | " valid guest gva %lx\n", audit_msg, va); | ||
1424 | |||
1378 | } | 1425 | } |
1379 | } | 1426 | } |
1380 | } | 1427 | } |