 drivers/kvm/kvm.h         |  3 +
 drivers/kvm/kvm_main.c    |  4 +-
 drivers/kvm/mmu.c         | 89 +++++++++++++++++++++++++++++++++++----------
 drivers/kvm/paging_tmpl.h | 52 ++++++++++++++++++---------
 drivers/kvm/vmx.c         | 11 +++++-
 5 files changed, 122 insertions(+), 37 deletions(-)
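What the patch does, in brief: before this change every non-present shadow PTE uses a single encoding, so every guest page fault — including faults on pages the guest itself maps as not present — exits to kvm. The patch splits "not present" into two encodings: a trapping one for slots kvm still has to shadow, and a non-trapping one for slots known to be not present in the guest page table, letting the CPU deliver those faults straight to the guest. Below is a minimal self-contained userspace model of that distinction — not kernel code; the function names are taken from the hunks that follow, everything else is illustrative:

#include <assert.h>
#include <stdint.h>

/*
 * Userspace model of the two non-present shadow PTE encodings.  For
 * simplicity this ignores PT_SHADOW_IO_MARK, which the real
 * is_shadow_present_pte() masks off first.
 */
static uint64_t shadow_trap_nonpresent_pte;
static uint64_t shadow_notrap_nonpresent_pte;

static void kvm_mmu_set_nonpresent_ptes(uint64_t trap_pte, uint64_t notrap_pte)
{
	shadow_trap_nonpresent_pte = trap_pte;
	shadow_notrap_nonpresent_pte = notrap_pte;
}

/* A shadow PTE is live iff it is neither of the two magic values. */
static int is_shadow_present_pte(uint64_t pte)
{
	return pte != shadow_trap_nonpresent_pte
		&& pte != shadow_notrap_nonpresent_pte;
}

int main(void)
{
	/*
	 * The values vmx_init() installs when bypass_guest_pf=1: the trap
	 * encoding has the P bit set plus high reserved bits, so an access
	 * faults with error_code.P == 1 and exits to kvm; the notrap
	 * encoding is 0, so the fault carries error_code.P == 0 and is
	 * delivered directly to the guest.
	 */
	kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
	assert(!is_shadow_present_pte(shadow_trap_nonpresent_pte));
	assert(!is_shadow_present_pte(shadow_notrap_nonpresent_pte));
	assert(is_shadow_present_pte(0x1001));	/* an ordinary mapping */
	return 0;
}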
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index e885b190b79..7de948e9e64 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -150,6 +150,8 @@ struct kvm_mmu {
 	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
 	void (*free)(struct kvm_vcpu *vcpu);
 	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+	void (*prefetch_page)(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *page);
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
@@ -536,6 +538,7 @@ void kvm_mmu_module_exit(void);
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 710483669f3..82cc7ae0fc8 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -3501,7 +3501,9 @@ int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
 
-	return r;
+	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
+
+	return 0;
 
 out_free:
 	kmem_cache_destroy(kvm_vcpu_cache);
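Two things happen in this hunk: kvm_init_x86() now installs a conservative default — both encodings are 0, so the presence test degenerates to the old "non-zero means present" behaviour until a vendor module overrides it (see vmx.c below) — and the success path returns an explicit 0 rather than the leftover r. Dropping these lines into the main() of the userspace model above shows the default in action (illustrative only, not kernel code):

	/* With the defaults installed here the two encodings coincide,
	 * and every non-zero PTE counts as present — exactly the
	 * pre-patch behaviour. */
	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
	assert(!is_shadow_present_pte(0));
	assert(is_shadow_present_pte(0x1001));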
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index feb5ac986c5..069ce83f018 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -156,6 +156,16 @@ static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
 static struct kmem_cache *mmu_page_header_cache;
 
+static u64 __read_mostly shadow_trap_nonpresent_pte;
+static u64 __read_mostly shadow_notrap_nonpresent_pte;
+
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
+{
+	shadow_trap_nonpresent_pte = trap_pte;
+	shadow_notrap_nonpresent_pte = notrap_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->cr0 & X86_CR0_WP;
@@ -176,6 +186,13 @@ static int is_present_pte(unsigned long pte)
 	return pte & PT_PRESENT_MASK;
 }
 
+static int is_shadow_present_pte(u64 pte)
+{
+	pte &= ~PT_SHADOW_IO_MARK;
+	return pte != shadow_trap_nonpresent_pte
+		&& pte != shadow_notrap_nonpresent_pte;
+}
+
 static int is_writeble_pte(unsigned long pte)
 {
 	return pte & PT_WRITABLE_MASK;
@@ -450,7 +467,7 @@ static int is_empty_shadow_page(u64 *spt)
 	u64 *end;
 
 	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
-		if (*pos != 0) {
+		if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
 			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
 			       pos, *pos);
 			return 0;
@@ -632,6 +649,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	page->gfn = gfn;
 	page->role = role;
 	hlist_add_head(&page->hash_link, bucket);
+	vcpu->mmu.prefetch_page(vcpu, page);
 	if (!metaphysical)
 		rmap_write_protect(vcpu, gfn);
 	return page;
@@ -648,9 +666,9 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 
 	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-			if (pt[i] & PT_PRESENT_MASK)
+			if (is_shadow_present_pte(pt[i]))
 				rmap_remove(&pt[i]);
-			pt[i] = 0;
+			pt[i] = shadow_trap_nonpresent_pte;
 		}
 		kvm_flush_remote_tlbs(kvm);
 		return;
@@ -659,8 +677,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 		ent = pt[i];
 
-		pt[i] = 0;
-		if (!(ent & PT_PRESENT_MASK))
+		pt[i] = shadow_trap_nonpresent_pte;
+		if (!is_shadow_present_pte(ent))
 			continue;
 		ent &= PT64_BASE_ADDR_MASK;
 		mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
@@ -691,7 +709,7 @@ static void kvm_mmu_zap_page(struct kvm *kvm,
 		}
 		BUG_ON(!parent_pte);
 		kvm_mmu_put_page(page, parent_pte);
-		set_shadow_pte(parent_pte, 0);
+		set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte);
 	}
 	kvm_mmu_page_unlink_children(kvm, page);
 	if (!page->root_count) {
@@ -798,7 +816,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 
 		if (level == 1) {
 			pte = table[index];
-			if (is_present_pte(pte) && is_writeble_pte(pte))
+			if (is_shadow_present_pte(pte) && is_writeble_pte(pte))
 				return 0;
 			mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
 			page_header_update_slot(vcpu->kvm, table, v);
@@ -808,7 +826,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 			return 0;
 		}
 
-		if (table[index] == 0) {
+		if (table[index] == shadow_trap_nonpresent_pte) {
 			struct kvm_mmu_page *new_table;
 			gfn_t pseudo_gfn;
 
@@ -829,6 +847,15 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 	}
 }
 
+static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
+				    struct kvm_mmu_page *sp)
+{
+	int i;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
+		sp->spt[i] = shadow_trap_nonpresent_pte;
+}
+
 static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -943,6 +970,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 	context->page_fault = nonpaging_page_fault;
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
 	context->free = nonpaging_free;
+	context->prefetch_page = nonpaging_prefetch_page;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
@@ -989,6 +1017,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
+	context->prefetch_page = paging64_prefetch_page;
 	context->free = paging_free;
 	context->root_level = level;
 	context->shadow_root_level = level;
@@ -1009,6 +1038,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->free = paging_free;
+	context->prefetch_page = paging32_prefetch_page;
 	context->root_level = PT32_ROOT_LEVEL;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
@@ -1081,7 +1111,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 	struct kvm_mmu_page *child;
 
 	pte = *spte;
-	if (is_present_pte(pte)) {
+	if (is_shadow_present_pte(pte)) {
 		if (page->role.level == PT_PAGE_TABLE_LEVEL)
 			rmap_remove(spte);
 		else {
@@ -1089,22 +1119,25 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 			mmu_page_remove_parent_pte(child, spte);
 		}
 	}
-	set_shadow_pte(spte, 0);
+	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *page,
 				  u64 *spte,
-				  const void *new, int bytes)
+				  const void *new, int bytes,
+				  int offset_in_pte)
 {
 	if (page->role.level != PT_PAGE_TABLE_LEVEL)
 		return;
 
 	if (page->role.glevels == PT32_ROOT_LEVEL)
-		paging32_update_pte(vcpu, page, spte, new, bytes);
+		paging32_update_pte(vcpu, page, spte, new, bytes,
+				    offset_in_pte);
 	else
-		paging64_update_pte(vcpu, page, spte, new, bytes);
+		paging64_update_pte(vcpu, page, spte, new, bytes,
+				    offset_in_pte);
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1126,6 +1159,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int npte;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+	kvm_mmu_audit(vcpu, "pre pte write");
 	if (gfn == vcpu->last_pt_write_gfn) {
 		++vcpu->last_pt_write_count;
 		if (vcpu->last_pt_write_count >= 3)
@@ -1181,10 +1215,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		spte = &page->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
 			mmu_pte_write_zap_pte(vcpu, page, spte);
-			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
+			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes,
+					      page_offset & (pte_size - 1));
 			++spte;
 		}
 	}
+	kvm_mmu_audit(vcpu, "post pte write");
 }
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
@@ -1359,22 +1395,33 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
 		u64 ent = pt[i];
 
-		if (!(ent & PT_PRESENT_MASK))
+		if (ent == shadow_trap_nonpresent_pte)
 			continue;
 
 		va = canonicalize(va);
-		if (level > 1)
+		if (level > 1) {
+			if (ent == shadow_notrap_nonpresent_pte)
+				printk(KERN_ERR "audit: (%s) nontrapping pte"
+				       " in nonleaf level: levels %d gva %lx"
+				       " level %d pte %llx\n", audit_msg,
+				       vcpu->mmu.root_level, va, level, ent);
+
 			audit_mappings_page(vcpu, ent, va, level - 1);
-		else {
+		} else {
 			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
 			hpa_t hpa = gpa_to_hpa(vcpu, gpa);
 
-			if ((ent & PT_PRESENT_MASK)
+			if (is_shadow_present_pte(ent)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
-				printk(KERN_ERR "audit error: (%s) levels %d"
-				       " gva %lx gpa %llx hpa %llx ent %llx\n",
+				printk(KERN_ERR "xx audit error: (%s) levels %d"
+				       " gva %lx gpa %llx hpa %llx ent %llx %d\n",
 				       audit_msg, vcpu->mmu.root_level,
-				       va, gpa, hpa, ent);
+				       va, gpa, hpa, ent, is_shadow_present_pte(ent));
+			else if (ent == shadow_notrap_nonpresent_pte
+				 && !is_error_hpa(hpa))
+				printk(KERN_ERR "audit: (%s) notrap shadow,"
+				       " valid guest gva %lx\n", audit_msg, va);
+
 		}
 	}
 }
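The mmu.c changes all serve one invariant: a slot kvm stops tracking must return to the *trapping* encoding — never plain 0 and never the bypass encoding — because kvm no longer knows the state of the corresponding guest PTE. Hence every former `= 0` and `set_shadow_pte(..., 0)` site now writes shadow_trap_nonpresent_pte, presence tests move from PT_PRESENT_MASK to is_shadow_present_pte(), and kvm_mmu_get_page() invokes the new per-mode prefetch_page() hook so a freshly instantiated shadow page starts out fully populated with one of the two encodings. A compressed model of the hook dispatch, continuing the userspace sketch from above (the _model names are invented; kvm_mmu_page and the vcpu plumbing are elided):

#define PT64_ENT_PER_PAGE 512

/* Per-paging-mode hook, mirroring the new struct kvm_mmu member. */
struct mmu_model {
	void (*prefetch_page)(uint64_t *spt);
};

/* The nonpaging mode (and any mode that cannot do better) makes every
 * entry trap, so the first touch of each slot exits to kvm. */
static void nonpaging_prefetch_page_model(uint64_t *spt)
{
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
		spt[i] = shadow_trap_nonpresent_pte;
}

/* Stands in for the point where kvm_mmu_get_page() calls
 * vcpu->mmu.prefetch_page(vcpu, page). */
static void instantiate_shadow_page(struct mmu_model *mmu, uint64_t *spt)
{
	mmu->prefetch_page(spt);
}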
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 6b094b44f8f..99ac9b15f77 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -31,6 +31,7 @@
 #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
+#define PT_LEVEL_BITS PT64_LEVEL_BITS
 #ifdef CONFIG_X86_64
 #define PT_MAX_FULL_LEVELS 4
 #else
@@ -45,6 +46,7 @@
 #define PT_INDEX(addr, level) PT32_INDEX(addr, level)
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
+#define PT_LEVEL_BITS PT32_LEVEL_BITS
 #define PT_MAX_FULL_LEVELS 2
 #else
 #error Invalid PTTYPE value
@@ -211,12 +213,12 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 {
 	hpa_t paddr;
 	int dirty = gpte & PT_DIRTY_MASK;
-	u64 spte = *shadow_pte;
-	int was_rmapped = is_rmap_pte(spte);
+	u64 spte;
+	int was_rmapped = is_rmap_pte(*shadow_pte);
 
 	pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
 		 " user_fault %d gfn %lx\n",
-		 __FUNCTION__, spte, (u64)gpte, access_bits,
+		 __FUNCTION__, *shadow_pte, (u64)gpte, access_bits,
 		 write_fault, user_fault, gfn);
 
 	if (write_fault && !dirty) {
@@ -236,7 +238,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 		FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
 	}
 
-	spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
+	spte = PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
 	spte |= gpte & PT64_NX_MASK;
 	if (!dirty)
 		access_bits &= ~PT_WRITABLE_MASK;
@@ -248,10 +250,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 		spte |= PT_USER_MASK;
 
 	if (is_error_hpa(paddr)) {
-		spte |= gaddr;
-		spte |= PT_SHADOW_IO_MARK;
-		spte &= ~PT_PRESENT_MASK;
-		set_shadow_pte(shadow_pte, spte);
+		set_shadow_pte(shadow_pte,
+			       shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
 		return;
 	}
 
@@ -286,6 +286,7 @@ unshadowed:
 	if (access_bits & PT_WRITABLE_MASK)
 		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
 
+	pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
 	set_shadow_pte(shadow_pte, spte);
 	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
 	if (!was_rmapped)
@@ -304,14 +305,18 @@ static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
 }
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
-			      u64 *spte, const void *pte, int bytes)
+			      u64 *spte, const void *pte, int bytes,
+			      int offset_in_pte)
 {
 	pt_element_t gpte;
 
-	if (bytes < sizeof(pt_element_t))
-		return;
 	gpte = *(const pt_element_t *)pte;
-	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
+	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
+		if (!offset_in_pte && !is_present_pte(gpte))
+			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
+		return;
+	}
+	if (bytes < sizeof(pt_element_t))
 		return;
 	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
 	FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
@@ -368,7 +373,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		unsigned hugepage_access = 0;
 
 		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
-		if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
+		if (is_shadow_present_pte(*shadow_ent)) {
 			if (level == PT_PAGE_TABLE_LEVEL)
 				break;
 			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
@@ -500,6 +505,26 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 	return gpa;
 }
 
+static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
+				 struct kvm_mmu_page *sp)
+{
+	int i;
+	pt_element_t *gpt;
+
+	if (sp->role.metaphysical || PTTYPE == 32) {
+		nonpaging_prefetch_page(vcpu, sp);
+		return;
+	}
+
+	gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0);
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
+		if (is_present_pte(gpt[i]))
+			sp->spt[i] = shadow_trap_nonpresent_pte;
+		else
+			sp->spt[i] = shadow_notrap_nonpresent_pte;
+	kunmap_atomic(gpt, KM_USER0);
+}
+
 #undef pt_element_t
 #undef guest_walker
 #undef FNAME
@@ -508,4 +533,5 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 #undef SHADOW_PT_INDEX
 #undef PT_LEVEL_MASK
 #undef PT_DIR_BASE_ADDR_MASK
+#undef PT_LEVEL_BITS
 #undef PT_MAX_FULL_LEVELS
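FNAME(prefetch_page) is where the bypass encoding actually gets planted: when a shadow page is instantiated for a 64-bit guest page table, kvm reads the guest table once and pre-marks every guest-not-present slot as notrap, so later guest faults on those slots never exit. Metaphysical pages and PTTYPE == 32 fall back to the all-trapping fill — with 4-byte guest PTEs the guest entries don't line up one-to-one with the 8-byte shadow entries, so the optimization is skipped there. FNAME(update_pte) keeps the encoding fresh: when a guest write leaves a PTE not present (checked only for writes at the PTE's first byte, offset_in_pte == 0), the shadow slot flips to notrap. A minimal model of the prefetch decision, continuing the userspace sketch above (64-bit case only):

#define PT_PRESENT_MASK (1ull << 0)

/* Model of the 64-bit FNAME(prefetch_page) loop: guest-not-present
 * slots get the bypass encoding; everything else must keep trapping
 * until it is actually shadowed. */
static void prefetch_page_model(const uint64_t *gpt, uint64_t *spt)
{
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
		spt[i] = (gpt[i] & PT_PRESENT_MASK)
			? shadow_trap_nonpresent_pte
			: shadow_notrap_nonpresent_pte;
}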
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 8eb49e055ec..27a3318fa6c 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -26,6 +26,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
+#include <linux/moduleparam.h>
 
 #include <asm/io.h>
 #include <asm/desc.h>
@@ -33,6 +34,9 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+static int bypass_guest_pf = 1;
+module_param(bypass_guest_pf, bool, 0);
+
 struct vmcs {
 	u32 revision_id;
 	u32 abort;
@@ -1535,8 +1539,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	}
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
 
-	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
-	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
+	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
 	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
 
 	vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
@@ -2582,6 +2586,9 @@ static int __init vmx_init(void)
 	if (r)
 		goto out1;
 
+	if (bypass_guest_pf)
+		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
+
 	return 0;
 
 out1:
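The vmx.c side closes the loop. With bypass_guest_pf=1 (the default, switchable off via the new module parameter), PAGE_FAULT_ERROR_CODE_MASK and MATCH are both set to 1 and the trap encoding becomes ~0xffeull == 0xfffffffffffff001: P bit set, flag bits 1-11 clear, high reserved bits set so any access through it is guaranteed to fault. Such a fault carries error-code P == 1 and matches the filter, so it exits to kvm; a fault through the notrap encoding (0) carries P == 0 and is delivered straight to the guest. A sketch of the filtering rule follows — per the Intel SDM, when the #PF bit of the exception bitmap is set (as kvm sets it), a page fault causes a vmexit iff (error_code & MASK) == MATCH; the names here are illustrative:

#include <assert.h>
#include <stdint.h>

/* PFEC.P: the fault was on a present page (protection or reserved-bit
 * fault) rather than on a not-present one. */
#define PFERR_PRESENT_MASK (1u << 0)
#define PFERR_WRITE_MASK   (1u << 1)

/* VMX page-fault filtering as configured by this patch. */
static int pf_causes_vmexit(uint32_t error_code,
			    uint32_t pfec_mask, uint32_t pfec_match)
{
	return (error_code & pfec_mask) == pfec_match;
}

int main(void)
{
	/* mask = match = !!bypass_guest_pf = 1 */
	assert(pf_causes_vmexit(PFERR_PRESENT_MASK | PFERR_WRITE_MASK, 1, 1));
	/* reserved-bit fault on a trap-encoded SPTE: kvm handles it */

	assert(!pf_causes_vmexit(PFERR_WRITE_MASK, 1, 1));
	/* write to a notrap SPTE: the guest's #PF handler runs, no exit */

	assert(pf_causes_vmexit(PFERR_WRITE_MASK, 0, 0));
	/* mask = match = 0: every fault exits — the old behaviour */
	return 0;
}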