commit    35149e2129fe34fc8cb5917e1ecf5156b0fa3415
tree      b67cb16fa6054769ee476fce99a32601b126af10
parent    fdae862f91728aec6dd8fd62cd2398868c906b6b
author    Anthony Liguori <aliguori@us.ibm.com>  2008-04-02 15:46:56 -0400
committer Avi Kivity <avi@qumranet.com>          2008-04-27 05:01:15 -0400
KVM: MMU: Don't assume struct page for x86
This patch introduces a gfn_to_pfn() function and corresponding functions like
kvm_release_pfn_dirty(). Using these new functions, we can modify the x86
MMU to no longer assume that it can always get a struct page for any given gfn.
We don't want to eliminate gfn_to_page() entirely, because a number of places
assume they can call gfn_to_page() and then kmap() the result. When we support
IO memory, gfn_to_page() will fail for IO pages, whereas gfn_to_pfn() will
succeed.
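To make the calling convention concrete, here is a minimal sketch of the
fault-path pattern the converted call sites follow (was_writable is a
hypothetical stand-in for the caller's own bookkeeping; the real call sites
are in the mmu.c and paging_tmpl.h hunks below):

	pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);

	/* mmio: no struct page backs this gfn, gfn_to_pfn() returned bad_pfn */
	if (is_error_pfn(pfn)) {
		kvm_release_pfn_clean(pfn);
		return 1;
	}

	/* ... install the mapping, e.g. spte |= (u64)pfn << PAGE_SHIFT ... */

	if (was_writable)	/* hypothetical caller state */
		kvm_release_pfn_dirty(pfn);
	else
		kvm_release_pfn_clean(pfn);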
This does not implement support for avoiding reference counting for reserved
RAM or for IO memory. However, it should make those things pretty
straightforward.
Since we're only introducing new common symbols, I don't think it will break
the non-x86 architectures, but I haven't tested them. I've tested Intel,
AMD, NPT, and hugetlbfs with Windows and Linux guests.
[avi: fix overflow when shifting left pfns by adding casts]
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--  arch/x86/kvm/mmu.c         | 89
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 26
-rw-r--r--  include/asm-x86/kvm_host.h |  4
-rw-r--r--  include/linux/kvm_host.h   | 12
-rw-r--r--  include/linux/kvm_types.h  |  2
-rw-r--r--  virt/kvm/kvm_main.c        | 68
6 files changed, 133 insertions, 68 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c89bf230af67..078a7f1ac34c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -240,11 +240,9 @@ static int is_rmap_pte(u64 pte)
 	return is_shadow_present_pte(pte);
 }
 
-static struct page *spte_to_page(u64 pte)
+static pfn_t spte_to_pfn(u64 pte)
 {
-	hfn_t hfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-
-	return pfn_to_page(hfn);
+	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 }
 
 static gfn_t pse36_gfn_delta(u32 gpte)
@@ -541,20 +539,20 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	struct kvm_rmap_desc *desc;
 	struct kvm_rmap_desc *prev_desc;
 	struct kvm_mmu_page *sp;
-	struct page *page;
+	pfn_t pfn;
 	unsigned long *rmapp;
 	int i;
 
 	if (!is_rmap_pte(*spte))
 		return;
 	sp = page_header(__pa(spte));
-	page = spte_to_page(*spte);
+	pfn = spte_to_pfn(*spte);
 	if (*spte & PT_ACCESSED_MASK)
-		mark_page_accessed(page);
+		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
-		kvm_release_page_dirty(page);
+		kvm_release_pfn_dirty(pfn);
 	else
-		kvm_release_page_clean(page);
+		kvm_release_pfn_clean(pfn);
 	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -635,11 +633,11 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		spte = rmap_next(kvm, rmapp, spte);
 	}
 	if (write_protected) {
-		struct page *page;
+		pfn_t pfn;
 
 		spte = rmap_next(kvm, rmapp, NULL);
-		page = spte_to_page(*spte);
-		SetPageDirty(page);
+		pfn = spte_to_pfn(*spte);
+		kvm_set_pfn_dirty(pfn);
 	}
 
 	/* check for huge page mappings */
@@ -1036,7 +1034,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
 			 int *ptwrite, int largepage, gfn_t gfn,
-			 struct page *page, bool speculative)
+			 pfn_t pfn, bool speculative)
 {
 	u64 spte;
 	int was_rmapped = 0;
@@ -1058,10 +1056,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			mmu_page_remove_parent_pte(child, shadow_pte);
-		} else if (page != spte_to_page(*shadow_pte)) {
+		} else if (pfn != spte_to_pfn(*shadow_pte)) {
 			pgprintk("hfn old %lx new %lx\n",
-				 page_to_pfn(spte_to_page(*shadow_pte)),
-				 page_to_pfn(page));
+				 spte_to_pfn(*shadow_pte), pfn);
 			rmap_remove(vcpu->kvm, shadow_pte);
 		} else {
 			if (largepage)
@@ -1090,7 +1087,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
 
-	spte |= page_to_phys(page);
+	spte |= (u64)pfn << PAGE_SHIFT;
 
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
@@ -1135,12 +1132,12 @@ unshadowed:
 	if (!was_rmapped) {
 		rmap_add(vcpu, shadow_pte, gfn, largepage);
 		if (!is_rmap_pte(*shadow_pte))
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 	} else {
 		if (was_writeble)
-			kvm_release_page_dirty(page);
+			kvm_release_pfn_dirty(pfn);
 		else
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 	}
 	if (!ptwrite || !*ptwrite)
 		vcpu->arch.last_pte_updated = shadow_pte;
@@ -1151,7 +1148,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int largepage, gfn_t gfn, struct page *page,
+			int largepage, gfn_t gfn, pfn_t pfn,
 			int level)
 {
 	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
@@ -1166,13 +1163,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
 		if (level == 1) {
 			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write, 0, gfn, page, false);
+				     0, write, 1, &pt_write, 0, gfn, pfn, false);
 			return pt_write;
 		}
 
 		if (largepage && level == 2) {
 			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write, 1, gfn, page, false);
+				     0, write, 1, &pt_write, 1, gfn, pfn, false);
 			return pt_write;
 		}
 
@@ -1187,7 +1184,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 						     1, ACC_ALL, &table[index]);
 			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
-				kvm_release_page_clean(page);
+				kvm_release_pfn_clean(pfn);
 				return -ENOMEM;
 			}
 
@@ -1202,8 +1199,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
 	int largepage = 0;
-
-	struct page *page;
+	pfn_t pfn;
 
 	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1211,18 +1207,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 		largepage = 1;
 	}
 
-	page = gfn_to_page(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
 		return 1;
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, largepage, gfn, page,
+	r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
 			 PT32E_ROOT_LEVEL);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
@@ -1355,7 +1351,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 				u32 error_code)
 {
-	struct page *page;
+	pfn_t pfn;
 	int r;
 	int largepage = 0;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -1372,16 +1368,16 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
-	page = gfn_to_page(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
 		return 1;
 	}
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, page, TDP_ROOT_LEVEL);
+			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -1525,6 +1521,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.update_pte.pfn = bad_pfn;
+
 	if (tdp_enabled)
 		return init_kvm_tdp_mmu(vcpu);
 	else
@@ -1644,7 +1642,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn_t gfn;
 	int r;
 	u64 gpte = 0;
-	struct page *page;
+	pfn_t pfn;
 
 	vcpu->arch.update_pte.largepage = 0;
 
@@ -1680,15 +1678,15 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		vcpu->arch.update_pte.largepage = 1;
 	}
-	page = gfn_to_page(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
 		return;
 	}
 	vcpu->arch.update_pte.gfn = gfn;
-	vcpu->arch.update_pte.page = page;
+	vcpu->arch.update_pte.pfn = pfn;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1793,9 +1791,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	}
 	kvm_mmu_audit(vcpu, "post pte write");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	if (vcpu->arch.update_pte.page) {
-		kvm_release_page_clean(vcpu->arch.update_pte.page);
-		vcpu->arch.update_pte.page = NULL;
+	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+		kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+		vcpu->arch.update_pte.pfn = bad_pfn;
 	}
 }
 
@@ -2236,8 +2234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 			audit_mappings_page(vcpu, ent, va, level - 1);
 		} else {
 			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
-			struct page *page = gpa_to_page(vcpu, gpa);
-			hpa_t hpa = page_to_phys(page);
+			hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
 
 			if (is_shadow_present_pte(ent)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -2250,7 +2247,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 				 && !is_error_hpa(hpa))
 				printk(KERN_ERR "audit: (%s) notrap shadow,"
 				       " valid guest gva %lx\n", audit_msg, va);
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 
 		}
 	}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 57d872aec663..156fe10288ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -247,7 +247,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 {
 	pt_element_t gpte;
 	unsigned pte_access;
-	struct page *npage;
+	pfn_t pfn;
 	int largepage = vcpu->arch.update_pte.largepage;
 
 	gpte = *(const pt_element_t *)pte;
@@ -260,13 +260,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
 	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
 		return;
-	npage = vcpu->arch.update_pte.page;
-	if (!npage)
+	pfn = vcpu->arch.update_pte.pfn;
+	if (is_error_pfn(pfn))
 		return;
-	get_page(npage);
+	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
-		     npage, true);
+		     pfn, true);
 }
 
 /*
@@ -275,7 +275,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *walker,
 			 int user_fault, int write_fault, int largepage,
-			 int *ptwrite, struct page *page)
+			 int *ptwrite, pfn_t pfn)
 {
 	hpa_t shadow_addr;
 	int level;
@@ -336,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 						  walker->pte_gpa[level - 2],
 						  &curr_pte, sizeof(curr_pte));
 			if (r || curr_pte != walker->ptes[level - 2]) {
-				kvm_release_page_clean(page);
+				kvm_release_pfn_clean(pfn);
 				return NULL;
 			}
 		}
@@ -349,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
 		     user_fault, write_fault,
 		     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
-		     ptwrite, largepage, walker->gfn, page, false);
+		     ptwrite, largepage, walker->gfn, pfn, false);
 
 	return shadow_ent;
 }
@@ -378,7 +378,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	u64 *shadow_pte;
 	int write_pt = 0;
 	int r;
-	struct page *page;
+	pfn_t pfn;
 	int largepage = 0;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -413,20 +413,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 			largepage = 1;
 		}
 	}
-	page = gfn_to_page(vcpu->kvm, walker.gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
-	if (is_error_page(page)) {
+	if (is_error_pfn(pfn)) {
 		pgprintk("gfn %x is mmio\n", walker.gfn);
-		kvm_release_page_clean(page);
+		kvm_release_pfn_clean(pfn);
 		return 1;
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  largepage, &write_pt, page);
+				  largepage, &write_pt, pfn);
 
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 		 shadow_pte, *shadow_pte, write_pt);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index b9230490d777..de3eccfb767c 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -248,8 +248,8 @@ struct kvm_vcpu_arch {
 	u64 *last_pte_updated;
 
 	struct {
 		gfn_t gfn;	/* presumed gfn during guest pte update */
-		struct page *page;	/* page corresponding to that gfn */
+		pfn_t pfn;	/* pfn corresponding to that gfn */
 		int largepage;
 	} update_pte;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a2ceb51b4274..578c3638bbba 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -150,8 +150,10 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
 extern struct page *bad_page;
+extern pfn_t bad_pfn;
 
 int is_error_page(struct page *page);
+int is_error_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem,
@@ -168,6 +170,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
+void kvm_set_page_dirty(struct page *page);
+void kvm_set_page_accessed(struct page *page);
+
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+void kvm_release_pfn_dirty(pfn_t);
+void kvm_release_pfn_clean(pfn_t pfn);
+void kvm_set_pfn_dirty(pfn_t pfn);
+void kvm_set_pfn_accessed(pfn_t pfn);
+void kvm_get_pfn(pfn_t pfn);
+
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 1c4e46decb22..9b6f395c9625 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -38,6 +38,8 @@ typedef unsigned long hva_t;
 typedef u64 hpa_t;
 typedef unsigned long hfn_t;
 
+typedef hfn_t pfn_t;
+
 struct kvm_pio_request {
 	unsigned long count;
 	int cur_count;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 93ed78b015c0..6a52c084e068 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -40,6 +40,7 @@
 #include <linux/kvm_para.h>
 #include <linux/pagemap.h>
 #include <linux/mman.h>
+#include <linux/swap.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -458,6 +459,12 @@ int is_error_page(struct page *page)
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
+int is_error_pfn(pfn_t pfn)
+{
+	return pfn == bad_pfn;
+}
+EXPORT_SYMBOL_GPL(is_error_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -519,7 +526,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 /*
  * Requires current->mm->mmap_sem to be held
  */
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
 	struct page *page[1];
 	unsigned long addr;
@@ -530,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr)) {
 		get_page(bad_page);
-		return bad_page;
+		return page_to_pfn(bad_page);
 	}
 
 	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
@@ -538,27 +545,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
 	if (npages != 1) {
 		get_page(bad_page);
-		return bad_page;
+		return page_to_pfn(bad_page);
 	}
 
-	return page[0];
+	return page_to_pfn(page[0]);
+}
+
+EXPORT_SYMBOL_GPL(gfn_to_pfn);
+
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+	return pfn_to_page(gfn_to_pfn(kvm, gfn));
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
-	put_page(page);
+	kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
+void kvm_release_pfn_clean(pfn_t pfn)
+{
+	put_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
+
 void kvm_release_page_dirty(struct page *page)
 {
+	kvm_release_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+
+void kvm_release_pfn_dirty(pfn_t pfn)
+{
+	kvm_set_pfn_dirty(pfn);
+	kvm_release_pfn_clean(pfn);
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
+
+void kvm_set_page_dirty(struct page *page)
+{
+	kvm_set_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
+
+void kvm_set_pfn_dirty(pfn_t pfn)
+{
+	struct page *page = pfn_to_page(pfn);
 	if (!PageReserved(page))
 		SetPageDirty(page);
-	put_page(page);
 }
-EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
+
+void kvm_set_pfn_accessed(pfn_t pfn)
+{
+	mark_page_accessed(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
+
+void kvm_get_pfn(pfn_t pfn)
+{
+	get_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
 static int next_segment(unsigned long len, int offset)
 {
@@ -1351,6 +1402,7 @@ static struct sys_device kvm_sysdev = {
 };
 
 struct page *bad_page;
+pfn_t bad_pfn;
 
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -1392,6 +1444,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 		goto out;
 	}
 
+	bad_pfn = page_to_pfn(bad_page);
+
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
 		goto out_free_0;