author     Anthony Liguori <aliguori@us.ibm.com>   2008-04-02 15:46:56 -0400
committer  Avi Kivity <avi@qumranet.com>           2008-04-27 05:01:15 -0400
commit     35149e2129fe34fc8cb5917e1ecf5156b0fa3415 (patch)
tree       b67cb16fa6054769ee476fce99a32601b126af10
parent     fdae862f91728aec6dd8fd62cd2398868c906b6b (diff)
KVM: MMU: Don't assume struct page for x86
This patch introduces a gfn_to_pfn() function and corresponding functions like
kvm_release_pfn_dirty().  Using these new functions, we can modify the x86
MMU to no longer assume that it can always get a struct page for any given gfn.

We don't want to eliminate gfn_to_page() entirely because a number of places
assume they can do gfn_to_page() and then kmap() the result.  When we support
IO memory, gfn_to_page() will fail for IO pages although gfn_to_pfn() will
succeed.

This does not implement support for avoiding reference counting for reserved
RAM or for IO memory.  However, it should make those things pretty
straightforward.

Since we're only introducing new common symbols, I don't think it will break
the non-x86 architectures, but I haven't tested those.  I've tested Intel,
AMD, NPT, and hugetlbfs with Windows and Linux guests.

[avi: fix overflow when shifting left pfns by adding casts]

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
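As a rough illustration of the calling convention this patch introduces (the
function below is not part of the patch; its name and surrounding error
handling are made up for illustration), a caller that previously used
gfn_to_page()/is_error_page()/kvm_release_page_*() would now follow this
pattern:

/*
 * Illustrative sketch only, not from this patch: translate a gfn to a pfn,
 * test for the error pfn rather than the error page, and drop the reference
 * with the pfn-based helpers introduced here.
 */
static int map_gfn_sketch(struct kvm_vcpu *vcpu, gfn_t gfn, int write)
{
	pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);	/* was gfn_to_page() */

	if (is_error_pfn(pfn)) {		/* was is_error_page() */
		kvm_release_pfn_clean(pfn);
		return 1;			/* treat as mmio, as the MMU paths do */
	}

	/* ... install a shadow pte pointing at (u64)pfn << PAGE_SHIFT ... */

	if (write)
		kvm_release_pfn_dirty(pfn);	/* was kvm_release_page_dirty() */
	else
		kvm_release_pfn_clean(pfn);
	return 0;
}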
-rw-r--r--  arch/x86/kvm/mmu.c          |  89
-rw-r--r--  arch/x86/kvm/paging_tmpl.h  |  26
-rw-r--r--  include/asm-x86/kvm_host.h  |   4
-rw-r--r--  include/linux/kvm_host.h    |  12
-rw-r--r--  include/linux/kvm_types.h   |   2
-rw-r--r--  virt/kvm/kvm_main.c         |  68
6 files changed, 133 insertions, 68 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c89bf230af67..078a7f1ac34c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -240,11 +240,9 @@ static int is_rmap_pte(u64 pte)
         return is_shadow_present_pte(pte);
 }
 
-static struct page *spte_to_page(u64 pte)
+static pfn_t spte_to_pfn(u64 pte)
 {
-        hfn_t hfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-
-        return pfn_to_page(hfn);
+        return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 }
 
 static gfn_t pse36_gfn_delta(u32 gpte)
@@ -541,20 +539,20 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
         struct kvm_rmap_desc *desc;
         struct kvm_rmap_desc *prev_desc;
         struct kvm_mmu_page *sp;
-        struct page *page;
+        pfn_t pfn;
         unsigned long *rmapp;
         int i;
 
         if (!is_rmap_pte(*spte))
                 return;
         sp = page_header(__pa(spte));
-        page = spte_to_page(*spte);
+        pfn = spte_to_pfn(*spte);
         if (*spte & PT_ACCESSED_MASK)
-                mark_page_accessed(page);
+                kvm_set_pfn_accessed(pfn);
         if (is_writeble_pte(*spte))
-                kvm_release_page_dirty(page);
+                kvm_release_pfn_dirty(pfn);
         else
-                kvm_release_page_clean(page);
+                kvm_release_pfn_clean(pfn);
         rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
         if (!*rmapp) {
                 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -635,11 +633,11 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
                 spte = rmap_next(kvm, rmapp, spte);
         }
         if (write_protected) {
-                struct page *page;
+                pfn_t pfn;
 
                 spte = rmap_next(kvm, rmapp, NULL);
-                page = spte_to_page(*spte);
-                SetPageDirty(page);
+                pfn = spte_to_pfn(*spte);
+                kvm_set_pfn_dirty(pfn);
         }
 
         /* check for huge page mappings */
@@ -1036,7 +1034,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                          unsigned pt_access, unsigned pte_access,
                          int user_fault, int write_fault, int dirty,
                          int *ptwrite, int largepage, gfn_t gfn,
-                         struct page *page, bool speculative)
+                         pfn_t pfn, bool speculative)
 {
         u64 spte;
         int was_rmapped = 0;
@@ -1058,10 +1056,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
                         child = page_header(pte & PT64_BASE_ADDR_MASK);
                         mmu_page_remove_parent_pte(child, shadow_pte);
-                } else if (page != spte_to_page(*shadow_pte)) {
+                } else if (pfn != spte_to_pfn(*shadow_pte)) {
                         pgprintk("hfn old %lx new %lx\n",
-                                 page_to_pfn(spte_to_page(*shadow_pte)),
-                                 page_to_pfn(page));
+                                 spte_to_pfn(*shadow_pte), pfn);
                         rmap_remove(vcpu->kvm, shadow_pte);
                 } else {
                         if (largepage)
@@ -1090,7 +1087,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
         if (largepage)
                 spte |= PT_PAGE_SIZE_MASK;
 
-        spte |= page_to_phys(page);
+        spte |= (u64)pfn << PAGE_SHIFT;
 
         if ((pte_access & ACC_WRITE_MASK)
             || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
@@ -1135,12 +1132,12 @@ unshadowed:
         if (!was_rmapped) {
                 rmap_add(vcpu, shadow_pte, gfn, largepage);
                 if (!is_rmap_pte(*shadow_pte))
-                        kvm_release_page_clean(page);
+                        kvm_release_pfn_clean(pfn);
         } else {
                 if (was_writeble)
-                        kvm_release_page_dirty(page);
+                        kvm_release_pfn_dirty(pfn);
                 else
-                        kvm_release_page_clean(page);
+                        kvm_release_pfn_clean(pfn);
         }
         if (!ptwrite || !*ptwrite)
                 vcpu->arch.last_pte_updated = shadow_pte;
@@ -1151,7 +1148,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-                        int largepage, gfn_t gfn, struct page *page,
+                        int largepage, gfn_t gfn, pfn_t pfn,
                         int level)
 {
         hpa_t table_addr = vcpu->arch.mmu.root_hpa;
@@ -1166,13 +1163,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
                 if (level == 1) {
                         mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-                                     0, write, 1, &pt_write, 0, gfn, page, false);
+                                     0, write, 1, &pt_write, 0, gfn, pfn, false);
                         return pt_write;
                 }
 
                 if (largepage && level == 2) {
                         mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-                                     0, write, 1, &pt_write, 1, gfn, page, false);
+                                     0, write, 1, &pt_write, 1, gfn, pfn, false);
                         return pt_write;
                 }
 
@@ -1187,7 +1184,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
                                                      1, ACC_ALL, &table[index]);
                         if (!new_table) {
                                 pgprintk("nonpaging_map: ENOMEM\n");
-                                kvm_release_page_clean(page);
+                                kvm_release_pfn_clean(pfn);
                                 return -ENOMEM;
                         }
 
@@ -1202,8 +1199,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
         int r;
         int largepage = 0;
-
-        struct page *page;
+        pfn_t pfn;
 
         down_read(&current->mm->mmap_sem);
         if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1211,18 +1207,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
                 largepage = 1;
         }
 
-        page = gfn_to_page(vcpu->kvm, gfn);
+        pfn = gfn_to_pfn(vcpu->kvm, gfn);
         up_read(&current->mm->mmap_sem);
 
         /* mmio */
-        if (is_error_page(page)) {
-                kvm_release_page_clean(page);
+        if (is_error_pfn(pfn)) {
+                kvm_release_pfn_clean(pfn);
                 return 1;
         }
 
         spin_lock(&vcpu->kvm->mmu_lock);
         kvm_mmu_free_some_pages(vcpu);
-        r = __direct_map(vcpu, v, write, largepage, gfn, page,
+        r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
                          PT32E_ROOT_LEVEL);
         spin_unlock(&vcpu->kvm->mmu_lock);
 
@@ -1355,7 +1351,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                           u32 error_code)
 {
-        struct page *page;
+        pfn_t pfn;
         int r;
         int largepage = 0;
         gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -1372,16 +1368,16 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
                 largepage = 1;
         }
-        page = gfn_to_page(vcpu->kvm, gfn);
+        pfn = gfn_to_pfn(vcpu->kvm, gfn);
         up_read(&current->mm->mmap_sem);
-        if (is_error_page(page)) {
-                kvm_release_page_clean(page);
+        if (is_error_pfn(pfn)) {
+                kvm_release_pfn_clean(pfn);
                 return 1;
         }
         spin_lock(&vcpu->kvm->mmu_lock);
         kvm_mmu_free_some_pages(vcpu);
         r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-                         largepage, gfn, page, TDP_ROOT_LEVEL);
+                         largepage, gfn, pfn, TDP_ROOT_LEVEL);
         spin_unlock(&vcpu->kvm->mmu_lock);
 
         return r;
@@ -1525,6 +1521,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
+        vcpu->arch.update_pte.pfn = bad_pfn;
+
         if (tdp_enabled)
                 return init_kvm_tdp_mmu(vcpu);
         else
@@ -1644,7 +1642,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
         gfn_t gfn;
         int r;
         u64 gpte = 0;
-        struct page *page;
+        pfn_t pfn;
 
         vcpu->arch.update_pte.largepage = 0;
 
@@ -1680,15 +1678,15 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
                 vcpu->arch.update_pte.largepage = 1;
         }
-        page = gfn_to_page(vcpu->kvm, gfn);
+        pfn = gfn_to_pfn(vcpu->kvm, gfn);
         up_read(&current->mm->mmap_sem);
 
-        if (is_error_page(page)) {
-                kvm_release_page_clean(page);
+        if (is_error_pfn(pfn)) {
+                kvm_release_pfn_clean(pfn);
                 return;
         }
         vcpu->arch.update_pte.gfn = gfn;
-        vcpu->arch.update_pte.page = page;
+        vcpu->arch.update_pte.pfn = pfn;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1793,9 +1791,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
         }
         kvm_mmu_audit(vcpu, "post pte write");
         spin_unlock(&vcpu->kvm->mmu_lock);
-        if (vcpu->arch.update_pte.page) {
-                kvm_release_page_clean(vcpu->arch.update_pte.page);
-                vcpu->arch.update_pte.page = NULL;
+        if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+                kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+                vcpu->arch.update_pte.pfn = bad_pfn;
         }
 }
 
@@ -2236,8 +2234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                         audit_mappings_page(vcpu, ent, va, level - 1);
                 } else {
                         gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
-                        struct page *page = gpa_to_page(vcpu, gpa);
-                        hpa_t hpa = page_to_phys(page);
+                        hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
 
                         if (is_shadow_present_pte(ent)
                             && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -2250,7 +2247,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                             && !is_error_hpa(hpa))
                                 printk(KERN_ERR "audit: (%s) notrap shadow,"
                                        " valid guest gva %lx\n", audit_msg, va);
-                        kvm_release_page_clean(page);
+                        kvm_release_pfn_clean(pfn);
 
                 }
         }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 57d872aec663..156fe10288ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -247,7 +247,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 {
         pt_element_t gpte;
         unsigned pte_access;
-        struct page *npage;
+        pfn_t pfn;
         int largepage = vcpu->arch.update_pte.largepage;
 
         gpte = *(const pt_element_t *)pte;
@@ -260,13 +260,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
         pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
         if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
                 return;
-        npage = vcpu->arch.update_pte.page;
-        if (!npage)
+        pfn = vcpu->arch.update_pte.pfn;
+        if (is_error_pfn(pfn))
                 return;
-        get_page(npage);
+        kvm_get_pfn(pfn);
         mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
                      gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
-                     npage, true);
+                     pfn, true);
 }
 
 /*
@@ -275,7 +275,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                          struct guest_walker *walker,
                          int user_fault, int write_fault, int largepage,
-                         int *ptwrite, struct page *page)
+                         int *ptwrite, pfn_t pfn)
 {
         hpa_t shadow_addr;
         int level;
@@ -336,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                                                   walker->pte_gpa[level - 2],
                                                   &curr_pte, sizeof(curr_pte));
                         if (r || curr_pte != walker->ptes[level - 2]) {
-                                kvm_release_page_clean(page);
+                                kvm_release_pfn_clean(pfn);
                                 return NULL;
                         }
                 }
@@ -349,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
                      user_fault, write_fault,
                      walker->ptes[walker->level-1] & PT_DIRTY_MASK,
-                     ptwrite, largepage, walker->gfn, page, false);
+                     ptwrite, largepage, walker->gfn, pfn, false);
 
         return shadow_ent;
 }
@@ -378,7 +378,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
         u64 *shadow_pte;
         int write_pt = 0;
         int r;
-        struct page *page;
+        pfn_t pfn;
         int largepage = 0;
 
         pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -413,20 +413,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
                         largepage = 1;
                 }
         }
-        page = gfn_to_page(vcpu->kvm, walker.gfn);
+        pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
         up_read(&current->mm->mmap_sem);
 
         /* mmio */
-        if (is_error_page(page)) {
+        if (is_error_pfn(pfn)) {
                 pgprintk("gfn %x is mmio\n", walker.gfn);
-                kvm_release_page_clean(page);
+                kvm_release_pfn_clean(pfn);
                 return 1;
         }
 
         spin_lock(&vcpu->kvm->mmu_lock);
         kvm_mmu_free_some_pages(vcpu);
         shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-                                  largepage, &write_pt, page);
+                                  largepage, &write_pt, pfn);
 
         pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
                  shadow_pte, *shadow_pte, write_pt);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index b9230490d777..de3eccfb767c 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -248,8 +248,8 @@ struct kvm_vcpu_arch {
         u64 *last_pte_updated;
 
         struct {
                 gfn_t gfn;        /* presumed gfn during guest pte update */
-                struct page *page;        /* page corresponding to that gfn */
+                pfn_t pfn;        /* pfn corresponding to that gfn */
                 int largepage;
         } update_pte;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a2ceb51b4274..578c3638bbba 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -150,8 +150,10 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
 extern struct page *bad_page;
+extern pfn_t bad_pfn;
 
 int is_error_page(struct page *page);
+int is_error_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
                           struct kvm_userspace_memory_region *mem,
@@ -168,6 +170,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
+void kvm_set_page_dirty(struct page *page);
+void kvm_set_page_accessed(struct page *page);
+
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+void kvm_release_pfn_dirty(pfn_t);
+void kvm_release_pfn_clean(pfn_t pfn);
+void kvm_set_pfn_dirty(pfn_t pfn);
+void kvm_set_pfn_accessed(pfn_t pfn);
+void kvm_get_pfn(pfn_t pfn);
+
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
                         int len);
 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 1c4e46decb22..9b6f395c9625 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -38,6 +38,8 @@ typedef unsigned long hva_t;
 typedef u64 hpa_t;
 typedef unsigned long hfn_t;
 
+typedef hfn_t pfn_t;
+
 struct kvm_pio_request {
         unsigned long count;
         int cur_count;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 93ed78b015c0..6a52c084e068 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -40,6 +40,7 @@
 #include <linux/kvm_para.h>
 #include <linux/pagemap.h>
 #include <linux/mman.h>
+#include <linux/swap.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -458,6 +459,12 @@ int is_error_page(struct page *page)
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
+int is_error_pfn(pfn_t pfn)
+{
+        return pfn == bad_pfn;
+}
+EXPORT_SYMBOL_GPL(is_error_pfn);
+
 static inline unsigned long bad_hva(void)
 {
         return PAGE_OFFSET;
@@ -519,7 +526,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 /*
  * Requires current->mm->mmap_sem to be held
  */
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
         struct page *page[1];
         unsigned long addr;
@@ -530,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
         addr = gfn_to_hva(kvm, gfn);
         if (kvm_is_error_hva(addr)) {
                 get_page(bad_page);
-                return bad_page;
+                return page_to_pfn(bad_page);
         }
 
         npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
@@ -538,27 +545,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
         if (npages != 1) {
                 get_page(bad_page);
-                return bad_page;
+                return page_to_pfn(bad_page);
         }
 
-        return page[0];
+        return page_to_pfn(page[0]);
+}
+
+EXPORT_SYMBOL_GPL(gfn_to_pfn);
+
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+        return pfn_to_page(gfn_to_pfn(kvm, gfn));
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
-        put_page(page);
+        kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
+void kvm_release_pfn_clean(pfn_t pfn)
+{
+        put_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
+
 void kvm_release_page_dirty(struct page *page)
 {
+        kvm_release_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+
+void kvm_release_pfn_dirty(pfn_t pfn)
+{
+        kvm_set_pfn_dirty(pfn);
+        kvm_release_pfn_clean(pfn);
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
+
+void kvm_set_page_dirty(struct page *page)
+{
+        kvm_set_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
+
+void kvm_set_pfn_dirty(pfn_t pfn)
+{
+        struct page *page = pfn_to_page(pfn);
         if (!PageReserved(page))
                 SetPageDirty(page);
-        put_page(page);
 }
-EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
+
+void kvm_set_pfn_accessed(pfn_t pfn)
+{
+        mark_page_accessed(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
+
+void kvm_get_pfn(pfn_t pfn)
+{
+        get_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
 static int next_segment(unsigned long len, int offset)
 {
@@ -1351,6 +1402,7 @@ static struct sys_device kvm_sysdev = {
 };
 
 struct page *bad_page;
+pfn_t bad_pfn;
 
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -1392,6 +1444,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
                 goto out;
         }
 
+        bad_pfn = page_to_pfn(bad_page);
+
         r = kvm_arch_hardware_setup();
         if (r < 0)
                 goto out_free_0;