aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIzik Eidus <avi@qumranet.com>2007-10-18 05:09:33 -0400
committerAvi Kivity <avi@qumranet.com>2008-01-30 10:52:54 -0500
commit8a7ae055f3533b520401c170ac55e30628b34df5 (patch)
treef9654746dc92fa18ef66e49e12537dc6cb1d32e6
parentcea7bb21280e3a825e64b54740edc5d3e6e4193c (diff)
KVM: MMU: Partial swapping of guest memory
This allows guest memory to be swapped. Pages which are currently mapped via shadow page tables are pinned into memory, but all other pages can be freely swapped. The patch makes gfn_to_page() elevate the page's reference count, and introduces kvm_release_page() that pairs with it. Signed-off-by: Izik Eidus <izike@qumranet.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--drivers/kvm/kvm.h2
-rw-r--r--drivers/kvm/kvm_main.c83
-rw-r--r--drivers/kvm/mmu.c14
-rw-r--r--drivers/kvm/paging_tmpl.h26
4 files changed, 84 insertions, 41 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0c17c76d030f..df0711ce9baa 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -409,6 +409,7 @@ struct kvm_memory_slot {
409 unsigned long *rmap; 409 unsigned long *rmap;
410 unsigned long *dirty_bitmap; 410 unsigned long *dirty_bitmap;
411 int user_alloc; /* user allocated memory */ 411 int user_alloc; /* user allocated memory */
412 unsigned long userspace_addr;
412}; 413};
413 414
414struct kvm { 415struct kvm {
@@ -570,6 +571,7 @@ extern struct page *bad_page;
570int is_error_page(struct page *page); 571int is_error_page(struct page *page);
571gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); 572gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
572struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 573struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
574void kvm_release_page(struct page *page);
573int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 575int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
574 int len); 576 int len);
575int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); 577int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 47000be25479..f86a47c2f255 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -300,19 +300,6 @@ static struct kvm *kvm_create_vm(void)
300 return kvm; 300 return kvm;
301} 301}
302 302
303static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
304{
305 int i;
306
307 for (i = 0; i < free->npages; ++i) {
308 if (free->phys_mem[i]) {
309 if (!PageReserved(free->phys_mem[i]))
310 SetPageDirty(free->phys_mem[i]);
311 page_cache_release(free->phys_mem[i]);
312 }
313 }
314}
315
316static void kvm_free_kernel_physmem(struct kvm_memory_slot *free) 303static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
317{ 304{
318 int i; 305 int i;
@@ -330,9 +317,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
330{ 317{
331 if (!dont || free->phys_mem != dont->phys_mem) 318 if (!dont || free->phys_mem != dont->phys_mem)
332 if (free->phys_mem) { 319 if (free->phys_mem) {
333 if (free->user_alloc) 320 if (!free->user_alloc)
334 kvm_free_userspace_physmem(free);
335 else
336 kvm_free_kernel_physmem(free); 321 kvm_free_kernel_physmem(free);
337 vfree(free->phys_mem); 322 vfree(free->phys_mem);
338 } 323 }
@@ -361,7 +346,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
361 346
362 for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i) 347 for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
363 if (vcpu->pio.guest_pages[i]) { 348 if (vcpu->pio.guest_pages[i]) {
364 __free_page(vcpu->pio.guest_pages[i]); 349 kvm_release_page(vcpu->pio.guest_pages[i]);
365 vcpu->pio.guest_pages[i] = NULL; 350 vcpu->pio.guest_pages[i] = NULL;
366 } 351 }
367} 352}
@@ -752,19 +737,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
752 memset(new.phys_mem, 0, npages * sizeof(struct page *)); 737 memset(new.phys_mem, 0, npages * sizeof(struct page *));
753 memset(new.rmap, 0, npages * sizeof(*new.rmap)); 738 memset(new.rmap, 0, npages * sizeof(*new.rmap));
754 if (user_alloc) { 739 if (user_alloc) {
755 unsigned long pages_num;
756
757 new.user_alloc = 1; 740 new.user_alloc = 1;
758 down_read(&current->mm->mmap_sem); 741 new.userspace_addr = mem->userspace_addr;
759
760 pages_num = get_user_pages(current, current->mm,
761 mem->userspace_addr,
762 npages, 1, 1, new.phys_mem,
763 NULL);
764
765 up_read(&current->mm->mmap_sem);
766 if (pages_num != npages)
767 goto out_unlock;
768 } else { 742 } else {
769 for (i = 0; i < npages; ++i) { 743 for (i = 0; i < npages; ++i) {
770 new.phys_mem[i] = alloc_page(GFP_HIGHUSER 744 new.phys_mem[i] = alloc_page(GFP_HIGHUSER
@@ -1039,12 +1013,39 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
1039 1013
1040 gfn = unalias_gfn(kvm, gfn); 1014 gfn = unalias_gfn(kvm, gfn);
1041 slot = __gfn_to_memslot(kvm, gfn); 1015 slot = __gfn_to_memslot(kvm, gfn);
1042 if (!slot) 1016 if (!slot) {
1017 get_page(bad_page);
1043 return bad_page; 1018 return bad_page;
1019 }
1020 if (slot->user_alloc) {
1021 struct page *page[1];
1022 int npages;
1023
1024 down_read(&current->mm->mmap_sem);
1025 npages = get_user_pages(current, current->mm,
1026 slot->userspace_addr
1027 + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
1028 1, 1, page, NULL);
1029 up_read(&current->mm->mmap_sem);
1030 if (npages != 1) {
1031 get_page(bad_page);
1032 return bad_page;
1033 }
1034 return page[0];
1035 }
1036 get_page(slot->phys_mem[gfn - slot->base_gfn]);
1044 return slot->phys_mem[gfn - slot->base_gfn]; 1037 return slot->phys_mem[gfn - slot->base_gfn];
1045} 1038}
1046EXPORT_SYMBOL_GPL(gfn_to_page); 1039EXPORT_SYMBOL_GPL(gfn_to_page);
1047 1040
1041void kvm_release_page(struct page *page)
1042{
1043 if (!PageReserved(page))
1044 SetPageDirty(page);
1045 put_page(page);
1046}
1047EXPORT_SYMBOL_GPL(kvm_release_page);
1048
1048static int next_segment(unsigned long len, int offset) 1049static int next_segment(unsigned long len, int offset)
1049{ 1050{
1050 if (len > PAGE_SIZE - offset) 1051 if (len > PAGE_SIZE - offset)
@@ -1060,13 +1061,16 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
1060 struct page *page; 1061 struct page *page;
1061 1062
1062 page = gfn_to_page(kvm, gfn); 1063 page = gfn_to_page(kvm, gfn);
1063 if (is_error_page(page)) 1064 if (is_error_page(page)) {
1065 kvm_release_page(page);
1064 return -EFAULT; 1066 return -EFAULT;
1067 }
1065 page_virt = kmap_atomic(page, KM_USER0); 1068 page_virt = kmap_atomic(page, KM_USER0);
1066 1069
1067 memcpy(data, page_virt + offset, len); 1070 memcpy(data, page_virt + offset, len);
1068 1071
1069 kunmap_atomic(page_virt, KM_USER0); 1072 kunmap_atomic(page_virt, KM_USER0);
1073 kvm_release_page(page);
1070 return 0; 1074 return 0;
1071} 1075}
1072EXPORT_SYMBOL_GPL(kvm_read_guest_page); 1076EXPORT_SYMBOL_GPL(kvm_read_guest_page);
@@ -1098,14 +1102,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
1098 struct page *page; 1102 struct page *page;
1099 1103
1100 page = gfn_to_page(kvm, gfn); 1104 page = gfn_to_page(kvm, gfn);
1101 if (is_error_page(page)) 1105 if (is_error_page(page)) {
1106 kvm_release_page(page);
1102 return -EFAULT; 1107 return -EFAULT;
1108 }
1103 page_virt = kmap_atomic(page, KM_USER0); 1109 page_virt = kmap_atomic(page, KM_USER0);
1104 1110
1105 memcpy(page_virt + offset, data, len); 1111 memcpy(page_virt + offset, data, len);
1106 1112
1107 kunmap_atomic(page_virt, KM_USER0); 1113 kunmap_atomic(page_virt, KM_USER0);
1108 mark_page_dirty(kvm, gfn); 1114 mark_page_dirty(kvm, gfn);
1115 kvm_release_page(page);
1109 return 0; 1116 return 0;
1110} 1117}
1111EXPORT_SYMBOL_GPL(kvm_write_guest_page); 1118EXPORT_SYMBOL_GPL(kvm_write_guest_page);
@@ -1136,13 +1143,16 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
1136 struct page *page; 1143 struct page *page;
1137 1144
1138 page = gfn_to_page(kvm, gfn); 1145 page = gfn_to_page(kvm, gfn);
1139 if (is_error_page(page)) 1146 if (is_error_page(page)) {
1147 kvm_release_page(page);
1140 return -EFAULT; 1148 return -EFAULT;
1149 }
1141 page_virt = kmap_atomic(page, KM_USER0); 1150 page_virt = kmap_atomic(page, KM_USER0);
1142 1151
1143 memset(page_virt + offset, 0, len); 1152 memset(page_virt + offset, 0, len);
1144 1153
1145 kunmap_atomic(page_virt, KM_USER0); 1154 kunmap_atomic(page_virt, KM_USER0);
1155 kvm_release_page(page);
1146 return 0; 1156 return 0;
1147} 1157}
1148EXPORT_SYMBOL_GPL(kvm_clear_guest_page); 1158EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
@@ -2070,8 +2080,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2070 for (i = 0; i < nr_pages; ++i) { 2080 for (i = 0; i < nr_pages; ++i) {
2071 mutex_lock(&vcpu->kvm->lock); 2081 mutex_lock(&vcpu->kvm->lock);
2072 page = gva_to_page(vcpu, address + i * PAGE_SIZE); 2082 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
2073 if (page)
2074 get_page(page);
2075 vcpu->pio.guest_pages[i] = page; 2083 vcpu->pio.guest_pages[i] = page;
2076 mutex_unlock(&vcpu->kvm->lock); 2084 mutex_unlock(&vcpu->kvm->lock);
2077 if (!page) { 2085 if (!page) {
@@ -3074,9 +3082,10 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
3074 3082
3075 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 3083 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
3076 page = gfn_to_page(kvm, pgoff); 3084 page = gfn_to_page(kvm, pgoff);
3077 if (is_error_page(page)) 3085 if (is_error_page(page)) {
3086 kvm_release_page(page);
3078 return NOPAGE_SIGBUS; 3087 return NOPAGE_SIGBUS;
3079 get_page(page); 3088 }
3080 if (type != NULL) 3089 if (type != NULL)
3081 *type = VM_FAULT_MINOR; 3090 *type = VM_FAULT_MINOR;
3082 3091
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 2ad14fbdcfa0..5d7af4bde595 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -425,6 +425,8 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
425 if (!is_rmap_pte(*spte)) 425 if (!is_rmap_pte(*spte))
426 return; 426 return;
427 page = page_header(__pa(spte)); 427 page = page_header(__pa(spte));
428 kvm_release_page(pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >>
429 PAGE_SHIFT));
428 rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]); 430 rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
429 if (!*rmapp) { 431 if (!*rmapp) {
430 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 432 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -911,6 +913,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
911 PT_USER_MASK; 913 PT_USER_MASK;
912 if (!was_rmapped) 914 if (!was_rmapped)
913 rmap_add(vcpu, &table[index], v >> PAGE_SHIFT); 915 rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
916 else
917 kvm_release_page(pfn_to_page(p >> PAGE_SHIFT));
914 return 0; 918 return 0;
915 } 919 }
916 920
@@ -925,6 +929,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
925 1, 3, &table[index]); 929 1, 3, &table[index]);
926 if (!new_table) { 930 if (!new_table) {
927 pgprintk("nonpaging_map: ENOMEM\n"); 931 pgprintk("nonpaging_map: ENOMEM\n");
932 kvm_release_page(pfn_to_page(p >> PAGE_SHIFT));
928 return -ENOMEM; 933 return -ENOMEM;
929 } 934 }
930 935
@@ -1039,8 +1044,11 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
1039 1044
1040 paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK); 1045 paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
1041 1046
1042 if (is_error_hpa(paddr)) 1047 if (is_error_hpa(paddr)) {
1048 kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
1049 >> PAGE_SHIFT));
1043 return 1; 1050 return 1;
1051 }
1044 1052
1045 return nonpaging_map(vcpu, addr & PAGE_MASK, paddr); 1053 return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
1046} 1054}
@@ -1507,6 +1515,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
1507 } else { 1515 } else {
1508 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va); 1516 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
1509 hpa_t hpa = gpa_to_hpa(vcpu, gpa); 1517 hpa_t hpa = gpa_to_hpa(vcpu, gpa);
1518 struct page *page;
1510 1519
1511 if (is_shadow_present_pte(ent) 1520 if (is_shadow_present_pte(ent)
1512 && (ent & PT64_BASE_ADDR_MASK) != hpa) 1521 && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1519,6 +1528,9 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
1519 && !is_error_hpa(hpa)) 1528 && !is_error_hpa(hpa))
1520 printk(KERN_ERR "audit: (%s) notrap shadow," 1529 printk(KERN_ERR "audit: (%s) notrap shadow,"
1521 " valid guest gva %lx\n", audit_msg, va); 1530 " valid guest gva %lx\n", audit_msg, va);
1531 page = pfn_to_page((gpa & PT64_BASE_ADDR_MASK)
1532 >> PAGE_SHIFT);
1533 kvm_release_page(page);
1522 1534
1523 } 1535 }
1524 } 1536 }
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 572e5b6d9a7a..0f0266af3f68 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -72,7 +72,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
72 struct kvm_vcpu *vcpu, gva_t addr, 72 struct kvm_vcpu *vcpu, gva_t addr,
73 int write_fault, int user_fault, int fetch_fault) 73 int write_fault, int user_fault, int fetch_fault)
74{ 74{
75 struct page *page; 75 struct page *page = NULL;
76 pt_element_t *table; 76 pt_element_t *table;
77 pt_element_t pte; 77 pt_element_t pte;
78 gfn_t table_gfn; 78 gfn_t table_gfn;
@@ -149,6 +149,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
149 149
150 walker->inherited_ar &= pte; 150 walker->inherited_ar &= pte;
151 --walker->level; 151 --walker->level;
152 kvm_release_page(page);
152 } 153 }
153 154
154 if (write_fault && !is_dirty_pte(pte)) { 155 if (write_fault && !is_dirty_pte(pte)) {
@@ -162,6 +163,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
162 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); 163 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
163 } 164 }
164 165
166 kvm_release_page(page);
165 walker->pte = pte; 167 walker->pte = pte;
166 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte); 168 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
167 return 1; 169 return 1;
@@ -180,6 +182,8 @@ err:
180 walker->error_code |= PFERR_USER_MASK; 182 walker->error_code |= PFERR_USER_MASK;
181 if (fetch_fault) 183 if (fetch_fault)
182 walker->error_code |= PFERR_FETCH_MASK; 184 walker->error_code |= PFERR_FETCH_MASK;
185 if (page)
186 kvm_release_page(page);
183 return 0; 187 return 0;
184} 188}
185 189
@@ -223,6 +227,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
223 if (is_error_hpa(paddr)) { 227 if (is_error_hpa(paddr)) {
224 set_shadow_pte(shadow_pte, 228 set_shadow_pte(shadow_pte,
225 shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK); 229 shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
230 kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
231 >> PAGE_SHIFT));
226 return; 232 return;
227 } 233 }
228 234
@@ -260,9 +266,20 @@ unshadowed:
260 pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte); 266 pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
261 set_shadow_pte(shadow_pte, spte); 267 set_shadow_pte(shadow_pte, spte);
262 page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); 268 page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
263 if (!was_rmapped) 269 if (!was_rmapped) {
264 rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK) 270 rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK)
265 >> PAGE_SHIFT); 271 >> PAGE_SHIFT);
272 if (!is_rmap_pte(*shadow_pte)) {
273 struct page *page;
274
275 page = pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
276 >> PAGE_SHIFT);
277 kvm_release_page(page);
278 }
279 }
280 else
281 kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
282 >> PAGE_SHIFT));
266 if (!ptwrite || !*ptwrite) 283 if (!ptwrite || !*ptwrite)
267 vcpu->last_pte_updated = shadow_pte; 284 vcpu->last_pte_updated = shadow_pte;
268} 285}
@@ -486,19 +503,22 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
486{ 503{
487 int i; 504 int i;
488 pt_element_t *gpt; 505 pt_element_t *gpt;
506 struct page *page;
489 507
490 if (sp->role.metaphysical || PTTYPE == 32) { 508 if (sp->role.metaphysical || PTTYPE == 32) {
491 nonpaging_prefetch_page(vcpu, sp); 509 nonpaging_prefetch_page(vcpu, sp);
492 return; 510 return;
493 } 511 }
494 512
495 gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0); 513 page = gfn_to_page(vcpu->kvm, sp->gfn);
514 gpt = kmap_atomic(page, KM_USER0);
496 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 515 for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
497 if (is_present_pte(gpt[i])) 516 if (is_present_pte(gpt[i]))
498 sp->spt[i] = shadow_trap_nonpresent_pte; 517 sp->spt[i] = shadow_trap_nonpresent_pte;
499 else 518 else
500 sp->spt[i] = shadow_notrap_nonpresent_pte; 519 sp->spt[i] = shadow_notrap_nonpresent_pte;
501 kunmap_atomic(gpt, KM_USER0); 520 kunmap_atomic(gpt, KM_USER0);
521 kvm_release_page(page);
502} 522}
503 523
504#undef pt_element_t 524#undef pt_element_t