path: root/drivers/kvm/mmu.c
author	Izik Eidus <izike@qumranet.com>	2007-09-27 08:11:22 -0400
committer	Avi Kivity <avi@qumranet.com>	2008-01-30 10:52:50 -0500
commit	290fc38da8187b53b78dd4d5ab27a20b88ef8b61 (patch)
tree	983b2b4cecbe489f7b84391c5eed34aa9f073da0 /drivers/kvm/mmu.c
parent	f566e09fc2c9f4164e1f0017c8c1c7a18bad7d72 (diff)
KVM: Remove the usage of page->private field by rmap
When kvm uses user-allocated pages in the future for the guest, we won't be
able to use page->private for rmap, since page->rmap is reserved for the
filesystem. So we move the rmap base pointers to the memory slot.

A side effect of this is that we need to store the gfn of each gpte in the
shadow pages, since the memory slot is addressed by gfn, instead of hfn like
struct page.

Signed-off-by: Izik Eidus <izik@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
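In short: the rmap head for a guest page now lives in its memory slot, indexed
by the gfn offset within the slot, and every shadow page gains a gfns[] array
so the gfn behind an spte can be recovered without going through struct page.
The sketch below (editor's illustration) shows that layout; the struct
definitions are deliberately stripped-down stand-ins, not the real kernel
structures, and show only the fields this patch touches. The real helper also
looks the slot up via gfn_to_memslot() rather than taking it as a parameter.

/*
 * Minimal sketch of the new rmap layout -- simplified stand-ins, not the
 * actual kernel structures.
 */
#include <stdint.h>

typedef uint64_t u64;
typedef unsigned long gfn_t;

struct kvm_memory_slot {
	gfn_t base_gfn;            /* first guest frame covered by the slot */
	unsigned long npages;      /* guest pages in the slot */
	unsigned long *rmap;       /* one rmap head per guest page:
	                            *   0          -> no shadow mappings
	                            *   bit 0 == 0 -> a single spte pointer
	                            *   bit 0 == 1 -> (value & ~1) points to a
	                            *                 kvm_rmap_desc chain */
};

struct kvm_mmu_page {
	u64 *spt;                  /* the shadow page table page itself */
	gfn_t *gfns;               /* gfn backing each spte in spt, same index */
};

/* Counterpart of the new gfn_to_rmap(); gfn must already be unaliased.
 * (The patch's version takes struct kvm * and calls gfn_to_memslot() first.) */
static unsigned long *gfn_to_rmap(struct kvm_memory_slot *slot, gfn_t gfn)
{
	return &slot->rmap[gfn - slot->base_gfn];
}

/* Recording the gfn next to each spte is what lets rmap_remove() get back to
 * the right slot entry from the spte alone, as rmap_add() does with
 * page->gfns[spte - page->spt] = gfn. */
static void record_gfn(struct kvm_mmu_page *page, u64 *spte, gfn_t gfn)
{
	page->gfns[spte - page->spt] = gfn;
}

Because the gfns array costs one extra page per shadow page (allocated from
mmu_page_cache alongside spt, and freed with it), the cache top-up in the first
hunk doubles from 4 to 8 pages.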
Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r--	drivers/kvm/mmu.c	122
1 file changed, 70 insertions(+), 52 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index d347e895736e..72757db15065 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -276,7 +276,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 				   rmap_desc_cache, 1);
 	if (r)
 		goto out;
-	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
+	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 8);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
@@ -327,35 +327,52 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 }
 
 /*
+ * Take gfn and return the reverse mapping to it.
+ * Note: gfn must be unaliased before this function get called
+ */
+
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = gfn_to_memslot(kvm, gfn);
+	return &slot->rmap[gfn - slot->base_gfn];
+}
+
+/*
  * Reverse mapping data structures:
  *
- * If page->private bit zero is zero, then page->private points to the
- * shadow page table entry that points to page_address(page).
+ * If rmapp bit zero is zero, then rmapp point to the shadw page table entry
+ * that points to page_address(page).
  *
- * If page->private bit zero is one, (then page->private & ~1) points
- * to a struct kvm_rmap_desc containing more mappings.
+ * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
+ * containing more mappings.
  */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
-	struct page *page;
+	struct kvm_mmu_page *page;
 	struct kvm_rmap_desc *desc;
+	unsigned long *rmapp;
 	int i;
 
 	if (!is_rmap_pte(*spte))
 		return;
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page_private(page)) {
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	page = page_header(__pa(spte));
+	page->gfns[spte - page->spt] = gfn;
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+	if (!*rmapp) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
-		set_page_private(page,(unsigned long)spte);
-	} else if (!(page_private(page) & 1)) {
+		*rmapp = (unsigned long)spte;
+	} else if (!(*rmapp & 1)) {
 		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
 		desc = mmu_alloc_rmap_desc(vcpu);
-		desc->shadow_ptes[0] = (u64 *)page_private(page);
+		desc->shadow_ptes[0] = (u64 *)*rmapp;
 		desc->shadow_ptes[1] = spte;
-		set_page_private(page,(unsigned long)desc | 1);
+		*rmapp = (unsigned long)desc | 1;
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
 			desc = desc->more;
 		if (desc->shadow_ptes[RMAP_EXT-1]) {
@@ -368,7 +385,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
 	}
 }
 
-static void rmap_desc_remove_entry(struct page *page,
+static void rmap_desc_remove_entry(unsigned long *rmapp,
 				   struct kvm_rmap_desc *desc,
 				   int i,
 				   struct kvm_rmap_desc *prev_desc)
@@ -382,44 +399,46 @@ static void rmap_desc_remove_entry(struct page *page,
 	if (j != 0)
 		return;
 	if (!prev_desc && !desc->more)
-		set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
+		*rmapp = (unsigned long)desc->shadow_ptes[0];
 	else
 		if (prev_desc)
 			prev_desc->more = desc->more;
 		else
-			set_page_private(page,(unsigned long)desc->more | 1);
+			*rmapp = (unsigned long)desc->more | 1;
 	mmu_free_rmap_desc(desc);
 }
 
-static void rmap_remove(u64 *spte)
+static void rmap_remove(struct kvm *kvm, u64 *spte)
 {
-	struct page *page;
 	struct kvm_rmap_desc *desc;
 	struct kvm_rmap_desc *prev_desc;
+	struct kvm_mmu_page *page;
+	unsigned long *rmapp;
 	int i;
 
 	if (!is_rmap_pte(*spte))
 		return;
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page_private(page)) {
+	page = page_header(__pa(spte));
+	rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
+	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
-	} else if (!(page_private(page) & 1)) {
+	} else if (!(*rmapp & 1)) {
 		rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
-		if ((u64 *)page_private(page) != spte) {
+		if ((u64 *)*rmapp != spte) {
 			printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
 			       spte, *spte);
 			BUG();
 		}
-		set_page_private(page,0);
+		*rmapp = 0;
 	} else {
 		rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 		prev_desc = NULL;
 		while (desc) {
 			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
 				if (desc->shadow_ptes[i] == spte) {
-					rmap_desc_remove_entry(page,
+					rmap_desc_remove_entry(rmapp,
 							       desc, i,
 							       prev_desc);
 					return;
@@ -433,28 +452,25 @@ static void rmap_remove(u64 *spte)
 
 static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 {
-	struct kvm *kvm = vcpu->kvm;
-	struct page *page;
 	struct kvm_rmap_desc *desc;
+	unsigned long *rmapp;
 	u64 *spte;
 
-	page = gfn_to_page(kvm, gfn);
-	BUG_ON(!page);
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn);
 
-	while (page_private(page)) {
-		if (!(page_private(page) & 1))
-			spte = (u64 *)page_private(page);
+	while (*rmapp) {
+		if (!(*rmapp & 1))
+			spte = (u64 *)*rmapp;
 		else {
-			desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+			desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 			spte = desc->shadow_ptes[0];
 		}
 		BUG_ON(!spte);
-		BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
-		       != page_to_pfn(page));
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		BUG_ON(!(*spte & PT_WRITABLE_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-		rmap_remove(spte);
+		rmap_remove(vcpu->kvm, spte);
 		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
 		kvm_flush_remote_tlbs(vcpu->kvm);
 	}
@@ -482,6 +498,7 @@ static void kvm_mmu_free_page(struct kvm *kvm,
 	ASSERT(is_empty_shadow_page(page_head->spt));
 	list_del(&page_head->link);
 	__free_page(virt_to_page(page_head->spt));
+	__free_page(virt_to_page(page_head->gfns));
 	kfree(page_head);
 	++kvm->n_free_mmu_pages;
 }
@@ -502,6 +519,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
 				      sizeof *page);
 	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
+	page->gfns = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
 	set_page_private(virt_to_page(page->spt), (unsigned long)page);
 	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
 	ASSERT(is_empty_shadow_page(page->spt));
@@ -667,7 +685,7 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 			if (is_shadow_present_pte(pt[i]))
-				rmap_remove(&pt[i]);
+				rmap_remove(kvm, &pt[i]);
 			pt[i] = shadow_trap_nonpresent_pte;
 		}
 		kvm_flush_remote_tlbs(kvm);
@@ -832,7 +850,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 		page_header_update_slot(vcpu->kvm, table, v);
 		table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
 			       PT_USER_MASK;
-		rmap_add(vcpu, &table[index]);
+		rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
 		return 0;
 	}
 
@@ -1123,7 +1141,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 	pte = *spte;
 	if (is_shadow_present_pte(pte)) {
 		if (page->role.level == PT_PAGE_TABLE_LEVEL)
-			rmap_remove(spte);
+			rmap_remove(vcpu->kvm, spte);
 		else {
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			mmu_page_remove_parent_pte(child, spte);
@@ -1340,7 +1358,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 			/* avoid RMW */
 			if (pt[i] & PT_WRITABLE_MASK) {
-				rmap_remove(&pt[i]);
+				rmap_remove(kvm, &pt[i]);
 				pt[i] &= ~PT_WRITABLE_MASK;
 			}
 	}
@@ -1470,15 +1488,15 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
-			struct page *page = m->phys_mem[j];
+			unsigned long *rmapp = &m->rmap[j];
 
-			if (!page->private)
+			if (!*rmapp)
 				continue;
-			if (!(page->private & 1)) {
+			if (!(*rmapp & 1)) {
 				++nmaps;
 				continue;
 			}
-			d = (struct kvm_rmap_desc *)(page->private & ~1ul);
+			d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 			while (d) {
 				for (k = 0; k < RMAP_EXT; ++k)
 					if (d->shadow_ptes[k])
@@ -1530,18 +1548,18 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
 static void audit_write_protection(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *page;
+	struct kvm_memory_slot *slot;
+	unsigned long *rmapp;
+	gfn_t gfn;
 
 	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
-		hfn_t hfn;
-		struct page *pg;
-
 		if (page->role.metaphysical)
 			continue;
 
-		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
-		      >> PAGE_SHIFT;
-		pg = pfn_to_page(hfn);
-		if (pg->private)
+		slot = gfn_to_memslot(vcpu->kvm, page->gfn);
+		gfn = unalias_gfn(vcpu->kvm, page->gfn);
+		rmapp = &slot->rmap[gfn - slot->base_gfn];
+		if (*rmapp)
 			printk(KERN_ERR "%s: (%s) shadow page has writable"
 			       " mappings: gfn %lx role %x\n",
 			       __FUNCTION__, audit_msg, page->gfn,