diff options
author | Izik Eidus <izike@qumranet.com> | 2007-09-27 08:11:22 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-01-30 10:52:50 -0500 |
commit | 290fc38da8187b53b78dd4d5ab27a20b88ef8b61 (patch) | |
tree | 983b2b4cecbe489f7b84391c5eed34aa9f073da0 /drivers/kvm/mmu.c | |
parent | f566e09fc2c9f4164e1f0017c8c1c7a18bad7d72 (diff) |
KVM: Remove the usage of page->private field by rmap
When kvm uses user-allocated pages in the future for the guest, we won't
be able to use page->private for rmap, since page->private is reserved for
the filesystem. So we move the rmap base pointers to the memory slot.
A side effect of this is that we need to store the gfn of each gpte in
the shadow pages, since the memory slot is addressed by gfn, instead of
hfn like struct page.
Signed-off-by: Izik Eidus <izik@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r-- | drivers/kvm/mmu.c | 122 |
1 files changed, 70 insertions, 52 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index d347e895736e..72757db15065 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
@@ -276,7 +276,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) | |||
276 | rmap_desc_cache, 1); | 276 | rmap_desc_cache, 1); |
277 | if (r) | 277 | if (r) |
278 | goto out; | 278 | goto out; |
279 | r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4); | 279 | r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 8); |
280 | if (r) | 280 | if (r) |
281 | goto out; | 281 | goto out; |
282 | r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, | 282 | r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, |
@@ -327,35 +327,52 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) | |||
327 | } | 327 | } |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * Take gfn and return the reverse mapping to it. | ||
331 | * Note: gfn must be unaliased before this function get called | ||
332 | */ | ||
333 | |||
334 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn) | ||
335 | { | ||
336 | struct kvm_memory_slot *slot; | ||
337 | |||
338 | slot = gfn_to_memslot(kvm, gfn); | ||
339 | return &slot->rmap[gfn - slot->base_gfn]; | ||
340 | } | ||
341 | |||
342 | /* | ||
330 | * Reverse mapping data structures: | 343 | * Reverse mapping data structures: |
331 | * | 344 | * |
332 | * If page->private bit zero is zero, then page->private points to the | 345 | * If rmapp bit zero is zero, then rmapp point to the shadw page table entry |
333 | * shadow page table entry that points to page_address(page). | 346 | * that points to page_address(page). |
334 | * | 347 | * |
335 | * If page->private bit zero is one, (then page->private & ~1) points | 348 | * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc |
336 | * to a struct kvm_rmap_desc containing more mappings. | 349 | * containing more mappings. |
337 | */ | 350 | */ |
338 | static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte) | 351 | static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) |
339 | { | 352 | { |
340 | struct page *page; | 353 | struct kvm_mmu_page *page; |
341 | struct kvm_rmap_desc *desc; | 354 | struct kvm_rmap_desc *desc; |
355 | unsigned long *rmapp; | ||
342 | int i; | 356 | int i; |
343 | 357 | ||
344 | if (!is_rmap_pte(*spte)) | 358 | if (!is_rmap_pte(*spte)) |
345 | return; | 359 | return; |
346 | page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); | 360 | gfn = unalias_gfn(vcpu->kvm, gfn); |
347 | if (!page_private(page)) { | 361 | page = page_header(__pa(spte)); |
362 | page->gfns[spte - page->spt] = gfn; | ||
363 | rmapp = gfn_to_rmap(vcpu->kvm, gfn); | ||
364 | if (!*rmapp) { | ||
348 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); | 365 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); |
349 | set_page_private(page,(unsigned long)spte); | 366 | *rmapp = (unsigned long)spte; |
350 | } else if (!(page_private(page) & 1)) { | 367 | } else if (!(*rmapp & 1)) { |
351 | rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); | 368 | rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); |
352 | desc = mmu_alloc_rmap_desc(vcpu); | 369 | desc = mmu_alloc_rmap_desc(vcpu); |
353 | desc->shadow_ptes[0] = (u64 *)page_private(page); | 370 | desc->shadow_ptes[0] = (u64 *)*rmapp; |
354 | desc->shadow_ptes[1] = spte; | 371 | desc->shadow_ptes[1] = spte; |
355 | set_page_private(page,(unsigned long)desc | 1); | 372 | *rmapp = (unsigned long)desc | 1; |
356 | } else { | 373 | } else { |
357 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); | 374 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); |
358 | desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); | 375 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
359 | while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) | 376 | while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) |
360 | desc = desc->more; | 377 | desc = desc->more; |
361 | if (desc->shadow_ptes[RMAP_EXT-1]) { | 378 | if (desc->shadow_ptes[RMAP_EXT-1]) { |
@@ -368,7 +385,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte) | |||
368 | } | 385 | } |
369 | } | 386 | } |
370 | 387 | ||
371 | static void rmap_desc_remove_entry(struct page *page, | 388 | static void rmap_desc_remove_entry(unsigned long *rmapp, |
372 | struct kvm_rmap_desc *desc, | 389 | struct kvm_rmap_desc *desc, |
373 | int i, | 390 | int i, |
374 | struct kvm_rmap_desc *prev_desc) | 391 | struct kvm_rmap_desc *prev_desc) |
@@ -382,44 +399,46 @@ static void rmap_desc_remove_entry(struct page *page, | |||
382 | if (j != 0) | 399 | if (j != 0) |
383 | return; | 400 | return; |
384 | if (!prev_desc && !desc->more) | 401 | if (!prev_desc && !desc->more) |
385 | set_page_private(page,(unsigned long)desc->shadow_ptes[0]); | 402 | *rmapp = (unsigned long)desc->shadow_ptes[0]; |
386 | else | 403 | else |
387 | if (prev_desc) | 404 | if (prev_desc) |
388 | prev_desc->more = desc->more; | 405 | prev_desc->more = desc->more; |
389 | else | 406 | else |
390 | set_page_private(page,(unsigned long)desc->more | 1); | 407 | *rmapp = (unsigned long)desc->more | 1; |
391 | mmu_free_rmap_desc(desc); | 408 | mmu_free_rmap_desc(desc); |
392 | } | 409 | } |
393 | 410 | ||
394 | static void rmap_remove(u64 *spte) | 411 | static void rmap_remove(struct kvm *kvm, u64 *spte) |
395 | { | 412 | { |
396 | struct page *page; | ||
397 | struct kvm_rmap_desc *desc; | 413 | struct kvm_rmap_desc *desc; |
398 | struct kvm_rmap_desc *prev_desc; | 414 | struct kvm_rmap_desc *prev_desc; |
415 | struct kvm_mmu_page *page; | ||
416 | unsigned long *rmapp; | ||
399 | int i; | 417 | int i; |
400 | 418 | ||
401 | if (!is_rmap_pte(*spte)) | 419 | if (!is_rmap_pte(*spte)) |
402 | return; | 420 | return; |
403 | page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); | 421 | page = page_header(__pa(spte)); |
404 | if (!page_private(page)) { | 422 | rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]); |
423 | if (!*rmapp) { | ||
405 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 424 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
406 | BUG(); | 425 | BUG(); |
407 | } else if (!(page_private(page) & 1)) { | 426 | } else if (!(*rmapp & 1)) { |
408 | rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); | 427 | rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); |
409 | if ((u64 *)page_private(page) != spte) { | 428 | if ((u64 *)*rmapp != spte) { |
410 | printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", | 429 | printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", |
411 | spte, *spte); | 430 | spte, *spte); |
412 | BUG(); | 431 | BUG(); |
413 | } | 432 | } |
414 | set_page_private(page,0); | 433 | *rmapp = 0; |
415 | } else { | 434 | } else { |
416 | rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); | 435 | rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); |
417 | desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); | 436 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
418 | prev_desc = NULL; | 437 | prev_desc = NULL; |
419 | while (desc) { | 438 | while (desc) { |
420 | for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) | 439 | for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) |
421 | if (desc->shadow_ptes[i] == spte) { | 440 | if (desc->shadow_ptes[i] == spte) { |
422 | rmap_desc_remove_entry(page, | 441 | rmap_desc_remove_entry(rmapp, |
423 | desc, i, | 442 | desc, i, |
424 | prev_desc); | 443 | prev_desc); |
425 | return; | 444 | return; |
@@ -433,28 +452,25 @@ static void rmap_remove(u64 *spte) | |||
433 | 452 | ||
434 | static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) | 453 | static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) |
435 | { | 454 | { |
436 | struct kvm *kvm = vcpu->kvm; | ||
437 | struct page *page; | ||
438 | struct kvm_rmap_desc *desc; | 455 | struct kvm_rmap_desc *desc; |
456 | unsigned long *rmapp; | ||
439 | u64 *spte; | 457 | u64 *spte; |
440 | 458 | ||
441 | page = gfn_to_page(kvm, gfn); | 459 | gfn = unalias_gfn(vcpu->kvm, gfn); |
442 | BUG_ON(!page); | 460 | rmapp = gfn_to_rmap(vcpu->kvm, gfn); |
443 | 461 | ||
444 | while (page_private(page)) { | 462 | while (*rmapp) { |
445 | if (!(page_private(page) & 1)) | 463 | if (!(*rmapp & 1)) |
446 | spte = (u64 *)page_private(page); | 464 | spte = (u64 *)*rmapp; |
447 | else { | 465 | else { |
448 | desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); | 466 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
449 | spte = desc->shadow_ptes[0]; | 467 | spte = desc->shadow_ptes[0]; |
450 | } | 468 | } |
451 | BUG_ON(!spte); | 469 | BUG_ON(!spte); |
452 | BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT | ||
453 | != page_to_pfn(page)); | ||
454 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 470 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
455 | BUG_ON(!(*spte & PT_WRITABLE_MASK)); | 471 | BUG_ON(!(*spte & PT_WRITABLE_MASK)); |
456 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 472 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
457 | rmap_remove(spte); | 473 | rmap_remove(vcpu->kvm, spte); |
458 | set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); | 474 | set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); |
459 | kvm_flush_remote_tlbs(vcpu->kvm); | 475 | kvm_flush_remote_tlbs(vcpu->kvm); |
460 | } | 476 | } |
@@ -482,6 +498,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, | |||
482 | ASSERT(is_empty_shadow_page(page_head->spt)); | 498 | ASSERT(is_empty_shadow_page(page_head->spt)); |
483 | list_del(&page_head->link); | 499 | list_del(&page_head->link); |
484 | __free_page(virt_to_page(page_head->spt)); | 500 | __free_page(virt_to_page(page_head->spt)); |
501 | __free_page(virt_to_page(page_head->gfns)); | ||
485 | kfree(page_head); | 502 | kfree(page_head); |
486 | ++kvm->n_free_mmu_pages; | 503 | ++kvm->n_free_mmu_pages; |
487 | } | 504 | } |
@@ -502,6 +519,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
502 | page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache, | 519 | page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache, |
503 | sizeof *page); | 520 | sizeof *page); |
504 | page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE); | 521 | page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE); |
522 | page->gfns = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE); | ||
505 | set_page_private(virt_to_page(page->spt), (unsigned long)page); | 523 | set_page_private(virt_to_page(page->spt), (unsigned long)page); |
506 | list_add(&page->link, &vcpu->kvm->active_mmu_pages); | 524 | list_add(&page->link, &vcpu->kvm->active_mmu_pages); |
507 | ASSERT(is_empty_shadow_page(page->spt)); | 525 | ASSERT(is_empty_shadow_page(page->spt)); |
@@ -667,7 +685,7 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
667 | if (page->role.level == PT_PAGE_TABLE_LEVEL) { | 685 | if (page->role.level == PT_PAGE_TABLE_LEVEL) { |
668 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 686 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
669 | if (is_shadow_present_pte(pt[i])) | 687 | if (is_shadow_present_pte(pt[i])) |
670 | rmap_remove(&pt[i]); | 688 | rmap_remove(kvm, &pt[i]); |
671 | pt[i] = shadow_trap_nonpresent_pte; | 689 | pt[i] = shadow_trap_nonpresent_pte; |
672 | } | 690 | } |
673 | kvm_flush_remote_tlbs(kvm); | 691 | kvm_flush_remote_tlbs(kvm); |
@@ -832,7 +850,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
832 | page_header_update_slot(vcpu->kvm, table, v); | 850 | page_header_update_slot(vcpu->kvm, table, v); |
833 | table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | | 851 | table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
834 | PT_USER_MASK; | 852 | PT_USER_MASK; |
835 | rmap_add(vcpu, &table[index]); | 853 | rmap_add(vcpu, &table[index], v >> PAGE_SHIFT); |
836 | return 0; | 854 | return 0; |
837 | } | 855 | } |
838 | 856 | ||
@@ -1123,7 +1141,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
1123 | pte = *spte; | 1141 | pte = *spte; |
1124 | if (is_shadow_present_pte(pte)) { | 1142 | if (is_shadow_present_pte(pte)) { |
1125 | if (page->role.level == PT_PAGE_TABLE_LEVEL) | 1143 | if (page->role.level == PT_PAGE_TABLE_LEVEL) |
1126 | rmap_remove(spte); | 1144 | rmap_remove(vcpu->kvm, spte); |
1127 | else { | 1145 | else { |
1128 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1146 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
1129 | mmu_page_remove_parent_pte(child, spte); | 1147 | mmu_page_remove_parent_pte(child, spte); |
@@ -1340,7 +1358,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
1340 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 1358 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
1341 | /* avoid RMW */ | 1359 | /* avoid RMW */ |
1342 | if (pt[i] & PT_WRITABLE_MASK) { | 1360 | if (pt[i] & PT_WRITABLE_MASK) { |
1343 | rmap_remove(&pt[i]); | 1361 | rmap_remove(kvm, &pt[i]); |
1344 | pt[i] &= ~PT_WRITABLE_MASK; | 1362 | pt[i] &= ~PT_WRITABLE_MASK; |
1345 | } | 1363 | } |
1346 | } | 1364 | } |
@@ -1470,15 +1488,15 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
1470 | struct kvm_rmap_desc *d; | 1488 | struct kvm_rmap_desc *d; |
1471 | 1489 | ||
1472 | for (j = 0; j < m->npages; ++j) { | 1490 | for (j = 0; j < m->npages; ++j) { |
1473 | struct page *page = m->phys_mem[j]; | 1491 | unsigned long *rmapp = &m->rmap[j]; |
1474 | 1492 | ||
1475 | if (!page->private) | 1493 | if (!*rmapp) |
1476 | continue; | 1494 | continue; |
1477 | if (!(page->private & 1)) { | 1495 | if (!(*rmapp & 1)) { |
1478 | ++nmaps; | 1496 | ++nmaps; |
1479 | continue; | 1497 | continue; |
1480 | } | 1498 | } |
1481 | d = (struct kvm_rmap_desc *)(page->private & ~1ul); | 1499 | d = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
1482 | while (d) { | 1500 | while (d) { |
1483 | for (k = 0; k < RMAP_EXT; ++k) | 1501 | for (k = 0; k < RMAP_EXT; ++k) |
1484 | if (d->shadow_ptes[k]) | 1502 | if (d->shadow_ptes[k]) |
@@ -1530,18 +1548,18 @@ static void audit_rmap(struct kvm_vcpu *vcpu) | |||
1530 | static void audit_write_protection(struct kvm_vcpu *vcpu) | 1548 | static void audit_write_protection(struct kvm_vcpu *vcpu) |
1531 | { | 1549 | { |
1532 | struct kvm_mmu_page *page; | 1550 | struct kvm_mmu_page *page; |
1551 | struct kvm_memory_slot *slot; | ||
1552 | unsigned long *rmapp; | ||
1553 | gfn_t gfn; | ||
1533 | 1554 | ||
1534 | list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { | 1555 | list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { |
1535 | hfn_t hfn; | ||
1536 | struct page *pg; | ||
1537 | |||
1538 | if (page->role.metaphysical) | 1556 | if (page->role.metaphysical) |
1539 | continue; | 1557 | continue; |
1540 | 1558 | ||
1541 | hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT) | 1559 | slot = gfn_to_memslot(vcpu->kvm, page->gfn); |
1542 | >> PAGE_SHIFT; | 1560 | gfn = unalias_gfn(vcpu->kvm, page->gfn); |
1543 | pg = pfn_to_page(hfn); | 1561 | rmapp = &slot->rmap[gfn - slot->base_gfn]; |
1544 | if (pg->private) | 1562 | if (*rmapp) |
1545 | printk(KERN_ERR "%s: (%s) shadow page has writable" | 1563 | printk(KERN_ERR "%s: (%s) shadow page has writable" |
1546 | " mappings: gfn %lx role %x\n", | 1564 | " mappings: gfn %lx role %x\n", |
1547 | __FUNCTION__, audit_msg, page->gfn, | 1565 | __FUNCTION__, audit_msg, page->gfn, |