diff options
author | Paul Mackerras <paulus@samba.org> | 2011-12-14 21:02:02 -0500 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2012-03-05 07:52:39 -0500 |
commit | bad3b5075eeb18cb1641b4171618add638bc0fa7 (patch) | |
tree | 11c599691f17ef931ef5dbb8d8e788c500752554 | |
parent | a92bce95f0f967dfa6205527d7143d276b0be6a7 (diff) |
KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits
This allows both the guest and the host to use the referenced (R) and
changed (C) bits in the guest hashed page table. The guest has a view
of R and C that is maintained in the guest_rpte field of the revmap
entry for the HPTE, and the host has a view that is maintained in the
rmap entry for the associated gfn.
Both views are updated from the guest HPT. If a bit (R or C) is zero
in either view, it will be initially set to zero in the HPTE (or HPTEs),
until set to 1 by hardware. When an HPTE is removed for any reason,
the R and C bits from the HPTE are ORed into both views. We have to
be careful to read the R and C bits from the HPTE after invalidating
it, but before unlocking it, in case of any late updates by the hardware.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r-- | arch/powerpc/include/asm/kvm_host.h | 5 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 48 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 45 |
3 files changed, 59 insertions, 39 deletions
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 968f3aa61cd1..1cb6e522485b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -200,8 +200,9 @@ struct revmap_entry { | |||
200 | * index in the guest HPT of a HPTE that points to the page. | 200 | * index in the guest HPT of a HPTE that points to the page. |
201 | */ | 201 | */ |
202 | #define KVMPPC_RMAP_LOCK_BIT 63 | 202 | #define KVMPPC_RMAP_LOCK_BIT 63 |
203 | #define KVMPPC_RMAP_REF_BIT 33 | 203 | #define KVMPPC_RMAP_RC_SHIFT 32 |
204 | #define KVMPPC_RMAP_REFERENCED (1ul << KVMPPC_RMAP_REF_BIT) | 204 | #define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT) |
205 | #define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT) | ||
205 | #define KVMPPC_RMAP_PRESENT 0x100000000ul | 206 | #define KVMPPC_RMAP_PRESENT 0x100000000ul |
206 | #define KVMPPC_RMAP_INDEX 0xfffffffful | 207 | #define KVMPPC_RMAP_INDEX 0xfffffffful |
207 | 208 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 66d6452c1081..aa51ddef468e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -505,6 +505,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
505 | unsigned long is_io; | 505 | unsigned long is_io; |
506 | unsigned int writing, write_ok; | 506 | unsigned int writing, write_ok; |
507 | struct vm_area_struct *vma; | 507 | struct vm_area_struct *vma; |
508 | unsigned long rcbits; | ||
508 | 509 | ||
509 | /* | 510 | /* |
510 | * Real-mode code has already searched the HPT and found the | 511 | * Real-mode code has already searched the HPT and found the |
@@ -640,11 +641,17 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
640 | goto out_unlock; | 641 | goto out_unlock; |
641 | } | 642 | } |
642 | 643 | ||
644 | /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */ | ||
645 | rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; | ||
646 | r &= rcbits | ~(HPTE_R_R | HPTE_R_C); | ||
647 | |||
643 | if (hptep[0] & HPTE_V_VALID) { | 648 | if (hptep[0] & HPTE_V_VALID) { |
644 | /* HPTE was previously valid, so we need to invalidate it */ | 649 | /* HPTE was previously valid, so we need to invalidate it */ |
645 | unlock_rmap(rmap); | 650 | unlock_rmap(rmap); |
646 | hptep[0] |= HPTE_V_ABSENT; | 651 | hptep[0] |= HPTE_V_ABSENT; |
647 | kvmppc_invalidate_hpte(kvm, hptep, index); | 652 | kvmppc_invalidate_hpte(kvm, hptep, index); |
653 | /* don't lose previous R and C bits */ | ||
654 | r |= hptep[1] & (HPTE_R_R | HPTE_R_C); | ||
648 | } else { | 655 | } else { |
649 | kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); | 656 | kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); |
650 | } | 657 | } |
@@ -701,50 +708,55 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
701 | struct revmap_entry *rev = kvm->arch.revmap; | 708 | struct revmap_entry *rev = kvm->arch.revmap; |
702 | unsigned long h, i, j; | 709 | unsigned long h, i, j; |
703 | unsigned long *hptep; | 710 | unsigned long *hptep; |
704 | unsigned long ptel, psize; | 711 | unsigned long ptel, psize, rcbits; |
705 | 712 | ||
706 | for (;;) { | 713 | for (;;) { |
707 | while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) | 714 | lock_rmap(rmapp); |
708 | cpu_relax(); | ||
709 | if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { | 715 | if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { |
710 | __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); | 716 | unlock_rmap(rmapp); |
711 | break; | 717 | break; |
712 | } | 718 | } |
713 | 719 | ||
714 | /* | 720 | /* |
715 | * To avoid an ABBA deadlock with the HPTE lock bit, | 721 | * To avoid an ABBA deadlock with the HPTE lock bit, |
716 | * we have to unlock the rmap chain before locking the HPTE. | 722 | * we can't spin on the HPTE lock while holding the |
717 | * Thus we remove the first entry, unlock the rmap chain, | 723 | * rmap chain lock. |
718 | * lock the HPTE and then check that it is for the | ||
719 | * page we're unmapping before changing it to non-present. | ||
720 | */ | 724 | */ |
721 | i = *rmapp & KVMPPC_RMAP_INDEX; | 725 | i = *rmapp & KVMPPC_RMAP_INDEX; |
726 | hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); | ||
727 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { | ||
728 | /* unlock rmap before spinning on the HPTE lock */ | ||
729 | unlock_rmap(rmapp); | ||
730 | while (hptep[0] & HPTE_V_HVLOCK) | ||
731 | cpu_relax(); | ||
732 | continue; | ||
733 | } | ||
722 | j = rev[i].forw; | 734 | j = rev[i].forw; |
723 | if (j == i) { | 735 | if (j == i) { |
724 | /* chain is now empty */ | 736 | /* chain is now empty */ |
725 | j = 0; | 737 | *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); |
726 | } else { | 738 | } else { |
727 | /* remove i from chain */ | 739 | /* remove i from chain */ |
728 | h = rev[i].back; | 740 | h = rev[i].back; |
729 | rev[h].forw = j; | 741 | rev[h].forw = j; |
730 | rev[j].back = h; | 742 | rev[j].back = h; |
731 | rev[i].forw = rev[i].back = i; | 743 | rev[i].forw = rev[i].back = i; |
732 | j |= KVMPPC_RMAP_PRESENT; | 744 | *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; |
733 | } | 745 | } |
734 | smp_wmb(); | ||
735 | *rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT); | ||
736 | 746 | ||
737 | /* Now lock, check and modify the HPTE */ | 747 | /* Now check and modify the HPTE */ |
738 | hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); | ||
739 | while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) | ||
740 | cpu_relax(); | ||
741 | ptel = rev[i].guest_rpte; | 748 | ptel = rev[i].guest_rpte; |
742 | psize = hpte_page_size(hptep[0], ptel); | 749 | psize = hpte_page_size(hptep[0], ptel); |
743 | if ((hptep[0] & HPTE_V_VALID) && | 750 | if ((hptep[0] & HPTE_V_VALID) && |
744 | hpte_rpn(ptel, psize) == gfn) { | 751 | hpte_rpn(ptel, psize) == gfn) { |
745 | kvmppc_invalidate_hpte(kvm, hptep, i); | ||
746 | hptep[0] |= HPTE_V_ABSENT; | 752 | hptep[0] |= HPTE_V_ABSENT; |
753 | kvmppc_invalidate_hpte(kvm, hptep, i); | ||
754 | /* Harvest R and C */ | ||
755 | rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); | ||
756 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; | ||
757 | rev[i].guest_rpte = ptel | rcbits; | ||
747 | } | 758 | } |
759 | unlock_rmap(rmapp); | ||
748 | hptep[0] &= ~HPTE_V_HVLOCK; | 760 | hptep[0] &= ~HPTE_V_HVLOCK; |
749 | } | 761 | } |
750 | return 0; | 762 | return 0; |
@@ -767,7 +779,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
767 | kvm_unmap_rmapp(kvm, rmapp, gfn); | 779 | kvm_unmap_rmapp(kvm, rmapp, gfn); |
768 | while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) | 780 | while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) |
769 | cpu_relax(); | 781 | cpu_relax(); |
770 | __clear_bit(KVMPPC_RMAP_REF_BIT, rmapp); | 782 | *rmapp &= ~KVMPPC_RMAP_REFERENCED; |
771 | __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); | 783 | __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); |
772 | return 1; | 784 | return 1; |
773 | } | 785 | } |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index ba4a1376b331..91b45a03f438 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -87,15 +87,17 @@ EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); | |||
87 | 87 | ||
88 | /* Remove this HPTE from the chain for a real page */ | 88 | /* Remove this HPTE from the chain for a real page */ |
89 | static void remove_revmap_chain(struct kvm *kvm, long pte_index, | 89 | static void remove_revmap_chain(struct kvm *kvm, long pte_index, |
90 | unsigned long hpte_v) | 90 | struct revmap_entry *rev, |
91 | unsigned long hpte_v, unsigned long hpte_r) | ||
91 | { | 92 | { |
92 | struct revmap_entry *rev, *next, *prev; | 93 | struct revmap_entry *next, *prev; |
93 | unsigned long gfn, ptel, head; | 94 | unsigned long gfn, ptel, head; |
94 | struct kvm_memory_slot *memslot; | 95 | struct kvm_memory_slot *memslot; |
95 | unsigned long *rmap; | 96 | unsigned long *rmap; |
97 | unsigned long rcbits; | ||
96 | 98 | ||
97 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 99 | rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); |
98 | ptel = rev->guest_rpte; | 100 | ptel = rev->guest_rpte |= rcbits; |
99 | gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); | 101 | gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); |
100 | memslot = builtin_gfn_to_memslot(kvm, gfn); | 102 | memslot = builtin_gfn_to_memslot(kvm, gfn); |
101 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) | 103 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) |
@@ -116,6 +118,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, | |||
116 | else | 118 | else |
117 | *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; | 119 | *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; |
118 | } | 120 | } |
121 | *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; | ||
119 | unlock_rmap(rmap); | 122 | unlock_rmap(rmap); |
120 | } | 123 | } |
121 | 124 | ||
@@ -162,6 +165,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | |||
162 | pte_t pte; | 165 | pte_t pte; |
163 | unsigned int writing; | 166 | unsigned int writing; |
164 | unsigned long mmu_seq; | 167 | unsigned long mmu_seq; |
168 | unsigned long rcbits; | ||
165 | bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; | 169 | bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; |
166 | 170 | ||
167 | psize = hpte_page_size(pteh, ptel); | 171 | psize = hpte_page_size(pteh, ptel); |
@@ -320,6 +324,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | |||
320 | } else { | 324 | } else { |
321 | kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, | 325 | kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, |
322 | realmode); | 326 | realmode); |
327 | /* Only set R/C in real HPTE if already set in *rmap */ | ||
328 | rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; | ||
329 | ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); | ||
323 | } | 330 | } |
324 | } | 331 | } |
325 | 332 | ||
@@ -394,7 +401,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, | |||
394 | asm volatile("tlbiel %0" : : "r" (rb)); | 401 | asm volatile("tlbiel %0" : : "r" (rb)); |
395 | asm volatile("ptesync" : : : "memory"); | 402 | asm volatile("ptesync" : : : "memory"); |
396 | } | 403 | } |
397 | remove_revmap_chain(kvm, pte_index, v); | 404 | /* Read PTE low word after tlbie to get final R/C values */ |
405 | remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); | ||
398 | } | 406 | } |
399 | r = rev->guest_rpte; | 407 | r = rev->guest_rpte; |
400 | unlock_hpte(hpte, 0); | 408 | unlock_hpte(hpte, 0); |
@@ -469,12 +477,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
469 | 477 | ||
470 | args[j] = ((0x80 | flags) << 56) + pte_index; | 478 | args[j] = ((0x80 | flags) << 56) + pte_index; |
471 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 479 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); |
472 | /* insert R and C bits from guest PTE */ | ||
473 | rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); | ||
474 | args[j] |= rcbits << (56 - 5); | ||
475 | 480 | ||
476 | if (!(hp[0] & HPTE_V_VALID)) | 481 | if (!(hp[0] & HPTE_V_VALID)) { |
482 | /* insert R and C bits from PTE */ | ||
483 | rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); | ||
484 | args[j] |= rcbits << (56 - 5); | ||
477 | continue; | 485 | continue; |
486 | } | ||
478 | 487 | ||
479 | hp[0] &= ~HPTE_V_VALID; /* leave it locked */ | 488 | hp[0] &= ~HPTE_V_VALID; /* leave it locked */ |
480 | tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); | 489 | tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); |
@@ -505,13 +514,16 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
505 | asm volatile("ptesync" : : : "memory"); | 514 | asm volatile("ptesync" : : : "memory"); |
506 | } | 515 | } |
507 | 516 | ||
517 | /* Read PTE low words after tlbie to get final R/C values */ | ||
508 | for (k = 0; k < n; ++k) { | 518 | for (k = 0; k < n; ++k) { |
509 | j = indexes[k]; | 519 | j = indexes[k]; |
510 | pte_index = args[j] & ((1ul << 56) - 1); | 520 | pte_index = args[j] & ((1ul << 56) - 1); |
511 | hp = hptes[k]; | 521 | hp = hptes[k]; |
512 | rev = revs[k]; | 522 | rev = revs[k]; |
513 | remove_revmap_chain(kvm, pte_index, hp[0]); | 523 | remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); |
514 | unlock_hpte(hp, 0); | 524 | rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); |
525 | args[j] |= rcbits << (56 - 5); | ||
526 | hp[0] = 0; | ||
515 | } | 527 | } |
516 | } | 528 | } |
517 | 529 | ||
@@ -595,8 +607,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, | |||
595 | pte_index &= ~3; | 607 | pte_index &= ~3; |
596 | n = 4; | 608 | n = 4; |
597 | } | 609 | } |
598 | if (flags & H_R_XLATE) | 610 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); |
599 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | ||
600 | for (i = 0; i < n; ++i, ++pte_index) { | 611 | for (i = 0; i < n; ++i, ++pte_index) { |
601 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); | 612 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); |
602 | v = hpte[0] & ~HPTE_V_HVLOCK; | 613 | v = hpte[0] & ~HPTE_V_HVLOCK; |
@@ -605,12 +616,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, | |||
605 | v &= ~HPTE_V_ABSENT; | 616 | v &= ~HPTE_V_ABSENT; |
606 | v |= HPTE_V_VALID; | 617 | v |= HPTE_V_VALID; |
607 | } | 618 | } |
608 | if (v & HPTE_V_VALID) { | 619 | if (v & HPTE_V_VALID) |
609 | if (rev) | 620 | r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); |
610 | r = rev[i].guest_rpte; | ||
611 | else | ||
612 | r = hpte[1] | HPTE_R_RPN; | ||
613 | } | ||
614 | vcpu->arch.gpr[4 + i * 2] = v; | 621 | vcpu->arch.gpr[4 + i * 2] = v; |
615 | vcpu->arch.gpr[5 + i * 2] = r; | 622 | vcpu->arch.gpr[5 + i * 2] = r; |
616 | } | 623 | } |