aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Mackerras <paulus@samba.org> 2011-12-14 21:02:02 -0500
committer Avi Kivity <avi@redhat.com> 2012-03-05 07:52:39 -0500
commit bad3b5075eeb18cb1641b4171618add638bc0fa7 (patch)
tree 11c599691f17ef931ef5dbb8d8e788c500752554
parent a92bce95f0f967dfa6205527d7143d276b0be6a7 (diff)
KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits
This allows both the guest and the host to use the referenced (R) and changed (C) bits in the guest hashed page table. The guest has a view of R and C that is maintained in the guest_rpte field of the revmap entry for the HPTE, and the host has a view that is maintained in the rmap entry for the associated gfn. Both views are updated from the guest HPT. If a bit (R or C) is zero in either view, it will be initially set to zero in the HPTE (or HPTEs), until set to 1 by hardware. When an HPTE is removed for any reason, the R and C bits from the HPTE are ORed into both views. We have to be careful to read the R and C bits from the HPTE after invalidating it, but before unlocking it, in case of any late updates by the hardware. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r-- arch/powerpc/include/asm/kvm_host.h 5
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu_hv.c 48
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rm_mmu.c 45
3 files changed, 59 insertions, 39 deletions
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 968f3aa61cd1..1cb6e522485b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -200,8 +200,9 @@ struct revmap_entry {
200 * index in the guest HPT of a HPTE that points to the page. 200 * index in the guest HPT of a HPTE that points to the page.
201 */ 201 */
202#define KVMPPC_RMAP_LOCK_BIT 63 202#define KVMPPC_RMAP_LOCK_BIT 63
203#define KVMPPC_RMAP_REF_BIT 33 203#define KVMPPC_RMAP_RC_SHIFT 32
204#define KVMPPC_RMAP_REFERENCED (1ul << KVMPPC_RMAP_REF_BIT) 204#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
205#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
205#define KVMPPC_RMAP_PRESENT 0x100000000ul 206#define KVMPPC_RMAP_PRESENT 0x100000000ul
206#define KVMPPC_RMAP_INDEX 0xfffffffful 207#define KVMPPC_RMAP_INDEX 0xfffffffful
207 208
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 66d6452c1081..aa51ddef468e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -505,6 +505,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
505 unsigned long is_io; 505 unsigned long is_io;
506 unsigned int writing, write_ok; 506 unsigned int writing, write_ok;
507 struct vm_area_struct *vma; 507 struct vm_area_struct *vma;
508 unsigned long rcbits;
508 509
509 /* 510 /*
510 * Real-mode code has already searched the HPT and found the 511 * Real-mode code has already searched the HPT and found the
@@ -640,11 +641,17 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
640 goto out_unlock; 641 goto out_unlock;
641 } 642 }
642 643
644 /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
645 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
646 r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
647
643 if (hptep[0] & HPTE_V_VALID) { 648 if (hptep[0] & HPTE_V_VALID) {
644 /* HPTE was previously valid, so we need to invalidate it */ 649 /* HPTE was previously valid, so we need to invalidate it */
645 unlock_rmap(rmap); 650 unlock_rmap(rmap);
646 hptep[0] |= HPTE_V_ABSENT; 651 hptep[0] |= HPTE_V_ABSENT;
647 kvmppc_invalidate_hpte(kvm, hptep, index); 652 kvmppc_invalidate_hpte(kvm, hptep, index);
653 /* don't lose previous R and C bits */
654 r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
648 } else { 655 } else {
649 kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); 656 kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
650 } 657 }
@@ -701,50 +708,55 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
701 struct revmap_entry *rev = kvm->arch.revmap; 708 struct revmap_entry *rev = kvm->arch.revmap;
702 unsigned long h, i, j; 709 unsigned long h, i, j;
703 unsigned long *hptep; 710 unsigned long *hptep;
704 unsigned long ptel, psize; 711 unsigned long ptel, psize, rcbits;
705 712
706 for (;;) { 713 for (;;) {
707 while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) 714 lock_rmap(rmapp);
708 cpu_relax();
709 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 715 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
710 __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); 716 unlock_rmap(rmapp);
711 break; 717 break;
712 } 718 }
713 719
714 /* 720 /*
715 * To avoid an ABBA deadlock with the HPTE lock bit, 721 * To avoid an ABBA deadlock with the HPTE lock bit,
716 * we have to unlock the rmap chain before locking the HPTE. 722 * we can't spin on the HPTE lock while holding the
717 * Thus we remove the first entry, unlock the rmap chain, 723 * rmap chain lock.
718 * lock the HPTE and then check that it is for the
719 * page we're unmapping before changing it to non-present.
720 */ 724 */
721 i = *rmapp & KVMPPC_RMAP_INDEX; 725 i = *rmapp & KVMPPC_RMAP_INDEX;
726 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
727 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
728 /* unlock rmap before spinning on the HPTE lock */
729 unlock_rmap(rmapp);
730 while (hptep[0] & HPTE_V_HVLOCK)
731 cpu_relax();
732 continue;
733 }
722 j = rev[i].forw; 734 j = rev[i].forw;
723 if (j == i) { 735 if (j == i) {
724 /* chain is now empty */ 736 /* chain is now empty */
725 j = 0; 737 *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
726 } else { 738 } else {
727 /* remove i from chain */ 739 /* remove i from chain */
728 h = rev[i].back; 740 h = rev[i].back;
729 rev[h].forw = j; 741 rev[h].forw = j;
730 rev[j].back = h; 742 rev[j].back = h;
731 rev[i].forw = rev[i].back = i; 743 rev[i].forw = rev[i].back = i;
732 j |= KVMPPC_RMAP_PRESENT; 744 *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
733 } 745 }
734 smp_wmb();
735 *rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);
736 746
737 /* Now lock, check and modify the HPTE */ 747 /* Now check and modify the HPTE */
738 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
739 while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
740 cpu_relax();
741 ptel = rev[i].guest_rpte; 748 ptel = rev[i].guest_rpte;
742 psize = hpte_page_size(hptep[0], ptel); 749 psize = hpte_page_size(hptep[0], ptel);
743 if ((hptep[0] & HPTE_V_VALID) && 750 if ((hptep[0] & HPTE_V_VALID) &&
744 hpte_rpn(ptel, psize) == gfn) { 751 hpte_rpn(ptel, psize) == gfn) {
745 kvmppc_invalidate_hpte(kvm, hptep, i);
746 hptep[0] |= HPTE_V_ABSENT; 752 hptep[0] |= HPTE_V_ABSENT;
753 kvmppc_invalidate_hpte(kvm, hptep, i);
754 /* Harvest R and C */
755 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
756 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
757 rev[i].guest_rpte = ptel | rcbits;
747 } 758 }
759 unlock_rmap(rmapp);
748 hptep[0] &= ~HPTE_V_HVLOCK; 760 hptep[0] &= ~HPTE_V_HVLOCK;
749 } 761 }
750 return 0; 762 return 0;
@@ -767,7 +779,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
767 kvm_unmap_rmapp(kvm, rmapp, gfn); 779 kvm_unmap_rmapp(kvm, rmapp, gfn);
768 while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) 780 while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
769 cpu_relax(); 781 cpu_relax();
770 __clear_bit(KVMPPC_RMAP_REF_BIT, rmapp); 782 *rmapp &= ~KVMPPC_RMAP_REFERENCED;
771 __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); 783 __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
772 return 1; 784 return 1;
773} 785}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index ba4a1376b331..91b45a03f438 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -87,15 +87,17 @@ EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
87 87
88/* Remove this HPTE from the chain for a real page */ 88/* Remove this HPTE from the chain for a real page */
89static void remove_revmap_chain(struct kvm *kvm, long pte_index, 89static void remove_revmap_chain(struct kvm *kvm, long pte_index,
90 unsigned long hpte_v) 90 struct revmap_entry *rev,
91 unsigned long hpte_v, unsigned long hpte_r)
91{ 92{
92 struct revmap_entry *rev, *next, *prev; 93 struct revmap_entry *next, *prev;
93 unsigned long gfn, ptel, head; 94 unsigned long gfn, ptel, head;
94 struct kvm_memory_slot *memslot; 95 struct kvm_memory_slot *memslot;
95 unsigned long *rmap; 96 unsigned long *rmap;
97 unsigned long rcbits;
96 98
97 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 99 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
98 ptel = rev->guest_rpte; 100 ptel = rev->guest_rpte |= rcbits;
99 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); 101 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
100 memslot = builtin_gfn_to_memslot(kvm, gfn); 102 memslot = builtin_gfn_to_memslot(kvm, gfn);
101 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 103 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
@@ -116,6 +118,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
116 else 118 else
117 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 119 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
118 } 120 }
121 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
119 unlock_rmap(rmap); 122 unlock_rmap(rmap);
120} 123}
121 124
@@ -162,6 +165,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
162 pte_t pte; 165 pte_t pte;
163 unsigned int writing; 166 unsigned int writing;
164 unsigned long mmu_seq; 167 unsigned long mmu_seq;
168 unsigned long rcbits;
165 bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; 169 bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
166 170
167 psize = hpte_page_size(pteh, ptel); 171 psize = hpte_page_size(pteh, ptel);
@@ -320,6 +324,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
320 } else { 324 } else {
321 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, 325 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
322 realmode); 326 realmode);
327 /* Only set R/C in real HPTE if already set in *rmap */
328 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
329 ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
323 } 330 }
324 } 331 }
325 332
@@ -394,7 +401,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
394 asm volatile("tlbiel %0" : : "r" (rb)); 401 asm volatile("tlbiel %0" : : "r" (rb));
395 asm volatile("ptesync" : : : "memory"); 402 asm volatile("ptesync" : : : "memory");
396 } 403 }
397 remove_revmap_chain(kvm, pte_index, v); 404 /* Read PTE low word after tlbie to get final R/C values */
405 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
398 } 406 }
399 r = rev->guest_rpte; 407 r = rev->guest_rpte;
400 unlock_hpte(hpte, 0); 408 unlock_hpte(hpte, 0);
@@ -469,12 +477,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
469 477
470 args[j] = ((0x80 | flags) << 56) + pte_index; 478 args[j] = ((0x80 | flags) << 56) + pte_index;
471 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 479 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
472 /* insert R and C bits from guest PTE */
473 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
474 args[j] |= rcbits << (56 - 5);
475 480
476 if (!(hp[0] & HPTE_V_VALID)) 481 if (!(hp[0] & HPTE_V_VALID)) {
482 /* insert R and C bits from PTE */
483 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
484 args[j] |= rcbits << (56 - 5);
477 continue; 485 continue;
486 }
478 487
479 hp[0] &= ~HPTE_V_VALID; /* leave it locked */ 488 hp[0] &= ~HPTE_V_VALID; /* leave it locked */
480 tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); 489 tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
@@ -505,13 +514,16 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
505 asm volatile("ptesync" : : : "memory"); 514 asm volatile("ptesync" : : : "memory");
506 } 515 }
507 516
517 /* Read PTE low words after tlbie to get final R/C values */
508 for (k = 0; k < n; ++k) { 518 for (k = 0; k < n; ++k) {
509 j = indexes[k]; 519 j = indexes[k];
510 pte_index = args[j] & ((1ul << 56) - 1); 520 pte_index = args[j] & ((1ul << 56) - 1);
511 hp = hptes[k]; 521 hp = hptes[k];
512 rev = revs[k]; 522 rev = revs[k];
513 remove_revmap_chain(kvm, pte_index, hp[0]); 523 remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
514 unlock_hpte(hp, 0); 524 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
525 args[j] |= rcbits << (56 - 5);
526 hp[0] = 0;
515 } 527 }
516 } 528 }
517 529
@@ -595,8 +607,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
595 pte_index &= ~3; 607 pte_index &= ~3;
596 n = 4; 608 n = 4;
597 } 609 }
598 if (flags & H_R_XLATE) 610 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
599 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
600 for (i = 0; i < n; ++i, ++pte_index) { 611 for (i = 0; i < n; ++i, ++pte_index) {
601 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 612 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
602 v = hpte[0] & ~HPTE_V_HVLOCK; 613 v = hpte[0] & ~HPTE_V_HVLOCK;
@@ -605,12 +616,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
605 v &= ~HPTE_V_ABSENT; 616 v &= ~HPTE_V_ABSENT;
606 v |= HPTE_V_VALID; 617 v |= HPTE_V_VALID;
607 } 618 }
608 if (v & HPTE_V_VALID) { 619 if (v & HPTE_V_VALID)
609 if (rev) 620 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
610 r = rev[i].guest_rpte;
611 else
612 r = hpte[1] | HPTE_R_RPN;
613 }
614 vcpu->arch.gpr[4 + i * 2] = v; 621 vcpu->arch.gpr[4 + i * 2] = v;
615 vcpu->arch.gpr[5 + i * 2] = r; 622 vcpu->arch.gpr[5 + i * 2] = r;
616 } 623 }