author    Mihai Caraman <mihai.caraman@freescale.com>  2014-07-23 12:06:22 -0400
committer Alexander Graf <agraf@suse.de>               2014-07-28 09:23:14 -0400
commit    f5250471b2d6ad27d536cb34ce39d76b91b2b36b (patch)
tree      fea4367c59322123c48ba1b04c414ed0fb8f5987
parent    51f047261e717b74b226f837a16455994b61ae30 (diff)
KVM: PPC: Bookehv: Get vcpu's last instruction for emulation
On book3e, KVM uses the dedicated load external pid (lwepx) instruction to read the guest's last instruction on the exit path. lwepx exceptions (DTLB_MISS, DSI and LRAT), generated by loading a guest address, need to be handled by KVM. These exceptions are generated in a substituted guest translation context (EPLC[EGS] = 1) from host context (MSR[GS] = 0).

Currently, KVM hooks only interrupts generated from guest context (MSR[GS] = 1), doing minimal checks on the fast path to avoid host performance degradation. lwepx exceptions originate from host state (MSR[GS] = 0), which implies additional checks in the DO_KVM macro (besides the current MSR[GS] = 1 check) by looking at the Exception Syndrome Register (ESR[EPID]) and the External PID Load Context Register (EPLC[EGS]). Doing this on each Data TLB miss exception is obviously too intrusive for the host.

Instead, read the guest's last instruction from kvmppc_load_last_inst() by searching the TLB for the physical address and kmapping it. This addresses the TODO for TLB eviction and execute-but-not-read entries, and allows us to get rid of lwepx until we are able to handle its failures.

A simple stress benchmark shows a 1% sys performance degradation compared with the previous approach (lwepx without failure handling):

time for i in `seq 1 10000`; do /bin/echo > /dev/null; done

real    0m 8.85s
user    0m 4.34s
sys     0m 4.48s

vs

real    0m 8.84s
user    0m 4.36s
sys     0m 4.44s

A solution that keeps lwepx and handles its exceptions in KVM would be to temporarily hijack the interrupt vector from the host. This imposes additional synchronization for cores like the FSL e6500, which share host IVOR registers between hardware threads. That optimized solution can be developed later on top of this patch.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
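In outline, the new lookup works as follows. This is a condensed, illustrative sketch of the kvmppc_load_last_inst() implementation added to e500_mmu_host.c in the diff below; the permission, storage-attribute and non-RAM checks of the real function are elided, and the helper name here is only for illustration.

/*
 * Condensed sketch of the lookup added in e500_mmu_host.c (see the diff
 * below); permission, storage-attribute and non-RAM checks are elided.
 */
static int last_inst_lookup_sketch(struct kvm_vcpu *vcpu, u32 *instr)
{
	gva_t geaddr = kvmppc_get_pc(vcpu);	/* guest PC that needs emulation */
	unsigned long flags;
	unsigned int psize_shift;
	u64 mas7_mas3;
	u32 mas1;
	hpa_t addr;
	void *va;

	/* Search the hardware TLB for the guest PC under the guest's LPID */
	local_irq_save(flags);
	mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) |
			 ((vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG));
	mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
	asm volatile("tlbsx 0, %[geaddr]" : : [geaddr] "r" (geaddr));
	mtspr(SPRN_MAS5, 0);
	mas1 = mfspr(SPRN_MAS1);
	mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mfspr(SPRN_MAS3);
	local_irq_restore(flags);

	/* Entry was evicted in the meantime: re-enter the guest and retry */
	if (!(mas1 & MAS1_VALID))
		return EMULATE_AGAIN;

	/* Real address = RPN from MAS7||MAS3 plus the offset within the page */
	psize_shift = MAS1_GET_TSIZE(mas1) + 10;
	addr = (mas7_mas3 & (~0ULL << psize_shift)) |
	       (geaddr & ((1ULL << psize_shift) - 1));

	/* kmap the page and read the instruction word */
	va = kmap_atomic(pfn_to_page(addr >> PAGE_SHIFT));
	*instr = *(u32 *)(va + (unsigned long)(addr & ~PAGE_MASK));
	kunmap_atomic(va);

	return EMULATE_DONE;
}

EMULATE_AGAIN simply resumes the guest, on the assumption that the next exit will find a valid TLB entry; kmap_atomic() keeps the mapping window short and preemption-safe on 32-bit hosts.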
-rw-r--r--  arch/powerpc/kvm/booke.c               44
-rw-r--r--  arch/powerpc/kvm/bookehv_interrupts.S  37
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c       92
3 files changed, 145 insertions(+), 28 deletions(-)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 50df5e3072cc..97bcde2dc075 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -819,6 +819,28 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				   enum emulation_result emulated, u32 last_inst)
+{
+	switch (emulated) {
+	case EMULATE_AGAIN:
+		return RESUME_GUEST;
+
+	case EMULATE_FAIL:
+		pr_debug("%s: load instruction from guest address %lx failed\n",
+			 __func__, vcpu->arch.pc);
+		/* For debugging, encode the failing instruction and
+		 * report it to userspace. */
+		run->hw.hardware_exit_reason = ~0ULL << 32;
+		run->hw.hardware_exit_reason |= last_inst;
+		kvmppc_core_queue_program(vcpu, ESR_PIL);
+		return RESUME_HOST;
+
+	default:
+		BUG();
+	}
+}
+
 /**
  * kvmppc_handle_exit
  *
@@ -830,6 +852,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int r = RESUME_HOST;
 	int s;
 	int idx;
+	u32 last_inst = KVM_INST_FETCH_FAILED;
+	enum emulation_result emulated = EMULATE_DONE;
 
 	/* update before a new last_exit_type is rewritten */
 	kvmppc_update_timing_stats(vcpu);
@@ -837,6 +861,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* restart interrupts if they were meant for the host */
 	kvmppc_restart_interrupt(vcpu, exit_nr);
 
+	/*
+	 * get last instruction before being preempted
+	 * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA
+	 */
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+	case BOOKE_INTERRUPT_DTLB_MISS:
+	case BOOKE_INTERRUPT_HV_PRIV:
+		emulated = kvmppc_get_last_inst(vcpu, false, &last_inst);
+		break;
+	default:
+		break;
+	}
+
 	local_irq_enable();
 
 	trace_kvm_exit(exit_nr, vcpu);
@@ -845,6 +883,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
 
+	if (emulated != EMULATE_DONE) {
+		r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst);
+		goto out;
+	}
+
 	switch (exit_nr) {
 	case BOOKE_INTERRUPT_MACHINE_CHECK:
 		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
@@ -1134,6 +1177,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		BUG();
 	}
 
+out:
 	/*
 	 * To avoid clobbering exit_reason, only check for signals if we
 	 * aren't already exiting to userspace for some other reason.
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 6ff448046301..e000b397ece3 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -121,38 +121,14 @@
 1:
 
 	.if	\flags & NEED_EMU
-	/*
-	 * This assumes you have external PID support.
-	 * To support a bookehv CPU without external PID, you'll
-	 * need to look up the TLB entry and create a temporary mapping.
-	 *
-	 * FIXME: we don't currently handle if the lwepx faults. PR-mode
-	 * booke doesn't handle it either. Since Linux doesn't use
-	 * broadcast tlbivax anymore, the only way this should happen is
-	 * if the guest maps its memory execute-but-not-read, or if we
-	 * somehow take a TLB miss in the middle of this entry code and
-	 * evict the relevant entry. On e500mc, all kernel lowmem is
-	 * bolted into TLB1 large page mappings, and we don't use
-	 * broadcast invalidates, so we should not take a TLB miss here.
-	 *
-	 * Later we'll need to deal with faults here. Disallowing guest
-	 * mappings that are execute-but-not-read could be an option on
-	 * e500mc, but not on chips with an LRAT if it is used.
-	 */
-
-	mfspr	r3, SPRN_EPLC	/* will already have correct ELPID and EGS */
 	PPC_STL	r15, VCPU_GPR(R15)(r4)
 	PPC_STL	r16, VCPU_GPR(R16)(r4)
 	PPC_STL	r17, VCPU_GPR(R17)(r4)
 	PPC_STL	r18, VCPU_GPR(R18)(r4)
 	PPC_STL	r19, VCPU_GPR(R19)(r4)
-	mr	r8, r3
 	PPC_STL	r20, VCPU_GPR(R20)(r4)
-	rlwimi	r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
 	PPC_STL	r21, VCPU_GPR(R21)(r4)
-	rlwimi	r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
 	PPC_STL	r22, VCPU_GPR(R22)(r4)
-	rlwimi	r8, r10, EPC_EPID_SHIFT, EPC_EPID
 	PPC_STL	r23, VCPU_GPR(R23)(r4)
 	PPC_STL	r24, VCPU_GPR(R24)(r4)
 	PPC_STL	r25, VCPU_GPR(R25)(r4)
@@ -162,10 +138,15 @@
 	PPC_STL	r29, VCPU_GPR(R29)(r4)
 	PPC_STL	r30, VCPU_GPR(R30)(r4)
 	PPC_STL	r31, VCPU_GPR(R31)(r4)
-	mtspr	SPRN_EPLC, r8
-	isync
-	lwepx	r9, 0, r5
-	mtspr	SPRN_EPLC, r3
+
+	/*
+	 * We don't use external PID support. lwepx faults would need to be
+	 * handled by KVM and this implies additional code in DO_KVM (for
+	 * DTLB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which
+	 * is too intrusive for the host. Get the last instruction in
+	 * kvmppc_get_last_inst().
+	 */
+	li	r9, KVM_INST_FETCH_FAILED
 	stw	r9, VCPU_LAST_INST(r4)
 	.endif
 
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 4385c14fca84..41508267b0e2 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -610,11 +610,103 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 	}
 }
 
+#ifdef CONFIG_KVM_BOOKE_HV
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
+			  u32 *instr)
+{
+	gva_t geaddr;
+	hpa_t addr;
+	hfn_t pfn;
+	hva_t eaddr;
+	u32 mas1, mas2, mas3;
+	u64 mas7_mas3;
+	struct page *page;
+	unsigned int addr_space, psize_shift;
+	bool pr;
+	unsigned long flags;
+
+	/* Search TLB for guest pc to get the real address */
+	geaddr = kvmppc_get_pc(vcpu);
+
+	addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG;
+
+	local_irq_save(flags);
+	mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
+	mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
+	asm volatile("tlbsx 0, %[geaddr]\n" : :
+		     [geaddr] "r" (geaddr));
+	mtspr(SPRN_MAS5, 0);
+	mtspr(SPRN_MAS8, 0);
+	mas1 = mfspr(SPRN_MAS1);
+	mas2 = mfspr(SPRN_MAS2);
+	mas3 = mfspr(SPRN_MAS3);
+#ifdef CONFIG_64BIT
+	mas7_mas3 = mfspr(SPRN_MAS7_MAS3);
+#else
+	mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3;
+#endif
+	local_irq_restore(flags);
+
+	/*
+	 * If the TLB entry for guest pc was evicted, return to the guest.
+	 * There are high chances to find a valid TLB entry next time.
+	 */
+	if (!(mas1 & MAS1_VALID))
+		return EMULATE_AGAIN;
+
+	/*
+	 * Another thread may rewrite the TLB entry in parallel, don't
+	 * execute from the address if the execute permission is not set
+	 */
+	pr = vcpu->arch.shared->msr & MSR_PR;
+	if (unlikely((pr && !(mas3 & MAS3_UX)) ||
+		     (!pr && !(mas3 & MAS3_SX)))) {
+		pr_err_ratelimited(
+			"%s: Instruction emulation from guest address %08lx without execute permission\n",
+			__func__, geaddr);
+		return EMULATE_AGAIN;
+	}
+
+	/*
+	 * The real address will be mapped by a cacheable, memory coherent,
+	 * write-back page. Check for mismatches when LRAT is used.
+	 */
+	if (has_feature(vcpu, VCPU_FTR_MMU_V2) &&
+	    unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) {
+		pr_err_ratelimited(
+			"%s: Instruction emulation from guest address %08lx mismatches storage attributes\n",
+			__func__, geaddr);
+		return EMULATE_AGAIN;
+	}
+
+	/* Get pfn */
+	psize_shift = MAS1_GET_TSIZE(mas1) + 10;
+	addr = (mas7_mas3 & (~0ULL << psize_shift)) |
+	       (geaddr & ((1ULL << psize_shift) - 1ULL));
+	pfn = addr >> PAGE_SHIFT;
+
+	/* Guard against emulation from devices area */
+	if (unlikely(!page_is_ram(pfn))) {
+		pr_err_ratelimited("%s: Instruction emulation from non-RAM host address %08llx is not supported\n",
+				   __func__, addr);
+		return EMULATE_AGAIN;
+	}
+
+	/* Map a page and get guest's instruction */
+	page = pfn_to_page(pfn);
+	eaddr = (unsigned long)kmap_atomic(page);
+	*instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK));
+	kunmap_atomic((u32 *)eaddr);
+
+	return EMULATE_DONE;
+}
+#else
 int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
 			  u32 *instr)
 {
 	return EMULATE_AGAIN;
 }
+#endif
 
 /************* MMU Notifiers *************/
 