author		Paul Mackerras <paulus@samba.org>	2011-06-28 20:23:08 -0400
committer	Avi Kivity <avi@redhat.com>		2011-07-12 06:16:57 -0400
commit		371fefd6f2dc46668e00871930dde613b88d4bde (patch)
tree		35fe799343861405914d27873eb175eb04d6dce5 /arch/powerpc/kvm/book3s_hv_rmhandlers.S
parent		54738c097163c3f01e67ccc85462b78d4d4f495f (diff)
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded.

This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4.

To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code.

When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core.

We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus.

When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy-in-host state to blocked).

It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest.

Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
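The entry/exit accounting described above is the heart of the synchronization: the low byte of vcore->entry_exit_count counts threads that have entered the guest, the 0xff00 bits count threads that have started to exit, and a thread may enter only while the exit count is still zero. Below is a minimal user-space C sketch of that protocol, for illustration only; the names vcore_model, vcore_of, try_enter_vcore and note_exit are invented for the example, and only entry_exit_count and the 4-threads-per-core figure come from the patch itself.

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Illustrative model of vcore->entry_exit_count (not the kernel code):
 *   bits 0-7  = number of threads that have entered the guest
 *   bits 8-15 = number of threads that have started to exit the guest
 */
struct vcore_model {
	atomic_int entry_exit_count;
};

/* A vcpu is assigned to vcore vcpu_id / threads_per_core (4 on POWER7). */
static int vcore_of(int vcpu_id, int threads_per_core)
{
	return vcpu_id / threads_per_core;
}

/* Mirrors the 21: lwarx/stwcx. loop: enter only if no thread is exiting yet. */
static bool try_enter_vcore(struct vcore_model *vc)
{
	int old = atomic_load(&vc->entry_exit_count);

	do {
		if (old >= 0x100)	/* some thread already started to exit */
			return false;	/* corresponds to the secondary_too_late path */
	} while (!atomic_compare_exchange_weak(&vc->entry_exit_count, &old, old + 1));
	return true;
}

/* Mirrors the 41: loop at hdec_soon: record that this thread is exiting. */
static void note_exit(struct vcore_model *vc)
{
	atomic_fetch_add(&vc->entry_exit_count, 0x100);
}

The primary thread's wait loop at label 15: in the patch then simply spins until the low byte equals the 0xff00 byte, i.e. until every thread that entered the guest has also started to exit.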
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv_rmhandlers.S')
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S	168
1 file changed, 162 insertions, 6 deletions
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e6adaadcdff2..c9bf177b7cf2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -30,8 +30,6 @@
  *                                                                           *
  ****************************************************************************/
 
-#define SHADOW_VCPU_OFF		PACA_KVM_SVCPU
-
 	.globl	kvmppc_skip_interrupt
 kvmppc_skip_interrupt:
 	mfspr	r13,SPRN_SRR0
@@ -79,6 +77,32 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
  *                                                                           *
  *****************************************************************************/
 
+#define XICS_XIRR		4
+#define XICS_QIRR		0xc
+
+/*
+ * We come in here when wakened from nap mode on a secondary hw thread.
+ * Relocation is off and most register values are lost.
+ * r13 points to the PACA.
+ */
+	.globl	kvm_start_guest
+kvm_start_guest:
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_OVERHEAD
+
+	/* get vcpu pointer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+
+	/* We got here with an IPI; clear it */
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r0, 0xff
+	li	r6, XICS_QIRR
+	li	r7, XICS_XIRR
+	lwzcix	r8, r5, r7		/* ack the interrupt */
+	sync
+	stbcix	r0, r5, r6		/* clear it */
+	stwcix	r8, r5, r7		/* EOI it */
+
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -200,7 +224,20 @@ kvmppc_hv_entry:
 	slbia
 	ptesync
 
-	/* Switch to guest partition. */
+	/* Increment entry count iff exit count is zero. */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	addi	r9,r5,VCORE_ENTRY_EXIT
+21:	lwarx	r3,0,r9
+	cmpwi	r3,0x100		/* any threads starting to exit? */
+	bge	secondary_too_late	/* if so we're too late to the party */
+	addi	r3,r3,1
+	stwcx.	r3,0,r9
+	bne	21b
+
+	/* Primary thread switches to guest partition. */
+	lwz	r6,VCPU_PTID(r4)
+	cmpwi	r6,0
+	bne	20f
 	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */
 	ld	r6,KVM_SDR1(r9)
 	lwz	r7,KVM_LPID(r9)
@@ -210,7 +247,15 @@ kvmppc_hv_entry:
 	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
 	mtspr	SPRN_LPID,r7
 	isync
-	ld	r8,VCPU_LPCR(r4)
+	li	r0,1
+	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
+	b	10f
+
+	/* Secondary threads wait for primary to have done partition switch */
+20:	lbz	r0,VCORE_IN_GUEST(r5)
+	cmpwi	r0,0
+	beq	20b
+10:	ld	r8,VCPU_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
 
@@ -225,10 +270,12 @@ kvmppc_hv_entry:
 	 * Invalidate the TLB if we could possibly have stale TLB
 	 * entries for this partition on this core due to the use
 	 * of tlbiel.
+	 * XXX maybe only need this on primary thread?
 	 */
 	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */
 	lwz	r5,VCPU_VCPUID(r4)
 	lhz	r6,PACAPACAINDEX(r13)
+	rldimi	r6,r5,0,62		/* XXX map as if threads 1:1 p:v */
 	lhz	r8,VCPU_LAST_CPU(r4)
 	sldi	r7,r6,1			/* see if this is the same vcpu */
 	add	r7,r7,r9		/* as last ran on this pcpu */
@@ -512,8 +559,60 @@ hcall_real_cont:
 	ptesync
 
 hdec_soon:
-	/* Switch back to host partition */
+	/* Increment the threads-exiting-guest count in the 0xff00
+	   bits of vcore->entry_exit_count */
+	lwsync
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	addi	r6,r5,VCORE_ENTRY_EXIT
+41:	lwarx	r3,0,r6
+	addi	r0,r3,0x100
+	stwcx.	r0,0,r6
+	bne	41b
+
+	/*
+	 * At this point we have an interrupt that we have to pass
+	 * up to the kernel or qemu; we can't handle it in real mode.
+	 * Thus we have to do a partition switch, so we have to
+	 * collect the other threads, if we are the first thread
+	 * to take an interrupt.  To do this, we set the HDEC to 0,
+	 * which causes an HDEC interrupt in all threads within 2ns
+	 * because the HDEC register is shared between all 4 threads.
+	 * However, we don't need to bother if this is an HDEC
+	 * interrupt, since the other threads will already be on their
+	 * way here in that case.
+	 */
+	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+	beq	40f
+	cmpwi	r3,0x100	/* Are we the first here? */
+	bge	40f
+	cmpwi	r3,1
+	ble	40f
+	li	r0,0
+	mtspr	SPRN_HDEC,r0
+40:
+
+	/* Secondary threads wait for primary to do partition switch */
 	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	lwz	r3,VCPU_PTID(r9)
+	cmpwi	r3,0
+	beq	15f
+	HMT_LOW
+13:	lbz	r3,VCORE_IN_GUEST(r5)
+	cmpwi	r3,0
+	bne	13b
+	HMT_MEDIUM
+	b	16f
+
+	/* Primary thread waits for all the secondaries to exit guest */
+15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
+	srwi	r0,r3,8
+	clrldi	r3,r3,56
+	cmpw	r3,r0
+	bne	15b
+	isync
+
+	/* Primary thread switches back to host partition */
 	ld	r6,KVM_HOST_SDR1(r4)
 	lwz	r7,KVM_HOST_LPID(r4)
 	li	r8,LPID_RSVD		/* switch to reserved LPID */
@@ -522,10 +621,12 @@ hdec_soon:
 	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
 	mtspr	SPRN_LPID,r7
 	isync
+	li	r0,0
+	stb	r0,VCORE_IN_GUEST(r5)
 	lis	r8,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
 
-	ld	r8,KVM_HOST_LPCR(r4)
+16:	ld	r8,KVM_HOST_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
 
@@ -634,6 +735,11 @@ hdec_soon:
 	mr	r3, r9
 	bl	.kvmppc_save_fp
 
+	/* Secondary threads go off to take a nap */
+	lwz	r0,VCPU_PTID(r3)
+	cmpwi	r0,0
+	bne	secondary_nap
+
 	/*
 	 * Reload DEC.  HDEC interrupts were disabled when
 	 * we reloaded the host's LPCR value.
@@ -840,6 +946,56 @@ _GLOBAL(kvmppc_h_set_dabr)
 	li	r3,0
 	blr
 
+secondary_too_late:
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	HMT_LOW
+13:	lbz	r3,VCORE_IN_GUEST(r5)
+	cmpwi	r3,0
+	bne	13b
+	HMT_MEDIUM
+	ld	r11,PACA_SLBSHADOWPTR(r13)
+
+	.rept	SLB_NUM_BOLTED
+	ld	r5,SLBSHADOW_SAVEAREA(r11)
+	ld	r6,SLBSHADOW_SAVEAREA+8(r11)
+	andis.	r7,r5,SLB_ESID_V@h
+	beq	1f
+	slbmte	r6,r5
+1:	addi	r11,r11,16
+	.endr
+	b	50f
+
+secondary_nap:
+	/* Clear any pending IPI */
+50:	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r0, 0xff
+	li	r6, XICS_QIRR
+	stbcix	r0, r5, r6
+
+	/* increment the nap count and then go to nap mode */
+	ld	r4, HSTATE_KVM_VCORE(r13)
+	addi	r4, r4, VCORE_NAP_COUNT
+	lwsync				/* make previous updates visible */
+51:	lwarx	r3, 0, r4
+	addi	r3, r3, 1
+	stwcx.	r3, 0, r4
+	bne	51b
+	isync
+
+	mfspr	r4, SPRN_LPCR
+	li	r0, LPCR_PECE
+	andc	r4, r4, r0
+	ori	r4, r4, LPCR_PECE0	/* exit nap on interrupt */
+	mtspr	SPRN_LPCR, r4
+	li	r0, 0
+	std	r0, HSTATE_SCRATCH0(r13)
+	ptesync
+	ld	r0, HSTATE_SCRATCH0(r13)
+1:	cmpd	r0, r0
+	bne	1b
+	nap
+	b	.
+
 /*
  * Save away FP, VMX and VSX registers.
  * r3 = vcpu pointer