aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2014-12-02 22:48:40 -0500
committerMichael Ellerman <mpe@ellerman.id.au>2014-12-07 21:16:31 -0500
commit56548fc0e86cb9156af7a7e1f15ba78f251dafaf (patch)
treea6153cfd3c2ea27943709c812e7179057fdbf4e5
parent682e77c861c4c60f79ffbeae5e1938ffed24a575 (diff)
powerpc/powernv: Return to cpu offline loop when finished in KVM guest
When a secondary hardware thread has finished running a KVM guest, we currently put that thread into nap mode using a nap instruction in the KVM code. This changes the code so that instead of doing a nap instruction directly, we instead cause the call to power7_nap() that put the thread into nap mode to return. The reason for doing this is to avoid having the KVM code having to know what low-power mode to put the thread into. In the case of a secondary thread used to run a KVM guest, the thread will be offline from the point of view of the host kernel, and the relevant power7_nap() call is the one in pnv_smp_cpu_disable(). In this case we don't want to clear pending IPIs in the offline loop in that function, since that might cause us to miss the wakeup for the next time the thread needs to run a guest. To tell whether or not to clear the interrupt, we use the SRR1 value returned from power7_nap(), and check if it indicates an external interrupt. We arrange that the return from power7_nap() when we have finished running a guest returns 0, so pending interrupts don't get flushed in that case. Note that it is important a secondary thread that has finished executing in the guest, or that didn't have a guest to run, should not return to power7_nap's caller while the kvm_hstate.hwthread_req flag in the PACA is non-zero, because the return from power7_nap will reenable the MMU, and the MMU might still be in guest context. In this situation we spin at low priority in real mode waiting for hwthread_req to become zero. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2
-rw-r--r--arch/powerpc/kernel/idle_power7.S12
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S54
-rw-r--r--arch/powerpc/platforms/powernv/smp.c23
5 files changed, 68 insertions, 25 deletions
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index dda7ac4c80bd..29c3798cf800 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -451,7 +451,7 @@ extern unsigned long cpuidle_disable;
451enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; 451enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
452 452
453extern int powersave_nap; /* set if nap mode can be used in idle loop */ 453extern int powersave_nap; /* set if nap mode can be used in idle loop */
454extern void power7_nap(int check_irq); 454extern unsigned long power7_nap(int check_irq);
455extern void power7_sleep(void); 455extern void power7_sleep(void);
456extern void flush_instruction_cache(void); 456extern void flush_instruction_cache(void);
457extern void hard_reset_now(void); 457extern void hard_reset_now(void);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6213f494f40b..db08382e19f1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -131,6 +131,8 @@ BEGIN_FTR_SECTION
1311: 1311:
132#endif 132#endif
133 133
134 /* Return SRR1 from power7_nap() */
135 mfspr r3,SPRN_SRR1
134 beq cr1,2f 136 beq cr1,2f
135 b power7_wakeup_noloss 137 b power7_wakeup_noloss
1362: b power7_wakeup_loss 1382: b power7_wakeup_loss
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index c0754bbf8118..18c0687e5ab3 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -212,6 +212,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
212 mtspr SPRN_SRR0,r5 212 mtspr SPRN_SRR0,r5
213 rfid 213 rfid
214 214
215/*
216 * R3 here contains the value that will be returned to the caller
217 * of power7_nap.
218 */
215_GLOBAL(power7_wakeup_loss) 219_GLOBAL(power7_wakeup_loss)
216 ld r1,PACAR1(r13) 220 ld r1,PACAR1(r13)
217BEGIN_FTR_SECTION 221BEGIN_FTR_SECTION
@@ -219,15 +223,19 @@ BEGIN_FTR_SECTION
219END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) 223END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
220 REST_NVGPRS(r1) 224 REST_NVGPRS(r1)
221 REST_GPR(2, r1) 225 REST_GPR(2, r1)
222 ld r3,_CCR(r1) 226 ld r6,_CCR(r1)
223 ld r4,_MSR(r1) 227 ld r4,_MSR(r1)
224 ld r5,_NIP(r1) 228 ld r5,_NIP(r1)
225 addi r1,r1,INT_FRAME_SIZE 229 addi r1,r1,INT_FRAME_SIZE
226 mtcr r3 230 mtcr r6
227 mtspr SPRN_SRR1,r4 231 mtspr SPRN_SRR1,r4
228 mtspr SPRN_SRR0,r5 232 mtspr SPRN_SRR0,r5
229 rfid 233 rfid
230 234
235/*
236 * R3 here contains the value that will be returned to the caller
237 * of power7_nap.
238 */
231_GLOBAL(power7_wakeup_noloss) 239_GLOBAL(power7_wakeup_noloss)
232 lbz r0,PACA_NAPSTATELOST(r13) 240 lbz r0,PACA_NAPSTATELOST(r13)
233 cmpwi r0,0 241 cmpwi r0,0
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index edb2ccdbb2ba..65c105b17a25 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -201,8 +201,6 @@ kvmppc_primary_no_guest:
201 bge kvm_novcpu_exit /* another thread already exiting */ 201 bge kvm_novcpu_exit /* another thread already exiting */
202 li r3, NAPPING_NOVCPU 202 li r3, NAPPING_NOVCPU
203 stb r3, HSTATE_NAPPING(r13) 203 stb r3, HSTATE_NAPPING(r13)
204 li r3, 1
205 stb r3, HSTATE_HWTHREAD_REQ(r13)
206 204
207 b kvm_do_nap 205 b kvm_do_nap
208 206
@@ -293,6 +291,8 @@ kvm_start_guest:
293 /* if we have no vcpu to run, go back to sleep */ 291 /* if we have no vcpu to run, go back to sleep */
294 beq kvm_no_guest 292 beq kvm_no_guest
295 293
294kvm_secondary_got_guest:
295
296 /* Set HSTATE_DSCR(r13) to something sensible */ 296 /* Set HSTATE_DSCR(r13) to something sensible */
297 ld r6, PACA_DSCR(r13) 297 ld r6, PACA_DSCR(r13)
298 std r6, HSTATE_DSCR(r13) 298 std r6, HSTATE_DSCR(r13)
@@ -318,27 +318,46 @@ kvm_start_guest:
318 stwcx. r3, 0, r4 318 stwcx. r3, 0, r4
319 bne 51b 319 bne 51b
320 320
321/*
322 * At this point we have finished executing in the guest.
323 * We need to wait for hwthread_req to become zero, since
324 * we may not turn on the MMU while hwthread_req is non-zero.
325 * While waiting we also need to check if we get given a vcpu to run.
326 */
321kvm_no_guest: 327kvm_no_guest:
322 li r0, KVM_HWTHREAD_IN_NAP 328 lbz r3, HSTATE_HWTHREAD_REQ(r13)
329 cmpwi r3, 0
330 bne 53f
331 HMT_MEDIUM
332 li r0, KVM_HWTHREAD_IN_KERNEL
323 stb r0, HSTATE_HWTHREAD_STATE(r13) 333 stb r0, HSTATE_HWTHREAD_STATE(r13)
324kvm_do_nap: 334 /* need to recheck hwthread_req after a barrier, to avoid race */
325 /* Clear the runlatch bit before napping */ 335 sync
326 mfspr r2, SPRN_CTRLF 336 lbz r3, HSTATE_HWTHREAD_REQ(r13)
327 clrrdi r2, r2, 1 337 cmpwi r3, 0
328 mtspr SPRN_CTRLT, r2 338 bne 54f
329 339/*
340 * We jump to power7_wakeup_loss, which will return to the caller
341 * of power7_nap in the powernv cpu offline loop. The value we
342 * put in r3 becomes the return value for power7_nap.
343 */
330 li r3, LPCR_PECE0 344 li r3, LPCR_PECE0
331 mfspr r4, SPRN_LPCR 345 mfspr r4, SPRN_LPCR
332 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 346 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
333 mtspr SPRN_LPCR, r4 347 mtspr SPRN_LPCR, r4
334 isync 348 li r3, 0
335 std r0, HSTATE_SCRATCH0(r13) 349 b power7_wakeup_loss
336 ptesync 350
337 ld r0, HSTATE_SCRATCH0(r13) 35153: HMT_LOW
3381: cmpd r0, r0 352 ld r4, HSTATE_KVM_VCPU(r13)
339 bne 1b 353 cmpdi r4, 0
340 nap 354 beq kvm_no_guest
341 b . 355 HMT_MEDIUM
356 b kvm_secondary_got_guest
357
35854: li r0, KVM_HWTHREAD_IN_KVM
359 stb r0, HSTATE_HWTHREAD_STATE(r13)
360 b kvm_no_guest
342 361
343/****************************************************************************** 362/******************************************************************************
344 * * 363 * *
@@ -2172,6 +2191,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
2172 * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the 2191 * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
2173 * runlatch bit before napping. 2192 * runlatch bit before napping.
2174 */ 2193 */
2194kvm_do_nap:
2175 mfspr r2, SPRN_CTRLF 2195 mfspr r2, SPRN_CTRLF
2176 clrrdi r2, r2, 1 2196 clrrdi r2, r2, 1
2177 mtspr SPRN_CTRLT, r2 2197 mtspr SPRN_CTRLT, r2
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 4753958cd509..b716f666e48a 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -149,6 +149,7 @@ static int pnv_smp_cpu_disable(void)
149static void pnv_smp_cpu_kill_self(void) 149static void pnv_smp_cpu_kill_self(void)
150{ 150{
151 unsigned int cpu; 151 unsigned int cpu;
152 unsigned long srr1;
152 153
153 /* Standard hot unplug procedure */ 154 /* Standard hot unplug procedure */
154 local_irq_disable(); 155 local_irq_disable();
@@ -165,13 +166,25 @@ static void pnv_smp_cpu_kill_self(void)
165 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); 166 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
166 while (!generic_check_cpu_restart(cpu)) { 167 while (!generic_check_cpu_restart(cpu)) {
167 ppc64_runlatch_off(); 168 ppc64_runlatch_off();
168 power7_nap(1); 169 srr1 = power7_nap(1);
169 ppc64_runlatch_on(); 170 ppc64_runlatch_on();
170 171
171 /* Clear the IPI that woke us up */ 172 /*
172 icp_native_flush_interrupt(); 173 * If the SRR1 value indicates that we woke up due to
173 local_paca->irq_happened &= PACA_IRQ_HARD_DIS; 174 * an external interrupt, then clear the interrupt.
174 mb(); 175 * We clear the interrupt before checking for the
176 * reason, so as to avoid a race where we wake up for
177 * some other reason, find nothing and clear the interrupt
178 * just as some other cpu is sending us an interrupt.
179 * If we returned from power7_nap as a result of
180 * having finished executing in a KVM guest, then srr1
181 * contains 0.
182 */
183 if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
184 icp_native_flush_interrupt();
185 local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
186 smp_mb();
187 }
175 188
176 if (cpu_core_split_required()) 189 if (cpu_core_split_required())
177 continue; 190 continue;