about summary refs log tree commit diff stats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
author: Paul Mackerras <paulus@samba.org> 2012-02-02 19:54:17 -0500
committer: Avi Kivity <avi@redhat.com> 2012-04-08 07:01:20 -0400
commit: f0888f70151c7f53de2b45ee20ff1905837943e8 (patch)
tree: 80b7e133e681a2b4e4255a8fa747711da4c59211 /arch/powerpc/kvm
parent: f6127716c346c73ab1513edee53231800188c5ba (diff)
KVM: PPC: Book3S HV: Make secondary threads more robust against stray IPIs
Currently on POWER7, if we are running the guest on a core and we don't need all the hardware threads, we do nothing to ensure that the unused threads aren't executing in the kernel (other than checking that they are offline). We just assume they're napping and we don't do anything to stop them trying to enter the kernel while the guest is running. This means that a stray IPI can wake up the hardware thread and it will then try to enter the kernel, but since the core is in guest context, it will execute code from the guest in hypervisor mode once it turns the MMU on, which tends to lead to crashes or hangs in the host.

This fixes the problem by adding two new one-byte flags in the kvmppc_host_state structure in the PACA which are used to interlock between the primary thread and the unused secondary threads when entering the guest. With these flags, the primary thread can ensure that the unused secondaries are not already in kernel mode (i.e. handling a stray IPI) and then indicate that they should not try to enter the kernel if they do get woken for any reason. Instead they will go into KVM code, find that there is no vcpu to run, acknowledge and clear the IPI and go back to nap mode.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c           | 49
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 92
2 files changed, 100 insertions(+), 41 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 01294a5099dd..e87f6196d222 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -569,6 +569,45 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
569 list_del(&vcpu->arch.run_list); 569 list_del(&vcpu->arch.run_list);
570} 570}
571 571
572static int kvmppc_grab_hwthread(int cpu)
573{
574 struct paca_struct *tpaca;
575 long timeout = 1000;
576
577 tpaca = &paca[cpu];
578
579 /* Ensure the thread won't go into the kernel if it wakes */
580 tpaca->kvm_hstate.hwthread_req = 1;
581
582 /*
583 * If the thread is already executing in the kernel (e.g. handling
584 * a stray interrupt), wait for it to get back to nap mode.
585 * The smp_mb() is to ensure that our setting of hwthread_req
586 * is visible before we look at hwthread_state, so if this
587 * races with the code at system_reset_pSeries and the thread
588 * misses our setting of hwthread_req, we are sure to see its
589 * setting of hwthread_state, and vice versa.
590 */
591 smp_mb();
592 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
593 if (--timeout <= 0) {
594 pr_err("KVM: couldn't grab cpu %d\n", cpu);
595 return -EBUSY;
596 }
597 udelay(1);
598 }
599 return 0;
600}
601
602static void kvmppc_release_hwthread(int cpu)
603{
604 struct paca_struct *tpaca;
605
606 tpaca = &paca[cpu];
607 tpaca->kvm_hstate.hwthread_req = 0;
608 tpaca->kvm_hstate.kvm_vcpu = NULL;
609}
610
572static void kvmppc_start_thread(struct kvm_vcpu *vcpu) 611static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
573{ 612{
574 int cpu; 613 int cpu;
@@ -588,8 +627,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
588 smp_wmb(); 627 smp_wmb();
589#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 628#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
590 if (vcpu->arch.ptid) { 629 if (vcpu->arch.ptid) {
591 tpaca->cpu_start = 0x80; 630 kvmppc_grab_hwthread(cpu);
592 wmb();
593 xics_wake_cpu(cpu); 631 xics_wake_cpu(cpu);
594 ++vc->n_woken; 632 ++vc->n_woken;
595 } 633 }
@@ -639,7 +677,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
639 struct kvm_vcpu *vcpu, *vcpu0, *vnext; 677 struct kvm_vcpu *vcpu, *vcpu0, *vnext;
640 long ret; 678 long ret;
641 u64 now; 679 u64 now;
642 int ptid; 680 int ptid, i;
643 681
644 /* don't start if any threads have a signal pending */ 682 /* don't start if any threads have a signal pending */
645 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 683 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -686,12 +724,17 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
686 vc->napping_threads = 0; 724 vc->napping_threads = 0;
687 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 725 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
688 kvmppc_start_thread(vcpu); 726 kvmppc_start_thread(vcpu);
727 /* Grab any remaining hw threads so they can't go into the kernel */
728 for (i = ptid; i < threads_per_core; ++i)
729 kvmppc_grab_hwthread(vc->pcpu + i);
689 730
690 preempt_disable(); 731 preempt_disable();
691 spin_unlock(&vc->lock); 732 spin_unlock(&vc->lock);
692 733
693 kvm_guest_enter(); 734 kvm_guest_enter();
694 __kvmppc_vcore_entry(NULL, vcpu0); 735 __kvmppc_vcore_entry(NULL, vcpu0);
736 for (i = 0; i < threads_per_core; ++i)
737 kvmppc_release_hwthread(vc->pcpu + i);
695 738
696 spin_lock(&vc->lock); 739 spin_lock(&vc->lock);
697 /* disable sending of IPIs on virtual external irqs */ 740 /* disable sending of IPIs on virtual external irqs */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b70bf22a3ff3..d595033bd449 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -26,6 +26,7 @@
26#include <asm/hvcall.h> 26#include <asm/hvcall.h>
27#include <asm/asm-offsets.h> 27#include <asm/asm-offsets.h>
28#include <asm/exception-64s.h> 28#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h>
29 30
30/***************************************************************************** 31/*****************************************************************************
31 * * 32 * *
@@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
82 83
83#define XICS_XIRR 4 84#define XICS_XIRR 4
84#define XICS_QIRR 0xc 85#define XICS_QIRR 0xc
86#define XICS_IPI 2 /* interrupt source # for IPIs */
85 87
86/* 88/*
87 * We come in here when wakened from nap mode on a secondary hw thread. 89 * We come in here when wakened from nap mode on a secondary hw thread.
@@ -94,26 +96,54 @@ kvm_start_guest:
94 subi r1,r1,STACK_FRAME_OVERHEAD 96 subi r1,r1,STACK_FRAME_OVERHEAD
95 ld r2,PACATOC(r13) 97 ld r2,PACATOC(r13)
96 98
97 /* were we napping due to cede? */ 99 li r0,KVM_HWTHREAD_IN_KVM
98 lbz r0,HSTATE_NAPPING(r13) 100 stb r0,HSTATE_HWTHREAD_STATE(r13)
99 cmpwi r0,0
100 bne kvm_end_cede
101 101
102 /* get vcpu pointer */ 102 /* NV GPR values from power7_idle() will no longer be valid */
103 ld r4, HSTATE_KVM_VCPU(r13) 103 li r0,1
104 stb r0,PACA_NAPSTATELOST(r13)
104 105
105 /* We got here with an IPI; clear it */ 106 /* get vcpu pointer, NULL if we have no vcpu to run */
106 ld r5, HSTATE_XICS_PHYS(r13) 107 ld r4,HSTATE_KVM_VCPU(r13)
107 li r0, 0xff 108 cmpdi cr1,r4,0
108 li r6, XICS_QIRR 109
109 li r7, XICS_XIRR 110 /* Check the wake reason in SRR1 to see why we got here */
110 lwzcix r8, r5, r7 /* ack the interrupt */ 111 mfspr r3,SPRN_SRR1
112 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
113 cmpwi r3,4 /* was it an external interrupt? */
114 bne 27f
115
116 /*
117 * External interrupt - for now assume it is an IPI, since we
118 * should never get any other interrupts sent to offline threads.
119 * Only do this for secondary threads.
120 */
121 beq cr1,25f
122 lwz r3,VCPU_PTID(r4)
123 cmpwi r3,0
124 beq 27f
12525: ld r5,HSTATE_XICS_PHYS(r13)
126 li r0,0xff
127 li r6,XICS_QIRR
128 li r7,XICS_XIRR
129 lwzcix r8,r5,r7 /* get and ack the interrupt */
111 sync 130 sync
112 stbcix r0, r5, r6 /* clear it */ 131 clrldi. r9,r8,40 /* get interrupt source ID. */
113 stwcix r8, r5, r7 /* EOI it */ 132 beq 27f /* none there? */
133 cmpwi r9,XICS_IPI
134 bne 26f
135 stbcix r0,r5,r6 /* clear IPI */
13626: stwcix r8,r5,r7 /* EOI the interrupt */
114 137
115 /* NV GPR values from power7_idle() will no longer be valid */ 13827: /* XXX should handle hypervisor maintenance interrupts etc. here */
116 stb r0, PACA_NAPSTATELOST(r13) 139
140 /* if we have no vcpu to run, go back to sleep */
141 beq cr1,kvm_no_guest
142
143 /* were we napping due to cede? */
144 lbz r0,HSTATE_NAPPING(r13)
145 cmpwi r0,0
146 bne kvm_end_cede
117 147
118.global kvmppc_hv_entry 148.global kvmppc_hv_entry
119kvmppc_hv_entry: 149kvmppc_hv_entry:
@@ -1445,8 +1475,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1445 * Take a nap until a decrementer or external interrupt occurs, 1475 * Take a nap until a decrementer or external interrupt occurs,
1446 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR 1476 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
1447 */ 1477 */
1448 li r0,0x80 1478 li r0,1
1449 stb r0,PACAPROCSTART(r13) 1479 stb r0,HSTATE_HWTHREAD_REQ(r13)
1450 mfspr r5,SPRN_LPCR 1480 mfspr r5,SPRN_LPCR
1451 ori r5,r5,LPCR_PECE0 | LPCR_PECE1 1481 ori r5,r5,LPCR_PECE0 | LPCR_PECE1
1452 mtspr SPRN_LPCR,r5 1482 mtspr SPRN_LPCR,r5
@@ -1463,26 +1493,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1463kvm_end_cede: 1493kvm_end_cede:
1464 /* Woken by external or decrementer interrupt */ 1494 /* Woken by external or decrementer interrupt */
1465 ld r1, HSTATE_HOST_R1(r13) 1495 ld r1, HSTATE_HOST_R1(r13)
1466 ld r2, PACATOC(r13)
1467 1496
1468 /* If we're a secondary thread and we got here by an IPI, ack it */
1469 ld r4,HSTATE_KVM_VCPU(r13)
1470 lwz r3,VCPU_PTID(r4)
1471 cmpwi r3,0
1472 beq 27f
1473 mfspr r3,SPRN_SRR1
1474 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
1475 cmpwi r3,4 /* was it an external interrupt? */
1476 bne 27f
1477 ld r5, HSTATE_XICS_PHYS(r13)
1478 li r0,0xff
1479 li r6,XICS_QIRR
1480 li r7,XICS_XIRR
1481 lwzcix r8,r5,r7 /* ack the interrupt */
1482 sync
1483 stbcix r0,r5,r6 /* clear it */
1484 stwcix r8,r5,r7 /* EOI it */
148527:
1486 /* load up FP state */ 1497 /* load up FP state */
1487 bl kvmppc_load_fp 1498 bl kvmppc_load_fp
1488 1499
@@ -1580,12 +1591,17 @@ secondary_nap:
1580 stwcx. r3, 0, r4 1591 stwcx. r3, 0, r4
1581 bne 51b 1592 bne 51b
1582 1593
1594kvm_no_guest:
1595 li r0, KVM_HWTHREAD_IN_NAP
1596 stb r0, HSTATE_HWTHREAD_STATE(r13)
1597 li r0, 0
1598 std r0, HSTATE_KVM_VCPU(r13)
1599
1583 li r3, LPCR_PECE0 1600 li r3, LPCR_PECE0
1584 mfspr r4, SPRN_LPCR 1601 mfspr r4, SPRN_LPCR
1585 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 1602 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
1586 mtspr SPRN_LPCR, r4 1603 mtspr SPRN_LPCR, r4
1587 isync 1604 isync
1588 li r0, 0
1589 std r0, HSTATE_SCRATCH0(r13) 1605 std r0, HSTATE_SCRATCH0(r13)
1590 ptesync 1606 ptesync
1591 ld r0, HSTATE_SCRATCH0(r13) 1607 ld r0, HSTATE_SCRATCH0(r13)