path: root/arch/powerpc
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-08-02 16:11:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-08-02 16:11:27 -0400
commit	221bb8a46e230b9824204ae86537183d9991ff2a (patch)
tree	92510d72285b2285be7cb87288bf088cb28af4c1 /arch/powerpc
parent	f7b32e4c021fd788f13f6785e17efbc3eb05b351 (diff)
parent	23528bb21ee2c9b27f3feddd77a2a3351a8df148 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:

 - ARM: GICv3 ITS emulation and various fixes. Removal of the old VGIC
   implementation.

 - s390: support for trapping software breakpoints, nested
   virtualization (vSIE), the STHYI opcode, initial extensions for CPU
   model support.

 - MIPS: support for MIPS64 hosts (32-bit guests only) and lots of
   cleanups, preliminary to this and the upcoming support for hardware
   virtualization extensions.

 - x86: support for execute-only mappings in nested EPT; reduced vmexit
   latency for TSC deadline timer (by about 30%) on Intel hosts;
   support for more than 255 vCPUs.

 - PPC: bugfixes.

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits)
  KVM: PPC: Introduce KVM_CAP_PPC_HTM
  MIPS: Select HAVE_KVM for MIPS64_R{2,6}
  MIPS: KVM: Reset CP0_PageMask during host TLB flush
  MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX()
  MIPS: KVM: Sign extend MFC0/RDHWR results
  MIPS: KVM: Fix 64-bit big endian dynamic translation
  MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase
  MIPS: KVM: Use 64-bit CP0_EBase when appropriate
  MIPS: KVM: Set CP0_Status.KX on MIPS64
  MIPS: KVM: Make entry code MIPS64 friendly
  MIPS: KVM: Use kmap instead of CKSEG0ADDR()
  MIPS: KVM: Use virt_to_phys() to get commpage PFN
  MIPS: Fix definition of KSEGX() for 64-bit
  KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD
  kvm: x86: nVMX: maintain internal copy of current VMCS
  KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
  KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
  KVM: arm64: vgic-its: Simplify MAPI error handling
  KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers
  KVM: arm64: vgic-its: Turn device_id validation into generic ID validation
  ...
Diffstat (limited to 'arch/powerpc')
-rw-r--r--	arch/powerpc/include/asm/hmi.h			45
-rw-r--r--	arch/powerpc/include/asm/paca.h			6
-rw-r--r--	arch/powerpc/kernel/Makefile			2
-rw-r--r--	arch/powerpc/kernel/exceptions-64s.S		4
-rw-r--r--	arch/powerpc/kernel/hmi.c			56
-rw-r--r--	arch/powerpc/kernel/idle_book3s.S		4
-rw-r--r--	arch/powerpc/kernel/traps.c			5
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c			41
-rw-r--r--	arch/powerpc/kvm/book3s_hv_ras.c		176
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S		527
-rw-r--r--	arch/powerpc/kvm/book3s_pr.c			16
-rw-r--r--	arch/powerpc/kvm/booke.c			4
-rw-r--r--	arch/powerpc/kvm/emulate.c			1
-rw-r--r--	arch/powerpc/kvm/mpic.c				3
-rw-r--r--	arch/powerpc/kvm/powerpc.c			6
-rw-r--r--	arch/powerpc/platforms/powernv/opal-wrappers.S	2
16 files changed, 670 insertions(+), 228 deletions(-)
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
new file mode 100644
index 000000000000..88b4901ac4ee
--- /dev/null
+++ b/arch/powerpc/include/asm/hmi.h
@@ -0,0 +1,45 @@
+/*
+ * Hypervisor Maintenance Interrupt header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_HMI_H__
+#define __ASM_PPC64_HMI_H__
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define	CORE_TB_RESYNC_REQ_BIT		63
+#define MAX_SUBCORE_PER_CORE		4
+
+/*
+ * The sibling_subcore_state structure is used to co-ordinate all threads
+ * during an HMI to avoid TB corruption. This structure is allocated once
+ * per core and shared by all threads on that core.
+ */
+struct sibling_subcore_state {
+	unsigned long	flags;
+	u8		in_guest[MAX_SUBCORE_PER_CORE];
+};
+
+extern void wait_for_subcore_guest_exit(void);
+extern void wait_for_tb_resync(void);
+#else
+static inline void wait_for_subcore_guest_exit(void) { }
+static inline void wait_for_tb_resync(void) { }
+#endif
+#endif /* __ASM_PPC64_HMI_H__ */
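For orientation, the coordination idea behind sibling_subcore_state can be
sketched in plain C11 (a user-space sketch, not kernel code; the helper names
subcore_enter/subcore_exit/wait_all_subcores_out are hypothetical):

	#include <stdatomic.h>

	#define MAX_SUBCORE_PER_CORE 4

	struct subcore_state_sketch {
		_Atomic unsigned long flags;	/* bit 63: TB resync requested */
		_Atomic unsigned char in_guest[MAX_SUBCORE_PER_CORE];
	};

	/* The primary thread of a subcore marks guest entry/exit. */
	static void subcore_enter(struct subcore_state_sketch *s, int id)
	{
		atomic_store(&s->in_guest[id], 1);
	}

	static void subcore_exit(struct subcore_state_sketch *s, int id)
	{
		atomic_store(&s->in_guest[id], 0);
	}

	/* HMI path: spin until no sibling subcore is still in the guest. */
	static void wait_all_subcores_out(struct subcore_state_sketch *s)
	{
		for (int i = 0; i < MAX_SUBCORE_PER_CORE; i++)
			while (atomic_load(&s->in_guest[i]))
				;	/* cpu_relax() in the kernel version */
	}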
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ad171e979ab0..148303e7771f 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -26,6 +26,7 @@
 #include <asm/kvm_book3s_asm.h>
 #endif
 #include <asm/accounting.h>
+#include <asm/hmi.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -182,6 +183,11 @@ struct paca_struct {
 	 */
 	u16 in_mce;
 	u8 hmi_event_available;		/* HMI event is available */
+	/*
+	 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+	 * more details.
+	 */
+	struct sibling_subcore_state *sibling_subcore_state;
 #endif
 
 	/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fe4c075bcf50..b2027a5cf508 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32) += vdso32/
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o hmi.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6200e4925d26..694def6c9d61 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -671,6 +671,8 @@ BEGIN_FTR_SECTION
 	beq	h_doorbell_common
 	cmpwi	r3,0xea0
 	beq	h_virt_irq_common
+	cmpwi	r3,0xe60
+	beq	hmi_exception_common
 FTR_SECTION_ELSE
 	cmpwi	r3,0xa00
 	beq	doorbell_super_common
@@ -1172,7 +1174,7 @@ fwnmi_data_area:
 
 	.globl hmi_exception_early
 hmi_exception_early:
-	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62)
 	mr	r10,r1			/* Save r1 */
 	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack */
 	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c
new file mode 100644
index 000000000000..e3f738eb1cac
--- /dev/null
+++ b/arch/powerpc/kernel/hmi.c
@@ -0,0 +1,56 @@
+/*
+ * Hypervisor Maintenance Interrupt (HMI) handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/paca.h>
+#include <asm/hmi.h>
+
+void wait_for_subcore_guest_exit(void)
+{
+	int i;
+
+	/*
+	 * A NULL bitmap pointer indicates that the KVM module hasn't
+	 * been loaded yet and hence no guests are running.
+	 * If no KVM is in use, there is no need to co-ordinate among
+	 * threads, as all of them will always be in the host and nothing
+	 * is going to modify the TB other than the opal hmi handler.
+	 * Hence, just return from here.
+	 */
+	if (!local_paca->sibling_subcore_state)
+		return;
+
+	for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
+		while (local_paca->sibling_subcore_state->in_guest[i])
+			cpu_relax();
+}
+
+void wait_for_tb_resync(void)
+{
+	if (!local_paca->sibling_subcore_state)
+		return;
+
+	while (test_bit(CORE_TB_RESYNC_REQ_BIT,
+			&local_paca->sibling_subcore_state->flags))
+		cpu_relax();
+}
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 335eb6cedae5..8a56a51fc0cb 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -336,7 +336,9 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
 	ld	r2,PACATOC(r13);					\
 	ld	r1,PACAR1(r13);						\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
-	bl	opal_rm_handle_hmi;					\
+	li	r3,0;			/* NULL argument */		\
+	bl	hmi_exception_realmode;					\
+	nop;								\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f7e2f2e318bd..2cb589264cb7 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -61,6 +61,7 @@
 #include <asm/tm.h>
 #include <asm/debug.h>
 #include <asm/asm-prototypes.h>
+#include <asm/hmi.h>
 #include <sysdev/fsl_pci.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
@@ -308,9 +309,13 @@ long hmi_exception_realmode(struct pt_regs *regs)
 {
 	__this_cpu_inc(irq_stat.hmi_exceptions);
 
+	wait_for_subcore_guest_exit();
+
 	if (ppc_md.hmi_exception_early)
 		ppc_md.hmi_exception_early(regs);
 
+	wait_for_tb_resync();
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e20beae5ca7a..2fd5580c8f6e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -52,6 +52,7 @@
 #include <asm/switch_to.h>
 #include <asm/smp.h>
 #include <asm/dbell.h>
+#include <asm/hmi.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
@@ -2522,7 +2523,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
 			spin_unlock(&pvc->lock);
 
-	kvm_guest_enter();
+	guest_enter();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
@@ -2570,7 +2571,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
-	kvm_guest_exit();
+	guest_exit();
 
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
@@ -3401,6 +3402,38 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.hcall_implemented = kvmppc_hcall_impl_hv,
 };
 
+static int kvm_init_subcore_bitmap(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	struct sibling_subcore_state *sibling_subcore_state;
+
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		/* Ignore if it is already allocated. */
+		if (paca[first_cpu].sibling_subcore_state)
+			continue;
+
+		sibling_subcore_state =
+			kmalloc_node(sizeof(struct sibling_subcore_state),
+							GFP_KERNEL, node);
+		if (!sibling_subcore_state)
+			return -ENOMEM;
+
+		memset(sibling_subcore_state, 0,
+				sizeof(struct sibling_subcore_state));
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca[cpu].sibling_subcore_state = sibling_subcore_state;
+		}
+	}
+	return 0;
+}
+
 static int kvmppc_book3s_init_hv(void)
 {
 	int r;
@@ -3411,6 +3444,10 @@ static int kvmppc_book3s_init_hv(void)
 	if (r < 0)
 		return -ENODEV;
 
+	r = kvm_init_subcore_bitmap();
+	if (r)
+		return r;
+
 	kvm_ops_hv.owner = THIS_MODULE;
 	kvmppc_hv_ops = &kvm_ops_hv;
 
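As a side note, the kmalloc_node() + memset() pair in kvm_init_subcore_bitmap()
above is equivalent to a single kzalloc_node() call; a minimal sketch of that
variant (same kernel API, same semantics):

	sibling_subcore_state =
		kzalloc_node(sizeof(struct sibling_subcore_state),
			     GFP_KERNEL, node);
	if (!sibling_subcore_state)
		return -ENOMEM;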
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 93b5f5c9b445..0fa70a9618d7 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -13,6 +13,9 @@
 #include <linux/kernel.h>
 #include <asm/opal.h>
 #include <asm/mce.h>
+#include <asm/machdep.h>
+#include <asm/cputhreads.h>
+#include <asm/hmi.h>
 
 /* SRR1 bits for machine check on POWER7 */
 #define SRR1_MC_LDSTERR		(1ul << (63-42))
@@ -140,3 +143,176 @@ long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
 	return kvmppc_realmode_mc_power7(vcpu);
 }
+
+/* Check if dynamic split is in force and return subcore size accordingly. */
+static inline int kvmppc_cur_subcore_size(void)
+{
+	if (local_paca->kvm_hstate.kvm_split_mode)
+		return local_paca->kvm_hstate.kvm_split_mode->subcore_size;
+
+	return threads_per_subcore;
+}
+
+void kvmppc_subcore_enter_guest(void)
+{
+	int thread_id, subcore_id;
+
+	thread_id = cpu_thread_in_core(local_paca->paca_index);
+	subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+	local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
+}
+
+void kvmppc_subcore_exit_guest(void)
+{
+	int thread_id, subcore_id;
+
+	thread_id = cpu_thread_in_core(local_paca->paca_index);
+	subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+	local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
+}
+
+static bool kvmppc_tb_resync_required(void)
+{
+	if (test_and_set_bit(CORE_TB_RESYNC_REQ_BIT,
+				&local_paca->sibling_subcore_state->flags))
+		return false;
+
+	return true;
+}
+
+static void kvmppc_tb_resync_done(void)
+{
+	clear_bit(CORE_TB_RESYNC_REQ_BIT,
+			&local_paca->sibling_subcore_state->flags);
+}
+
+/*
+ * kvmppc_realmode_hmi_handler() is called only by the primary thread,
+ * during the guest exit path.
+ *
+ * There are multiple reasons why an HMI could occur; one of them is a
+ * Timebase (TB) error. If this HMI is due to a TB error, the TB would
+ * have been in a stopped state. The opal hmi handler will fix it and
+ * restore the TB value with the host timebase value. For HMIs caused
+ * by non-TB errors, the opal hmi handler will not touch/restore the TB
+ * register and hence there won't be any change in the TB value.
+ *
+ * Since we are not sure about the cause of this HMI, we can't be sure
+ * whether the TB register holds the guest or the host timebase value.
+ * Hence the idea is to resync the TB on every HMI, so that we know the
+ * exact state of the TB value. The resync TB call will restore the TB
+ * to the host timebase.
+ *
+ * Things to consider:
+ * - On a TB error, the HMI interrupt is reported on all the threads of
+ *   the core that has encountered the TB error, irrespective of
+ *   split-core mode.
+ * - The very first thread on the core that gets a chance to fix the TB
+ *   error would resync the TB with the local chipTOD value.
+ * - The TB resync is a core-level action, i.e. it will sync all the TBs
+ *   in that core independent of split-core mode. This means that if we
+ *   trigger a TB sync from a thread in one subcore, it will affect the
+ *   TB values of the sibling subcores of the same core.
+ *
+ * All threads need to co-ordinate before making the opal hmi handler
+ * call. All threads will use sibling_subcore_state->in_guest[] (shared
+ * by all threads in the core) in the paca, which holds information about
+ * whether sibling subcores are in guest mode or host mode. The in_guest[]
+ * array is of size MAX_SUBCORE_PER_CORE=4, indexed by subcore id to
+ * set/unset the subcore status. Only the primary thread from each subcore
+ * is responsible for setting/unsetting its designated array element while
+ * entering/exiting the guest.
+ *
+ * After invoking the opal hmi handler call, one of the threads (of the
+ * entire core) will need to resync the TB. Bit 63 of the subcore state
+ * bitmap flags (sibling_subcore_state->flags) is used to co-ordinate
+ * between primary threads to decide who takes up the responsibility.
+ *
+ * This is what we do:
+ * - The primary thread from each subcore tries to set the resync required
+ *   bit[63] of paca->sibling_subcore_state->flags.
+ * - The first primary thread that is able to set the flag takes the
+ *   responsibility of the TB resync. (Let us call it the thread leader.)
+ * - All other threads which are in the host will call
+ *   wait_for_subcore_guest_exit() and wait for in_guest[0-3] from
+ *   paca->sibling_subcore_state to get cleared.
+ * - All the primary threads will clear their subcore status from the
+ *   subcore state in_guest[] array respectively.
+ * - Once all primary threads have cleared in_guest[0-3], all of them will
+ *   invoke the opal hmi handler.
+ * - Now all threads will wait for the TB resync to complete by invoking
+ *   wait_for_tb_resync(), except the thread leader.
+ * - The thread leader will do a TB resync by invoking the
+ *   opal_resync_timebase() call, and then it will clear the resync
+ *   required bit.
+ * - All other threads will now come out of the resync wait loop and
+ *   proceed with individual execution.
+ * - On return from this function, the primary thread will signal all
+ *   secondary threads to proceed.
+ * - All secondary threads will eventually call the opal hmi handler on
+ *   their exit path.
+ */
+
+long kvmppc_realmode_hmi_handler(void)
+{
+	int ptid = local_paca->kvm_hstate.ptid;
+	bool resync_req;
+
+	/* This is only called on the primary thread. */
+	BUG_ON(ptid != 0);
+	__this_cpu_inc(irq_stat.hmi_exceptions);
+
+	/*
+	 * By now the primary thread has already completed the guest->host
+	 * partition switch but hasn't signaled the secondaries yet.
+	 * All the secondary threads on this subcore are waiting for the
+	 * primary thread to signal them to go ahead.
+	 *
+	 * Threads from a subcore which isn't in the guest will all wait
+	 * until all other subcores on this core exit the guest.
+	 *
+	 * Now set the resync required bit. If you are the first to set
+	 * this bit then the kvmppc_tb_resync_required() function will
+	 * return true. For all the other subcores
+	 * kvmppc_tb_resync_required() will return false.
+	 *
+	 * If resync_req == true, then this thread is responsible for
+	 * initiating the TB resync after the hmi handler has completed.
+	 * All other threads on this core will wait until this thread
+	 * clears the resync required bit flag.
+	 */
+	resync_req = kvmppc_tb_resync_required();
+
+	/* Reset the subcore status to indicate it has exited guest */
+	kvmppc_subcore_exit_guest();
+
+	/*
+	 * Wait for other subcores on this core to exit the guest.
+	 * All the primary threads and threads from subcores that are
+	 * not in the guest will wait here until all subcores are out
+	 * of guest context.
+	 */
+	wait_for_subcore_guest_exit();
+
+	/*
+	 * At this point we are sure that primary threads from each
+	 * subcore on this core have completed the guest->host partition
+	 * switch. Now it is safe to call the HMI handler.
+	 */
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(NULL);
+
+	/*
+	 * Check if this thread is responsible for resyncing the TB.
+	 * All other threads will wait until this thread completes the
+	 * TB resync.
+	 */
+	if (resync_req) {
+		opal_resync_timebase();
+		/* Reset TB resync req bit */
+		kvmppc_tb_resync_done();
+	} else {
+		wait_for_tb_resync();
+	}
+	return 0;
+}
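The leader election in kvmppc_tb_resync_required() is a plain test-and-set
race: the first primary thread to set bit 63 becomes the leader, performs the
resync, and clears the bit; everyone else spins until it clears. A minimal
stand-alone C11 sketch of the same pattern (the names and the
do_timebase_resync() hook are hypothetical):

	#include <stdatomic.h>
	#include <stdbool.h>

	#define TB_RESYNC_REQ	(1UL << 63)

	static atomic_ulong core_flags;	/* shared by all threads of a core */

	/* True only for the first caller: that thread becomes the leader. */
	static bool claim_resync_leadership(void)
	{
		return !(atomic_fetch_or(&core_flags, TB_RESYNC_REQ) &
			 TB_RESYNC_REQ);
	}

	static void handle_hmi_sketch(void)
	{
		bool leader = claim_resync_leadership();

		/* ... every thread runs the platform HMI handler here ... */

		if (leader) {
			/* do_timebase_resync(); -- hypothetical hook */
			atomic_fetch_and(&core_flags, ~TB_RESYNC_REQ);
		} else {
			while (atomic_load(&core_flags) & TB_RESYNC_REQ)
				;	/* cpu_relax() in the kernel */
		}
	}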
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 86f0cae37a85..975655573844 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -29,6 +29,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/tm.h>
+#include <asm/opal.h>
 
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
@@ -373,6 +374,18 @@ kvm_secondary_got_guest:
 	lwsync
 	std	r0, HSTATE_KVM_VCORE(r13)
 
+	/*
+	 * All secondaries exiting the guest will fall through this path.
+	 * Before proceeding, just check for an HMI interrupt and invoke
+	 * the opal hmi handler. By now we are sure that the primary
+	 * thread on this core/subcore has already done the partition
+	 * switch/TB resync and we are good to call the opal hmi handler.
+	 */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	kvm_no_guest
+
+	li	r3,0			/* NULL argument */
+	bl	hmi_exception_realmode
 /*
  * At this point we have finished executing in the guest.
  * We need to wait for hwthread_req to become zero, since
@@ -428,6 +441,22 @@ kvm_no_guest:
 	 */
 kvm_unsplit_nap:
 	/*
+	 * When secondaries are napping in kvm_unsplit_nap() with
+	 * hwthread_req = 1, an HMI goes ignored even though the subcores
+	 * have already exited the guest. Hence the HMI keeps waking up the
+	 * secondaries from nap in a loop, and the secondaries always go
+	 * back to nap since no vcore is assigned to them. This makes it
+	 * impossible for the primary thread to get hold of the secondary
+	 * threads, resulting in a soft lockup in the KVM path.
+	 *
+	 * Let us check if an HMI is pending and handle it before we go
+	 * to nap.
+	 */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	55f
+	li	r3, 0			/* NULL argument */
+	bl	hmi_exception_realmode
+55:
+	/*
 	 * Ensure that secondary doesn't nap when it has
 	 * its vcore pointer set.
 	 */
@@ -601,6 +630,11 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DPDES, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
+	/* Mark the subcore state as inside guest */
+	bl	kvmppc_subcore_enter_guest
+	nop
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	li	r0,1
 	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
 
@@ -655,112 +689,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
-	b	skip_tm
-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-
-	/* Turn on TM/FP/VSX/VMX so we can restore them. */
-	mfmsr	r5
-	li	r6, MSR_TM >> 32
-	sldi	r6, r6, 32
-	or	r5, r5, r6
-	ori	r5, r5, MSR_FP
-	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
-	mtmsrd	r5
-
-	/*
-	 * The user may change these outside of a transaction, so they must
-	 * always be context switched.
-	 */
-	ld	r5, VCPU_TFHAR(r4)
-	ld	r6, VCPU_TFIAR(r4)
-	ld	r7, VCPU_TEXASR(r4)
-	mtspr	SPRN_TFHAR, r5
-	mtspr	SPRN_TFIAR, r6
-	mtspr	SPRN_TEXASR, r7
-
-	ld	r5, VCPU_MSR(r4)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	skip_tm	/* TM not active in guest */
-
-	/* Make sure the failure summary is set, otherwise we'll program check
-	 * when we trechkpt.  It's possible that this might have been not set
-	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
-	 * host.
-	 */
-	oris	r7, r7, (TEXASR_FS)@h
-	mtspr	SPRN_TEXASR, r7
-
-	/*
-	 * We need to load up the checkpointed state for the guest.
-	 * We need to do this early as it will blow away any GPRs, VSRs and
-	 * some SPRs.
-	 */
-
-	mr	r31, r4
-	addi	r3, r31, VCPU_FPRS_TM
-	bl	load_fp_state
-	addi	r3, r31, VCPU_VRS_TM
-	bl	load_vr_state
-	mr	r4, r31
-	lwz	r7, VCPU_VRSAVE_TM(r4)
-	mtspr	SPRN_VRSAVE, r7
-
-	ld	r5, VCPU_LR_TM(r4)
-	lwz	r6, VCPU_CR_TM(r4)
-	ld	r7, VCPU_CTR_TM(r4)
-	ld	r8, VCPU_AMR_TM(r4)
-	ld	r9, VCPU_TAR_TM(r4)
-	mtlr	r5
-	mtcr	r6
-	mtctr	r7
-	mtspr	SPRN_AMR, r8
-	mtspr	SPRN_TAR, r9
-
-	/*
-	 * Load up PPR and DSCR values but don't put them in the actual SPRs
-	 * till the last moment to avoid running with userspace PPR and DSCR for
-	 * too long.
-	 */
-	ld	r29, VCPU_DSCR_TM(r4)
-	ld	r30, VCPU_PPR_TM(r4)
-
-	std	r2, PACATMSCRATCH(r13) /* Save TOC */
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* Load GPRs r0-r28 */
-	reg = 0
-	.rept	29
-	ld	reg, VCPU_GPRS_TM(reg)(r31)
-	reg = reg + 1
-	.endr
-
-	mtspr	SPRN_DSCR, r29
-	mtspr	SPRN_PPR, r30
-
-	/* Load final GPRs */
-	ld	29, VCPU_GPRS_TM(29)(r31)
-	ld	30, VCPU_GPRS_TM(30)(r31)
-	ld	31, VCPU_GPRS_TM(31)(r31)
-
-	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
-	TRECHKPT
-
-	/* Now let's get back the state we need. */
-	HMT_MEDIUM
-	GET_PACA(r13)
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATMSCRATCH(r13)
-
-	/* Set the MSR RI since we have our registers back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-skip_tm:
+	bl	kvmppc_restore_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 	/* Load guest PMU registers */
@@ -841,12 +771,6 @@ BEGIN_FTR_SECTION
 	/* Skip next section on POWER7 */
 	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
-	mfmsr	r8
-	li	r0, 1
-	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r8
-
 	/* Load up POWER8-specific registers */
 	ld	r5, VCPU_IAMR(r4)
 	lwz	r6, VCPU_PSPB(r4)
@@ -1436,106 +1360,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
-	b	2f
-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-	/* Turn on TM. */
-	mfmsr	r8
-	li	r0, 1
-	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r8
-
-	ld	r5, VCPU_MSR(r9)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	1f	/* TM not active in guest. */
-
-	li	r3, TM_CAUSE_KVM_RESCHED
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* All GPRs are volatile at this point. */
-	TRECLAIM(R3)
-
-	/* Temporarily store r13 and r9 so we have some regs to play with */
-	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	std	r9, PACATMSCRATCH(r13)
-	ld	r9, HSTATE_KVM_VCPU(r13)
-
-	/* Get a few more GPRs free. */
-	std	r29, VCPU_GPRS_TM(29)(r9)
-	std	r30, VCPU_GPRS_TM(30)(r9)
-	std	r31, VCPU_GPRS_TM(31)(r9)
-
-	/* Save away PPR and DSCR soon so don't run with user values. */
-	mfspr	r31, SPRN_PPR
-	HMT_MEDIUM
-	mfspr	r30, SPRN_DSCR
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-
-	/* Save all but r9, r13 & r29-r31 */
-	reg = 0
-	.rept	29
-	.if (reg != 9) && (reg != 13)
-	std	reg, VCPU_GPRS_TM(reg)(r9)
-	.endif
-	reg = reg + 1
-	.endr
-	/* ... now save r13 */
-	GET_SCRATCH0(r4)
-	std	r4, VCPU_GPRS_TM(13)(r9)
-	/* ... and save r9 */
-	ld	r4, PACATMSCRATCH(r13)
-	std	r4, VCPU_GPRS_TM(9)(r9)
-
-	/* Reload stack pointer and TOC. */
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATOC(r13)
-
-	/* Set MSR RI now we have r1 and r13 back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-
-	/* Save away checkpointed SPRs. */
-	std	r31, VCPU_PPR_TM(r9)
-	std	r30, VCPU_DSCR_TM(r9)
-	mflr	r5
-	mfcr	r6
-	mfctr	r7
-	mfspr	r8, SPRN_AMR
-	mfspr	r10, SPRN_TAR
-	std	r5, VCPU_LR_TM(r9)
-	stw	r6, VCPU_CR_TM(r9)
-	std	r7, VCPU_CTR_TM(r9)
-	std	r8, VCPU_AMR_TM(r9)
-	std	r10, VCPU_TAR_TM(r9)
-
-	/* Restore r12 as trap number. */
-	lwz	r12, VCPU_TRAP(r9)
-
-	/* Save FP/VSX. */
-	addi	r3, r9, VCPU_FPRS_TM
-	bl	store_fp_state
-	addi	r3, r9, VCPU_VRS_TM
-	bl	store_vr_state
-	mfspr	r6, SPRN_VRSAVE
-	stw	r6, VCPU_VRSAVE_TM(r9)
-1:
-	/*
-	 * We need to save these SPRs after the treclaim so that the software
-	 * error code is recorded correctly in the TEXASR.  Also the user may
-	 * change these outside of a transaction, so they must always be
-	 * context switched.
-	 */
-	mfspr	r5, SPRN_TFHAR
-	mfspr	r6, SPRN_TFIAR
-	mfspr	r7, SPRN_TEXASR
-	std	r5, VCPU_TFHAR(r9)
-	std	r6, VCPU_TFIAR(r9)
-	std	r7, VCPU_TEXASR(r9)
-2:
+	bl	kvmppc_save_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 	/* Increment yield count if they have a VPA */
@@ -1683,6 +1509,23 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DPDES, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
+	/* If HMI, call kvmppc_realmode_hmi_handler() */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	27f
+	bl	kvmppc_realmode_hmi_handler
+	nop
+	li	r12, BOOK3S_INTERRUPT_HMI
+	/*
+	 * At this point kvmppc_realmode_hmi_handler would have resync-ed
+	 * the TB. Hence it is not required to subtract the guest timebase
+	 * offset from the timebase. So, skip it.
+	 *
+	 * Also, do not call kvmppc_subcore_exit_guest() because it has
+	 * been invoked as part of kvmppc_realmode_hmi_handler().
+	 */
+	b	30f
+
+27:
 	/* Subtract timebase offset from timebase */
 	ld	r8,VCORE_TB_OFFSET(r5)
 	cmpdi	r8,0
@@ -1698,8 +1541,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
 	mtspr	SPRN_TBU40,r8
 
+17:	bl	kvmppc_subcore_exit_guest
+	nop
+30:	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
+
 	/* Reset PCR */
-17:	ld	r0, VCORE_PCR(r5)
+	ld	r0, VCORE_PCR(r5)
 	cmpdi	r0, 0
 	beq	18f
 	li	r0, 0
@@ -2245,6 +2093,13 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	/* save FP state */
 	bl	kvmppc_save_fp
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	bl	kvmppc_save_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+
 	/*
 	 * Set DEC to the smaller of DEC and HDEC, so that we wake
 	 * no later than the end of our timeslice (HDEC interrupts
@@ -2321,6 +2176,12 @@ kvm_end_cede:
 	bl	kvmhv_accumulate_time
 #endif
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	bl	kvmppc_restore_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+
 	/* load up FP state */
 	bl	kvmppc_load_fp
 
@@ -2461,6 +2322,8 @@ BEGIN_FTR_SECTION
 	cmpwi	r6, 3			/* hypervisor doorbell? */
 	beq	3f
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	cmpwi	r6, 0xa			/* Hypervisor maintenance ? */
+	beq	4f
 	li	r3, 1			/* anything else, return 1 */
 0:	blr
 
@@ -2482,6 +2345,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	li	r3, -1
 	blr
 
+	/* Woken up due to Hypervisor maintenance interrupt */
+4:	li	r12, BOOK3S_INTERRUPT_HMI
+	li	r3, 1
+	blr
+
 /*
  * Determine what sort of external interrupt is pending (if any).
  * Returns:
@@ -2631,6 +2499,239 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	mr	r4,r31
 	blr
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Save transactional state and TM-related registers.
+ * Called with r9 pointing to the vcpu struct.
+ * This can modify all checkpointed registers, but
+ * restores r1, r2 and r9 (vcpu pointer) before exit.
+ */
+kvmppc_save_tm:
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM. */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+	ld	r5, VCPU_MSR(r9)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beq	1f	/* TM not active in guest. */
+
+	std	r1, HSTATE_HOST_R1(r13)
+	li	r3, TM_CAUSE_KVM_RESCHED
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* All GPRs are volatile at this point. */
+	TRECLAIM(R3)
+
+	/* Temporarily store r13 and r9 so we have some regs to play with */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+	std	r9, PACATMSCRATCH(r13)
+	ld	r9, HSTATE_KVM_VCPU(r13)
+
+	/* Get a few more GPRs free. */
+	std	r29, VCPU_GPRS_TM(29)(r9)
+	std	r30, VCPU_GPRS_TM(30)(r9)
+	std	r31, VCPU_GPRS_TM(31)(r9)
+
+	/* Save away PPR and DSCR soon so don't run with user values. */
+	mfspr	r31, SPRN_PPR
+	HMT_MEDIUM
+	mfspr	r30, SPRN_DSCR
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+
+	/* Save all but r9, r13 & r29-r31 */
+	reg = 0
+	.rept	29
+	.if (reg != 9) && (reg != 13)
+	std	reg, VCPU_GPRS_TM(reg)(r9)
+	.endif
+	reg = reg + 1
+	.endr
+	/* ... now save r13 */
+	GET_SCRATCH0(r4)
+	std	r4, VCPU_GPRS_TM(13)(r9)
+	/* ... and save r9 */
+	ld	r4, PACATMSCRATCH(r13)
+	std	r4, VCPU_GPRS_TM(9)(r9)
+
+	/* Reload stack pointer and TOC. */
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATOC(r13)
+
+	/* Set MSR RI now we have r1 and r13 back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	/* Save away checkpointed SPRs. */
+	std	r31, VCPU_PPR_TM(r9)
+	std	r30, VCPU_DSCR_TM(r9)
+	mflr	r5
+	mfcr	r6
+	mfctr	r7
+	mfspr	r8, SPRN_AMR
+	mfspr	r10, SPRN_TAR
+	std	r5, VCPU_LR_TM(r9)
+	stw	r6, VCPU_CR_TM(r9)
+	std	r7, VCPU_CTR_TM(r9)
+	std	r8, VCPU_AMR_TM(r9)
+	std	r10, VCPU_TAR_TM(r9)
+
+	/* Restore r12 as trap number. */
+	lwz	r12, VCPU_TRAP(r9)
+
+	/* Save FP/VSX. */
+	addi	r3, r9, VCPU_FPRS_TM
+	bl	store_fp_state
+	addi	r3, r9, VCPU_VRS_TM
+	bl	store_vr_state
+	mfspr	r6, SPRN_VRSAVE
+	stw	r6, VCPU_VRSAVE_TM(r9)
+1:
+	/*
+	 * We need to save these SPRs after the treclaim so that the software
+	 * error code is recorded correctly in the TEXASR.  Also the user may
+	 * change these outside of a transaction, so they must always be
+	 * context switched.
+	 */
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	mfspr	r7, SPRN_TEXASR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+	std	r7, VCPU_TEXASR(r9)
+
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with r4 pointing to the vcpu struct.
+ * This potentially modifies all checkpointed registers.
+ * It restores r1, r2, r4 from the PACA.
+ */
+kvmppc_restore_tm:
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM/FP/VSX/VMX so we can restore them. */
+	mfmsr	r5
+	li	r6, MSR_TM >> 32
+	sldi	r6, r6, 32
+	or	r5, r5, r6
+	ori	r5, r5, MSR_FP
+	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
+	mtmsrd	r5
+
+	/*
+	 * The user may change these outside of a transaction, so they must
+	 * always be context switched.
+	 */
+	ld	r5, VCPU_TFHAR(r4)
+	ld	r6, VCPU_TFIAR(r4)
+	ld	r7, VCPU_TEXASR(r4)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+
+	ld	r5, VCPU_MSR(r4)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beqlr		/* TM not active in guest */
+	std	r1, HSTATE_HOST_R1(r13)
+
+	/* Make sure the failure summary is set, otherwise we'll program check
+	 * when we trechkpt.  It's possible that this might have been not set
+	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+	 * host.
+	 */
+	oris	r7, r7, (TEXASR_FS)@h
+	mtspr	SPRN_TEXASR, r7
+
+	/*
+	 * We need to load up the checkpointed state for the guest.
+	 * We need to do this early as it will blow away any GPRs, VSRs and
+	 * some SPRs.
+	 */
+
+	mr	r31, r4
+	addi	r3, r31, VCPU_FPRS_TM
+	bl	load_fp_state
+	addi	r3, r31, VCPU_VRS_TM
+	bl	load_vr_state
+	mr	r4, r31
+	lwz	r7, VCPU_VRSAVE_TM(r4)
+	mtspr	SPRN_VRSAVE, r7
+
+	ld	r5, VCPU_LR_TM(r4)
+	lwz	r6, VCPU_CR_TM(r4)
+	ld	r7, VCPU_CTR_TM(r4)
+	ld	r8, VCPU_AMR_TM(r4)
+	ld	r9, VCPU_TAR_TM(r4)
+	mtlr	r5
+	mtcr	r6
+	mtctr	r7
+	mtspr	SPRN_AMR, r8
+	mtspr	SPRN_TAR, r9
+
+	/*
+	 * Load up PPR and DSCR values but don't put them in the actual SPRs
+	 * till the last moment to avoid running with userspace PPR and DSCR for
+	 * too long.
+	 */
+	ld	r29, VCPU_DSCR_TM(r4)
+	ld	r30, VCPU_PPR_TM(r4)
+
+	std	r2, PACATMSCRATCH(r13) /* Save TOC */
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* Load GPRs r0-r28 */
+	reg = 0
+	.rept	29
+	ld	reg, VCPU_GPRS_TM(reg)(r31)
+	reg = reg + 1
+	.endr
+
+	mtspr	SPRN_DSCR, r29
+	mtspr	SPRN_PPR, r30
+
+	/* Load final GPRs */
+	ld	29, VCPU_GPRS_TM(29)(r31)
+	ld	30, VCPU_GPRS_TM(30)(r31)
+	ld	31, VCPU_GPRS_TM(31)(r31)
+
+	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
+	TRECHKPT
+
+	/* Now let's get back the state we need. */
+	HMT_MEDIUM
+	GET_PACA(r13)
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATMSCRATCH(r13)
+
+	/* Set the MSR RI since we have our registers back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+#endif
+
 /*
  * We come here if we get any exception or interrupt while we are
  * executing host real mode code while in guest MMU context.
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c4f7d6b86b9e..e76f79a45988 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -914,7 +914,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* We get here with MSR.EE=1 */
 
 	trace_kvm_exit(exit_nr, vcpu);
-	kvm_guest_exit();
+	guest_exit();
 
 	switch (exit_nr) {
 	case BOOK3S_INTERRUPT_INST_STORAGE:
@@ -1049,7 +1049,17 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		int emul;
 
 program_interrupt:
-		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+		/*
+		 * shadow_srr1 only contains valid flags if we came here via
+		 * a program exception. The other exceptions (emulation assist,
+		 * FP unavailable, etc.) do not provide flags in SRR1, so use
+		 * an illegal-instruction exception when injecting a program
+		 * interrupt into the guest.
+		 */
+		if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
+			flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+		else
+			flags = SRR1_PROGILL;
 
 		emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
 		if (emul != EMULATE_DONE) {
@@ -1531,7 +1541,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	kvmppc_clear_debug(vcpu);
 
-	/* No need for kvm_guest_exit. It's done in handle_exit.
+	/* No need for guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
 	/* Make sure we save the guest FPU/Altivec/VSX state */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4afae695899a..02b4672f7347 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -776,7 +776,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
-	/* No need for kvm_guest_exit. It's done in handle_exit.
+	/* No need for guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
 	/* Switch back to user space debug context */
@@ -1012,7 +1012,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	trace_kvm_exit(exit_nr, vcpu);
-	__kvm_guest_exit();
+	guest_exit_irqoff();
 
 	local_irq_enable();
 
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 5cc2e7af3a7b..b379146de55b 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -302,7 +302,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			advance = 0;
 			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
 			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
-			kvmppc_core_queue_program(vcpu, 0);
 		}
 	}
 
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 6249cdc834d1..ed38f8114118 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1823,7 +1823,8 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return 0;
 }
 
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 02416fea7653..6ce40dd6fe51 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -119,7 +119,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			continue;
 		}
 
-		__kvm_guest_enter();
+		guest_enter_irqoff();
 		return 1;
 	}
 
@@ -588,6 +588,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = 1;
 		break;
 #endif
+	case KVM_CAP_PPC_HTM:
+		r = cpu_has_feature(CPU_FTR_TM_COMP) &&
+		    is_kvmppc_hv_enabled(kvm);
+		break;
 	default:
 		r = 0;
 		break;
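Userspace can probe the new capability through the standard
KVM_CHECK_EXTENSION ioctl; a minimal sketch (assuming a kernel that defines
KVM_CAP_PPC_HTM in <linux/kvm.h>; since the result depends on whether HV KVM
is enabled for the VM, it is checked on the VM fd):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int vm = ioctl(kvm, KVM_CREATE_VM, 0);

		if (kvm < 0 || vm < 0) {
			perror("kvm");
			return 1;
		}
		/* 1 when HTM is usable (HV KVM on a TM-capable CPU). */
		printf("KVM_CAP_PPC_HTM: %d\n",
		       ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_HTM));
		return 0;
	}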
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index cf928bba4d9a..3d29d40eb0e9 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -64,7 +64,6 @@ END_FTR_SECTION(0, 1); \
 	OPAL_BRANCH(opal_tracepoint_entry) \
 	mfcr	r12;			\
 	stw	r12,8(r1);		\
-	std	r1,PACAR1(r13);		\
 	li	r11,0;			\
 	mfmsr	r12;			\
 	ori	r11,r11,MSR_EE;		\
@@ -127,7 +126,6 @@ opal_tracepoint_entry:
 	mfcr	r12
 	std	r11,16(r1)
 	stw	r12,8(r1)
-	std	r1,PACAR1(r13)
 	li	r11,0
 	mfmsr	r12
 	ori	r11,r11,MSR_EE