aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRadim Krčmář <rkrcmar@redhat.com>2018-02-01 10:13:07 -0500
committerRadim Krčmář <rkrcmar@redhat.com>2018-02-01 10:13:07 -0500
commitd2b9b2079e23c1ab80ce1d7670d5e1994468a881 (patch)
treebd9bfb74343da003b7bac0569d0a7f8025cbaef4
parent7bf14c28ee776be567855bd39ed8ff795ea19f55 (diff)
parent9b9b13a6d1537ddc4caccd6f1c41b78edbc08437 (diff)
Merge tag 'kvm-ppc-next-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
PPC KVM update for 4.16 - Allow HPT guests to run on a radix host on POWER9 v2.2 CPUs without requiring the complex thread synchronization that earlier CPU versions required. - A series from Ben Herrenschmidt to improve the handling of escalation interrupts with the XIVE interrupt controller. - Provide for the decrementer register to be copied across on migration. - Various minor cleanups and bugfixes.
-rw-r--r--Documentation/virtual/kvm/api.txt1
-rw-r--r--arch/powerpc/include/asm/hmi.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h14
-rw-r--r--arch/powerpc/include/asm/kvm_host.h6
-rw-r--r--arch/powerpc/include/asm/opal-api.h1
-rw-r--r--arch/powerpc/include/asm/reg.h5
-rw-r--r--arch/powerpc/include/asm/xive-regs.h35
-rw-r--r--arch/powerpc/include/asm/xive.h41
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/mce.c142
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c54
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c8
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S237
-rw-r--r--arch/powerpc/kvm/book3s_xive.c109
-rw-r--r--arch/powerpc/kvm/book3s_xive.h15
-rw-r--r--arch/powerpc/kvm/powerpc.c14
-rw-r--r--arch/powerpc/kvm/timing.c3
-rw-r--r--arch/powerpc/sysdev/xive/common.c3
-rw-r--r--arch/powerpc/sysdev/xive/native.c18
21 files changed, 499 insertions, 219 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 70d3368adba9..792fa8717d13 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1841,6 +1841,7 @@ registers, find a list below:
1841 PPC | KVM_REG_PPC_DBSR | 32 1841 PPC | KVM_REG_PPC_DBSR | 32
1842 PPC | KVM_REG_PPC_TIDR | 64 1842 PPC | KVM_REG_PPC_TIDR | 64
1843 PPC | KVM_REG_PPC_PSSCR | 64 1843 PPC | KVM_REG_PPC_PSSCR | 64
1844 PPC | KVM_REG_PPC_DEC_EXPIRY | 64
1844 PPC | KVM_REG_PPC_TM_GPR0 | 64 1845 PPC | KVM_REG_PPC_TM_GPR0 | 64
1845 ... 1846 ...
1846 PPC | KVM_REG_PPC_TM_GPR31 | 64 1847 PPC | KVM_REG_PPC_TM_GPR31 | 64
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 85b7a1a21e22..9c14f7b5c46c 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
42static inline void wait_for_subcore_guest_exit(void) { } 42static inline void wait_for_subcore_guest_exit(void) { }
43static inline void wait_for_tb_resync(void) { } 43static inline void wait_for_tb_resync(void) { }
44#endif 44#endif
45
46struct pt_regs;
47extern long hmi_handle_debugtrig(struct pt_regs *regs);
48
45#endif /* __ASM_PPC64_HMI_H__ */ 49#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 735cfa35298a..998f7b7aaa9e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
122 lphi = (l >> 16) & 0xf; 122 lphi = (l >> 16) & 0xf;
123 switch ((l >> 12) & 0xf) { 123 switch ((l >> 12) & 0xf) {
124 case 0: 124 case 0:
125 return !lphi ? 24 : -1; /* 16MB */ 125 return !lphi ? 24 : 0; /* 16MB */
126 break; 126 break;
127 case 1: 127 case 1:
128 return 16; /* 64kB */ 128 return 16; /* 64kB */
129 break; 129 break;
130 case 3: 130 case 3:
131 return !lphi ? 34 : -1; /* 16GB */ 131 return !lphi ? 34 : 0; /* 16GB */
132 break; 132 break;
133 case 7: 133 case 7:
134 return (16 << 8) + 12; /* 64kB in 4kB */ 134 return (16 << 8) + 12; /* 64kB in 4kB */
@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
140 return (24 << 8) + 12; /* 16MB in 4kB */ 140 return (24 << 8) + 12; /* 16MB in 4kB */
141 break; 141 break;
142 } 142 }
143 return -1; 143 return 0;
144} 144}
145 145
146static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l) 146static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
159 159
160static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r) 160static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
161{ 161{
162 return 1ul << kvmppc_hpte_actual_page_shift(v, r); 162 int shift = kvmppc_hpte_actual_page_shift(v, r);
163
164 if (shift)
165 return 1ul << shift;
166 return 0;
163} 167}
164 168
165static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift) 169static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
232 va_low ^= v >> (SID_SHIFT_1T - 16); 236 va_low ^= v >> (SID_SHIFT_1T - 16);
233 va_low &= 0x7ff; 237 va_low &= 0x7ff;
234 238
235 if (b_pgshift == 12) { 239 if (b_pgshift <= 12) {
236 if (a_pgshift > 12) { 240 if (a_pgshift > 12) {
237 sllp = (a_pgshift == 16) ? 5 : 4; 241 sllp = (a_pgshift == 16) ? 5 : 4;
238 rb |= sllp << 5; /* AP field */ 242 rb |= sllp << 5; /* AP field */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3aa5b577cd60..fef8133becc8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -709,6 +709,7 @@ struct kvm_vcpu_arch {
709 u8 ceded; 709 u8 ceded;
710 u8 prodded; 710 u8 prodded;
711 u8 doorbell_request; 711 u8 doorbell_request;
712 u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
712 u32 last_inst; 713 u32 last_inst;
713 714
714 struct swait_queue_head *wqp; 715 struct swait_queue_head *wqp;
@@ -738,8 +739,11 @@ struct kvm_vcpu_arch {
738 struct kvmppc_icp *icp; /* XICS presentation controller */ 739 struct kvmppc_icp *icp; /* XICS presentation controller */
739 struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */ 740 struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
740 __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */ 741 __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
741 u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */ 742 u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
743 u8 xive_esc_on; /* Is the escalation irq enabled ? */
742 union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */ 744 union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
745 u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
746 u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
743#endif 747#endif
744 748
745#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 749#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..fc926743647e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1073,6 +1073,7 @@ enum {
1073/* Flags for OPAL_XIVE_GET/SET_VP_INFO */ 1073/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
1074enum { 1074enum {
1075 OPAL_XIVE_VP_ENABLED = 0x00000001, 1075 OPAL_XIVE_VP_ENABLED = 0x00000001,
1076 OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002,
1076}; 1077};
1077 1078
1078/* "Any chip" replacement for chip ID for allocation functions */ 1079/* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3ccd412..14e41b843952 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -432,8 +432,9 @@
432#define SPRN_LPID 0x13F /* Logical Partition Identifier */ 432#define SPRN_LPID 0x13F /* Logical Partition Identifier */
433#endif 433#endif
434#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 434#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
435#define SPRN_HMER 0x150 /* Hardware m? error recovery */ 435#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
436#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ 436#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
437#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
437#define SPRN_PCR 0x152 /* Processor compatibility register */ 438#define SPRN_PCR 0x152 /* Processor compatibility register */
438#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ 439#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
439#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ 440#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -10,6 +10,41 @@
10#define _ASM_POWERPC_XIVE_REGS_H 10#define _ASM_POWERPC_XIVE_REGS_H
11 11
12/* 12/*
13 * "magic" Event State Buffer (ESB) MMIO offsets.
14 *
15 * Each interrupt source has a 2-bit state machine called ESB
16 * which can be controlled by MMIO. It's made of 2 bits, P and
17 * Q. P indicates that an interrupt is pending (has been sent
18 * to a queue and is waiting for an EOI). Q indicates that the
19 * interrupt has been triggered while pending.
20 *
21 * This acts as a coalescing mechanism in order to guarantee
22 * that a given interrupt only occurs at most once in a queue.
23 *
24 * When doing an EOI, the Q bit will indicate if the interrupt
25 * needs to be re-triggered.
26 *
27 * The following offsets into the ESB MMIO allow to read or
28 * manipulate the PQ bits. They must be used with an 8-bytes
29 * load instruction. They all return the previous state of the
30 * interrupt (atomically).
31 *
32 * Additionally, some ESB pages support doing an EOI via a
33 * store at 0 and some ESBs support doing a trigger via a
34 * separate trigger page.
35 */
36#define XIVE_ESB_STORE_EOI 0x400 /* Store */
37#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
38#define XIVE_ESB_GET 0x800 /* Load */
39#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
40#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
41#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
42#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
43
44#define XIVE_ESB_VAL_P 0x2
45#define XIVE_ESB_VAL_Q 0x1
46
47/*
13 * Thread Management (aka "TM") registers 48 * Thread Management (aka "TM") registers
14 */ 49 */
15 50
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..e602903c3029 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
58#define XIVE_IRQ_FLAG_EOI_FW 0x10 58#define XIVE_IRQ_FLAG_EOI_FW 0x10
59#define XIVE_IRQ_FLAG_H_INT_ESB 0x20 59#define XIVE_IRQ_FLAG_H_INT_ESB 0x20
60 60
61/* Special flag set by KVM for escalation interrupts */
62#define XIVE_IRQ_NO_EOI 0x80
63
61#define XIVE_INVALID_CHIP_ID -1 64#define XIVE_INVALID_CHIP_ID -1
62 65
63/* A queue tracking structure in a CPU */ 66/* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
72 atomic_t pending_count; 75 atomic_t pending_count;
73}; 76};
74 77
75/*
76 * "magic" Event State Buffer (ESB) MMIO offsets.
77 *
78 * Each interrupt source has a 2-bit state machine called ESB
79 * which can be controlled by MMIO. It's made of 2 bits, P and
80 * Q. P indicates that an interrupt is pending (has been sent
81 * to a queue and is waiting for an EOI). Q indicates that the
82 * interrupt has been triggered while pending.
83 *
84 * This acts as a coalescing mechanism in order to guarantee
85 * that a given interrupt only occurs at most once in a queue.
86 *
87 * When doing an EOI, the Q bit will indicate if the interrupt
88 * needs to be re-triggered.
89 *
90 * The following offsets into the ESB MMIO allow to read or
91 * manipulate the PQ bits. They must be used with an 8-bytes
92 * load instruction. They all return the previous state of the
93 * interrupt (atomically).
94 *
95 * Additionally, some ESB pages support doing an EOI via a
96 * store at 0 and some ESBs support doing a trigger via a
97 * separate trigger page.
98 */
99#define XIVE_ESB_STORE_EOI 0x400 /* Store */
100#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
101#define XIVE_ESB_GET 0x800 /* Load */
102#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
103#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
104#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
105#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
106
107#define XIVE_ESB_VAL_P 0x2
108#define XIVE_ESB_VAL_Q 0x1
109
110/* Global enable flags for the XIVE support */ 78/* Global enable flags for the XIVE support */
111extern bool __xive_enabled; 79extern bool __xive_enabled;
112 80
@@ -143,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
143 111
144extern void xive_native_sync_source(u32 hw_irq); 112extern void xive_native_sync_source(u32 hw_irq);
145extern bool is_xive_irq(struct irq_chip *chip); 113extern bool is_xive_irq(struct irq_chip *chip);
146extern int xive_native_enable_vp(u32 vp_id); 114extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
147extern int xive_native_disable_vp(u32 vp_id); 115extern int xive_native_disable_vp(u32 vp_id);
148extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); 116extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
117extern bool xive_native_has_single_escalation(void);
149 118
150#else 119#else
151 120
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
632#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) 632#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
633#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) 633#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
634 634
635#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
636
635/* Transactional Memory checkpointed state: 637/* Transactional Memory checkpointed state:
636 * This is all GPRs, all VSX regs and a subset of SPRs 638 * This is all GPRs, all VSX regs and a subset of SPRs
637 */ 639 */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f390d57cf2e1..ff6ce2fd7579 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -519,6 +519,7 @@ int main(void)
519 OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); 519 OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
520 OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); 520 OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
521 OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); 521 OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
522 OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
522 OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); 523 OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
523 OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); 524 OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
524 OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); 525 OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@ -738,6 +739,9 @@ int main(void)
738 DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu, 739 DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
739 arch.xive_cam_word)); 740 arch.xive_cam_word));
740 DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed)); 741 DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
742 DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
743 DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
744 DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
741#endif 745#endif
742 746
743#ifdef CONFIG_KVM_EXIT_TIMING 747#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 742e4658c5dc..d2fecaec4fec 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
495 return handled; 495 return handled;
496} 496}
497 497
498long hmi_exception_realmode(struct pt_regs *regs) 498/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
499static enum {
500 DTRIG_UNKNOWN,
501 DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
502 DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
503} hmer_debug_trig_function;
504
505static int init_debug_trig_function(void)
499{ 506{
500 __this_cpu_inc(irq_stat.hmi_exceptions); 507 int pvr;
501 508 struct device_node *cpun;
502#ifdef CONFIG_PPC_BOOK3S_64 509 struct property *prop = NULL;
503 /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ 510 const char *str;
504 if (pvr_version_is(PVR_POWER9)) { 511
505 unsigned long hmer = mfspr(SPRN_HMER); 512 /* First look in the device tree */
506 513 preempt_disable();
507 /* Do we have the debug bit set */ 514 cpun = of_get_cpu_node(smp_processor_id(), NULL);
508 if (hmer & PPC_BIT(17)) { 515 if (cpun) {
509 hmer &= ~PPC_BIT(17); 516 of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
510 mtspr(SPRN_HMER, hmer); 517 prop, str) {
511 518 if (strcmp(str, "bit17-vector-ci-load") == 0)
512 /* 519 hmer_debug_trig_function = DTRIG_VECTOR_CI;
513 * Now to avoid problems with soft-disable we 520 else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
514 * only do the emulation if we are coming from 521 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
515 * user space
516 */
517 if (user_mode(regs))
518 local_paca->hmi_p9_special_emu = 1;
519
520 /*
521 * Don't bother going to OPAL if that's the
522 * only relevant bit.
523 */
524 if (!(hmer & mfspr(SPRN_HMEER)))
525 return local_paca->hmi_p9_special_emu;
526 } 522 }
523 of_node_put(cpun);
524 }
525 preempt_enable();
526
527 /* If we found the property, don't look at PVR */
528 if (prop)
529 goto out;
530
531 pvr = mfspr(SPRN_PVR);
532 /* Check for POWER9 Nimbus (scale-out) */
533 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
534 /* DD2.2 and later */
535 if ((pvr & 0xfff) >= 0x202)
536 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
537 /* DD2.0 and DD2.1 - used for vector CI load emulation */
538 else if ((pvr & 0xfff) >= 0x200)
539 hmer_debug_trig_function = DTRIG_VECTOR_CI;
540 }
541
542 out:
543 switch (hmer_debug_trig_function) {
544 case DTRIG_VECTOR_CI:
545 pr_debug("HMI debug trigger used for vector CI load\n");
546 break;
547 case DTRIG_SUSPEND_ESCAPE:
548 pr_debug("HMI debug trigger used for TM suspend escape\n");
549 break;
550 default:
551 break;
527 } 552 }
528#endif /* CONFIG_PPC_BOOK3S_64 */ 553 return 0;
554}
555__initcall(init_debug_trig_function);
556
557/*
558 * Handle HMIs that occur as a result of a debug trigger.
559 * Return values:
560 * -1 means this is not a HMI cause that we know about
561 * 0 means no further handling is required
562 * 1 means further handling is required
563 */
564long hmi_handle_debugtrig(struct pt_regs *regs)
565{
566 unsigned long hmer = mfspr(SPRN_HMER);
567 long ret = 0;
568
569 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
570 if (!((hmer & HMER_DEBUG_TRIG)
571 && hmer_debug_trig_function != DTRIG_UNKNOWN))
572 return -1;
573
574 hmer &= ~HMER_DEBUG_TRIG;
575 /* HMER is a write-AND register */
576 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
577
578 switch (hmer_debug_trig_function) {
579 case DTRIG_VECTOR_CI:
580 /*
581 * Now to avoid problems with soft-disable we
582 * only do the emulation if we are coming from
583 * host user space
584 */
585 if (regs && user_mode(regs))
586 ret = local_paca->hmi_p9_special_emu = 1;
587
588 break;
589
590 default:
591 break;
592 }
593
594 /*
595 * See if any other HMI causes remain to be handled
596 */
597 if (hmer & mfspr(SPRN_HMEER))
598 return -1;
599
600 return ret;
601}
602
603/*
604 * Return values:
605 */
606long hmi_exception_realmode(struct pt_regs *regs)
607{
608 int ret;
609
610 __this_cpu_inc(irq_stat.hmi_exceptions);
611
612 ret = hmi_handle_debugtrig(regs);
613 if (ret >= 0)
614 return ret;
529 615
530 wait_for_subcore_guest_exit(); 616 wait_for_subcore_guest_exit();
531 617
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 58618f644c56..0c854816e653 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
573 j = i + 1; 573 j = i + 1;
574 if (npages) { 574 if (npages) {
575 set_dirty_bits(map, i, npages); 575 set_dirty_bits(map, i, npages);
576 i = j + npages; 576 j = i + npages;
577 } 577 }
578 } 578 }
579 return 0; 579 return 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2d46037ce936..e5f81fc108e0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -118,6 +118,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
118MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); 118MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
119#endif 119#endif
120 120
121/* If set, the threads on each CPU core have to be in the same MMU mode */
122static bool no_mixing_hpt_and_radix;
123
121static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 124static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
122static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 125static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
123 126
@@ -1497,6 +1500,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1497 case KVM_REG_PPC_ARCH_COMPAT: 1500 case KVM_REG_PPC_ARCH_COMPAT:
1498 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); 1501 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
1499 break; 1502 break;
1503 case KVM_REG_PPC_DEC_EXPIRY:
1504 *val = get_reg_val(id, vcpu->arch.dec_expires +
1505 vcpu->arch.vcore->tb_offset);
1506 break;
1500 default: 1507 default:
1501 r = -EINVAL; 1508 r = -EINVAL;
1502 break; 1509 break;
@@ -1724,6 +1731,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1724 case KVM_REG_PPC_ARCH_COMPAT: 1731 case KVM_REG_PPC_ARCH_COMPAT:
1725 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); 1732 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
1726 break; 1733 break;
1734 case KVM_REG_PPC_DEC_EXPIRY:
1735 vcpu->arch.dec_expires = set_reg_val(id, *val) -
1736 vcpu->arch.vcore->tb_offset;
1737 break;
1727 default: 1738 default:
1728 r = -EINVAL; 1739 r = -EINVAL;
1729 break; 1740 break;
@@ -2378,8 +2389,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
2378static bool subcore_config_ok(int n_subcores, int n_threads) 2389static bool subcore_config_ok(int n_subcores, int n_threads)
2379{ 2390{
2380 /* 2391 /*
2381 * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core 2392 * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
2382 * mode, with one thread per subcore. 2393 * split-core mode, with one thread per subcore.
2383 */ 2394 */
2384 if (cpu_has_feature(CPU_FTR_ARCH_300)) 2395 if (cpu_has_feature(CPU_FTR_ARCH_300))
2385 return n_subcores <= 4 && n_threads == 1; 2396 return n_subcores <= 4 && n_threads == 1;
@@ -2415,8 +2426,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2415 if (!cpu_has_feature(CPU_FTR_ARCH_207S)) 2426 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2416 return false; 2427 return false;
2417 2428
2418 /* POWER9 currently requires all threads to be in the same MMU mode */ 2429 /* Some POWER9 chips require all threads to be in the same MMU mode */
2419 if (cpu_has_feature(CPU_FTR_ARCH_300) && 2430 if (no_mixing_hpt_and_radix &&
2420 kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm)) 2431 kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
2421 return false; 2432 return false;
2422 2433
@@ -2679,9 +2690,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2679 * threads are offline. Also check if the number of threads in this 2690 * threads are offline. Also check if the number of threads in this
2680 * guest are greater than the current system threads per guest. 2691 * guest are greater than the current system threads per guest.
2681 * On POWER9, we need to be not in independent-threads mode if 2692 * On POWER9, we need to be not in independent-threads mode if
2682 * this is a HPT guest on a radix host. 2693 * this is a HPT guest on a radix host machine where the
2694 * CPU threads may not be in different MMU modes.
2683 */ 2695 */
2684 hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm); 2696 hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
2697 !kvm_is_radix(vc->kvm);
2685 if (((controlled_threads > 1) && 2698 if (((controlled_threads > 1) &&
2686 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) || 2699 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
2687 (hpt_on_radix && vc->kvm->arch.threads_indep)) { 2700 (hpt_on_radix && vc->kvm->arch.threads_indep)) {
@@ -2831,7 +2844,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2831 */ 2844 */
2832 if (!thr0_done) 2845 if (!thr0_done)
2833 kvmppc_start_thread(NULL, pvc); 2846 kvmppc_start_thread(NULL, pvc);
2834 thr += pvc->num_threads;
2835 } 2847 }
2836 2848
2837 /* 2849 /*
@@ -2987,7 +2999,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
2987{ 2999{
2988 if (!xive_enabled()) 3000 if (!xive_enabled())
2989 return false; 3001 return false;
2990 return vcpu->arch.xive_saved_state.pipr < 3002 return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
2991 vcpu->arch.xive_saved_state.cppr; 3003 vcpu->arch.xive_saved_state.cppr;
2992} 3004}
2993#else 3005#else
@@ -3176,17 +3188,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3176 * this thread straight away and have it join in. 3188 * this thread straight away and have it join in.
3177 */ 3189 */
3178 if (!signal_pending(current)) { 3190 if (!signal_pending(current)) {
3179 if (vc->vcore_state == VCORE_PIGGYBACK) { 3191 if ((vc->vcore_state == VCORE_PIGGYBACK ||
3180 if (spin_trylock(&vc->lock)) { 3192 vc->vcore_state == VCORE_RUNNING) &&
3181 if (vc->vcore_state == VCORE_RUNNING &&
3182 !VCORE_IS_EXITING(vc)) {
3183 kvmppc_create_dtl_entry(vcpu, vc);
3184 kvmppc_start_thread(vcpu, vc);
3185 trace_kvm_guest_enter(vcpu);
3186 }
3187 spin_unlock(&vc->lock);
3188 }
3189 } else if (vc->vcore_state == VCORE_RUNNING &&
3190 !VCORE_IS_EXITING(vc)) { 3193 !VCORE_IS_EXITING(vc)) {
3191 kvmppc_create_dtl_entry(vcpu, vc); 3194 kvmppc_create_dtl_entry(vcpu, vc);
3192 kvmppc_start_thread(vcpu, vc); 3195 kvmppc_start_thread(vcpu, vc);
@@ -4448,6 +4451,19 @@ static int kvmppc_book3s_init_hv(void)
4448 4451
4449 if (kvmppc_radix_possible()) 4452 if (kvmppc_radix_possible())
4450 r = kvmppc_radix_init(); 4453 r = kvmppc_radix_init();
4454
4455 /*
4456 * POWER9 chips before version 2.02 can't have some threads in
4457 * HPT mode and some in radix mode on the same core.
4458 */
4459 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4460 unsigned int pvr = mfspr(SPRN_PVR);
4461 if ((pvr >> 16) == PVR_POWER9 &&
4462 (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
4463 ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
4464 no_mixing_hpt_and_radix = true;
4465 }
4466
4451 return r; 4467 return r;
4452} 4468}
4453 4469
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index c356f9a40b24..c296343d0dcc 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void)
268 * secondary threads to proceed. 268 * secondary threads to proceed.
269 * - All secondary threads will eventually call opal hmi handler on 269 * - All secondary threads will eventually call opal hmi handler on
270 * their exit path. 270 * their exit path.
271 *
272 * Returns 1 if the timebase offset should be applied, 0 if not.
271 */ 273 */
272 274
273long kvmppc_realmode_hmi_handler(void) 275long kvmppc_realmode_hmi_handler(void)
274{ 276{
275 int ptid = local_paca->kvm_hstate.ptid;
276 bool resync_req; 277 bool resync_req;
277 278
278 /* This is only called on primary thread. */
279 BUG_ON(ptid != 0);
280 __this_cpu_inc(irq_stat.hmi_exceptions); 279 __this_cpu_inc(irq_stat.hmi_exceptions);
281 280
281 if (hmi_handle_debugtrig(NULL) >= 0)
282 return 1;
283
282 /* 284 /*
283 * By now primary thread has already completed guest->host 285 * By now primary thread has already completed guest->host
284 * partition switch but haven't signaled secondaries yet. 286 * partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9c61f736c75b..b64f10a5f5e7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -617,13 +617,6 @@ kvmppc_hv_entry:
617 lbz r0, KVM_RADIX(r9) 617 lbz r0, KVM_RADIX(r9)
618 cmpwi cr7, r0, 0 618 cmpwi cr7, r0, 0
619 619
620 /* Clear out SLB if hash */
621 bne cr7, 2f
622 li r6,0
623 slbmte r6,r6
624 slbia
625 ptesync
6262:
627 /* 620 /*
628 * POWER7/POWER8 host -> guest partition switch code. 621 * POWER7/POWER8 host -> guest partition switch code.
629 * We don't have to lock against concurrent tlbies, 622 * We don't have to lock against concurrent tlbies,
@@ -738,19 +731,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
73810: cmpdi r4, 0 73110: cmpdi r4, 0
739 beq kvmppc_primary_no_guest 732 beq kvmppc_primary_no_guest
740kvmppc_got_guest: 733kvmppc_got_guest:
741
742 /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
743 lwz r5,VCPU_SLB_MAX(r4)
744 cmpwi r5,0
745 beq 9f
746 mtctr r5
747 addi r6,r4,VCPU_SLB
7481: ld r8,VCPU_SLB_E(r6)
749 ld r9,VCPU_SLB_V(r6)
750 slbmte r9,r8
751 addi r6,r6,VCPU_SLB_SIZE
752 bdnz 1b
7539:
754 /* Increment yield count if they have a VPA */ 734 /* Increment yield count if they have a VPA */
755 ld r3, VCPU_VPA(r4) 735 ld r3, VCPU_VPA(r4)
756 cmpdi r3, 0 736 cmpdi r3, 0
@@ -957,7 +937,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
957 mftb r7 937 mftb r7
958 subf r3,r7,r8 938 subf r3,r7,r8
959 mtspr SPRN_DEC,r3 939 mtspr SPRN_DEC,r3
960 std r3,VCPU_DEC(r4)
961 940
962 ld r5, VCPU_SPRG0(r4) 941 ld r5, VCPU_SPRG0(r4)
963 ld r6, VCPU_SPRG1(r4) 942 ld r6, VCPU_SPRG1(r4)
@@ -1018,6 +997,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1018 cmpdi r3, 512 /* 1 microsecond */ 997 cmpdi r3, 512 /* 1 microsecond */
1019 blt hdec_soon 998 blt hdec_soon
1020 999
1000 /* For hash guest, clear out and reload the SLB */
1001 ld r6, VCPU_KVM(r4)
1002 lbz r0, KVM_RADIX(r6)
1003 cmpwi r0, 0
1004 bne 9f
1005 li r6, 0
1006 slbmte r6, r6
1007 slbia
1008 ptesync
1009
1010 /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
1011 lwz r5,VCPU_SLB_MAX(r4)
1012 cmpwi r5,0
1013 beq 9f
1014 mtctr r5
1015 addi r6,r4,VCPU_SLB
10161: ld r8,VCPU_SLB_E(r6)
1017 ld r9,VCPU_SLB_V(r6)
1018 slbmte r9,r8
1019 addi r6,r6,VCPU_SLB_SIZE
1020 bdnz 1b
10219:
1022
1021#ifdef CONFIG_KVM_XICS 1023#ifdef CONFIG_KVM_XICS
1022 /* We are entering the guest on that thread, push VCPU to XIVE */ 1024 /* We are entering the guest on that thread, push VCPU to XIVE */
1023 ld r10, HSTATE_XIVE_TIMA_PHYS(r13) 1025 ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@ -1031,8 +1033,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1031 li r9, TM_QW1_OS + TM_WORD2 1033 li r9, TM_QW1_OS + TM_WORD2
1032 stwcix r11,r9,r10 1034 stwcix r11,r9,r10
1033 li r9, 1 1035 li r9, 1
1034 stw r9, VCPU_XIVE_PUSHED(r4) 1036 stb r9, VCPU_XIVE_PUSHED(r4)
1035 eieio 1037 eieio
1038
1039 /*
1040 * We clear the irq_pending flag. There is a small chance of a
1041 * race vs. the escalation interrupt happening on another
1042 * processor setting it again, but the only consequence is to
1043 * cause a spurious wakeup on the next H_CEDE which is not an
1044 * issue.
1045 */
1046 li r0,0
1047 stb r0, VCPU_IRQ_PENDING(r4)
1048
1049 /*
1050 * In single escalation mode, if the escalation interrupt is
1051 * on, we mask it.
1052 */
1053 lbz r0, VCPU_XIVE_ESC_ON(r4)
1054 cmpwi r0,0
1055 beq 1f
1056 ld r10, VCPU_XIVE_ESC_RADDR(r4)
1057 li r9, XIVE_ESB_SET_PQ_01
1058 ldcix r0, r10, r9
1059 sync
1060
1061 /* We have a possible subtle race here: The escalation interrupt might
1062 * have fired and be on its way to the host queue while we mask it,
1063 * and if we unmask it early enough (re-cede right away), there is
1064 * a theoretical possibility that it fires again, thus landing in the
1065 * target queue more than once which is a big no-no.
1066 *
1067 * Fortunately, solving this is rather easy. If the above load setting
1068 * PQ to 01 returns a previous value where P is set, then we know the
1069 * escalation interrupt is somewhere on its way to the host. In that
1070 * case we simply don't clear the xive_esc_on flag below. It will be
1071 * eventually cleared by the handler for the escalation interrupt.
1072 *
1073 * Then, when doing a cede, we check that flag again before re-enabling
1074 * the escalation interrupt, and if set, we abort the cede.
1075 */
1076 andi. r0, r0, XIVE_ESB_VAL_P
1077 bne- 1f
1078
1079 /* Now P is 0, we can clear the flag */
1080 li r0, 0
1081 stb r0, VCPU_XIVE_ESC_ON(r4)
10821:
1036no_xive: 1083no_xive:
1037#endif /* CONFIG_KVM_XICS */ 1084#endif /* CONFIG_KVM_XICS */
1038 1085
@@ -1193,7 +1240,7 @@ hdec_soon:
1193 addi r3, r4, VCPU_TB_RMEXIT 1240 addi r3, r4, VCPU_TB_RMEXIT
1194 bl kvmhv_accumulate_time 1241 bl kvmhv_accumulate_time
1195#endif 1242#endif
1196 b guest_exit_cont 1243 b guest_bypass
1197 1244
1198/****************************************************************************** 1245/******************************************************************************
1199 * * 1246 * *
@@ -1423,15 +1470,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1423 blt deliver_guest_interrupt 1470 blt deliver_guest_interrupt
1424 1471
1425guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 1472guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1473 /* Save more register state */
1474 mfdar r6
1475 mfdsisr r7
1476 std r6, VCPU_DAR(r9)
1477 stw r7, VCPU_DSISR(r9)
1478 /* don't overwrite fault_dar/fault_dsisr if HDSI */
1479 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
1480 beq mc_cont
1481 std r6, VCPU_FAULT_DAR(r9)
1482 stw r7, VCPU_FAULT_DSISR(r9)
1483
1484 /* See if it is a machine check */
1485 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1486 beq machine_check_realmode
1487mc_cont:
1488#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1489 addi r3, r9, VCPU_TB_RMEXIT
1490 mr r4, r9
1491 bl kvmhv_accumulate_time
1492#endif
1426#ifdef CONFIG_KVM_XICS 1493#ifdef CONFIG_KVM_XICS
1427 /* We are exiting, pull the VP from the XIVE */ 1494 /* We are exiting, pull the VP from the XIVE */
1428 lwz r0, VCPU_XIVE_PUSHED(r9) 1495 lbz r0, VCPU_XIVE_PUSHED(r9)
1429 cmpwi cr0, r0, 0 1496 cmpwi cr0, r0, 0
1430 beq 1f 1497 beq 1f
1431 li r7, TM_SPC_PULL_OS_CTX 1498 li r7, TM_SPC_PULL_OS_CTX
1432 li r6, TM_QW1_OS 1499 li r6, TM_QW1_OS
1433 mfmsr r0 1500 mfmsr r0
1434 andi. r0, r0, MSR_IR /* in real mode? */ 1501 andi. r0, r0, MSR_DR /* in real mode? */
1435 beq 2f 1502 beq 2f
1436 ld r10, HSTATE_XIVE_TIMA_VIRT(r13) 1503 ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
1437 cmpldi cr0, r10, 0 1504 cmpldi cr0, r10, 0
@@ -1454,33 +1521,42 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1454 /* Fixup some of the state for the next load */ 1521 /* Fixup some of the state for the next load */
1455 li r10, 0 1522 li r10, 0
1456 li r0, 0xff 1523 li r0, 0xff
1457 stw r10, VCPU_XIVE_PUSHED(r9) 1524 stb r10, VCPU_XIVE_PUSHED(r9)
1458 stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9) 1525 stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
1459 stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9) 1526 stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
1460 eieio 1527 eieio
14611: 15281:
1462#endif /* CONFIG_KVM_XICS */ 1529#endif /* CONFIG_KVM_XICS */
1463 /* Save more register state */
1464 mfdar r6
1465 mfdsisr r7
1466 std r6, VCPU_DAR(r9)
1467 stw r7, VCPU_DSISR(r9)
1468 /* don't overwrite fault_dar/fault_dsisr if HDSI */
1469 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
1470 beq mc_cont
1471 std r6, VCPU_FAULT_DAR(r9)
1472 stw r7, VCPU_FAULT_DSISR(r9)
1473 1530
1474 /* See if it is a machine check */ 1531 /* For hash guest, read the guest SLB and save it away */
1475 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1532 ld r5, VCPU_KVM(r9)
1476 beq machine_check_realmode 1533 lbz r0, KVM_RADIX(r5)
1477mc_cont: 1534 li r5, 0
1478#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1535 cmpwi r0, 0
1479 addi r3, r9, VCPU_TB_RMEXIT 1536 bne 3f /* for radix, save 0 entries */
1480 mr r4, r9 1537 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
1481 bl kvmhv_accumulate_time 1538 mtctr r0
1482#endif 1539 li r6,0
1540 addi r7,r9,VCPU_SLB
15411: slbmfee r8,r6
1542 andis. r0,r8,SLB_ESID_V@h
1543 beq 2f
1544 add r8,r8,r6 /* put index in */
1545 slbmfev r3,r6
1546 std r8,VCPU_SLB_E(r7)
1547 std r3,VCPU_SLB_V(r7)
1548 addi r7,r7,VCPU_SLB_SIZE
1549 addi r5,r5,1
15502: addi r6,r6,1
1551 bdnz 1b
1552 /* Finally clear out the SLB */
1553 li r0,0
1554 slbmte r0,r0
1555 slbia
1556 ptesync
15573: stw r5,VCPU_SLB_MAX(r9)
1483 1558
1559guest_bypass:
1484 mr r3, r12 1560 mr r3, r12
1485 /* Increment exit count, poke other threads to exit */ 1561 /* Increment exit count, poke other threads to exit */
1486 bl kvmhv_commence_exit 1562 bl kvmhv_commence_exit
@@ -1501,31 +1577,6 @@ mc_cont:
1501 ori r6,r6,1 1577 ori r6,r6,1
1502 mtspr SPRN_CTRLT,r6 1578 mtspr SPRN_CTRLT,r6
15034: 15794:
1504 /* Check if we are running hash or radix and store it in cr2 */
1505 ld r5, VCPU_KVM(r9)
1506 lbz r0, KVM_RADIX(r5)
1507 cmpwi cr2,r0,0
1508
1509 /* Read the guest SLB and save it away */
1510 li r5, 0
1511 bne cr2, 3f /* for radix, save 0 entries */
1512 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
1513 mtctr r0
1514 li r6,0
1515 addi r7,r9,VCPU_SLB
15161: slbmfee r8,r6
1517 andis. r0,r8,SLB_ESID_V@h
1518 beq 2f
1519 add r8,r8,r6 /* put index in */
1520 slbmfev r3,r6
1521 std r8,VCPU_SLB_E(r7)
1522 std r3,VCPU_SLB_V(r7)
1523 addi r7,r7,VCPU_SLB_SIZE
1524 addi r5,r5,1
15252: addi r6,r6,1
1526 bdnz 1b
15273: stw r5,VCPU_SLB_MAX(r9)
1528
1529 /* 1580 /*
1530 * Save the guest PURR/SPURR 1581 * Save the guest PURR/SPURR
1531 */ 1582 */
@@ -1803,7 +1854,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1803 ld r5, VCPU_KVM(r9) 1854 ld r5, VCPU_KVM(r9)
1804 lbz r0, KVM_RADIX(r5) 1855 lbz r0, KVM_RADIX(r5)
1805 cmpwi cr2, r0, 0 1856 cmpwi cr2, r0, 0
1806 beq cr2, 3f 1857 beq cr2, 4f
1807 1858
1808 /* Radix: Handle the case where the guest used an illegal PID */ 1859 /* Radix: Handle the case where the guest used an illegal PID */
1809 LOAD_REG_ADDR(r4, mmu_base_pid) 1860 LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -1839,15 +1890,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1839BEGIN_FTR_SECTION 1890BEGIN_FTR_SECTION
1840 PPC_INVALIDATE_ERAT 1891 PPC_INVALIDATE_ERAT
1841END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) 1892END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
1842 b 4f 18934:
1843#endif /* CONFIG_PPC_RADIX_MMU */ 1894#endif /* CONFIG_PPC_RADIX_MMU */
1844 1895
1845 /* Hash: clear out SLB */
18463: li r5,0
1847 slbmte r5,r5
1848 slbia
1849 ptesync
18504:
1851 /* 1896 /*
1852 * POWER7/POWER8 guest -> host partition switch code. 1897 * POWER7/POWER8 guest -> host partition switch code.
1853 * We don't have to lock against tlbies but we do 1898 * We don't have to lock against tlbies but we do
@@ -1908,16 +1953,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1908 bne 27f 1953 bne 27f
1909 bl kvmppc_realmode_hmi_handler 1954 bl kvmppc_realmode_hmi_handler
1910 nop 1955 nop
1956 cmpdi r3, 0
1911 li r12, BOOK3S_INTERRUPT_HMI 1957 li r12, BOOK3S_INTERRUPT_HMI
1912 /* 1958 /*
1913 * At this point kvmppc_realmode_hmi_handler would have resync-ed 1959 * At this point kvmppc_realmode_hmi_handler may have resync-ed
1914 * the TB. Hence it is not required to subtract guest timebase 1960 * the TB, and if it has, we must not subtract the guest timebase
1915 * offset from timebase. So, skip it. 1961 * offset from the timebase. So, skip it.
1916 * 1962 *
1917 * Also, do not call kvmppc_subcore_exit_guest() because it has 1963 * Also, do not call kvmppc_subcore_exit_guest() because it has
1918 * been invoked as part of kvmppc_realmode_hmi_handler(). 1964 * been invoked as part of kvmppc_realmode_hmi_handler().
1919 */ 1965 */
1920 b 30f 1966 beq 30f
1921 1967
192227: 196827:
1923 /* Subtract timebase offset from timebase */ 1969 /* Subtract timebase offset from timebase */
@@ -2744,7 +2790,32 @@ kvm_cede_prodded:
2744 /* we've ceded but we want to give control to the host */ 2790 /* we've ceded but we want to give control to the host */
2745kvm_cede_exit: 2791kvm_cede_exit:
2746 ld r9, HSTATE_KVM_VCPU(r13) 2792 ld r9, HSTATE_KVM_VCPU(r13)
2747 b guest_exit_cont 2793#ifdef CONFIG_KVM_XICS
2794 /* Abort if we still have a pending escalation */
2795 lbz r5, VCPU_XIVE_ESC_ON(r9)
2796 cmpwi r5, 0
2797 beq 1f
2798 li r0, 0
2799 stb r0, VCPU_CEDED(r9)
28001: /* Enable XIVE escalation */
2801 li r5, XIVE_ESB_SET_PQ_00
2802 mfmsr r0
2803 andi. r0, r0, MSR_DR /* in real mode? */
2804 beq 1f
2805 ld r10, VCPU_XIVE_ESC_VADDR(r9)
2806 cmpdi r10, 0
2807 beq 3f
2808 ldx r0, r10, r5
2809 b 2f
28101: ld r10, VCPU_XIVE_ESC_RADDR(r9)
2811 cmpdi r10, 0
2812 beq 3f
2813 ldcix r0, r10, r5
28142: sync
2815 li r0, 1
2816 stb r0, VCPU_XIVE_ESC_ON(r9)
2817#endif /* CONFIG_KVM_XICS */
28183: b guest_exit_cont
2748 2819
2749 /* Try to handle a machine check in real mode */ 2820 /* Try to handle a machine check in real mode */
2750machine_check_realmode: 2821machine_check_realmode:
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 0d750d274c4e..badfdbb857a2 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
84{ 84{
85 struct kvm_vcpu *vcpu = data; 85 struct kvm_vcpu *vcpu = data;
86 86
87 /* We use the existing H_PROD mechanism to wake up the target */ 87 vcpu->arch.irq_pending = 1;
88 vcpu->arch.prodded = 1;
89 smp_mb(); 88 smp_mb();
90 if (vcpu->arch.ceded) 89 if (vcpu->arch.ceded)
91 kvmppc_fast_vcpu_kick(vcpu); 90 kvmppc_fast_vcpu_kick(vcpu);
92 91
92 /* Since we have the no-EOI flag, the interrupt is effectively
93 * disabled now. Clearing xive_esc_on means we won't bother
94 * doing so on the next entry.
95 *
96 * This also allows the entry code to know that if a PQ combination
97 * of 10 is observed while xive_esc_on is true, it means the queue
98 * contains an unprocessed escalation interrupt. We don't make use of
99 * that knowledge today but might (see comment in book3s_hv_rmhandlers.S)
100 */
101 vcpu->arch.xive_esc_on = false;
102
93 return IRQ_HANDLED; 103 return IRQ_HANDLED;
94} 104}
95 105
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
112 return -EIO; 122 return -EIO;
113 } 123 }
114 124
115 /* 125 if (xc->xive->single_escalation)
116 * Future improvement: start with them disabled 126 name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
117 * and handle DD2 and later scheme of merged escalation 127 vcpu->kvm->arch.lpid, xc->server_num);
118 * interrupts 128 else
119 */ 129 name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
120 name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", 130 vcpu->kvm->arch.lpid, xc->server_num, prio);
121 vcpu->kvm->arch.lpid, xc->server_num, prio);
122 if (!name) { 131 if (!name) {
123 pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", 132 pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
124 prio, xc->server_num); 133 prio, xc->server_num);
125 rc = -ENOMEM; 134 rc = -ENOMEM;
126 goto error; 135 goto error;
127 } 136 }
137
138 pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
139
128 rc = request_irq(xc->esc_virq[prio], xive_esc_irq, 140 rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
129 IRQF_NO_THREAD, name, vcpu); 141 IRQF_NO_THREAD, name, vcpu);
130 if (rc) { 142 if (rc) {
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
133 goto error; 145 goto error;
134 } 146 }
135 xc->esc_virq_names[prio] = name; 147 xc->esc_virq_names[prio] = name;
148
149 /* In single escalation mode, we grab the ESB MMIO of the
150 * interrupt and mask it. Also populate the VCPU v/raddr
151 * of the ESB page for use by asm entry/exit code. Finally
152 * set the XIVE_IRQ_NO_EOI flag which will prevent the
153 * core code from performing an EOI on the escalation
154 * interrupt, thus leaving it effectively masked after
155 * it fires once.
156 */
157 if (xc->xive->single_escalation) {
158 struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
159 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
160
161 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
162 vcpu->arch.xive_esc_raddr = xd->eoi_page;
163 vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
164 xd->flags |= XIVE_IRQ_NO_EOI;
165 }
166
136 return 0; 167 return 0;
137error: 168error:
138 irq_dispose_mapping(xc->esc_virq[prio]); 169 irq_dispose_mapping(xc->esc_virq[prio]);
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
191 222
192 pr_devel("Provisioning prio... %d\n", prio); 223 pr_devel("Provisioning prio... %d\n", prio);
193 224
194 /* Provision each VCPU and enable escalations */ 225 /* Provision each VCPU and enable escalations if needed */
195 kvm_for_each_vcpu(i, vcpu, kvm) { 226 kvm_for_each_vcpu(i, vcpu, kvm) {
196 if (!vcpu->arch.xive_vcpu) 227 if (!vcpu->arch.xive_vcpu)
197 continue; 228 continue;
198 rc = xive_provision_queue(vcpu, prio); 229 rc = xive_provision_queue(vcpu, prio);
199 if (rc == 0) 230 if (rc == 0 && !xive->single_escalation)
200 xive_attach_escalation(vcpu, prio); 231 xive_attach_escalation(vcpu, prio);
201 if (rc) 232 if (rc)
202 return rc; 233 return rc;
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
1082 /* Allocate IPI */ 1113 /* Allocate IPI */
1083 xc->vp_ipi = xive_native_alloc_irq(); 1114 xc->vp_ipi = xive_native_alloc_irq();
1084 if (!xc->vp_ipi) { 1115 if (!xc->vp_ipi) {
1116 pr_err("Failed to allocate xive irq for VCPU IPI\n");
1085 r = -EIO; 1117 r = -EIO;
1086 goto bail; 1118 goto bail;
1087 } 1119 }
@@ -1092,18 +1124,33 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
1092 goto bail; 1124 goto bail;
1093 1125
1094 /* 1126 /*
1127 * Enable the VP first as the single escalation mode will
1128 * affect escalation interrupts numbering
1129 */
1130 r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
1131 if (r) {
1132 pr_err("Failed to enable VP in OPAL, err %d\n", r);
1133 goto bail;
1134 }
1135
1136 /*
1095 * Initialize queues. Initially we set them all for no queueing 1137 * Initialize queues. Initially we set them all for no queueing
1096 * and we enable escalation for queue 0 only which we'll use for 1138 * and we enable escalation for queue 0 only which we'll use for
1097 * our mfrr change notifications. If the VCPU is hot-plugged, we 1139 * our mfrr change notifications. If the VCPU is hot-plugged, we
1098 * do handle provisioning however. 1140 * do handle provisioning however based on the existing "map"
1141 * of enabled queues.
1099 */ 1142 */
1100 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { 1143 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1101 struct xive_q *q = &xc->queues[i]; 1144 struct xive_q *q = &xc->queues[i];
1102 1145
1146 /* Single escalation, no queue 7 */
1147 if (i == 7 && xive->single_escalation)
1148 break;
1149
1103 /* Is queue already enabled ? Provision it */ 1150 /* Is queue already enabled ? Provision it */
1104 if (xive->qmap & (1 << i)) { 1151 if (xive->qmap & (1 << i)) {
1105 r = xive_provision_queue(vcpu, i); 1152 r = xive_provision_queue(vcpu, i);
1106 if (r == 0) 1153 if (r == 0 && !xive->single_escalation)
1107 xive_attach_escalation(vcpu, i); 1154 xive_attach_escalation(vcpu, i);
1108 if (r) 1155 if (r)
1109 goto bail; 1156 goto bail;
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
1123 if (r) 1170 if (r)
1124 goto bail; 1171 goto bail;
1125 1172
1126 /* Enable the VP */
1127 r = xive_native_enable_vp(xc->vp_id);
1128 if (r)
1129 goto bail;
1130
1131 /* Route the IPI */ 1173 /* Route the IPI */
1132 r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); 1174 r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
1133 if (!r) 1175 if (!r)
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1474 1516
1475 pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n", 1517 pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
1476 val, server, guest_prio); 1518 val, server, guest_prio);
1519
1477 /* 1520 /*
1478 * If the source doesn't already have an IPI, allocate 1521 * If the source doesn't already have an IPI, allocate
1479 * one and get the corresponding data 1522 * one and get the corresponding data
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
1762 if (xive->vp_base == XIVE_INVALID_VP) 1805 if (xive->vp_base == XIVE_INVALID_VP)
1763 ret = -ENOMEM; 1806 ret = -ENOMEM;
1764 1807
1808 xive->single_escalation = xive_native_has_single_escalation();
1809
1765 if (ret) { 1810 if (ret) {
1766 kfree(xive); 1811 kfree(xive);
1767 return ret; 1812 return ret;
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
1795 1840
1796 kvm_for_each_vcpu(i, vcpu, kvm) { 1841 kvm_for_each_vcpu(i, vcpu, kvm) {
1797 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 1842 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1843 unsigned int i;
1798 1844
1799 if (!xc) 1845 if (!xc)
1800 continue; 1846 continue;
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
1804 xc->server_num, xc->cppr, xc->hw_cppr, 1850 xc->server_num, xc->cppr, xc->hw_cppr,
1805 xc->mfrr, xc->pending, 1851 xc->mfrr, xc->pending,
1806 xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); 1852 xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
1853 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1854 struct xive_q *q = &xc->queues[i];
1855 u32 i0, i1, idx;
1856
1857 if (!q->qpage && !xc->esc_virq[i])
1858 continue;
1859
1860 seq_printf(m, " [q%d]: ", i);
1861
1862 if (q->qpage) {
1863 idx = q->idx;
1864 i0 = be32_to_cpup(q->qpage + idx);
1865 idx = (idx + 1) & q->msk;
1866 i1 = be32_to_cpup(q->qpage + idx);
1867 seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
1868 }
1869 if (xc->esc_virq[i]) {
1870 struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
1871 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
1872 u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
1873 seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
1874 (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
1875 (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
1876 xc->esc_virq[i], pq, xd->eoi_page);
1877 seq_printf(m, "\n");
1878 }
1879 }
1807 1880
1808 t_rm_h_xirr += xc->stat_rm_h_xirr; 1881 t_rm_h_xirr += xc->stat_rm_h_xirr;
1809 t_rm_h_ipoll += xc->stat_rm_h_ipoll; 1882 t_rm_h_ipoll += xc->stat_rm_h_ipoll;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 6ba63f8e8a61..a08ae6fd4c51 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -120,6 +120,8 @@ struct kvmppc_xive {
120 u32 q_order; 120 u32 q_order;
121 u32 q_page_order; 121 u32 q_page_order;
122 122
123 /* Flags */
124 u8 single_escalation;
123}; 125};
124 126
125#define KVMPPC_XIVE_Q_COUNT 8 127#define KVMPPC_XIVE_Q_COUNT 8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
201 * is as follows. 203 * is as follows.
202 * 204 *
203 * Guest request for 0...6 are honored. Guest request for anything 205 * Guest request for 0...6 are honored. Guest request for anything
204 * higher results in a priority of 7 being applied. 206 * higher results in a priority of 6 being applied.
205 *
206 * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
207 * in order to match AIX expectations
208 * 207 *
209 * Similar mapping is done for CPPR values 208 * Similar mapping is done for CPPR values
210 */ 209 */
211static inline u8 xive_prio_from_guest(u8 prio) 210static inline u8 xive_prio_from_guest(u8 prio)
212{ 211{
213 if (prio == 0xff || prio < 8) 212 if (prio == 0xff || prio < 6)
214 return prio; 213 return prio;
215 return 7; 214 return 6;
216} 215}
217 216
218static inline u8 xive_prio_to_guest(u8 prio) 217static inline u8 xive_prio_to_guest(u8 prio)
219{ 218{
220 if (prio == 0xff || prio < 7) 219 return prio;
221 return prio;
222 return 0xb;
223} 220}
224 221
225static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle) 222static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 545a230f675f..748562ec9a04 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -763,7 +763,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
763 763
764 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 764 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
765 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 765 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
766 vcpu->arch.dec_expires = ~(u64)0; 766 vcpu->arch.dec_expires = get_tb();
767 767
768#ifdef CONFIG_KVM_EXIT_TIMING 768#ifdef CONFIG_KVM_EXIT_TIMING
769 mutex_init(&vcpu->arch.exit_timing_lock); 769 mutex_init(&vcpu->arch.exit_timing_lock);
@@ -1106,11 +1106,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
1106{ 1106{
1107 enum emulation_result emulated = EMULATE_DONE; 1107 enum emulation_result emulated = EMULATE_DONE;
1108 1108
1109 /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ 1109 /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
1110 if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || 1110 if (vcpu->arch.mmio_vsx_copy_nums > 4)
1111 (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
1112 return EMULATE_FAIL; 1111 return EMULATE_FAIL;
1113 }
1114 1112
1115 while (vcpu->arch.mmio_vsx_copy_nums) { 1113 while (vcpu->arch.mmio_vsx_copy_nums) {
1116 emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, 1114 emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
@@ -1252,11 +1250,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
1252 1250
1253 vcpu->arch.io_gpr = rs; 1251 vcpu->arch.io_gpr = rs;
1254 1252
1255 /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ 1253 /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
1256 if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || 1254 if (vcpu->arch.mmio_vsx_copy_nums > 4)
1257 (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
1258 return EMULATE_FAIL; 1255 return EMULATE_FAIL;
1259 }
1260 1256
1261 while (vcpu->arch.mmio_vsx_copy_nums) { 1257 while (vcpu->arch.mmio_vsx_copy_nums) {
1262 if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) 1258 if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index e44d2b2ea97e..1c03c978eb18 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
143 int i; 143 int i;
144 u64 min, max, sum, sum_quad; 144 u64 min, max, sum, sum_quad;
145 145
146 seq_printf(m, "%s", "type count min max sum sum_squared\n"); 146 seq_puts(m, "type count min max sum sum_squared\n");
147
148 147
149 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { 148 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
150 149
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a3b8d7d1316e..2547b6021e6a 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
367 * EOI the source if it hasn't been disabled and hasn't 367 * EOI the source if it hasn't been disabled and hasn't
368 * been passed-through to a KVM guest 368 * been passed-through to a KVM guest
369 */ 369 */
370 if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) 370 if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
371 !(xd->flags & XIVE_IRQ_NO_EOI))
371 xive_do_source_eoi(irqd_to_hwirq(d), xd); 372 xive_do_source_eoi(irqd_to_hwirq(d), xd);
372 373
373 /* 374 /*
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ebc244b08d67..d22aeb0b69e1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
42static u32 xive_queue_shift; 42static u32 xive_queue_shift;
43static u32 xive_pool_vps = XIVE_INVALID_VP; 43static u32 xive_pool_vps = XIVE_INVALID_VP;
44static struct kmem_cache *xive_provision_cache; 44static struct kmem_cache *xive_provision_cache;
45static bool xive_has_single_esc;
45 46
46int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) 47int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
47{ 48{
@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
571 break; 572 break;
572 } 573 }
573 574
575 /* Do we support single escalation */
576 if (of_get_property(np, "single-escalation-support", NULL) != NULL)
577 xive_has_single_esc = true;
578
574 /* Configure Thread Management areas for KVM */ 579 /* Configure Thread Management areas for KVM */
575 for_each_possible_cpu(cpu) 580 for_each_possible_cpu(cpu)
576 kvmppc_set_xive_tima(cpu, r.start, tima); 581 kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
667} 672}
668EXPORT_SYMBOL_GPL(xive_native_free_vp_block); 673EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
669 674
670int xive_native_enable_vp(u32 vp_id) 675int xive_native_enable_vp(u32 vp_id, bool single_escalation)
671{ 676{
672 s64 rc; 677 s64 rc;
678 u64 flags = OPAL_XIVE_VP_ENABLED;
673 679
680 if (single_escalation)
681 flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
674 for (;;) { 682 for (;;) {
675 rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0); 683 rc = opal_xive_set_vp_info(vp_id, flags, 0);
676 if (rc != OPAL_BUSY) 684 if (rc != OPAL_BUSY)
677 break; 685 break;
678 msleep(1); 686 msleep(1);
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
710 return 0; 718 return 0;
711} 719}
712EXPORT_SYMBOL_GPL(xive_native_get_vp_info); 720EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
721
722bool xive_native_has_single_escalation(void)
723{
724 return xive_has_single_esc;
725}
726EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);