commit d2b9b2079e23c1ab80ce1d7670d5e1994468a881
tree bd9bfb74343da003b7bac0569d0a7f8025cbaef4
parent 7bf14c28ee776be567855bd39ed8ff795ea19f55
parent 9b9b13a6d1537ddc4caccd6f1c41b78edbc08437
author Radim Krčmář <rkrcmar@redhat.com> 2018-02-01 10:13:07 -0500
committer Radim Krčmář <rkrcmar@redhat.com> 2018-02-01 10:13:07 -0500
Merge tag 'kvm-ppc-next-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
PPC KVM update for 4.16
- Allow HPT guests to run on a radix host on POWER9 v2.2 CPUs
without requiring the complex thread synchronization that earlier
CPU versions required.
- A series from Ben Herrenschmidt to improve the handling of
escalation interrupts with the XIVE interrupt controller.
- Provide for the decrementer register to be copied across on
migration.
- Various minor cleanups and bugfixes.
 Documentation/virtual/kvm/api.txt        |   1
 arch/powerpc/include/asm/hmi.h           |   4
 arch/powerpc/include/asm/kvm_book3s_64.h |  14
 arch/powerpc/include/asm/kvm_host.h      |   6
 arch/powerpc/include/asm/opal-api.h      |   1
 arch/powerpc/include/asm/reg.h           |   5
 arch/powerpc/include/asm/xive-regs.h     |  35
 arch/powerpc/include/asm/xive.h          |  41
 arch/powerpc/include/uapi/asm/kvm.h      |   2
 arch/powerpc/kernel/asm-offsets.c        |   4
 arch/powerpc/kernel/mce.c                | 142
 arch/powerpc/kvm/book3s_64_mmu_radix.c   |   2
 arch/powerpc/kvm/book3s_hv.c             |  54
 arch/powerpc/kvm/book3s_hv_ras.c         |   8
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 237
 arch/powerpc/kvm/book3s_xive.c           | 109
 arch/powerpc/kvm/book3s_xive.h           |  15
 arch/powerpc/kvm/powerpc.c               |  14
 arch/powerpc/kvm/timing.c                |   3
 arch/powerpc/sysdev/xive/common.c        |   3
 arch/powerpc/sysdev/xive/native.c        |  18
 21 files changed, 499 insertions(+), 219 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 70d3368adba9..792fa8717d13 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1841,6 +1841,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_DBSR              | 32
   PPC   | KVM_REG_PPC_TIDR              | 64
   PPC   | KVM_REG_PPC_PSSCR             | 64
+  PPC   | KVM_REG_PPC_DEC_EXPIRY        | 64
   PPC   | KVM_REG_PPC_TM_GPR0           | 64
           ...
   PPC   | KVM_REG_PPC_TM_GPR31          | 64
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 85b7a1a21e22..9c14f7b5c46c 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
 static inline void wait_for_subcore_guest_exit(void) { }
 static inline void wait_for_tb_resync(void) { }
 #endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
 #endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 735cfa35298a..998f7b7aaa9e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
        lphi = (l >> 16) & 0xf;
        switch ((l >> 12) & 0xf) {
        case 0:
-               return !lphi ? 24 : -1;         /* 16MB */
+               return !lphi ? 24 : 0;          /* 16MB */
                break;
        case 1:
                return 16;                      /* 64kB */
                break;
        case 3:
-               return !lphi ? 34 : -1;         /* 16GB */
+               return !lphi ? 34 : 0;          /* 16GB */
                break;
        case 7:
                return (16 << 8) + 12;          /* 64kB in 4kB */
@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
                return (24 << 8) + 12;          /* 16MB in 4kB */
                break;
        }
-       return -1;
+       return 0;
 }
 
 static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
 
 static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
 {
-       return 1ul << kvmppc_hpte_actual_page_shift(v, r);
+       int shift = kvmppc_hpte_actual_page_shift(v, r);
+
+       if (shift)
+               return 1ul << shift;
+       return 0;
 }
 
 static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
                va_low ^= v >> (SID_SHIFT_1T - 16);
        va_low &= 0x7ff;
 
-       if (b_pgshift == 12) {
+       if (b_pgshift <= 12) {
                if (a_pgshift > 12) {
                        sllp = (a_pgshift == 16) ? 5 : 4;
                        rb |= sllp << 5;        /* AP field */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3aa5b577cd60..fef8133becc8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -709,6 +709,7 @@ struct kvm_vcpu_arch {
        u8 ceded;
        u8 prodded;
        u8 doorbell_request;
+       u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
        u32 last_inst;
 
        struct swait_queue_head *wqp;
@@ -738,8 +739,11 @@ struct kvm_vcpu_arch {
        struct kvmppc_icp *icp; /* XICS presentation controller */
        struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
        __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
-       u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+       u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+       u8 xive_esc_on; /* Is the escalation irq enabled ? */
        union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+       u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
+       u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..fc926743647e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1073,6 +1073,7 @@ enum {
 /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
 enum {
        OPAL_XIVE_VP_ENABLED            = 0x00000001,
+       OPAL_XIVE_VP_SINGLE_ESCALATION  = 0x00000002,
 };
 
 /* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3ccd412..14e41b843952 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -432,8 +432,9 @@
 #define SPRN_LPID      0x13F   /* Logical Partition Identifier */
 #endif
 #define   LPID_RSVD    0x3ff   /* Reserved LPID for partn switching */
-#define SPRN_HMER      0x150   /* Hardware m? error recovery */
-#define SPRN_HMEER     0x151   /* Hardware m? enable error recovery */
+#define SPRN_HMER      0x150   /* Hypervisor maintenance exception reg */
+#define   HMER_DEBUG_TRIG      (1ul << (63 - 17)) /* Debug trigger */
+#define SPRN_HMEER     0x151   /* Hyp maintenance exception enable reg */
 #define SPRN_PCR       0x152   /* Processor compatibility register */
 #define   PCR_VEC_DIS  (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
 #define   PCR_VSX_DIS  (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -10,6 +10,41 @@
 #define _ASM_POWERPC_XIVE_REGS_H
 
 /*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow the PQ bits
+ * to be read or manipulated. They must be used with an 8-byte
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI     0x400   /* Store */
+#define XIVE_ESB_LOAD_EOI      0x000   /* Load */
+#define XIVE_ESB_GET           0x800   /* Load */
+#define XIVE_ESB_SET_PQ_00     0xc00   /* Load */
+#define XIVE_ESB_SET_PQ_01     0xd00   /* Load */
+#define XIVE_ESB_SET_PQ_10     0xe00   /* Load */
+#define XIVE_ESB_SET_PQ_11     0xf00   /* Load */
+
+#define XIVE_ESB_VAL_P         0x2
+#define XIVE_ESB_VAL_Q         0x1
+
+/*
  * Thread Management (aka "TM") registers
  */
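To make the offset scheme concrete: a PQ manipulation is a single 8-byte MMIO load from the source's ESB page, where the offset selects the operation and the returned value carries the previous PQ bits. A simplified sketch along the lines of the helper in arch/powerpc/sysdev/xive/common.c (names abridged; treat the exact signatures as illustrative):

    #include <asm/io.h>
    #include <asm/xive.h>
    #include <asm/xive-regs.h>

    /* One atomic load both changes the PQ state and returns the old one. */
    static u64 esb_load(struct xive_irq_data *xd, u32 offset)
    {
            return in_be64(xd->eoi_mmio + offset);
    }

    /* Example: mask a source (PQ = 01) and learn whether it was pending. */
    static bool mask_and_check_pending(struct xive_irq_data *xd)
    {
            return esb_load(xd, XIVE_ESB_SET_PQ_01) & XIVE_ESB_VAL_P;
    }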
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..e602903c3029 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_EOI_FW   0x10
 #define XIVE_IRQ_FLAG_H_INT_ESB        0x20
 
+/* Special flag set by KVM for escalation interrupts */
+#define XIVE_IRQ_NO_EOI                0x80
+
 #define XIVE_INVALID_CHIP_ID   -1
 
 /* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
        atomic_t pending_count;
 };
 
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI     0x400   /* Store */
-#define XIVE_ESB_LOAD_EOI      0x000   /* Load */
-#define XIVE_ESB_GET           0x800   /* Load */
-#define XIVE_ESB_SET_PQ_00     0xc00   /* Load */
-#define XIVE_ESB_SET_PQ_01     0xd00   /* Load */
-#define XIVE_ESB_SET_PQ_10     0xe00   /* Load */
-#define XIVE_ESB_SET_PQ_11     0xf00   /* Load */
-
-#define XIVE_ESB_VAL_P         0x2
-#define XIVE_ESB_VAL_Q         0x1
-
 /* Global enable flags for the XIVE support */
 extern bool __xive_enabled;
 
@@ -143,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
 
 extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
 extern int xive_native_disable_vp(u32 vp_id);
 extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
 
 #else
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_TIDR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
 #define KVM_REG_PPC_PSSCR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
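Userspace reaches the new register through the generic ONE_REG interface like any other: KVM_GET_ONE_REG on the source and KVM_SET_ONE_REG on the destination are all a migration manager needs. A minimal sketch (vcpu_fd is assumed to be an open vCPU descriptor; error checking trimmed):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Save the guest decrementer expiry on the source host... */
    static uint64_t save_dec_expiry(int vcpu_fd)
    {
            uint64_t val = 0;
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_DEC_EXPIRY,
                    .addr = (uintptr_t)&val,
            };

            ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
            return val;
    }

    /* ...and restore it on the destination before resuming the guest. */
    static void restore_dec_expiry(int vcpu_fd, uint64_t val)
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_DEC_EXPIRY,
                    .addr = (uintptr_t)&val,
            };

            ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
    }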
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f390d57cf2e1..ff6ce2fd7579 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -519,6 +519,7 @@ int main(void)
        OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
        OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
        OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+       OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
        OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
        OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
        OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@ -738,6 +739,9 @@ int main(void)
        DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
                                            arch.xive_cam_word));
        DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+       DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+       DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+       DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 742e4658c5dc..d2fecaec4fec 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
        return handled;
 }
 
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+       DTRIG_UNKNOWN,
+       DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
+       DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
 {
-       __this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-       /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
-       if (pvr_version_is(PVR_POWER9)) {
-               unsigned long hmer = mfspr(SPRN_HMER);
-
-               /* Do we have the debug bit set */
-               if (hmer & PPC_BIT(17)) {
-                       hmer &= ~PPC_BIT(17);
-                       mtspr(SPRN_HMER, hmer);
-
-                       /*
-                        * Now to avoid problems with soft-disable we
-                        * only do the emulation if we are coming from
-                        * user space
-                        */
-                       if (user_mode(regs))
-                               local_paca->hmi_p9_special_emu = 1;
-
-                       /*
-                        * Don't bother going to OPAL if that's the
-                        * only relevant bit.
-                        */
-                       if (!(hmer & mfspr(SPRN_HMEER)))
-                               return local_paca->hmi_p9_special_emu;
-               }
+       int pvr;
+       struct device_node *cpun;
+       struct property *prop = NULL;
+       const char *str;
+
+       /* First look in the device tree */
+       preempt_disable();
+       cpun = of_get_cpu_node(smp_processor_id(), NULL);
+       if (cpun) {
+               of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+                                           prop, str) {
+                       if (strcmp(str, "bit17-vector-ci-load") == 0)
+                               hmer_debug_trig_function = DTRIG_VECTOR_CI;
+                       else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+                               hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                }
+               of_node_put(cpun);
+       }
+       preempt_enable();
+
+       /* If we found the property, don't look at PVR */
+       if (prop)
+               goto out;
+
+       pvr = mfspr(SPRN_PVR);
+       /* Check for POWER9 Nimbus (scale-out) */
+       if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+               /* DD2.2 and later */
+               if ((pvr & 0xfff) >= 0x202)
+                       hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+               /* DD2.0 and DD2.1 - used for vector CI load emulation */
+               else if ((pvr & 0xfff) >= 0x200)
+                       hmer_debug_trig_function = DTRIG_VECTOR_CI;
+       }
+
+ out:
+       switch (hmer_debug_trig_function) {
+       case DTRIG_VECTOR_CI:
+               pr_debug("HMI debug trigger used for vector CI load\n");
+               break;
+       case DTRIG_SUSPEND_ESCAPE:
+               pr_debug("HMI debug trigger used for TM suspend escape\n");
+               break;
+       default:
+               break;
        }
-#endif /* CONFIG_PPC_BOOK3S_64 */
+       return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ *  0 means no further handling is required
+ *  1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+       unsigned long hmer = mfspr(SPRN_HMER);
+       long ret = 0;
+
+       /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+       if (!((hmer & HMER_DEBUG_TRIG)
+             && hmer_debug_trig_function != DTRIG_UNKNOWN))
+               return -1;
+
+       hmer &= ~HMER_DEBUG_TRIG;
+       /* HMER is a write-AND register */
+       mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+       switch (hmer_debug_trig_function) {
+       case DTRIG_VECTOR_CI:
+               /*
+                * Now to avoid problems with soft-disable we
+                * only do the emulation if we are coming from
+                * host user space
+                */
+               if (regs && user_mode(regs))
+                       ret = local_paca->hmi_p9_special_emu = 1;
+
+               break;
+
+       default:
+               break;
+       }
+
+       /*
+        * See if any other HMI causes remain to be handled
+        */
+       if (hmer & mfspr(SPRN_HMEER))
+               return -1;
+
+       return ret;
+}
+
+/*
+ * Return values:
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+       int ret;
+
+       __this_cpu_inc(irq_stat.hmi_exceptions);
+
+       ret = hmi_handle_debugtrig(regs);
+       if (ret >= 0)
+               return ret;
 
        wait_for_subcore_guest_exit();
 
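The "HMER is a write-AND register" comment is the subtle point here: a move-to of HMER ANDs the written value into the register, so writing the complement of one bit acknowledges exactly that cause, without a racy read-modify-write dropping causes that latch in between. A short sketch of the idiom (hypothetical helper, same primitives as in the patch):

    /* Acknowledge one HMI cause on the write-AND HMER register. */
    static unsigned long hmi_ack_cause(unsigned long cause_bit)
    {
            unsigned long hmer = mfspr(SPRN_HMER); /* snapshot causes */

            mtspr(SPRN_HMER, ~cause_bit);   /* HMER &= ~cause_bit */
            return hmer & ~cause_bit;       /* causes still to handle */
    }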
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 58618f644c56..0c854816e653 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
                j = i + 1;
                if (npages) {
                        set_dirty_bits(map, i, npages);
-                       i = j + npages;
+                       j = i + npages;
                }
        }
        return 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2d46037ce936..e5f81fc108e0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -118,6 +118,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 #endif
 
+/* If set, the threads on each CPU core have to be in the same MMU mode */
+static bool no_mixing_hpt_and_radix;
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
@@ -1497,6 +1500,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
        case KVM_REG_PPC_ARCH_COMPAT:
                *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
                break;
+       case KVM_REG_PPC_DEC_EXPIRY:
+               *val = get_reg_val(id, vcpu->arch.dec_expires +
+                                  vcpu->arch.vcore->tb_offset);
+               break;
        default:
                r = -EINVAL;
                break;
@@ -1724,6 +1731,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
        case KVM_REG_PPC_ARCH_COMPAT:
                r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
                break;
+       case KVM_REG_PPC_DEC_EXPIRY:
+               vcpu->arch.dec_expires = set_reg_val(id, *val) -
+                       vcpu->arch.vcore->tb_offset;
+               break;
        default:
                r = -EINVAL;
                break;
@@ -2378,8 +2389,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 static bool subcore_config_ok(int n_subcores, int n_threads)
 {
        /*
-        * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
-        * mode, with one thread per subcore.
+        * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+        * split-core mode, with one thread per subcore.
         */
        if (cpu_has_feature(CPU_FTR_ARCH_300))
                return n_subcores <= 4 && n_threads == 1;
@@ -2415,8 +2426,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
        if (!cpu_has_feature(CPU_FTR_ARCH_207S))
                return false;
 
-       /* POWER9 currently requires all threads to be in the same MMU mode */
-       if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+       /* Some POWER9 chips require all threads to be in the same MMU mode */
+       if (no_mixing_hpt_and_radix &&
            kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
                return false;
 
@@ -2679,9 +2690,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         * threads are offline. Also check if the number of threads in this
         * guest are greater than the current system threads per guest.
         * On POWER9, we need to be not in independent-threads mode if
-        * this is a HPT guest on a radix host.
+        * this is a HPT guest on a radix host machine where the
+        * CPU threads may not be in different MMU modes.
         */
-       hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+       hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
+               !kvm_is_radix(vc->kvm);
        if (((controlled_threads > 1) &&
             ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
            (hpt_on_radix && vc->kvm->arch.threads_indep)) {
@@ -2831,7 +2844,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                 */
                if (!thr0_done)
                        kvmppc_start_thread(NULL, pvc);
-               thr += pvc->num_threads;
        }
 
        /*
@@ -2987,7 +2999,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
 {
        if (!xive_enabled())
                return false;
-       return vcpu->arch.xive_saved_state.pipr <
+       return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
                vcpu->arch.xive_saved_state.cppr;
 }
 #else
@@ -3176,17 +3188,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
         * this thread straight away and have it join in.
         */
        if (!signal_pending(current)) {
-               if (vc->vcore_state == VCORE_PIGGYBACK) {
-                       if (spin_trylock(&vc->lock)) {
-                               if (vc->vcore_state == VCORE_RUNNING &&
-                                   !VCORE_IS_EXITING(vc)) {
-                                       kvmppc_create_dtl_entry(vcpu, vc);
-                                       kvmppc_start_thread(vcpu, vc);
-                                       trace_kvm_guest_enter(vcpu);
-                               }
-                               spin_unlock(&vc->lock);
-                       }
-               } else if (vc->vcore_state == VCORE_RUNNING &&
-                       !VCORE_IS_EXITING(vc)) {
+               if ((vc->vcore_state == VCORE_PIGGYBACK ||
+                    vc->vcore_state == VCORE_RUNNING) &&
+                   !VCORE_IS_EXITING(vc)) {
                        kvmppc_create_dtl_entry(vcpu, vc);
                        kvmppc_start_thread(vcpu, vc);
@@ -4448,6 +4451,19 @@ static int kvmppc_book3s_init_hv(void)
 
        if (kvmppc_radix_possible())
                r = kvmppc_radix_init();
+
+       /*
+        * POWER9 chips before version 2.02 can't have some threads in
+        * HPT mode and some in radix mode on the same core.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               unsigned int pvr = mfspr(SPRN_PVR);
+               if ((pvr >> 16) == PVR_POWER9 &&
+                   (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+                    ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+                       no_mixing_hpt_and_radix = true;
+       }
+
        return r;
 }
 
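The PVR check added to kvmppc_book3s_init_hv() packs several facts into one expression: the top half of the PVR identifies the processor family, the 0xe000 field of the low half distinguishes the POWER9 variant, and the low 12 bits encode the DD level as 0xMmm (0x202 == DD2.2). Unpacked, the logic reads roughly as follows (variant names per the usual Nimbus/Cumulus convention; illustrative only):

    /* true if this POWER9 cannot mix HPT and radix threads on one core */
    static bool p9_no_mixing_hpt_and_radix(unsigned int pvr)
    {
            unsigned int dd = pvr & 0xfff;

            if ((pvr >> 16) != PVR_POWER9)
                    return false;
            if ((pvr & 0xe000) == 0)        /* Nimbus (scale-out) */
                    return dd < 0x202;      /* fixed in DD2.2 */
            if ((pvr & 0xe000) == 0x2000)   /* Cumulus (scale-up) */
                    return dd < 0x101;      /* fixed in DD1.1 */
            return false;
    }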
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index c356f9a40b24..c296343d0dcc 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void)
  *   secondary threads to proceed.
  * - All secondary threads will eventually call opal hmi handler on
  *   their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
  */
 
 long kvmppc_realmode_hmi_handler(void)
 {
-       int ptid = local_paca->kvm_hstate.ptid;
        bool resync_req;
 
-       /* This is only called on primary thread. */
-       BUG_ON(ptid != 0);
        __this_cpu_inc(irq_stat.hmi_exceptions);
 
+       if (hmi_handle_debugtrig(NULL) >= 0)
+               return 1;
+
        /*
         * By now primary thread has already completed guest->host
         * partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9c61f736c75b..b64f10a5f5e7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -617,13 +617,6 @@ kvmppc_hv_entry:
        lbz r0, KVM_RADIX(r9)
        cmpwi cr7, r0, 0
 
-       /* Clear out SLB if hash */
-       bne cr7, 2f
-       li r6,0
-       slbmte r6,r6
-       slbia
-       ptesync
-2:
        /*
         * POWER7/POWER8 host -> guest partition switch code.
         * We don't have to lock against concurrent tlbies,
@@ -738,19 +731,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 10:    cmpdi r4, 0
        beq kvmppc_primary_no_guest
 kvmppc_got_guest:
-
-       /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
-       lwz r5,VCPU_SLB_MAX(r4)
-       cmpwi r5,0
-       beq 9f
-       mtctr r5
-       addi r6,r4,VCPU_SLB
-1:     ld r8,VCPU_SLB_E(r6)
-       ld r9,VCPU_SLB_V(r6)
-       slbmte r9,r8
-       addi r6,r6,VCPU_SLB_SIZE
-       bdnz 1b
-9:
        /* Increment yield count if they have a VPA */
        ld r3, VCPU_VPA(r4)
        cmpdi r3, 0
@@ -957,7 +937,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
        mftb r7
        subf r3,r7,r8
        mtspr SPRN_DEC,r3
-       std r3,VCPU_DEC(r4)
 
        ld r5, VCPU_SPRG0(r4)
        ld r6, VCPU_SPRG1(r4)
@@ -1018,6 +997,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
        cmpdi r3, 512           /* 1 microsecond */
        blt hdec_soon
 
+       /* For hash guest, clear out and reload the SLB */
+       ld r6, VCPU_KVM(r4)
+       lbz r0, KVM_RADIX(r6)
+       cmpwi r0, 0
+       bne 9f
+       li r6, 0
+       slbmte r6, r6
+       slbia
+       ptesync
+
+       /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+       lwz r5,VCPU_SLB_MAX(r4)
+       cmpwi r5,0
+       beq 9f
+       mtctr r5
+       addi r6,r4,VCPU_SLB
+1:     ld r8,VCPU_SLB_E(r6)
+       ld r9,VCPU_SLB_V(r6)
+       slbmte r9,r8
+       addi r6,r6,VCPU_SLB_SIZE
+       bdnz 1b
+9:
+
 #ifdef CONFIG_KVM_XICS
        /* We are entering the guest on that thread, push VCPU to XIVE */
        ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@ -1031,8 +1033,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
        li r9, TM_QW1_OS + TM_WORD2
        stwcix r11,r9,r10
        li r9, 1
-       stw r9, VCPU_XIVE_PUSHED(r4)
+       stb r9, VCPU_XIVE_PUSHED(r4)
        eieio
+
+       /*
+        * We clear the irq_pending flag. There is a small chance of a
+        * race vs. the escalation interrupt happening on another
+        * processor setting it again, but the only consequence is to
+        * cause a spurious wakeup on the next H_CEDE, which is not an
+        * issue.
+        */
+       li r0,0
+       stb r0, VCPU_IRQ_PENDING(r4)
+
+       /*
+        * In single escalation mode, if the escalation interrupt is
+        * on, we mask it.
+        */
+       lbz r0, VCPU_XIVE_ESC_ON(r4)
+       cmpwi r0,0
+       beq 1f
+       ld r10, VCPU_XIVE_ESC_RADDR(r4)
+       li r9, XIVE_ESB_SET_PQ_01
+       ldcix r0, r10, r9
+       sync
+
+       /* We have a possible subtle race here: The escalation interrupt might
+        * have fired and be on its way to the host queue while we mask it,
+        * and if we unmask it early enough (re-cede right away), there is
+        * a theoretical possibility that it fires again, thus landing in the
+        * target queue more than once which is a big no-no.
+        *
+        * Fortunately, solving this is rather easy. If the above load setting
+        * PQ to 01 returns a previous value where P is set, then we know the
+        * escalation interrupt is somewhere on its way to the host. In that
+        * case we simply don't clear the xive_esc_on flag below. It will be
+        * eventually cleared by the handler for the escalation interrupt.
+        *
+        * Then, when doing a cede, we check that flag again before re-enabling
+        * the escalation interrupt, and if set, we abort the cede.
+        */
+       andi. r0, r0, XIVE_ESB_VAL_P
+       bne- 1f
+
+       /* Now P is 0, we can clear the flag */
+       li r0, 0
+       stb r0, VCPU_XIVE_ESC_ON(r4)
+1:
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
@@ -1193,7 +1240,7 @@ hdec_soon:
        addi r3, r4, VCPU_TB_RMEXIT
        bl kvmhv_accumulate_time
 #endif
-       b guest_exit_cont
+       b guest_bypass
 
 /******************************************************************************
  *                                                                            *
@@ -1423,15 +1470,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
        blt deliver_guest_interrupt
 
 guest_exit_cont:               /* r9 = vcpu, r12 = trap, r13 = paca */
+       /* Save more register state */
+       mfdar r6
+       mfdsisr r7
+       std r6, VCPU_DAR(r9)
+       stw r7, VCPU_DSISR(r9)
+       /* don't overwrite fault_dar/fault_dsisr if HDSI */
+       cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+       beq mc_cont
+       std r6, VCPU_FAULT_DAR(r9)
+       stw r7, VCPU_FAULT_DSISR(r9)
+
+       /* See if it is a machine check */
+       cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+       beq machine_check_realmode
+mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi r3, r9, VCPU_TB_RMEXIT
+       mr r4, r9
+       bl kvmhv_accumulate_time
+#endif
 #ifdef CONFIG_KVM_XICS
        /* We are exiting, pull the VP from the XIVE */
-       lwz r0, VCPU_XIVE_PUSHED(r9)
+       lbz r0, VCPU_XIVE_PUSHED(r9)
        cmpwi cr0, r0, 0
        beq 1f
        li r7, TM_SPC_PULL_OS_CTX
        li r6, TM_QW1_OS
        mfmsr r0
-       andi. r0, r0, MSR_IR            /* in real mode? */
+       andi. r0, r0, MSR_DR            /* in real mode? */
        beq 2f
        ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
        cmpldi cr0, r10, 0
@@ -1454,33 +1521,42 @@ guest_exit_cont:               /* r9 = vcpu, r12 = trap, r13 = paca */
        /* Fixup some of the state for the next load */
        li r10, 0
        li r0, 0xff
-       stw r10, VCPU_XIVE_PUSHED(r9)
+       stb r10, VCPU_XIVE_PUSHED(r9)
        stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
        stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
        eieio
 1:
 #endif /* CONFIG_KVM_XICS */
-       /* Save more register state */
-       mfdar r6
-       mfdsisr r7
-       std r6, VCPU_DAR(r9)
-       stw r7, VCPU_DSISR(r9)
-       /* don't overwrite fault_dar/fault_dsisr if HDSI */
-       cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-       beq mc_cont
-       std r6, VCPU_FAULT_DAR(r9)
-       stw r7, VCPU_FAULT_DSISR(r9)
 
-       /* See if it is a machine check */
-       cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-       beq machine_check_realmode
-mc_cont:
-#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
-       addi r3, r9, VCPU_TB_RMEXIT
-       mr r4, r9
-       bl kvmhv_accumulate_time
-#endif
+       /* For hash guest, read the guest SLB and save it away */
+       ld r5, VCPU_KVM(r9)
+       lbz r0, KVM_RADIX(r5)
+       li r5, 0
+       cmpwi r0, 0
+       bne 3f                  /* for radix, save 0 entries */
+       lwz r0,VCPU_SLB_NR(r9)  /* number of entries in SLB */
+       mtctr r0
+       li r6,0
+       addi r7,r9,VCPU_SLB
+1:     slbmfee r8,r6
+       andis. r0,r8,SLB_ESID_V@h
+       beq 2f
+       add r8,r8,r6            /* put index in */
+       slbmfev r3,r6
+       std r8,VCPU_SLB_E(r7)
+       std r3,VCPU_SLB_V(r7)
+       addi r7,r7,VCPU_SLB_SIZE
+       addi r5,r5,1
+2:     addi r6,r6,1
+       bdnz 1b
+       /* Finally clear out the SLB */
+       li r0,0
+       slbmte r0,r0
+       slbia
+       ptesync
+3:     stw r5,VCPU_SLB_MAX(r9)
 
+guest_bypass:
        mr r3, r12
        /* Increment exit count, poke other threads to exit */
        bl kvmhv_commence_exit
@@ -1501,31 +1577,6 @@ mc_cont:
        ori r6,r6,1
        mtspr SPRN_CTRLT,r6
 4:
-       /* Check if we are running hash or radix and store it in cr2 */
-       ld r5, VCPU_KVM(r9)
-       lbz r0, KVM_RADIX(r5)
-       cmpwi cr2,r0,0
-
-       /* Read the guest SLB and save it away */
-       li r5, 0
-       bne cr2, 3f             /* for radix, save 0 entries */
-       lwz r0,VCPU_SLB_NR(r9)  /* number of entries in SLB */
-       mtctr r0
-       li r6,0
-       addi r7,r9,VCPU_SLB
-1:     slbmfee r8,r6
-       andis. r0,r8,SLB_ESID_V@h
-       beq 2f
-       add r8,r8,r6            /* put index in */
-       slbmfev r3,r6
-       std r8,VCPU_SLB_E(r7)
-       std r3,VCPU_SLB_V(r7)
-       addi r7,r7,VCPU_SLB_SIZE
-       addi r5,r5,1
-2:     addi r6,r6,1
-       bdnz 1b
-3:     stw r5,VCPU_SLB_MAX(r9)
-
        /*
         * Save the guest PURR/SPURR
         */
@@ -1803,7 +1854,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
        ld r5, VCPU_KVM(r9)
        lbz r0, KVM_RADIX(r5)
        cmpwi cr2, r0, 0
-       beq cr2, 3f
+       beq cr2, 4f
 
        /* Radix: Handle the case where the guest used an illegal PID */
        LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -1839,15 +1890,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
        PPC_INVALIDATE_ERAT
 END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-       b 4f
+4:
 #endif /* CONFIG_PPC_RADIX_MMU */
 
-       /* Hash: clear out SLB */
-3:     li r5,0
-       slbmte r5,r5
-       slbia
-       ptesync
-4:
        /*
         * POWER7/POWER8 guest -> host partition switch code.
         * We don't have to lock against tlbies but we do
@@ -1908,16 +1953,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        bne 27f
        bl kvmppc_realmode_hmi_handler
        nop
+       cmpdi r3, 0
        li r12, BOOK3S_INTERRUPT_HMI
        /*
-        * At this point kvmppc_realmode_hmi_handler would have resync-ed
-        * the TB. Hence it is not required to subtract guest timebase
-        * offset from timebase. So, skip it.
+        * At this point kvmppc_realmode_hmi_handler may have resync-ed
+        * the TB, and if it has, we must not subtract the guest timebase
+        * offset from the timebase. So, skip it.
         *
         * Also, do not call kvmppc_subcore_exit_guest() because it has
         * been invoked as part of kvmppc_realmode_hmi_handler().
         */
-       b 30f
+       beq 30f
 
 27:
        /* Subtract timebase offset from timebase */
@@ -2744,7 +2790,32 @@ kvm_cede_prodded:
 /* we've ceded but we want to give control to the host */
 kvm_cede_exit:
        ld r9, HSTATE_KVM_VCPU(r13)
-       b guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+       /* Abort if we still have a pending escalation */
+       lbz r5, VCPU_XIVE_ESC_ON(r9)
+       cmpwi r5, 0
+       beq 1f
+       li r0, 0
+       stb r0, VCPU_CEDED(r9)
+1:     /* Enable XIVE escalation */
+       li r5, XIVE_ESB_SET_PQ_00
+       mfmsr r0
+       andi. r0, r0, MSR_DR            /* in real mode? */
+       beq 1f
+       ld r10, VCPU_XIVE_ESC_VADDR(r9)
+       cmpdi r10, 0
+       beq 3f
+       ldx r0, r10, r5
+       b 2f
+1:     ld r10, VCPU_XIVE_ESC_RADDR(r9)
+       cmpdi r10, 0
+       beq 3f
+       ldcix r0, r10, r5
+2:     sync
+       li r0, 1
+       stb r0, VCPU_XIVE_ESC_ON(r9)
+#endif /* CONFIG_KVM_XICS */
+3:     b guest_exit_cont
 
 /* Try to handle a machine check in real mode */
 machine_check_realmode:
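For readers who don't speak the assembler above, the entry-path masking sequence reduces to roughly the following C, reusing the ESB accessors sketched earlier (esb_load_real() stands in for the ldcix real-mode load; this is an illustrative translation, not code from the patch):

    /* Mask the escalation source on guest entry; keep xive_esc_on set
     * if the interrupt already fired and is in flight to the host.
     */
    static void mask_escalation_on_entry(struct kvm_vcpu *vcpu)
    {
            u64 prev;

            if (!vcpu->arch.xive_esc_on)
                    return;
            /* set PQ to 01 and fetch the previous state atomically */
            prev = esb_load_real(vcpu->arch.xive_esc_raddr,
                                 XIVE_ESB_SET_PQ_01);
            if (!(prev & XIVE_ESB_VAL_P))
                    vcpu->arch.xive_esc_on = false; /* nothing in flight */
            /* else: the escalation handler will clear the flag later,
             * and kvm_cede_exit will refuse to cede while it is set */
    }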
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 0d750d274c4e..badfdbb857a2 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c | |||
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data) | |||
84 | { | 84 | { |
85 | struct kvm_vcpu *vcpu = data; | 85 | struct kvm_vcpu *vcpu = data; |
86 | 86 | ||
87 | /* We use the existing H_PROD mechanism to wake up the target */ | 87 | vcpu->arch.irq_pending = 1; |
88 | vcpu->arch.prodded = 1; | ||
89 | smp_mb(); | 88 | smp_mb(); |
90 | if (vcpu->arch.ceded) | 89 | if (vcpu->arch.ceded) |
91 | kvmppc_fast_vcpu_kick(vcpu); | 90 | kvmppc_fast_vcpu_kick(vcpu); |
92 | 91 | ||
92 | /* Since we have the no-EOI flag, the interrupt is effectively | ||
93 | * disabled now. Clearing xive_esc_on means we won't bother | ||
94 | * doing so on the next entry. | ||
95 | * | ||
96 | * This also allows the entry code to know that if a PQ combination | ||
97 | * of 10 is observed while xive_esc_on is true, it means the queue | ||
98 | * contains an unprocessed escalation interrupt. We don't make use of | ||
99 | * that knowledge today but might (see comment in book3s_hv_rmhandler.S) | ||
100 | */ | ||
101 | vcpu->arch.xive_esc_on = false; | ||
102 | |||
93 | return IRQ_HANDLED; | 103 | return IRQ_HANDLED; |
94 | } | 104 | } |
95 | 105 | ||
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | |||
112 | return -EIO; | 122 | return -EIO; |
113 | } | 123 | } |
114 | 124 | ||
115 | /* | 125 | if (xc->xive->single_escalation) |
116 | * Future improvement: start with them disabled | 126 | name = kasprintf(GFP_KERNEL, "kvm-%d-%d", |
117 | * and handle DD2 and later scheme of merged escalation | 127 | vcpu->kvm->arch.lpid, xc->server_num); |
118 | * interrupts | 128 | else |
119 | */ | 129 | name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", |
120 | name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", | 130 | vcpu->kvm->arch.lpid, xc->server_num, prio); |
121 | vcpu->kvm->arch.lpid, xc->server_num, prio); | ||
122 | if (!name) { | 131 | if (!name) { |
123 | pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", | 132 | pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", |
124 | prio, xc->server_num); | 133 | prio, xc->server_num); |
125 | rc = -ENOMEM; | 134 | rc = -ENOMEM; |
126 | goto error; | 135 | goto error; |
127 | } | 136 | } |
137 | |||
138 | pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio); | ||
139 | |||
128 | rc = request_irq(xc->esc_virq[prio], xive_esc_irq, | 140 | rc = request_irq(xc->esc_virq[prio], xive_esc_irq, |
129 | IRQF_NO_THREAD, name, vcpu); | 141 | IRQF_NO_THREAD, name, vcpu); |
130 | if (rc) { | 142 | if (rc) { |
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | |||
133 | goto error; | 145 | goto error; |
134 | } | 146 | } |
135 | xc->esc_virq_names[prio] = name; | 147 | xc->esc_virq_names[prio] = name; |
148 | |||
149 | /* In single escalation mode, we grab the ESB MMIO of the | ||
150 | * interrupt and mask it. Also populate the VCPU v/raddr | ||
151 | * of the ESB page for use by asm entry/exit code. Finally | ||
152 | * set the XIVE_IRQ_NO_EOI flag which will prevent the | ||
153 | * core code from performing an EOI on the escalation | ||
154 | * interrupt, thus leaving it effectively masked after | ||
155 | * it fires once. | ||
156 | */ | ||
157 | if (xc->xive->single_escalation) { | ||
158 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); | ||
159 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); | ||
160 | |||
161 | xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); | ||
162 | vcpu->arch.xive_esc_raddr = xd->eoi_page; | ||
163 | vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; | ||
164 | xd->flags |= XIVE_IRQ_NO_EOI; | ||
165 | } | ||
166 | |||
136 | return 0; | 167 | return 0; |
137 | error: | 168 | error: |
138 | irq_dispose_mapping(xc->esc_virq[prio]); | 169 | irq_dispose_mapping(xc->esc_virq[prio]); |
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) | |||
191 | 222 | ||
192 | pr_devel("Provisioning prio... %d\n", prio); | 223 | pr_devel("Provisioning prio... %d\n", prio); |
193 | 224 | ||
194 | /* Provision each VCPU and enable escalations */ | 225 | /* Provision each VCPU and enable escalations if needed */ |
195 | kvm_for_each_vcpu(i, vcpu, kvm) { | 226 | kvm_for_each_vcpu(i, vcpu, kvm) { |
196 | if (!vcpu->arch.xive_vcpu) | 227 | if (!vcpu->arch.xive_vcpu) |
197 | continue; | 228 | continue; |
198 | rc = xive_provision_queue(vcpu, prio); | 229 | rc = xive_provision_queue(vcpu, prio); |
199 | if (rc == 0) | 230 | if (rc == 0 && !xive->single_escalation) |
200 | xive_attach_escalation(vcpu, prio); | 231 | xive_attach_escalation(vcpu, prio); |
201 | if (rc) | 232 | if (rc) |
202 | return rc; | 233 | return rc; |
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
1082 | /* Allocate IPI */ | 1113 | /* Allocate IPI */ |
1083 | xc->vp_ipi = xive_native_alloc_irq(); | 1114 | xc->vp_ipi = xive_native_alloc_irq(); |
1084 | if (!xc->vp_ipi) { | 1115 | if (!xc->vp_ipi) { |
1116 | pr_err("Failed to allocate xive irq for VCPU IPI\n"); | ||
1085 | r = -EIO; | 1117 | r = -EIO; |
1086 | goto bail; | 1118 | goto bail; |
1087 | } | 1119 | } |
@@ -1092,18 +1124,33 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
1092 | goto bail; | 1124 | goto bail; |
1093 | 1125 | ||
1094 | /* | 1126 | /* |
1127 | * Enable the VP first as the single escalation mode will | ||
1128 | * affect escalation interrupts numbering | ||
1129 | */ | ||
1130 | r = xive_native_enable_vp(xc->vp_id, xive->single_escalation); | ||
1131 | if (r) { | ||
1132 | pr_err("Failed to enable VP in OPAL, err %d\n", r); | ||
1133 | goto bail; | ||
1134 | } | ||
1135 | |||
1136 | /* | ||
1095 | * Initialize queues. Initially we set them all for no queueing | 1137 | * Initialize queues. Initially we set them all for no queueing |
1096 | * and we enable escalation for queue 0 only which we'll use for | 1138 | * and we enable escalation for queue 0 only which we'll use for |
1097 | * our mfrr change notifications. If the VCPU is hot-plugged, we | 1139 | * our mfrr change notifications. If the VCPU is hot-plugged, we |
1098 | * do handle provisioning however. | 1140 | * do handle provisioning however based on the existing "map" |
1141 | * of enabled queues. | ||
1099 | */ | 1142 | */ |
1100 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | 1143 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { |
1101 | struct xive_q *q = &xc->queues[i]; | 1144 | struct xive_q *q = &xc->queues[i]; |
1102 | 1145 | ||
1146 | /* Single escalation, no queue 7 */ | ||
1147 | if (i == 7 && xive->single_escalation) | ||
1148 | break; | ||
1149 | |||
1103 | /* Is queue already enabled ? Provision it */ | 1150 | /* Is queue already enabled ? Provision it */ |
1104 | if (xive->qmap & (1 << i)) { | 1151 | if (xive->qmap & (1 << i)) { |
1105 | r = xive_provision_queue(vcpu, i); | 1152 | r = xive_provision_queue(vcpu, i); |
1106 | if (r == 0) | 1153 | if (r == 0 && !xive->single_escalation) |
1107 | xive_attach_escalation(vcpu, i); | 1154 | xive_attach_escalation(vcpu, i); |
1108 | if (r) | 1155 | if (r) |
1109 | goto bail; | 1156 | goto bail; |
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
1123 | if (r) | 1170 | if (r) |
1124 | goto bail; | 1171 | goto bail; |
1125 | 1172 | ||
1126 | /* Enable the VP */ | ||
1127 | r = xive_native_enable_vp(xc->vp_id); | ||
1128 | if (r) | ||
1129 | goto bail; | ||
1130 | |||
1131 | /* Route the IPI */ | 1173 | /* Route the IPI */ |
1132 | r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); | 1174 | r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); |
1133 | if (!r) | 1175 | if (!r) |
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) | |||
1474 | 1516 | ||
1475 | pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n", | 1517 | pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n", |
1476 | val, server, guest_prio); | 1518 | val, server, guest_prio); |
1519 | |||
1477 | /* | 1520 | /* |
1478 | * If the source doesn't already have an IPI, allocate | 1521 | * If the source doesn't already have an IPI, allocate |
1479 | * one and get the corresponding data | 1522 | * one and get the corresponding data |
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | |||
1762 | if (xive->vp_base == XIVE_INVALID_VP) | 1805 | if (xive->vp_base == XIVE_INVALID_VP) |
1763 | ret = -ENOMEM; | 1806 | ret = -ENOMEM; |
1764 | 1807 | ||
1808 | xive->single_escalation = xive_native_has_single_escalation(); | ||
1809 | |||
1765 | if (ret) { | 1810 | if (ret) { |
1766 | kfree(xive); | 1811 | kfree(xive); |
1767 | return ret; | 1812 | return ret; |
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private) | |||
1795 | 1840 | ||
1796 | kvm_for_each_vcpu(i, vcpu, kvm) { | 1841 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1797 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 1842 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1843 | unsigned int i; | ||
1798 | 1844 | ||
1799 | if (!xc) | 1845 | if (!xc) |
1800 | continue; | 1846 | continue; |
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private) | |||
1804 | xc->server_num, xc->cppr, xc->hw_cppr, | 1850 | xc->server_num, xc->cppr, xc->hw_cppr, |
1805 | xc->mfrr, xc->pending, | 1851 | xc->mfrr, xc->pending, |
1806 | xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); | 1852 | xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); |
1853 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | ||
1854 | struct xive_q *q = &xc->queues[i]; | ||
1855 | u32 i0, i1, idx; | ||
1856 | |||
1857 | if (!q->qpage && !xc->esc_virq[i]) | ||
1858 | continue; | ||
1859 | |||
1860 | seq_printf(m, " [q%d]: ", i); | ||
1861 | |||
1862 | if (q->qpage) { | ||
1863 | idx = q->idx; | ||
1864 | i0 = be32_to_cpup(q->qpage + idx); | ||
1865 | idx = (idx + 1) & q->msk; | ||
1866 | i1 = be32_to_cpup(q->qpage + idx); | ||
1867 | seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); | ||
1868 | } | ||
1869 | if (xc->esc_virq[i]) { | ||
1870 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); | ||
1871 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); | ||
1872 | u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); | ||
1873 | seq_printf(m, "E:%c%c I(%d:%llx:%llx)", | ||
1874 | (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', | ||
1875 | (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', | ||
1876 | xc->esc_virq[i], pq, xd->eoi_page); | ||
1877 | seq_printf(m, "\n"); | ||
1878 | } | ||
1879 | } | ||
1807 | 1880 | ||
1808 | t_rm_h_xirr += xc->stat_rm_h_xirr; | 1881 | t_rm_h_xirr += xc->stat_rm_h_xirr; |
1809 | t_rm_h_ipoll += xc->stat_rm_h_ipoll; | 1882 | t_rm_h_ipoll += xc->stat_rm_h_ipoll; |
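
Note: the new debugfs dump does two things per queue: it peeks at the next two event-queue entries (32-bit big-endian words, with the index wrapped through the power-of-two mask) and, for the escalation source, decodes the ESB P/Q state into the 'P'/'p' and 'Q'/'q' letters. A runnable user-space model of both pieces, with ntohl() playing the role of be32_to_cpup() and P/Q bit values as in asm/xive-regs.h:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>  /* ntohl() stands in for be32_to_cpup() */

#define XIVE_ESB_VAL_P 0x2      /* values as in asm/xive-regs.h */
#define XIVE_ESB_VAL_Q 0x1

int main(void)
{
        /* A 4-entry queue page; msk is entries - 1, idx wraps through it. */
        uint32_t qpage[4] = { 0, htonl(0x80000017), htonl(0x80000023), 0 };
        uint32_t msk = 3, idx = 1;
        uint32_t i0, i1;

        i0 = ntohl(qpage[idx]);
        idx = (idx + 1) & msk;
        i1 = ntohl(qpage[idx]);
        printf("%08x %08x\n", (unsigned)i0, (unsigned)i1);

        /* Decode an ESB load result the way the dump prints it. */
        uint64_t pq = XIVE_ESB_VAL_P;   /* P set, Q clear */
        printf("E:%c%c\n",
               (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
               (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q');     /* E:Pq */
        return 0;
}
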
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 6ba63f8e8a61..a08ae6fd4c51 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h | |||
@@ -120,6 +120,8 @@ struct kvmppc_xive { | |||
120 | u32 q_order; | 120 | u32 q_order; |
121 | u32 q_page_order; | 121 | u32 q_page_order; |
122 | 122 | ||
123 | /* Flags */ | ||
124 | u8 single_escalation; | ||
123 | }; | 125 | }; |
124 | 126 | ||
125 | #define KVMPPC_XIVE_Q_COUNT 8 | 127 | #define KVMPPC_XIVE_Q_COUNT 8 |
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp | |||
201 | * is as follow. | 203 | * is as follow. |
202 | * | 204 | * |
203 | * Guest request for 0...6 are honored. Guest request for anything | 205 | * Guest request for 0...6 are honored. Guest request for anything |
204 | * higher results in a priority of 7 being applied. | 206 | * higher results in a priority of 6 being applied. |
205 | * | ||
206 | * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb | ||
207 | * in order to match AIX expectations | ||
208 | * | 207 | * |
209 | * Similar mapping is done for CPPR values | 208 | * Similar mapping is done for CPPR values |
210 | */ | 209 | */ |
211 | static inline u8 xive_prio_from_guest(u8 prio) | 210 | static inline u8 xive_prio_from_guest(u8 prio) |
212 | { | 211 | { |
213 | if (prio == 0xff || prio < 8) | 212 | if (prio == 0xff || prio < 6) |
214 | return prio; | 213 | return prio; |
215 | return 7; | 214 | return 6; |
216 | } | 215 | } |
217 | 216 | ||
218 | static inline u8 xive_prio_to_guest(u8 prio) | 217 | static inline u8 xive_prio_to_guest(u8 prio) |
219 | { | 218 | { |
220 | if (prio == 0xff || prio < 7) | 219 | return prio; |
221 | return prio; | ||
222 | return 0xb; | ||
223 | } | 220 | } |
224 | 221 | ||
225 | static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle) | 222 | static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle) |
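
Note: the guest-visible priority range thus shrinks from 0..7 to 0..6, queue 7 being reserved now that single-escalation mode can claim it, and the old 7 -> 0xb translation for AIX disappears, making the guest-bound direction the identity. A runnable restatement of the new mapping:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

typedef uint8_t u8;

/* Guest priorities 0..5 pass through, 6 and above clamp to 6
 * (queue 7 is reserved), and 0xff (masked) is preserved. */
static u8 xive_prio_from_guest(u8 prio)
{
        if (prio == 0xff || prio < 6)
                return prio;
        return 6;
}

/* The reverse direction is now the identity: no more 7 -> 0xb fixup. */
static u8 xive_prio_to_guest(u8 prio)
{
        return prio;
}

int main(void)
{
        assert(xive_prio_from_guest(5) == 5);
        assert(xive_prio_from_guest(6) == 6);
        assert(xive_prio_from_guest(7) == 6);           /* clamped */
        assert(xive_prio_from_guest(0xff) == 0xff);     /* masked */
        assert(xive_prio_to_guest(6) == 6);
        printf("priority mapping ok\n");
        return 0;
}
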
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 545a230f675f..748562ec9a04 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -763,7 +763,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
763 | 763 | ||
764 | hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 764 | hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
765 | vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; | 765 | vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; |
766 | vcpu->arch.dec_expires = ~(u64)0; | 766 | vcpu->arch.dec_expires = get_tb(); |
767 | 767 | ||
768 | #ifdef CONFIG_KVM_EXIT_TIMING | 768 | #ifdef CONFIG_KVM_EXIT_TIMING |
769 | mutex_init(&vcpu->arch.exit_timing_lock); | 769 | mutex_init(&vcpu->arch.exit_timing_lock); |
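
Note: dec_expires previously started at ~(u64)0, an effectively infinite sentinel; with the decrementer now migratable through the new KVM_REG_PPC_DEC_EXPIRY register, it starts at the current timebase, plausibly so that the value read back for a freshly created vCPU is anchored to the timebase rather than a sentinel. A toy model of the invariant the register relies on (names illustrative, not the kernel's exact arithmetic):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t tb = 1000000;          /* pretend get_tb() */
        uint64_t dec_expires = tb;      /* new initialization */
        int64_t dec = (int64_t)(dec_expires - tb);

        /* Expiry expressed relative to the timebase: initially 0,
         * not the huge distance implied by the old ~(u64)0. */
        printf("initial DEC = %lld\n", (long long)dec);
        return 0;
}
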
@@ -1106,11 +1106,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1106 | { | 1106 | { |
1107 | enum emulation_result emulated = EMULATE_DONE; | 1107 | enum emulation_result emulated = EMULATE_DONE; |
1108 | 1108 | ||
1109 | /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ | 1109 | /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */ |
1110 | if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || | 1110 | if (vcpu->arch.mmio_vsx_copy_nums > 4) |
1111 | (vcpu->arch.mmio_vsx_copy_nums < 0) ) { | ||
1112 | return EMULATE_FAIL; | 1111 | return EMULATE_FAIL; |
1113 | } | ||
1114 | 1112 | ||
1115 | while (vcpu->arch.mmio_vsx_copy_nums) { | 1113 | while (vcpu->arch.mmio_vsx_copy_nums) { |
1116 | emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, | 1114 | emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, |
@@ -1252,11 +1250,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1252 | 1250 | ||
1253 | vcpu->arch.io_gpr = rs; | 1251 | vcpu->arch.io_gpr = rs; |
1254 | 1252 | ||
1255 | /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ | 1253 | /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */ |
1256 | if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || | 1254 | if (vcpu->arch.mmio_vsx_copy_nums > 4) |
1257 | (vcpu->arch.mmio_vsx_copy_nums < 0) ) { | ||
1258 | return EMULATE_FAIL; | 1255 | return EMULATE_FAIL; |
1259 | } | ||
1260 | 1256 | ||
1261 | while (vcpu->arch.mmio_vsx_copy_nums) { | 1257 | while (vcpu->arch.mmio_vsx_copy_nums) { |
1262 | if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) | 1258 | if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) |
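
Note: both VSX hunks drop the same dead branch: mmio_vsx_copy_nums is an unsigned field, so the '< 0' arm could never be taken, and the comment is corrected to match the check, which accepts values up to and including 4. A minimal demonstration of why the comparison was dead:

#include <stdio.h>

int main(void)
{
        unsigned int copy_nums = 0;

        copy_nums--;    /* wraps to UINT_MAX rather than going negative */
        printf("copy_nums < 0 is %d; copy_nums > 4 is %d\n",
               copy_nums < 0, copy_nums > 4);   /* prints 0 and 1 */
        return 0;
}
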
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index e44d2b2ea97e..1c03c978eb18 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c | |||
@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) | |||
143 | int i; | 143 | int i; |
144 | u64 min, max, sum, sum_quad; | 144 | u64 min, max, sum, sum_quad; |
145 | 145 | ||
146 | seq_printf(m, "%s", "type count min max sum sum_squared\n"); | 146 | seq_puts(m, "type count min max sum sum_squared\n"); |
147 | |||
148 | 147 | ||
149 | for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { | 148 | for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { |
150 | 149 | ||
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a3b8d7d1316e..2547b6021e6a 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c | |||
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d) | |||
367 | * EOI the source if it hasn't been disabled and hasn't | 367 | * EOI the source if it hasn't been disabled and hasn't |
368 | * been passed-through to a KVM guest | 368 | * been passed-through to a KVM guest |
369 | */ | 369 | */ |
370 | if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) | 370 | if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && |
371 | !(xd->flags & XIVE_IRQ_NO_EOI)) | ||
371 | xive_do_source_eoi(irqd_to_hwirq(d), xd); | 372 | xive_do_source_eoi(irqd_to_hwirq(d), xd); |
372 | 373 | ||
373 | /* | 374 | /* |
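
Note: the EOI path gains a third condition: sources flagged XIVE_IRQ_NO_EOI are never EOIed from here, which this series uses for escalation interrupts whose acknowledgement is handled elsewhere. A small model of the extended guard (the flag value below is a stand-in; the real one lives in asm/xive.h):

#include <stdbool.h>
#include <stdio.h>

#define XIVE_IRQ_NO_EOI 0x01    /* stand-in; see asm/xive.h */

struct src { bool disabled, forwarded; unsigned int flags; };

/* EOI only when enabled, not forwarded to a vCPU, and not "no EOI". */
static bool should_eoi(const struct src *s)
{
        return !s->disabled && !s->forwarded &&
               !(s->flags & XIVE_IRQ_NO_EOI);
}

int main(void)
{
        struct src esc = { .flags = XIVE_IRQ_NO_EOI };
        struct src plain = { 0 };

        printf("escalation: %d, plain: %d\n",
               should_eoi(&esc), should_eoi(&plain));   /* 0, 1 */
        return 0;
}
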
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index ebc244b08d67..d22aeb0b69e1 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c | |||
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count; | |||
42 | static u32 xive_queue_shift; | 42 | static u32 xive_queue_shift; |
43 | static u32 xive_pool_vps = XIVE_INVALID_VP; | 43 | static u32 xive_pool_vps = XIVE_INVALID_VP; |
44 | static struct kmem_cache *xive_provision_cache; | 44 | static struct kmem_cache *xive_provision_cache; |
45 | static bool xive_has_single_esc; | ||
45 | 46 | ||
46 | int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) | 47 | int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) |
47 | { | 48 | { |
@@ -571,6 +572,10 @@ bool __init xive_native_init(void) | |||
571 | break; | 572 | break; |
572 | } | 573 | } |
573 | 574 | ||
575 | /* Do we support single escalation */ | ||
576 | if (of_get_property(np, "single-escalation-support", NULL) != NULL) | ||
577 | xive_has_single_esc = true; | ||
578 | |||
574 | /* Configure Thread Management areas for KVM */ | 579 | /* Configure Thread Management areas for KVM */ |
575 | for_each_possible_cpu(cpu) | 580 | for_each_possible_cpu(cpu) |
576 | kvmppc_set_xive_tima(cpu, r.start, tima); | 581 | kvmppc_set_xive_tima(cpu, r.start, tima); |
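
Note: firmware advertises the capability with a presence-only device-tree property, so the value is irrelevant and a non-NULL return from of_get_property() is the whole test; of_property_read_bool() would express the same thing. A user-space model of latching such a flag:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Pretend property list of the XIVE controller node. */
static const char *fdt_props[] = { "reg", "single-escalation-support" };

static bool has_prop(const char *name)
{
        for (unsigned i = 0; i < sizeof(fdt_props) / sizeof(fdt_props[0]); i++)
                if (!strcmp(fdt_props[i], name))
                        return true;
        return false;
}

int main(void)
{
        /* Existence alone latches the feature, as in xive_native_init(). */
        bool xive_has_single_esc = has_prop("single-escalation-support");

        printf("single escalation: %s\n", xive_has_single_esc ? "yes" : "no");
        return 0;
}
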
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base) | |||
667 | } | 672 | } |
668 | EXPORT_SYMBOL_GPL(xive_native_free_vp_block); | 673 | EXPORT_SYMBOL_GPL(xive_native_free_vp_block); |
669 | 674 | ||
670 | int xive_native_enable_vp(u32 vp_id) | 675 | int xive_native_enable_vp(u32 vp_id, bool single_escalation) |
671 | { | 676 | { |
672 | s64 rc; | 677 | s64 rc; |
678 | u64 flags = OPAL_XIVE_VP_ENABLED; | ||
673 | 679 | ||
680 | if (single_escalation) | ||
681 | flags |= OPAL_XIVE_VP_SINGLE_ESCALATION; | ||
674 | for (;;) { | 682 | for (;;) { |
675 | rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0); | 683 | rc = opal_xive_set_vp_info(vp_id, flags, 0); |
676 | if (rc != OPAL_BUSY) | 684 | if (rc != OPAL_BUSY) |
677 | break; | 685 | break; |
678 | msleep(1); | 686 | msleep(1); |
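
Note: xive_native_enable_vp() now builds a flags word, always OPAL_XIVE_VP_ENABLED plus optionally OPAL_XIVE_VP_SINGLE_ESCALATION, and keeps the usual retry loop for OPAL_BUSY. A user-space model of the pattern, with a stub standing in for opal_xive_set_vp_info() and illustrative flag values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VP_ENABLED              (1ull << 0)     /* stand-in values; the */
#define VP_SINGLE_ESCALATION    (1ull << 1)     /* real ones: opal-api.h */
#define BUSY                    (-1)

static int busy_left = 2;

static int opal_set(uint32_t vp, uint64_t flags)
{
        if (busy_left-- > 0)
                return BUSY;    /* firmware asks us to retry */
        printf("vp %u enabled, flags 0x%llx\n", vp,
               (unsigned long long)flags);
        return 0;
}

static int enable_vp(uint32_t vp_id, bool single_escalation)
{
        uint64_t flags = VP_ENABLED;
        int rc;

        if (single_escalation)
                flags |= VP_SINGLE_ESCALATION;
        for (;;) {
                rc = opal_set(vp_id, flags);
                if (rc != BUSY)
                        break;
                /* the kernel sleeps 1ms (msleep) between retries */
        }
        return rc;
}

int main(void)
{
        return enable_vp(42, true);
}
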
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) | |||
710 | return 0; | 718 | return 0; |
711 | } | 719 | } |
712 | EXPORT_SYMBOL_GPL(xive_native_get_vp_info); | 720 | EXPORT_SYMBOL_GPL(xive_native_get_vp_info); |
721 | |||
722 | bool xive_native_has_single_escalation(void) | ||
723 | { | ||
724 | return xive_has_single_esc; | ||
725 | } | ||
726 | EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); | ||
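
Note: the export keeps the capability private to the XIVE native backend and hands consumers a predicate instead, which kvmppc_xive_create() above latches once per device rather than re-querying on every vCPU connect. A minimal model of that shape:

#include <stdbool.h>
#include <stdio.h>

static bool xive_has_single_esc;        /* set during controller init */

bool xive_native_has_single_escalation(void)
{
        return xive_has_single_esc;
}

int main(void)
{
        xive_has_single_esc = true;     /* pretend firmware advertised it */
        if (xive_native_has_single_escalation())
                printf("KVM device will run in single-escalation mode\n");
        return 0;
}
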