authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2017-04-05 03:54:50 -0400
committerMichael Ellerman <mpe@ellerman.id.au>2017-04-10 07:41:34 -0400
commit243e25112d06b348f087a6f7aba4bbc288285bdd (patch)
treec4f605e98e6e8cf6dbe730c946142943b82ac38e
parenta978e13965a40ac07163643cc3fa0ddb0d354198 (diff)
powerpc/xive: Native exploitation of the XIVE interrupt controller
The XIVE interrupt controller is the new interrupt controller found in POWER9. It supports advanced virtualization capabilities among other things.

Currently we use a set of firmware calls that simulate the old "XICS" interrupt controller but this is fairly inefficient.

This adds the framework for using XIVE along with a native backend which uses OPAL for configuration. Later, a backend allowing the use in a KVM or PowerVM guest will also be provided.

This disables some fast paths for interrupts in KVM when XIVE is enabled, as these rely on the firmware emulation code which is no longer available when the XIVE is used natively by Linux. A later patch will make KVM also directly exploit the XIVE, thus recovering the lost performance (and more).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[mpe: Fixup pr_xxx("XIVE:"...), don't split pr_xxx() strings,
 tweak Kconfig so XIVE_NATIVE selects XIVE and depends on POWERNV,
 fix build errors when SMP=n, fold in fixes from Ben:
   Don't call cpu_online() on an invalid CPU number
   Fix irq target selection returning out of bounds cpu#
   Extra sanity checks on cpu numbers
]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--  arch/powerpc/include/asm/xive-regs.h        97
-rw-r--r--  arch/powerpc/include/asm/xive.h            163
-rw-r--r--  arch/powerpc/include/asm/xmon.h              2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c         8
-rw-r--r--  arch/powerpc/platforms/powernv/Kconfig       1
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c      15
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c        39
-rw-r--r--  arch/powerpc/sysdev/Kconfig                  1
-rw-r--r--  arch/powerpc/sysdev/Makefile                 1
-rw-r--r--  arch/powerpc/sysdev/xive/Kconfig            11
-rw-r--r--  arch/powerpc/sysdev/xive/Makefile            4
-rw-r--r--  arch/powerpc/sysdev/xive/common.c         1302
-rw-r--r--  arch/powerpc/sysdev/xive/native.c          639
-rw-r--r--  arch/powerpc/sysdev/xive/xive-internal.h    62
-rw-r--r--  arch/powerpc/xmon/xmon.c                    94
15 files changed, 2427 insertions, 12 deletions
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
new file mode 100644
index 000000000000..1d3f2be5ae39
--- /dev/null
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef _ASM_POWERPC_XIVE_REGS_H
10#define _ASM_POWERPC_XIVE_REGS_H
11
12/*
13 * Thread Management (aka "TM") registers
14 */
15
16/* TM register offsets */
17#define TM_QW0_USER 0x000 /* All rings */
18#define TM_QW1_OS 0x010 /* Ring 0..2 */
19#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */
20#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */
21
22/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */
23#define TM_NSR 0x0 /* + + - + */
24#define TM_CPPR 0x1 /* - + - + */
25#define TM_IPB 0x2 /* - + + + */
26#define TM_LSMFB 0x3 /* - + + + */
27#define TM_ACK_CNT 0x4 /* - + - - */
28#define TM_INC 0x5 /* - + - + */
29#define TM_AGE 0x6 /* - + - + */
30#define TM_PIPR 0x7 /* - + - + */
31
32#define TM_WORD0 0x0
33#define TM_WORD1 0x4
34
35/*
36 * QW word 2 contains the valid bit at the top and other fields
37 * depending on the QW.
38 */
39#define TM_WORD2 0x8
40#define TM_QW0W2_VU PPC_BIT32(0)
41#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1,31) // XX 2,31 ?
42#define TM_QW1W2_VO PPC_BIT32(0)
43#define TM_QW1W2_OS_CAM PPC_BITMASK32(8,31)
44#define TM_QW2W2_VP PPC_BIT32(0)
45#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8,31)
46#define TM_QW3W2_VT PPC_BIT32(0)
47#define TM_QW3W2_LP PPC_BIT32(6)
48#define TM_QW3W2_LE PPC_BIT32(7)
49#define TM_QW3W2_T PPC_BIT32(31)
50
51/*
52 * In addition to normal loads to "peek" and writes (only when invalid)
53 * using 4 and 8 byte accesses, the above registers support these
54 * "special" byte operations:
55 *
56 * - Byte load from QW0[NSR] - User level NSR (EBB)
57 * - Byte store to QW0[NSR] - User level NSR (EBB)
58 * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
59 * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
60 * otherwise VT||0000000
61 * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
62 *
63 * Then we have all these "special" CI ops at these offsets that trigger
64 * all sorts of side effects:
65 */
66#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/
67#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */
68#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */
69#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user context */
70#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */
71#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS context to reg */
72#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate Pool context to reg*/
73#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */
74#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd line */
75#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */
76#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even line */
77#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */
78/* XXX more... */
79
80/* NSR fields for the various QW ack types */
81#define TM_QW0_NSR_EB PPC_BIT8(0)
82#define TM_QW1_NSR_EO PPC_BIT8(0)
83#define TM_QW3_NSR_HE PPC_BITMASK8(0,1)
84#define TM_QW3_NSR_HE_NONE 0
85#define TM_QW3_NSR_HE_POOL 1
86#define TM_QW3_NSR_HE_PHYS 2
87#define TM_QW3_NSR_HE_LSI 3
88#define TM_QW3_NSR_I PPC_BIT8(2)
89#define TM_QW3_NSR_GRP_LVL PPC_BITMASK8(3,7)
90
91/* Utilities to manipulate these (originally from OPAL) */
92#define MASK_TO_LSH(m) (__builtin_ffsl(m) - 1)
93#define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m))
94#define SETFIELD(m, v, val) \
95 (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))
96
97#endif /* _ASM_POWERPC_XIVE_REGS_H */
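Illustration (not part of the patch): the GETFIELD/SETFIELD helpers above operate on the PPC_BITMASK-style masks defined in this header. A minimal sketch of how a field such as the 2-bit TM_QW3_NSR_HE could be extracted and rewritten from an NSR byte already read from the TIMA; the helper names are hypothetical:

	static inline u8 example_nsr_he(u8 nsr)
	{
		/* Extract the 2-bit HE field, e.g. TM_QW3_NSR_HE_PHYS for a HV phys interrupt */
		return GETFIELD(TM_QW3_NSR_HE, nsr);
	}

	static inline u8 example_set_nsr_he(u8 nsr, u8 he)
	{
		/* Rewrite only the HE field, leaving the other NSR bits untouched */
		return SETFIELD(TM_QW3_NSR_HE, nsr, he);
	}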
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
new file mode 100644
index 000000000000..3cdbeaeac397
--- /dev/null
+++ b/arch/powerpc/include/asm/xive.h
@@ -0,0 +1,163 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef _ASM_POWERPC_XIVE_H
10#define _ASM_POWERPC_XIVE_H
11
12#define XIVE_INVALID_VP 0xffffffff
13
14#ifdef CONFIG_PPC_XIVE
15
16/*
17 * Thread Interrupt Management Area (TIMA)
18 *
19 * This is a global MMIO region divided in 4 pages of varying access
20 * permissions, providing access to per-cpu interrupt management
21 * functions. It always identifies the CPU doing the access based
22 * on the PowerBus initiator ID, thus we always access via the
23 * same offset regardless of where the code is executing
24 */
25extern void __iomem *xive_tima;
26
27/*
28 * Offset in the TM area of our current execution level (provided by
29 * the backend)
30 */
31extern u32 xive_tima_offset;
32
33/*
34 * Per-irq data (irq_get_handler_data for normal IRQs), IPIs
35 * have it stored in the xive_cpu structure. We also cache
36 * for normal interrupts the current target CPU.
37 *
38 * This structure is setup by the backend for each interrupt.
39 */
40struct xive_irq_data {
41 u64 flags;
42 u64 eoi_page;
43 void __iomem *eoi_mmio;
44 u64 trig_page;
45 void __iomem *trig_mmio;
46 u32 esb_shift;
47 int src_chip;
48
49 /* Setup/used by frontend */
50 int target;
51 bool saved_p;
52};
53#define XIVE_IRQ_FLAG_STORE_EOI 0x01
54#define XIVE_IRQ_FLAG_LSI 0x02
55#define XIVE_IRQ_FLAG_SHIFT_BUG 0x04
56#define XIVE_IRQ_FLAG_MASK_FW 0x08
57#define XIVE_IRQ_FLAG_EOI_FW 0x10
58
59#define XIVE_INVALID_CHIP_ID -1
60
61/* A queue tracking structure in a CPU */
62struct xive_q {
63 __be32 *qpage;
64 u32 msk;
65 u32 idx;
66 u32 toggle;
67 u64 eoi_phys;
68 u32 esc_irq;
69 atomic_t count;
70 atomic_t pending_count;
71};
72
73/*
74 * "magic" Event State Buffer (ESB) MMIO offsets.
75 *
76 * Each interrupt source has a 2-bit state machine called ESB
77 * which can be controlled by MMIO. It's made of 2 bits, P and
78 * Q. P indicates that an interrupt is pending (has been sent
79 * to a queue and is waiting for an EOI). Q indicates that the
80 * interrupt has been triggered while pending.
81 *
82 * This acts as a coalescing mechanism in order to guarantee
83 * that a given interrupt only occurs at most once in a queue.
84 *
85 * When doing an EOI, the Q bit will indicate if the interrupt
86 * needs to be re-triggered.
87 *
88 * The following offsets into the ESB MMIO allow to read or
89 * manipulate the PQ bits. They must be used with an 8-byte
90 * load instruction. They all return the previous state of the
91 * interrupt (atomically).
92 *
93 * Additionally, some ESB pages support doing an EOI via a
94 * store at 0 and some ESBs support doing a trigger via a
95 * separate trigger page.
96 */
97#define XIVE_ESB_GET 0x800
98#define XIVE_ESB_SET_PQ_00 0xc00
99#define XIVE_ESB_SET_PQ_01 0xd00
100#define XIVE_ESB_SET_PQ_10 0xe00
101#define XIVE_ESB_SET_PQ_11 0xf00
102#define XIVE_ESB_MASK XIVE_ESB_SET_PQ_01
103
104#define XIVE_ESB_VAL_P 0x2
105#define XIVE_ESB_VAL_Q 0x1
106
107/* Global enable flags for the XIVE support */
108extern bool __xive_enabled;
109
110static inline bool xive_enabled(void) { return __xive_enabled; }
111
112extern bool xive_native_init(void);
113extern void xive_smp_probe(void);
114extern int xive_smp_prepare_cpu(unsigned int cpu);
115extern void xive_smp_setup_cpu(void);
116extern void xive_smp_disable_cpu(void);
117extern void xive_kexec_teardown_cpu(int secondary);
118extern void xive_shutdown(void);
119extern void xive_flush_interrupt(void);
120
121/* xmon hook */
122extern void xmon_xive_do_dump(int cpu);
123
124/* APIs used by KVM */
125extern u32 xive_native_default_eq_shift(void);
126extern u32 xive_native_alloc_vp_block(u32 max_vcpus);
127extern void xive_native_free_vp_block(u32 vp_base);
128extern int xive_native_populate_irq_data(u32 hw_irq,
129 struct xive_irq_data *data);
130extern void xive_cleanup_irq_data(struct xive_irq_data *xd);
131extern u32 xive_native_alloc_irq(void);
132extern void xive_native_free_irq(u32 irq);
133extern int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
134
135extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
136 __be32 *qpage, u32 order, bool can_escalate);
137extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
138
139extern bool __xive_irq_trigger(struct xive_irq_data *xd);
140extern bool __xive_irq_retrigger(struct xive_irq_data *xd);
141extern void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd);
142
143extern bool is_xive_irq(struct irq_chip *chip);
144
145#else
146
147static inline bool xive_enabled(void) { return false; }
148
149static inline bool xive_native_init(void) { return false; }
150static inline void xive_smp_probe(void) { }
151static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
152static inline void xive_smp_setup_cpu(void) { }
153static inline void xive_smp_disable_cpu(void) { }
154static inline void xive_kexec_teardown_cpu(int secondary) { }
155static inline void xive_shutdown(void) { }
156static inline void xive_flush_interrupt(void) { }
157
158static inline u32 xive_native_alloc_vp_block(u32 max_vcpus) { return XIVE_INVALID_VP; }
159static inline void xive_native_free_vp_block(u32 vp_base) { }
160
161#endif
162
163#endif /* _ASM_POWERPC_XIVE_H */
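Illustration (not part of the patch): a minimal sketch of the ESB "magic" MMIO accesses described above. It assumes the source's ESB page has been ioremap'ed to esb_mmio and uses the standard powerpc in_be64() accessor; the helper names are hypothetical (the in-kernel equivalent lives in sysdev/xive/common.c):

	static u8 example_esb_read_pq(void __iomem *esb_mmio)
	{
		/* An 8-byte load at XIVE_ESB_GET returns the current PQ state */
		return (u8)in_be64(esb_mmio + XIVE_ESB_GET);
	}

	static bool example_esb_mask(void __iomem *esb_mmio)
	{
		/* Set PQ to 01 (masked); the load atomically returns the previous state */
		u8 prev = (u8)in_be64(esb_mmio + XIVE_ESB_MASK);

		/* Report whether the interrupt was pending (P set) when we masked it */
		return prev & XIVE_ESB_VAL_P;
	}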
diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h
index 5eb8e599e5cc..eb42a0c6e1d9 100644
--- a/arch/powerpc/include/asm/xmon.h
+++ b/arch/powerpc/include/asm/xmon.h
@@ -29,5 +29,7 @@ static inline void xmon_register_spus(struct list_head *list) { };
 extern int cpus_are_in_xmon(void);
 #endif
 
+extern void xmon_printf(const char *format, ...);
+
 #endif /* __KERNEL __ */
 #endif /* __ASM_POWERPC_XMON_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 4d6c64b3041c..ae55603cf661 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -23,6 +23,7 @@
 #include <asm/kvm_book3s.h>
 #include <asm/archrandom.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/dbell.h>
 #include <asm/cputhreads.h>
 #include <asm/io.h>
@@ -224,6 +225,10 @@ void kvmhv_rm_send_ipi(int cpu)
 		return;
 	}
 
+	/* We should never reach this */
+	if (WARN_ON_ONCE(xive_enabled()))
+		return;
+
 	/* Else poke the target with an IPI */
 	xics_phys = paca[cpu].kvm_hstate.xics_phys;
 	if (xics_phys)
@@ -386,6 +391,9 @@ long kvmppc_read_intr(void)
 	long rc;
 	bool again;
 
+	if (xive_enabled())
+		return 1;
+
 	do {
 		again = false;
 		rc = kvmppc_read_one_intr(&again);
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 3a07e4dcf97c..9689a6272995 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -4,6 +4,7 @@ config PPC_POWERNV
 	select PPC_NATIVE
 	select PPC_XICS
 	select PPC_ICP_NATIVE
+	select PPC_XIVE_NATIVE
 	select PPC_P7_NAP
 	select PCI
 	select PCI_MSI
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d50c7d99baaf..adceac978d18 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -32,6 +32,7 @@
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
@@ -76,7 +77,9 @@ static void __init pnv_init(void)
 
 static void __init pnv_init_IRQ(void)
 {
-	xics_init();
+	/* Try using a XIVE if available, otherwise use a XICS */
+	if (!xive_native_init())
+		xics_init();
 
 	WARN_ON(!ppc_md.get_irq);
 }
@@ -218,10 +221,12 @@ static void pnv_kexec_wait_secondaries_down(void)
 
 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 {
-	xics_kexec_teardown_cpu(secondary);
+	if (xive_enabled())
+		xive_kexec_teardown_cpu(secondary);
+	else
+		xics_kexec_teardown_cpu(secondary);
 
 	/* On OPAL, we return all CPUs to firmware */
-
 	if (!firmware_has_feature(FW_FEATURE_OPAL))
 		return;
 
@@ -237,6 +242,10 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 	/* Primary waits for the secondaries to have reached OPAL */
 	pnv_kexec_wait_secondaries_down();
 
+	/* Switch XIVE back to emulation mode */
+	if (xive_enabled())
+		xive_shutdown();
+
 	/*
 	 * We might be running as little-endian - now that interrupts
 	 * are disabled, reset the HILE bit to big-endian so we don't
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 8b67e1eefb5c..f57195588c6c 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -29,6 +29,7 @@
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/opal.h>
 #include <asm/runlatch.h>
 #include <asm/code-patching.h>
@@ -47,7 +48,9 @@
 
 static void pnv_smp_setup_cpu(int cpu)
 {
-	if (cpu != boot_cpuid)
+	if (xive_enabled())
+		xive_smp_setup_cpu();
+	else if (cpu != boot_cpuid)
 		xics_setup_cpu();
 
 #ifdef CONFIG_PPC_DOORBELL
@@ -132,7 +135,10 @@ static int pnv_smp_cpu_disable(void)
 	vdso_data->processorCount--;
 	if (cpu == boot_cpuid)
 		boot_cpuid = cpumask_any(cpu_online_mask);
-	xics_migrate_irqs_away();
+	if (xive_enabled())
+		xive_smp_disable_cpu();
+	else
+		xics_migrate_irqs_away();
 	return 0;
 }
 
@@ -213,9 +219,12 @@ static void pnv_smp_cpu_kill_self(void)
 	if (((srr1 & wmask) == SRR1_WAKEEE) ||
 	    ((srr1 & wmask) == SRR1_WAKEHVI) ||
 	    (local_paca->irq_happened & PACA_IRQ_EE)) {
-		if (cpu_has_feature(CPU_FTR_ARCH_300))
-			icp_opal_flush_interrupt();
-		else
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			if (xive_enabled())
+				xive_flush_interrupt();
+			else
+				icp_opal_flush_interrupt();
+		} else
 			icp_native_flush_interrupt();
 	} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
 		unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
@@ -252,10 +261,26 @@ static int pnv_cpu_bootable(unsigned int nr)
 	return smp_generic_cpu_bootable(nr);
 }
 
+static int pnv_smp_prepare_cpu(int cpu)
+{
+	if (xive_enabled())
+		return xive_smp_prepare_cpu(cpu);
+	return 0;
+}
+
+static void __init pnv_smp_probe(void)
+{
+	if (xive_enabled())
+		xive_smp_probe();
+	else
+		xics_smp_probe();
+}
+
 static struct smp_ops_t pnv_smp_ops = {
 	.message_pass = smp_muxed_ipi_message_pass,
-	.cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
-	.probe = xics_smp_probe,
+	.cause_ipi = NULL, /* Filled at runtime by xi{cs,ve}_smp_probe() */
+	.probe = pnv_smp_probe,
+	.prepare_cpu = pnv_smp_prepare_cpu,
 	.kick_cpu = pnv_smp_kick_cpu,
 	.setup_cpu = pnv_smp_setup_cpu,
 	.cpu_bootable = pnv_cpu_bootable,
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 52dc165c0efb..caf882e749dc 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -28,6 +28,7 @@ config PPC_MSI_BITMAP
 	default y if PPC_POWERNV
 
 source "arch/powerpc/sysdev/xics/Kconfig"
+source "arch/powerpc/sysdev/xive/Kconfig"
 
 config PPC_SCOM
 	bool
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index a254824719f1..c0ae11d4f62f 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -71,5 +71,6 @@ obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 obj-$(CONFIG_PPC_XICS) += xics/
+obj-$(CONFIG_PPC_XIVE) += xive/
 
 obj-$(CONFIG_GE_FPGA) += ge/
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
new file mode 100644
index 000000000000..12ccd7373d2f
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -0,0 +1,11 @@
1config PPC_XIVE
2 bool
3 default n
4 select PPC_SMP_MUXED_IPI
5 select HARDIRQS_SW_RESEND
6
7config PPC_XIVE_NATIVE
8 bool
9 default n
10 select PPC_XIVE
11 depends on PPC_POWERNV
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile
new file mode 100644
index 000000000000..3fab303fc169
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -0,0 +1,4 @@
1subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
2
3obj-y += common.o
4obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
new file mode 100644
index 000000000000..d9cd7f705f21
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -0,0 +1,1302 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#define pr_fmt(fmt) "xive: " fmt
11
12#include <linux/types.h>
13#include <linux/threads.h>
14#include <linux/kernel.h>
15#include <linux/irq.h>
16#include <linux/debugfs.h>
17#include <linux/smp.h>
18#include <linux/interrupt.h>
19#include <linux/seq_file.h>
20#include <linux/init.h>
21#include <linux/cpu.h>
22#include <linux/of.h>
23#include <linux/slab.h>
24#include <linux/spinlock.h>
25#include <linux/msi.h>
26
27#include <asm/prom.h>
28#include <asm/io.h>
29#include <asm/smp.h>
30#include <asm/machdep.h>
31#include <asm/irq.h>
32#include <asm/errno.h>
33#include <asm/xive.h>
34#include <asm/xive-regs.h>
35#include <asm/xmon.h>
36
37#include "xive-internal.h"
38
39#undef DEBUG_FLUSH
40#undef DEBUG_ALL
41
42#ifdef DEBUG_ALL
43#define DBG_VERBOSE(fmt...) pr_devel(fmt)
44#else
45#define DBG_VERBOSE(fmt...) do { } while(0)
46#endif
47
48bool __xive_enabled;
49bool xive_cmdline_disabled;
50
51/* We use only one priority for now */
52static u8 xive_irq_priority;
53
54/* TIMA */
55void __iomem *xive_tima;
56u32 xive_tima_offset;
57
58/* Backend ops */
59static const struct xive_ops *xive_ops;
60
61/* Our global interrupt domain */
62static struct irq_domain *xive_irq_domain;
63
64#ifdef CONFIG_SMP
65/* The IPIs all use the same logical irq number */
66static u32 xive_ipi_irq;
67#endif
68
69/* Xive state for each CPU */
70static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
71
72/*
73 * A "disabled" interrupt should never fire; to catch problems
74 * we set its logical number to this
75 */
76#define XIVE_BAD_IRQ 0x7fffffff
77#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1)
78
79/* An invalid CPU target */
80#define XIVE_INVALID_TARGET (-1)
81
82/*
83 * Read the next entry in a queue, return its content if it's valid
84 * or 0 if there is no new entry.
85 *
86 * The queue pointer is moved forward unless "just_peek" is set
87 */
88static u32 xive_read_eq(struct xive_q *q, bool just_peek)
89{
90 u32 cur;
91
92 if (!q->qpage)
93 return 0;
94 cur = be32_to_cpup(q->qpage + q->idx);
95
96 /* Check valid bit (31) vs current toggle polarity */
97 if ((cur >> 31) == q->toggle)
98 return 0;
99
100 /* If consuming from the queue ... */
101 if (!just_peek) {
102 /* Next entry */
103 q->idx = (q->idx + 1) & q->msk;
104
105 /* Wrap around: flip valid toggle */
106 if (q->idx == 0)
107 q->toggle ^= 1;
108 }
109 /* Mask out the valid bit (31) */
110 return cur & 0x7fffffff;
111}
112
113/*
114 * Scans all the queues that may have interrupts in them
115 * (based on "pending_prio") in priority order until an
116 * interrupt is found or all the queues are empty.
117 *
118 * Then updates the CPPR (Current Processor Priority
119 * Register) based on the most favored interrupt found
120 * (0xff if none) and return what was found (0 if none).
121 *
122 * If just_peek is set, return the most favored pending
123 * interrupt if any but don't update the queue pointers.
124 *
125 * Note: This function can operate generically on any number
126 * of queues (up to 8). The current implementation of the XIVE
127 * driver only uses a single queue however.
128 *
129 * Note2: This will also "flush" the pending_count of a queue
130 * into the "count" when that queue is observed to be empty.
131 * This is used to keep track of the number of interrupts
132 * targeting a queue. When an interrupt is moved away from
133 * a queue, we only decrement that queue count once the queue
134 * has been observed empty to avoid races.
135 */
136static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
137{
138 u32 irq = 0;
139 u8 prio;
140
141 /* Find highest pending priority */
142 while (xc->pending_prio != 0) {
143 struct xive_q *q;
144
145 prio = ffs(xc->pending_prio) - 1;
146 DBG_VERBOSE("scan_irq: trying prio %d\n", prio);
147
148 /* Try to fetch */
149 irq = xive_read_eq(&xc->queue[prio], just_peek);
150
151 /* Found something ? That's it */
152 if (irq)
153 break;
154
155 /* Clear pending bits */
156 xc->pending_prio &= ~(1 << prio);
157
158 /*
159 * Check if the queue count needs adjusting due to
160 * interrupts being moved away. See description of
161 * xive_dec_target_count()
162 */
163 q = &xc->queue[prio];
164 if (atomic_read(&q->pending_count)) {
165 int p = atomic_xchg(&q->pending_count, 0);
166 if (p) {
167 WARN_ON(p > atomic_read(&q->count));
168 atomic_sub(p, &q->count);
169 }
170 }
171 }
172
173 /* If nothing was found, set CPPR to 0xff */
174 if (irq == 0)
175 prio = 0xff;
176
177 /* Update HW CPPR to match if necessary */
178 if (prio != xc->cppr) {
179 DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
180 xc->cppr = prio;
181 out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
182 }
183
184 return irq;
185}
186
187/*
188 * This is used to perform the magic loads from an ESB
189 * described in xive.h
190 */
191static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset)
192{
193 u64 val;
194
195 /* Handle HW errata */
196 if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
197 offset |= offset << 4;
198
199 val = in_be64(xd->eoi_mmio + offset);
200
201 return (u8)val;
202}
203
204#ifdef CONFIG_XMON
205static void xive_dump_eq(const char *name, struct xive_q *q)
206{
207 u32 i0, i1, idx;
208
209 if (!q->qpage)
210 return;
211 idx = q->idx;
212 i0 = be32_to_cpup(q->qpage + idx);
213 idx = (idx + 1) & q->msk;
214 i1 = be32_to_cpup(q->qpage + idx);
215 xmon_printf(" %s Q T=%d %08x %08x ...\n", name,
216 q->toggle, i0, i1);
217}
218
219void xmon_xive_do_dump(int cpu)
220{
221 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
222
223 xmon_printf("XIVE state for CPU %d:\n", cpu);
224 xmon_printf(" pp=%02x cppr=%02x\n", xc->pending_prio, xc->cppr);
225 xive_dump_eq("IRQ", &xc->queue[xive_irq_priority]);
226#ifdef CONFIG_SMP
227 {
228 u64 val = xive_poke_esb(&xc->ipi_data, XIVE_ESB_GET);
229 xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi,
230 val & XIVE_ESB_VAL_P ? 'P' : 'p',
231 val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
232 }
233#endif
234}
235#endif /* CONFIG_XMON */
236
237static unsigned int xive_get_irq(void)
238{
239 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
240 u32 irq;
241
242 /*
243 * This can be called either as a result of a HW interrupt or
244 * as a "replay" because EOI decided there was still something
245 * in one of the queues.
246 *
247 * First we perform an ACK cycle in order to update our mask
248 * of pending priorities. This will also have the effect of
249 * updating the CPPR to the most favored pending interrupts.
250 *
251 * In the future, if we have a way to differentiate a first
252 * entry (on HW interrupt) from a replay triggered by EOI,
253 * we could skip this on replays unless the soft-mask tells us
254 * that a new HW interrupt occurred.
255 */
256 xive_ops->update_pending(xc);
257
258 DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);
259
260 /* Scan our queue(s) for interrupts */
261 irq = xive_scan_interrupts(xc, false);
262
263 DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
264 irq, xc->pending_prio);
265
266 /* Return pending interrupt if any */
267 if (irq == XIVE_BAD_IRQ)
268 return 0;
269 return irq;
270}
271
272/*
273 * After EOI'ing an interrupt, we need to re-check the queue
274 * to see if another interrupt is pending since multiple
275 * interrupts can coalesce into a single notification to the
276 * CPU.
277 *
278 * If we find that there is indeed more in there, we call
279 * force_external_irq_replay() to make Linux synthesize an
280 * external interrupt on the next call to local_irq_restore().
281 */
282static void xive_do_queue_eoi(struct xive_cpu *xc)
283{
284 if (xive_scan_interrupts(xc, true) != 0) {
285 DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
286 force_external_irq_replay();
287 }
288}
289
290/*
291 * EOI an interrupt at the source. There are several methods
292 * to do this depending on the HW version and source type
293 */
294void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
295{
296 /* If the XIVE supports the new "store EOI" facility, use it */
297 if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
298 out_be64(xd->eoi_mmio, 0);
299 else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
300 /*
301 * The FW told us to call it. This happens for some
302 * interrupt sources that need additional HW whacking
303 * beyond the ESB manipulation. For example LPC interrupts
304 * on P9 DD1.0 need a latch to be cleared in the LPC bridge
305 * itself. The Firmware will take care of it.
306 */
307 if (WARN_ON_ONCE(!xive_ops->eoi))
308 return;
309 xive_ops->eoi(hw_irq);
310 } else {
311 u8 eoi_val;
312
313 /*
314 * Otherwise for EOI, we use the special MMIO that does
315 * a clear of both P and Q and returns the old Q,
316 * except for LSIs where we use the "EOI cycle" special
317 * load.
318 *
319 * This allows us to then do a re-trigger if Q was set
320 * rather than synthesizing an interrupt in software
321 *
322 * For LSIs, using the HW EOI cycle works around a problem
323 * on P9 DD1 PHBs where the other ESB accesses don't work
324 * properly.
325 */
326 if (xd->flags & XIVE_IRQ_FLAG_LSI)
327 in_be64(xd->eoi_mmio);
328 else {
329 eoi_val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
330 DBG_VERBOSE("eoi_val=%x\n", eoi_val);
331
332 /* Re-trigger if needed */
333 if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
334 out_be64(xd->trig_mmio, 0);
335 }
336 }
337}
338
339/* irq_chip eoi callback */
340static void xive_irq_eoi(struct irq_data *d)
341{
342 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
343 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
344
345 DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
346 d->irq, irqd_to_hwirq(d), xc->pending_prio);
347
348 /* EOI the source if it hasn't been disabled */
349 if (!irqd_irq_disabled(d))
350 xive_do_source_eoi(irqd_to_hwirq(d), xd);
351
352 /*
353 * Clear saved_p to indicate that it's no longer occupying
354 * a queue slot on the target queue
355 */
356 xd->saved_p = false;
357
358 /* Check for more work in the queue */
359 xive_do_queue_eoi(xc);
360}
361
362/*
363 * Helper used to mask and unmask an interrupt source. This
364 * is only called for normal interrupts that do not require
365 * masking/unmasking via firmware.
366 */
367static void xive_do_source_set_mask(struct xive_irq_data *xd,
368 bool mask)
369{
370 u64 val;
371
372 /*
373 * If the interrupt had P set, it may be in a queue.
374 *
375 * We need to make sure we don't re-enable it until it
376 * has been fetched from that queue and EOId. We keep
377 * a copy of that P state and use it to restore the
378 * ESB accordingly on unmask.
379 */
380 if (mask) {
381 val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_01);
382 xd->saved_p = !!(val & XIVE_ESB_VAL_P);
383 } else if (xd->saved_p)
384 xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
385 else
386 xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
387}
388
389/*
390 * Try to choose "cpu" as a new interrupt target. Increments
391 * the queue accounting for that target if it's not already
392 * full.
393 */
394static bool xive_try_pick_target(int cpu)
395{
396 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
397 struct xive_q *q = &xc->queue[xive_irq_priority];
398 int max;
399
400 /*
401 * Calculate max number of interrupts in that queue.
402 *
403 * We leave a gap of 1 just in case...
404 */
405 max = (q->msk + 1) - 1;
406 return !!atomic_add_unless(&q->count, 1, max);
407}
408
409/*
410 * Un-account an interrupt for a target CPU. We don't directly
411 * decrement q->count since the interrupt might still be present
412 * in the queue.
413 *
414 * Instead increment a separate counter "pending_count" which
415 * will be subtracted from "count" later when that CPU observes
416 * the queue to be empty.
417 */
418static void xive_dec_target_count(int cpu)
419{
420 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
421 struct xive_q *q = &xc->queue[xive_irq_priority];
422
423 if (unlikely(WARN_ON(cpu < 0 || !xc))) {
424 pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
425 return;
426 }
427
428 /*
429 * We increment the "pending count" which will be used
430 * to decrement the target queue count whenever it's next
431 * processed and found empty. This ensures that we don't
432 * decrement while we still have the interrupt there
433 * occupying a slot.
434 */
435 atomic_inc(&q->pending_count);
436}
437
438/* Find a tentative CPU target in a CPU mask */
439static int xive_find_target_in_mask(const struct cpumask *mask,
440 unsigned int fuzz)
441{
442 int cpu, first, num, i;
443
444 /* Pick up a starting point CPU in the mask based on fuzz */
445 num = cpumask_weight(mask);
446 first = fuzz % num;
447
448 /* Locate it */
449 cpu = cpumask_first(mask);
450 for (i = 0; i < first && cpu < nr_cpu_ids; i++)
451 cpu = cpumask_next(cpu, mask);
452
453 /* Sanity check */
454 if (WARN_ON(cpu >= nr_cpu_ids))
455 cpu = cpumask_first(cpu_online_mask);
456
457 /* Remember first one to handle wrap-around */
458 first = cpu;
459
460 /*
461 * Now go through the entire mask until we find a valid
462 * target.
463 */
464 for (;;) {
465 /*
466 * We re-check online as the fallback case passes us
467 * an untested affinity mask
468 */
469 if (cpu_online(cpu) && xive_try_pick_target(cpu))
470 return cpu;
471 cpu = cpumask_next(cpu, mask);
472 if (cpu == first)
473 break;
474 /* Wrap around */
475 if (cpu >= nr_cpu_ids)
476 cpu = cpumask_first(mask);
477 }
478 return -1;
479}
480
481/*
482 * Pick a target CPU for an interrupt. This is done at
483 * startup or if the affinity is changed in a way that
484 * invalidates the current target.
485 */
486static int xive_pick_irq_target(struct irq_data *d,
487 const struct cpumask *affinity)
488{
489 static unsigned int fuzz;
490 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
491 cpumask_var_t mask;
492 int cpu = -1;
493
494 /*
495 * If we have chip IDs, first we try to build a mask of
496 * CPUs matching the source chip and find a target in there
497 */
498 if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
499 zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
500 /* Build a mask of matching chip IDs */
501 for_each_cpu_and(cpu, affinity, cpu_online_mask) {
502 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
503 if (xc->chip_id == xd->src_chip)
504 cpumask_set_cpu(cpu, mask);
505 }
506 /* Try to find a target */
507 if (cpumask_empty(mask))
508 cpu = -1;
509 else
510 cpu = xive_find_target_in_mask(mask, fuzz++);
511 free_cpumask_var(mask);
512 if (cpu >= 0)
513 return cpu;
514 fuzz--;
515 }
516
517 /* No chip IDs, fallback to using the affinity mask */
518 return xive_find_target_in_mask(affinity, fuzz++);
519}
520
521static unsigned int xive_irq_startup(struct irq_data *d)
522{
523 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
524 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
525 int target, rc;
526
527 pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
528 d->irq, hw_irq, d);
529
530#ifdef CONFIG_PCI_MSI
531 /*
532 * The generic MSI code returns with the interrupt disabled on the
533 * card, using the MSI mask bits. Firmware doesn't appear to unmask
534 * at that level, so we do it here by hand.
535 */
536 if (irq_data_get_msi_desc(d))
537 pci_msi_unmask_irq(d);
538#endif
539
540 /* Pick a target */
541 target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
542 if (target == XIVE_INVALID_TARGET) {
543 /* Try again breaking affinity */
544 target = xive_pick_irq_target(d, cpu_online_mask);
545 if (target == XIVE_INVALID_TARGET)
546 return -ENXIO;
547 pr_warn("irq %d started with broken affinity\n", d->irq);
548 }
549
550 /* Sanity check */
551 if (WARN_ON(target == XIVE_INVALID_TARGET ||
552 target >= nr_cpu_ids))
553 target = smp_processor_id();
554
555 xd->target = target;
556
557 /*
558 * Configure the logical number to be the Linux IRQ number
559 * and set the target queue
560 */
561 rc = xive_ops->configure_irq(hw_irq,
562 get_hard_smp_processor_id(target),
563 xive_irq_priority, d->irq);
564 if (rc)
565 return rc;
566
567 /* Unmask the ESB */
568 xive_do_source_set_mask(xd, false);
569
570 return 0;
571}
572
573static void xive_irq_shutdown(struct irq_data *d)
574{
575 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
576 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
577
578 pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n",
579 d->irq, hw_irq, d);
580
581 if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
582 return;
583
584 /* Mask the interrupt at the source */
585 xive_do_source_set_mask(xd, true);
586
587 /*
588 * The above may have set saved_p. We clear it otherwise it
589 * will prevent re-enabling later on. It is ok to forget the
590 * fact that the interrupt might be in a queue because we are
591 * accounting that already in xive_dec_target_count() and will
592 * be re-routing it to a new queue with proper accounting when
593 * it's started up again
594 */
595 xd->saved_p = false;
596
597 /*
598 * Mask the interrupt in HW in the IVT/EAS and set the number
599 * to be the "bad" IRQ number
600 */
601 xive_ops->configure_irq(hw_irq,
602 get_hard_smp_processor_id(xd->target),
603 0xff, XIVE_BAD_IRQ);
604
605 xive_dec_target_count(xd->target);
606 xd->target = XIVE_INVALID_TARGET;
607}
608
609static void xive_irq_unmask(struct irq_data *d)
610{
611 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
612
613 pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd);
614
615 /*
616 * This is a workaround for PCI LSI problems on P9, for
617 * these, we call FW to set the mask. The problems might
618 * be fixed by P9 DD2.0, if that is the case, firmware
619 * will no longer set that flag.
620 */
621 if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
622 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
623 xive_ops->configure_irq(hw_irq,
624 get_hard_smp_processor_id(xd->target),
625 xive_irq_priority, d->irq);
626 return;
627 }
628
629 xive_do_source_set_mask(xd, false);
630}
631
632static void xive_irq_mask(struct irq_data *d)
633{
634 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
635
636 pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd);
637
638 /*
639 * This is a workaround for PCI LSI problems on P9, for
640 * these, we call OPAL to set the mask. The problems might
641 * be fixed by P9 DD2.0, if that is the case, firmware
642 * will no longer set that flag.
643 */
644 if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
645 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
646 xive_ops->configure_irq(hw_irq,
647 get_hard_smp_processor_id(xd->target),
648 0xff, d->irq);
649 return;
650 }
651
652 xive_do_source_set_mask(xd, true);
653}
654
655static int xive_irq_set_affinity(struct irq_data *d,
656 const struct cpumask *cpumask,
657 bool force)
658{
659 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
660 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
661 u32 target, old_target;
662 int rc = 0;
663
664 pr_devel("xive_irq_set_affinity: irq %d\n", d->irq);
665
666 /* Is this valid ? */
667 if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
668 return -EINVAL;
669
670 /*
671 * If existing target is already in the new mask, and is
672 * online then do nothing.
673 */
674 if (xd->target != XIVE_INVALID_TARGET &&
675 cpu_online(xd->target) &&
676 cpumask_test_cpu(xd->target, cpumask))
677 return IRQ_SET_MASK_OK;
678
679 /* Pick a new target */
680 target = xive_pick_irq_target(d, cpumask);
681
682 /* No target found */
683 if (target == XIVE_INVALID_TARGET)
684 return -ENXIO;
685
686 /* Sanity check */
687 if (WARN_ON(target >= nr_cpu_ids))
688 target = smp_processor_id();
689
690 old_target = xd->target;
691
692 rc = xive_ops->configure_irq(hw_irq,
693 get_hard_smp_processor_id(target),
694 xive_irq_priority, d->irq);
695 if (rc < 0) {
696 pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
697 return rc;
698 }
699
700 pr_devel(" target: 0x%x\n", target);
701 xd->target = target;
702
703 /* Give up previous target */
704 if (old_target != XIVE_INVALID_TARGET)
705 xive_dec_target_count(old_target);
706
707 return IRQ_SET_MASK_OK;
708}
709
710static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
711{
712 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
713
714 /*
715 * We only support these. This has really no effect other than setting
716 * the corresponding descriptor bits mind you but those will in turn
717 * affect the resend function when re-enabling an edge interrupt.
718 *
719 * We set the default to edge as explained in map().
720 */
721 if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
722 flow_type = IRQ_TYPE_EDGE_RISING;
723
724 if (flow_type != IRQ_TYPE_EDGE_RISING &&
725 flow_type != IRQ_TYPE_LEVEL_LOW)
726 return -EINVAL;
727
728 irqd_set_trigger_type(d, flow_type);
729
730 /*
731 * Double check it matches what the FW thinks
732 *
733 * NOTE: We don't know yet if the PAPR interface will provide
734 * the LSI vs MSI information apart from the device-tree so
735 * this check might have to move into an optional backend call
736 * that is specific to the native backend
737 */
738 if ((flow_type == IRQ_TYPE_LEVEL_LOW) !=
739 !!(xd->flags & XIVE_IRQ_FLAG_LSI)) {
740 pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
741 d->irq, (u32)irqd_to_hwirq(d),
742 (flow_type == IRQ_TYPE_LEVEL_LOW) ? "Level" : "Edge",
743 (xd->flags & XIVE_IRQ_FLAG_LSI) ? "Level" : "Edge");
744 }
745
746 return IRQ_SET_MASK_OK_NOCOPY;
747}
748
749static int xive_irq_retrigger(struct irq_data *d)
750{
751 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
752
753 /* This should be only for MSIs */
754 if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
755 return 0;
756
757 /*
758 * To perform a retrigger, we first set the PQ bits to
759 * 11, then perform an EOI.
760 */
761 xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
762
763 /*
764 * Note: We pass "0" to the hw_irq argument in order to
765 * avoid calling into the backend EOI code which we don't
766 * want to do in the case of a re-trigger. Backends typically
767 * only do EOI for LSIs anyway.
768 */
769 xive_do_source_eoi(0, xd);
770
771 return 1;
772}
773
774static struct irq_chip xive_irq_chip = {
775 .name = "XIVE-IRQ",
776 .irq_startup = xive_irq_startup,
777 .irq_shutdown = xive_irq_shutdown,
778 .irq_eoi = xive_irq_eoi,
779 .irq_mask = xive_irq_mask,
780 .irq_unmask = xive_irq_unmask,
781 .irq_set_affinity = xive_irq_set_affinity,
782 .irq_set_type = xive_irq_set_type,
783 .irq_retrigger = xive_irq_retrigger,
784};
785
786bool is_xive_irq(struct irq_chip *chip)
787{
788 return chip == &xive_irq_chip;
789}
790
791void xive_cleanup_irq_data(struct xive_irq_data *xd)
792{
793 if (xd->eoi_mmio) {
794 iounmap(xd->eoi_mmio);
795 if (xd->eoi_mmio == xd->trig_mmio)
796 xd->trig_mmio = NULL;
797 xd->eoi_mmio = NULL;
798 }
799 if (xd->trig_mmio) {
800 iounmap(xd->trig_mmio);
801 xd->trig_mmio = NULL;
802 }
803}
804
805static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
806{
807 struct xive_irq_data *xd;
808 int rc;
809
810 xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL);
811 if (!xd)
812 return -ENOMEM;
813 rc = xive_ops->populate_irq_data(hw, xd);
814 if (rc) {
815 kfree(xd);
816 return rc;
817 }
818 xd->target = XIVE_INVALID_TARGET;
819 irq_set_handler_data(virq, xd);
820
821 return 0;
822}
823
824static void xive_irq_free_data(unsigned int virq)
825{
826 struct xive_irq_data *xd = irq_get_handler_data(virq);
827
828 if (!xd)
829 return;
830 irq_set_handler_data(virq, NULL);
831 xive_cleanup_irq_data(xd);
832 kfree(xd);
833}
834
835#ifdef CONFIG_SMP
836
837static void xive_cause_ipi(int cpu, unsigned long msg)
838{
839 struct xive_cpu *xc;
840 struct xive_irq_data *xd;
841
842 xc = per_cpu(xive_cpu, cpu);
843
844 DBG_VERBOSE("IPI msg#%ld CPU %d -> %d (HW IRQ 0x%x)\n",
845 msg, smp_processor_id(), cpu, xc->hw_ipi);
846
847 xd = &xc->ipi_data;
848 if (WARN_ON(!xd->trig_mmio))
849 return;
850 out_be64(xd->trig_mmio, 0);
851}
852
853static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
854{
855 return smp_ipi_demux();
856}
857
858static void xive_ipi_eoi(struct irq_data *d)
859{
860 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
861
862 /* Handle possible race with unplug and drop stale IPIs */
863 if (!xc)
864 return;
865 xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
866 xive_do_queue_eoi(xc);
867}
868
869static void xive_ipi_do_nothing(struct irq_data *d)
870{
871 /*
872 * Nothing to do, we never mask/unmask IPIs, but the callback
873 * has to exist for the struct irq_chip.
874 */
875}
876
877static struct irq_chip xive_ipi_chip = {
878 .name = "XIVE-IPI",
879 .irq_eoi = xive_ipi_eoi,
880 .irq_mask = xive_ipi_do_nothing,
881 .irq_unmask = xive_ipi_do_nothing,
882};
883
884static void __init xive_request_ipi(void)
885{
886 unsigned int virq;
887
888 /*
889 * Initialization failed, move on, we might manage to
890 * reach the point where we display our errors before
891 * the system falls apart
892 */
893 if (!xive_irq_domain)
894 return;
895
896 /* Initialize it */
897 virq = irq_create_mapping(xive_irq_domain, 0);
898 xive_ipi_irq = virq;
899
900 WARN_ON(request_irq(virq, xive_muxed_ipi_action,
901 IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
902}
903
904static int xive_setup_cpu_ipi(unsigned int cpu)
905{
906 struct xive_cpu *xc;
907 int rc;
908
909 pr_debug("Setting up IPI for CPU %d\n", cpu);
910
911 xc = per_cpu(xive_cpu, cpu);
912
913 /* Check if we are already setup */
914 if (xc->hw_ipi != 0)
915 return 0;
916
917 /* Grab an IPI from the backend, this will populate xc->hw_ipi */
918 if (xive_ops->get_ipi(cpu, xc))
919 return -EIO;
920
921 /*
922 * Populate the IRQ data in the xive_cpu structure and
923 * configure the HW / enable the IPIs.
924 */
925 rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
926 if (rc) {
927 pr_err("Failed to populate IPI data on CPU %d\n", cpu);
928 return -EIO;
929 }
930 rc = xive_ops->configure_irq(xc->hw_ipi,
931 get_hard_smp_processor_id(cpu),
932 xive_irq_priority, xive_ipi_irq);
933 if (rc) {
934 pr_err("Failed to map IPI CPU %d\n", cpu);
935 return -EIO;
936 }
937 pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu,
938 xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
939
940 /* Unmask it */
941 xive_do_source_set_mask(&xc->ipi_data, false);
942
943 return 0;
944}
945
946static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
947{
948 /* Disable the IPI and free the IRQ data */
949
950 /* Already cleaned up ? */
951 if (xc->hw_ipi == 0)
952 return;
953
954 /* Mask the IPI */
955 xive_do_source_set_mask(&xc->ipi_data, true);
956
957 /*
958 * Note: We don't call xive_cleanup_irq_data() to free
959 * the mappings as this is called from an IPI on kexec
960 * which is not a safe environment to call iounmap()
961 */
962
963 /* Deconfigure/mask in the backend */
964 xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
965 0xff, xive_ipi_irq);
966
967 /* Free the IPIs in the backend */
968 xive_ops->put_ipi(cpu, xc);
969}
970
971void __init xive_smp_probe(void)
972{
973 smp_ops->cause_ipi = xive_cause_ipi;
974
975 /* Register the IPI */
976 xive_request_ipi();
977
978 /* Allocate and setup IPI for the boot CPU */
979 xive_setup_cpu_ipi(smp_processor_id());
980}
981
982#endif /* CONFIG_SMP */
983
984static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
985 irq_hw_number_t hw)
986{
987 int rc;
988
989 /*
990 * Mark interrupts as edge sensitive by default so that resend
991 * actually works. Will fix that up below if needed.
992 */
993 irq_clear_status_flags(virq, IRQ_LEVEL);
994
995#ifdef CONFIG_SMP
996 /* IPIs are special and come up with HW number 0 */
997 if (hw == 0) {
998 /*
999 * IPIs are marked per-cpu. We use separate HW interrupts under
1000 * the hood but associated with the same "linux" interrupt
1001 */
1002 irq_set_chip_and_handler(virq, &xive_ipi_chip,
1003 handle_percpu_irq);
1004 return 0;
1005 }
1006#endif
1007
1008 rc = xive_irq_alloc_data(virq, hw);
1009 if (rc)
1010 return rc;
1011
1012 irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq);
1013
1014 return 0;
1015}
1016
1017static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
1018{
1019 struct irq_data *data = irq_get_irq_data(virq);
1020 unsigned int hw_irq;
1021
1022 /* XXX Assign BAD number */
1023 if (!data)
1024 return;
1025 hw_irq = (unsigned int)irqd_to_hwirq(data);
1026 if (hw_irq)
1027 xive_irq_free_data(virq);
1028}
1029
1030static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
1031 const u32 *intspec, unsigned int intsize,
1032 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
1033
1034{
1035 *out_hwirq = intspec[0];
1036
1037 /*
1038 * If intsize is at least 2, we look for the type in the second cell,
1039 * where we assume the LSB indicates a level interrupt.
1040 */
1041 if (intsize > 1) {
1042 if (intspec[1] & 1)
1043 *out_flags = IRQ_TYPE_LEVEL_LOW;
1044 else
1045 *out_flags = IRQ_TYPE_EDGE_RISING;
1046 } else
1047 *out_flags = IRQ_TYPE_LEVEL_LOW;
1048
1049 return 0;
1050}
1051
1052static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
1053 enum irq_domain_bus_token bus_token)
1054{
1055 return xive_ops->match(node);
1056}
1057
1058static const struct irq_domain_ops xive_irq_domain_ops = {
1059 .match = xive_irq_domain_match,
1060 .map = xive_irq_domain_map,
1061 .unmap = xive_irq_domain_unmap,
1062 .xlate = xive_irq_domain_xlate,
1063};
1064
1065static void __init xive_init_host(void)
1066{
1067 xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ,
1068 &xive_irq_domain_ops, NULL);
1069 if (WARN_ON(xive_irq_domain == NULL))
1070 return;
1071 irq_set_default_host(xive_irq_domain);
1072}
1073
1074static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1075{
1076 if (xc->queue[xive_irq_priority].qpage)
1077 xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
1078}
1079
1080static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1081{
1082 int rc = 0;
1083
1084 /* We set up one queue for now, with a 64k page */
1085 if (!xc->queue[xive_irq_priority].qpage)
1086 rc = xive_ops->setup_queue(cpu, xc, xive_irq_priority);
1087
1088 return rc;
1089}
1090
1091static int xive_prepare_cpu(unsigned int cpu)
1092{
1093 struct xive_cpu *xc;
1094
1095 xc = per_cpu(xive_cpu, cpu);
1096 if (!xc) {
1097 struct device_node *np;
1098
1099 xc = kzalloc_node(sizeof(struct xive_cpu),
1100 GFP_KERNEL, cpu_to_node(cpu));
1101 if (!xc)
1102 return -ENOMEM;
1103 np = of_get_cpu_node(cpu, NULL);
1104 if (np)
1105 xc->chip_id = of_get_ibm_chip_id(np);
1106 of_node_put(np);
1107
1108 per_cpu(xive_cpu, cpu) = xc;
1109 }
1110
1111 /* Setup EQs if not already */
1112 return xive_setup_cpu_queues(cpu, xc);
1113}
1114
1115static void xive_setup_cpu(void)
1116{
1117 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1118
1119 /* Debug: Dump the TM state */
1120 pr_devel("CPU %d [HW 0x%02x] VT=%02x\n",
1121 smp_processor_id(), hard_smp_processor_id(),
1122 in_8(xive_tima + xive_tima_offset + TM_WORD2));
1123
1124 /* The backend might have additional things to do */
1125 if (xive_ops->setup_cpu)
1126 xive_ops->setup_cpu(smp_processor_id(), xc);
1127
1128 /* Set CPPR to 0xff to enable flow of interrupts */
1129 xc->cppr = 0xff;
1130 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1131}
1132
1133#ifdef CONFIG_SMP
1134void xive_smp_setup_cpu(void)
1135{
1136 pr_devel("SMP setup CPU %d\n", smp_processor_id());
1137
1138 /* This will have already been done on the boot CPU */
1139 if (smp_processor_id() != boot_cpuid)
1140 xive_setup_cpu();
1141
1142}
1143
1144int xive_smp_prepare_cpu(unsigned int cpu)
1145{
1146 int rc;
1147
1148 /* Allocate per-CPU data and queues */
1149 rc = xive_prepare_cpu(cpu);
1150 if (rc)
1151 return rc;
1152
1153 /* Allocate and setup IPI for the new CPU */
1154 return xive_setup_cpu_ipi(cpu);
1155}
1156
1157#ifdef CONFIG_HOTPLUG_CPU
1158static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
1159{
1160 u32 irq;
1161
1162 /* We assume local irqs are disabled */
1163 WARN_ON(!irqs_disabled());
1164
1165 /* Check what's already in the CPU queue */
1166 while ((irq = xive_scan_interrupts(xc, false)) != 0) {
1167 /*
1168 * We need to re-route that interrupt to its new destination.
1169 * First get and lock the descriptor
1170 */
1171 struct irq_desc *desc = irq_to_desc(irq);
1172 struct irq_data *d = irq_desc_get_irq_data(desc);
1173 struct xive_irq_data *xd;
1174 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
1175
1176 /*
1177 * Ignore anything that isn't a XIVE irq and ignore
1178 * IPIs, which can just be dropped.
1179 */
1180 if (d->domain != xive_irq_domain || hw_irq == 0)
1181 continue;
1182
1183 /*
1184 * The IRQ should have already been re-routed, it's just a
1185 * stale entry in the old queue, so re-trigger it in order to make
1186 * it reach its new destination.
1187 */
1188#ifdef DEBUG_FLUSH
1189 pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
1190 cpu, irq);
1191#endif
1192 raw_spin_lock(&desc->lock);
1193 xd = irq_desc_get_handler_data(desc);
1194
1195 /*
1196 * For LSIs, we EOI; this will cause a resend if it's
1197 * still asserted. Otherwise do an MSI retrigger.
1198 */
1199 if (xd->flags & XIVE_IRQ_FLAG_LSI)
1200 xive_do_source_eoi(irqd_to_hwirq(d), xd);
1201 else
1202 xive_irq_retrigger(d);
1203
1204 raw_spin_unlock(&desc->lock);
1205 }
1206}
1207
1208void xive_smp_disable_cpu(void)
1209{
1210 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1211 unsigned int cpu = smp_processor_id();
1212
1213 /* Migrate interrupts away from the CPU */
1214 irq_migrate_all_off_this_cpu();
1215
1216 /* Set CPPR to 0 to disable flow of interrupts */
1217 xc->cppr = 0;
1218 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1219
1220 /* Flush everything still in the queue */
1221 xive_flush_cpu_queue(cpu, xc);
1222
1223 /* Re-enable CPPR */
1224 xc->cppr = 0xff;
1225 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1226}
1227
1228void xive_flush_interrupt(void)
1229{
1230 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1231 unsigned int cpu = smp_processor_id();
1232
1233 /* Called if an interrupt occurs while the CPU is hot unplugged */
1234 xive_flush_cpu_queue(cpu, xc);
1235}
1236
1237#endif /* CONFIG_HOTPLUG_CPU */
1238
1239#endif /* CONFIG_SMP */
1240
1241void xive_kexec_teardown_cpu(int secondary)
1242{
1243 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1244 unsigned int cpu = smp_processor_id();
1245
1246 /* Set CPPR to 0 to disable flow of interrupts */
1247 xc->cppr = 0;
1248 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1249
1250 /* Backend cleanup if any */
1251 if (xive_ops->teardown_cpu)
1252 xive_ops->teardown_cpu(cpu, xc);
1253
1254#ifdef CONFIG_SMP
1255 /* Get rid of IPI */
1256 xive_cleanup_cpu_ipi(cpu, xc);
1257#endif
1258
1259 /* Disable and free the queues */
1260 xive_cleanup_cpu_queues(cpu, xc);
1261}
1262
1263void xive_shutdown(void)
1264{
1265 xive_ops->shutdown();
1266}
1267
1268bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
1269 u8 max_prio)
1270{
1271 xive_tima = area;
1272 xive_tima_offset = offset;
1273 xive_ops = ops;
1274 xive_irq_priority = max_prio;
1275
1276 ppc_md.get_irq = xive_get_irq;
1277 __xive_enabled = true;
1278
1279 pr_devel("Initializing host..\n");
1280 xive_init_host();
1281
1282 pr_devel("Initializing boot CPU..\n");
1283
1284 /* Allocate per-CPU data and queues */
1285 xive_prepare_cpu(smp_processor_id());
1286
1287 /* Get ready for interrupts */
1288 xive_setup_cpu();
1289
1290 pr_info("Interrupt handling initialized with %s backend\n",
1291 xive_ops->name);
1292 pr_info("Using priority %d for all interrupts\n", max_prio);
1293
1294 return true;
1295}
1296
1297static int __init xive_off(char *arg)
1298{
1299 xive_cmdline_disabled = true;
1300 return 0;
1301}
1302__setup("xive=off", xive_off);
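
For orientation, the core above is driven entirely through xive_core_init(); below is a minimal sketch of how a backend would bring itself up against it (illustrative only — example_ops and example_map_tima() are assumed names that are not part of this patch, and the ring offset and priority are placeholders):

	static bool __init example_backend_init(void)
	{
		void __iomem *tima;

		/* Honour "xive=off" on the kernel command line */
		if (xive_cmdline_disabled)
			return false;

		/* Map the thread management area (hypothetical helper) */
		tima = example_map_tima();
		if (!tima)
			return false;

		/* Hand the core our ops, the TIMA, a ring offset and the max priority */
		return xive_core_init(&example_ops, tima, TM_QW1_OS, 7);
	}

The native backend added in native.c below is the real instance of this pattern, using the HV physical ring and the OPAL-provided TIMA.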
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
new file mode 100644
index 000000000000..5fae59186cb2
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -0,0 +1,639 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#define pr_fmt(fmt) "xive: " fmt
11
12#include <linux/types.h>
13#include <linux/irq.h>
14#include <linux/debugfs.h>
15#include <linux/smp.h>
16#include <linux/interrupt.h>
17#include <linux/seq_file.h>
18#include <linux/init.h>
19#include <linux/of.h>
20#include <linux/slab.h>
21#include <linux/spinlock.h>
22#include <linux/delay.h>
23#include <linux/cpumask.h>
24#include <linux/mm.h>
25
26#include <asm/prom.h>
27#include <asm/io.h>
28#include <asm/smp.h>
29#include <asm/irq.h>
30#include <asm/errno.h>
31#include <asm/xive.h>
32#include <asm/xive-regs.h>
33#include <asm/opal.h>
34
35#include "xive-internal.h"
36
37
38static u32 xive_provision_size;
39static u32 *xive_provision_chips;
40static u32 xive_provision_chip_count;
41static u32 xive_queue_shift;
42static u32 xive_pool_vps = XIVE_INVALID_VP;
43static struct kmem_cache *xive_provision_cache;
44
45int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
46{
47 __be64 flags, eoi_page, trig_page;
48 __be32 esb_shift, src_chip;
49 u64 opal_flags;
50 s64 rc;
51
52 memset(data, 0, sizeof(*data));
53
54 rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
55 &esb_shift, &src_chip);
56 if (rc) {
57 pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
58 hw_irq, rc);
59 return -EINVAL;
60 }
61
62 opal_flags = be64_to_cpu(flags);
63 if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
64 data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
65 if (opal_flags & OPAL_XIVE_IRQ_LSI)
66 data->flags |= XIVE_IRQ_FLAG_LSI;
67 if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG)
68 data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG;
69 if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW)
70 data->flags |= XIVE_IRQ_FLAG_MASK_FW;
71 if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
72 data->flags |= XIVE_IRQ_FLAG_EOI_FW;
73 data->eoi_page = be64_to_cpu(eoi_page);
74 data->trig_page = be64_to_cpu(trig_page);
75 data->esb_shift = be32_to_cpu(esb_shift);
76 data->src_chip = be32_to_cpu(src_chip);
77
78 data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
79 if (!data->eoi_mmio) {
80 pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
81 return -ENOMEM;
82 }
83
84 if (!data->trig_page)
85 return 0;
86 if (data->trig_page == data->eoi_page) {
87 data->trig_mmio = data->eoi_mmio;
88 return 0;
89 }
90
91 data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
92 if (!data->trig_mmio) {
93 pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
94 return -ENOMEM;
95 }
96 return 0;
97}
98
99int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
100{
101 s64 rc;
102
103 for (;;) {
104 rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq);
105 if (rc != OPAL_BUSY)
106 break;
107 msleep(1);
108 }
109 return rc == 0 ? 0 : -ENXIO;
110}
111
112/* This can be called multiple times to change a queue's configuration */
113int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
114 __be32 *qpage, u32 order, bool can_escalate)
115{
116 s64 rc = 0;
117 __be64 qeoi_page_be;
118 __be32 esc_irq_be;
119 u64 flags, qpage_phys;
120
121 /* If there's an actual queue page, grab its physical address */
122 if (order) {
123 if (WARN_ON(!qpage))
124 return -EINVAL;
125 qpage_phys = __pa(qpage);
126 } else
127 qpage_phys = 0;
128
129 /* Initialize the rest of the fields */
130 q->msk = order ? ((1u << (order - 2)) - 1) : 0;
131 q->idx = 0;
132 q->toggle = 0;
133
134 rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
135 &qeoi_page_be,
136 &esc_irq_be,
137 NULL);
138 if (rc) {
139 pr_err("Error %lld getting queue info prio %d\n", rc, prio);
140 rc = -EIO;
141 goto fail;
142 }
143 q->eoi_phys = be64_to_cpu(qeoi_page_be);
144
145 /* Default flags */
146 flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;
147
148 /* Escalation needed? */
149 if (can_escalate) {
150 q->esc_irq = be32_to_cpu(esc_irq_be);
151 flags |= OPAL_XIVE_EQ_ESCALATE;
152 }
153
154 /* Configure and enable the queue in HW */
155 for (;;) {
156 rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
157 if (rc != OPAL_BUSY)
158 break;
159 msleep(1);
160 }
161 if (rc) {
162 pr_err("Error %lld setting queue for prio %d\n", rc, prio);
163 rc = -EIO;
164 } else {
165 /*
166 * KVM code requires all of the above to be visible before
167 * q->qpage is set due to how it manages IPI EOIs
168 */
169 wmb();
170 q->qpage = qpage;
171 }
172fail:
173 return rc;
174}
175
176static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
177{
178 s64 rc;
179
180 /* Disable the queue in HW */
181 for (;;) {
182 rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0);
183 if (rc != OPAL_BUSY)
184 break;
185 msleep(1);
186 }
187 if (rc)
188 pr_err("Error %lld disabling queue for prio %d\n", rc, prio);
189}
189
190void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
191{
192 __xive_native_disable_queue(vp_id, q, prio);
193}
194
195static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
196{
197 struct xive_q *q = &xc->queue[prio];
198 unsigned int alloc_order;
199 struct page *pages;
200 __be32 *qpage;
201
202 alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
203 (xive_queue_shift - PAGE_SHIFT) : 0;
204 pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
205 if (!pages)
206 return -ENOMEM;
207 qpage = (__be32 *)page_address(pages);
208 memset(qpage, 0, 1 << xive_queue_shift);
209 return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
210 q, prio, qpage, xive_queue_shift, false);
211}
212
213static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
214{
215 struct xive_q *q = &xc->queue[prio];
216 unsigned int alloc_order;
217
218 /*
219 * We use the variant with no iounmap as this is called on exec
220 * from an IPI and iounmap isn't safe
221 */
222 __xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
223 alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
224 (xive_queue_shift - PAGE_SHIFT) : 0;
225 free_pages((unsigned long)q->qpage, alloc_order);
226 q->qpage = NULL;
227}
228
229static bool xive_native_match(struct device_node *node)
230{
231 return of_device_is_compatible(node, "ibm,opal-xive-vc");
232}
233
234#ifdef CONFIG_SMP
235static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
236{
237 struct device_node *np;
238 unsigned int chip_id = 0;
239 s64 irq;
240
241 /* Find the chip ID */
242 np = of_get_cpu_node(cpu, NULL);
243 if (np) {
244 if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
245 chip_id = 0;
246 }
247
248 /* Allocate an IPI and populate info about it */
249 for (;;) {
250 irq = opal_xive_allocate_irq(chip_id);
251 if (irq == OPAL_BUSY) {
252 msleep(1);
253 continue;
254 }
255 if (irq < 0) {
256 pr_err("Failed to allocate IPI on CPU %d\n", cpu);
257 return -ENXIO;
258 }
259 xc->hw_ipi = irq;
260 break;
261 }
262 return 0;
263}
264
265u32 xive_native_alloc_irq(void)
266{
267 s64 rc;
268
269 for (;;) {
270 rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP);
271 if (rc != OPAL_BUSY)
272 break;
273 msleep(1);
274 }
275 if (rc < 0)
276 return 0;
277 return rc;
278}
279
280void xive_native_free_irq(u32 irq)
281{
282 for (;;) {
283 s64 rc = opal_xive_free_irq(irq);
284 if (rc != OPAL_BUSY)
285 break;
286 msleep(1);
287 }
288}
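
The two helpers above follow the same retry-on-OPAL_BUSY idiom used throughout this file, with 0 doubling as the failure value of xive_native_alloc_irq(). A hedged usage sketch (the function and parameter names are hypothetical, not part of this patch):

	/* Sketch: claim a hardware interrupt, route it, and give it back on error */
	static int example_claim_hw_irq(u32 target, u8 prio, u32 sw_irq)
	{
		u32 hw_irq = xive_native_alloc_irq();

		if (!hw_irq)		/* 0 means the OPAL allocation failed */
			return -ENOSPC;

		if (xive_native_configure_irq(hw_irq, target, prio, sw_irq)) {
			xive_native_free_irq(hw_irq);
			return -ENXIO;
		}
		return hw_irq;
	}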
289
290static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
291{
292 s64 rc;
293
294 /* Free the IPI */
295 if (!xc->hw_ipi)
296 return;
297 for (;;) {
298 rc = opal_xive_free_irq(xc->hw_ipi);
299 if (rc == OPAL_BUSY) {
300 msleep(1);
301 continue;
302 }
303 xc->hw_ipi = 0;
304 break;
305 }
306}
307#endif /* CONFIG_SMP */
308
309static void xive_native_shutdown(void)
310{
311 /* Switch the XIVE to emulation mode */
312 opal_xive_reset(OPAL_XIVE_MODE_EMU);
313}
314
315/*
316 * Perform an "ack" cycle on the current thread, thus
317 * grabbing the pending active priorities and updating
318 * the CPPR to the most favored one.
319 */
320static void xive_native_update_pending(struct xive_cpu *xc)
321{
322 u8 he, cppr;
323 u16 ack;
324
325 /* Perform the acknowledge cycle by reading the HV ack register */
326 ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));
327
328 /* Synchronize subsequent queue accesses */
329 mb();
330
331 /*
332 * Grab the CPPR and the "HE" field which indicates the source
333 * of the hypervisor interrupt (if any)
334 */
335 cppr = ack & 0xff;
336 he = GETFIELD(TM_QW3_NSR_HE, (ack >> 8));
337 switch (he) {
338 case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
339 break;
340 case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
341 if (cppr == 0xff)
342 return;
343 /* Mark the priority pending */
344 xc->pending_prio |= 1 << cppr;
345
346 /*
347 * A new interrupt should never have a CPPR less favored
348 * than our current one.
349 */
350 if (cppr >= xc->cppr)
351 pr_err("CPU %d odd ack CPPR, got %d at %d\n",
352 smp_processor_id(), cppr, xc->cppr);
353
354 /* Update our idea of what the CPPR is */
355 xc->cppr = cppr;
356 break;
357 case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
358 case TM_QW3_NSR_HE_LSI: /* Legacy FW LSI (unused) */
359 pr_err("CPU %d got unexpected interrupt type HE=%d\n",
360 smp_processor_id(), he);
361 return;
362 }
363}
364
365static void xive_native_eoi(u32 hw_irq)
366{
367 /*
368 * Not normally used except if specific interrupts need
369 * a workaround on EOI.
370 */
371 opal_int_eoi(hw_irq);
372}
373
374static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
375{
376 s64 rc;
377 u32 vp;
378 __be64 vp_cam_be;
379 u64 vp_cam;
380
381 if (xive_pool_vps == XIVE_INVALID_VP)
382 return;
383
384 /* Enable the pool VP */
385 vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
386 pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
387 for (;;) {
388 rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
389 if (rc != OPAL_BUSY)
390 break;
391 msleep(1);
392 }
393 if (rc) {
394 pr_err("Failed to enable pool VP on CPU %d\n", cpu);
395 return;
396 }
397
398 /* Grab its CAM value */
399 rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
400 if (rc) {
401 pr_err("Failed to get pool VP info CPU %d\n", cpu);
402 return;
403 }
404 vp_cam = be64_to_cpu(vp_cam_be);
405
406 pr_debug("VP CAM = %llx\n", vp_cam);
407
408 /* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
409 pr_debug("(Old HW value: %08x)\n",
410 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
411 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
412 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2,
413 TM_QW2W2_VP | vp_cam);
414 pr_debug("(New HW value: %08x)\n",
415 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
416}
417
418static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
419{
420 s64 rc;
421 u32 vp;
422
423 if (xive_pool_vps == XIVE_INVALID_VP)
424 return;
425
426 /* Pull the pool VP from the CPU */
427 in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
428
429 /* Disable it */
430 vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
431 for (;;) {
432 rc = opal_xive_set_vp_info(vp, 0, 0);
433 if (rc != OPAL_BUSY)
434 break;
435 msleep(1);
436 }
437}
438
439static void xive_native_sync_source(u32 hw_irq)
440{
441 opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
442}
443
444static const struct xive_ops xive_native_ops = {
445 .populate_irq_data = xive_native_populate_irq_data,
446 .configure_irq = xive_native_configure_irq,
447 .setup_queue = xive_native_setup_queue,
448 .cleanup_queue = xive_native_cleanup_queue,
449 .match = xive_native_match,
450 .shutdown = xive_native_shutdown,
451 .update_pending = xive_native_update_pending,
452 .eoi = xive_native_eoi,
453 .setup_cpu = xive_native_setup_cpu,
454 .teardown_cpu = xive_native_teardown_cpu,
455 .sync_source = xive_native_sync_source,
456#ifdef CONFIG_SMP
457 .get_ipi = xive_native_get_ipi,
458 .put_ipi = xive_native_put_ipi,
459#endif /* CONFIG_SMP */
460 .name = "native",
461};
462
463static bool xive_parse_provisioning(struct device_node *np)
464{
465 int rc;
466
467 if (of_property_read_u32(np, "ibm,xive-provision-page-size",
468 &xive_provision_size) < 0)
469 return true;
470 rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", 4);
471 if (rc < 0) {
472 pr_err("Error %d getting provision chips array\n", rc);
473 return false;
474 }
475 xive_provision_chip_count = rc;
476 if (rc == 0)
477 return true;
478
479 xive_provision_chips = kzalloc(4 * xive_provision_chip_count,
480 GFP_KERNEL);
481 if (WARN_ON(!xive_provision_chips))
482 return false;
483
484 rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
485 xive_provision_chips,
486 xive_provision_chip_count);
487 if (rc < 0) {
488 pr_err("Error %d reading provision chips array\n", rc);
489 return false;
490 }
491
492 xive_provision_cache = kmem_cache_create("xive-provision",
493 xive_provision_size,
494 xive_provision_size,
495 0, NULL);
496 if (!xive_provision_cache) {
497 pr_err("Failed to allocate provision cache\n");
498 return false;
499 }
500 return true;
501}
502
503u32 xive_native_default_eq_shift(void)
504{
505 return xive_queue_shift;
506}
507
508bool xive_native_init(void)
509{
510 struct device_node *np;
511 struct resource r;
512 void __iomem *tima;
513 struct property *prop;
514 u8 max_prio = 7;
515 const __be32 *p;
516 u32 val;
517 s64 rc;
518
519 if (xive_cmdline_disabled)
520 return false;
521
522 pr_devel("xive_native_init()\n");
523 np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
524 if (!np) {
525 pr_devel("not found !\n");
526 return false;
527 }
528 pr_devel("Found %s\n", np->full_name);
529
530 /* Resource 1 is HV window */
531 if (of_address_to_resource(np, 1, &r)) {
532 pr_err("Failed to get thread mgmnt area resource\n");
533 return false;
534 }
535 tima = ioremap(r.start, resource_size(&r));
536 if (!tima) {
537 pr_err("Failed to map thread mgmnt area\n");
538 return false;
539 }
540
541 /* Read number of priorities */
542 if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
543 max_prio = val - 1;
544
545 /* Iterate the EQ sizes and pick one */
546 of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
547 xive_queue_shift = val;
548 if (val == PAGE_SHIFT)
549 break;
550 }
551
552 /* Grab size of provisioning pages */
553 xive_parse_provisioning(np);
554
555 /* Switch the XIVE to exploitation mode */
556 rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
557 if (rc) {
558 pr_err("Switch to exploitation mode failed with error %lld\n", rc);
559 return false;
560 }
561
562 /* Initialize XIVE core with our backend */
563 if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
564 max_prio)) {
565 opal_xive_reset(OPAL_XIVE_MODE_EMU);
566 return false;
567 }
568 pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
569 return true;
570}
571
572static bool xive_native_provision_pages(void)
573{
574 u32 i;
575 void *p;
576
577 for (i = 0; i < xive_provision_chip_count; i++) {
578 u32 chip = xive_provision_chips[i];
579
580 /*
581 * XXX TODO: Try to make the allocation local to the node where
582 * the chip resides.
583 */
584 p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
585 if (!p) {
586 pr_err("Failed to allocate provisioning page\n");
587 return false;
588 }
589 opal_xive_donate_page(chip, __pa(p));
590 }
591 return true;
592}
593
594u32 xive_native_alloc_vp_block(u32 max_vcpus)
595{
596 s64 rc;
597 u32 order;
598
599 order = fls(max_vcpus) - 1;
600 if (max_vcpus > (1 << order))
601 order++;
602
603 pr_info("VP block alloc, for max VCPUs %d use order %d\n",
604 max_vcpus, order);
605
606 for (;;) {
607 rc = opal_xive_alloc_vp_block(order);
608 switch (rc) {
609 case OPAL_BUSY:
610 msleep(1);
611 break;
612 case OPAL_XIVE_PROVISIONING:
613 if (!xive_native_provision_pages())
614 return XIVE_INVALID_VP;
615 break;
616 default:
617 if (rc < 0) {
618 pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
619 order, rc);
620 return XIVE_INVALID_VP;
621 }
622 return rc;
623 }
624 }
625}
626EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);
627
628void xive_native_free_vp_block(u32 vp_base)
629{
630 s64 rc;
631
632 if (vp_base == XIVE_INVALID_VP)
633 return;
634
635 rc = opal_xive_free_vp_block(vp_base);
636 if (rc < 0)
637 pr_warn("OPAL error %lld freeing VP block\n", rc);
638}
639EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
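
Both VP block helpers are exported (EXPORT_SYMBOL_GPL), so a consumer outside this file would pair them roughly as follows — a hedged sketch only, with hypothetical names:

	/* Sketch: reserve a block of virtual processors, failing cleanly */
	static u32 example_reserve_vps(u32 nr_vcpus)
	{
		u32 vp_base = xive_native_alloc_vp_block(nr_vcpus);

		if (vp_base == XIVE_INVALID_VP)
			return XIVE_INVALID_VP;

		/* vp_base + a vCPU index is then usable as a vp_id, e.g. with
		 * xive_native_configure_queue(); on teardown the block is
		 * released with xive_native_free_vp_block(vp_base). */
		return vp_base;
	}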
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
new file mode 100644
index 000000000000..d07ef2d29caf
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef __XIVE_INTERNAL_H
10#define __XIVE_INTERNAL_H
11
12/* Each CPU carries one of these with various per-CPU state */
13struct xive_cpu {
14#ifdef CONFIG_SMP
15 /* HW irq number and data of IPI */
16 u32 hw_ipi;
17 struct xive_irq_data ipi_data;
18#endif /* CONFIG_SMP */
19
20 int chip_id;
21
22 /* Queue data. Only one queue is populated */
23#define XIVE_MAX_QUEUES 8
24 struct xive_q queue[XIVE_MAX_QUEUES];
25
26 /*
27 * Pending mask. Each bit corresponds to a priority that
28 * potentially has pending interrupts.
29 */
30 u8 pending_prio;
31
32 /* Cache of HW CPPR */
33 u8 cppr;
34};
35
36/* Backend ops */
37struct xive_ops {
38 int (*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data);
39 int (*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
40 int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
41 void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
42 void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
43 void (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc);
44 bool (*match)(struct device_node *np);
45 void (*shutdown)(void);
46
47 void (*update_pending)(struct xive_cpu *xc);
48 void (*eoi)(u32 hw_irq);
49 void (*sync_source)(u32 hw_irq);
50#ifdef CONFIG_SMP
51 int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc);
52 void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc);
53#endif
54 const char *name;
55};
56
57bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
58 u8 max_prio);
59
60extern bool xive_cmdline_disabled;
61
62#endif /* __XIVE_INTERNAL_H */
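
A backend implements the ops table above and registers it with xive_core_init(). A skeletal sketch follows (illustrative only — every example_* callback is an assumed name; the native backend in native.c is the real reference implementation):

	static const struct xive_ops example_ops = {
		.populate_irq_data	= example_populate_irq_data,
		.configure_irq		= example_configure_irq,
		.setup_queue		= example_setup_queue,
		.cleanup_queue		= example_cleanup_queue,
		.setup_cpu		= example_setup_cpu,
		.teardown_cpu		= example_teardown_cpu,
		.match			= example_match,
		.shutdown		= example_shutdown,
		.update_pending		= example_update_pending,
		.eoi			= example_eoi,
		.sync_source		= example_sync_source,
	#ifdef CONFIG_SMP
		.get_ipi		= example_get_ipi,
		.put_ipi		= example_put_ipi,
	#endif
		.name			= "example",
	};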
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 16321ad9e70c..67435b9bf98d 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -30,6 +30,7 @@
30#include <linux/ctype.h> 30#include <linux/ctype.h>
31 31
32#include <asm/ptrace.h> 32#include <asm/ptrace.h>
33#include <asm/smp.h>
33#include <asm/string.h> 34#include <asm/string.h>
34#include <asm/prom.h> 35#include <asm/prom.h>
35#include <asm/machdep.h> 36#include <asm/machdep.h>
@@ -48,7 +49,7 @@
48#include <asm/reg.h> 49#include <asm/reg.h>
49#include <asm/debug.h> 50#include <asm/debug.h>
50#include <asm/hw_breakpoint.h> 51#include <asm/hw_breakpoint.h>
51 52#include <asm/xive.h>
52#include <asm/opal.h> 53#include <asm/opal.h>
53#include <asm/firmware.h> 54#include <asm/firmware.h>
54 55
@@ -232,7 +233,13 @@ Commands:\n\
232 "\ 233 "\
233 dr dump stream of raw bytes\n\ 234 dr dump stream of raw bytes\n\
234 dt dump the tracing buffers (uses printk)\n\ 235 dt dump the tracing buffers (uses printk)\n\
235 e print exception information\n\ 236"
237#ifdef CONFIG_PPC_POWERNV
238" dx# dump xive on CPU #\n\
239 dxi# dump xive irq state #\n\
240 dxa dump xive on all CPUs\n"
241#endif
242" e print exception information\n\
236 f flush cache\n\ 243 f flush cache\n\
237 la lookup symbol+offset of specified address\n\ 244 la lookup symbol+offset of specified address\n\
238 ls lookup address of specified symbol\n\ 245 ls lookup address of specified symbol\n\
@@ -2338,6 +2345,81 @@ static void dump_pacas(void)
2338} 2345}
2339#endif 2346#endif
2340 2347
2348#ifdef CONFIG_PPC_POWERNV
2349static void dump_one_xive(int cpu)
2350{
2351 unsigned int hwid = get_hard_smp_processor_id(cpu);
2352
2353 opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
2354 opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
2355 opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
2356 opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
2357 opal_xive_dump(XIVE_DUMP_VP, hwid);
2358 opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
2359
2360 if (setjmp(bus_error_jmp) != 0) {
2361 catch_memory_errors = 0;
2362 printf("*** Error dumping xive on cpu %d\n", cpu);
2363 return;
2364 }
2365
2366 catch_memory_errors = 1;
2367 sync();
2368 xmon_xive_do_dump(cpu);
2369 sync();
2370 __delay(200);
2371 catch_memory_errors = 0;
2372}
2373
2374static void dump_all_xives(void)
2375{
2376 int cpu;
2377
2378 if (num_possible_cpus() == 0) {
2379 printf("No possible cpus, use 'dx #' to dump individual cpus\n");
2380 return;
2381 }
2382
2383 for_each_possible_cpu(cpu)
2384 dump_one_xive(cpu);
2385}
2386
2387static void dump_one_xive_irq(u32 num)
2388{
2389 s64 rc;
2390 __be64 vp;
2391 u8 prio;
2392 __be32 lirq;
2393
2394 rc = opal_xive_get_irq_config(num, &vp, &prio, &lirq);
2395 xmon_printf("IRQ 0x%x config: vp=0x%llx prio=%d lirq=0x%x (rc=%lld)\n",
2396 num, be64_to_cpu(vp), prio, be32_to_cpu(lirq), rc);
2397}
2398
2399static void dump_xives(void)
2400{
2401 unsigned long num;
2402 int c;
2403
2404 c = inchar();
2405 if (c == 'a') {
2406 dump_all_xives();
2407 return;
2408 } else if (c == 'i') {
2409 if (scanhex(&num))
2410 dump_one_xive_irq(num);
2411 return;
2412 }
2413
2414 termch = c; /* Put c back, it wasn't 'a' or 'i' */
2415
2416 if (scanhex(&num))
2417 dump_one_xive(num);
2418 else
2419 dump_one_xive(xmon_owner);
2420}
2421#endif /* CONFIG_PPC_POWERNV */
2422
2341static void dump_by_size(unsigned long addr, long count, int size) 2423static void dump_by_size(unsigned long addr, long count, int size)
2342{ 2424{
2343 unsigned char temp[16]; 2425 unsigned char temp[16];
@@ -2386,6 +2468,14 @@ dump(void)
2386 return; 2468 return;
2387 } 2469 }
2388#endif 2470#endif
2471#ifdef CONFIG_PPC_POWERNV
2472 if (c == 'x') {
2473 xmon_start_pagination();
2474 dump_xives();
2475 xmon_end_pagination();
2476 return;
2477 }
2478#endif
2389 2479
2390 if (c == '\n') 2480 if (c == '\n')
2391 termch = c; 2481 termch = c;