author     Paul Mackerras <paulus@ozlabs.org>   2017-04-27 18:23:16 -0400
committer  Paul Mackerras <paulus@ozlabs.org>   2017-04-27 18:23:16 -0400
commit     fb7dcf723dd2cb1d5d8f2f49c3023130938848e3 (patch)
tree       dc26c9f9616a06b88b34689f3a5b44a73738b52f
parent     db4b0dfab7b016f5514442046d86a9727ee87f12 (diff)
parent     5af50993850a48ba749b122173d789ea90976c72 (diff)

Merge remote-tracking branch 'remotes/powerpc/topic/xive' into kvm-ppc-next

This merges in the powerpc topic/xive branch to bring in the code that lets
the in-kernel XICS interrupt controller emulation use the new XIVE (eXternal
Interrupt Virtualization Engine) hardware in the POWER9 chip directly, rather
than going through a XICS emulation in firmware.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
-rw-r--r--  arch/powerpc/include/asm/bitops.h | 8
-rw-r--r--  arch/powerpc/include/asm/io.h | 98
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 4
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 28
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 84
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 74
-rw-r--r--  arch/powerpc/include/asm/opal.h | 36
-rw-r--r--  arch/powerpc/include/asm/reg.h | 1
-rw-r--r--  arch/powerpc/include/asm/smp.h | 2
-rw-r--r--  arch/powerpc/include/asm/xive-regs.h | 97
-rw-r--r--  arch/powerpc/include/asm/xive.h | 162
-rw-r--r--  arch/powerpc/include/asm/xmon.h | 2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 10
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 15
-rw-r--r--  arch/powerpc/kernel/irq.c | 40
-rw-r--r--  arch/powerpc/kernel/smp.c | 19
-rw-r--r--  arch/powerpc/kvm/Kconfig | 5
-rw-r--r--  arch/powerpc/kvm/Makefile | 4
-rw-r--r--  arch/powerpc/kvm/book3s.c | 83
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 69
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 132
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 14
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xive.c | 47
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 62
-rw-r--r--  arch/powerpc/kvm/book3s_rtas.c | 21
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 37
-rw-r--r--  arch/powerpc/kvm/book3s_xics.h | 7
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c | 1893
-rw-r--r--  arch/powerpc/kvm/book3s_xive.h | 256
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c | 503
-rw-r--r--  arch/powerpc/kvm/irq.h | 1
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 17
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 1
-rw-r--r--  arch/powerpc/platforms/powernv/Kconfig | 1
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 15
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 1
-rw-r--r--  arch/powerpc/platforms/powernv/rng.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 15
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c | 39
-rw-r--r--  arch/powerpc/sysdev/Kconfig | 1
-rw-r--r--  arch/powerpc/sysdev/Makefile | 1
-rw-r--r--  arch/powerpc/sysdev/xics/icp-native.c | 8
-rw-r--r--  arch/powerpc/sysdev/xive/Kconfig | 11
-rw-r--r--  arch/powerpc/sysdev/xive/Makefile | 4
-rw-r--r--  arch/powerpc/sysdev/xive/common.c | 1432
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 715
-rw-r--r--  arch/powerpc/sysdev/xive/xive-internal.h | 62
-rw-r--r--  arch/powerpc/xmon/xmon.c | 94
-rw-r--r--  include/linux/kvm_host.h | 1
-rw-r--r--  virt/kvm/kvm_main.c | 4
50 files changed, 6014 insertions, 224 deletions
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index bc5fdfd22788..33a24fdd7958 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -55,6 +55,14 @@
55#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \ 55#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \
56 ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit)) 56 ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
57 57
58#define PPC_BITLSHIFT32(be) (32 - 1 - (be))
59#define PPC_BIT32(bit) (1UL << PPC_BITLSHIFT32(bit))
60#define PPC_BITMASK32(bs, be) ((PPC_BIT32(bs) - PPC_BIT32(be))|PPC_BIT32(bs))
61
62#define PPC_BITLSHIFT8(be) (8 - 1 - (be))
63#define PPC_BIT8(bit) (1UL << PPC_BITLSHIFT8(bit))
64#define PPC_BITMASK8(bs, be) ((PPC_BIT8(bs) - PPC_BIT8(be))|PPC_BIT8(bs))
65
58#include <asm/barrier.h> 66#include <asm/barrier.h>
59 67
60/* Macro for generating the ***_bits() functions */ 68/* Macro for generating the ***_bits() functions */
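The PPC_BIT32()/PPC_BITMASK32() helpers added above use the Power MSB-0 bit
numbering (bit 0 is the most significant bit of the word), which the XIVE
register definitions later in this series rely on. A minimal standalone C
sketch, not part of the patch, showing what the macros expand to:

/*
 * Illustrative only, not part of the patch: MSB-0 ("IBM") bit numbering
 * as implemented by the new PPC_BIT32()/PPC_BITMASK32() macros.
 */
#include <stdio.h>

#define PPC_BITLSHIFT32(be)	(32 - 1 - (be))
#define PPC_BIT32(bit)		(1UL << PPC_BITLSHIFT32(bit))
#define PPC_BITMASK32(bs, be)	((PPC_BIT32(bs) - PPC_BIT32(be)) | PPC_BIT32(bs))

int main(void)
{
	/* Bit 0 is the MSB (0x80000000), bit 31 is the LSB (0x1). */
	printf("PPC_BIT32(0)        = 0x%08lx\n", PPC_BIT32(0));
	printf("PPC_BIT32(31)       = 0x%08lx\n", PPC_BIT32(31));

	/* A mask covers bits bs..be inclusive, counted from the MSB:
	 * PPC_BITMASK32(8, 31) is the low 24 bits, 0x00ffffff, which is
	 * the field width used later for TM_QW1W2_OS_CAM in xive-regs.h. */
	printf("PPC_BITMASK32(8,31) = 0x%08lx\n", PPC_BITMASK32(8, 31));
	return 0;
}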
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 5ed292431b5b..45c136a832ce 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -192,24 +192,8 @@ DEF_MMIO_OUT_D(out_le32, 32, stw);
192 192
193#endif /* __BIG_ENDIAN */ 193#endif /* __BIG_ENDIAN */
194 194
195/*
196 * Cache inhibitied accessors for use in real mode, you don't want to use these
197 * unless you know what you're doing.
198 *
199 * NB. These use the cpu byte ordering.
200 */
201DEF_MMIO_OUT_X(out_rm8, 8, stbcix);
202DEF_MMIO_OUT_X(out_rm16, 16, sthcix);
203DEF_MMIO_OUT_X(out_rm32, 32, stwcix);
204DEF_MMIO_IN_X(in_rm8, 8, lbzcix);
205DEF_MMIO_IN_X(in_rm16, 16, lhzcix);
206DEF_MMIO_IN_X(in_rm32, 32, lwzcix);
207
208#ifdef __powerpc64__ 195#ifdef __powerpc64__
209 196
210DEF_MMIO_OUT_X(out_rm64, 64, stdcix);
211DEF_MMIO_IN_X(in_rm64, 64, ldcix);
212
213#ifdef __BIG_ENDIAN__ 197#ifdef __BIG_ENDIAN__
214DEF_MMIO_OUT_D(out_be64, 64, std); 198DEF_MMIO_OUT_D(out_be64, 64, std);
215DEF_MMIO_IN_D(in_be64, 64, ld); 199DEF_MMIO_IN_D(in_be64, 64, ld);
@@ -242,35 +226,6 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
242#endif 226#endif
243#endif /* __powerpc64__ */ 227#endif /* __powerpc64__ */
244 228
245
246/*
247 * Simple Cache inhibited accessors
248 * Unlike the DEF_MMIO_* macros, these don't include any h/w memory
249 * barriers, callers need to manage memory barriers on their own.
250 * These can only be used in hypervisor real mode.
251 */
252
253static inline u32 _lwzcix(unsigned long addr)
254{
255 u32 ret;
256
257 __asm__ __volatile__("lwzcix %0,0, %1"
258 : "=r" (ret) : "r" (addr) : "memory");
259 return ret;
260}
261
262static inline void _stbcix(u64 addr, u8 val)
263{
264 __asm__ __volatile__("stbcix %0,0,%1"
265 : : "r" (val), "r" (addr) : "memory");
266}
267
268static inline void _stwcix(u64 addr, u32 val)
269{
270 __asm__ __volatile__("stwcix %0,0,%1"
271 : : "r" (val), "r" (addr) : "memory");
272}
273
274/* 229/*
275 * Low level IO stream instructions are defined out of line for now 230 * Low level IO stream instructions are defined out of line for now
276 */ 231 */
@@ -417,15 +372,64 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
417} 372}
418 373
419/* 374/*
420 * Real mode version of the above. stdcix is only supposed to be used 375 * Real mode versions of the above. Those instructions are only supposed
421 * in hypervisor real mode as per the architecture spec. 376 * to be used in hypervisor real mode as per the architecture spec.
422 */ 377 */
378static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr)
379{
380 __asm__ __volatile__("stbcix %0,0,%1"
381 : : "r" (val), "r" (paddr) : "memory");
382}
383
384static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr)
385{
386 __asm__ __volatile__("sthcix %0,0,%1"
387 : : "r" (val), "r" (paddr) : "memory");
388}
389
390static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr)
391{
392 __asm__ __volatile__("stwcix %0,0,%1"
393 : : "r" (val), "r" (paddr) : "memory");
394}
395
423static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) 396static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
424{ 397{
425 __asm__ __volatile__("stdcix %0,0,%1" 398 __asm__ __volatile__("stdcix %0,0,%1"
426 : : "r" (val), "r" (paddr) : "memory"); 399 : : "r" (val), "r" (paddr) : "memory");
427} 400}
428 401
402static inline u8 __raw_rm_readb(volatile void __iomem *paddr)
403{
404 u8 ret;
405 __asm__ __volatile__("lbzcix %0,0, %1"
406 : "=r" (ret) : "r" (paddr) : "memory");
407 return ret;
408}
409
410static inline u16 __raw_rm_readw(volatile void __iomem *paddr)
411{
412 u16 ret;
413 __asm__ __volatile__("lhzcix %0,0, %1"
414 : "=r" (ret) : "r" (paddr) : "memory");
415 return ret;
416}
417
418static inline u32 __raw_rm_readl(volatile void __iomem *paddr)
419{
420 u32 ret;
421 __asm__ __volatile__("lwzcix %0,0, %1"
422 : "=r" (ret) : "r" (paddr) : "memory");
423 return ret;
424}
425
426static inline u64 __raw_rm_readq(volatile void __iomem *paddr)
427{
428 u64 ret;
429 __asm__ __volatile__("ldcix %0,0, %1"
430 : "=r" (ret) : "r" (paddr) : "memory");
431 return ret;
432}
429#endif /* __powerpc64__ */ 433#endif /* __powerpc64__ */
430 434
431/* 435/*
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index d318d432caa9..b148496ffe36 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -110,7 +110,9 @@ struct kvmppc_host_state {
110 u8 ptid; 110 u8 ptid;
111 struct kvm_vcpu *kvm_vcpu; 111 struct kvm_vcpu *kvm_vcpu;
112 struct kvmppc_vcore *kvm_vcore; 112 struct kvmppc_vcore *kvm_vcore;
113 unsigned long xics_phys; 113 void __iomem *xics_phys;
114 void __iomem *xive_tima_phys;
115 void __iomem *xive_tima_virt;
114 u32 saved_xirr; 116 u32 saved_xirr;
115 u64 dabr; 117 u64 dabr;
116 u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ 118 u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 77c60826d145..9c51ac4b8f36 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -210,6 +210,12 @@ struct kvmppc_spapr_tce_table {
210/* XICS components, defined in book3s_xics.c */ 210/* XICS components, defined in book3s_xics.c */
211struct kvmppc_xics; 211struct kvmppc_xics;
212struct kvmppc_icp; 212struct kvmppc_icp;
213extern struct kvm_device_ops kvm_xics_ops;
214
215/* XIVE components, defined in book3s_xive.c */
216struct kvmppc_xive;
217struct kvmppc_xive_vcpu;
218extern struct kvm_device_ops kvm_xive_ops;
213 219
214struct kvmppc_passthru_irqmap; 220struct kvmppc_passthru_irqmap;
215 221
@@ -298,6 +304,7 @@ struct kvm_arch {
298#endif 304#endif
299#ifdef CONFIG_KVM_XICS 305#ifdef CONFIG_KVM_XICS
300 struct kvmppc_xics *xics; 306 struct kvmppc_xics *xics;
307 struct kvmppc_xive *xive;
301 struct kvmppc_passthru_irqmap *pimap; 308 struct kvmppc_passthru_irqmap *pimap;
302#endif 309#endif
303 struct kvmppc_ops *kvm_ops; 310 struct kvmppc_ops *kvm_ops;
@@ -427,7 +434,7 @@ struct kvmppc_passthru_irqmap {
427 434
428#define KVMPPC_IRQ_DEFAULT 0 435#define KVMPPC_IRQ_DEFAULT 0
429#define KVMPPC_IRQ_MPIC 1 436#define KVMPPC_IRQ_MPIC 1
430#define KVMPPC_IRQ_XICS 2 437#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
431 438
432#define MMIO_HPTE_CACHE_SIZE 4 439#define MMIO_HPTE_CACHE_SIZE 4
433 440
@@ -454,6 +461,21 @@ struct mmio_hpte_cache {
454 461
455struct openpic; 462struct openpic;
456 463
464/* W0 and W1 of a XIVE thread management context */
465union xive_tma_w01 {
466 struct {
467 u8 nsr;
468 u8 cppr;
469 u8 ipb;
470 u8 lsmfb;
471 u8 ack;
472 u8 inc;
473 u8 age;
474 u8 pipr;
475 };
476 __be64 w01;
477};
478
457struct kvm_vcpu_arch { 479struct kvm_vcpu_arch {
458 ulong host_stack; 480 ulong host_stack;
459 u32 host_pid; 481 u32 host_pid;
@@ -714,6 +736,10 @@ struct kvm_vcpu_arch {
714 struct openpic *mpic; /* KVM_IRQ_MPIC */ 736 struct openpic *mpic; /* KVM_IRQ_MPIC */
715#ifdef CONFIG_KVM_XICS 737#ifdef CONFIG_KVM_XICS
716 struct kvmppc_icp *icp; /* XICS presentation controller */ 738 struct kvmppc_icp *icp; /* XICS presentation controller */
739 struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
740 __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
741 u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
742 union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
717#endif 743#endif
718 744
719#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 745#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5885d327c025..e0d88c38602b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -240,6 +240,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
240extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); 240extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
241extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); 241extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
242extern void kvmppc_rtas_tokens_free(struct kvm *kvm); 242extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
243
243extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, 244extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
244 u32 priority); 245 u32 priority);
245extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, 246extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
@@ -425,7 +426,15 @@ struct openpic;
425extern void kvm_cma_reserve(void) __init; 426extern void kvm_cma_reserve(void) __init;
426static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 427static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
427{ 428{
428 paca[cpu].kvm_hstate.xics_phys = addr; 429 paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
430}
431
432static inline void kvmppc_set_xive_tima(int cpu,
433 unsigned long phys_addr,
434 void __iomem *virt_addr)
435{
436 paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
437 paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
429} 438}
430 439
431static inline u32 kvmppc_get_xics_latch(void) 440static inline u32 kvmppc_get_xics_latch(void)
@@ -458,6 +467,11 @@ static inline void __init kvm_cma_reserve(void)
458static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 467static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
459{} 468{}
460 469
470static inline void kvmppc_set_xive_tima(int cpu,
471 unsigned long phys_addr,
472 void __iomem *virt_addr)
473{}
474
461static inline u32 kvmppc_get_xics_latch(void) 475static inline u32 kvmppc_get_xics_latch(void)
462{ 476{
463 return 0; 477 return 0;
@@ -494,8 +508,6 @@ extern void kvmppc_free_host_rm_ops(void);
494extern void kvmppc_free_pimap(struct kvm *kvm); 508extern void kvmppc_free_pimap(struct kvm *kvm);
495extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall); 509extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
496extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); 510extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
497extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
498extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
499extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); 511extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
500extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); 512extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
501extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); 513extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
@@ -510,6 +522,10 @@ extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
510 struct kvmppc_irq_map *irq_map, 522 struct kvmppc_irq_map *irq_map,
511 struct kvmppc_passthru_irqmap *pimap, 523 struct kvmppc_passthru_irqmap *pimap,
512 bool *again); 524 bool *again);
525
526extern int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
527 int level, bool line_status);
528
513extern int h_ipi_redirect; 529extern int h_ipi_redirect;
514#else 530#else
515static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( 531static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
@@ -523,16 +539,64 @@ static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
523static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) 539static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
524 { return 0; } 540 { return 0; }
525static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } 541static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
526static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
527 unsigned long server)
528 { return -EINVAL; }
529static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
530 struct kvm_irq_level *args)
531 { return -ENOTTY; }
532static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) 542static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
533 { return 0; } 543 { return 0; }
534#endif 544#endif
535 545
546#ifdef CONFIG_KVM_XIVE
547/*
548 * Below the first "xive" is the "eXternal Interrupt Virtualization Engine"
549 * ie. P9 new interrupt controller, while the second "xive" is the legacy
550 * "eXternal Interrupt Vector Entry" which is the configuration of an
551 * interrupt on the "xics" interrupt controller on P8 and earlier. Those
552 * two function consume or produce a legacy "XIVE" state from the
553 * new "XIVE" interrupt controller.
554 */
555extern int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
556 u32 priority);
557extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
558 u32 *priority);
559extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
560extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
561extern void kvmppc_xive_init_module(void);
562extern void kvmppc_xive_exit_module(void);
563
564extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
565 struct kvm_vcpu *vcpu, u32 cpu);
566extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
567extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
568 struct irq_desc *host_desc);
569extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
570 struct irq_desc *host_desc);
571extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
572extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
573
574extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
575 int level, bool line_status);
576#else
577static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
578 u32 priority) { return -1; }
579static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
580 u32 *priority) { return -1; }
581static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
582static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
583static inline void kvmppc_xive_init_module(void) { }
584static inline void kvmppc_xive_exit_module(void) { }
585
586static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
587 struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
588static inline void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
589static inline int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
590 struct irq_desc *host_desc) { return -ENODEV; }
591static inline int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
592 struct irq_desc *host_desc) { return -ENODEV; }
593static inline u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) { return 0; }
594static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { return -ENOENT; }
595
596static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
597 int level, bool line_status) { return -ENODEV; }
598#endif /* CONFIG_KVM_XIVE */
599
536/* 600/*
537 * Prototypes for functions called only from assembler code. 601 * Prototypes for functions called only from assembler code.
538 * Having prototypes reduces sparse errors. 602 * Having prototypes reduces sparse errors.
@@ -570,6 +634,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
570long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, 634long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
571 unsigned long slb_v, unsigned int status, bool data); 635 unsigned long slb_v, unsigned int status, bool data);
572unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); 636unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
637unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
638unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
573int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, 639int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
574 unsigned long mfrr); 640 unsigned long mfrr);
575int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); 641int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index a0aa285869b5..bc8ac3c0e649 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -40,6 +40,8 @@
40#define OPAL_I2C_ARBT_LOST -22 40#define OPAL_I2C_ARBT_LOST -22
41#define OPAL_I2C_NACK_RCVD -23 41#define OPAL_I2C_NACK_RCVD -23
42#define OPAL_I2C_STOP_ERR -24 42#define OPAL_I2C_STOP_ERR -24
43#define OPAL_XIVE_PROVISIONING -31
44#define OPAL_XIVE_FREE_ACTIVE -32
43 45
44/* API Tokens (in r0) */ 46/* API Tokens (in r0) */
45#define OPAL_INVALID_CALL -1 47#define OPAL_INVALID_CALL -1
@@ -168,7 +170,24 @@
168#define OPAL_INT_SET_MFRR 125 170#define OPAL_INT_SET_MFRR 125
169#define OPAL_PCI_TCE_KILL 126 171#define OPAL_PCI_TCE_KILL 126
170#define OPAL_NMMU_SET_PTCR 127 172#define OPAL_NMMU_SET_PTCR 127
171#define OPAL_LAST 127 173#define OPAL_XIVE_RESET 128
174#define OPAL_XIVE_GET_IRQ_INFO 129
175#define OPAL_XIVE_GET_IRQ_CONFIG 130
176#define OPAL_XIVE_SET_IRQ_CONFIG 131
177#define OPAL_XIVE_GET_QUEUE_INFO 132
178#define OPAL_XIVE_SET_QUEUE_INFO 133
179#define OPAL_XIVE_DONATE_PAGE 134
180#define OPAL_XIVE_ALLOCATE_VP_BLOCK 135
181#define OPAL_XIVE_FREE_VP_BLOCK 136
182#define OPAL_XIVE_GET_VP_INFO 137
183#define OPAL_XIVE_SET_VP_INFO 138
184#define OPAL_XIVE_ALLOCATE_IRQ 139
185#define OPAL_XIVE_FREE_IRQ 140
186#define OPAL_XIVE_SYNC 141
187#define OPAL_XIVE_DUMP 142
188#define OPAL_XIVE_RESERVED3 143
189#define OPAL_XIVE_RESERVED4 144
190#define OPAL_LAST 144
172 191
173/* Device tree flags */ 192/* Device tree flags */
174 193
@@ -928,6 +947,59 @@ enum {
928 OPAL_PCI_TCE_KILL_ALL, 947 OPAL_PCI_TCE_KILL_ALL,
929}; 948};
930 949
950/* The xive operation mode indicates the active "API" and
951 * corresponds to the "mode" parameter of the opal_xive_reset()
952 * call
953 */
954enum {
955 OPAL_XIVE_MODE_EMU = 0,
956 OPAL_XIVE_MODE_EXPL = 1,
957};
958
959/* Flags for OPAL_XIVE_GET_IRQ_INFO */
960enum {
961 OPAL_XIVE_IRQ_TRIGGER_PAGE = 0x00000001,
962 OPAL_XIVE_IRQ_STORE_EOI = 0x00000002,
963 OPAL_XIVE_IRQ_LSI = 0x00000004,
964 OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008,
965 OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010,
966 OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020,
967};
968
969/* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */
970enum {
971 OPAL_XIVE_EQ_ENABLED = 0x00000001,
972 OPAL_XIVE_EQ_ALWAYS_NOTIFY = 0x00000002,
973 OPAL_XIVE_EQ_ESCALATE = 0x00000004,
974};
975
976/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
977enum {
978 OPAL_XIVE_VP_ENABLED = 0x00000001,
979};
980
981/* "Any chip" replacement for chip ID for allocation functions */
982enum {
983 OPAL_XIVE_ANY_CHIP = 0xffffffff,
984};
985
986/* Xive sync options */
987enum {
988 /* This bits are cumulative, arg is a girq */
989 XIVE_SYNC_EAS = 0x00000001, /* Sync irq source */
990 XIVE_SYNC_QUEUE = 0x00000002, /* Sync irq target */
991};
992
993/* Dump options */
994enum {
995 XIVE_DUMP_TM_HYP = 0,
996 XIVE_DUMP_TM_POOL = 1,
997 XIVE_DUMP_TM_OS = 2,
998 XIVE_DUMP_TM_USER = 3,
999 XIVE_DUMP_VP = 4,
1000 XIVE_DUMP_EMU_STATE = 5,
1001};
1002
931#endif /* __ASSEMBLY__ */ 1003#endif /* __ASSEMBLY__ */
932 1004
933#endif /* __OPAL_API_H */ 1005#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 1ff03a6da76e..cb7d6078b03a 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -226,6 +226,42 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
226 uint32_t pe_num, uint32_t tce_size, 226 uint32_t pe_num, uint32_t tce_size,
227 uint64_t dma_addr, uint32_t npages); 227 uint64_t dma_addr, uint32_t npages);
228int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr); 228int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
229int64_t opal_xive_reset(uint64_t version);
230int64_t opal_xive_get_irq_info(uint32_t girq,
231 __be64 *out_flags,
232 __be64 *out_eoi_page,
233 __be64 *out_trig_page,
234 __be32 *out_esb_shift,
235 __be32 *out_src_chip);
236int64_t opal_xive_get_irq_config(uint32_t girq, __be64 *out_vp,
237 uint8_t *out_prio, __be32 *out_lirq);
238int64_t opal_xive_set_irq_config(uint32_t girq, uint64_t vp, uint8_t prio,
239 uint32_t lirq);
240int64_t opal_xive_get_queue_info(uint64_t vp, uint32_t prio,
241 __be64 *out_qpage,
242 __be64 *out_qsize,
243 __be64 *out_qeoi_page,
244 __be32 *out_escalate_irq,
245 __be64 *out_qflags);
246int64_t opal_xive_set_queue_info(uint64_t vp, uint32_t prio,
247 uint64_t qpage,
248 uint64_t qsize,
249 uint64_t qflags);
250int64_t opal_xive_donate_page(uint32_t chip_id, uint64_t addr);
251int64_t opal_xive_alloc_vp_block(uint32_t alloc_order);
252int64_t opal_xive_free_vp_block(uint64_t vp);
253int64_t opal_xive_get_vp_info(uint64_t vp,
254 __be64 *out_flags,
255 __be64 *out_cam_value,
256 __be64 *out_report_cl_pair,
257 __be32 *out_chip_id);
258int64_t opal_xive_set_vp_info(uint64_t vp,
259 uint64_t flags,
260 uint64_t report_cl_pair);
261int64_t opal_xive_allocate_irq(uint32_t chip_id);
262int64_t opal_xive_free_irq(uint32_t girq);
263int64_t opal_xive_sync(uint32_t type, uint32_t id);
264int64_t opal_xive_dump(uint32_t type, uint32_t id);
229 265
230/* Internal functions */ 266/* Internal functions */
231extern int early_init_dt_scan_opal(unsigned long node, const char *uname, 267extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fc879fd6bdae..d0b332b8afad 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -365,6 +365,7 @@
365#define LPCR_MER_SH 11 365#define LPCR_MER_SH 11
366#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */ 366#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */
367#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ 367#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */
368#define LPCR_HEIC ASM_CONST(0x0000000000000010) /* Hypervisor External Interrupt Control */
368#define LPCR_LPES 0x0000000c 369#define LPCR_LPES 0x0000000c
369#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */ 370#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */
370#define LPCR_LPES1 ASM_CONST(0x0000000000000004) /* LPAR Env selector 1 */ 371#define LPCR_LPES1 ASM_CONST(0x0000000000000004) /* LPAR Env selector 1 */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 32db16d2e7ad..63fa780b71a0 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -44,6 +44,7 @@ struct smp_ops_t {
44#endif 44#endif
45 void (*probe)(void); 45 void (*probe)(void);
46 int (*kick_cpu)(int nr); 46 int (*kick_cpu)(int nr);
47 int (*prepare_cpu)(int nr);
47 void (*setup_cpu)(int nr); 48 void (*setup_cpu)(int nr);
48 void (*bringup_done)(void); 49 void (*bringup_done)(void);
49 void (*take_timebase)(void); 50 void (*take_timebase)(void);
@@ -61,7 +62,6 @@ extern void smp_generic_take_timebase(void);
61DECLARE_PER_CPU(unsigned int, cpu_pvr); 62DECLARE_PER_CPU(unsigned int, cpu_pvr);
62 63
63#ifdef CONFIG_HOTPLUG_CPU 64#ifdef CONFIG_HOTPLUG_CPU
64extern void migrate_irqs(void);
65int generic_cpu_disable(void); 65int generic_cpu_disable(void);
66void generic_cpu_die(unsigned int cpu); 66void generic_cpu_die(unsigned int cpu);
67void generic_set_cpu_dead(unsigned int cpu); 67void generic_set_cpu_dead(unsigned int cpu);
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
new file mode 100644
index 000000000000..1d3f2be5ae39
--- /dev/null
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef _ASM_POWERPC_XIVE_REGS_H
10#define _ASM_POWERPC_XIVE_REGS_H
11
12/*
13 * Thread Management (aka "TM") registers
14 */
15
16/* TM register offsets */
17#define TM_QW0_USER 0x000 /* All rings */
18#define TM_QW1_OS 0x010 /* Ring 0..2 */
19#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */
20#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */
21
22/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */
23#define TM_NSR 0x0 /* + + - + */
24#define TM_CPPR 0x1 /* - + - + */
25#define TM_IPB 0x2 /* - + + + */
26#define TM_LSMFB 0x3 /* - + + + */
27#define TM_ACK_CNT 0x4 /* - + - - */
28#define TM_INC 0x5 /* - + - + */
29#define TM_AGE 0x6 /* - + - + */
30#define TM_PIPR 0x7 /* - + - + */
31
32#define TM_WORD0 0x0
33#define TM_WORD1 0x4
34
35/*
36 * QW word 2 contains the valid bit at the top and other fields
37 * depending on the QW.
38 */
39#define TM_WORD2 0x8
40#define TM_QW0W2_VU PPC_BIT32(0)
41#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1,31) // XX 2,31 ?
42#define TM_QW1W2_VO PPC_BIT32(0)
43#define TM_QW1W2_OS_CAM PPC_BITMASK32(8,31)
44#define TM_QW2W2_VP PPC_BIT32(0)
45#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8,31)
46#define TM_QW3W2_VT PPC_BIT32(0)
47#define TM_QW3W2_LP PPC_BIT32(6)
48#define TM_QW3W2_LE PPC_BIT32(7)
49#define TM_QW3W2_T PPC_BIT32(31)
50
51/*
52 * In addition to normal loads to "peek" and writes (only when invalid)
53 * using 4 and 8 bytes accesses, the above registers support these
54 * "special" byte operations:
55 *
56 * - Byte load from QW0[NSR] - User level NSR (EBB)
57 * - Byte store to QW0[NSR] - User level NSR (EBB)
58 * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
59 * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
60 * otherwise VT||0000000
61 * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
62 *
63 * Then we have all these "special" CI ops at these offset that trigger
64 * all sorts of side effects:
65 */
66#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/
67#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */
68#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */
69#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user context */
70#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */
71#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS context to reg */
72#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate Pool context to reg*/
73#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */
74#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd line */
75#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */
76#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even line */
77#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */
78/* XXX more... */
79
80/* NSR fields for the various QW ack types */
81#define TM_QW0_NSR_EB PPC_BIT8(0)
82#define TM_QW1_NSR_EO PPC_BIT8(0)
83#define TM_QW3_NSR_HE PPC_BITMASK8(0,1)
84#define TM_QW3_NSR_HE_NONE 0
85#define TM_QW3_NSR_HE_POOL 1
86#define TM_QW3_NSR_HE_PHYS 2
87#define TM_QW3_NSR_HE_LSI 3
88#define TM_QW3_NSR_I PPC_BIT8(2)
89#define TM_QW3_NSR_GRP_LVL PPC_BIT8(3,7)
90
91/* Utilities to manipulate these (originaly from OPAL) */
92#define MASK_TO_LSH(m) (__builtin_ffsl(m) - 1)
93#define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m))
94#define SETFIELD(m, v, val) \
95 (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))
96
97#endif /* _ASM_POWERPC_XIVE_REGS_H */
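xive-regs.h also carries over the OPAL-style GETFIELD()/SETFIELD() helpers,
which locate a field by the least significant set bit of its mask. A
standalone C sketch, not part of the patch (it copies the relevant macros
and uses the 2-bit TM_QW3_NSR_HE field of the hypervisor NSR byte as the
example), of how a field is packed and unpacked:

/* Illustrative only; relies on the GCC builtins used by the original macros. */
#include <stdio.h>
#include <stdint.h>

#define PPC_BITLSHIFT8(be)	(8 - 1 - (be))
#define PPC_BIT8(bit)		(1UL << PPC_BITLSHIFT8(bit))
#define PPC_BITMASK8(bs, be)	((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs))

#define TM_QW3_NSR_HE		PPC_BITMASK8(0, 1)	/* mask 0xc0 */
#define TM_QW3_NSR_HE_PHYS	2

#define MASK_TO_LSH(m)		(__builtin_ffsl(m) - 1)
#define GETFIELD(m, v)		(((v) & (m)) >> MASK_TO_LSH(m))
#define SETFIELD(m, v, val)	\
	(((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))

int main(void)
{
	uint8_t nsr = 0;

	/* Encode "physical interrupt pending" into the HE field. */
	nsr = SETFIELD(TM_QW3_NSR_HE, nsr, TM_QW3_NSR_HE_PHYS);
	printf("nsr = 0x%02x\n", nsr);				/* 0x80 */

	/* Decode it back. */
	printf("HE  = %ld\n", (long)GETFIELD(TM_QW3_NSR_HE, nsr));	/* 2 */
	return 0;
}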
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
new file mode 100644
index 000000000000..c8a822acf962
--- /dev/null
+++ b/arch/powerpc/include/asm/xive.h
@@ -0,0 +1,162 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef _ASM_POWERPC_XIVE_H
10#define _ASM_POWERPC_XIVE_H
11
12#define XIVE_INVALID_VP 0xffffffff
13
14#ifdef CONFIG_PPC_XIVE
15
16/*
17 * Thread Interrupt Management Area (TIMA)
18 *
19 * This is a global MMIO region divided in 4 pages of varying access
20 * permissions, providing access to per-cpu interrupt management
21 * functions. It always identifies the CPU doing the access based
22 * on the PowerBus initiator ID, thus we always access via the
23 * same offset regardless of where the code is executing
24 */
25extern void __iomem *xive_tima;
26
27/*
28 * Offset in the TM area of our current execution level (provided by
29 * the backend)
30 */
31extern u32 xive_tima_offset;
32
33/*
34 * Per-irq data (irq_get_handler_data for normal IRQs), IPIs
35 * have it stored in the xive_cpu structure. We also cache
36 * for normal interrupts the current target CPU.
37 *
38 * This structure is setup by the backend for each interrupt.
39 */
40struct xive_irq_data {
41 u64 flags;
42 u64 eoi_page;
43 void __iomem *eoi_mmio;
44 u64 trig_page;
45 void __iomem *trig_mmio;
46 u32 esb_shift;
47 int src_chip;
48
49 /* Setup/used by frontend */
50 int target;
51 bool saved_p;
52};
53#define XIVE_IRQ_FLAG_STORE_EOI 0x01
54#define XIVE_IRQ_FLAG_LSI 0x02
55#define XIVE_IRQ_FLAG_SHIFT_BUG 0x04
56#define XIVE_IRQ_FLAG_MASK_FW 0x08
57#define XIVE_IRQ_FLAG_EOI_FW 0x10
58
59#define XIVE_INVALID_CHIP_ID -1
60
61/* A queue tracking structure in a CPU */
62struct xive_q {
63 __be32 *qpage;
64 u32 msk;
65 u32 idx;
66 u32 toggle;
67 u64 eoi_phys;
68 u32 esc_irq;
69 atomic_t count;
70 atomic_t pending_count;
71};
72
73/*
74 * "magic" Event State Buffer (ESB) MMIO offsets.
75 *
76 * Each interrupt source has a 2-bit state machine called ESB
77 * which can be controlled by MMIO. It's made of 2 bits, P and
78 * Q. P indicates that an interrupt is pending (has been sent
79 * to a queue and is waiting for an EOI). Q indicates that the
80 * interrupt has been triggered while pending.
81 *
82 * This acts as a coalescing mechanism in order to guarantee
83 * that a given interrupt only occurs at most once in a queue.
84 *
85 * When doing an EOI, the Q bit will indicate if the interrupt
86 * needs to be re-triggered.
87 *
88 * The following offsets into the ESB MMIO allow to read or
89 * manipulate the PQ bits. They must be used with an 8-bytes
90 * load instruction. They all return the previous state of the
91 * interrupt (atomically).
92 *
93 * Additionally, some ESB pages support doing an EOI via a
94 * store at 0 and some ESBs support doing a trigger via a
95 * separate trigger page.
96 */
97#define XIVE_ESB_GET 0x800
98#define XIVE_ESB_SET_PQ_00 0xc00
99#define XIVE_ESB_SET_PQ_01 0xd00
100#define XIVE_ESB_SET_PQ_10 0xe00
101#define XIVE_ESB_SET_PQ_11 0xf00
102
103#define XIVE_ESB_VAL_P 0x2
104#define XIVE_ESB_VAL_Q 0x1
105
106/* Global enable flags for the XIVE support */
107extern bool __xive_enabled;
108
109static inline bool xive_enabled(void) { return __xive_enabled; }
110
111extern bool xive_native_init(void);
112extern void xive_smp_probe(void);
113extern int xive_smp_prepare_cpu(unsigned int cpu);
114extern void xive_smp_setup_cpu(void);
115extern void xive_smp_disable_cpu(void);
116extern void xive_kexec_teardown_cpu(int secondary);
117extern void xive_shutdown(void);
118extern void xive_flush_interrupt(void);
119
120/* xmon hook */
121extern void xmon_xive_do_dump(int cpu);
122
123/* APIs used by KVM */
124extern u32 xive_native_default_eq_shift(void);
125extern u32 xive_native_alloc_vp_block(u32 max_vcpus);
126extern void xive_native_free_vp_block(u32 vp_base);
127extern int xive_native_populate_irq_data(u32 hw_irq,
128 struct xive_irq_data *data);
129extern void xive_cleanup_irq_data(struct xive_irq_data *xd);
130extern u32 xive_native_alloc_irq(void);
131extern void xive_native_free_irq(u32 irq);
132extern int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
133
134extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
135 __be32 *qpage, u32 order, bool can_escalate);
136extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
137
138extern void xive_native_sync_source(u32 hw_irq);
139extern bool is_xive_irq(struct irq_chip *chip);
140extern int xive_native_enable_vp(u32 vp_id);
141extern int xive_native_disable_vp(u32 vp_id);
142extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
143
144#else
145
146static inline bool xive_enabled(void) { return false; }
147
148static inline bool xive_native_init(void) { return false; }
149static inline void xive_smp_probe(void) { }
150extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
151static inline void xive_smp_setup_cpu(void) { }
152static inline void xive_smp_disable_cpu(void) { }
153static inline void xive_kexec_teardown_cpu(int secondary) { }
154static inline void xive_shutdown(void) { }
155static inline void xive_flush_interrupt(void) { }
156
157static inline u32 xive_native_alloc_vp_block(u32 max_vcpus) { return XIVE_INVALID_VP; }
158static inline void xive_native_free_vp_block(u32 vp_base) { }
159
160#endif
161
162#endif /* _ASM_POWERPC_XIVE_H */
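The comment block above the XIVE_ESB_* offsets describes the 2-bit P/Q state
machine that coalesces triggers per interrupt source. A tiny software model,
purely illustrative (the real PQ bits live in the source hardware and are read
or set through the ESB MMIO offsets, not in software), of why the Q bit tells
an EOI whether the interrupt needs to be re-triggered:

/* Illustrative only, not part of the patch: a conceptual model of PQ coalescing. */
#include <stdbool.h>
#include <stdio.h>

struct esb { bool p, q; };

/* An interrupt fires: set P if idle, set Q if already pending. */
static void esb_trigger(struct esb *s)
{
	if (!s->p)
		s->p = true;	/* becomes pending, gets queued once */
	else
		s->q = true;	/* coalesced: remember the extra trigger */
}

/* EOI: clear P and Q, and re-trigger if Q recorded a coalesced event. */
static bool esb_eoi(struct esb *s)
{
	bool retrigger = s->q;

	s->p = s->q = false;
	if (retrigger)
		esb_trigger(s);
	return retrigger;
}

int main(void)
{
	struct esb s = { false, false };

	esb_trigger(&s);	/* P=1: queued */
	esb_trigger(&s);	/* coalesced: Q=1, not queued again */
	printf("eoi retrigger = %d\n", esb_eoi(&s));	/* 1 */
	printf("eoi retrigger = %d\n", esb_eoi(&s));	/* 0 */
	return 0;
}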
diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h
index 5eb8e599e5cc..eb42a0c6e1d9 100644
--- a/arch/powerpc/include/asm/xmon.h
+++ b/arch/powerpc/include/asm/xmon.h
@@ -29,5 +29,7 @@ static inline void xmon_register_spus(struct list_head *list) { };
29extern int cpus_are_in_xmon(void); 29extern int cpus_are_in_xmon(void);
30#endif 30#endif
31 31
32extern void xmon_printf(const char *format, ...);
33
32#endif /* __KERNEL __ */ 34#endif /* __KERNEL __ */
33#endif /* __ASM_POWERPC_XMON_H */ 35#endif /* __ASM_POWERPC_XMON_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4367e7df51a1..1822187813dc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -630,6 +630,8 @@ int main(void)
630 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); 630 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
631 HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); 631 HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
632 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); 632 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
633 HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
634 HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
633 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); 635 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
634 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); 636 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
635 HSTATE_FIELD(HSTATE_PTID, ptid); 637 HSTATE_FIELD(HSTATE_PTID, ptid);
@@ -715,6 +717,14 @@ int main(void)
715 OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); 717 OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
716#endif 718#endif
717 719
720#ifdef CONFIG_KVM_XICS
721 DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
722 arch.xive_saved_state));
723 DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
724 arch.xive_cam_word));
725 DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
726#endif
727
718#ifdef CONFIG_KVM_EXIT_TIMING 728#ifdef CONFIG_KVM_EXIT_TIMING
719 OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); 729 OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
720 OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); 730 OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 7fe8c79e6937..1fce4ddd2e6c 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,6 +29,7 @@ _GLOBAL(__setup_cpu_power7)
29 li r0,0 29 li r0,0
30 mtspr SPRN_LPID,r0 30 mtspr SPRN_LPID,r0
31 mfspr r3,SPRN_LPCR 31 mfspr r3,SPRN_LPCR
32 li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
32 bl __init_LPCR 33 bl __init_LPCR
33 bl __init_tlb_power7 34 bl __init_tlb_power7
34 mtlr r11 35 mtlr r11
@@ -42,6 +43,7 @@ _GLOBAL(__restore_cpu_power7)
42 li r0,0 43 li r0,0
43 mtspr SPRN_LPID,r0 44 mtspr SPRN_LPID,r0
44 mfspr r3,SPRN_LPCR 45 mfspr r3,SPRN_LPCR
46 li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
45 bl __init_LPCR 47 bl __init_LPCR
46 bl __init_tlb_power7 48 bl __init_tlb_power7
47 mtlr r11 49 mtlr r11
@@ -59,6 +61,7 @@ _GLOBAL(__setup_cpu_power8)
59 mtspr SPRN_LPID,r0 61 mtspr SPRN_LPID,r0
60 mfspr r3,SPRN_LPCR 62 mfspr r3,SPRN_LPCR
61 ori r3, r3, LPCR_PECEDH 63 ori r3, r3, LPCR_PECEDH
64 li r4,0 /* LPES = 0 */
62 bl __init_LPCR 65 bl __init_LPCR
63 bl __init_HFSCR 66 bl __init_HFSCR
64 bl __init_tlb_power8 67 bl __init_tlb_power8
@@ -80,6 +83,7 @@ _GLOBAL(__restore_cpu_power8)
80 mtspr SPRN_LPID,r0 83 mtspr SPRN_LPID,r0
81 mfspr r3,SPRN_LPCR 84 mfspr r3,SPRN_LPCR
82 ori r3, r3, LPCR_PECEDH 85 ori r3, r3, LPCR_PECEDH
86 li r4,0 /* LPES = 0 */
83 bl __init_LPCR 87 bl __init_LPCR
84 bl __init_HFSCR 88 bl __init_HFSCR
85 bl __init_tlb_power8 89 bl __init_tlb_power8
@@ -99,10 +103,11 @@ _GLOBAL(__setup_cpu_power9)
99 mtspr SPRN_PSSCR,r0 103 mtspr SPRN_PSSCR,r0
100 mtspr SPRN_LPID,r0 104 mtspr SPRN_LPID,r0
101 mfspr r3,SPRN_LPCR 105 mfspr r3,SPRN_LPCR
102 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) 106 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
103 or r3, r3, r4 107 or r3, r3, r4
104 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) 108 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
105 andc r3, r3, r4 109 andc r3, r3, r4
110 li r4,0 /* LPES = 0 */
106 bl __init_LPCR 111 bl __init_LPCR
107 bl __init_HFSCR 112 bl __init_HFSCR
108 bl __init_tlb_power9 113 bl __init_tlb_power9
@@ -122,10 +127,11 @@ _GLOBAL(__restore_cpu_power9)
122 mtspr SPRN_PSSCR,r0 127 mtspr SPRN_PSSCR,r0
123 mtspr SPRN_LPID,r0 128 mtspr SPRN_LPID,r0
124 mfspr r3,SPRN_LPCR 129 mfspr r3,SPRN_LPCR
125 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) 130 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
126 or r3, r3, r4 131 or r3, r3, r4
127 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) 132 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
128 andc r3, r3, r4 133 andc r3, r3, r4
134 li r4,0 /* LPES = 0 */
129 bl __init_LPCR 135 bl __init_LPCR
130 bl __init_HFSCR 136 bl __init_HFSCR
131 bl __init_tlb_power9 137 bl __init_tlb_power9
@@ -146,7 +152,7 @@ __init_hvmode_206:
146 152
147__init_LPCR: 153__init_LPCR:
148 /* Setup a sane LPCR: 154 /* Setup a sane LPCR:
149 * Called with initial LPCR in R3 155 * Called with initial LPCR in R3 and desired LPES 2-bit value in R4
150 * 156 *
151 * LPES = 0b01 (HSRR0/1 used for 0x500) 157 * LPES = 0b01 (HSRR0/1 used for 0x500)
152 * PECE = 0b111 158 * PECE = 0b111
@@ -157,8 +163,7 @@ __init_LPCR:
157 * 163 *
158 * Other bits untouched for now 164 * Other bits untouched for now
159 */ 165 */
160 li r5,1 166 rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
161 rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
162 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) 167 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
163 li r5,4 168 li r5,4
164 rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 169 rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a018f5cae899..8ee7b44450eb 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -442,46 +442,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
442 return sum; 442 return sum;
443} 443}
444 444
445#ifdef CONFIG_HOTPLUG_CPU
446void migrate_irqs(void)
447{
448 struct irq_desc *desc;
449 unsigned int irq;
450 static int warned;
451 cpumask_var_t mask;
452 const struct cpumask *map = cpu_online_mask;
453
454 alloc_cpumask_var(&mask, GFP_KERNEL);
455
456 for_each_irq_desc(irq, desc) {
457 struct irq_data *data;
458 struct irq_chip *chip;
459
460 data = irq_desc_get_irq_data(desc);
461 if (irqd_is_per_cpu(data))
462 continue;
463
464 chip = irq_data_get_irq_chip(data);
465
466 cpumask_and(mask, irq_data_get_affinity_mask(data), map);
467 if (cpumask_any(mask) >= nr_cpu_ids) {
468 pr_warn("Breaking affinity for irq %i\n", irq);
469 cpumask_copy(mask, map);
470 }
471 if (chip->irq_set_affinity)
472 chip->irq_set_affinity(data, mask, true);
473 else if (desc->action && !(warned++))
474 pr_err("Cannot set affinity for irq %i\n", irq);
475 }
476
477 free_cpumask_var(mask);
478
479 local_irq_enable();
480 mdelay(1);
481 local_irq_disable();
482}
483#endif
484
485static inline void check_stack_overflow(void) 445static inline void check_stack_overflow(void)
486{ 446{
487#ifdef CONFIG_DEBUG_STACKOVERFLOW 447#ifdef CONFIG_DEBUG_STACKOVERFLOW
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 46f89e66a273..6e61cdb89194 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -439,7 +439,14 @@ int generic_cpu_disable(void)
439#ifdef CONFIG_PPC64 439#ifdef CONFIG_PPC64
440 vdso_data->processorCount--; 440 vdso_data->processorCount--;
441#endif 441#endif
442 migrate_irqs(); 442 /* Update affinity of all IRQs previously aimed at this CPU */
443 irq_migrate_all_off_this_cpu();
444
445 /* Give the CPU time to drain in-flight ones */
446 local_irq_enable();
447 mdelay(1);
448 local_irq_disable();
449
443 return 0; 450 return 0;
444} 451}
445 452
@@ -521,6 +528,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
521 528
522 cpu_idle_thread_init(cpu, tidle); 529 cpu_idle_thread_init(cpu, tidle);
523 530
531 /*
532 * The platform might need to allocate resources prior to bringing
533 * up the CPU
534 */
535 if (smp_ops->prepare_cpu) {
536 rc = smp_ops->prepare_cpu(cpu);
537 if (rc)
538 return rc;
539 }
540
524 /* Make sure callin-map entry is 0 (can be leftover a CPU 541 /* Make sure callin-map entry is 0 (can be leftover a CPU
525 * hotplug 542 * hotplug
526 */ 543 */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 65a471de96de..24de532c1736 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -197,6 +197,11 @@ config KVM_XICS
197 Specification) interrupt controller architecture used on 197 Specification) interrupt controller architecture used on
198 IBM POWER (pSeries) servers. 198 IBM POWER (pSeries) servers.
199 199
200config KVM_XIVE
201 bool
202 default y
203 depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
204
200source drivers/vhost/Kconfig 205source drivers/vhost/Kconfig
201 206
202endif # VIRTUALIZATION 207endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b87ccde2137a..d91a2604c496 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -74,7 +74,7 @@ kvm-hv-y += \
74 book3s_64_mmu_radix.o 74 book3s_64_mmu_radix.o
75 75
76kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 76kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
77 book3s_hv_rm_xics.o 77 book3s_hv_rm_xics.o book3s_hv_rm_xive.o
78 78
79ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 79ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
80kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 80kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
@@ -89,6 +89,8 @@ endif
89kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ 89kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
90 book3s_xics.o 90 book3s_xics.o
91 91
92kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
93
92kvm-book3s_64-module-objs := \ 94kvm-book3s_64-module-objs := \
93 $(common-objs-y) \ 95 $(common-objs-y) \
94 book3s.o \ 96 book3s.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 0ff0d07c0757..72d977e30952 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -20,6 +20,10 @@
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/miscdevice.h> 22#include <linux/miscdevice.h>
23#include <linux/gfp.h>
24#include <linux/sched.h>
25#include <linux/vmalloc.h>
26#include <linux/highmem.h>
23 27
24#include <asm/reg.h> 28#include <asm/reg.h>
25#include <asm/cputable.h> 29#include <asm/cputable.h>
@@ -31,10 +35,7 @@
31#include <asm/kvm_book3s.h> 35#include <asm/kvm_book3s.h>
32#include <asm/mmu_context.h> 36#include <asm/mmu_context.h>
33#include <asm/page.h> 37#include <asm/page.h>
34#include <linux/gfp.h> 38#include <asm/xive.h>
35#include <linux/sched.h>
36#include <linux/vmalloc.h>
37#include <linux/highmem.h>
38 39
39#include "book3s.h" 40#include "book3s.h"
40#include "trace.h" 41#include "trace.h"
@@ -596,11 +597,14 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
596 break; 597 break;
597#ifdef CONFIG_KVM_XICS 598#ifdef CONFIG_KVM_XICS
598 case KVM_REG_PPC_ICP_STATE: 599 case KVM_REG_PPC_ICP_STATE:
599 if (!vcpu->arch.icp) { 600 if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
600 r = -ENXIO; 601 r = -ENXIO;
601 break; 602 break;
602 } 603 }
603 *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); 604 if (xive_enabled())
605 *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
606 else
607 *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
604 break; 608 break;
605#endif /* CONFIG_KVM_XICS */ 609#endif /* CONFIG_KVM_XICS */
606 case KVM_REG_PPC_FSCR: 610 case KVM_REG_PPC_FSCR:
@@ -666,12 +670,14 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
666#endif /* CONFIG_VSX */ 670#endif /* CONFIG_VSX */
667#ifdef CONFIG_KVM_XICS 671#ifdef CONFIG_KVM_XICS
668 case KVM_REG_PPC_ICP_STATE: 672 case KVM_REG_PPC_ICP_STATE:
669 if (!vcpu->arch.icp) { 673 if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
670 r = -ENXIO; 674 r = -ENXIO;
671 break; 675 break;
672 } 676 }
673 r = kvmppc_xics_set_icp(vcpu, 677 if (xive_enabled())
674 set_reg_val(id, *val)); 678 r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
679 else
680 r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
675 break; 681 break;
676#endif /* CONFIG_KVM_XICS */ 682#endif /* CONFIG_KVM_XICS */
677 case KVM_REG_PPC_FSCR: 683 case KVM_REG_PPC_FSCR:
@@ -942,6 +948,50 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
942 return kvm->arch.kvm_ops->hcall_implemented(hcall); 948 return kvm->arch.kvm_ops->hcall_implemented(hcall);
943} 949}
944 950
951#ifdef CONFIG_KVM_XICS
952int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
953 bool line_status)
954{
955 if (xive_enabled())
956 return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
957 line_status);
958 else
959 return kvmppc_xics_set_irq(kvm, irq_source_id, irq, level,
960 line_status);
961}
962
963int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
964 struct kvm *kvm, int irq_source_id,
965 int level, bool line_status)
966{
967 return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
968 level, line_status);
969}
970static int kvmppc_book3s_set_irq(struct kvm_kernel_irq_routing_entry *e,
971 struct kvm *kvm, int irq_source_id, int level,
972 bool line_status)
973{
974 return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
975}
976
977int kvm_irq_map_gsi(struct kvm *kvm,
978 struct kvm_kernel_irq_routing_entry *entries, int gsi)
979{
980 entries->gsi = gsi;
981 entries->type = KVM_IRQ_ROUTING_IRQCHIP;
982 entries->set = kvmppc_book3s_set_irq;
983 entries->irqchip.irqchip = 0;
984 entries->irqchip.pin = gsi;
985 return 1;
986}
987
988int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
989{
990 return pin;
991}
992
993#endif /* CONFIG_KVM_XICS */
994
945static int kvmppc_book3s_init(void) 995static int kvmppc_book3s_init(void)
946{ 996{
947 int r; 997 int r;
@@ -952,12 +1002,25 @@ static int kvmppc_book3s_init(void)
952#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 1002#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
953 r = kvmppc_book3s_init_pr(); 1003 r = kvmppc_book3s_init_pr();
954#endif 1004#endif
955 return r;
956 1005
1006#ifdef CONFIG_KVM_XICS
1007#ifdef CONFIG_KVM_XIVE
1008 if (xive_enabled()) {
1009 kvmppc_xive_init_module();
1010 kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
1011 } else
1012#endif
1013 kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
1014#endif
1015 return r;
957} 1016}
958 1017
959static void kvmppc_book3s_exit(void) 1018static void kvmppc_book3s_exit(void)
960{ 1019{
1020#ifdef CONFIG_KVM_XICS
1021 if (xive_enabled())
1022 kvmppc_xive_exit_module();
1023#endif
961#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 1024#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
962 kvmppc_book3s_exit_pr(); 1025 kvmppc_book3s_exit_pr();
963#endif 1026#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d42182eb0c26..42b7a4fd57d9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -35,6 +35,15 @@
35#include <linux/srcu.h> 35#include <linux/srcu.h>
36#include <linux/miscdevice.h> 36#include <linux/miscdevice.h>
37#include <linux/debugfs.h> 37#include <linux/debugfs.h>
38#include <linux/gfp.h>
39#include <linux/vmalloc.h>
40#include <linux/highmem.h>
41#include <linux/hugetlb.h>
42#include <linux/kvm_irqfd.h>
43#include <linux/irqbypass.h>
44#include <linux/module.h>
45#include <linux/compiler.h>
46#include <linux/of.h>
38 47
39#include <asm/reg.h> 48#include <asm/reg.h>
40#include <asm/cputable.h> 49#include <asm/cputable.h>
@@ -58,15 +67,7 @@
58#include <asm/mmu.h> 67#include <asm/mmu.h>
59#include <asm/opal.h> 68#include <asm/opal.h>
60#include <asm/xics.h> 69#include <asm/xics.h>
61#include <linux/gfp.h> 70#include <asm/xive.h>
62#include <linux/vmalloc.h>
63#include <linux/highmem.h>
64#include <linux/hugetlb.h>
65#include <linux/kvm_irqfd.h>
66#include <linux/irqbypass.h>
67#include <linux/module.h>
68#include <linux/compiler.h>
69#include <linux/of.h>
70 71
71#include "book3s.h" 72#include "book3s.h"
72 73
@@ -837,6 +838,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
837 case H_IPOLL: 838 case H_IPOLL:
838 case H_XIRR_X: 839 case H_XIRR_X:
839 if (kvmppc_xics_enabled(vcpu)) { 840 if (kvmppc_xics_enabled(vcpu)) {
841 if (xive_enabled()) {
842 ret = H_NOT_AVAILABLE;
843 return RESUME_GUEST;
844 }
840 ret = kvmppc_xics_hcall(vcpu, req); 845 ret = kvmppc_xics_hcall(vcpu, req);
841 break; 846 break;
842 } 847 }
@@ -2947,8 +2952,12 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
2947 r = kvmppc_book3s_hv_page_fault(run, vcpu, 2952 r = kvmppc_book3s_hv_page_fault(run, vcpu,
2948 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); 2953 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
2949 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 2954 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2950 } else if (r == RESUME_PASSTHROUGH) 2955 } else if (r == RESUME_PASSTHROUGH) {
2951 r = kvmppc_xics_rm_complete(vcpu, 0); 2956 if (WARN_ON(xive_enabled()))
2957 r = H_SUCCESS;
2958 else
2959 r = kvmppc_xics_rm_complete(vcpu, 0);
2960 }
2952 } while (is_kvmppc_resume_guest(r)); 2961 } while (is_kvmppc_resume_guest(r));
2953 2962
2954 out: 2963 out:
@@ -3400,10 +3409,20 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3400 /* 3409 /*
3401 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed) 3410 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
3402 * Set HVICE bit to enable hypervisor virtualization interrupts. 3411 * Set HVICE bit to enable hypervisor virtualization interrupts.
3412 * Set HEIC to prevent OS interrupts to go to hypervisor (should
3413 * be unnecessary but better safe than sorry in case we re-enable
3414 * EE in HV mode with this LPCR still set)
3403 */ 3415 */
3404 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 3416 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
3405 lpcr &= ~LPCR_VPM0; 3417 lpcr &= ~LPCR_VPM0;
3406 lpcr |= LPCR_HVICE; 3418 lpcr |= LPCR_HVICE | LPCR_HEIC;
3419
3420 /*
3421 * If xive is enabled, we route 0x500 interrupts directly
3422 * to the guest.
3423 */
3424 if (xive_enabled())
3425 lpcr |= LPCR_LPES;
3407 } 3426 }
3408 3427
3409 /* 3428 /*
@@ -3533,7 +3552,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3533 struct kvmppc_irq_map *irq_map; 3552 struct kvmppc_irq_map *irq_map;
3534 struct kvmppc_passthru_irqmap *pimap; 3553 struct kvmppc_passthru_irqmap *pimap;
3535 struct irq_chip *chip; 3554 struct irq_chip *chip;
3536 int i; 3555 int i, rc = 0;
3537 3556
3538 if (!kvm_irq_bypass) 3557 if (!kvm_irq_bypass)
3539 return 1; 3558 return 1;
@@ -3558,10 +3577,10 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3558 /* 3577 /*
3559 * For now, we only support interrupts for which the EOI operation 3578 * For now, we only support interrupts for which the EOI operation
3560 * is an OPAL call followed by a write to XIRR, since that's 3579 * is an OPAL call followed by a write to XIRR, since that's
3561 * what our real-mode EOI code does. 3580 * what our real-mode EOI code does, or a XIVE interrupt
3562 */ 3581 */
3563 chip = irq_data_get_irq_chip(&desc->irq_data); 3582 chip = irq_data_get_irq_chip(&desc->irq_data);
3564 if (!chip || !is_pnv_opal_msi(chip)) { 3583 if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
3565 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", 3584 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
3566 host_irq, guest_gsi); 3585 host_irq, guest_gsi);
3567 mutex_unlock(&kvm->lock); 3586 mutex_unlock(&kvm->lock);
@@ -3603,7 +3622,12 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3603 if (i == pimap->n_mapped) 3622 if (i == pimap->n_mapped)
3604 pimap->n_mapped++; 3623 pimap->n_mapped++;
3605 3624
3606 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); 3625 if (xive_enabled())
3626 rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
3627 else
3628 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
3629 if (rc)
3630 irq_map->r_hwirq = 0;
3607 3631
3608 mutex_unlock(&kvm->lock); 3632 mutex_unlock(&kvm->lock);
3609 3633
@@ -3614,7 +3638,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3614{ 3638{
3615 struct irq_desc *desc; 3639 struct irq_desc *desc;
3616 struct kvmppc_passthru_irqmap *pimap; 3640 struct kvmppc_passthru_irqmap *pimap;
3617 int i; 3641 int i, rc = 0;
3618 3642
3619 if (!kvm_irq_bypass) 3643 if (!kvm_irq_bypass)
3620 return 0; 3644 return 0;
@@ -3639,9 +3663,12 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3639 return -ENODEV; 3663 return -ENODEV;
3640 } 3664 }
3641 3665
3642 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); 3666 if (xive_enabled())
3667 rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
3668 else
3669 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
3643 3670
 3644 /* invalidate the entry */ 3671 /* invalidate the entry (what to do on error from the above ?) */
3645 pimap->mapped[i].r_hwirq = 0; 3672 pimap->mapped[i].r_hwirq = 0;
3646 3673
3647 /* 3674 /*
@@ -3650,7 +3677,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3650 */ 3677 */
3651 unlock: 3678 unlock:
3652 mutex_unlock(&kvm->lock); 3679 mutex_unlock(&kvm->lock);
3653 return 0; 3680 return rc;
3654} 3681}
3655 3682
3656static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons, 3683static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
@@ -3928,7 +3955,7 @@ static int kvmppc_book3s_init_hv(void)
3928 * indirectly, via OPAL. 3955 * indirectly, via OPAL.
3929 */ 3956 */
3930#ifdef CONFIG_SMP 3957#ifdef CONFIG_SMP
3931 if (!local_paca->kvm_hstate.xics_phys) { 3958 if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
3932 struct device_node *np; 3959 struct device_node *np;
3933 3960
3934 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); 3961 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 4d6c64b3041c..846b40cb3a62 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -23,6 +23,7 @@
23#include <asm/kvm_book3s.h> 23#include <asm/kvm_book3s.h>
24#include <asm/archrandom.h> 24#include <asm/archrandom.h>
25#include <asm/xics.h> 25#include <asm/xics.h>
26#include <asm/xive.h>
26#include <asm/dbell.h> 27#include <asm/dbell.h>
27#include <asm/cputhreads.h> 28#include <asm/cputhreads.h>
28#include <asm/io.h> 29#include <asm/io.h>
@@ -31,6 +32,24 @@
31 32
32#define KVM_CMA_CHUNK_ORDER 18 33#define KVM_CMA_CHUNK_ORDER 18
33 34
35#include "book3s_xics.h"
36#include "book3s_xive.h"
37
38/*
39 * The XIVE module will populate these when it loads
40 */
41unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
42unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
43int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
44 unsigned long mfrr);
45int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
46int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
47EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
48EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
49EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
50EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
51EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
52
34/* 53/*
35 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) 54 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
36 * should be power of 2. 55 * should be power of 2.
@@ -193,12 +212,6 @@ long kvmppc_h_random(struct kvm_vcpu *vcpu)
193 return H_HARDWARE; 212 return H_HARDWARE;
194} 213}
195 214
196static inline void rm_writeb(unsigned long paddr, u8 val)
197{
198 __asm__ __volatile__("stbcix %0,0,%1"
199 : : "r" (val), "r" (paddr) : "memory");
200}
201
202/* 215/*
203 * Send an interrupt or message to another CPU. 216 * Send an interrupt or message to another CPU.
204 * The caller needs to include any barrier needed to order writes 217 * The caller needs to include any barrier needed to order writes
@@ -206,7 +219,7 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
206 */ 219 */
207void kvmhv_rm_send_ipi(int cpu) 220void kvmhv_rm_send_ipi(int cpu)
208{ 221{
209 unsigned long xics_phys; 222 void __iomem *xics_phys;
210 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); 223 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
211 224
212 /* On POWER9 we can use msgsnd for any destination cpu. */ 225 /* On POWER9 we can use msgsnd for any destination cpu. */
@@ -215,6 +228,7 @@ void kvmhv_rm_send_ipi(int cpu)
215 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); 228 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
216 return; 229 return;
217 } 230 }
231
218 /* On POWER8 for IPIs to threads in the same core, use msgsnd. */ 232 /* On POWER8 for IPIs to threads in the same core, use msgsnd. */
219 if (cpu_has_feature(CPU_FTR_ARCH_207S) && 233 if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
220 cpu_first_thread_sibling(cpu) == 234 cpu_first_thread_sibling(cpu) ==
@@ -224,10 +238,14 @@ void kvmhv_rm_send_ipi(int cpu)
224 return; 238 return;
225 } 239 }
226 240
241 /* We should never reach this */
242 if (WARN_ON_ONCE(xive_enabled()))
243 return;
244
227 /* Else poke the target with an IPI */ 245 /* Else poke the target with an IPI */
228 xics_phys = paca[cpu].kvm_hstate.xics_phys; 246 xics_phys = paca[cpu].kvm_hstate.xics_phys;
229 if (xics_phys) 247 if (xics_phys)
230 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); 248 __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
231 else 249 else
232 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY); 250 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
233} 251}
@@ -386,6 +404,9 @@ long kvmppc_read_intr(void)
386 long rc; 404 long rc;
387 bool again; 405 bool again;
388 406
407 if (xive_enabled())
408 return 1;
409
389 do { 410 do {
390 again = false; 411 again = false;
391 rc = kvmppc_read_one_intr(&again); 412 rc = kvmppc_read_one_intr(&again);
@@ -397,13 +418,16 @@ long kvmppc_read_intr(void)
397 418
398static long kvmppc_read_one_intr(bool *again) 419static long kvmppc_read_one_intr(bool *again)
399{ 420{
400 unsigned long xics_phys; 421 void __iomem *xics_phys;
401 u32 h_xirr; 422 u32 h_xirr;
402 __be32 xirr; 423 __be32 xirr;
403 u32 xisr; 424 u32 xisr;
404 u8 host_ipi; 425 u8 host_ipi;
405 int64_t rc; 426 int64_t rc;
406 427
428 if (xive_enabled())
429 return 1;
430
407 /* see if a host IPI is pending */ 431 /* see if a host IPI is pending */
408 host_ipi = local_paca->kvm_hstate.host_ipi; 432 host_ipi = local_paca->kvm_hstate.host_ipi;
409 if (host_ipi) 433 if (host_ipi)
@@ -415,7 +439,7 @@ static long kvmppc_read_one_intr(bool *again)
415 if (!xics_phys) 439 if (!xics_phys)
416 rc = opal_int_get_xirr(&xirr, false); 440 rc = opal_int_get_xirr(&xirr, false);
417 else 441 else
418 xirr = _lwzcix(xics_phys + XICS_XIRR); 442 xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
419 if (rc < 0) 443 if (rc < 0)
420 return 1; 444 return 1;
421 445
@@ -445,8 +469,8 @@ static long kvmppc_read_one_intr(bool *again)
445 if (xisr == XICS_IPI) { 469 if (xisr == XICS_IPI) {
446 rc = 0; 470 rc = 0;
447 if (xics_phys) { 471 if (xics_phys) {
448 _stbcix(xics_phys + XICS_MFRR, 0xff); 472 __raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
449 _stwcix(xics_phys + XICS_XIRR, xirr); 473 __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
450 } else { 474 } else {
451 opal_int_set_mfrr(hard_smp_processor_id(), 0xff); 475 opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
452 rc = opal_int_eoi(h_xirr); 476 rc = opal_int_eoi(h_xirr);
@@ -471,7 +495,8 @@ static long kvmppc_read_one_intr(bool *again)
471 * we need to resend that IPI, bummer 495 * we need to resend that IPI, bummer
472 */ 496 */
473 if (xics_phys) 497 if (xics_phys)
474 _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); 498 __raw_rm_writeb(IPI_PRIORITY,
499 xics_phys + XICS_MFRR);
475 else 500 else
476 opal_int_set_mfrr(hard_smp_processor_id(), 501 opal_int_set_mfrr(hard_smp_processor_id(),
477 IPI_PRIORITY); 502 IPI_PRIORITY);
@@ -487,3 +512,84 @@ static long kvmppc_read_one_intr(bool *again)
487 512
488 return kvmppc_check_passthru(xisr, xirr, again); 513 return kvmppc_check_passthru(xisr, xirr, again);
489} 514}
515
516#ifdef CONFIG_KVM_XICS
517static inline bool is_rm(void)
518{
519 return !(mfmsr() & MSR_DR);
520}
521
522unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
523{
524 if (xive_enabled()) {
525 if (is_rm())
526 return xive_rm_h_xirr(vcpu);
527 if (unlikely(!__xive_vm_h_xirr))
528 return H_NOT_AVAILABLE;
529 return __xive_vm_h_xirr(vcpu);
530 } else
531 return xics_rm_h_xirr(vcpu);
532}
533
534unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
535{
536 vcpu->arch.gpr[5] = get_tb();
537 if (xive_enabled()) {
538 if (is_rm())
539 return xive_rm_h_xirr(vcpu);
540 if (unlikely(!__xive_vm_h_xirr))
541 return H_NOT_AVAILABLE;
542 return __xive_vm_h_xirr(vcpu);
543 } else
544 return xics_rm_h_xirr(vcpu);
545}
546
547unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
548{
549 if (xive_enabled()) {
550 if (is_rm())
551 return xive_rm_h_ipoll(vcpu, server);
552 if (unlikely(!__xive_vm_h_ipoll))
553 return H_NOT_AVAILABLE;
554 return __xive_vm_h_ipoll(vcpu, server);
555 } else
556 return H_TOO_HARD;
557}
558
559int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
560 unsigned long mfrr)
561{
562 if (xive_enabled()) {
563 if (is_rm())
564 return xive_rm_h_ipi(vcpu, server, mfrr);
565 if (unlikely(!__xive_vm_h_ipi))
566 return H_NOT_AVAILABLE;
567 return __xive_vm_h_ipi(vcpu, server, mfrr);
568 } else
569 return xics_rm_h_ipi(vcpu, server, mfrr);
570}
571
572int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
573{
574 if (xive_enabled()) {
575 if (is_rm())
576 return xive_rm_h_cppr(vcpu, cppr);
577 if (unlikely(!__xive_vm_h_cppr))
578 return H_NOT_AVAILABLE;
579 return __xive_vm_h_cppr(vcpu, cppr);
580 } else
581 return xics_rm_h_cppr(vcpu, cppr);
582}
583
584int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
585{
586 if (xive_enabled()) {
587 if (is_rm())
588 return xive_rm_h_eoi(vcpu, xirr);
589 if (unlikely(!__xive_vm_h_eoi))
590 return H_NOT_AVAILABLE;
591 return __xive_vm_h_eoi(vcpu, xirr);
592 } else
593 return xics_rm_h_eoi(vcpu, xirr);
594}
595#endif /* CONFIG_KVM_XICS */
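
The wrappers above route each interrupt hcall to the real-mode XIVE handler, the virtual-mode hook filled in by the XIVE module, or the legacy XICS code. As a minimal sketch of the hook side, the XIVE backend would be expected to publish its template-generated xive_vm_* handlers through the exported __xive_vm_* pointers once it is up; the wrapper function below and its name are illustrative assumptions, only the hook pointers and the handler names come from this series.

/* Sketch only: publish the virtual-mode handlers once the XIVE backend is up. */
static void kvmppc_xive_publish_vm_hooks(void)
{
	if (!xive_enabled())
		return;
	__xive_vm_h_xirr = xive_vm_h_xirr;
	__xive_vm_h_ipoll = xive_vm_h_ipoll;
	__xive_vm_h_ipi = xive_vm_h_ipi;
	__xive_vm_h_cppr = xive_vm_h_cppr;
	__xive_vm_h_eoi = xive_vm_h_eoi;
}
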
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index e78542d99cd6..f8068801ac36 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -485,7 +485,7 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
485} 485}
486 486
487 487
488unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) 488unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
489{ 489{
490 union kvmppc_icp_state old_state, new_state; 490 union kvmppc_icp_state old_state, new_state;
491 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 491 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -523,8 +523,8 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
523 return check_too_hard(xics, icp); 523 return check_too_hard(xics, icp);
524} 524}
525 525
526int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, 526int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
527 unsigned long mfrr) 527 unsigned long mfrr)
528{ 528{
529 union kvmppc_icp_state old_state, new_state; 529 union kvmppc_icp_state old_state, new_state;
530 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 530 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -610,7 +610,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
610 return check_too_hard(xics, this_icp); 610 return check_too_hard(xics, this_icp);
611} 611}
612 612
613int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) 613int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
614{ 614{
615 union kvmppc_icp_state old_state, new_state; 615 union kvmppc_icp_state old_state, new_state;
616 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 616 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -730,7 +730,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
730 return check_too_hard(xics, icp); 730 return check_too_hard(xics, icp);
731} 731}
732 732
733int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) 733int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
734{ 734{
735 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 735 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
736 struct kvmppc_icp *icp = vcpu->arch.icp; 736 struct kvmppc_icp *icp = vcpu->arch.icp;
@@ -766,7 +766,7 @@ unsigned long eoi_rc;
766 766
767static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) 767static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
768{ 768{
769 unsigned long xics_phys; 769 void __iomem *xics_phys;
770 int64_t rc; 770 int64_t rc;
771 771
772 rc = pnv_opal_pci_msi_eoi(c, hwirq); 772 rc = pnv_opal_pci_msi_eoi(c, hwirq);
@@ -779,7 +779,7 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
779 /* EOI it */ 779 /* EOI it */
780 xics_phys = local_paca->kvm_hstate.xics_phys; 780 xics_phys = local_paca->kvm_hstate.xics_phys;
781 if (xics_phys) { 781 if (xics_phys) {
782 _stwcix(xics_phys + XICS_XIRR, xirr); 782 __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
783 } else { 783 } else {
784 rc = opal_int_eoi(be32_to_cpu(xirr)); 784 rc = opal_int_eoi(be32_to_cpu(xirr));
785 *again = rc > 0; 785 *again = rc > 0;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
new file mode 100644
index 000000000000..abf5f01b6eb1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c
@@ -0,0 +1,47 @@
1#include <linux/kernel.h>
2#include <linux/kvm_host.h>
3#include <linux/err.h>
4#include <linux/kernel_stat.h>
5
6#include <asm/kvm_book3s.h>
7#include <asm/kvm_ppc.h>
8#include <asm/hvcall.h>
9#include <asm/xics.h>
10#include <asm/debug.h>
11#include <asm/synch.h>
12#include <asm/cputhreads.h>
13#include <asm/pgtable.h>
14#include <asm/ppc-opcode.h>
15#include <asm/pnv-pci.h>
16#include <asm/opal.h>
17#include <asm/smp.h>
18#include <asm/asm-prototypes.h>
19#include <asm/xive.h>
20#include <asm/xive-regs.h>
21
22#include "book3s_xive.h"
23
24/* XXX */
25#include <asm/udbg.h>
26//#define DBG(fmt...) udbg_printf(fmt)
27#define DBG(fmt...) do { } while(0)
28
29static inline void __iomem *get_tima_phys(void)
30{
31 return local_paca->kvm_hstate.xive_tima_phys;
32}
33
34#undef XIVE_RUNTIME_CHECKS
35#define X_PFX xive_rm_
36#define X_STATIC
37#define X_STAT_PFX stat_rm_
38#define __x_tima get_tima_phys()
39#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_page))
40#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_page))
41#define __x_readb __raw_rm_readb
42#define __x_writeb __raw_rm_writeb
43#define __x_readw __raw_rm_readw
44#define __x_readq __raw_rm_readq
45#define __x_writeq __raw_rm_writeq
46
47#include "book3s_xive_template.c"
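
This new file builds the real-mode flavour of the shared template by defining X_PFX and the __x_* accessors before including book3s_xive_template.c (book3s_xive.c does the same with the virtual-mode accessors). A reduced, self-contained sketch of the token-pasting pattern follows; GLUE, MY_PFX and the sample function are illustrative stand-ins, not the template's actual contents.

#define GLUE2(a, b)	a##b
#define GLUE(a, b)	GLUE2(a, b)		/* expand MY_PFX before pasting */

#define MY_PFX		xive_rm_		/* xive_vm_ in the virtual-mode build */
#define MY_READB	__raw_rm_readb		/* real-mode MMIO accessor */

static u8 GLUE(MY_PFX, peek_esb)(void __iomem *esb)
{
	/* expands to xive_rm_peek_esb(), bound to the real-mode accessor */
	return MY_READB(esb);
}
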
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7c6477d1840a..bdb3f76ceb6b 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -30,6 +30,7 @@
30#include <asm/book3s/64/mmu-hash.h> 30#include <asm/book3s/64/mmu-hash.h>
31#include <asm/tm.h> 31#include <asm/tm.h>
32#include <asm/opal.h> 32#include <asm/opal.h>
33#include <asm/xive-regs.h>
33 34
34#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) 35#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
35 36
@@ -970,6 +971,23 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
970 cmpwi r3, 512 /* 1 microsecond */ 971 cmpwi r3, 512 /* 1 microsecond */
971 blt hdec_soon 972 blt hdec_soon
972 973
974#ifdef CONFIG_KVM_XICS
975 /* We are entering the guest on that thread, push VCPU to XIVE */
976 ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
977 cmpldi cr0, r10, r0
978 beq no_xive
979 ld r11, VCPU_XIVE_SAVED_STATE(r4)
980 li r9, TM_QW1_OS
981 stdcix r11,r9,r10
982 eieio
983 lwz r11, VCPU_XIVE_CAM_WORD(r4)
984 li r9, TM_QW1_OS + TM_WORD2
985 stwcix r11,r9,r10
986 li r9, 1
987 stw r9, VCPU_XIVE_PUSHED(r4)
988no_xive:
989#endif /* CONFIG_KVM_XICS */
990
973deliver_guest_interrupt: 991deliver_guest_interrupt:
974 ld r6, VCPU_CTR(r4) 992 ld r6, VCPU_CTR(r4)
975 ld r7, VCPU_XER(r4) 993 ld r7, VCPU_XER(r4)
@@ -1307,6 +1325,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1307 blt deliver_guest_interrupt 1325 blt deliver_guest_interrupt
1308 1326
1309guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 1327guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1328#ifdef CONFIG_KVM_XICS
1329 /* We are exiting, pull the VP from the XIVE */
1330 lwz r0, VCPU_XIVE_PUSHED(r9)
1331 cmpwi cr0, r0, 0
1332 beq 1f
1333 li r7, TM_SPC_PULL_OS_CTX
1334 li r6, TM_QW1_OS
1335 mfmsr r0
1336 andi. r0, r0, MSR_IR /* in real mode? */
1337 beq 2f
1338 ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
1339 cmpldi cr0, r10, 0
1340 beq 1f
1341 /* First load to pull the context, we ignore the value */
1342 lwzx r11, r7, r10
1343 eieio
1344 /* Second load to recover the context state (Words 0 and 1) */
1345 ldx r11, r6, r10
1346 b 3f
13472: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
1348 cmpldi cr0, r10, 0
1349 beq 1f
1350 /* First load to pull the context, we ignore the value */
1351 lwzcix r11, r7, r10
1352 eieio
1353 /* Second load to recover the context state (Words 0 and 1) */
1354 ldcix r11, r6, r10
13553: std r11, VCPU_XIVE_SAVED_STATE(r9)
1356 /* Fixup some of the state for the next load */
1357 li r10, 0
1358 li r0, 0xff
1359 stw r10, VCPU_XIVE_PUSHED(r9)
1360 stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
1361 stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
13621:
1363#endif /* CONFIG_KVM_XICS */
1310 /* Save more register state */ 1364 /* Save more register state */
1311 mfdar r6 1365 mfdar r6
1312 mfdsisr r7 1366 mfdsisr r7
@@ -2011,7 +2065,7 @@ hcall_real_table:
2011 .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table 2065 .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
2012 .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table 2066 .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
2013 .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table 2067 .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
2014 .long 0 /* 0x70 - H_IPOLL */ 2068 .long DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
2015 .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table 2069 .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
2016#else 2070#else
2017 .long 0 /* 0x64 - H_EOI */ 2071 .long 0 /* 0x64 - H_EOI */
@@ -2181,7 +2235,11 @@ hcall_real_table:
2181 .long 0 /* 0x2f0 */ 2235 .long 0 /* 0x2f0 */
2182 .long 0 /* 0x2f4 */ 2236 .long 0 /* 0x2f4 */
2183 .long 0 /* 0x2f8 */ 2237 .long 0 /* 0x2f8 */
2184 .long 0 /* 0x2fc */ 2238#ifdef CONFIG_KVM_XICS
2239 .long DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
2240#else
2241 .long 0 /* 0x2fc - H_XIRR_X*/
2242#endif
2185 .long DOTSYM(kvmppc_h_random) - hcall_real_table 2243 .long DOTSYM(kvmppc_h_random) - hcall_real_table
2186 .globl hcall_real_table_end 2244 .globl hcall_real_table_end
2187hcall_real_table_end: 2245hcall_real_table_end:
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 20528701835b..2d3b2b1cc272 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -16,6 +16,7 @@
16#include <asm/kvm_ppc.h> 16#include <asm/kvm_ppc.h>
17#include <asm/hvcall.h> 17#include <asm/hvcall.h>
18#include <asm/rtas.h> 18#include <asm/rtas.h>
19#include <asm/xive.h>
19 20
20#ifdef CONFIG_KVM_XICS 21#ifdef CONFIG_KVM_XICS
21static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) 22static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -32,7 +33,10 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
32 server = be32_to_cpu(args->args[1]); 33 server = be32_to_cpu(args->args[1]);
33 priority = be32_to_cpu(args->args[2]); 34 priority = be32_to_cpu(args->args[2]);
34 35
35 rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); 36 if (xive_enabled())
37 rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
38 else
39 rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
36 if (rc) 40 if (rc)
37 rc = -3; 41 rc = -3;
38out: 42out:
@@ -52,7 +56,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
52 irq = be32_to_cpu(args->args[0]); 56 irq = be32_to_cpu(args->args[0]);
53 57
54 server = priority = 0; 58 server = priority = 0;
55 rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); 59 if (xive_enabled())
60 rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
61 else
62 rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
56 if (rc) { 63 if (rc) {
57 rc = -3; 64 rc = -3;
58 goto out; 65 goto out;
@@ -76,7 +83,10 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
76 83
77 irq = be32_to_cpu(args->args[0]); 84 irq = be32_to_cpu(args->args[0]);
78 85
79 rc = kvmppc_xics_int_off(vcpu->kvm, irq); 86 if (xive_enabled())
87 rc = kvmppc_xive_int_off(vcpu->kvm, irq);
88 else
89 rc = kvmppc_xics_int_off(vcpu->kvm, irq);
80 if (rc) 90 if (rc)
81 rc = -3; 91 rc = -3;
82out: 92out:
@@ -95,7 +105,10 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
95 105
96 irq = be32_to_cpu(args->args[0]); 106 irq = be32_to_cpu(args->args[0]);
97 107
98 rc = kvmppc_xics_int_on(vcpu->kvm, irq); 108 if (xive_enabled())
109 rc = kvmppc_xive_int_on(vcpu->kvm, irq);
110 else
111 rc = kvmppc_xics_int_on(vcpu->kvm, irq);
99 if (rc) 112 if (rc)
100 rc = -3; 113 rc = -3;
101out: 114out:
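
Each of the four RTAS handlers above performs the same xive_enabled() dispatch between the new XIVE backend and the legacy XICS emulation. Purely as an illustration of that pattern (not part of the patch, helper name made up), the set-xive case could be expressed as:

static int kvm_rtas_route_set_xive(struct kvm *kvm, u32 irq,
				   u32 server, u32 priority)
{
	/* Same dispatch each handler performs inline above. */
	if (xive_enabled())
		return kvmppc_xive_set_xive(kvm, irq, server, priority);
	return kvmppc_xics_set_xive(kvm, irq, server, priority);
}
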
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index e48803e2918d..e6829c415bc8 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1084,7 +1084,7 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
1084 return xics->ics[icsid]; 1084 return xics->ics[icsid];
1085} 1085}
1086 1086
1087int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) 1087static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
1088{ 1088{
1089 struct kvmppc_icp *icp; 1089 struct kvmppc_icp *icp;
1090 1090
@@ -1307,8 +1307,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
1307 return 0; 1307 return 0;
1308} 1308}
1309 1309
1310int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, 1310int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1311 bool line_status) 1311 bool line_status)
1312{ 1312{
1313 struct kvmppc_xics *xics = kvm->arch.xics; 1313 struct kvmppc_xics *xics = kvm->arch.xics;
1314 1314
@@ -1317,14 +1317,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1317 return ics_deliver_irq(xics, irq, level); 1317 return ics_deliver_irq(xics, irq, level);
1318} 1318}
1319 1319
1320int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
1321 struct kvm *kvm, int irq_source_id,
1322 int level, bool line_status)
1323{
1324 return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
1325 level, line_status);
1326}
1327
1328static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1320static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1329{ 1321{
1330 struct kvmppc_xics *xics = dev->private; 1322 struct kvmppc_xics *xics = dev->private;
@@ -1458,29 +1450,6 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
1458 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; 1450 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
1459} 1451}
1460 1452
1461static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e,
1462 struct kvm *kvm, int irq_source_id, int level,
1463 bool line_status)
1464{
1465 return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
1466}
1467
1468int kvm_irq_map_gsi(struct kvm *kvm,
1469 struct kvm_kernel_irq_routing_entry *entries, int gsi)
1470{
1471 entries->gsi = gsi;
1472 entries->type = KVM_IRQ_ROUTING_IRQCHIP;
1473 entries->set = xics_set_irq;
1474 entries->irqchip.irqchip = 0;
1475 entries->irqchip.pin = gsi;
1476 return 1;
1477}
1478
1479int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
1480{
1481 return pin;
1482}
1483
1484void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq, 1453void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
1485 unsigned long host_irq) 1454 unsigned long host_irq)
1486{ 1455{
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index ec5474cf70c6..453c9e518c19 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -10,6 +10,7 @@
10#ifndef _KVM_PPC_BOOK3S_XICS_H 10#ifndef _KVM_PPC_BOOK3S_XICS_H
11#define _KVM_PPC_BOOK3S_XICS_H 11#define _KVM_PPC_BOOK3S_XICS_H
12 12
13#ifdef CONFIG_KVM_XICS
13/* 14/*
14 * We use a two-level tree to store interrupt source information. 15 * We use a two-level tree to store interrupt source information.
15 * There are up to 1024 ICS nodes, each of which can represent 16 * There are up to 1024 ICS nodes, each of which can represent
@@ -144,5 +145,11 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
144 return ics; 145 return ics;
145} 146}
146 147
148extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);
149extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
150 unsigned long mfrr);
151extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
152extern int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
147 153
154#endif /* CONFIG_KVM_XICS */
148#endif /* _KVM_PPC_BOOK3S_XICS_H */ 155#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
new file mode 100644
index 000000000000..7807ee17af4b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -0,0 +1,1893 @@
1/*
2 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#define pr_fmt(fmt) "xive-kvm: " fmt
10
11#include <linux/kernel.h>
12#include <linux/kvm_host.h>
13#include <linux/err.h>
14#include <linux/gfp.h>
15#include <linux/spinlock.h>
16#include <linux/delay.h>
17#include <linux/percpu.h>
18#include <linux/cpumask.h>
19#include <asm/uaccess.h>
20#include <asm/kvm_book3s.h>
21#include <asm/kvm_ppc.h>
22#include <asm/hvcall.h>
23#include <asm/xics.h>
24#include <asm/xive.h>
25#include <asm/xive-regs.h>
26#include <asm/debug.h>
27#include <asm/time.h>
28#include <asm/opal.h>
29
30#include <linux/debugfs.h>
31#include <linux/seq_file.h>
32
33#include "book3s_xive.h"
34
35
36/*
37 * Virtual mode variants of the hcalls for use on radix/radix
38 * with AIL. They require the VCPU's VP to be "pushed"
39 *
 40 * We still instantiate them here because we use some of the
41 * generated utility functions as well in this file.
42 */
43#define XIVE_RUNTIME_CHECKS
44#define X_PFX xive_vm_
45#define X_STATIC static
46#define X_STAT_PFX stat_vm_
47#define __x_tima xive_tima
48#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
49#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
50#define __x_readb __raw_readb
51#define __x_writeb __raw_writeb
52#define __x_readw __raw_readw
53#define __x_readq __raw_readq
54#define __x_writeq __raw_writeq
55
56#include "book3s_xive_template.c"
57
58/*
59 * We leave a gap of a couple of interrupts in the queue to
60 * account for the IPI and additional safety guard.
61 */
62#define XIVE_Q_GAP 2
63
64/*
65 * This is a simple trigger for a generic XIVE IRQ. This must
66 * only be called for interrupts that support a trigger page
67 */
68static bool xive_irq_trigger(struct xive_irq_data *xd)
69{
70 /* This should be only for MSIs */
71 if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
72 return false;
73
74 /* Those interrupts should always have a trigger page */
75 if (WARN_ON(!xd->trig_mmio))
76 return false;
77
78 out_be64(xd->trig_mmio, 0);
79
80 return true;
81}
82
83static irqreturn_t xive_esc_irq(int irq, void *data)
84{
85 struct kvm_vcpu *vcpu = data;
86
87 /* We use the existing H_PROD mechanism to wake up the target */
88 vcpu->arch.prodded = 1;
89 smp_mb();
90 if (vcpu->arch.ceded)
91 kvmppc_fast_vcpu_kick(vcpu);
92
93 return IRQ_HANDLED;
94}
95
96static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
97{
98 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
99 struct xive_q *q = &xc->queues[prio];
100 char *name = NULL;
101 int rc;
102
103 /* Already there ? */
104 if (xc->esc_virq[prio])
105 return 0;
106
107 /* Hook up the escalation interrupt */
108 xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
109 if (!xc->esc_virq[prio]) {
110 pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
111 prio, xc->server_num);
112 return -EIO;
113 }
114
115 /*
116 * Future improvement: start with them disabled
117 * and handle DD2 and later scheme of merged escalation
118 * interrupts
119 */
120 name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
121 vcpu->kvm->arch.lpid, xc->server_num, prio);
122 if (!name) {
123 pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
124 prio, xc->server_num);
125 rc = -ENOMEM;
126 goto error;
127 }
128 rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
129 IRQF_NO_THREAD, name, vcpu);
130 if (rc) {
131 pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
132 prio, xc->server_num);
133 goto error;
134 }
135 xc->esc_virq_names[prio] = name;
136 return 0;
137error:
138 irq_dispose_mapping(xc->esc_virq[prio]);
139 xc->esc_virq[prio] = 0;
140 kfree(name);
141 return rc;
142}
143
144static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
145{
146 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
147 struct kvmppc_xive *xive = xc->xive;
148 struct xive_q *q = &xc->queues[prio];
149 void *qpage;
150 int rc;
151
152 if (WARN_ON(q->qpage))
153 return 0;
154
155 /* Allocate the queue and retrieve infos on current node for now */
156 qpage = (__be32 *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
157 if (!qpage) {
158 pr_err("Failed to allocate queue %d for VCPU %d\n",
159 prio, xc->server_num);
 160 return -ENOMEM;
161 }
162 memset(qpage, 0, 1 << xive->q_order);
163
164 /*
165 * Reconfigure the queue. This will set q->qpage only once the
166 * queue is fully configured. This is a requirement for prio 0
167 * as we will stop doing EOIs for every IPI as soon as we observe
168 * qpage being non-NULL, and instead will only EOI when we receive
169 * corresponding queue 0 entries
170 */
171 rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage,
172 xive->q_order, true);
173 if (rc)
174 pr_err("Failed to configure queue %d for VCPU %d\n",
175 prio, xc->server_num);
176 return rc;
177}
178
179/* Called with kvm_lock held */
180static int xive_check_provisioning(struct kvm *kvm, u8 prio)
181{
182 struct kvmppc_xive *xive = kvm->arch.xive;
183 struct kvm_vcpu *vcpu;
184 int i, rc;
185
186 lockdep_assert_held(&kvm->lock);
187
188 /* Already provisioned ? */
189 if (xive->qmap & (1 << prio))
190 return 0;
191
192 pr_devel("Provisioning prio... %d\n", prio);
193
194 /* Provision each VCPU and enable escalations */
195 kvm_for_each_vcpu(i, vcpu, kvm) {
196 if (!vcpu->arch.xive_vcpu)
197 continue;
198 rc = xive_provision_queue(vcpu, prio);
199 if (rc == 0)
200 xive_attach_escalation(vcpu, prio);
201 if (rc)
202 return rc;
203 }
204
205 /* Order previous stores and mark it as provisioned */
206 mb();
207 xive->qmap |= (1 << prio);
208 return 0;
209}
210
211static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
212{
213 struct kvm_vcpu *vcpu;
214 struct kvmppc_xive_vcpu *xc;
215 struct xive_q *q;
216
217 /* Locate target server */
218 vcpu = kvmppc_xive_find_server(kvm, server);
219 if (!vcpu) {
220 pr_warn("%s: Can't find server %d\n", __func__, server);
221 return;
222 }
223 xc = vcpu->arch.xive_vcpu;
224 if (WARN_ON(!xc))
225 return;
226
227 q = &xc->queues[prio];
228 atomic_inc(&q->pending_count);
229}
230
231static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
232{
233 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
234 struct xive_q *q;
235 u32 max;
236
237 if (WARN_ON(!xc))
238 return -ENXIO;
239 if (!xc->valid)
240 return -ENXIO;
241
242 q = &xc->queues[prio];
243 if (WARN_ON(!q->qpage))
244 return -ENXIO;
245
246 /* Calculate max number of interrupts in that queue. */
247 max = (q->msk + 1) - XIVE_Q_GAP;
248 return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
249}
250
251static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
252{
253 struct kvm_vcpu *vcpu;
254 int i, rc;
255
256 /* Locate target server */
257 vcpu = kvmppc_xive_find_server(kvm, *server);
258 if (!vcpu) {
259 pr_devel("Can't find server %d\n", *server);
260 return -EINVAL;
261 }
262
263 pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
264
265 /* Try pick it */
266 rc = xive_try_pick_queue(vcpu, prio);
267 if (rc == 0)
268 return rc;
269
270 pr_devel(" .. failed, looking up candidate...\n");
271
272 /* Failed, pick another VCPU */
273 kvm_for_each_vcpu(i, vcpu, kvm) {
274 if (!vcpu->arch.xive_vcpu)
275 continue;
276 rc = xive_try_pick_queue(vcpu, prio);
277 if (rc == 0) {
278 *server = vcpu->arch.xive_vcpu->server_num;
279 pr_devel(" found on 0x%x/%d\n", *server, prio);
280 return rc;
281 }
282 }
283 pr_devel(" no available target !\n");
284
285 /* No available target ! */
286 return -EBUSY;
287}
288
289static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
290 struct kvmppc_xive_src_block *sb,
291 struct kvmppc_xive_irq_state *state)
292{
293 struct xive_irq_data *xd;
294 u32 hw_num;
295 u8 old_prio;
296 u64 val;
297
298 /*
299 * Take the lock, set masked, try again if racing
300 * with H_EOI
301 */
302 for (;;) {
303 arch_spin_lock(&sb->lock);
304 old_prio = state->guest_priority;
305 state->guest_priority = MASKED;
306 mb();
307 if (!state->in_eoi)
308 break;
309 state->guest_priority = old_prio;
310 arch_spin_unlock(&sb->lock);
311 }
312
313 /* No change ? Bail */
314 if (old_prio == MASKED)
315 return old_prio;
316
317 /* Get the right irq */
318 kvmppc_xive_select_irq(state, &hw_num, &xd);
319
320 /*
321 * If the interrupt is marked as needing masking via
322 * firmware, we do it here. Firmware masking however
323 * is "lossy", it won't return the old p and q bits
324 * and won't set the interrupt to a state where it will
325 * record queued ones. If this is an issue we should do
326 * lazy masking instead.
327 *
328 * For now, we work around this in unmask by forcing
329 * an interrupt whenever we unmask a non-LSI via FW
330 * (if ever).
331 */
332 if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
333 xive_native_configure_irq(hw_num,
334 xive->vp_base + state->act_server,
335 MASKED, state->number);
336 /* set old_p so we can track if an H_EOI was done */
337 state->old_p = true;
338 state->old_q = false;
339 } else {
340 /* Set PQ to 10, return old P and old Q and remember them */
341 val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
342 state->old_p = !!(val & 2);
343 state->old_q = !!(val & 1);
344
345 /*
 346 * Synchronize hardware to ensure the queues are updated
347 * when masking
348 */
349 xive_native_sync_source(hw_num);
350 }
351
352 return old_prio;
353}
354
355static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
356 struct kvmppc_xive_irq_state *state)
357{
358 /*
359 * Take the lock try again if racing with H_EOI
360 */
361 for (;;) {
362 arch_spin_lock(&sb->lock);
363 if (!state->in_eoi)
364 break;
365 arch_spin_unlock(&sb->lock);
366 }
367}
368
369static void xive_finish_unmask(struct kvmppc_xive *xive,
370 struct kvmppc_xive_src_block *sb,
371 struct kvmppc_xive_irq_state *state,
372 u8 prio)
373{
374 struct xive_irq_data *xd;
375 u32 hw_num;
376
377 /* If we aren't changing a thing, move on */
378 if (state->guest_priority != MASKED)
379 goto bail;
380
381 /* Get the right irq */
382 kvmppc_xive_select_irq(state, &hw_num, &xd);
383
384 /*
 385 * See comment in xive_lock_and_mask() concerning masking
386 * via firmware.
387 */
388 if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
389 xive_native_configure_irq(hw_num,
390 xive->vp_base + state->act_server,
391 state->act_priority, state->number);
392 /* If an EOI is needed, do it here */
393 if (!state->old_p)
394 xive_vm_source_eoi(hw_num, xd);
395 /* If this is not an LSI, force a trigger */
396 if (!(xd->flags & OPAL_XIVE_IRQ_LSI))
397 xive_irq_trigger(xd);
398 goto bail;
399 }
400
401 /* Old Q set, set PQ to 11 */
402 if (state->old_q)
403 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
404
405 /*
406 * If not old P, then perform an "effective" EOI,
407 * on the source. This will handle the cases where
408 * FW EOI is needed.
409 */
410 if (!state->old_p)
411 xive_vm_source_eoi(hw_num, xd);
412
413 /* Synchronize ordering and mark unmasked */
414 mb();
415bail:
416 state->guest_priority = prio;
417}
418
419/*
 420 * Target an interrupt to a given server/prio; this will fall back
421 * to another server if necessary and perform the HW targetting
422 * updates as needed
423 *
424 * NOTE: Must be called with the state lock held
425 */
426static int xive_target_interrupt(struct kvm *kvm,
427 struct kvmppc_xive_irq_state *state,
428 u32 server, u8 prio)
429{
430 struct kvmppc_xive *xive = kvm->arch.xive;
431 u32 hw_num;
432 int rc;
433
434 /*
435 * This will return a tentative server and actual
436 * priority. The count for that new target will have
437 * already been incremented.
438 */
439 rc = xive_select_target(kvm, &server, prio);
440
441 /*
442 * We failed to find a target ? Not much we can do
443 * at least until we support the GIQ.
444 */
445 if (rc)
446 return rc;
447
448 /*
449 * Increment the old queue pending count if there
450 * was one so that the old queue count gets adjusted later
451 * when observed to be empty.
452 */
453 if (state->act_priority != MASKED)
454 xive_inc_q_pending(kvm,
455 state->act_server,
456 state->act_priority);
457 /*
458 * Update state and HW
459 */
460 state->act_priority = prio;
461 state->act_server = server;
462
463 /* Get the right irq */
464 kvmppc_xive_select_irq(state, &hw_num, NULL);
465
466 return xive_native_configure_irq(hw_num,
467 xive->vp_base + server,
468 prio, state->number);
469}
470
471/*
472 * Targetting rules: In order to avoid losing track of
 473 * pending interrupts across mask and unmask, which would
474 * allow queue overflows, we implement the following rules:
475 *
476 * - Unless it was never enabled (or we run out of capacity)
477 * an interrupt is always targetted at a valid server/queue
478 * pair even when "masked" by the guest. This pair tends to
479 * be the last one used but it can be changed under some
480 * circumstances. That allows us to separate targetting
481 * from masking, we only handle accounting during (re)targetting,
482 * this also allows us to let an interrupt drain into its target
483 * queue after masking, avoiding complex schemes to remove
484 * interrupts out of remote processor queues.
485 *
486 * - When masking, we set PQ to 10 and save the previous value
487 * of P and Q.
488 *
489 * - When unmasking, if saved Q was set, we set PQ to 11
490 * otherwise we leave PQ to the HW state which will be either
491 * 10 if nothing happened or 11 if the interrupt fired while
492 * masked. Effectively we are OR'ing the previous Q into the
493 * HW Q.
494 *
495 * Then if saved P is clear, we do an effective EOI (Q->P->Trigger)
496 * which will unmask the interrupt and shoot a new one if Q was
497 * set.
498 *
499 * Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11,
500 * effectively meaning an H_EOI from the guest is still expected
501 * for that interrupt).
502 *
503 * - If H_EOI occurs while masked, we clear the saved P.
504 *
505 * - When changing target, we account on the new target and
506 * increment a separate "pending" counter on the old one.
507 * This pending counter will be used to decrement the old
508 * target's count when its queue has been observed empty.
509 */
510
511int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
512 u32 priority)
513{
514 struct kvmppc_xive *xive = kvm->arch.xive;
515 struct kvmppc_xive_src_block *sb;
516 struct kvmppc_xive_irq_state *state;
517 u8 new_act_prio;
518 int rc = 0;
519 u16 idx;
520
521 if (!xive)
522 return -ENODEV;
523
524 pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
525 irq, server, priority);
526
527 /* First, check provisioning of queues */
528 if (priority != MASKED)
529 rc = xive_check_provisioning(xive->kvm,
530 xive_prio_from_guest(priority));
531 if (rc) {
532 pr_devel(" provisioning failure %d !\n", rc);
533 return rc;
534 }
535
536 sb = kvmppc_xive_find_source(xive, irq, &idx);
537 if (!sb)
538 return -EINVAL;
539 state = &sb->irq_state[idx];
540
541 /*
542 * We first handle masking/unmasking since the locking
543 * might need to be retried due to EOIs, we'll handle
544 * targetting changes later. These functions will return
545 * with the SB lock held.
546 *
547 * xive_lock_and_mask() will also set state->guest_priority
548 * but won't otherwise change other fields of the state.
549 *
550 * xive_lock_for_unmask will not actually unmask, this will
551 * be done later by xive_finish_unmask() once the targetting
552 * has been done, so we don't try to unmask an interrupt
553 * that hasn't yet been targetted.
554 */
555 if (priority == MASKED)
556 xive_lock_and_mask(xive, sb, state);
557 else
558 xive_lock_for_unmask(sb, state);
559
560
561 /*
562 * Then we handle targetting.
563 *
564 * First calculate a new "actual priority"
565 */
566 new_act_prio = state->act_priority;
567 if (priority != MASKED)
568 new_act_prio = xive_prio_from_guest(priority);
569
570 pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
571 new_act_prio, state->act_server, state->act_priority);
572
573 /*
574 * Then check if we actually need to change anything,
575 *
576 * The condition for re-targetting the interrupt is that
577 * we have a valid new priority (new_act_prio is not 0xff)
578 * and either the server or the priority changed.
579 *
580 * Note: If act_priority was ff and the new priority is
581 * also ff, we don't do anything and leave the interrupt
582 * untargetted. An attempt of doing an int_on on an
583 * untargetted interrupt will fail. If that is a problem
 584 * we could initialize interrupts with a valid default priority
585 */
586
587 if (new_act_prio != MASKED &&
588 (state->act_server != server ||
589 state->act_priority != new_act_prio))
590 rc = xive_target_interrupt(kvm, state, server, new_act_prio);
591
592 /*
593 * Perform the final unmasking of the interrupt source
594 * if necessary
595 */
596 if (priority != MASKED)
597 xive_finish_unmask(xive, sb, state, priority);
598
599 /*
600 * Finally Update saved_priority to match. Only int_on/off
601 * set this field to a different value.
602 */
603 state->saved_priority = priority;
604
605 arch_spin_unlock(&sb->lock);
606 return rc;
607}
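
The masking rules spelled out in the comment block before kvmppc_xive_set_xive() can be summarised with a small model of the P/Q bookkeeping done by xive_lock_and_mask() and xive_finish_unmask(); in the ESB value decoded there, P is bit 1 and Q is bit 0. The struct and helpers below are an illustration of those rules, not code from this patch.

struct pq_model {
	bool p, q;		/* current ESB state */
	bool old_p, old_q;	/* snapshot taken when masking */
};

static void model_mask(struct pq_model *s)
{
	s->old_p = s->p;
	s->old_q = s->q;
	s->p = true;		/* "set PQ to 10 and save the previous value" */
	s->q = false;
}

/* Returns true when an "effective" EOI must be issued on unmask. */
static bool model_unmask(struct pq_model *s)
{
	if (s->old_q)
		s->p = s->q = true;	/* saved Q set: force PQ to 11 */
	return !s->old_p;		/* saved P clear: EOI re-enables and retriggers */
}
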
608
609int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
610 u32 *priority)
611{
612 struct kvmppc_xive *xive = kvm->arch.xive;
613 struct kvmppc_xive_src_block *sb;
614 struct kvmppc_xive_irq_state *state;
615 u16 idx;
616
617 if (!xive)
618 return -ENODEV;
619
620 sb = kvmppc_xive_find_source(xive, irq, &idx);
621 if (!sb)
622 return -EINVAL;
623 state = &sb->irq_state[idx];
624 arch_spin_lock(&sb->lock);
625 *server = state->guest_server;
626 *priority = state->guest_priority;
627 arch_spin_unlock(&sb->lock);
628
629 return 0;
630}
631
632int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
633{
634 struct kvmppc_xive *xive = kvm->arch.xive;
635 struct kvmppc_xive_src_block *sb;
636 struct kvmppc_xive_irq_state *state;
637 u16 idx;
638
639 if (!xive)
640 return -ENODEV;
641
642 sb = kvmppc_xive_find_source(xive, irq, &idx);
643 if (!sb)
644 return -EINVAL;
645 state = &sb->irq_state[idx];
646
647 pr_devel("int_on(irq=0x%x)\n", irq);
648
649 /*
650 * Check if interrupt was not targetted
651 */
652 if (state->act_priority == MASKED) {
653 pr_devel("int_on on untargetted interrupt\n");
654 return -EINVAL;
655 }
656
657 /* If saved_priority is 0xff, do nothing */
658 if (state->saved_priority == MASKED)
659 return 0;
660
661 /*
662 * Lock and unmask it.
663 */
664 xive_lock_for_unmask(sb, state);
665 xive_finish_unmask(xive, sb, state, state->saved_priority);
666 arch_spin_unlock(&sb->lock);
667
668 return 0;
669}
670
671int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
672{
673 struct kvmppc_xive *xive = kvm->arch.xive;
674 struct kvmppc_xive_src_block *sb;
675 struct kvmppc_xive_irq_state *state;
676 u16 idx;
677
678 if (!xive)
679 return -ENODEV;
680
681 sb = kvmppc_xive_find_source(xive, irq, &idx);
682 if (!sb)
683 return -EINVAL;
684 state = &sb->irq_state[idx];
685
686 pr_devel("int_off(irq=0x%x)\n", irq);
687
688 /*
689 * Lock and mask
690 */
691 state->saved_priority = xive_lock_and_mask(xive, sb, state);
692 arch_spin_unlock(&sb->lock);
693
694 return 0;
695}
696
697static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
698{
699 struct kvmppc_xive_src_block *sb;
700 struct kvmppc_xive_irq_state *state;
701 u16 idx;
702
703 sb = kvmppc_xive_find_source(xive, irq, &idx);
704 if (!sb)
705 return false;
706 state = &sb->irq_state[idx];
707 if (!state->valid)
708 return false;
709
710 /*
711 * Trigger the IPI. This assumes we never restore a pass-through
712 * interrupt which should be safe enough
713 */
714 xive_irq_trigger(&state->ipi_data);
715
716 return true;
717}
718
719u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
720{
721 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
722
723 if (!xc)
724 return 0;
725
726 /* Return the per-cpu state for state saving/migration */
727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
729}
730
731int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
732{
733 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
734 struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
735 u8 cppr, mfrr;
736 u32 xisr;
737
738 if (!xc || !xive)
739 return -ENOENT;
740
741 /* Grab individual state fields. We don't use pending_pri */
742 cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
743 xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
744 KVM_REG_PPC_ICP_XISR_MASK;
745 mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
746
747 pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
748 xc->server_num, cppr, mfrr, xisr);
749
750 /*
751 * We can't update the state of a "pushed" VCPU, but that
752 * shouldn't happen.
753 */
754 if (WARN_ON(vcpu->arch.xive_pushed))
755 return -EIO;
756
757 /* Update VCPU HW saved state */
758 vcpu->arch.xive_saved_state.cppr = cppr;
759 xc->hw_cppr = xc->cppr = cppr;
760
761 /*
762 * Update MFRR state. If it's not 0xff, we mark the VCPU as
763 * having a pending MFRR change, which will re-evaluate the
764 * target. The VCPU will thus potentially get a spurious
765 * interrupt but that's not a big deal.
766 */
767 xc->mfrr = mfrr;
768 if (mfrr < cppr)
769 xive_irq_trigger(&xc->vp_ipi_data);
770
771 /*
772 * Now saved XIRR is "interesting". It means there's something in
773 * the legacy "1 element" queue... for an IPI we simply ignore it,
774 * as the MFRR restore will handle that. For anything else we need
775 * to force a resend of the source.
776 * However the source may not have been setup yet. If that's the
777 * case, we keep that info and increment a counter in the xive to
778 * tell subsequent xive_set_source() to go look.
779 */
780 if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
781 xc->delayed_irq = xisr;
782 xive->delayed_irqs++;
783 pr_devel(" xisr restore delayed\n");
784 }
785
786 return 0;
787}
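
kvmppc_xive_get_icp() and kvmppc_xive_set_icp() above move the CPPR, MFRR and XISR fields in and out of the single 64-bit ICP register used for state save and migration. A small sketch of the corresponding packing, using the same KVM_REG_PPC_ICP_* field definitions the code relies on (the helper name is hypothetical):

static u64 pack_icp_word(u8 cppr, u8 mfrr, u32 xisr)
{
	return ((u64)cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
	       ((u64)mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
	       ((u64)(xisr & KVM_REG_PPC_ICP_XISR_MASK)
			<< KVM_REG_PPC_ICP_XISR_SHIFT);
}
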
788
789int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
790 struct irq_desc *host_desc)
791{
792 struct kvmppc_xive *xive = kvm->arch.xive;
793 struct kvmppc_xive_src_block *sb;
794 struct kvmppc_xive_irq_state *state;
795 struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
796 unsigned int host_irq = irq_desc_get_irq(host_desc);
797 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
798 u16 idx;
799 u8 prio;
800 int rc;
801
802 if (!xive)
803 return -ENODEV;
804
 805 pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n", guest_irq, hw_irq);
806
807 sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
808 if (!sb)
809 return -EINVAL;
810 state = &sb->irq_state[idx];
811
812 /*
813 * Mark the passed-through interrupt as going to a VCPU,
814 * this will prevent further EOIs and similar operations
815 * from the XIVE code. It will also mask the interrupt
816 * to either PQ=10 or 11 state, the latter if the interrupt
817 * is pending. This will allow us to unmask or retrigger it
818 * after routing it to the guest with a simple EOI.
819 *
820 * The "state" argument is a "token", all it needs is to be
821 * non-NULL to switch to passed-through or NULL for the
822 * other way around. We may not yet have an actual VCPU
823 * target here and we don't really care.
824 */
825 rc = irq_set_vcpu_affinity(host_irq, state);
826 if (rc) {
827 pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
828 return rc;
829 }
830
831 /*
832 * Mask and read state of IPI. We need to know if its P bit
833 * is set as that means it's potentially already using a
834 * queue entry in the target
835 */
836 prio = xive_lock_and_mask(xive, sb, state);
837 pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
838 state->old_p, state->old_q);
839
840 /* Turn the IPI hard off */
841 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
842
843 /* Grab info about irq */
844 state->pt_number = hw_irq;
845 state->pt_data = irq_data_get_irq_handler_data(host_data);
846
847 /*
848 * Configure the IRQ to match the existing configuration of
849 * the IPI if it was already targetted. Otherwise this will
850 * mask the interrupt in a lossy way (act_priority is 0xff)
851 * which is fine for a never started interrupt.
852 */
853 xive_native_configure_irq(hw_irq,
854 xive->vp_base + state->act_server,
855 state->act_priority, state->number);
856
857 /*
858 * We do an EOI to enable the interrupt (and retrigger if needed)
859 * if the guest has the interrupt unmasked and the P bit was *not*
860 * set in the IPI. If it was set, we know a slot may still be in
861 * use in the target queue thus we have to wait for a guest
862 * originated EOI
863 */
864 if (prio != MASKED && !state->old_p)
865 xive_vm_source_eoi(hw_irq, state->pt_data);
866
867 /* Clear old_p/old_q as they are no longer relevant */
868 state->old_p = state->old_q = false;
869
870 /* Restore guest prio (unlocks EOI) */
871 mb();
872 state->guest_priority = prio;
873 arch_spin_unlock(&sb->lock);
874
875 return 0;
876}
877EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
878
879int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
880 struct irq_desc *host_desc)
881{
882 struct kvmppc_xive *xive = kvm->arch.xive;
883 struct kvmppc_xive_src_block *sb;
884 struct kvmppc_xive_irq_state *state;
885 unsigned int host_irq = irq_desc_get_irq(host_desc);
886 u16 idx;
887 u8 prio;
888 int rc;
889
890 if (!xive)
891 return -ENODEV;
892
893 pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
894
895 sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
896 if (!sb)
897 return -EINVAL;
898 state = &sb->irq_state[idx];
899
900 /*
901 * Mask and read state of IRQ. We need to know if its P bit
902 * is set as that means it's potentially already using a
903 * queue entry in the target
904 */
905 prio = xive_lock_and_mask(xive, sb, state);
906 pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
907 state->old_p, state->old_q);
908
909 /*
910 * If old_p is set, the interrupt is pending, we switch it to
911 * PQ=11. This will force a resend in the host so the interrupt
 912 * isn't lost to whatever host driver may pick it up
913 */
914 if (state->old_p)
915 xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
916
917 /* Release the passed-through interrupt to the host */
918 rc = irq_set_vcpu_affinity(host_irq, NULL);
919 if (rc) {
920 pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
921 return rc;
922 }
923
924 /* Forget about the IRQ */
925 state->pt_number = 0;
926 state->pt_data = NULL;
927
928 /* Reconfigure the IPI */
929 xive_native_configure_irq(state->ipi_number,
930 xive->vp_base + state->act_server,
931 state->act_priority, state->number);
932
933 /*
934 * If old_p is set (we have a queue entry potentially
935 * occupied) or the interrupt is masked, we set the IPI
936 * to PQ=10 state. Otherwise we just re-enable it (PQ=00).
937 */
938 if (prio == MASKED || state->old_p)
939 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
940 else
941 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
942
943 /* Restore guest prio (unlocks EOI) */
944 mb();
945 state->guest_priority = prio;
946 arch_spin_unlock(&sb->lock);
947
948 return 0;
949}
950EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
951
952static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
953{
954 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
955 struct kvm *kvm = vcpu->kvm;
956 struct kvmppc_xive *xive = kvm->arch.xive;
957 int i, j;
958
959 for (i = 0; i <= xive->max_sbid; i++) {
960 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
961
962 if (!sb)
963 continue;
964 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
965 struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
966
967 if (!state->valid)
968 continue;
969 if (state->act_priority == MASKED)
970 continue;
971 if (state->act_server != xc->server_num)
972 continue;
973
974 /* Clean it up */
975 arch_spin_lock(&sb->lock);
976 state->act_priority = MASKED;
977 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
978 xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
979 if (state->pt_number) {
980 xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
981 xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
982 }
983 arch_spin_unlock(&sb->lock);
984 }
985 }
986}
987
988void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
989{
990 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
991 struct kvmppc_xive *xive = xc->xive;
992 int i;
993
994 pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
995
996 /* Ensure no interrupt is still routed to that VP */
997 xc->valid = false;
998 kvmppc_xive_disable_vcpu_interrupts(vcpu);
999
1000 /* Mask the VP IPI */
1001 xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
1002
1003 /* Disable the VP */
1004 xive_native_disable_vp(xc->vp_id);
1005
1006 /* Free the queues & associated interrupts */
1007 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1008 struct xive_q *q = &xc->queues[i];
1009
1010 /* Free the escalation irq */
1011 if (xc->esc_virq[i]) {
1012 free_irq(xc->esc_virq[i], vcpu);
1013 irq_dispose_mapping(xc->esc_virq[i]);
1014 kfree(xc->esc_virq_names[i]);
1015 }
1016 /* Free the queue */
1017 xive_native_disable_queue(xc->vp_id, q, i);
1018 if (q->qpage) {
1019 free_pages((unsigned long)q->qpage,
1020 xive->q_page_order);
1021 q->qpage = NULL;
1022 }
1023 }
1024
1025 /* Free the IPI */
1026 if (xc->vp_ipi) {
1027 xive_cleanup_irq_data(&xc->vp_ipi_data);
1028 xive_native_free_irq(xc->vp_ipi);
1029 }
1030 /* Free the VP */
1031 kfree(xc);
1032}
1033
1034int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
1035 struct kvm_vcpu *vcpu, u32 cpu)
1036{
1037 struct kvmppc_xive *xive = dev->private;
1038 struct kvmppc_xive_vcpu *xc;
1039 int i, r = -EBUSY;
1040
1041 pr_devel("connect_vcpu(cpu=%d)\n", cpu);
1042
1043 if (dev->ops != &kvm_xive_ops) {
1044 pr_devel("Wrong ops !\n");
1045 return -EPERM;
1046 }
1047 if (xive->kvm != vcpu->kvm)
1048 return -EPERM;
1049 if (vcpu->arch.irq_type)
1050 return -EBUSY;
1051 if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
1052 pr_devel("Duplicate !\n");
1053 return -EEXIST;
1054 }
1055 if (cpu >= KVM_MAX_VCPUS) {
1056 pr_devel("Out of bounds !\n");
1057 return -EINVAL;
1058 }
1059 xc = kzalloc(sizeof(*xc), GFP_KERNEL);
1060 if (!xc)
1061 return -ENOMEM;
1062
1063 /* We need to synchronize with queue provisioning */
1064 mutex_lock(&vcpu->kvm->lock);
1065 vcpu->arch.xive_vcpu = xc;
1066 xc->xive = xive;
1067 xc->vcpu = vcpu;
1068 xc->server_num = cpu;
1069 xc->vp_id = xive->vp_base + cpu;
1070 xc->mfrr = 0xff;
1071 xc->valid = true;
1072
1073 r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
1074 if (r)
1075 goto bail;
1076
1077 /* Configure VCPU fields for use by assembly push/pull */
1078 vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
1079 vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
1080
1081 /* Allocate IPI */
1082 xc->vp_ipi = xive_native_alloc_irq();
1083 if (!xc->vp_ipi) {
1084 r = -EIO;
1085 goto bail;
1086 }
1087 pr_devel(" IPI=0x%x\n", xc->vp_ipi);
1088
1089 r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
1090 if (r)
1091 goto bail;
1092
1093 /*
1094 * Initialize queues. Initially we set them all for no queueing
1095 * and we enable escalation for queue 0 only which we'll use for
1096 * our mfrr change notifications. If the VCPU is hot-plugged,
1097 * however, we do handle provisioning.
1098 */
1099 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1100 struct xive_q *q = &xc->queues[i];
1101
1102 /* If the queue is already enabled, provision it */
1103 if (xive->qmap & (1 << i)) {
1104 r = xive_provision_queue(vcpu, i);
1105 if (r == 0)
1106 xive_attach_escalation(vcpu, i);
1107 if (r)
1108 goto bail;
1109 } else {
1110 r = xive_native_configure_queue(xc->vp_id,
1111 q, i, NULL, 0, true);
1112 if (r) {
1113 pr_err("Failed to configure queue %d for VCPU %d\n",
1114 i, cpu);
1115 goto bail;
1116 }
1117 }
1118 }
1119
1120 /* If not done above, attach priority 0 escalation */
1121 r = xive_attach_escalation(vcpu, 0);
1122 if (r)
1123 goto bail;
1124
1125 /* Enable the VP */
1126 r = xive_native_enable_vp(xc->vp_id);
1127 if (r)
1128 goto bail;
1129
1130 /* Route the IPI */
1131 r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
1132 if (!r)
1133 xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
1134
1135bail:
1136 mutex_unlock(&vcpu->kvm->lock);
1137 if (r) {
1138 kvmppc_xive_cleanup_vcpu(vcpu);
1139 return r;
1140 }
1141
1142 vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
1143 return 0;
1144}
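/*
 * A hedged userspace-side sketch of how a VMM would reach
 * kvmppc_xive_connect_vcpu() above: the vCPU is attached with
 * KVM_ENABLE_CAP / KVM_CAP_IRQ_XICS, passing the device fd and the
 * server number. The helper and variable names here are hypothetical.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_connect_vcpu(int vcpu_fd, int xive_dev_fd, __u64 server)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_IRQ_XICS,
		.args = { xive_dev_fd, server },
	};

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}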
1145
1146/*
1147 * Scanning of queues before/after migration save
1148 */
1149static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
1150{
1151 struct kvmppc_xive_src_block *sb;
1152 struct kvmppc_xive_irq_state *state;
1153 u16 idx;
1154
1155 sb = kvmppc_xive_find_source(xive, irq, &idx);
1156 if (!sb)
1157 return;
1158
1159 state = &sb->irq_state[idx];
1160
1161 /* Some sanity checking */
1162 if (!state->valid) {
1163 pr_err("invalid irq 0x%x in cpu queue!\n", irq);
1164 return;
1165 }
1166
1167 /*
1168 * If the interrupt is in a queue it should have P set.
1169 * We warn so that it gets reported. A backtrace isn't useful
1170 * so no need to use a WARN_ON.
1171 */
1172 if (!state->saved_p)
1173 pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
1174
1175 /* Set flag */
1176 state->in_queue = true;
1177}
1178
1179static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
1180 struct kvmppc_xive_src_block *sb,
1181 u32 irq)
1182{
1183 struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
1184
1185 if (!state->valid)
1186 return;
1187
1188 /* Mask and save state, this will also sync HW queues */
1189 state->saved_scan_prio = xive_lock_and_mask(xive, sb, state);
1190
1191 /* Transfer P and Q */
1192 state->saved_p = state->old_p;
1193 state->saved_q = state->old_q;
1194
1195 /* Unlock */
1196 arch_spin_unlock(&sb->lock);
1197}
1198
1199static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
1200 struct kvmppc_xive_src_block *sb,
1201 u32 irq)
1202{
1203 struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
1204
1205 if (!state->valid)
1206 return;
1207
1208 /*
1209 * Lock / exclude EOI (not technically necessary if the
1210 * guest isn't running concurrently). If this becomes a
1211 * performance issue we can probably remove the lock.
1212 */
1213 xive_lock_for_unmask(sb, state);
1214
1215 /* Restore mask/prio if it wasn't masked */
1216 if (state->saved_scan_prio != MASKED)
1217 xive_finish_unmask(xive, sb, state, state->saved_scan_prio);
1218
1219 /* Unlock */
1220 arch_spin_unlock(&sb->lock);
1221}
1222
1223static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
1224{
1225 u32 idx = q->idx;
1226 u32 toggle = q->toggle;
1227 u32 irq;
1228
1229 do {
1230 irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle);
1231 if (irq > XICS_IPI)
1232 xive_pre_save_set_queued(xive, irq);
1233 } while (irq);
1234}
1235
1236static void xive_pre_save_scan(struct kvmppc_xive *xive)
1237{
1238 struct kvm_vcpu *vcpu = NULL;
1239 int i, j;
1240
1241 /*
1242 * See comment in xive_get_source() about how this
1243 * works. Collect a stable state for all interrupts.
1244 */
1245 for (i = 0; i <= xive->max_sbid; i++) {
1246 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1247 if (!sb)
1248 continue;
1249 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1250 xive_pre_save_mask_irq(xive, sb, j);
1251 }
1252
1253 /* Then scan the queues and update the "in_queue" flag */
1254 kvm_for_each_vcpu(i, vcpu, xive->kvm) {
1255 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1256 if (!xc)
1257 continue;
1258 for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
1259 if (xc->queues[j].qpage)
1260 xive_pre_save_queue(xive, &xc->queues[j]);
1261 }
1262 }
1263
1264 /* Finally restore interrupt states */
1265 for (i = 0; i <= xive->max_sbid; i++) {
1266 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1267 if (!sb)
1268 continue;
1269 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1270 xive_pre_save_unmask_irq(xive, sb, j);
1271 }
1272}
1273
1274static void xive_post_save_scan(struct kvmppc_xive *xive)
1275{
1276 u32 i, j;
1277
1278 /* Clear all the in_queue flags */
1279 for (i = 0; i <= xive->max_sbid; i++) {
1280 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1281 if (!sb)
1282 continue;
1283 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1284 sb->irq_state[j].in_queue = false;
1285 }
1286
1287 /* Next get_source() will do a new scan */
1288 xive->saved_src_count = 0;
1289}
1290
1291/*
1292 * This returns the source configuration and state to user space.
1293 */
1294static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
1295{
1296 struct kvmppc_xive_src_block *sb;
1297 struct kvmppc_xive_irq_state *state;
1298 u64 __user *ubufp = (u64 __user *) addr;
1299 u64 val, prio;
1300 u16 idx;
1301
1302 sb = kvmppc_xive_find_source(xive, irq, &idx);
1303 if (!sb)
1304 return -ENOENT;
1305
1306 state = &sb->irq_state[idx];
1307
1308 if (!state->valid)
1309 return -ENOENT;
1310
1311 pr_devel("get_source(%ld)...\n", irq);
1312
1313 /*
1314 * So to properly save the state into something that looks like a
1315 * XICS migration stream we cannot treat interrupts individually.
1316 *
1317 * We need, instead, to mask them all (and save their previous PQ state)
1318 * to get a stable state in the HW, then sync them to ensure that
1319 * any interrupt that had already fired hits its queue, and finally
1320 * scan all the queues to collect which interrupts are still present
1321 * in the queues, so we can set the "pending" flag on them and
1322 * they can be resent on restore.
1323 *
1324 * So we do it all when the "first" interrupt gets saved: all the
1325 * state is collected at that point, and the rest of xive_get_source()
1326 * will merely collect and convert that state to the expected
1327 * userspace bit mask.
1328 */
1329 if (xive->saved_src_count == 0)
1330 xive_pre_save_scan(xive);
1331 xive->saved_src_count++;
1332
1333 /* Convert saved state into something compatible with xics */
1334 val = state->guest_server;
1335 prio = state->saved_scan_prio;
1336
1337 if (prio == MASKED) {
1338 val |= KVM_XICS_MASKED;
1339 prio = state->saved_priority;
1340 }
1341 val |= prio << KVM_XICS_PRIORITY_SHIFT;
1342 if (state->lsi) {
1343 val |= KVM_XICS_LEVEL_SENSITIVE;
1344 if (state->saved_p)
1345 val |= KVM_XICS_PENDING;
1346 } else {
1347 if (state->saved_p)
1348 val |= KVM_XICS_PRESENTED;
1349
1350 if (state->saved_q)
1351 val |= KVM_XICS_QUEUED;
1352
1353 /*
1354 * We mark it pending (which will attempt a re-delivery)
1355 * if we are in a queue *or* we were masked and had
1356 * Q set, which is equivalent to the XICS "masked pending"
1357 * state
1358 */
1359 if (state->in_queue || (prio == MASKED && state->saved_q))
1360 val |= KVM_XICS_PENDING;
1361 }
1362
1363 /*
1364 * If that was the last interrupt saved, reset the
1365 * in_queue flags
1366 */
1367 if (xive->saved_src_count == xive->src_count)
1368 xive_post_save_scan(xive);
1369
1370 /* Copy the result to userspace */
1371 if (put_user(val, ubufp))
1372 return -EFAULT;
1373
1374 return 0;
1375}
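/*
 * A small sketch (hypothetical helper, real UAPI constants) showing how
 * the u64 produced by xive_get_source() above decomposes, following the
 * existing XICS migration format.
 */
static void __maybe_unused example_decode_source(u64 val)
{
	u32 server = val & KVM_XICS_DESTINATION_MASK;
	u8 prio = (val >> KVM_XICS_PRIORITY_SHIFT) & KVM_XICS_PRIORITY_MASK;

	pr_devel("server=0x%x prio=%d%s%s%s%s%s\n", server, prio,
		 (val & KVM_XICS_LEVEL_SENSITIVE) ? " lsi" : "",
		 (val & KVM_XICS_MASKED) ? " masked" : "",
		 (val & KVM_XICS_PENDING) ? " pending" : "",
		 (val & KVM_XICS_PRESENTED) ? " presented" : "",
		 (val & KVM_XICS_QUEUED) ? " queued" : "");
}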
1376
1377static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
1378 int irq)
1379{
1380 struct kvm *kvm = xive->kvm;
1381 struct kvmppc_xive_src_block *sb;
1382 int i, bid;
1383
1384 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
1385
1386 mutex_lock(&kvm->lock);
1387
1388 /* block already exists - somebody else got here first */
1389 if (xive->src_blocks[bid])
1390 goto out;
1391
1392 /* Create the ICS */
1393 sb = kzalloc(sizeof(*sb), GFP_KERNEL);
1394 if (!sb)
1395 goto out;
1396
1397 sb->id = bid;
1398
1399 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
1400 sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
1401 sb->irq_state[i].guest_priority = MASKED;
1402 sb->irq_state[i].saved_priority = MASKED;
1403 sb->irq_state[i].act_priority = MASKED;
1404 }
1405 smp_wmb();
1406 xive->src_blocks[bid] = sb;
1407
1408 if (bid > xive->max_sbid)
1409 xive->max_sbid = bid;
1410
1411out:
1412 mutex_unlock(&kvm->lock);
1413 return xive->src_blocks[bid];
1414}
1415
1416static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
1417{
1418 struct kvm *kvm = xive->kvm;
1419 struct kvm_vcpu *vcpu = NULL;
1420 int i;
1421
1422 kvm_for_each_vcpu(i, vcpu, kvm) {
1423 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1424
1425 if (!xc)
1426 continue;
1427
1428 if (xc->delayed_irq == irq) {
1429 xc->delayed_irq = 0;
1430 xive->delayed_irqs--;
1431 return true;
1432 }
1433 }
1434 return false;
1435}
1436
1437static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1438{
1439 struct kvmppc_xive_src_block *sb;
1440 struct kvmppc_xive_irq_state *state;
1441 u64 __user *ubufp = (u64 __user *) addr;
1442 u16 idx;
1443 u64 val;
1444 u8 act_prio, guest_prio;
1445 u32 server;
1446 int rc = 0;
1447
1448 if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
1449 return -ENOENT;
1450
1451 pr_devel("set_source(irq=0x%lx)\n", irq);
1452
1453 /* Find the source */
1454 sb = kvmppc_xive_find_source(xive, irq, &idx);
1455 if (!sb) {
1456 pr_devel("No source, creating source block...\n");
1457 sb = xive_create_src_block(xive, irq);
1458 if (!sb) {
1459 pr_devel("Failed to create block...\n");
1460 return -ENOMEM;
1461 }
1462 }
1463 state = &sb->irq_state[idx];
1464
1465 /* Read user passed data */
1466 if (get_user(val, ubufp)) {
1467 pr_devel("fault getting user info !\n");
1468 return -EFAULT;
1469 }
1470
1471 server = val & KVM_XICS_DESTINATION_MASK;
1472 guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
1473
1474 pr_devel(" val=0x%016llx (server=0x%x, guest_prio=%d)\n",
1475 val, server, guest_prio);
1476 /*
1477 * If the source doesn't already have an IPI, allocate
1478 * one and get the corresponding data
1479 */
1480 if (!state->ipi_number) {
1481 state->ipi_number = xive_native_alloc_irq();
1482 if (state->ipi_number == 0) {
1483 pr_devel("Failed to allocate IPI !\n");
1484 return -ENOMEM;
1485 }
1486 xive_native_populate_irq_data(state->ipi_number, &state->ipi_data);
1487 pr_devel(" src_ipi=0x%x\n", state->ipi_number);
1488 }
1489
1490 /*
1491 * We use lock_and_mask() to put us in the right masked
1492 * state. We will override that state from the saved state
1493 * further down, but this will handle the cases of interrupts
1494 * that need FW masking. We set the initial guest_priority to
1495 * 0 before calling it to ensure it actually performs the masking.
1496 */
1497 state->guest_priority = 0;
1498 xive_lock_and_mask(xive, sb, state);
1499
1500 /*
1501 * Now, we select a target if we have one. If we don't, we
1502 * leave the interrupt untargeted. It means that an interrupt
1503 * can become "untargeted" across migration if it was masked
1504 * by set_xive(), but there is little we can do about it.
1505 */
1506
1507 /* First convert prio and mark interrupt as untargeted */
1508 act_prio = xive_prio_from_guest(guest_prio);
1509 state->act_priority = MASKED;
1510 state->guest_server = server;
1511
1512 /*
1513 * We need to drop the lock due to the mutex below. Hopefully
1514 * nothing is touching that interrupt yet since it hasn't been
1515 * advertised to a running guest yet.
1516 */
1517 arch_spin_unlock(&sb->lock);
1518
1519 /* If we have a priority, target the interrupt */
1520 if (act_prio != MASKED) {
1521 /* First, check provisioning of queues */
1522 mutex_lock(&xive->kvm->lock);
1523 rc = xive_check_provisioning(xive->kvm, act_prio);
1524 mutex_unlock(&xive->kvm->lock);
1525
1526 /* Target interrupt */
1527 if (rc == 0)
1528 rc = xive_target_interrupt(xive->kvm, state,
1529 server, act_prio);
1530 /*
1531 * If provisioning or targeting failed, leave it
1532 * alone and masked. It will remain disabled until
1533 * the guest re-targets it.
1534 */
1535 }
1536
1537 /*
1538 * Find out if this was a delayed irq stashed in an ICP,
1539 * in which case, treat it as pending
1540 */
1541 if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
1542 val |= KVM_XICS_PENDING;
1543 pr_devel(" Found delayed ! forcing PENDING !\n");
1544 }
1545
1546 /* Cleanup the SW state */
1547 state->old_p = false;
1548 state->old_q = false;
1549 state->lsi = false;
1550 state->asserted = false;
1551
1552 /* Restore LSI state */
1553 if (val & KVM_XICS_LEVEL_SENSITIVE) {
1554 state->lsi = true;
1555 if (val & KVM_XICS_PENDING)
1556 state->asserted = true;
1557 pr_devel(" LSI ! Asserted=%d\n", state->asserted);
1558 }
1559
1560 /*
1561 * Restore P and Q. If the interrupt was pending, we
1562 * force both P and Q, which will trigger a resend.
1563 *
1564 * That means that a guest that had both an interrupt
1565 * pending (queued) and Q set will restore with only
1566 * one instance of that interrupt instead of 2, but that
1567 * is perfectly fine as coalescing interrupts that haven't
1568 * been presented yet is always allowed.
1569 */
1570 if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
1571 state->old_p = true;
1572 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
1573 state->old_q = true;
1574
1575 pr_devel(" P=%d, Q=%d\n", state->old_p, state->old_q);
1576
1577 /*
1578 * If the interrupt was unmasked, update guest priority and
1579 * perform the appropriate state transition and do a
1580 * re-trigger if necessary.
1581 */
1582 if (val & KVM_XICS_MASKED) {
1583 pr_devel(" masked, saving prio\n");
1584 state->guest_priority = MASKED;
1585 state->saved_priority = guest_prio;
1586 } else {
1587 pr_devel(" unmasked, restoring to prio %d\n", guest_prio);
1588 xive_finish_unmask(xive, sb, state, guest_prio);
1589 state->saved_priority = guest_prio;
1590 }
1591
1592 /* Increment the number of valid sources and mark this one valid */
1593 if (!state->valid)
1594 xive->src_count++;
1595 state->valid = true;
1596
1597 return 0;
1598}
1599
1600int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1601 bool line_status)
1602{
1603 struct kvmppc_xive *xive = kvm->arch.xive;
1604 struct kvmppc_xive_src_block *sb;
1605 struct kvmppc_xive_irq_state *state;
1606 u16 idx;
1607
1608 if (!xive)
1609 return -ENODEV;
1610
1611 sb = kvmppc_xive_find_source(xive, irq, &idx);
1612 if (!sb)
1613 return -EINVAL;
1614
1615 /* Perform locklessly .... (we need to do some RCUisms here...) */
1616 state = &sb->irq_state[idx];
1617 if (!state->valid)
1618 return -EINVAL;
1619
1620 /* We don't allow a trigger on a passed-through interrupt */
1621 if (state->pt_number)
1622 return -EINVAL;
1623
1624 if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
1625 state->asserted = 1;
1626 else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
1627 state->asserted = 0;
1628 return 0;
1629 }
1630
1631 /* Trigger the IPI */
1632 xive_irq_trigger(&state->ipi_data);
1633
1634 return 0;
1635}
1636
1637static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1638{
1639 struct kvmppc_xive *xive = dev->private;
1640
1641 /* We honor the existing XICS ioctl */
1642 switch (attr->group) {
1643 case KVM_DEV_XICS_GRP_SOURCES:
1644 return xive_set_source(xive, attr->attr, attr->addr);
1645 }
1646 return -ENXIO;
1647}
1648
1649static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1650{
1651 struct kvmppc_xive *xive = dev->private;
1652
1653 /* We honor the existing XICS ioctl */
1654 switch (attr->group) {
1655 case KVM_DEV_XICS_GRP_SOURCES:
1656 return xive_get_source(xive, attr->attr, attr->addr);
1657 }
1658 return -ENXIO;
1659}
1660
1661static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1662{
1663 /* We honor the same limits as XICS, at least for now */
1664 switch (attr->group) {
1665 case KVM_DEV_XICS_GRP_SOURCES:
1666 if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
1667 attr->attr < KVMPPC_XICS_NR_IRQS)
1668 return 0;
1669 break;
1670 }
1671 return -ENXIO;
1672}
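/*
 * A hedged userspace-side sketch of the interface implemented by
 * xive_{set,get}_attr() above: one 64-bit state word per source,
 * accessed through KVM_{SET,GET}_DEVICE_ATTR in group
 * KVM_DEV_XICS_GRP_SOURCES. The helper and argument names are
 * hypothetical.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_save_source(int xive_dev_fd, uint64_t irq, uint64_t *state)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_XICS_GRP_SOURCES,
		.attr = irq,
		.addr = (uintptr_t)state,
	};

	return ioctl(xive_dev_fd, KVM_GET_DEVICE_ATTR, &attr);
}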
1673
1674static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
1675{
1676 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
1677 xive_native_configure_irq(hw_num, 0, MASKED, 0);
1678 xive_cleanup_irq_data(xd);
1679}
1680
1681static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
1682{
1683 int i;
1684
1685 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
1686 struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
1687
1688 if (!state->valid)
1689 continue;
1690
1691 kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data);
1692 xive_native_free_irq(state->ipi_number);
1693
1694 /* Pass-through, cleanup too */
1695 if (state->pt_number)
1696 kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data);
1697
1698 state->valid = false;
1699 }
1700}
1701
1702static void kvmppc_xive_free(struct kvm_device *dev)
1703{
1704 struct kvmppc_xive *xive = dev->private;
1705 struct kvm *kvm = xive->kvm;
1706 int i;
1707
1708 debugfs_remove(xive->dentry);
1709
1710 if (kvm)
1711 kvm->arch.xive = NULL;
1712
1713 /* Mask and free interrupts */
1714 for (i = 0; i <= xive->max_sbid; i++) {
1715 if (xive->src_blocks[i])
1716 kvmppc_xive_free_sources(xive->src_blocks[i]);
1717 kfree(xive->src_blocks[i]);
1718 xive->src_blocks[i] = NULL;
1719 }
1720
1721 if (xive->vp_base != XIVE_INVALID_VP)
1722 xive_native_free_vp_block(xive->vp_base);
1723
1724
1725 kfree(xive);
1726 kfree(dev);
1727}
1728
1729static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
1730{
1731 struct kvmppc_xive *xive;
1732 struct kvm *kvm = dev->kvm;
1733 int ret = 0;
1734
1735 pr_devel("Creating xive for partition\n");
1736
1737 xive = kzalloc(sizeof(*xive), GFP_KERNEL);
1738 if (!xive)
1739 return -ENOMEM;
1740
1741 dev->private = xive;
1742 xive->dev = dev;
1743 xive->kvm = kvm;
1744
1745 /* Already there ? */
1746 if (kvm->arch.xive)
1747 ret = -EEXIST;
1748 else
1749 kvm->arch.xive = xive;
1750
1751 /* We use the default queue size set by the host */
1752 xive->q_order = xive_native_default_eq_shift();
1753 if (xive->q_order < PAGE_SHIFT)
1754 xive->q_page_order = 0;
1755 else
1756 xive->q_page_order = xive->q_order - PAGE_SHIFT;
1757
1758 /* Allocate a bunch of VPs */
1759 xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
1760 pr_devel("VP_Base=%x\n", xive->vp_base);
1761
1762 if (xive->vp_base == XIVE_INVALID_VP)
1763 ret = -ENOMEM;
1764
1765 if (ret) {
1766 kfree(xive);
1767 return ret;
1768 }
1769
1770 return 0;
1771}
1772
1773
1774static int xive_debug_show(struct seq_file *m, void *private)
1775{
1776 struct kvmppc_xive *xive = m->private;
1777 struct kvm *kvm = xive->kvm;
1778 struct kvm_vcpu *vcpu;
1779 u64 t_rm_h_xirr = 0;
1780 u64 t_rm_h_ipoll = 0;
1781 u64 t_rm_h_cppr = 0;
1782 u64 t_rm_h_eoi = 0;
1783 u64 t_rm_h_ipi = 0;
1784 u64 t_vm_h_xirr = 0;
1785 u64 t_vm_h_ipoll = 0;
1786 u64 t_vm_h_cppr = 0;
1787 u64 t_vm_h_eoi = 0;
1788 u64 t_vm_h_ipi = 0;
1789 unsigned int i;
1790
1791 if (!kvm)
1792 return 0;
1793
1794 seq_printf(m, "=========\nVCPU state\n=========\n");
1795
1796 kvm_for_each_vcpu(i, vcpu, kvm) {
1797 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1798
1799 if (!xc)
1800 continue;
1801
1802 seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
1803 " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
1804 xc->server_num, xc->cppr, xc->hw_cppr,
1805 xc->mfrr, xc->pending,
1806 xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
1807
1808 t_rm_h_xirr += xc->stat_rm_h_xirr;
1809 t_rm_h_ipoll += xc->stat_rm_h_ipoll;
1810 t_rm_h_cppr += xc->stat_rm_h_cppr;
1811 t_rm_h_eoi += xc->stat_rm_h_eoi;
1812 t_rm_h_ipi += xc->stat_rm_h_ipi;
1813 t_vm_h_xirr += xc->stat_vm_h_xirr;
1814 t_vm_h_ipoll += xc->stat_vm_h_ipoll;
1815 t_vm_h_cppr += xc->stat_vm_h_cppr;
1816 t_vm_h_eoi += xc->stat_vm_h_eoi;
1817 t_vm_h_ipi += xc->stat_vm_h_ipi;
1818 }
1819
1820 seq_printf(m, "Hcalls totals\n");
1821 seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr);
1822 seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll);
1823 seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr);
1824 seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi);
1825 seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi);
1826
1827 return 0;
1828}
1829
1830static int xive_debug_open(struct inode *inode, struct file *file)
1831{
1832 return single_open(file, xive_debug_show, inode->i_private);
1833}
1834
1835static const struct file_operations xive_debug_fops = {
1836 .open = xive_debug_open,
1837 .read = seq_read,
1838 .llseek = seq_lseek,
1839 .release = single_release,
1840};
1841
1842static void xive_debugfs_init(struct kvmppc_xive *xive)
1843{
1844 char *name;
1845
1846 name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
1847 if (!name) {
1848 pr_err("%s: no memory for name\n", __func__);
1849 return;
1850 }
1851
1852 xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
1853 xive, &xive_debug_fops);
1854
1855 pr_debug("%s: created %s\n", __func__, name);
1856 kfree(name);
1857}
1858
1859static void kvmppc_xive_init(struct kvm_device *dev)
1860{
1861 struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
1862
1863 /* Register some debug interfaces */
1864 xive_debugfs_init(xive);
1865}
1866
1867struct kvm_device_ops kvm_xive_ops = {
1868 .name = "kvm-xive",
1869 .create = kvmppc_xive_create,
1870 .init = kvmppc_xive_init,
1871 .destroy = kvmppc_xive_free,
1872 .set_attr = xive_set_attr,
1873 .get_attr = xive_get_attr,
1874 .has_attr = xive_has_attr,
1875};
1876
1877void kvmppc_xive_init_module(void)
1878{
1879 __xive_vm_h_xirr = xive_vm_h_xirr;
1880 __xive_vm_h_ipoll = xive_vm_h_ipoll;
1881 __xive_vm_h_ipi = xive_vm_h_ipi;
1882 __xive_vm_h_cppr = xive_vm_h_cppr;
1883 __xive_vm_h_eoi = xive_vm_h_eoi;
1884}
1885
1886void kvmppc_xive_exit_module(void)
1887{
1888 __xive_vm_h_xirr = NULL;
1889 __xive_vm_h_ipoll = NULL;
1890 __xive_vm_h_ipi = NULL;
1891 __xive_vm_h_cppr = NULL;
1892 __xive_vm_h_eoi = NULL;
1893}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
new file mode 100644
index 000000000000..5938f7644dc1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -0,0 +1,256 @@
1/*
2 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#ifndef _KVM_PPC_BOOK3S_XIVE_H
10#define _KVM_PPC_BOOK3S_XIVE_H
11
12#ifdef CONFIG_KVM_XICS
13#include "book3s_xics.h"
14
15/*
16 * State for one guest irq source.
17 *
18 * For each guest source we allocate a HW interrupt in the XIVE
19 * which we use for all SW triggers. It will be unused for
20 * pass-through but it's easier to keep around as the same
21 * guest interrupt can alternatively be emulated or passed through
22 * if a physical device is hot unplugged and replaced with an
23 * emulated one.
24 *
25 * This state structure is very similar to the XICS one with
26 * additional XIVE specific tracking.
27 */
28struct kvmppc_xive_irq_state {
29 bool valid; /* Interrupt entry is valid */
30
31 u32 number; /* Guest IRQ number */
32 u32 ipi_number; /* XIVE IPI HW number */
33 struct xive_irq_data ipi_data; /* XIVE IPI associated data */
34 u32 pt_number; /* XIVE Pass-through number if any */
35 struct xive_irq_data *pt_data; /* XIVE Pass-through associated data */
36
37 /* Targeting as set by guest */
38 u32 guest_server; /* Current guest selected target */
39 u8 guest_priority; /* Guest set priority */
40 u8 saved_priority; /* Saved priority when masking */
41
42 /* Actual targeting */
43 u32 act_server; /* Actual server */
44 u8 act_priority; /* Actual priority */
45
46 /* Various state bits */
47 bool in_eoi; /* Synchronize with H_EOI */
48 bool old_p; /* P bit state when masking */
49 bool old_q; /* Q bit state when masking */
50 bool lsi; /* level-sensitive interrupt */
51 bool asserted; /* Only for emulated LSI: current state */
52
53 /* Saved for migration state */
54 bool in_queue;
55 bool saved_p;
56 bool saved_q;
57 u8 saved_scan_prio;
58};
59
60/* Select the "right" interrupt (IPI vs. passthrough) */
61static inline void kvmppc_xive_select_irq(struct kvmppc_xive_irq_state *state,
62 u32 *out_hw_irq,
63 struct xive_irq_data **out_xd)
64{
65 if (state->pt_number) {
66 if (out_hw_irq)
67 *out_hw_irq = state->pt_number;
68 if (out_xd)
69 *out_xd = state->pt_data;
70 } else {
71 if (out_hw_irq)
72 *out_hw_irq = state->ipi_number;
73 if (out_xd)
74 *out_xd = &state->ipi_data;
75 }
76}
77
78/*
79 * This corresponds to an "ICS" in XICS terminology; we use it
80 * as a means to break up source information into multiple structures.
81 */
82struct kvmppc_xive_src_block {
83 arch_spinlock_t lock;
84 u16 id;
85 struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
86};
87
88
89struct kvmppc_xive {
90 struct kvm *kvm;
91 struct kvm_device *dev;
92 struct dentry *dentry;
93
94 /* VP block associated with the VM */
95 u32 vp_base;
96
97 /* Blocks of sources */
98 struct kvmppc_xive_src_block *src_blocks[KVMPPC_XICS_MAX_ICS_ID + 1];
99 u32 max_sbid;
100
101 /*
102 * For state save, we lazily scan the queues on the first interrupt
103 * being migrated. We don't have a clean way to reset those flags,
104 * so we keep track of the number of valid sources and how many of
105 * them were migrated so we can reset when all of them have been
106 * processed.
107 */
108 u32 src_count;
109 u32 saved_src_count;
110
111 /*
112 * Some irqs are delayed on restore until the source is created;
113 * keep track here of how many of them there are.
114 */
115 u32 delayed_irqs;
116
117 /* Which queues (priorities) are in use by the guest */
118 u8 qmap;
119
120 /* Queue orders */
121 u32 q_order;
122 u32 q_page_order;
123
124};
125
126#define KVMPPC_XIVE_Q_COUNT 8
127
128struct kvmppc_xive_vcpu {
129 struct kvmppc_xive *xive;
130 struct kvm_vcpu *vcpu;
131 bool valid;
132
133 /* Server number. This is the HW CPU ID from a guest perspective */
134 u32 server_num;
135
136 /*
137 * HW VP corresponding to this VCPU. This is the base of the VP
138 * block plus the server number.
139 */
140 u32 vp_id;
141 u32 vp_chip_id;
142 u32 vp_cam;
143
144 /* IPI used for sending ... IPIs */
145 u32 vp_ipi;
146 struct xive_irq_data vp_ipi_data;
147
148 /* Local emulation state */
149 uint8_t cppr; /* guest CPPR */
150 uint8_t hw_cppr;/* Hardware CPPR */
151 uint8_t mfrr;
152 uint8_t pending;
153
154 /* Each VP has 8 queues though we only provision some */
155 struct xive_q queues[KVMPPC_XIVE_Q_COUNT];
156 u32 esc_virq[KVMPPC_XIVE_Q_COUNT];
157 char *esc_virq_names[KVMPPC_XIVE_Q_COUNT];
158
159 /* Stash a delayed irq on restore from migration (see set_icp) */
160 u32 delayed_irq;
161
162 /* Stats */
163 u64 stat_rm_h_xirr;
164 u64 stat_rm_h_ipoll;
165 u64 stat_rm_h_cppr;
166 u64 stat_rm_h_eoi;
167 u64 stat_rm_h_ipi;
168 u64 stat_vm_h_xirr;
169 u64 stat_vm_h_ipoll;
170 u64 stat_vm_h_cppr;
171 u64 stat_vm_h_eoi;
172 u64 stat_vm_h_ipi;
173};
174
175static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
176{
177 struct kvm_vcpu *vcpu = NULL;
178 int i;
179
180 kvm_for_each_vcpu(i, vcpu, kvm) {
181 if (vcpu->arch.xive_vcpu && nr == vcpu->arch.xive_vcpu->server_num)
182 return vcpu;
183 }
184 return NULL;
185}
186
187static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmppc_xive *xive,
188 u32 irq, u16 *source)
189{
190 u32 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
191 u16 src = irq & KVMPPC_XICS_SRC_MASK;
192
193 if (source)
194 *source = src;
195 if (bid > KVMPPC_XICS_MAX_ICS_ID)
196 return NULL;
197 return xive->src_blocks[bid];
198}
199
200/*
201 * Mapping between guest priorities and host priorities
202 * is as follows.
203 *
204 * Guest request for 0...6 are honored. Guest request for anything
205 * higher results in a priority of 7 being applied.
206 *
207 * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
208 * in order to match AIX expectations.
209 *
210 * A similar mapping is done for CPPR values.
211 */
212static inline u8 xive_prio_from_guest(u8 prio)
213{
214 if (prio == 0xff || prio < 8)
215 return prio;
216 return 7;
217}
218
219static inline u8 xive_prio_to_guest(u8 prio)
220{
221 if (prio == 0xff || prio < 7)
222 return prio;
223 return 0xb;
224}
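/*
 * A minimal sketch of the mapping above (the self-test function itself
 * is hypothetical): guest priorities 0-6 are honored, anything higher
 * short of 0xff is clamped to 7, and 7 is reported back as 0xb.
 */
static void __maybe_unused example_prio_mapping(void)
{
	WARN_ON(xive_prio_from_guest(5) != 5);		/* 0..6 pass through */
	WARN_ON(xive_prio_from_guest(0xa) != 7);	/* clamped to 7 */
	WARN_ON(xive_prio_to_guest(7) != 0xb);		/* 7 shown as 0xb */
	WARN_ON(xive_prio_from_guest(0xff) != 0xff);	/* masked stays masked */
	WARN_ON(xive_prio_to_guest(0xff) != 0xff);
}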
225
226static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
227{
228 u32 cur;
229
230 if (!qpage)
231 return 0;
232 cur = be32_to_cpup(qpage + *idx);
233 if ((cur >> 31) == *toggle)
234 return 0;
235 *idx = (*idx + 1) & msk;
236 if (*idx == 0)
237 (*toggle) ^= 1;
238 return cur & 0x7fffffff;
239}
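/*
 * A short sketch (hypothetical helper) of how __xive_read_eq() above is
 * meant to be driven: bit 31 of each EQ entry is the generation bit,
 * entries matching the consumer's toggle are valid, and the toggle
 * flips each time the index wraps around the queue.
 */
static inline void example_peek_queue(struct xive_q *q)
{
	u32 idx = q->idx;
	u32 toggle = q->toggle;
	u32 irq;

	/* Walk the valid entries without consuming them */
	while ((irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle)) != 0)
		pr_devel("queued irq 0x%x\n", irq);

	/* A fetching caller would write idx/toggle back into the queue */
}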
240
241extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
242extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
243extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
244 unsigned long mfrr);
245extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
246extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
247
248extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
249extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
250extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
251 unsigned long mfrr);
252extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
253extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
254
255#endif /* CONFIG_KVM_XICS */
256#endif /* _KVM_PPC_BOOK3S_XIVE_H */
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
new file mode 100644
index 000000000000..023a31133c37
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -0,0 +1,503 @@
1/*
2 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9/* File to be included by other .c files */
10
11#define XGLUE(a,b) a##b
12#define GLUE(a,b) XGLUE(a,b)
13
14static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
15{
16 u8 cppr;
17 u16 ack;
18
19 /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
20
21 /* Perform the acknowledge OS to register cycle. */
22 ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
23
24 /* Synchronize subsequent queue accesses */
25 mb();
26
27 /* XXX Check grouping level */
28
29 /* Anything ? */
30 if (!((ack >> 8) & TM_QW1_NSR_EO))
31 return;
32
33 /* Grab CPPR of the most favored pending interrupt */
34 cppr = ack & 0xff;
35 if (cppr < 8)
36 xc->pending |= 1 << cppr;
37
38#ifdef XIVE_RUNTIME_CHECKS
39 /* Check consistency */
40 if (cppr >= xc->hw_cppr)
41 pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
42 smp_processor_id(), cppr, xc->hw_cppr);
43#endif
44
45 /*
46 * Update our image of the HW CPPR. We don't yet modify
47 * xc->cppr, this will be done as we scan for interrupts
48 * in the queues.
49 */
50 xc->hw_cppr = cppr;
51}
52
53static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
54{
55 u64 val;
56
57 if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
58 offset |= offset << 4;
59
60 val = __x_readq(__x_eoi_page(xd) + offset);
61#ifdef __LITTLE_ENDIAN__
62 val >>= 64-8;
63#endif
64 return (u8)val;
65}
66
67
68static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
69{
70 /* If the XIVE supports the new "store EOI" facility, use it */
71 if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
72 __x_writeq(0, __x_eoi_page(xd));
73 else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
74 opal_int_eoi(hw_irq);
75 } else {
76 uint64_t eoi_val;
77
78 /*
79 * Otherwise for EOI, we use the special MMIO that does
80 * a clear of both P and Q and returns the old Q,
81 * except for LSIs where we use the "EOI cycle" special
82 * load.
83 *
84 * This allows us to then do a re-trigger if Q was set
85 * rather than synthesizing an interrupt in software.
86 *
87 * For LSIs, using the HW EOI cycle works around a problem
88 * on P9 DD1 PHBs where the other ESB accesses don't work
89 * properly.
90 */
91 if (xd->flags & XIVE_IRQ_FLAG_LSI)
92 __x_readq(__x_eoi_page(xd));
93 else {
94 eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
95
96 /* Re-trigger if needed */
97 if ((eoi_val & 1) && __x_trig_page(xd))
98 __x_writeq(0, __x_trig_page(xd));
99 }
100 }
101}
102
103enum {
104 scan_fetch,
105 scan_poll,
106 scan_eoi,
107};
108
109static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
110 u8 pending, int scan_type)
111{
112 u32 hirq = 0;
113 u8 prio = 0xff;
114
115 /* Find highest pending priority */
116 while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
117 struct xive_q *q;
118 u32 idx, toggle;
119 __be32 *qpage;
120
121 /*
122 * If pending is 0 this will return 0xff, which is what
123 * we want.
124 */
125 prio = ffs(pending) - 1;
126
127 /*
128 * If the most favored prio we found pending is no more
129 * favored than a pending IPI, we return
130 * the IPI instead.
131 *
132 * Note: If pending was 0 and mfrr is 0xff, we will
133 * not spuriously take an IPI because mfrr cannot
134 * then be smaller than cppr.
135 */
136 if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
137 prio = xc->mfrr;
138 hirq = XICS_IPI;
139 break;
140 }
141
142 /* Don't scan past the guest cppr */
143 if (prio >= xc->cppr || prio > 7)
144 break;
145
146 /* Grab queue and pointers */
147 q = &xc->queues[prio];
148 idx = q->idx;
149 toggle = q->toggle;
150
151 /*
152 * Snapshot the queue page. The test further down for EOI
153 * must use the same "copy" that was used by __xive_read_eq
154 * since qpage can be set concurrently and we don't want
155 * to miss an EOI.
156 */
157 qpage = READ_ONCE(q->qpage);
158
159skip_ipi:
160 /*
161 * Try to fetch from the queue. Will return 0 for a
162 * non-queueing priority (ie, qpage = 0).
163 */
164 hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
165
166 /*
167 * If this was a signal for an MFRR change done by
168 * H_IPI we skip it. Additionally, if we were fetching,
169 * we EOI it now, thus re-enabling reception of a new
170 * such signal.
171 *
172 * We also need to do that if prio is 0 and we had no
173 * page for the queue. In this case, we have a non-queued
174 * IPI that needs to be EOId.
175 *
176 * This is safe because if we have another pending MFRR
177 * change that wasn't observed above, the Q bit will have
178 * been set and another occurrence of the IPI will trigger.
179 */
180 if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
181 if (scan_type == scan_fetch)
182 GLUE(X_PFX,source_eoi)(xc->vp_ipi,
183 &xc->vp_ipi_data);
184 /* Loop back on same queue with updated idx/toggle */
185#ifdef XIVE_RUNTIME_CHECKS
186 WARN_ON(hirq && hirq != XICS_IPI);
187#endif
188 if (hirq)
189 goto skip_ipi;
190 }
191
192 /* If fetching, update queue pointers */
193 if (scan_type == scan_fetch) {
194 q->idx = idx;
195 q->toggle = toggle;
196 }
197
198 /* Something found, stop searching */
199 if (hirq)
200 break;
201
202 /* Clear the pending bit on the now empty queue */
203 pending &= ~(1 << prio);
204
205 /*
206 * Check if the queue count needs adjusting due to
207 * interrupts being moved away.
208 */
209 if (atomic_read(&q->pending_count)) {
210 int p = atomic_xchg(&q->pending_count, 0);
211 if (p) {
212#ifdef XIVE_RUNTIME_CHECKS
213 WARN_ON(p > atomic_read(&q->count));
214#endif
215 atomic_sub(p, &q->count);
216 }
217 }
218 }
219
220 /* If we are just taking a "peek", do nothing else */
221 if (scan_type == scan_poll)
222 return hirq;
223
224 /* Update the pending bits */
225 xc->pending = pending;
226
227 /*
228 * If this is an EOI that's it, no CPPR adjustment done here,
229 * all we needed was cleanup the stale pending bits and check
230 * if there's anything left.
231 */
232 if (scan_type == scan_eoi)
233 return hirq;
234
235 /*
236 * If we found an interrupt, adjust what the guest CPPR should
237 * be as if we had just fetched that interrupt from HW.
238 */
239 if (hirq)
240 xc->cppr = prio;
241 /*
242 * If it was an IPI the HW CPPR might have been lowered too much
243 * as the HW interrupt we use for IPIs is routed to priority 0.
244 *
245 * We re-sync it here.
246 */
247 if (xc->cppr != xc->hw_cppr) {
248 xc->hw_cppr = xc->cppr;
249 __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
250 }
251
252 return hirq;
253}
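/*
 * A tiny sketch (hypothetical helper) of the priority selection used by
 * the scan above: bit N of "pending" means priority N may have queued
 * entries, and a numerically lower priority is more favored.
 */
static inline u8 example_most_favored(u8 pending)
{
	/* ffs() is 1-based; 0xff means nothing is pending */
	return pending ? ffs(pending) - 1 : 0xff;
}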
254
255X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
256{
257 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
258 u8 old_cppr;
259 u32 hirq;
260
261 pr_devel("H_XIRR\n");
262
263 xc->GLUE(X_STAT_PFX,h_xirr)++;
264
265 /* First collect pending bits from HW */
266 GLUE(X_PFX,ack_pending)(xc);
267
268 /*
269 * Cleanup the old-style bits if needed (they may have been
270 * set by a pull or an escalation interrupt).
271 */
272 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
273 clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
274 &vcpu->arch.pending_exceptions);
275
276 pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
277 xc->pending, xc->hw_cppr, xc->cppr);
278
279 /* Grab previous CPPR and reverse map it */
280 old_cppr = xive_prio_to_guest(xc->cppr);
281
282 /* Scan for actual interrupts */
283 hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
284
285 pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
286 hirq, xc->hw_cppr, xc->cppr);
287
288#ifdef XIVE_RUNTIME_CHECKS
289 /* That should never hit */
290 if (hirq & 0xff000000)
291 pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
292#endif
293
294 /*
295 * XXX We could check if the interrupt is masked here and
296 * filter it. If we chose to do so, we would need to do:
297 *
298 * if (masked) {
299 * lock();
300 * if (masked) {
301 * old_Q = true;
302 * hirq = 0;
303 * }
304 * unlock();
305 * }
306 */
307
308 /* Return interrupt and old CPPR in GPR4 */
309 vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
310
311 return H_SUCCESS;
312}
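/*
 * A small sketch (hypothetical helper) of the XIRR layout used for the
 * GPR4 return value above and parsed again in h_eoi(): the old CPPR
 * sits in the top byte, the source number in the low 24 bits.
 */
static inline void example_split_xirr(unsigned long xirr, u8 *cppr, u32 *irq)
{
	*cppr = xirr >> 24;
	*irq = xirr & 0x00ffffff;
}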
313
314X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
315{
316 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
317 u8 pending = xc->pending;
318 u32 hirq;
319 u8 pipr;
320
321 pr_devel("H_IPOLL(server=%ld)\n", server);
322
323 xc->GLUE(X_STAT_PFX,h_ipoll)++;
324
325 /* Grab the target VCPU if not the current one */
326 if (xc->server_num != server) {
327 vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
328 if (!vcpu)
329 return H_PARAMETER;
330 xc = vcpu->arch.xive_vcpu;
331
332 /* Scan all priorities */
333 pending = 0xff;
334 } else {
335 /* Grab pending interrupt if any */
336 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
337 if (pipr < 8)
338 pending |= 1 << pipr;
339 }
340
341 hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
342
343 /* Return interrupt and old CPPR in GPR4 */
344 vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
345
346 return H_SUCCESS;
347}
348
349static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
350{
351 u8 pending, prio;
352
353 pending = xc->pending;
354 if (xc->mfrr != 0xff) {
355 if (xc->mfrr < 8)
356 pending |= 1 << xc->mfrr;
357 else
358 pending |= 0x80;
359 }
360 if (!pending)
361 return;
362 prio = ffs(pending) - 1;
363
364 __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
365}
366
367X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
368{
369 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
370 u8 old_cppr;
371
372 pr_devel("H_CPPR(cppr=%ld)\n", cppr);
373
374 xc->GLUE(X_STAT_PFX,h_cppr)++;
375
376 /* Map CPPR */
377 cppr = xive_prio_from_guest(cppr);
378
379 /* Remember old and update SW state */
380 old_cppr = xc->cppr;
381 xc->cppr = cppr;
382
383 /*
384 * We are masking less, we need to look for pending things
385 * to deliver and set VP pending bits accordingly to trigger
386 * a new interrupt otherwise we might miss MFRR changes for
387 * which we have optimized out sending an IPI signal.
388 */
389 if (cppr > old_cppr)
390 GLUE(X_PFX,push_pending_to_hw)(xc);
391
392 /* Apply new CPPR */
393 xc->hw_cppr = cppr;
394 __x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
395
396 return H_SUCCESS;
397}
398
399X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
400{
401 struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
402 struct kvmppc_xive_src_block *sb;
403 struct kvmppc_xive_irq_state *state;
404 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
405 struct xive_irq_data *xd;
406 u8 new_cppr = xirr >> 24;
407 u32 irq = xirr & 0x00ffffff, hw_num;
408 u16 src;
409 int rc = 0;
410
411 pr_devel("H_EOI(xirr=%08lx)\n", xirr);
412
413 xc->GLUE(X_STAT_PFX,h_eoi)++;
414
415 xc->cppr = xive_prio_from_guest(new_cppr);
416
417 /*
418 * IPIs are synthesized from MFRR and thus don't need
419 * any special EOI handling. The underlying interrupt
420 * used to signal MFRR changes is EOId when fetched from
421 * the queue.
422 */
423 if (irq == XICS_IPI || irq == 0)
424 goto bail;
425
426 /* Find interrupt source */
427 sb = kvmppc_xive_find_source(xive, irq, &src);
428 if (!sb) {
429 pr_devel(" source not found !\n");
430 rc = H_PARAMETER;
431 goto bail;
432 }
433 state = &sb->irq_state[src];
434 kvmppc_xive_select_irq(state, &hw_num, &xd);
435
436 state->in_eoi = true;
437 mb();
438
439again:
440 if (state->guest_priority == MASKED) {
441 arch_spin_lock(&sb->lock);
442 if (state->guest_priority != MASKED) {
443 arch_spin_unlock(&sb->lock);
444 goto again;
445 }
446 pr_devel(" EOI on saved P...\n");
447
448 /* Clear old_p, that will cause unmask to perform an EOI */
449 state->old_p = false;
450
451 arch_spin_unlock(&sb->lock);
452 } else {
453 pr_devel(" EOI on source...\n");
454
455 /* Perform EOI on the source */
456 GLUE(X_PFX,source_eoi)(hw_num, xd);
457
458 /* If it's an emulated LSI, check level and resend */
459 if (state->lsi && state->asserted)
460 __x_writeq(0, __x_trig_page(xd));
461
462 }
463
464 mb();
465 state->in_eoi = false;
466bail:
467
468 /* Re-evaluate pending IRQs and update HW */
469 GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
470 GLUE(X_PFX,push_pending_to_hw)(xc);
471 pr_devel(" after scan pending=%02x\n", xc->pending);
472
473 /* Apply new CPPR */
474 xc->hw_cppr = xc->cppr;
475 __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
476
477 return rc;
478}
479
480X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
481 unsigned long mfrr)
482{
483 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
484
485 pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
486
487 xc->GLUE(X_STAT_PFX,h_ipi)++;
488
489 /* Find target */
490 vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
491 if (!vcpu)
492 return H_PARAMETER;
493 xc = vcpu->arch.xive_vcpu;
494
495 /* Locklessly write over MFRR */
496 xc->mfrr = mfrr;
497
498 /* Shoot the IPI if more favored than the target cppr */
499 if (mfrr < xc->cppr)
500 __x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
501
502 return H_SUCCESS;
503}
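/*
 * A one-line sketch (hypothetical helper) of the delivery test above:
 * "more favored" means numerically lower, so an MFRR of 5 pokes a vCPU
 * running at CPPR 6 or above but not one already at CPPR 5 or below.
 */
static inline bool example_ipi_fires(unsigned long mfrr, u8 cppr)
{
	return mfrr < cppr;
}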
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b90762..3f1be85a83bc 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -12,6 +12,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
12#endif 12#endif
13#ifdef CONFIG_KVM_XICS 13#ifdef CONFIG_KVM_XICS
14 ret = ret || (kvm->arch.xics != NULL); 14 ret = ret || (kvm->arch.xics != NULL);
15 ret = ret || (kvm->arch.xive != NULL);
15#endif 16#endif
16 smp_rmb(); 17 smp_rmb();
17 return ret; 18 return ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index cf725c580fc5..e4b58f2e335e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,6 +38,8 @@
38#include <asm/irqflags.h> 38#include <asm/irqflags.h>
39#include <asm/iommu.h> 39#include <asm/iommu.h>
40#include <asm/switch_to.h> 40#include <asm/switch_to.h>
41#include <asm/xive.h>
42
41#include "timing.h" 43#include "timing.h"
42#include "irq.h" 44#include "irq.h"
43#include "../mm/mmu_decl.h" 45#include "../mm/mmu_decl.h"
@@ -697,7 +699,10 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
697 kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); 699 kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
698 break; 700 break;
699 case KVMPPC_IRQ_XICS: 701 case KVMPPC_IRQ_XICS:
700 kvmppc_xics_free_icp(vcpu); 702 if (xive_enabled())
703 kvmppc_xive_cleanup_vcpu(vcpu);
704 else
705 kvmppc_xics_free_icp(vcpu);
701 break; 706 break;
702 } 707 }
703 708
@@ -1522,8 +1527,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
1522 1527
1523 r = -EPERM; 1528 r = -EPERM;
1524 dev = kvm_device_from_filp(f.file); 1529 dev = kvm_device_from_filp(f.file);
1525 if (dev) 1530 if (dev) {
1526 r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); 1531 if (xive_enabled())
1532 r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
1533 else
1534 r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
1535 }
1527 1536
1528 fdput(f); 1537 fdput(f);
1529 break; 1538 break;
@@ -1547,7 +1556,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
1547 return true; 1556 return true;
1548#endif 1557#endif
1549#ifdef CONFIG_KVM_XICS 1558#ifdef CONFIG_KVM_XICS
1550 if (kvm->arch.xics) 1559 if (kvm->arch.xics || kvm->arch.xive)
1551 return true; 1560 return true;
1552#endif 1561#endif
1553 return false; 1562 return false;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 99b0ae8acb78..9b25cded03e9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -373,6 +373,7 @@ config PPC_PERF_CTRS
373 373
374config SMP 374config SMP
375 depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x 375 depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x
376 select GENERIC_IRQ_MIGRATION
376 bool "Symmetric multi-processing support" 377 bool "Symmetric multi-processing support"
377 ---help--- 378 ---help---
378 This enables support for systems with more than one CPU. If you have 379 This enables support for systems with more than one CPU. If you have
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 3a07e4dcf97c..9689a6272995 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -4,6 +4,7 @@ config PPC_POWERNV
4 select PPC_NATIVE 4 select PPC_NATIVE
5 select PPC_XICS 5 select PPC_XICS
6 select PPC_ICP_NATIVE 6 select PPC_ICP_NATIVE
7 select PPC_XIVE_NATIVE
7 select PPC_P7_NAP 8 select PPC_P7_NAP
8 select PCI 9 select PCI
9 select PCI_MSI 10 select PCI_MSI
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index da8a0f7a035c..085605a73168 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -301,3 +301,18 @@ OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
301OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); 301OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
302OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); 302OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
303OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR); 303OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
304OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET);
305OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO);
306OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG);
307OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG);
308OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO);
309OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
310OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
311OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
312OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
313OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
314OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
315OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
316OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
317OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
318OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index e0f856bfbfe8..d71cd773d870 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -890,3 +890,4 @@ EXPORT_SYMBOL_GPL(opal_leds_set_ind);
890EXPORT_SYMBOL_GPL(opal_write_oppanel_async); 890EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
891/* Export this for KVM */ 891/* Export this for KVM */
892EXPORT_SYMBOL_GPL(opal_int_set_mfrr); 892EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
893EXPORT_SYMBOL_GPL(opal_int_eoi);
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 5dcbdea1afac..1a9d84371a4d 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -62,7 +62,7 @@ int powernv_get_random_real_mode(unsigned long *v)
62 62
63 rng = raw_cpu_read(powernv_rng); 63 rng = raw_cpu_read(powernv_rng);
64 64
65 *v = rng_whiten(rng, in_rm64(rng->regs_real)); 65 *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
66 66
67 return 1; 67 return 1;
68} 68}
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d50c7d99baaf..adceac978d18 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -32,6 +32,7 @@
32#include <asm/machdep.h> 32#include <asm/machdep.h>
33#include <asm/firmware.h> 33#include <asm/firmware.h>
34#include <asm/xics.h> 34#include <asm/xics.h>
35#include <asm/xive.h>
35#include <asm/opal.h> 36#include <asm/opal.h>
36#include <asm/kexec.h> 37#include <asm/kexec.h>
37#include <asm/smp.h> 38#include <asm/smp.h>
@@ -76,7 +77,9 @@ static void __init pnv_init(void)
76 77
77static void __init pnv_init_IRQ(void) 78static void __init pnv_init_IRQ(void)
78{ 79{
79 xics_init(); 80 /* Try using a XIVE if available, otherwise use a XICS */
81 if (!xive_native_init())
82 xics_init();
80 83
81 WARN_ON(!ppc_md.get_irq); 84 WARN_ON(!ppc_md.get_irq);
82} 85}
@@ -218,10 +221,12 @@ static void pnv_kexec_wait_secondaries_down(void)
218 221
219static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) 222static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
220{ 223{
221 xics_kexec_teardown_cpu(secondary); 224 if (xive_enabled())
225 xive_kexec_teardown_cpu(secondary);
226 else
227 xics_kexec_teardown_cpu(secondary);
222 228
223 /* On OPAL, we return all CPUs to firmware */ 229 /* On OPAL, we return all CPUs to firmware */
224
225 if (!firmware_has_feature(FW_FEATURE_OPAL)) 230 if (!firmware_has_feature(FW_FEATURE_OPAL))
226 return; 231 return;
227 232
@@ -237,6 +242,10 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
237 /* Primary waits for the secondaries to have reached OPAL */ 242 /* Primary waits for the secondaries to have reached OPAL */
238 pnv_kexec_wait_secondaries_down(); 243 pnv_kexec_wait_secondaries_down();
239 244
245 /* Switch XIVE back to emulation mode */
246 if (xive_enabled())
247 xive_shutdown();
248
240 /* 249 /*
241 * We might be running as little-endian - now that interrupts 250 * We might be running as little-endian - now that interrupts
242 * are disabled, reset the HILE bit to big-endian so we don't 251 * are disabled, reset the HILE bit to big-endian so we don't
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 8b67e1eefb5c..f57195588c6c 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -29,6 +29,7 @@
29#include <asm/vdso_datapage.h> 29#include <asm/vdso_datapage.h>
30#include <asm/cputhreads.h> 30#include <asm/cputhreads.h>
31#include <asm/xics.h> 31#include <asm/xics.h>
32#include <asm/xive.h>
32#include <asm/opal.h> 33#include <asm/opal.h>
33#include <asm/runlatch.h> 34#include <asm/runlatch.h>
34#include <asm/code-patching.h> 35#include <asm/code-patching.h>
@@ -47,7 +48,9 @@
47 48
48static void pnv_smp_setup_cpu(int cpu) 49static void pnv_smp_setup_cpu(int cpu)
49{ 50{
50 if (cpu != boot_cpuid) 51 if (xive_enabled())
52 xive_smp_setup_cpu();
53 else if (cpu != boot_cpuid)
51 xics_setup_cpu(); 54 xics_setup_cpu();
52 55
53#ifdef CONFIG_PPC_DOORBELL 56#ifdef CONFIG_PPC_DOORBELL
@@ -132,7 +135,10 @@ static int pnv_smp_cpu_disable(void)
132 vdso_data->processorCount--; 135 vdso_data->processorCount--;
133 if (cpu == boot_cpuid) 136 if (cpu == boot_cpuid)
134 boot_cpuid = cpumask_any(cpu_online_mask); 137 boot_cpuid = cpumask_any(cpu_online_mask);
135 xics_migrate_irqs_away(); 138 if (xive_enabled())
139 xive_smp_disable_cpu();
140 else
141 xics_migrate_irqs_away();
136 return 0; 142 return 0;
137} 143}
138 144
@@ -213,9 +219,12 @@ static void pnv_smp_cpu_kill_self(void)
213 if (((srr1 & wmask) == SRR1_WAKEEE) || 219 if (((srr1 & wmask) == SRR1_WAKEEE) ||
214 ((srr1 & wmask) == SRR1_WAKEHVI) || 220 ((srr1 & wmask) == SRR1_WAKEHVI) ||
215 (local_paca->irq_happened & PACA_IRQ_EE)) { 221 (local_paca->irq_happened & PACA_IRQ_EE)) {
216 if (cpu_has_feature(CPU_FTR_ARCH_300)) 222 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
217 icp_opal_flush_interrupt(); 223 if (xive_enabled())
218 else 224 xive_flush_interrupt();
225 else
226 icp_opal_flush_interrupt();
227 } else
219 icp_native_flush_interrupt(); 228 icp_native_flush_interrupt();
220 } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { 229 } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
221 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); 230 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
@@ -252,10 +261,26 @@ static int pnv_cpu_bootable(unsigned int nr)
252 return smp_generic_cpu_bootable(nr); 261 return smp_generic_cpu_bootable(nr);
253} 262}
254 263
264static int pnv_smp_prepare_cpu(int cpu)
265{
266 if (xive_enabled())
267 return xive_smp_prepare_cpu(cpu);
268 return 0;
269}
270
271static void __init pnv_smp_probe(void)
272{
273 if (xive_enabled())
274 xive_smp_probe();
275 else
276 xics_smp_probe();
277}
278
255static struct smp_ops_t pnv_smp_ops = { 279static struct smp_ops_t pnv_smp_ops = {
256 .message_pass = smp_muxed_ipi_message_pass, 280 .message_pass = smp_muxed_ipi_message_pass,
257 .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ 281 .cause_ipi = NULL, /* Filled at runtime by xi{cs,ve}_smp_probe() */
258 .probe = xics_smp_probe, 282 .probe = pnv_smp_probe,
283 .prepare_cpu = pnv_smp_prepare_cpu,
259 .kick_cpu = pnv_smp_kick_cpu, 284 .kick_cpu = pnv_smp_kick_cpu,
260 .setup_cpu = pnv_smp_setup_cpu, 285 .setup_cpu = pnv_smp_setup_cpu,
261 .cpu_bootable = pnv_cpu_bootable, 286 .cpu_bootable = pnv_cpu_bootable,
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 52dc165c0efb..caf882e749dc 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -28,6 +28,7 @@ config PPC_MSI_BITMAP
28 default y if PPC_POWERNV 28 default y if PPC_POWERNV
29 29
30source "arch/powerpc/sysdev/xics/Kconfig" 30source "arch/powerpc/sysdev/xics/Kconfig"
31source "arch/powerpc/sysdev/xive/Kconfig"
31 32
32config PPC_SCOM 33config PPC_SCOM
33 bool 34 bool
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index a254824719f1..c0ae11d4f62f 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -71,5 +71,6 @@ obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
71subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror 71subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
72 72
73obj-$(CONFIG_PPC_XICS) += xics/ 73obj-$(CONFIG_PPC_XICS) += xics/
74obj-$(CONFIG_PPC_XIVE) += xive/
74 75
75obj-$(CONFIG_GE_FPGA) += ge/ 76obj-$(CONFIG_GE_FPGA) += ge/
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 8a6a043e239b..f0f3f47a3fc9 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -168,15 +168,15 @@ void icp_native_cause_ipi_rm(int cpu)
168 * Need the physical address of the XICS to be 168 * Need the physical address of the XICS to be
169 * previously saved in kvm_hstate in the paca. 169 * previously saved in kvm_hstate in the paca.
170 */ 170 */
171 unsigned long xics_phys; 171 void __iomem *xics_phys;
172 172
173 /* 173 /*
174 * Just like the cause_ipi functions, it is required to 174 * Just like the cause_ipi functions, it is required to
175 * include a full barrier (out8 includes a sync) before 175 * include a full barrier before causing the IPI.
176 * causing the IPI.
177 */ 176 */
178 xics_phys = paca[cpu].kvm_hstate.xics_phys; 177 xics_phys = paca[cpu].kvm_hstate.xics_phys;
179 out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY); 178 mb();
179 __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
180} 180}
181#endif 181#endif
182 182
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
new file mode 100644
index 000000000000..12ccd7373d2f
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -0,0 +1,11 @@
1config PPC_XIVE
2 bool
3 default n
4 select PPC_SMP_MUXED_IPI
5 select HARDIRQS_SW_RESEND
6
7config PPC_XIVE_NATIVE
8 bool
9 default n
10 select PPC_XIVE
11 depends on PPC_POWERNV
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile
new file mode 100644
index 000000000000..3fab303fc169
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -0,0 +1,4 @@
1subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
2
3obj-y += common.o
4obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
new file mode 100644
index 000000000000..496036c93531
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -0,0 +1,1432 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#define pr_fmt(fmt) "xive: " fmt
11
12#include <linux/types.h>
13#include <linux/threads.h>
14#include <linux/kernel.h>
15#include <linux/irq.h>
16#include <linux/debugfs.h>
17#include <linux/smp.h>
18#include <linux/interrupt.h>
19#include <linux/seq_file.h>
20#include <linux/init.h>
21#include <linux/cpu.h>
22#include <linux/of.h>
23#include <linux/slab.h>
24#include <linux/spinlock.h>
25#include <linux/msi.h>
26
27#include <asm/prom.h>
28#include <asm/io.h>
29#include <asm/smp.h>
30#include <asm/machdep.h>
31#include <asm/irq.h>
32#include <asm/errno.h>
33#include <asm/xive.h>
34#include <asm/xive-regs.h>
35#include <asm/xmon.h>
36
37#include "xive-internal.h"
38
39#undef DEBUG_FLUSH
40#undef DEBUG_ALL
41
42#ifdef DEBUG_ALL
43#define DBG_VERBOSE(fmt...) pr_devel(fmt)
44#else
45#define DBG_VERBOSE(fmt...) do { } while(0)
46#endif
47
48bool __xive_enabled;
49EXPORT_SYMBOL_GPL(__xive_enabled);
50bool xive_cmdline_disabled;
51
52/* We use only one priority for now */
53static u8 xive_irq_priority;
54
55/* TIMA exported to KVM */
56void __iomem *xive_tima;
57EXPORT_SYMBOL_GPL(xive_tima);
58u32 xive_tima_offset;
59
60/* Backend ops */
61static const struct xive_ops *xive_ops;
62
63/* Our global interrupt domain */
64static struct irq_domain *xive_irq_domain;
65
66#ifdef CONFIG_SMP
67/* The IPIs all use the same logical irq number */
68static u32 xive_ipi_irq;
69#endif
70
71/* Xive state for each CPU */
72static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
73
74/*
75 * A "disabled" interrupt should never fire; to catch problems
76 * we set its logical number to this
77 */
78#define XIVE_BAD_IRQ 0x7fffffff
79#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1)
80
81/* An invalid CPU target */
82#define XIVE_INVALID_TARGET (-1)
83
84/*
85 * Read the next entry in a queue, return its content if it's valid
86 * or 0 if there is no new entry.
87 *
88 * The queue pointer is moved forward unless "just_peek" is set
89 */
90static u32 xive_read_eq(struct xive_q *q, bool just_peek)
91{
92 u32 cur;
93
94 if (!q->qpage)
95 return 0;
96 cur = be32_to_cpup(q->qpage + q->idx);
97
98 /* Check valid bit (31) vs current toggle polarity */
99 if ((cur >> 31) == q->toggle)
100 return 0;
101
102 /* If consuming from the queue ... */
103 if (!just_peek) {
104 /* Next entry */
105 q->idx = (q->idx + 1) & q->msk;
106
107 /* Wrap around: flip valid toggle */
108 if (q->idx == 0)
109 q->toggle ^= 1;
110 }
111 /* Mask out the valid bit (31) */
112 return cur & 0x7fffffff;
113}
114
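The comment below describes the generation ("toggle") scheme used by the event queues: bit 31 of each 32-bit entry is a valid bit whose polarity flips every time the producer wraps, so the consumer can tell a fresh entry from a stale one without any extra head/tail handshake. A minimal user-space sketch of the same idea (toy types, not the kernel's struct xive_q):

#include <stdint.h>
#include <stdio.h>

/* Toy 4-entry queue standing in for struct xive_q. */
struct toy_q {
        uint32_t page[4];
        uint32_t idx, msk, toggle;
};

/* Return the payload (bits 0..30) of the next entry, or 0 if none. */
static uint32_t toy_read_eq(struct toy_q *q)
{
        uint32_t cur = q->page[q->idx];

        if ((cur >> 31) == q->toggle)   /* valid bit matches toggle: stale */
                return 0;
        q->idx = (q->idx + 1) & q->msk;
        if (q->idx == 0)                /* wrapped: expect flipped valid bit */
                q->toggle ^= 1;
        return cur & 0x7fffffff;
}

int main(void)
{
        struct toy_q q = { .msk = 3, .toggle = 0 };

        q.page[0] = (1u << 31) | 42;    /* producer posts entry 42 */
        printf("%u\n", (unsigned)toy_read_eq(&q));   /* 42 */
        printf("%u\n", (unsigned)toy_read_eq(&q));   /* 0: next slot stale */
        return 0;
}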
115/*
116 * Scans all the queues that may have interrupts in them
117 * (based on "pending_prio") in priority order until an
118 * interrupt is found or all the queues are empty.
119 *
120 * Then updates the CPPR (Current Processor Priority
121 * Register) based on the most favored interrupt found
122 * (0xff if none) and returns what was found (0 if none).
123 *
124 * If just_peek is set, return the most favored pending
125 * interrupt if any but don't update the queue pointers.
126 *
127 * Note: This function can operate generically on any number
128 * of queues (up to 8). The current implementation of the XIVE
129 * driver only uses a single queue however.
130 *
131 * Note2: This will also "flush" the "pending_count" of a queue
132 * into the "count" when that queue is observed to be empty.
133 * This is used to keep track of the number of interrupts
134 * targeting a queue. When an interrupt is moved away from
135 * a queue, we only decrement that queue count once the queue
136 * has been observed empty to avoid races.
137 */
138static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
139{
140 u32 irq = 0;
141 u8 prio;
142
143 /* Find highest pending priority */
144 while (xc->pending_prio != 0) {
145 struct xive_q *q;
146
147 prio = ffs(xc->pending_prio) - 1;
148 DBG_VERBOSE("scan_irq: trying prio %d\n", prio);
149
150 /* Try to fetch */
151 irq = xive_read_eq(&xc->queue[prio], just_peek);
152
153 /* Found something ? That's it */
154 if (irq)
155 break;
156
157 /* Clear pending bits */
158 xc->pending_prio &= ~(1 << prio);
159
160 /*
161 * Check if the queue count needs adjusting due to
162 * interrupts being moved away. See description of
163 * xive_dec_target_count()
164 */
165 q = &xc->queue[prio];
166 if (atomic_read(&q->pending_count)) {
167 int p = atomic_xchg(&q->pending_count, 0);
168 if (p) {
169 WARN_ON(p > atomic_read(&q->count));
170 atomic_sub(p, &q->count);
171 }
172 }
173 }
174
175 /* If nothing was found, set CPPR to 0xff */
176 if (irq == 0)
177 prio = 0xff;
178
179 /* Update HW CPPR to match if necessary */
180 if (prio != xc->cppr) {
181 DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
182 xc->cppr = prio;
183 out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
184 }
185
186 return irq;
187}
188
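A short worked example of the scan above: pending_prio is a bitmap with bit n set when priority n may have queued entries, and ffs() returns the lowest set bit first, i.e. the most favored priority (0 is most favored). The user-space ffs() from <strings.h> behaves the same way for non-zero values:

#include <stdio.h>
#include <strings.h>

int main(void)
{
        unsigned int pending_prio = 0x28;       /* priorities 3 and 5 pending */

        while (pending_prio) {
                int prio = ffs(pending_prio) - 1;

                printf("scan prio %d\n", prio); /* prints 3, then 5 */
                pending_prio &= ~(1u << prio);  /* queue found empty: clear */
        }
        return 0;
}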
189/*
190 * This is used to perform the magic loads from an ESB
191 * described in xive.h
192 */
193static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset)
194{
195 u64 val;
196
197 /* Handle HW errata */
198 if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
199 offset |= offset << 4;
200
201 val = in_be64(xd->eoi_mmio + offset);
202
203 return (u8)val;
204}
205
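For readers new to ESBs: each source carries two state bits, where P roughly means "an occurrence has been forwarded and may be sitting in a queue" and Q records a further occurrence that arrived meanwhile and was coalesced. The special loads above atomically set PQ to the value encoded in the MMIO offset and return the previous value; that set-and-return-old contract is what the EOI and masking paths below rely on. A hedged, software-only model of the contract (local macro values for illustration only; the real state lives in the XIVE hardware):

#include <stdint.h>
#include <stdio.h>

#define EX_VAL_P 0x2    /* illustrative values, not the kernel's */
#define EX_VAL_Q 0x1

/* Software stand-in for one source's PQ bits. */
static uint8_t pq_set(uint8_t *pq, uint8_t new_pq)
{
        uint8_t old = *pq;      /* what the "magic load" would return */

        *pq = new_pq & 0x3;
        return old;
}

int main(void)
{
        uint8_t pq = 0;                         /* PQ=00: idle */
        uint8_t old;

        pq_set(&pq, EX_VAL_P);                  /* event forwarded: PQ=10 */
        pq_set(&pq, EX_VAL_P | EX_VAL_Q);       /* second event coalesced */

        /* EOI path: clear both bits and look at the old Q to decide
         * whether a re-trigger is needed, as xive_do_source_eoi() does. */
        old = pq_set(&pq, 0);
        if (old & EX_VAL_Q)
                printf("re-trigger needed\n");
        return 0;
}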
206#ifdef CONFIG_XMON
207static void xive_dump_eq(const char *name, struct xive_q *q)
208{
209 u32 i0, i1, idx;
210
211 if (!q->qpage)
212 return;
213 idx = q->idx;
214 i0 = be32_to_cpup(q->qpage + idx);
215 idx = (idx + 1) & q->msk;
216 i1 = be32_to_cpup(q->qpage + idx);
217 xmon_printf(" %s Q T=%d %08x %08x ...\n", name,
218 q->toggle, i0, i1);
219}
220
221void xmon_xive_do_dump(int cpu)
222{
223 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
224
225 xmon_printf("XIVE state for CPU %d:\n", cpu);
226 xmon_printf(" pp=%02x cppr=%02x\n", xc->pending_prio, xc->cppr);
227 xive_dump_eq("IRQ", &xc->queue[xive_irq_priority]);
228#ifdef CONFIG_SMP
229 {
230 u64 val = xive_poke_esb(&xc->ipi_data, XIVE_ESB_GET);
231 xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi,
232 val & XIVE_ESB_VAL_P ? 'P' : 'p',
233 val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
234 }
235#endif
236}
237#endif /* CONFIG_XMON */
238
239static unsigned int xive_get_irq(void)
240{
241 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
242 u32 irq;
243
244 /*
245 * This can be called either as a result of a HW interrupt or
246 * as a "replay" because EOI decided there was still something
247 * in one of the queues.
248 *
249 * First we perform an ACK cycle in order to update our mask
250 * of pending priorities. This will also have the effect of
251 * updating the CPPR to the most favored pending interrupts.
252 *
253 * In the future, if we have a way to differentiate a first
254 * entry (on HW interrupt) from a replay triggered by EOI,
255 * we could skip this on replays unless our soft-mask state tells us
256 * that a new HW interrupt occurred.
257 */
258 xive_ops->update_pending(xc);
259
260 DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);
261
262 /* Scan our queue(s) for interrupts */
263 irq = xive_scan_interrupts(xc, false);
264
265 DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
266 irq, xc->pending_prio);
267
268 /* Return pending interrupt if any */
269 if (irq == XIVE_BAD_IRQ)
270 return 0;
271 return irq;
272}
273
274/*
275 * After EOI'ing an interrupt, we need to re-check the queue
276 * to see if another interrupt is pending since multiple
277 * interrupts can coalesce into a single notification to the
278 * CPU.
279 *
280 * If we find that there is indeed more in there, we call
281 * force_external_irq_replay() to make Linux synthesize an
282 * external interrupt on the next call to local_irq_restore().
283 */
284static void xive_do_queue_eoi(struct xive_cpu *xc)
285{
286 if (xive_scan_interrupts(xc, true) != 0) {
287 DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
288 force_external_irq_replay();
289 }
290}
291
292/*
293 * EOI an interrupt at the source. There are several methods
294 * to do this depending on the HW version and source type
295 */
296void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
297{
298 /* If the XIVE supports the new "store EOI" facility, use it */
299 if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
300 out_be64(xd->eoi_mmio, 0);
301 else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
302 /*
303 * The FW told us to call it. This happens for some
304 * interrupt sources that need additional HW whacking
305 * beyond the ESB manipulation. For example LPC interrupts
306 * on P9 DD1.0 need a latch to be cleared in the LPC bridge
307 * itself. The Firmware will take care of it.
308 */
309 if (WARN_ON_ONCE(!xive_ops->eoi))
310 return;
311 xive_ops->eoi(hw_irq);
312 } else {
313 u8 eoi_val;
314
315 /*
316 * Otherwise for EOI, we use the special MMIO that does
317 * a clear of both P and Q and returns the old Q,
318 * except for LSIs where we use the "EOI cycle" special
319 * load.
320 *
321 * This allows us to then do a re-trigger if Q was set
322 * rather than synthesizing an interrupt in software
323 *
324 * For LSIs, using the HW EOI cycle works around a problem
325 * on P9 DD1 PHBs where the other ESB accesses don't work
326 * properly.
327 */
328 if (xd->flags & XIVE_IRQ_FLAG_LSI)
329 in_be64(xd->eoi_mmio);
330 else {
331 eoi_val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
332 DBG_VERBOSE("eoi_val=%x\n", eoi_val);
333
334 /* Re-trigger if needed */
335 if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
336 out_be64(xd->trig_mmio, 0);
337 }
338 }
339}
340
341/* irq_chip eoi callback */
342static void xive_irq_eoi(struct irq_data *d)
343{
344 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
345 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
346
347 DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
348 d->irq, irqd_to_hwirq(d), xc->pending_prio);
349
350 /*
351 * EOI the source if it hasn't been disabled and hasn't
352 * been passed-through to a KVM guest
353 */
354 if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
355 xive_do_source_eoi(irqd_to_hwirq(d), xd);
356
357 /*
358 * Clear saved_p to indicate that it's no longer occupying
359 * a queue slot on the target queue
360 */
361 xd->saved_p = false;
362
363 /* Check for more work in the queue */
364 xive_do_queue_eoi(xc);
365}
366
367/*
368 * Helper used to mask and unmask an interrupt source. This
369 * is only called for normal interrupts that do not require
370 * masking/unmasking via firmware.
371 */
372static void xive_do_source_set_mask(struct xive_irq_data *xd,
373 bool mask)
374{
375 u64 val;
376
377 /*
378 * If the interrupt had P set, it may be in a queue.
379 *
380 * We need to make sure we don't re-enable it until it
381 * has been fetched from that queue and EOId. We keep
382 * a copy of that P state and use it to restore the
383 * ESB accordingly on unmask.
384 */
385 if (mask) {
386 val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_01);
387 xd->saved_p = !!(val & XIVE_ESB_VAL_P);
388 } else if (xd->saved_p)
389 xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
390 else
391 xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
392}
393
394/*
395 * Try to choose "cpu" as a new interrupt target. Increments
396 * the queue accounting for that target if it's not already
397 * full.
398 */
399static bool xive_try_pick_target(int cpu)
400{
401 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
402 struct xive_q *q = &xc->queue[xive_irq_priority];
403 int max;
404
405 /*
406 * Calculate max number of interrupts in that queue.
407 *
408 * We leave a gap of 1 just in case...
409 */
410 max = (q->msk + 1) - 1;
411 return !!atomic_add_unless(&q->count, 1, max);
412}
413
414/*
415 * Un-account an interrupt for a target CPU. We don't directly
416 * decrement q->count since the interrupt might still be present
417 * in the queue.
418 *
419 * Instead increment a separate counter "pending_count" which
420 * will be subtracted from "count" later when that CPU observes
421 * the queue to be empty.
422 */
423static void xive_dec_target_count(int cpu)
424{
425 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
426 struct xive_q *q = &xc->queue[xive_irq_priority];
427
428 if (unlikely(WARN_ON(cpu < 0 || !xc))) {
429 pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
430 return;
431 }
432
433 /*
434 * We increment the "pending count" which will be used
435 * to decrement the target queue count whenever it's next
436 * processed and found empty. This ensures that we don't
437 * decrement while we still have the interrupt there
438 * occupying a slot.
439 */
440 atomic_inc(&q->pending_count);
441}
442
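The pairing of q->count and q->pending_count described above is subtle, so here is a hedged single-threaded model (plain ints instead of the kernel's atomics, toy names): targeting an interrupt bumps count right away, moving it away only bumps pending_count, and pending_count is folded back into count the next time the queue is observed empty, which is what xive_scan_interrupts() does.

#include <stdio.h>

/* Toy model of one queue's accounting. */
struct toy_q { int count, pending_count, max; };

static int toy_pick(struct toy_q *q)            /* xive_try_pick_target() */
{
        if (q->count >= q->max)
                return 0;
        q->count++;
        return 1;
}

static void toy_unpick(struct toy_q *q)         /* xive_dec_target_count() */
{
        q->pending_count++;                     /* don't touch count yet */
}

static void toy_seen_empty(struct toy_q *q)     /* flush done during scan */
{
        q->count -= q->pending_count;
        q->pending_count = 0;
}

int main(void)
{
        struct toy_q q = { .max = 4 };

        toy_pick(&q);           /* irq targeted here: count=1 */
        toy_unpick(&q);         /* irq moved away: count still 1 */
        toy_seen_empty(&q);     /* queue seen empty: count drops to 0 */
        printf("count=%d pending=%d\n", q.count, q.pending_count);
        return 0;
}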
443/* Find a tentative CPU target in a CPU mask */
444static int xive_find_target_in_mask(const struct cpumask *mask,
445 unsigned int fuzz)
446{
447 int cpu, first, num, i;
448
449 /* Pick up a starting point CPU in the mask based on fuzz */
450 num = cpumask_weight(mask);
451 first = fuzz % num;
452
453 /* Locate it */
454 cpu = cpumask_first(mask);
455 for (i = 0; i < first && cpu < nr_cpu_ids; i++)
456 cpu = cpumask_next(cpu, mask);
457
458 /* Sanity check */
459 if (WARN_ON(cpu >= nr_cpu_ids))
460 cpu = cpumask_first(cpu_online_mask);
461
462 /* Remember first one to handle wrap-around */
463 first = cpu;
464
465 /*
466 * Now go through the entire mask until we find a valid
467 * target.
468 */
469 for (;;) {
470 /*
471 * We re-check online as the fallback case passes us
472 * an untested affinity mask
473 */
474 if (cpu_online(cpu) && xive_try_pick_target(cpu))
475 return cpu;
476 cpu = cpumask_next(cpu, mask);
477 if (cpu == first)
478 break;
479 /* Wrap around */
480 if (cpu >= nr_cpu_ids)
481 cpu = cpumask_first(mask);
482 }
483 return -1;
484}
485
486/*
487 * Pick a target CPU for an interrupt. This is done at
488 * startup or if the affinity is changed in a way that
489 * invalidates the current target.
490 */
491static int xive_pick_irq_target(struct irq_data *d,
492 const struct cpumask *affinity)
493{
494 static unsigned int fuzz;
495 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
496 cpumask_var_t mask;
497 int cpu = -1;
498
499 /*
500 * If we have chip IDs, first we try to build a mask of
501 * CPUs on the interrupt's source chip and find a target in there
502 */
503 if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
504 zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
505 /* Build a mask of matching chip IDs */
506 for_each_cpu_and(cpu, affinity, cpu_online_mask) {
507 struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
508 if (xc->chip_id == xd->src_chip)
509 cpumask_set_cpu(cpu, mask);
510 }
511 /* Try to find a target */
512 if (cpumask_empty(mask))
513 cpu = -1;
514 else
515 cpu = xive_find_target_in_mask(mask, fuzz++);
516 free_cpumask_var(mask);
517 if (cpu >= 0)
518 return cpu;
519 fuzz--;
520 }
521
522 /* No chip IDs, fallback to using the affinity mask */
523 return xive_find_target_in_mask(affinity, fuzz++);
524}
525
526static unsigned int xive_irq_startup(struct irq_data *d)
527{
528 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
529 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
530 int target, rc;
531
532 pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
533 d->irq, hw_irq, d);
534
535#ifdef CONFIG_PCI_MSI
536 /*
537 * The generic MSI code returns with the interrupt disabled on the
538 * card, using the MSI mask bits. Firmware doesn't appear to unmask
539 * at that level, so we do it here by hand.
540 */
541 if (irq_data_get_msi_desc(d))
542 pci_msi_unmask_irq(d);
543#endif
544
545 /* Pick a target */
546 target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
547 if (target == XIVE_INVALID_TARGET) {
548 /* Try again breaking affinity */
549 target = xive_pick_irq_target(d, cpu_online_mask);
550 if (target == XIVE_INVALID_TARGET)
551 return -ENXIO;
552 pr_warn("irq %d started with broken affinity\n", d->irq);
553 }
554
555 /* Sanity check */
556 if (WARN_ON(target == XIVE_INVALID_TARGET ||
557 target >= nr_cpu_ids))
558 target = smp_processor_id();
559
560 xd->target = target;
561
562 /*
563 * Configure the logical number to be the Linux IRQ number
564 * and set the target queue
565 */
566 rc = xive_ops->configure_irq(hw_irq,
567 get_hard_smp_processor_id(target),
568 xive_irq_priority, d->irq);
569 if (rc)
570 return rc;
571
572 /* Unmask the ESB */
573 xive_do_source_set_mask(xd, false);
574
575 return 0;
576}
577
578static void xive_irq_shutdown(struct irq_data *d)
579{
580 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
581 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
582
583 pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n",
584 d->irq, hw_irq, d);
585
586 if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
587 return;
588
589 /* Mask the interrupt at the source */
590 xive_do_source_set_mask(xd, true);
591
592 /*
593 * The above may have set saved_p. We clear it; otherwise it
594 * will prevent re-enabling later on. It is ok to forget the
595 * fact that the interrupt might be in a queue because we are
596 * accounting that already in xive_dec_target_count() and will
597 * be re-routing it to a new queue with proper accounting when
598 * it's started up again
599 */
600 xd->saved_p = false;
601
602 /*
603 * Mask the interrupt in HW in the IVT/EAS and set the number
604 * to be the "bad" IRQ number
605 */
606 xive_ops->configure_irq(hw_irq,
607 get_hard_smp_processor_id(xd->target),
608 0xff, XIVE_BAD_IRQ);
609
610 xive_dec_target_count(xd->target);
611 xd->target = XIVE_INVALID_TARGET;
612}
613
614static void xive_irq_unmask(struct irq_data *d)
615{
616 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
617
618 pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd);
619
620 /*
621 * This is a workaround for PCI LSI problems on P9, for
622 * these, we call FW to set the mask. The problems might
623 * be fixed by P9 DD2.0, if that is the case, firmware
624 * will no longer set that flag.
625 */
626 if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
627 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
628 xive_ops->configure_irq(hw_irq,
629 get_hard_smp_processor_id(xd->target),
630 xive_irq_priority, d->irq);
631 return;
632 }
633
634 xive_do_source_set_mask(xd, false);
635}
636
637static void xive_irq_mask(struct irq_data *d)
638{
639 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
640
641 pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd);
642
643 /*
644 * This is a workaround for PCI LSI problems on P9: for
645 * these, we call OPAL to set the mask. The problems might
646 * be fixed in P9 DD2.0; if that is the case, firmware
647 * will no longer set that flag.
648 */
649 if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
650 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
651 xive_ops->configure_irq(hw_irq,
652 get_hard_smp_processor_id(xd->target),
653 0xff, d->irq);
654 return;
655 }
656
657 xive_do_source_set_mask(xd, true);
658}
659
660static int xive_irq_set_affinity(struct irq_data *d,
661 const struct cpumask *cpumask,
662 bool force)
663{
664 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
665 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
666 u32 target, old_target;
667 int rc = 0;
668
669 pr_devel("xive_irq_set_affinity: irq %d\n", d->irq);
670
671 /* Is this valid ? */
672 if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
673 return -EINVAL;
674
675 /*
676 * If existing target is already in the new mask, and is
677 * online then do nothing.
678 */
679 if (xd->target != XIVE_INVALID_TARGET &&
680 cpu_online(xd->target) &&
681 cpumask_test_cpu(xd->target, cpumask))
682 return IRQ_SET_MASK_OK;
683
684 /* Pick a new target */
685 target = xive_pick_irq_target(d, cpumask);
686
687 /* No target found */
688 if (target == XIVE_INVALID_TARGET)
689 return -ENXIO;
690
691 /* Sanity check */
692 if (WARN_ON(target >= nr_cpu_ids))
693 target = smp_processor_id();
694
695 old_target = xd->target;
696
697 /*
698 * Only configure the irq if it's not currently passed-through to
699 * a KVM guest
700 */
701 if (!irqd_is_forwarded_to_vcpu(d))
702 rc = xive_ops->configure_irq(hw_irq,
703 get_hard_smp_processor_id(target),
704 xive_irq_priority, d->irq);
705 if (rc < 0) {
706 pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
707 return rc;
708 }
709
710 pr_devel(" target: 0x%x\n", target);
711 xd->target = target;
712
713 /* Give up previous target */
714 if (old_target != XIVE_INVALID_TARGET)
715 xive_dec_target_count(old_target);
716
717 return IRQ_SET_MASK_OK;
718}
719
720static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
721{
722 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
723
724 /*
725 * We only support these. This has really no effect other than setting
726 * the corresponding descriptor bits, mind you, but those will in turn
727 * affect the resend function when re-enabling an edge interrupt.
728 *
729 * We set the default to edge as explained in map().
730 */
731 if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
732 flow_type = IRQ_TYPE_EDGE_RISING;
733
734 if (flow_type != IRQ_TYPE_EDGE_RISING &&
735 flow_type != IRQ_TYPE_LEVEL_LOW)
736 return -EINVAL;
737
738 irqd_set_trigger_type(d, flow_type);
739
740 /*
741 * Double check it matches what the FW thinks
742 *
743 * NOTE: We don't know yet if the PAPR interface will provide
744 * the LSI vs MSI information apart from the device-tree so
745 * this check might have to move into an optional backend call
746 * that is specific to the native backend
747 */
748 if ((flow_type == IRQ_TYPE_LEVEL_LOW) !=
749 !!(xd->flags & XIVE_IRQ_FLAG_LSI)) {
750 pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
751 d->irq, (u32)irqd_to_hwirq(d),
752 (flow_type == IRQ_TYPE_LEVEL_LOW) ? "Level" : "Edge",
753 (xd->flags & XIVE_IRQ_FLAG_LSI) ? "Level" : "Edge");
754 }
755
756 return IRQ_SET_MASK_OK_NOCOPY;
757}
758
759static int xive_irq_retrigger(struct irq_data *d)
760{
761 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
762
763 /* This should be only for MSIs */
764 if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
765 return 0;
766
767 /*
768 * To perform a retrigger, we first set the PQ bits to
769 * 11, then perform an EOI.
770 */
771 xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
772
773 /*
774 * Note: We pass "0" to the hw_irq argument in order to
775 * avoid calling into the backend EOI code which we don't
776 * want to do in the case of a re-trigger. Backends typically
777 * only do EOI for LSIs anyway.
778 */
779 xive_do_source_eoi(0, xd);
780
781 return 1;
782}
783
784static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
785{
786 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
787 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
788 int rc;
789 u8 pq;
790
791 /*
792 * We only support this on interrupts that do not require
793 * firmware calls for masking and unmasking
794 */
795 if (xd->flags & XIVE_IRQ_FLAG_MASK_FW)
796 return -EIO;
797
798 /*
799 * This is called by KVM with state non-NULL for enabling
800 * pass-through or NULL for disabling it
801 */
802 if (state) {
803 irqd_set_forwarded_to_vcpu(d);
804
805 /* Set it to PQ=10 state to prevent further sends */
806 pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
807
808 /* No target ? nothing to do */
809 if (xd->target == XIVE_INVALID_TARGET) {
810 /*
811 * An untargeted interrupt should also have been
812 * masked at the source
813 */
814 WARN_ON(pq & 2);
815
816 return 0;
817 }
818
819 /*
820 * If P was set, adjust state to PQ=11 to indicate
821 * that a resend is needed for the interrupt to reach
822 * the guest. Also remember the value of P.
823 *
824 * This also tells us that it's in flight to a host queue
825 * or has already been fetched but hasn't been EOIed yet
826 * by the host. Thus it's potentially using up a host
827 * queue slot. This is important to know because as long
828 * as this is the case, we must not hard-unmask it when
829 * "returning" that interrupt to the host.
830 *
831 * This saved_p is cleared by the host EOI, when we know
832 * for sure the queue slot is no longer in use.
833 */
834 if (pq & 2) {
835 pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
836 xd->saved_p = true;
837
838 /*
839 * Sync the XIVE source HW to ensure the interrupt
840 * has gone through the EAS before we change its
841 * target to the guest. That should guarantee us
842 * that we *will* eventually get an EOI for it on
843 * the host. Otherwise there would be a small window
844 * where P is seen here but the interrupt goes
845 * to the guest queue instead.
846 */
847 if (xive_ops->sync_source)
848 xive_ops->sync_source(hw_irq);
849 } else
850 xd->saved_p = false;
851 } else {
852 irqd_clr_forwarded_to_vcpu(d);
853
854 /* No host target ? hard mask and return */
855 if (xd->target == XIVE_INVALID_TARGET) {
856 xive_do_source_set_mask(xd, true);
857 return 0;
858 }
859
860 /*
861 * Sync the XIVE source HW to ensure the interrupt
862 * has gone through the EAS before we change its
863 * target to the host.
864 */
865 if (xive_ops->sync_source)
866 xive_ops->sync_source(hw_irq);
867
868 /*
869 * By convention we are called with the interrupt in
870 * a PQ=10 or PQ=11 state, ie, it won't fire and will
871 * have latched in Q whether there's a pending HW
872 * interrupt or not.
873 *
874 * First reconfigure the target.
875 */
876 rc = xive_ops->configure_irq(hw_irq,
877 get_hard_smp_processor_id(xd->target),
878 xive_irq_priority, d->irq);
879 if (rc)
880 return rc;
881
882 /*
883 * Then if saved_p is not set, effectively re-enable the
884 * interrupt with an EOI. If it is set, we know there is
885 * still a message in a host queue somewhere that will be
886 * EOId eventually.
887 *
888 * Note: We don't check irqd_irq_disabled(). Effectively,
889 * we *will* let the irq get through even if masked if the
890 * HW is still firing it in order to deal with the whole
891 * saved_p business properly. If the interrupt triggers
892 * while masked, the generic code will re-mask it anyway.
893 */
894 if (!xd->saved_p)
895 xive_do_source_eoi(hw_irq, xd);
896
897 }
898 return 0;
899}
900
901static struct irq_chip xive_irq_chip = {
902 .name = "XIVE-IRQ",
903 .irq_startup = xive_irq_startup,
904 .irq_shutdown = xive_irq_shutdown,
905 .irq_eoi = xive_irq_eoi,
906 .irq_mask = xive_irq_mask,
907 .irq_unmask = xive_irq_unmask,
908 .irq_set_affinity = xive_irq_set_affinity,
909 .irq_set_type = xive_irq_set_type,
910 .irq_retrigger = xive_irq_retrigger,
911 .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
912};
913
914bool is_xive_irq(struct irq_chip *chip)
915{
916 return chip == &xive_irq_chip;
917}
918EXPORT_SYMBOL_GPL(is_xive_irq);
919
920void xive_cleanup_irq_data(struct xive_irq_data *xd)
921{
922 if (xd->eoi_mmio) {
923 iounmap(xd->eoi_mmio);
924 if (xd->eoi_mmio == xd->trig_mmio)
925 xd->trig_mmio = NULL;
926 xd->eoi_mmio = NULL;
927 }
928 if (xd->trig_mmio) {
929 iounmap(xd->trig_mmio);
930 xd->trig_mmio = NULL;
931 }
932}
933EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
934
935static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
936{
937 struct xive_irq_data *xd;
938 int rc;
939
940 xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL);
941 if (!xd)
942 return -ENOMEM;
943 rc = xive_ops->populate_irq_data(hw, xd);
944 if (rc) {
945 kfree(xd);
946 return rc;
947 }
948 xd->target = XIVE_INVALID_TARGET;
949 irq_set_handler_data(virq, xd);
950
951 return 0;
952}
953
954static void xive_irq_free_data(unsigned int virq)
955{
956 struct xive_irq_data *xd = irq_get_handler_data(virq);
957
958 if (!xd)
959 return;
960 irq_set_handler_data(virq, NULL);
961 xive_cleanup_irq_data(xd);
962 kfree(xd);
963}
964
965#ifdef CONFIG_SMP
966
967static void xive_cause_ipi(int cpu, unsigned long msg)
968{
969 struct xive_cpu *xc;
970 struct xive_irq_data *xd;
971
972 xc = per_cpu(xive_cpu, cpu);
973
974 DBG_VERBOSE("IPI msg#%ld CPU %d -> %d (HW IRQ 0x%x)\n",
975 msg, smp_processor_id(), cpu, xc->hw_ipi);
976
977 xd = &xc->ipi_data;
978 if (WARN_ON(!xd->trig_mmio))
979 return;
980 out_be64(xd->trig_mmio, 0);
981}
982
983static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
984{
985 return smp_ipi_demux();
986}
987
988static void xive_ipi_eoi(struct irq_data *d)
989{
990 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
991
992 /* Handle possible race with unplug and drop stale IPIs */
993 if (!xc)
994 return;
995 xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
996 xive_do_queue_eoi(xc);
997}
998
999static void xive_ipi_do_nothing(struct irq_data *d)
1000{
1001 /*
1002 * Nothing to do, we never mask/unmask IPIs, but the callback
1003 * has to exist for the struct irq_chip.
1004 */
1005}
1006
1007static struct irq_chip xive_ipi_chip = {
1008 .name = "XIVE-IPI",
1009 .irq_eoi = xive_ipi_eoi,
1010 .irq_mask = xive_ipi_do_nothing,
1011 .irq_unmask = xive_ipi_do_nothing,
1012};
1013
1014static void __init xive_request_ipi(void)
1015{
1016 unsigned int virq;
1017
1018 /*
1019 * If initialization failed, move on; we might manage to
1020 * reach the point where we display our errors before
1021 * the system falls apart
1022 */
1023 if (!xive_irq_domain)
1024 return;
1025
1026 /* Initialize it */
1027 virq = irq_create_mapping(xive_irq_domain, 0);
1028 xive_ipi_irq = virq;
1029
1030 WARN_ON(request_irq(virq, xive_muxed_ipi_action,
1031 IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
1032}
1033
1034static int xive_setup_cpu_ipi(unsigned int cpu)
1035{
1036 struct xive_cpu *xc;
1037 int rc;
1038
1039 pr_debug("Setting up IPI for CPU %d\n", cpu);
1040
1041 xc = per_cpu(xive_cpu, cpu);
1042
1043 /* Check if we are already setup */
1044 if (xc->hw_ipi != 0)
1045 return 0;
1046
1047 /* Grab an IPI from the backend, this will populate xc->hw_ipi */
1048 if (xive_ops->get_ipi(cpu, xc))
1049 return -EIO;
1050
1051 /*
1052 * Populate the IRQ data in the xive_cpu structure and
1053 * configure the HW / enable the IPIs.
1054 */
1055 rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
1056 if (rc) {
1057 pr_err("Failed to populate IPI data on CPU %d\n", cpu);
1058 return -EIO;
1059 }
1060 rc = xive_ops->configure_irq(xc->hw_ipi,
1061 get_hard_smp_processor_id(cpu),
1062 xive_irq_priority, xive_ipi_irq);
1063 if (rc) {
1064 pr_err("Failed to map IPI CPU %d\n", cpu);
1065 return -EIO;
1066 }
1067 pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu,
1068 xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
1069
1070 /* Unmask it */
1071 xive_do_source_set_mask(&xc->ipi_data, false);
1072
1073 return 0;
1074}
1075
1076static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
1077{
1078 /* Disable the IPI and free the IRQ data */
1079
1080 /* Already cleaned up ? */
1081 if (xc->hw_ipi == 0)
1082 return;
1083
1084 /* Mask the IPI */
1085 xive_do_source_set_mask(&xc->ipi_data, true);
1086
1087 /*
1088 * Note: We don't call xive_cleanup_irq_data() to free
1089 * the mappings as this is called from an IPI on kexec
1090 * which is not a safe environment to call iounmap()
1091 */
1092
1093 /* Deconfigure/mask in the backend */
1094 xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
1095 0xff, xive_ipi_irq);
1096
1097 /* Free the IPIs in the backend */
1098 xive_ops->put_ipi(cpu, xc);
1099}
1100
1101void __init xive_smp_probe(void)
1102{
1103 smp_ops->cause_ipi = xive_cause_ipi;
1104
1105 /* Register the IPI */
1106 xive_request_ipi();
1107
1108 /* Allocate and setup IPI for the boot CPU */
1109 xive_setup_cpu_ipi(smp_processor_id());
1110}
1111
1112#endif /* CONFIG_SMP */
1113
1114static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
1115 irq_hw_number_t hw)
1116{
1117 int rc;
1118
1119 /*
1120 * Mark interrupts as edge sensitive by default so that resend
1121 * actually works. Will fix that up below if needed.
1122 */
1123 irq_clear_status_flags(virq, IRQ_LEVEL);
1124
1125#ifdef CONFIG_SMP
1126 /* IPIs are special and come up with HW number 0 */
1127 if (hw == 0) {
1128 /*
1129 * IPIs are marked per-cpu. We use separate HW interrupts under
1130 * the hood but associated with the same "linux" interrupt
1131 */
1132 irq_set_chip_and_handler(virq, &xive_ipi_chip,
1133 handle_percpu_irq);
1134 return 0;
1135 }
1136#endif
1137
1138 rc = xive_irq_alloc_data(virq, hw);
1139 if (rc)
1140 return rc;
1141
1142 irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq);
1143
1144 return 0;
1145}
1146
1147static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
1148{
1149 struct irq_data *data = irq_get_irq_data(virq);
1150 unsigned int hw_irq;
1151
1152 /* XXX Assign BAD number */
1153 if (!data)
1154 return;
1155 hw_irq = (unsigned int)irqd_to_hwirq(data);
1156 if (hw_irq)
1157 xive_irq_free_data(virq);
1158}
1159
1160static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
1161 const u32 *intspec, unsigned int intsize,
1162 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
1163
1164{
1165 *out_hwirq = intspec[0];
1166
1167 /*
1168 * If intsize is at least 2, we look for the type in the second cell,
1169 * where we assume the LSB indicates a level interrupt.
1170 */
1171 if (intsize > 1) {
1172 if (intspec[1] & 1)
1173 *out_flags = IRQ_TYPE_LEVEL_LOW;
1174 else
1175 *out_flags = IRQ_TYPE_EDGE_RISING;
1176 } else
1177 *out_flags = IRQ_TYPE_LEVEL_LOW;
1178
1179 return 0;
1180}
1181
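A quick illustration of the xlate convention above: the first cell of the specifier is the hardware IRQ number and, when a second cell is present, its LSB selects level-low versus edge-rising. The flag values below are local stand-ins so the example is self-contained, not the real IRQ_TYPE_* constants:

#include <stdint.h>
#include <stdio.h>

#define EX_LEVEL_LOW   8       /* stand-in for IRQ_TYPE_LEVEL_LOW */
#define EX_EDGE_RISING 1       /* stand-in for IRQ_TYPE_EDGE_RISING */

int main(void)
{
        uint32_t intspec[2] = { 0x1234, 1 };    /* hwirq 0x1234, LSB set */
        unsigned long hwirq = intspec[0];
        unsigned int flags = (intspec[1] & 1) ? EX_LEVEL_LOW : EX_EDGE_RISING;

        printf("hwirq=0x%lx flags=%u\n", hwirq, flags); /* level-low */
        return 0;
}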
1182static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
1183 enum irq_domain_bus_token bus_token)
1184{
1185 return xive_ops->match(node);
1186}
1187
1188static const struct irq_domain_ops xive_irq_domain_ops = {
1189 .match = xive_irq_domain_match,
1190 .map = xive_irq_domain_map,
1191 .unmap = xive_irq_domain_unmap,
1192 .xlate = xive_irq_domain_xlate,
1193};
1194
1195static void __init xive_init_host(void)
1196{
1197 xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ,
1198 &xive_irq_domain_ops, NULL);
1199 if (WARN_ON(xive_irq_domain == NULL))
1200 return;
1201 irq_set_default_host(xive_irq_domain);
1202}
1203
1204static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1205{
1206 if (xc->queue[xive_irq_priority].qpage)
1207 xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
1208}
1209
1210static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1211{
1212 int rc = 0;
1213
1214 /* We set up one queue for now, with a 64k page */
1215 if (!xc->queue[xive_irq_priority].qpage)
1216 rc = xive_ops->setup_queue(cpu, xc, xive_irq_priority);
1217
1218 return rc;
1219}
1220
1221static int xive_prepare_cpu(unsigned int cpu)
1222{
1223 struct xive_cpu *xc;
1224
1225 xc = per_cpu(xive_cpu, cpu);
1226 if (!xc) {
1227 struct device_node *np;
1228
1229 xc = kzalloc_node(sizeof(struct xive_cpu),
1230 GFP_KERNEL, cpu_to_node(cpu));
1231 if (!xc)
1232 return -ENOMEM;
1233 np = of_get_cpu_node(cpu, NULL);
1234 if (np)
1235 xc->chip_id = of_get_ibm_chip_id(np);
1236 of_node_put(np);
1237
1238 per_cpu(xive_cpu, cpu) = xc;
1239 }
1240
1241 /* Setup EQs if not already */
1242 return xive_setup_cpu_queues(cpu, xc);
1243}
1244
1245static void xive_setup_cpu(void)
1246{
1247 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1248
1249 /* Debug: Dump the TM state */
1250 pr_devel("CPU %d [HW 0x%02x] VT=%02x\n",
1251 smp_processor_id(), hard_smp_processor_id(),
1252 in_8(xive_tima + xive_tima_offset + TM_WORD2));
1253
1254 /* The backend might have additional things to do */
1255 if (xive_ops->setup_cpu)
1256 xive_ops->setup_cpu(smp_processor_id(), xc);
1257
1258 /* Set CPPR to 0xff to enable flow of interrupts */
1259 xc->cppr = 0xff;
1260 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1261}
1262
1263#ifdef CONFIG_SMP
1264void xive_smp_setup_cpu(void)
1265{
1266 pr_devel("SMP setup CPU %d\n", smp_processor_id());
1267
1268 /* This will have already been done on the boot CPU */
1269 if (smp_processor_id() != boot_cpuid)
1270 xive_setup_cpu();
1271
1272}
1273
1274int xive_smp_prepare_cpu(unsigned int cpu)
1275{
1276 int rc;
1277
1278 /* Allocate per-CPU data and queues */
1279 rc = xive_prepare_cpu(cpu);
1280 if (rc)
1281 return rc;
1282
1283 /* Allocate and setup IPI for the new CPU */
1284 return xive_setup_cpu_ipi(cpu);
1285}
1286
1287#ifdef CONFIG_HOTPLUG_CPU
1288static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
1289{
1290 u32 irq;
1291
1292 /* We assume local irqs are disabled */
1293 WARN_ON(!irqs_disabled());
1294
1295 /* Check what's already in the CPU queue */
1296 while ((irq = xive_scan_interrupts(xc, false)) != 0) {
1297 /*
1298 * We need to re-route that interrupt to its new destination.
1299 * First get and lock the descriptor
1300 */
1301 struct irq_desc *desc = irq_to_desc(irq);
1302 struct irq_data *d = irq_desc_get_irq_data(desc);
1303 struct xive_irq_data *xd;
1304 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
1305
1306 /*
1307 * Ignore anything that isn't a XIVE irq, and ignore
1308 * IPIs; they can just be dropped.
1309 */
1310 if (d->domain != xive_irq_domain || hw_irq == 0)
1311 continue;
1312
1313 /*
1314 * The IRQ should have already been re-routed; this is just a
1315 * stale entry in the old queue, so re-trigger it in order to make
1316 * it reach its new destination.
1317 */
1318#ifdef DEBUG_FLUSH
1319 pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
1320 cpu, irq);
1321#endif
1322 raw_spin_lock(&desc->lock);
1323 xd = irq_desc_get_handler_data(desc);
1324
1325 /*
1326 * For LSIs, we EOI, this will cause a resend if it's
1327 * still asserted. Otherwise do an MSI retrigger.
1328 */
1329 if (xd->flags & XIVE_IRQ_FLAG_LSI)
1330 xive_do_source_eoi(irqd_to_hwirq(d), xd);
1331 else
1332 xive_irq_retrigger(d);
1333
1334 raw_spin_unlock(&desc->lock);
1335 }
1336}
1337
1338void xive_smp_disable_cpu(void)
1339{
1340 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1341 unsigned int cpu = smp_processor_id();
1342
1343 /* Migrate interrupts away from the CPU */
1344 irq_migrate_all_off_this_cpu();
1345
1346 /* Set CPPR to 0 to disable flow of interrupts */
1347 xc->cppr = 0;
1348 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1349
1350 /* Flush everything still in the queue */
1351 xive_flush_cpu_queue(cpu, xc);
1352
1353 /* Re-enable CPPR */
1354 xc->cppr = 0xff;
1355 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1356}
1357
1358void xive_flush_interrupt(void)
1359{
1360 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1361 unsigned int cpu = smp_processor_id();
1362
1363 /* Called if an interrupt occurs while the CPU is hot unplugged */
1364 xive_flush_cpu_queue(cpu, xc);
1365}
1366
1367#endif /* CONFIG_HOTPLUG_CPU */
1368
1369#endif /* CONFIG_SMP */
1370
1371void xive_kexec_teardown_cpu(int secondary)
1372{
1373 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1374 unsigned int cpu = smp_processor_id();
1375
1376 /* Set CPPR to 0 to disable flow of interrupts */
1377 xc->cppr = 0;
1378 out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1379
1380 /* Backend cleanup if any */
1381 if (xive_ops->teardown_cpu)
1382 xive_ops->teardown_cpu(cpu, xc);
1383
1384#ifdef CONFIG_SMP
1385 /* Get rid of IPI */
1386 xive_cleanup_cpu_ipi(cpu, xc);
1387#endif
1388
1389 /* Disable and free the queues */
1390 xive_cleanup_cpu_queues(cpu, xc);
1391}
1392
1393void xive_shutdown(void)
1394{
1395 xive_ops->shutdown();
1396}
1397
1398bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
1399 u8 max_prio)
1400{
1401 xive_tima = area;
1402 xive_tima_offset = offset;
1403 xive_ops = ops;
1404 xive_irq_priority = max_prio;
1405
1406 ppc_md.get_irq = xive_get_irq;
1407 __xive_enabled = true;
1408
1409 pr_devel("Initializing host..\n");
1410 xive_init_host();
1411
1412 pr_devel("Initializing boot CPU..\n");
1413
1414 /* Allocate per-CPU data and queues */
1415 xive_prepare_cpu(smp_processor_id());
1416
1417 /* Get ready for interrupts */
1418 xive_setup_cpu();
1419
1420 pr_info("Interrupt handling initialized with %s backend\n",
1421 xive_ops->name);
1422 pr_info("Using priority %d for all interrupts\n", max_prio);
1423
1424 return true;
1425}
1426
1427static int __init xive_off(char *arg)
1428{
1429 xive_cmdline_disabled = true;
1430 return 0;
1431}
1432__setup("xive=off", xive_off);
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
new file mode 100644
index 000000000000..6feac0a758e1
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -0,0 +1,715 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#define pr_fmt(fmt) "xive: " fmt
11
12#include <linux/types.h>
13#include <linux/irq.h>
14#include <linux/debugfs.h>
15#include <linux/smp.h>
16#include <linux/interrupt.h>
17#include <linux/seq_file.h>
18#include <linux/init.h>
19#include <linux/of.h>
20#include <linux/slab.h>
21#include <linux/spinlock.h>
22#include <linux/delay.h>
23#include <linux/cpumask.h>
24#include <linux/mm.h>
25
26#include <asm/prom.h>
27#include <asm/io.h>
28#include <asm/smp.h>
29#include <asm/irq.h>
30#include <asm/errno.h>
31#include <asm/xive.h>
32#include <asm/xive-regs.h>
33#include <asm/opal.h>
34#include <asm/kvm_ppc.h>
35
36#include "xive-internal.h"
37
38
39static u32 xive_provision_size;
40static u32 *xive_provision_chips;
41static u32 xive_provision_chip_count;
42static u32 xive_queue_shift;
43static u32 xive_pool_vps = XIVE_INVALID_VP;
44static struct kmem_cache *xive_provision_cache;
45
46int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
47{
48 __be64 flags, eoi_page, trig_page;
49 __be32 esb_shift, src_chip;
50 u64 opal_flags;
51 s64 rc;
52
53 memset(data, 0, sizeof(*data));
54
55 rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
56 &esb_shift, &src_chip);
57 if (rc) {
58 pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
59 hw_irq, rc);
60 return -EINVAL;
61 }
62
63 opal_flags = be64_to_cpu(flags);
64 if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
65 data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
66 if (opal_flags & OPAL_XIVE_IRQ_LSI)
67 data->flags |= XIVE_IRQ_FLAG_LSI;
68 if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG)
69 data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG;
70 if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW)
71 data->flags |= XIVE_IRQ_FLAG_MASK_FW;
72 if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
73 data->flags |= XIVE_IRQ_FLAG_EOI_FW;
74 data->eoi_page = be64_to_cpu(eoi_page);
75 data->trig_page = be64_to_cpu(trig_page);
76 data->esb_shift = be32_to_cpu(esb_shift);
77 data->src_chip = be32_to_cpu(src_chip);
78
79 data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
80 if (!data->eoi_mmio) {
81 pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
82 return -ENOMEM;
83 }
84
85 if (!data->trig_page)
86 return 0;
87 if (data->trig_page == data->eoi_page) {
88 data->trig_mmio = data->eoi_mmio;
89 return 0;
90 }
91
92 data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
93 if (!data->trig_mmio) {
94 pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
95 return -ENOMEM;
96 }
97 return 0;
98}
99EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
100
101int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
102{
103 s64 rc;
104
105 for (;;) {
106 rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq);
107 if (rc != OPAL_BUSY)
108 break;
109 msleep(1);
110 }
111 return rc == 0 ? 0 : -ENXIO;
112}
113EXPORT_SYMBOL_GPL(xive_native_configure_irq);
114
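The same OPAL_BUSY polling idiom as in xive_native_configure_irq() recurs around most of the OPAL XIVE calls in this file (queue setup, IRQ allocation and freeing, VP management). A hedged user-space sketch of the idiom, with a fake call and fake return codes standing in for the real OPAL wrappers and msleep():

#include <stdio.h>

#define EX_OPAL_BUSY    (-2)    /* illustrative values only */
#define EX_OPAL_SUCCESS 0

static long ex_opal_call(void)
{
        static int busy_left = 3;       /* pretend firmware is busy 3 times */

        return busy_left-- > 0 ? EX_OPAL_BUSY : EX_OPAL_SUCCESS;
}

int main(void)
{
        long rc;

        for (;;) {                      /* same shape as the kernel loops */
                rc = ex_opal_call();
                if (rc != EX_OPAL_BUSY)
                        break;
                /* the kernel sleeps here: msleep(1); */
        }
        printf("rc=%ld\n", rc);
        return rc == EX_OPAL_SUCCESS ? 0 : 1;
}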
115
116/* This can be called multiple times to change a queue configuration */
117int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
118 __be32 *qpage, u32 order, bool can_escalate)
119{
120 s64 rc = 0;
121 __be64 qeoi_page_be;
122 __be32 esc_irq_be;
123 u64 flags, qpage_phys;
124
125 /* If there's an actual queue page, clean it */
126 if (order) {
127 if (WARN_ON(!qpage))
128 return -EINVAL;
129 qpage_phys = __pa(qpage);
130 } else
131 qpage_phys = 0;
132
133 /* Initialize the rest of the fields */
134 q->msk = order ? ((1u << (order - 2)) - 1) : 0;
135 q->idx = 0;
136 q->toggle = 0;
137
138 rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
139 &qeoi_page_be,
140 &esc_irq_be,
141 NULL);
142 if (rc) {
143 pr_err("Error %lld getting queue info prio %d\n", rc, prio);
144 rc = -EIO;
145 goto fail;
146 }
147 q->eoi_phys = be64_to_cpu(qeoi_page_be);
148
149 /* Default flags */
150 flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;
151
152 /* Escalation needed ? */
153 if (can_escalate) {
154 q->esc_irq = be32_to_cpu(esc_irq_be);
155 flags |= OPAL_XIVE_EQ_ESCALATE;
156 }
157
158 /* Configure and enable the queue in HW */
159 for (;;) {
160 rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
161 if (rc != OPAL_BUSY)
162 break;
163 msleep(1);
164 }
165 if (rc) {
166 pr_err("Error %lld setting queue for prio %d\n", rc, prio);
167 rc = -EIO;
168 } else {
169 /*
170 * KVM code requires all of the above to be visible before
171 * q->qpage is set due to how it manages IPI EOIs
172 */
173 wmb();
174 q->qpage = qpage;
175 }
176fail:
177 return rc;
178}
179EXPORT_SYMBOL_GPL(xive_native_configure_queue);
180
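A worked example of the sizing math in xive_native_configure_queue(): queue entries are 4-byte big-endian words, so a queue of 2^order bytes holds 2^(order-2) entries and q->msk is 2^(order-2) - 1. For example, a 64 KiB queue page (order 16) gives 16384 entries and a mask of 0x3fff:

#include <stdio.h>

int main(void)
{
        unsigned int order = 16;                    /* 64 KiB queue page */
        unsigned int entries = 1u << (order - 2);   /* 4 bytes per entry */
        unsigned int msk = entries - 1;

        printf("entries=%u msk=0x%x\n", entries, msk);  /* 16384 0x3fff */
        return 0;
}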
181static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
182{
183 s64 rc;
184
185 /* Disable the queue in HW */
186 for (;;) {
187 rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0);
188 if (rc != OPAL_BUSY) break;
189 msleep(1);
190 }
191 if (rc)
192 pr_err("Error %lld disabling queue for prio %d\n", rc, prio);
193}
194
195void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
196{
197 __xive_native_disable_queue(vp_id, q, prio);
198}
199EXPORT_SYMBOL_GPL(xive_native_disable_queue);
200
201static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
202{
203 struct xive_q *q = &xc->queue[prio];
204 unsigned int alloc_order;
205 struct page *pages;
206 __be32 *qpage;
207
208 alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
209 (xive_queue_shift - PAGE_SHIFT) : 0;
210 pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
211 if (!pages)
212 return -ENOMEM;
213 qpage = (__be32 *)page_address(pages);
214 memset(qpage, 0, 1 << xive_queue_shift);
215 return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
216 q, prio, qpage, xive_queue_shift, false);
217}
218
219static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
220{
221 struct xive_q *q = &xc->queue[prio];
222 unsigned int alloc_order;
223
224 /*
225 * We use the variant with no iounmap as this is called on kexec
226 * from an IPI and iounmap isn't safe
227 */
228 __xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
229 alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
230 (xive_queue_shift - PAGE_SHIFT) : 0;
231 free_pages((unsigned long)q->qpage, alloc_order);
232 q->qpage = NULL;
233}
234
235static bool xive_native_match(struct device_node *node)
236{
237 return of_device_is_compatible(node, "ibm,opal-xive-vc");
238}
239
240#ifdef CONFIG_SMP
241static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
242{
243 struct device_node *np;
244 unsigned int chip_id = 0;
245 s64 irq;
246
247 /* Find the chip ID */
248 np = of_get_cpu_node(cpu, NULL);
249 if (np) {
250 if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
251 chip_id = 0;
252 }
253
254 /* Allocate an IPI and populate info about it */
255 for (;;) {
256 irq = opal_xive_allocate_irq(chip_id);
257 if (irq == OPAL_BUSY) {
258 msleep(1);
259 continue;
260 }
261 if (irq < 0) {
262 pr_err("Failed to allocate IPI on CPU %d\n", cpu);
263 return -ENXIO;
264 }
265 xc->hw_ipi = irq;
266 break;
267 }
268 return 0;
269}
270#endif /* CONFIG_SMP */
271
272u32 xive_native_alloc_irq(void)
273{
274 s64 rc;
275
276 for (;;) {
277 rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP);
278 if (rc != OPAL_BUSY)
279 break;
280 msleep(1);
281 }
282 if (rc < 0)
283 return 0;
284 return rc;
285}
286EXPORT_SYMBOL_GPL(xive_native_alloc_irq);
287
288void xive_native_free_irq(u32 irq)
289{
290 for (;;) {
291 s64 rc = opal_xive_free_irq(irq);
292 if (rc != OPAL_BUSY)
293 break;
294 msleep(1);
295 }
296}
297EXPORT_SYMBOL_GPL(xive_native_free_irq);
298
299#ifdef CONFIG_SMP
300static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
301{
302 s64 rc;
303
304 /* Free the IPI */
305 if (!xc->hw_ipi)
306 return;
307 for (;;) {
308 rc = opal_xive_free_irq(xc->hw_ipi);
309 if (rc == OPAL_BUSY) {
310 msleep(1);
311 continue;
312 }
313 xc->hw_ipi = 0;
314 break;
315 }
316}
317#endif /* CONFIG_SMP */
318
319static void xive_native_shutdown(void)
320{
321 /* Switch the XIVE to emulation mode */
322 opal_xive_reset(OPAL_XIVE_MODE_EMU);
323}
324
325/*
326 * Perform an "ack" cycle on the current thread, thus
327 * grabbing the pending active priorities and updating
328 * the CPPR to the most favored one.
329 */
330static void xive_native_update_pending(struct xive_cpu *xc)
331{
332 u8 he, cppr;
333 u16 ack;
334
335 /* Perform the acknowledge-hypervisor-interrupt cycle on the ACK register */
336 ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));
337
338 /* Synchronize subsequent queue accesses */
339 mb();
340
341 /*
342 * Grab the CPPR and the "HE" field which indicates the source
343 * of the hypervisor interrupt (if any)
344 */
345 cppr = ack & 0xff;
346 he = GETFIELD(TM_QW3_NSR_HE, (ack >> 8));
347 switch(he) {
348 case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
349 break;
350 case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
351 if (cppr == 0xff)
352 return;
353 /* Mark the priority pending */
354 xc->pending_prio |= 1 << cppr;
355
356 /*
357 * A new interrupt should never have a CPPR less favored
358 * than our current one.
359 */
360 if (cppr >= xc->cppr)
361 pr_err("CPU %d odd ack CPPR, got %d at %d\n",
362 smp_processor_id(), cppr, xc->cppr);
363
364 /* Update our idea of what the CPPR is */
365 xc->cppr = cppr;
366 break;
367 case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
368 case TM_QW3_NSR_HE_LSI: /* Legacy FW LSI (unused) */
369 pr_err("CPU %d got unexpected interrupt type HE=%d\n",
370 smp_processor_id(), he);
371 return;
372 }
373}
374
375static void xive_native_eoi(u32 hw_irq)
376{
377 /*
378 * Not normally used except if specific interrupts need
379 * a workaround on EOI.
380 */
381 opal_int_eoi(hw_irq);
382}
383
384static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
385{
386 s64 rc;
387 u32 vp;
388 __be64 vp_cam_be;
389 u64 vp_cam;
390
391 if (xive_pool_vps == XIVE_INVALID_VP)
392 return;
393
394 /* Enable the pool VP */
395 vp = xive_pool_vps + cpu;
396 pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
397 for (;;) {
398 rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
399 if (rc != OPAL_BUSY)
400 break;
401 msleep(1);
402 }
403 if (rc) {
404 pr_err("Failed to enable pool VP on CPU %d\n", cpu);
405 return;
406 }
407
408 /* Grab its CAM value */
409 rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
410 if (rc) {
411 pr_err("Failed to get pool VP info CPU %d\n", cpu);
412 return;
413 }
414 vp_cam = be64_to_cpu(vp_cam_be);
415
416 pr_debug("VP CAM = %llx\n", vp_cam);
417
418 /* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
419 pr_debug("(Old HW value: %08x)\n",
420 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
421 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
422 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2,
423 TM_QW2W2_VP | vp_cam);
424 pr_debug("(New HW value: %08x)\n",
425 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
426}
427
428static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
429{
430 s64 rc;
431 u32 vp;
432
433 if (xive_pool_vps == XIVE_INVALID_VP)
434 return;
435
436 /* Pull the pool VP from the CPU */
437 in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
438
439 /* Disable it */
440 vp = xive_pool_vps + cpu;
441 for (;;) {
442 rc = opal_xive_set_vp_info(vp, 0, 0);
443 if (rc != OPAL_BUSY)
444 break;
445 msleep(1);
446 }
447}
448
449void xive_native_sync_source(u32 hw_irq)
450{
451 opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
452}
453EXPORT_SYMBOL_GPL(xive_native_sync_source);
454
455static const struct xive_ops xive_native_ops = {
456 .populate_irq_data = xive_native_populate_irq_data,
457 .configure_irq = xive_native_configure_irq,
458 .setup_queue = xive_native_setup_queue,
459 .cleanup_queue = xive_native_cleanup_queue,
460 .match = xive_native_match,
461 .shutdown = xive_native_shutdown,
462 .update_pending = xive_native_update_pending,
463 .eoi = xive_native_eoi,
464 .setup_cpu = xive_native_setup_cpu,
465 .teardown_cpu = xive_native_teardown_cpu,
466 .sync_source = xive_native_sync_source,
467#ifdef CONFIG_SMP
468 .get_ipi = xive_native_get_ipi,
469 .put_ipi = xive_native_put_ipi,
470#endif /* CONFIG_SMP */
471 .name = "native",
472};
473
474static bool xive_parse_provisioning(struct device_node *np)
475{
476 int rc;
477
478 if (of_property_read_u32(np, "ibm,xive-provision-page-size",
479 &xive_provision_size) < 0)
480 return true;
481 rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", 4);
482 if (rc < 0) {
483 pr_err("Error %d getting provision chips array\n", rc);
484 return false;
485 }
486 xive_provision_chip_count = rc;
487 if (rc == 0)
488 return true;
489
490 xive_provision_chips = kzalloc(4 * xive_provision_chip_count,
491 GFP_KERNEL);
492 if (WARN_ON(!xive_provision_chips))
493 return false;
494
495 rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
496 xive_provision_chips,
497 xive_provision_chip_count);
498 if (rc < 0) {
499 pr_err("Error %d reading provision chips array\n", rc);
500 return false;
501 }
502
503 xive_provision_cache = kmem_cache_create("xive-provision",
504 xive_provision_size,
505 xive_provision_size,
506 0, NULL);
507 if (!xive_provision_cache) {
508 pr_err("Failed to allocate provision cache\n");
509 return false;
510 }
511 return true;
512}
513
514static void xive_native_setup_pools(void)
515{
516 /* Allocate a pool big enough */
517 pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids);
518
519 xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
520 if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
521 pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
522
523 pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n",
524 xive_pool_vps, nr_cpu_ids);
525}
526
527u32 xive_native_default_eq_shift(void)
528{
529 return xive_queue_shift;
530}
531EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
532
533bool xive_native_init(void)
534{
535 struct device_node *np;
536 struct resource r;
537 void __iomem *tima;
538 struct property *prop;
539 u8 max_prio = 7;
540 const __be32 *p;
541 u32 val, cpu;
542 s64 rc;
543
544 if (xive_cmdline_disabled)
545 return false;
546
547 pr_devel("xive_native_init()\n");
548 np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
549 if (!np) {
550 pr_devel("not found !\n");
551 return false;
552 }
553 pr_devel("Found %s\n", np->full_name);
554
555 /* Resource 1 is HV window */
556 if (of_address_to_resource(np, 1, &r)) {
557 pr_err("Failed to get thread mgmnt area resource\n");
558 return false;
559 }
560 tima = ioremap(r.start, resource_size(&r));
561 if (!tima) {
562 pr_err("Failed to map thread mgmnt area\n");
563 return false;
564 }
565
566 /* Read number of priorities */
567 if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
568 max_prio = val - 1;
569
570 /* Iterate the EQ sizes and pick one */
571 of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
572 xive_queue_shift = val;
573 if (val == PAGE_SHIFT)
574 break;
575 }
576
577 /* Configure Thread Management areas for KVM */
578 for_each_possible_cpu(cpu)
579 kvmppc_set_xive_tima(cpu, r.start, tima);
580
581	/* Grab size of provisioning pages */
582 xive_parse_provisioning(np);
583
584 /* Switch the XIVE to exploitation mode */
585 rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
586 if (rc) {
587 pr_err("Switch to exploitation mode failed with error %lld\n", rc);
588 return false;
589 }
590
591 /* Setup some dummy HV pool VPs */
592 xive_native_setup_pools();
593
594 /* Initialize XIVE core with our backend */
595 if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
596 max_prio)) {
597 opal_xive_reset(OPAL_XIVE_MODE_EMU);
598 return false;
599 }
600 pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
601 return true;
602}
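One detail of the init path above worth calling out: the walk over "ibm,xive-eq-sizes" keeps the last advertised shift it sees but stops early on a match with PAGE_SHIFT, so a queue page the same size as a kernel page is preferred when the list offers one. A small stand-alone sketch of that selection (PAGE_SHIFT_SKETCH and the sample sizes are made up for illustration):

#include <stdio.h>

#define PAGE_SHIFT_SKETCH 16	/* pretend 64kB kernel pages */

/*
 * Mirrors the "ibm,xive-eq-sizes" walk above: remember each advertised
 * shift, stop early if one matches the page size, otherwise keep the
 * last entry seen.
 */
static unsigned int pick_eq_shift(const unsigned int *sizes, int n)
{
	unsigned int shift = 0;
	int i;

	for (i = 0; i < n; i++) {
		shift = sizes[i];
		if (shift == PAGE_SHIFT_SKETCH)
			break;
	}
	return shift;
}

int main(void)
{
	static const unsigned int sizes[] = { 12, 16, 21, 24 };
	unsigned int shift = pick_eq_shift(sizes, 4);

	printf("selected EQ shift: %u (%u kB queues)\n",
	       shift, 1u << (shift - 10));
	return 0;
}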
603
604static bool xive_native_provision_pages(void)
605{
606 u32 i;
607 void *p;
608
609 for (i = 0; i < xive_provision_chip_count; i++) {
610 u32 chip = xive_provision_chips[i];
611
612 /*
613 * XXX TODO: Try to make the allocation local to the node where
614 * the chip resides.
615 */
616 p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
617 if (!p) {
618 pr_err("Failed to allocate provisioning page\n");
619 return false;
620 }
621 opal_xive_donate_page(chip, __pa(p));
622 }
623 return true;
624}
625
626u32 xive_native_alloc_vp_block(u32 max_vcpus)
627{
628 s64 rc;
629 u32 order;
630
631 order = fls(max_vcpus) - 1;
632 if (max_vcpus > (1 << order))
633 order++;
634
635 pr_info("VP block alloc, for max VCPUs %d use order %d\n",
636 max_vcpus, order);
637
638 for (;;) {
639 rc = opal_xive_alloc_vp_block(order);
640 switch (rc) {
641 case OPAL_BUSY:
642 msleep(1);
643 break;
644 case OPAL_XIVE_PROVISIONING:
645 if (!xive_native_provision_pages())
646 return XIVE_INVALID_VP;
647 break;
648 default:
649 if (rc < 0) {
650 pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
651 order, rc);
652 return XIVE_INVALID_VP;
653 }
654 return rc;
655 }
656 }
657}
658EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);
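A quick stand-alone check of the order computation in xive_native_alloc_vp_block(): order is fls(max_vcpus) - 1, bumped by one when max_vcpus is not already a power of two, so the block covers at least max_vcpus VPs. fls_sketch() is a portable stand-in for the kernel's fls():

#include <stdio.h>

/* 1-based index of the highest set bit; 0 for x == 0 (like the kernel fls). */
static int fls_sketch(unsigned int x)
{
	int bit = 0;

	while (x) {
		bit++;
		x >>= 1;
	}
	return bit;
}

static unsigned int vp_block_order(unsigned int max_vcpus)
{
	unsigned int order = fls_sketch(max_vcpus) - 1;

	if (max_vcpus > (1u << order))
		order++;
	return order;
}

int main(void)
{
	static const unsigned int cases[] = { 1, 4, 5, 64, 65 };
	unsigned int i;

	/* Expected orders: 0, 2, 3, 6, 7 */
	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("max_vcpus %3u -> order %u\n",
		       cases[i], vp_block_order(cases[i]));
	return 0;
}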
659
660void xive_native_free_vp_block(u32 vp_base)
661{
662 s64 rc;
663
664 if (vp_base == XIVE_INVALID_VP)
665 return;
666
667 rc = opal_xive_free_vp_block(vp_base);
668 if (rc < 0)
669 pr_warn("OPAL error %lld freeing VP block\n", rc);
670}
671EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
672
673int xive_native_enable_vp(u32 vp_id)
674{
675 s64 rc;
676
677 for (;;) {
678 rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
679 if (rc != OPAL_BUSY)
680 break;
681 msleep(1);
682 }
683 return rc ? -EIO : 0;
684}
685EXPORT_SYMBOL_GPL(xive_native_enable_vp);
686
687int xive_native_disable_vp(u32 vp_id)
688{
689 s64 rc;
690
691 for (;;) {
692 rc = opal_xive_set_vp_info(vp_id, 0, 0);
693 if (rc != OPAL_BUSY)
694 break;
695 msleep(1);
696 }
697 return rc ? -EIO : 0;
698}
699EXPORT_SYMBOL_GPL(xive_native_disable_vp);
700
701int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
702{
703 __be64 vp_cam_be;
704 __be32 vp_chip_id_be;
705 s64 rc;
706
707 rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
708 if (rc)
709 return -EIO;
710 *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
711 *out_chip_id = be32_to_cpu(vp_chip_id_be);
712
713 return 0;
714}
715EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
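For orientation, here is a hypothetical call sequence a consumer such as the KVM XIVE device might follow with the helpers exported above: allocate a VP block sized for the vcpu count, enable one VP per vcpu, read back its CAM word, then disable and free on teardown. All bodies are mocked so the sketch builds outside the kernel; only the call order mirrors the real exports.

#include <stdio.h>

#define XIVE_INVALID_VP 0xffffffffu	/* mirrors the sentinel used above */

/* Mocked stand-ins for the exported helpers; the real ones talk to OPAL. */
static unsigned int mock_alloc_vp_block(unsigned int max_vcpus)
{
	printf("alloc VP block for %u vcpus\n", max_vcpus);
	return 0x100;			/* pretend base of the block */
}

static int mock_enable_vp(unsigned int vp_id)
{
	printf("enable VP 0x%x\n", vp_id);
	return 0;
}

static int mock_get_vp_info(unsigned int vp_id, unsigned int *cam,
			    unsigned int *chip)
{
	*cam = vp_id;			/* fake CAM value */
	*chip = 0;
	return 0;
}

static int mock_disable_vp(unsigned int vp_id)
{
	printf("disable VP 0x%x\n", vp_id);
	return 0;
}

static void mock_free_vp_block(unsigned int vp_base)
{
	printf("free VP block at 0x%x\n", vp_base);
}

int main(void)
{
	unsigned int nr_vcpus = 4, vp_base, i, cam, chip;

	vp_base = mock_alloc_vp_block(nr_vcpus);
	if (vp_base == XIVE_INVALID_VP)
		return 1;

	for (i = 0; i < nr_vcpus; i++) {
		mock_enable_vp(vp_base + i);
		mock_get_vp_info(vp_base + i, &cam, &chip);
		printf("vcpu %u: cam=0x%x chip=%u\n", i, cam, chip);
	}

	for (i = 0; i < nr_vcpus; i++)
		mock_disable_vp(vp_base + i);
	mock_free_vp_block(vp_base);
	return 0;
}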
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
new file mode 100644
index 000000000000..d07ef2d29caf
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright 2016,2017 IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef __XIVE_INTERNAL_H
10#define __XIVE_INTERNAL_H
11
12/* Each CPU carries one of these, holding various per-CPU state */
13struct xive_cpu {
14#ifdef CONFIG_SMP
15 /* HW irq number and data of IPI */
16 u32 hw_ipi;
17 struct xive_irq_data ipi_data;
18#endif /* CONFIG_SMP */
19
20 int chip_id;
21
22	/* Queue data. Only one is populated */
23#define XIVE_MAX_QUEUES 8
24 struct xive_q queue[XIVE_MAX_QUEUES];
25
26 /*
27 * Pending mask. Each bit corresponds to a priority that
28 * potentially has pending interrupts.
29 */
30 u8 pending_prio;
31
32 /* Cache of HW CPPR */
33 u8 cppr;
34};
35
36/* Backend ops */
37struct xive_ops {
38 int (*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data);
39 int (*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
40 int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
41 void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
42 void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
43 void (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc);
44 bool (*match)(struct device_node *np);
45 void (*shutdown)(void);
46
47 void (*update_pending)(struct xive_cpu *xc);
48 void (*eoi)(u32 hw_irq);
49 void (*sync_source)(u32 hw_irq);
50#ifdef CONFIG_SMP
51 int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc);
52 void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc);
53#endif
54 const char *name;
55};
56
57bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
58 u8 max_prio);
59
60extern bool xive_cmdline_disabled;
61
62#endif /* __XIVE_INTERNAL_H */
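The xive_ops structure is a plain table of function pointers, so adding a backend amounts to filling one in and passing it to xive_core_init(), exactly as native.c does with xive_native_ops. A stripped-down user-space sketch of that ops-table pattern (struct and function names here are illustrative, not the kernel's):

#include <stdio.h>
#include <stdbool.h>

/* Cut-down version of the backend ops table. */
struct demo_ops {
	int (*configure_irq)(unsigned int hw_irq, unsigned int target);
	void (*eoi)(unsigned int hw_irq);
	const char *name;
};

static int demo_configure_irq(unsigned int hw_irq, unsigned int target)
{
	printf("[demo] route irq %u to cpu %u\n", hw_irq, target);
	return 0;
}

static void demo_eoi(unsigned int hw_irq)
{
	printf("[demo] EOI irq %u\n", hw_irq);
}

static const struct demo_ops demo_backend = {
	.configure_irq	= demo_configure_irq,
	.eoi		= demo_eoi,
	.name		= "demo",
};

/* The core only ever calls through the table, never a backend directly. */
static const struct demo_ops *core_ops;

static bool demo_core_init(const struct demo_ops *ops)
{
	core_ops = ops;
	printf("core initialised with '%s' backend\n", ops->name);
	return true;
}

int main(void)
{
	if (!demo_core_init(&demo_backend))
		return 1;
	core_ops->configure_irq(23, 0);
	core_ops->eoi(23);
	return 0;
}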
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 16321ad9e70c..67435b9bf98d 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -30,6 +30,7 @@
30#include <linux/ctype.h> 30#include <linux/ctype.h>
31 31
32#include <asm/ptrace.h> 32#include <asm/ptrace.h>
33#include <asm/smp.h>
33#include <asm/string.h> 34#include <asm/string.h>
34#include <asm/prom.h> 35#include <asm/prom.h>
35#include <asm/machdep.h> 36#include <asm/machdep.h>
@@ -48,7 +49,7 @@
48#include <asm/reg.h> 49#include <asm/reg.h>
49#include <asm/debug.h> 50#include <asm/debug.h>
50#include <asm/hw_breakpoint.h> 51#include <asm/hw_breakpoint.h>
51 52#include <asm/xive.h>
52#include <asm/opal.h> 53#include <asm/opal.h>
53#include <asm/firmware.h> 54#include <asm/firmware.h>
54 55
@@ -232,7 +233,13 @@ Commands:\n\
232 "\ 233 "\
233 dr dump stream of raw bytes\n\ 234 dr dump stream of raw bytes\n\
234 dt dump the tracing buffers (uses printk)\n\ 235 dt dump the tracing buffers (uses printk)\n\
235 e print exception information\n\ 236"
237#ifdef CONFIG_PPC_POWERNV
238" dx# dump xive on CPU #\n\
239 dxi# dump xive irq state #\n\
240 dxa dump xive on all CPUs\n"
241#endif
242" e print exception information\n\
236 f flush cache\n\ 243 f flush cache\n\
237 la lookup symbol+offset of specified address\n\ 244 la lookup symbol+offset of specified address\n\
238 ls lookup address of specified symbol\n\ 245 ls lookup address of specified symbol\n\
@@ -2338,6 +2345,81 @@ static void dump_pacas(void)
2338} 2345}
2339#endif 2346#endif
2340 2347
2348#ifdef CONFIG_PPC_POWERNV
2349static void dump_one_xive(int cpu)
2350{
2351 unsigned int hwid = get_hard_smp_processor_id(cpu);
2352
2353 opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
2354 opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
2355 opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
2356 opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
2357 opal_xive_dump(XIVE_DUMP_VP, hwid);
2358 opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
2359
2360 if (setjmp(bus_error_jmp) != 0) {
2361 catch_memory_errors = 0;
2362 printf("*** Error dumping xive on cpu %d\n", cpu);
2363 return;
2364 }
2365
2366 catch_memory_errors = 1;
2367 sync();
2368 xmon_xive_do_dump(cpu);
2369 sync();
2370 __delay(200);
2371 catch_memory_errors = 0;
2372}
2373
2374static void dump_all_xives(void)
2375{
2376 int cpu;
2377
2378 if (num_possible_cpus() == 0) {
2379 printf("No possible cpus, use 'dx #' to dump individual cpus\n");
2380 return;
2381 }
2382
2383 for_each_possible_cpu(cpu)
2384 dump_one_xive(cpu);
2385}
2386
2387static void dump_one_xive_irq(u32 num)
2388{
2389 s64 rc;
2390 __be64 vp;
2391 u8 prio;
2392 __be32 lirq;
2393
2394 rc = opal_xive_get_irq_config(num, &vp, &prio, &lirq);
2395 xmon_printf("IRQ 0x%x config: vp=0x%llx prio=%d lirq=0x%x (rc=%lld)\n",
2396 num, be64_to_cpu(vp), prio, be32_to_cpu(lirq), rc);
2397}
2398
2399static void dump_xives(void)
2400{
2401 unsigned long num;
2402 int c;
2403
2404 c = inchar();
2405 if (c == 'a') {
2406 dump_all_xives();
2407 return;
2408 } else if (c == 'i') {
2409 if (scanhex(&num))
2410 dump_one_xive_irq(num);
2411 return;
2412 }
2413
2414 termch = c; /* Put c back, it wasn't 'a' */
2415
2416 if (scanhex(&num))
2417 dump_one_xive(num);
2418 else
2419 dump_one_xive(xmon_owner);
2420}
2421#endif /* CONFIG_PPC_POWERNV */
2422
2341static void dump_by_size(unsigned long addr, long count, int size) 2423static void dump_by_size(unsigned long addr, long count, int size)
2342{ 2424{
2343 unsigned char temp[16]; 2425 unsigned char temp[16];
@@ -2386,6 +2468,14 @@ dump(void)
2386 return; 2468 return;
2387 } 2469 }
2388#endif 2470#endif
2471#ifdef CONFIG_PPC_POWERNV
2472 if (c == 'x') {
2473 xmon_start_pagination();
2474 dump_xives();
2475 xmon_end_pagination();
2476 return;
2477 }
2478#endif
2389 2479
2390 if (c == '\n') 2480 if (c == '\n')
2391 termch = c; 2481 termch = c;
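The new xmon commands dispatch on the character after 'dx': 'a' dumps every possible CPU, 'i' followed by a hex number dumps one interrupt's configuration, a bare hex number dumps that CPU, and no argument falls back to the CPU that owns xmon. A toy dispatcher with the same shape (string input and printf bodies stand in for xmon's character reader and the real dump routines):

#include <stdio.h>
#include <stdlib.h>

static void dump_one_xive(long cpu)
{
	printf("dump XIVE state of cpu %ld\n", cpu);
}

static void dump_all_xives(void)
{
	printf("dump XIVE state of all cpus\n");
}

static void dump_one_xive_irq(long num)
{
	printf("dump config of irq 0x%lx\n", num);
}

static void dump_xives_cmd(const char *arg, long default_cpu)
{
	if (arg[0] == 'a')
		dump_all_xives();
	else if (arg[0] == 'i')
		dump_one_xive_irq(strtol(arg + 1, NULL, 16));
	else if (arg[0] != '\0')
		dump_one_xive(strtol(arg, NULL, 16));
	else
		dump_one_xive(default_cpu);	/* bare "dx": owning cpu */
}

int main(void)
{
	dump_xives_cmd("a", 0);		/* dxa      */
	dump_xives_cmd("i17", 0);	/* dxi 0x17 */
	dump_xives_cmd("2", 0);		/* dx 2     */
	dump_xives_cmd("", 3);		/* dx       */
	return 0;
}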
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 397b7b5b1933..9de1d3ca83b2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1149,7 +1149,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
1149void kvm_unregister_device_ops(u32 type); 1149void kvm_unregister_device_ops(u32 type);
1150 1150
1151extern struct kvm_device_ops kvm_mpic_ops; 1151extern struct kvm_device_ops kvm_mpic_ops;
1152extern struct kvm_device_ops kvm_xics_ops;
1153extern struct kvm_device_ops kvm_arm_vgic_v2_ops; 1152extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
1154extern struct kvm_device_ops kvm_arm_vgic_v3_ops; 1153extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
1155 1154
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 357e67cba32e..4e19bc812c29 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2825,10 +2825,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
2825 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, 2825 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
2826 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, 2826 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
2827#endif 2827#endif
2828
2829#ifdef CONFIG_KVM_XICS
2830 [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
2831#endif
2832}; 2828};
2833 2829
2834int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) 2830int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)