author		Gleb Natapov <gleb@redhat.com>		2013-04-28 05:50:07 -0400
committer	Gleb Natapov <gleb@redhat.com>		2013-04-28 05:50:07 -0400
commit		064d1afaa5a60fc391d0b4b77599fc8f63f99cd3 (patch)
tree		2e640cdfa50b0048c52e021f07a8b24560251b26 /arch/powerpc
parent		730dca42c1d363c939da18c1499c7327c66e2b37 (diff)
parent		8b78645c93b5d469e8006d68dbc92edc2640c654 (diff)
Merge git://github.com/agraf/linux-2.6.git kvm-ppc-next into queue
Diffstat (limited to 'arch/powerpc')
-rw-r--r--	arch/powerpc/include/asm/hvcall.h		   3
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s.h		   5
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s_64.h	  13
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s_asm.h	   8
-rw-r--r--	arch/powerpc/include/asm/kvm_host.h		  40
-rw-r--r--	arch/powerpc/include/asm/kvm_ppc.h		 107
-rw-r--r--	arch/powerpc/include/asm/reg.h			   1
-rw-r--r--	arch/powerpc/include/uapi/asm/kvm.h		  73
-rw-r--r--	arch/powerpc/kernel/asm-offsets.c		   3
-rw-r--r--	arch/powerpc/kvm/44x.c				  12
-rw-r--r--	arch/powerpc/kvm/Kconfig			  26
-rw-r--r--	arch/powerpc/kvm/Makefile			  12
-rw-r--r--	arch/powerpc/kvm/book3s.c			  27
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_hv.c		 120
-rw-r--r--	arch/powerpc/kvm/book3s_emulate.c		   4
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c			  88
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rm_mmu.c		  11
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rm_xics.c		 406
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S		 228
-rw-r--r--	arch/powerpc/kvm/book3s_pr.c			   5
-rw-r--r--	arch/powerpc/kvm/book3s_pr_papr.c		  21
-rw-r--r--	arch/powerpc/kvm/book3s_rtas.c			 274
-rw-r--r--	arch/powerpc/kvm/book3s_xics.c			1130
-rw-r--r--	arch/powerpc/kvm/book3s_xics.h			 129
-rw-r--r--	arch/powerpc/kvm/booke.c			 123
-rw-r--r--	arch/powerpc/kvm/e500.c				  14
-rw-r--r--	arch/powerpc/kvm/e500.h				  22
-rw-r--r--	arch/powerpc/kvm/e500_emulate.c			  19
-rw-r--r--	arch/powerpc/kvm/e500_mmu.c			 192
-rw-r--r--	arch/powerpc/kvm/e500mc.c			  16
-rw-r--r--	arch/powerpc/kvm/irq.h				  17
-rw-r--r--	arch/powerpc/kvm/mpic.c				1843
-rw-r--r--	arch/powerpc/kvm/powerpc.c			  72
-rw-r--r--	arch/powerpc/sysdev/xics/icp-native.c		   8
34 files changed, 4864 insertions, 208 deletions
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 4bc2c3dad6ad..cf4df8e2139a 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -270,6 +270,9 @@
 #define H_SET_MODE		0x31C
 #define MAX_HCALL_OPCODE	H_SET_MODE
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS			0xf000
+
 #ifndef __ASSEMBLY__
 
 /**
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index bc81842ea25a..349ed85c7d61 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
@@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
 			unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
-extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
+			unsigned long gpa, bool dirty);
 extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1dc9928..9c1ff330c805 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
 		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
 }
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+					  struct revmap_entry *rev)
+{
+	if (atomic_read(&kvm->arch.hpte_mod_interest))
+		rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index cdc3d2717cc6..9039d3c97eec 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -20,6 +20,11 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+/* XICS ICP register offsets */
+#define XICS_XIRR		4
+#define XICS_MFRR		0xc
+#define XICS_IPI		2	/* interrupt source # for IPIs */
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@ struct kvmppc_host_state {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	u8 hwthread_req;
 	u8 hwthread_state;
-
+	u8 host_ipi;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	unsigned long xics_phys;
+	u32 saved_xirr;
 	u64 dabr;
 	u64 host_mmcr[3];
 	u32 host_pmc[8];
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e34f8fee9080..af326cde7cb6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -44,6 +44,10 @@
44#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 44#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
45#endif 45#endif
46 46
47/* These values are internal and can be increased later */
48#define KVM_NR_IRQCHIPS 1
49#define KVM_IRQCHIP_NUM_PINS 256
50
47#if !defined(CONFIG_KVM_440) 51#if !defined(CONFIG_KVM_440)
48#include <linux/mmu_notifier.h> 52#include <linux/mmu_notifier.h>
49 53
@@ -188,6 +192,10 @@ struct kvmppc_linear_info {
188 int type; 192 int type;
189}; 193};
190 194
195/* XICS components, defined in book3s_xics.c */
196struct kvmppc_xics;
197struct kvmppc_icp;
198
191/* 199/*
192 * The reverse mapping array has one entry for each HPTE, 200 * The reverse mapping array has one entry for each HPTE,
193 * which stores the guest's view of the second word of the HPTE 201 * which stores the guest's view of the second word of the HPTE
@@ -255,6 +263,13 @@ struct kvm_arch {
255#endif /* CONFIG_KVM_BOOK3S_64_HV */ 263#endif /* CONFIG_KVM_BOOK3S_64_HV */
256#ifdef CONFIG_PPC_BOOK3S_64 264#ifdef CONFIG_PPC_BOOK3S_64
257 struct list_head spapr_tce_tables; 265 struct list_head spapr_tce_tables;
266 struct list_head rtas_tokens;
267#endif
268#ifdef CONFIG_KVM_MPIC
269 struct openpic *mpic;
270#endif
271#ifdef CONFIG_KVM_XICS
272 struct kvmppc_xics *xics;
258#endif 273#endif
259}; 274};
260 275
@@ -301,11 +316,13 @@ struct kvmppc_vcore {
301 * that a guest can register. 316 * that a guest can register.
302 */ 317 */
303struct kvmppc_vpa { 318struct kvmppc_vpa {
319 unsigned long gpa; /* Current guest phys addr */
304 void *pinned_addr; /* Address in kernel linear mapping */ 320 void *pinned_addr; /* Address in kernel linear mapping */
305 void *pinned_end; /* End of region */ 321 void *pinned_end; /* End of region */
306 unsigned long next_gpa; /* Guest phys addr for update */ 322 unsigned long next_gpa; /* Guest phys addr for update */
307 unsigned long len; /* Number of bytes required */ 323 unsigned long len; /* Number of bytes required */
308 u8 update_pending; /* 1 => update pinned_addr from next_gpa */ 324 u8 update_pending; /* 1 => update pinned_addr from next_gpa */
325 bool dirty; /* true => area has been modified by kernel */
309}; 326};
310 327
311struct kvmppc_pte { 328struct kvmppc_pte {
@@ -359,6 +376,11 @@ struct kvmppc_slb {
359#define KVMPPC_BOOKE_MAX_IAC 4 376#define KVMPPC_BOOKE_MAX_IAC 4
360#define KVMPPC_BOOKE_MAX_DAC 2 377#define KVMPPC_BOOKE_MAX_DAC 2
361 378
379/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
380#define KVMPPC_EPR_NONE 0 /* EPR not supported */
381#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */
382#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */
383
362struct kvmppc_booke_debug_reg { 384struct kvmppc_booke_debug_reg {
363 u32 dbcr0; 385 u32 dbcr0;
364 u32 dbcr1; 386 u32 dbcr1;
@@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg {
370 u64 dac[KVMPPC_BOOKE_MAX_DAC]; 392 u64 dac[KVMPPC_BOOKE_MAX_DAC];
371}; 393};
372 394
395#define KVMPPC_IRQ_DEFAULT 0
396#define KVMPPC_IRQ_MPIC 1
397#define KVMPPC_IRQ_XICS 2
398
399struct openpic;
400
373struct kvm_vcpu_arch { 401struct kvm_vcpu_arch {
374 ulong host_stack; 402 ulong host_stack;
375 u32 host_pid; 403 u32 host_pid;
@@ -502,7 +530,9 @@ struct kvm_vcpu_arch {
502 spinlock_t wdt_lock; 530 spinlock_t wdt_lock;
503 struct timer_list wdt_timer; 531 struct timer_list wdt_timer;
504 u32 tlbcfg[4]; 532 u32 tlbcfg[4];
533 u32 tlbps[4];
505 u32 mmucfg; 534 u32 mmucfg;
535 u32 eptcfg;
506 u32 epr; 536 u32 epr;
507 u32 crit_save; 537 u32 crit_save;
508 struct kvmppc_booke_debug_reg dbg_reg; 538 struct kvmppc_booke_debug_reg dbg_reg;
@@ -522,7 +552,7 @@ struct kvm_vcpu_arch {
522 u8 sane; 552 u8 sane;
523 u8 cpu_type; 553 u8 cpu_type;
524 u8 hcall_needed; 554 u8 hcall_needed;
525 u8 epr_enabled; 555 u8 epr_flags; /* KVMPPC_EPR_xxx */
526 u8 epr_needed; 556 u8 epr_needed;
527 557
528 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ 558 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -549,6 +579,13 @@ struct kvm_vcpu_arch {
549 unsigned long magic_page_pa; /* phys addr to map the magic page to */ 579 unsigned long magic_page_pa; /* phys addr to map the magic page to */
550 unsigned long magic_page_ea; /* effect. addr to map the magic page to */ 580 unsigned long magic_page_ea; /* effect. addr to map the magic page to */
551 581
582 int irq_type; /* one of KVM_IRQ_* */
583 int irq_cpu_id;
584 struct openpic *mpic; /* KVM_IRQ_MPIC */
585#ifdef CONFIG_KVM_XICS
586 struct kvmppc_icp *icp; /* XICS presentation controller */
587#endif
588
552#ifdef CONFIG_KVM_BOOK3S_64_HV 589#ifdef CONFIG_KVM_BOOK3S_64_HV
553 struct kvm_vcpu_arch_shared shregs; 590 struct kvm_vcpu_arch_shared shregs;
554 591
@@ -589,5 +626,6 @@ struct kvm_vcpu_arch {
589#define KVM_MMIO_REG_FQPR 0x0060 626#define KVM_MMIO_REG_FQPR 0x0060
590 627
591#define __KVM_HAVE_ARCH_WQP 628#define __KVM_HAVE_ARCH_WQP
629#define __KVM_HAVE_CREATE_DEVICE
592 630
593#endif /* __POWERPC_KVM_HOST_H__ */ 631#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index f58930779ae8..d7339df19259 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -44,7 +44,7 @@ enum emulation_result {
44 EMULATE_DO_DCR, /* kvm_run filled with DCR request */ 44 EMULATE_DO_DCR, /* kvm_run filled with DCR request */
45 EMULATE_FAIL, /* can't emulate this instruction */ 45 EMULATE_FAIL, /* can't emulate this instruction */
46 EMULATE_AGAIN, /* something went wrong. go again */ 46 EMULATE_AGAIN, /* something went wrong. go again */
47 EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ 47 EMULATE_EXIT_USER, /* emulation requires exit to user-space */
48}; 48};
49 49
50extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 50extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -130,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
130extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, 130extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
131 struct kvm_memory_slot *memslot, unsigned long porder); 131 struct kvm_memory_slot *memslot, unsigned long porder);
132extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); 132extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
133
133extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 134extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
134 struct kvm_create_spapr_tce *args); 135 struct kvm_create_spapr_tce *args);
135extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 136extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -164,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
164 165
165extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); 166extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
166 167
168int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
169
170extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
171extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
172extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
173extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
174 u32 priority);
175extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
176 u32 *priority);
177extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
178extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
179
167/* 180/*
168 * Cuts out inst bits with ordering according to spec. 181 * Cuts out inst bits with ordering according to spec.
169 * That means the leftmost bit is zero. All given bits are included. 182 * That means the leftmost bit is zero. All given bits are included.
@@ -245,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
245 258
246void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); 259void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
247 260
261struct openpic;
262
248#ifdef CONFIG_KVM_BOOK3S_64_HV 263#ifdef CONFIG_KVM_BOOK3S_64_HV
249static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 264static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
250{ 265{
251 paca[cpu].kvm_hstate.xics_phys = addr; 266 paca[cpu].kvm_hstate.xics_phys = addr;
252} 267}
253 268
269static inline u32 kvmppc_get_xics_latch(void)
270{
271 u32 xirr = get_paca()->kvm_hstate.saved_xirr;
272
273 get_paca()->kvm_hstate.saved_xirr = 0;
274
275 return xirr;
276}
277
278static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
279{
280 paca[cpu].kvm_hstate.host_ipi = host_ipi;
281}
282
283extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
254extern void kvm_linear_init(void); 284extern void kvm_linear_init(void);
255 285
256#else 286#else
@@ -259,6 +289,44 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
259 289
260static inline void kvm_linear_init(void) 290static inline void kvm_linear_init(void)
261{} 291{}
292
293static inline u32 kvmppc_get_xics_latch(void)
294{
295 return 0;
296}
297
298static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
299{}
300
301static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
302{
303 kvm_vcpu_kick(vcpu);
304}
305#endif
306
307#ifdef CONFIG_KVM_XICS
308static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
309{
310 return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
311}
312extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
313extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
314extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
315extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
316extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
317extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
318#else
319static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
320 { return 0; }
321static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
322static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
323 unsigned long server)
324 { return -EINVAL; }
325static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
326 struct kvm_irq_level *args)
327 { return -ENOTTY; }
328static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
329 { return 0; }
262#endif 330#endif
263 331
264static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) 332static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@@ -270,6 +338,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
270#endif 338#endif
271} 339}
272 340
341#ifdef CONFIG_KVM_MPIC
342
343void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
344int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
345 u32 cpu);
346void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
347
348#else
349
350static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
351{
352}
353
354static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
355 struct kvm_vcpu *vcpu, u32 cpu)
356{
357 return -EINVAL;
358}
359
360static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
361 struct kvm_vcpu *vcpu)
362{
363}
364
365#endif /* CONFIG_KVM_MPIC */
366
273int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, 367int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
274 struct kvm_config_tlb *cfg); 368 struct kvm_config_tlb *cfg);
275int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, 369int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@@ -282,8 +376,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
282 376
283static inline void kvmppc_mmu_flush_icache(pfn_t pfn) 377static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
284{ 378{
285 /* Clear i-cache for new pages */
286 struct page *page; 379 struct page *page;
380 /*
381 * We can only access pages that the kernel maps
382 * as memory. Bail out for unmapped ones.
383 */
384 if (!pfn_valid(pfn))
385 return;
386
387 /* Clear i-cache for new pages */
287 page = pfn_to_page(pfn); 388 page = pfn_to_page(pfn);
288 if (!test_bit(PG_arch_1, &page->flags)) { 389 if (!test_bit(PG_arch_1, &page->flags)) {
289 flush_dcache_icache_page(page); 390 flush_dcache_icache_page(page);
@@ -323,4 +424,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
323 return ea; 424 return ea;
324} 425}
325 426
427extern void xics_wake_cpu(int cpu);
428
326#endif /* __POWERPC_KVM_PPC_H__ */ 429#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c9c67fc888c9..799322433620 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -290,6 +290,7 @@
 #define   LPCR_PECE1	0x00002000	/* decrementer can cause exit */
 #define   LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
 #define   LPCR_MER	0x00000800	/* Mediated External Exception */
+#define   LPCR_MER_SH	11
 #define   LPCR_LPES	0x0000000c
 #define   LPCR_LPES0	0x00000008	/* LPAR Env selector 0 */
 #define   LPCR_LPES1	0x00000004	/* LPAR Env selector 1 */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c2ff99c01562..427b9aca2a0f 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -25,6 +25,8 @@
25/* Select powerpc specific features in <linux/kvm.h> */ 25/* Select powerpc specific features in <linux/kvm.h> */
26#define __KVM_HAVE_SPAPR_TCE 26#define __KVM_HAVE_SPAPR_TCE
27#define __KVM_HAVE_PPC_SMT 27#define __KVM_HAVE_PPC_SMT
28#define __KVM_HAVE_IRQCHIP
29#define __KVM_HAVE_IRQ_LINE
28 30
29struct kvm_regs { 31struct kvm_regs {
30 __u64 pc; 32 __u64 pc;
@@ -272,8 +274,31 @@ struct kvm_debug_exit_arch {
272 274
273/* for KVM_SET_GUEST_DEBUG */ 275/* for KVM_SET_GUEST_DEBUG */
274struct kvm_guest_debug_arch { 276struct kvm_guest_debug_arch {
277 struct {
278 /* H/W breakpoint/watchpoint address */
279 __u64 addr;
280 /*
281 * Type denotes h/w breakpoint, read watchpoint, write
282 * watchpoint or watchpoint (both read and write).
283 */
284#define KVMPPC_DEBUG_NONE 0x0
285#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1)
286#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2)
287#define KVMPPC_DEBUG_WATCH_READ (1UL << 3)
288 __u32 type;
289 __u32 reserved;
290 } bp[16];
275}; 291};
276 292
293/* Debug related defines */
294/*
295 * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic
296 * and upper 16 bits are architecture specific. Architecture specific defines
297 * that ioctl is for setting hardware breakpoint or software breakpoint.
298 */
299#define KVM_GUESTDBG_USE_SW_BP 0x00010000
300#define KVM_GUESTDBG_USE_HW_BP 0x00020000
301
277/* definition of registers in kvm_run */ 302/* definition of registers in kvm_run */
278struct kvm_sync_regs { 303struct kvm_sync_regs {
279}; 304};
@@ -299,6 +324,12 @@ struct kvm_allocate_rma {
299 __u64 rma_size; 324 __u64 rma_size;
300}; 325};
301 326
327/* for KVM_CAP_PPC_RTAS */
328struct kvm_rtas_token_args {
329 char name[120];
330 __u64 token; /* Use a token of 0 to undefine a mapping */
331};
332
302struct kvm_book3e_206_tlb_entry { 333struct kvm_book3e_206_tlb_entry {
303 __u32 mas8; 334 __u32 mas8;
304 __u32 mas1; 335 __u32 mas1;
@@ -359,6 +390,26 @@ struct kvm_get_htab_header {
359 __u16 n_invalid; 390 __u16 n_invalid;
360}; 391};
361 392
393/* Per-vcpu XICS interrupt controller state */
394#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
395
396#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */
397#define KVM_REG_PPC_ICP_CPPR_MASK 0xff
398#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */
399#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff
400#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */
401#define KVM_REG_PPC_ICP_MFRR_MASK 0xff
402#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
403#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
404
405/* Device control API: PPC-specific devices */
406#define KVM_DEV_MPIC_GRP_MISC 1
407#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
408
409#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */
410#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */
411
412/* One-Reg API: PPC-specific registers */
362#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) 413#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
363#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) 414#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
364#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) 415#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@@ -426,4 +477,26 @@ struct kvm_get_htab_header {
426/* Debugging: Special instruction for software breakpoint */ 477/* Debugging: Special instruction for software breakpoint */
427#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) 478#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
428 479
480/* MMU registers */
481#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
482#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
483#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
484#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
485#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
486#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
487#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
488/*
489 * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
490 * KVM_CAP_SW_TLB ioctl
491 */
492#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
493#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
494#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
495#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
496#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
497#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
498#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
499#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
500#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
501
429#endif /* __LINUX_KVM_POWERPC_H */ 502#endif /* __LINUX_KVM_POWERPC_H */
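
For orientation, here is a minimal, self-contained sketch (not part of the patch) of how the packed 64-bit value exposed through KVM_REG_PPC_ICP_STATE can be decoded. The local macros simply mirror the KVM_REG_PPC_ICP_* shift/mask definitions added to uapi/asm/kvm.h above, and dump_icp_state() is a hypothetical helper name.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the KVM_REG_PPC_ICP_* layout defined in uapi/asm/kvm.h above. */
#define ICP_CPPR_SHIFT	56		/* current processor priority */
#define ICP_CPPR_MASK	0xffULL
#define ICP_XISR_SHIFT	32		/* interrupt status field */
#define ICP_XISR_MASK	0xffffffULL
#define ICP_MFRR_SHIFT	24		/* pending IPI priority */
#define ICP_MFRR_MASK	0xffULL
#define ICP_PPRI_SHIFT	16		/* pending irq priority */
#define ICP_PPRI_MASK	0xffULL

/* Hypothetical helper: print the fields of one ICP state word. */
static void dump_icp_state(uint64_t v)
{
	printf("cppr=%llu xisr=%llu mfrr=%llu pending_pri=%llu\n",
	       (unsigned long long)((v >> ICP_CPPR_SHIFT) & ICP_CPPR_MASK),
	       (unsigned long long)((v >> ICP_XISR_SHIFT) & ICP_XISR_MASK),
	       (unsigned long long)((v >> ICP_MFRR_SHIFT) & ICP_MFRR_MASK),
	       (unsigned long long)((v >> ICP_PPRI_SHIFT) & ICP_PPRI_MASK));
}

This is the same layout that kvmppc_xics_get_icp() and kvmppc_xics_set_icp() produce and consume, per the register definition above.
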
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d87c90886c75..a791229329cf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -477,6 +477,7 @@ int main(void)
 	DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
+	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -573,6 +574,8 @@ int main(void)
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
 	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
 	HSTATE_FIELD(HSTATE_PMC, host_pmc);
 	HSTATE_FIELD(HSTATE_PURR, host_purr);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 3d7fd21c65f9..2f5c6b6d6877 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec72e43..eb643f862579 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,21 +136,41 @@ config KVM_E500V2
136 If unsure, say N. 136 If unsure, say N.
137 137
138config KVM_E500MC 138config KVM_E500MC
139 bool "KVM support for PowerPC E500MC/E5500 processors" 139 bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
140 depends on PPC_E500MC 140 depends on PPC_E500MC
141 select KVM 141 select KVM
142 select KVM_MMIO 142 select KVM_MMIO
143 select KVM_BOOKE_HV 143 select KVM_BOOKE_HV
144 select MMU_NOTIFIER 144 select MMU_NOTIFIER
145 ---help--- 145 ---help---
146 Support running unmodified E500MC/E5500 (32-bit) guest kernels in 146 Support running unmodified E500MC/E5500/E6500 guest kernels in
147 virtual machines on E500MC/E5500 host processors. 147 virtual machines on E500MC/E5500/E6500 host processors.
148 148
149 This module provides access to the hardware capabilities through 149 This module provides access to the hardware capabilities through
150 a character device node named /dev/kvm. 150 a character device node named /dev/kvm.
151 151
152 If unsure, say N. 152 If unsure, say N.
153 153
154config KVM_MPIC
155 bool "KVM in-kernel MPIC emulation"
156 depends on KVM && E500
157 select HAVE_KVM_IRQCHIP
158 select HAVE_KVM_IRQ_ROUTING
159 select HAVE_KVM_MSI
160 help
161 Enable support for emulating MPIC devices inside the
162 host kernel, rather than relying on userspace to emulate.
163 Currently, support is limited to certain versions of
164 Freescale's MPIC implementation.
165
166config KVM_XICS
167 bool "KVM in-kernel XICS emulation"
168 depends on KVM_BOOK3S_64 && !KVM_MPIC
169 ---help---
170 Include support for the XICS (eXternal Interrupt Controller
171 Specification) interrupt controller architecture used on
172 IBM POWER (pSeries) servers.
173
154source drivers/vhost/Kconfig 174source drivers/vhost/Kconfig
155 175
156endif # VIRTUALIZATION 176endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b772eded8c26..422de3f4d46c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
72 book3s_hv.o \ 72 book3s_hv.o \
73 book3s_hv_interrupts.o \ 73 book3s_hv_interrupts.o \
74 book3s_64_mmu_hv.o 74 book3s_64_mmu_hv.o
75kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
76 book3s_hv_rm_xics.o
75kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 77kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
76 book3s_hv_rmhandlers.o \ 78 book3s_hv_rmhandlers.o \
77 book3s_hv_rm_mmu.o \ 79 book3s_hv_rm_mmu.o \
78 book3s_64_vio_hv.o \ 80 book3s_64_vio_hv.o \
79 book3s_hv_ras.o \ 81 book3s_hv_ras.o \
80 book3s_hv_builtin.o 82 book3s_hv_builtin.o \
83 $(kvm-book3s_64-builtin-xics-objs-y)
84
85kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
86 book3s_xics.o
81 87
82kvm-book3s_64-module-objs := \ 88kvm-book3s_64-module-objs := \
83 ../../../virt/kvm/kvm_main.o \ 89 ../../../virt/kvm/kvm_main.o \
@@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \
86 emulate.o \ 92 emulate.o \
87 book3s.o \ 93 book3s.o \
88 book3s_64_vio.o \ 94 book3s_64_vio.o \
95 book3s_rtas.o \
89 $(kvm-book3s_64-objs-y) 96 $(kvm-book3s_64-objs-y)
90 97
91kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) 98kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -103,6 +110,9 @@ kvm-book3s_32-objs := \
103 book3s_32_mmu.o 110 book3s_32_mmu.o
104kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) 111kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
105 112
113kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
114kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
115
106kvm-objs := $(kvm-objs-m) $(kvm-objs-y) 116kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
107 117
108obj-$(CONFIG_KVM_440) += kvm.o 118obj-$(CONFIG_KVM_440) += kvm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 2d32ae4bc439..700df6f1d32c 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
104 return prio; 104 return prio;
105} 105}
106 106
107static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, 107void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
108 unsigned int vec) 108 unsigned int vec)
109{ 109{
110 unsigned long old_pending = vcpu->arch.pending_exceptions; 110 unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -535,6 +535,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
535 &opcode, sizeof(u32)); 535 &opcode, sizeof(u32));
536 break; 536 break;
537 } 537 }
538#ifdef CONFIG_KVM_XICS
539 case KVM_REG_PPC_ICP_STATE:
540 if (!vcpu->arch.icp) {
541 r = -ENXIO;
542 break;
543 }
544 val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
545 break;
546#endif /* CONFIG_KVM_XICS */
538 default: 547 default:
539 r = -EINVAL; 548 r = -EINVAL;
540 break; 549 break;
@@ -597,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
597 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); 606 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
598 break; 607 break;
599#endif /* CONFIG_ALTIVEC */ 608#endif /* CONFIG_ALTIVEC */
609#ifdef CONFIG_KVM_XICS
610 case KVM_REG_PPC_ICP_STATE:
611 if (!vcpu->arch.icp) {
612 r = -ENXIO;
613 break;
614 }
615 r = kvmppc_xics_set_icp(vcpu,
616 set_reg_val(reg->id, val));
617 break;
618#endif /* CONFIG_KVM_XICS */
600 default: 619 default:
601 r = -EINVAL; 620 r = -EINVAL;
602 break; 621 break;
@@ -612,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
612 return 0; 631 return 0;
613} 632}
614 633
634int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
635 struct kvm_guest_debug *dbg)
636{
637 return -EINVAL;
638}
639
615void kvmppc_decrementer_func(unsigned long data) 640void kvmppc_decrementer_func(unsigned long data)
616{ 641{
617 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 642 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
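
A hedged userspace sketch of how the one-reg added above would be driven, assuming a build against headers that already carry the KVM_REG_PPC_ICP_STATE definition from this series; error handling is omitted and vcpu_fd is a KVM vcpu file descriptor:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read the packed ICP state; the kernel returns -ENXIO if the vcpu has no ICP. */
static uint64_t get_icp_state(int vcpu_fd)
{
	uint64_t val = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_ICP_STATE,
		.addr = (uintptr_t)&val,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	return val;
}

/* Write it back, e.g. when restoring a migrated or saved vcpu. */
static void set_icp_state(int vcpu_fd, uint64_t val)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_ICP_STATE,
		.addr = (uintptr_t)&val,
	};

	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
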
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8cc18abd6dde..69efe0d6cedc 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
893 /* Harvest R and C */ 893 /* Harvest R and C */
894 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); 894 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
895 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 895 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
896 rev[i].guest_rpte = ptel | rcbits; 896 if (rcbits & ~rev[i].guest_rpte) {
897 rev[i].guest_rpte = ptel | rcbits;
898 note_hpte_modification(kvm, &rev[i]);
899 }
897 } 900 }
898 unlock_rmap(rmapp); 901 unlock_rmap(rmapp);
899 hptep[0] &= ~HPTE_V_HVLOCK; 902 hptep[0] &= ~HPTE_V_HVLOCK;
@@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
976 /* Now check and modify the HPTE */ 979 /* Now check and modify the HPTE */
977 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { 980 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
978 kvmppc_clear_ref_hpte(kvm, hptep, i); 981 kvmppc_clear_ref_hpte(kvm, hptep, i);
979 rev[i].guest_rpte |= HPTE_R_R; 982 if (!(rev[i].guest_rpte & HPTE_R_R)) {
983 rev[i].guest_rpte |= HPTE_R_R;
984 note_hpte_modification(kvm, &rev[i]);
985 }
980 ret = 1; 986 ret = 1;
981 } 987 }
982 hptep[0] &= ~HPTE_V_HVLOCK; 988 hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
1080 hptep[1] &= ~HPTE_R_C; 1086 hptep[1] &= ~HPTE_R_C;
1081 eieio(); 1087 eieio();
1082 hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 1088 hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
1083 rev[i].guest_rpte |= HPTE_R_C; 1089 if (!(rev[i].guest_rpte & HPTE_R_C)) {
1090 rev[i].guest_rpte |= HPTE_R_C;
1091 note_hpte_modification(kvm, &rev[i]);
1092 }
1084 ret = 1; 1093 ret = 1;
1085 } 1094 }
1086 hptep[0] &= ~HPTE_V_HVLOCK; 1095 hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
1090 return ret; 1099 return ret;
1091} 1100}
1092 1101
1102static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
1103 struct kvm_memory_slot *memslot,
1104 unsigned long *map)
1105{
1106 unsigned long gfn;
1107
1108 if (!vpa->dirty || !vpa->pinned_addr)
1109 return;
1110 gfn = vpa->gpa >> PAGE_SHIFT;
1111 if (gfn < memslot->base_gfn ||
1112 gfn >= memslot->base_gfn + memslot->npages)
1113 return;
1114
1115 vpa->dirty = false;
1116 if (map)
1117 __set_bit_le(gfn - memslot->base_gfn, map);
1118}
1119
1093long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, 1120long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1094 unsigned long *map) 1121 unsigned long *map)
1095{ 1122{
1096 unsigned long i; 1123 unsigned long i;
1097 unsigned long *rmapp; 1124 unsigned long *rmapp;
1125 struct kvm_vcpu *vcpu;
1098 1126
1099 preempt_disable(); 1127 preempt_disable();
1100 rmapp = memslot->arch.rmap; 1128 rmapp = memslot->arch.rmap;
@@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1103 __set_bit_le(i, map); 1131 __set_bit_le(i, map);
1104 ++rmapp; 1132 ++rmapp;
1105 } 1133 }
1134
1135 /* Harvest dirty bits from VPA and DTL updates */
1136 /* Note: we never modify the SLB shadow buffer areas */
1137 kvm_for_each_vcpu(i, vcpu, kvm) {
1138 spin_lock(&vcpu->arch.vpa_update_lock);
1139 harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
1140 harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
1141 spin_unlock(&vcpu->arch.vpa_update_lock);
1142 }
1106 preempt_enable(); 1143 preempt_enable();
1107 return 0; 1144 return 0;
1108} 1145}
@@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1114 unsigned long gfn = gpa >> PAGE_SHIFT; 1151 unsigned long gfn = gpa >> PAGE_SHIFT;
1115 struct page *page, *pages[1]; 1152 struct page *page, *pages[1];
1116 int npages; 1153 int npages;
1117 unsigned long hva, psize, offset; 1154 unsigned long hva, offset;
1118 unsigned long pa; 1155 unsigned long pa;
1119 unsigned long *physp; 1156 unsigned long *physp;
1120 int srcu_idx; 1157 int srcu_idx;
@@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1146 } 1183 }
1147 srcu_read_unlock(&kvm->srcu, srcu_idx); 1184 srcu_read_unlock(&kvm->srcu, srcu_idx);
1148 1185
1149 psize = PAGE_SIZE; 1186 offset = gpa & (PAGE_SIZE - 1);
1150 if (PageHuge(page)) {
1151 page = compound_head(page);
1152 psize <<= compound_order(page);
1153 }
1154 offset = gpa & (psize - 1);
1155 if (nb_ret) 1187 if (nb_ret)
1156 *nb_ret = psize - offset; 1188 *nb_ret = PAGE_SIZE - offset;
1157 return page_address(page) + offset; 1189 return page_address(page) + offset;
1158 1190
1159 err: 1191 err:
@@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1161 return NULL; 1193 return NULL;
1162} 1194}
1163 1195
1164void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) 1196void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
1197 bool dirty)
1165{ 1198{
1166 struct page *page = virt_to_page(va); 1199 struct page *page = virt_to_page(va);
1200 struct kvm_memory_slot *memslot;
1201 unsigned long gfn;
1202 unsigned long *rmap;
1203 int srcu_idx;
1167 1204
1168 put_page(page); 1205 put_page(page);
1206
1207 if (!dirty || !kvm->arch.using_mmu_notifiers)
1208 return;
1209
1210 /* We need to mark this page dirty in the rmap chain */
1211 gfn = gpa >> PAGE_SHIFT;
1212 srcu_idx = srcu_read_lock(&kvm->srcu);
1213 memslot = gfn_to_memslot(kvm, gfn);
1214 if (memslot) {
1215 rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
1216 lock_rmap(rmap);
1217 *rmap |= KVMPPC_RMAP_CHANGED;
1218 unlock_rmap(rmap);
1219 }
1220 srcu_read_unlock(&kvm->srcu, srcu_idx);
1169} 1221}
1170 1222
1171/* 1223/*
@@ -1193,16 +1245,36 @@ struct kvm_htab_ctx {
1193 1245
1194#define HPTE_SIZE (2 * sizeof(unsigned long)) 1246#define HPTE_SIZE (2 * sizeof(unsigned long))
1195 1247
1248/*
1249 * Returns 1 if this HPT entry has been modified or has pending
1250 * R/C bit changes.
1251 */
1252static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
1253{
1254 unsigned long rcbits_unset;
1255
1256 if (revp->guest_rpte & HPTE_GR_MODIFIED)
1257 return 1;
1258
1259 /* Also need to consider changes in reference and changed bits */
1260 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
1261 if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
1262 return 1;
1263
1264 return 0;
1265}
1266
1196static long record_hpte(unsigned long flags, unsigned long *hptp, 1267static long record_hpte(unsigned long flags, unsigned long *hptp,
1197 unsigned long *hpte, struct revmap_entry *revp, 1268 unsigned long *hpte, struct revmap_entry *revp,
1198 int want_valid, int first_pass) 1269 int want_valid, int first_pass)
1199{ 1270{
1200 unsigned long v, r; 1271 unsigned long v, r;
1272 unsigned long rcbits_unset;
1201 int ok = 1; 1273 int ok = 1;
1202 int valid, dirty; 1274 int valid, dirty;
1203 1275
1204 /* Unmodified entries are uninteresting except on the first pass */ 1276 /* Unmodified entries are uninteresting except on the first pass */
1205 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); 1277 dirty = hpte_dirty(revp, hptp);
1206 if (!first_pass && !dirty) 1278 if (!first_pass && !dirty)
1207 return 0; 1279 return 0;
1208 1280
@@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
1223 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) 1295 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
1224 cpu_relax(); 1296 cpu_relax();
1225 v = hptp[0]; 1297 v = hptp[0];
1298
1299 /* re-evaluate valid and dirty from synchronized HPTE value */
1300 valid = !!(v & HPTE_V_VALID);
1301 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
1302
1303 /* Harvest R and C into guest view if necessary */
1304 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
1305 if (valid && (rcbits_unset & hptp[1])) {
1306 revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
1307 HPTE_GR_MODIFIED;
1308 dirty = 1;
1309 }
1310
1226 if (v & HPTE_V_ABSENT) { 1311 if (v & HPTE_V_ABSENT) {
1227 v &= ~HPTE_V_ABSENT; 1312 v &= ~HPTE_V_ABSENT;
1228 v |= HPTE_V_VALID; 1313 v |= HPTE_V_VALID;
1314 valid = 1;
1229 } 1315 }
1230 /* re-evaluate valid and dirty from synchronized HPTE value */
1231 valid = !!(v & HPTE_V_VALID);
1232 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) 1316 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
1233 valid = 0; 1317 valid = 0;
1234 r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); 1318
1235 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); 1319 r = revp->guest_rpte;
1236 /* only clear modified if this is the right sort of entry */ 1320 /* only clear modified if this is the right sort of entry */
1237 if (valid == want_valid && dirty) { 1321 if (valid == want_valid && dirty) {
1238 r &= ~HPTE_GR_MODIFIED; 1322 r &= ~HPTE_GR_MODIFIED;
@@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
1288 /* Skip uninteresting entries, i.e. clean on not-first pass */ 1372 /* Skip uninteresting entries, i.e. clean on not-first pass */
1289 if (!first_pass) { 1373 if (!first_pass) {
1290 while (i < kvm->arch.hpt_npte && 1374 while (i < kvm->arch.hpt_npte &&
1291 !(revp->guest_rpte & HPTE_GR_MODIFIED)) { 1375 !hpte_dirty(revp, hptp)) {
1292 ++i; 1376 ++i;
1293 hptp += 2; 1377 hptp += 2;
1294 ++revp; 1378 ++revp;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 836c56975e21..1f6344c4408d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			run->papr_hcall.args[i] = gpr;
 		}
 
-		emulated = EMULATE_DO_PAPR;
+		run->exit_reason = KVM_EXIT_PAPR_HCALL;
+		vcpu->arch.hcall_needed = 1;
+		emulated = EMULATE_EXIT_USER;
 		break;
 	}
 #endif
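
Since EMULATE_DO_PAPR is replaced by a plain exit to userspace, a VMM's run loop now sees these hypercalls as KVM_EXIT_PAPR_HCALL exits. A rough sketch of the dispatch, assuming the standard kvm_run layout; handle_hcall() is a hypothetical VMM-side helper:

#include <linux/kvm.h>

/* Hypothetical VMM-side hypercall handler. */
extern __u64 handle_hcall(__u64 nr, __u64 *args);

static void handle_exit(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_PAPR_HCALL:
		/* nr holds the hcall number, args[] its parameters */
		run->papr_hcall.ret = handle_hcall(run->papr_hcall.nr,
						   run->papr_hcall.args);
		break;
	default:
		break;
	}
}
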
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1e521baf9a7d..178521e81ce4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -66,6 +66,31 @@
66static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 66static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
67static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 67static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
68 68
69void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
70{
71 int me;
72 int cpu = vcpu->cpu;
73 wait_queue_head_t *wqp;
74
75 wqp = kvm_arch_vcpu_wq(vcpu);
76 if (waitqueue_active(wqp)) {
77 wake_up_interruptible(wqp);
78 ++vcpu->stat.halt_wakeup;
79 }
80
81 me = get_cpu();
82
83 /* CPU points to the first thread of the core */
84 if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
85 int real_cpu = cpu + vcpu->arch.ptid;
86 if (paca[real_cpu].kvm_hstate.xics_phys)
87 xics_wake_cpu(real_cpu);
88 else if (cpu_online(cpu))
89 smp_send_reschedule(cpu);
90 }
91 put_cpu();
92}
93
69/* 94/*
70 * We use the vcpu_load/put functions to measure stolen time. 95 * We use the vcpu_load/put functions to measure stolen time.
71 * Stolen time is counted as time when either the vcpu is able to 96 * Stolen time is counted as time when either the vcpu is able to
@@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
259 len = ((struct reg_vpa *)va)->length.hword; 284 len = ((struct reg_vpa *)va)->length.hword;
260 else 285 else
261 len = ((struct reg_vpa *)va)->length.word; 286 len = ((struct reg_vpa *)va)->length.word;
262 kvmppc_unpin_guest_page(kvm, va); 287 kvmppc_unpin_guest_page(kvm, va, vpa, false);
263 288
264 /* Check length */ 289 /* Check length */
265 if (len > nb || len < sizeof(struct reg_vpa)) 290 if (len > nb || len < sizeof(struct reg_vpa))
@@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
359 va = NULL; 384 va = NULL;
360 nb = 0; 385 nb = 0;
361 if (gpa) 386 if (gpa)
362 va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); 387 va = kvmppc_pin_guest_page(kvm, gpa, &nb);
363 spin_lock(&vcpu->arch.vpa_update_lock); 388 spin_lock(&vcpu->arch.vpa_update_lock);
364 if (gpa == vpap->next_gpa) 389 if (gpa == vpap->next_gpa)
365 break; 390 break;
366 /* sigh... unpin that one and try again */ 391 /* sigh... unpin that one and try again */
367 if (va) 392 if (va)
368 kvmppc_unpin_guest_page(kvm, va); 393 kvmppc_unpin_guest_page(kvm, va, gpa, false);
369 } 394 }
370 395
371 vpap->update_pending = 0; 396 vpap->update_pending = 0;
@@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
375 * has changed the mappings underlying guest memory, 400 * has changed the mappings underlying guest memory,
376 * so unregister the region. 401 * so unregister the region.
377 */ 402 */
378 kvmppc_unpin_guest_page(kvm, va); 403 kvmppc_unpin_guest_page(kvm, va, gpa, false);
379 va = NULL; 404 va = NULL;
380 } 405 }
381 if (vpap->pinned_addr) 406 if (vpap->pinned_addr)
382 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); 407 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
408 vpap->dirty);
409 vpap->gpa = gpa;
383 vpap->pinned_addr = va; 410 vpap->pinned_addr = va;
411 vpap->dirty = false;
384 if (va) 412 if (va)
385 vpap->pinned_end = va + vpap->len; 413 vpap->pinned_end = va + vpap->len;
386} 414}
@@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
472 /* order writing *dt vs. writing vpa->dtl_idx */ 500 /* order writing *dt vs. writing vpa->dtl_idx */
473 smp_wmb(); 501 smp_wmb();
474 vpa->dtl_idx = ++vcpu->arch.dtl_index; 502 vpa->dtl_idx = ++vcpu->arch.dtl_index;
503 vcpu->arch.dtl.dirty = true;
475} 504}
476 505
477int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) 506int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
479 unsigned long req = kvmppc_get_gpr(vcpu, 3); 508 unsigned long req = kvmppc_get_gpr(vcpu, 3);
480 unsigned long target, ret = H_SUCCESS; 509 unsigned long target, ret = H_SUCCESS;
481 struct kvm_vcpu *tvcpu; 510 struct kvm_vcpu *tvcpu;
482 int idx; 511 int idx, rc;
483 512
484 switch (req) { 513 switch (req) {
485 case H_ENTER: 514 case H_ENTER:
@@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
515 kvmppc_get_gpr(vcpu, 5), 544 kvmppc_get_gpr(vcpu, 5),
516 kvmppc_get_gpr(vcpu, 6)); 545 kvmppc_get_gpr(vcpu, 6));
517 break; 546 break;
547 case H_RTAS:
548 if (list_empty(&vcpu->kvm->arch.rtas_tokens))
549 return RESUME_HOST;
550
551 rc = kvmppc_rtas_hcall(vcpu);
552
553 if (rc == -ENOENT)
554 return RESUME_HOST;
555 else if (rc == 0)
556 break;
557
558 /* Send the error out to userspace via KVM_RUN */
559 return rc;
560
561 case H_XIRR:
562 case H_CPPR:
563 case H_EOI:
564 case H_IPI:
565 if (kvmppc_xics_enabled(vcpu)) {
566 ret = kvmppc_xics_hcall(vcpu, req);
567 break;
568 } /* fallthrough */
518 default: 569 default:
519 return RESUME_HOST; 570 return RESUME_HOST;
520 } 571 }
@@ -913,15 +964,19 @@ out:
913 return ERR_PTR(err); 964 return ERR_PTR(err);
914} 965}
915 966
967static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
968{
969 if (vpa->pinned_addr)
970 kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
971 vpa->dirty);
972}
973
916void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 974void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
917{ 975{
918 spin_lock(&vcpu->arch.vpa_update_lock); 976 spin_lock(&vcpu->arch.vpa_update_lock);
919 if (vcpu->arch.dtl.pinned_addr) 977 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
920 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); 978 unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
921 if (vcpu->arch.slb_shadow.pinned_addr) 979 unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
922 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
923 if (vcpu->arch.vpa.pinned_addr)
924 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
925 spin_unlock(&vcpu->arch.vpa_update_lock); 980 spin_unlock(&vcpu->arch.vpa_update_lock);
926 kvm_vcpu_uninit(vcpu); 981 kvm_vcpu_uninit(vcpu);
927 kmem_cache_free(kvm_vcpu_cache, vcpu); 982 kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
955} 1010}
956 1011
957extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 1012extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
958extern void xics_wake_cpu(int cpu);
959 1013
960static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, 1014static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
961 struct kvm_vcpu *vcpu) 1015 struct kvm_vcpu *vcpu)
@@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1330 break; 1384 break;
1331 vc->runner = vcpu; 1385 vc->runner = vcpu;
1332 n_ceded = 0; 1386 n_ceded = 0;
1333 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) 1387 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
1334 if (!v->arch.pending_exceptions) 1388 if (!v->arch.pending_exceptions)
1335 n_ceded += v->arch.ceded; 1389 n_ceded += v->arch.ceded;
1390 else
1391 v->arch.ceded = 0;
1392 }
1336 if (n_ceded == vc->n_runnable) 1393 if (n_ceded == vc->n_runnable)
1337 kvmppc_vcore_blocked(vc); 1394 kvmppc_vcore_blocked(vc);
1338 else 1395 else
@@ -1821,6 +1878,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1821 cpumask_setall(&kvm->arch.need_tlb_flush); 1878 cpumask_setall(&kvm->arch.need_tlb_flush);
1822 1879
1823 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1880 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1881 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
1824 1882
1825 kvm->arch.rma = NULL; 1883 kvm->arch.rma = NULL;
1826 1884
@@ -1866,6 +1924,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
1866 kvm->arch.rma = NULL; 1924 kvm->arch.rma = NULL;
1867 } 1925 }
1868 1926
1927 kvmppc_rtas_tokens_free(kvm);
1928
1869 kvmppc_free_hpt(kvm); 1929 kvmppc_free_hpt(kvm);
1870 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 1930 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
1871} 1931}
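
To make the H_RTAS path above usable, userspace first binds RTAS service names to tokens on the rtas_tokens list initialised in kvmppc_core_init_vm(). A sketch follows, with the caveat that the VM ioctl (named KVM_PPC_RTAS_DEFINE_TOKEN in the full series) does not appear in this excerpt and is assumed here; per the uapi struct comment, a token of 0 undefines a mapping.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Bind an RTAS service name to a token that kvmppc_rtas_hcall() will handle. */
static int define_rtas_token(int vm_fd, const char *name, uint64_t token)
{
	struct kvm_rtas_token_args args;

	memset(&args, 0, sizeof(args));
	strncpy(args.name, name, sizeof(args.name) - 1);
	args.token = token;

	return ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
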
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 19c93bae1aea..6dcbb49105a4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
97} 97}
98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
99 99
100/*
101 * Note modification of an HPTE; set the HPTE modified bit
102 * if anyone is interested.
103 */
104static inline void note_hpte_modification(struct kvm *kvm,
105 struct revmap_entry *rev)
106{
107 if (atomic_read(&kvm->arch.hpte_mod_interest))
108 rev->guest_rpte |= HPTE_GR_MODIFIED;
109}
110
111/* Remove this HPTE from the chain for a real page */ 100/* Remove this HPTE from the chain for a real page */
112static void remove_revmap_chain(struct kvm *kvm, long pte_index, 101static void remove_revmap_chain(struct kvm *kvm, long pte_index,
113 struct revmap_entry *rev, 102 struct revmap_entry *rev,
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 000000000000..b4b0082f761c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,406 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/kvm_host.h>
12#include <linux/err.h>
13
14#include <asm/kvm_book3s.h>
15#include <asm/kvm_ppc.h>
16#include <asm/hvcall.h>
17#include <asm/xics.h>
18#include <asm/debug.h>
19#include <asm/synch.h>
20#include <asm/ppc-opcode.h>
21
22#include "book3s_xics.h"
23
24#define DEBUG_PASSUP
25
26static inline void rm_writeb(unsigned long paddr, u8 val)
27{
28 __asm__ __volatile__("sync; stbcix %0,0,%1"
29 : : "r" (val), "r" (paddr) : "memory");
30}
31
32static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
33 struct kvm_vcpu *this_vcpu)
34{
35 struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
36 unsigned long xics_phys;
37 int cpu;
38
39 /* Mark the target VCPU as having an interrupt pending */
40 vcpu->stat.queue_intr++;
41 set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
42
43 /* Kick self ? Just set MER and return */
44 if (vcpu == this_vcpu) {
45 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
46 return;
47 }
48
49 /* Check if the core is loaded, if not, too hard */
50 cpu = vcpu->cpu;
51 if (cpu < 0 || cpu >= nr_cpu_ids) {
52 this_icp->rm_action |= XICS_RM_KICK_VCPU;
53 this_icp->rm_kick_target = vcpu;
54 return;
55 }
56 /* In SMT cpu will always point to thread 0, we adjust it */
57 cpu += vcpu->arch.ptid;
58
59 /* Not too hard, then poke the target */
60 xics_phys = paca[cpu].kvm_hstate.xics_phys;
61 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
62}
63
64static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
65{
66 /* Note: Only called on self ! */
67 clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
68 &vcpu->arch.pending_exceptions);
69 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
70}
71
72static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
73 union kvmppc_icp_state old,
74 union kvmppc_icp_state new)
75{
76 struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
77 bool success;
78
79 /* Calculate new output value */
80 new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
81
82 /* Attempt atomic update */
83 success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
84 if (!success)
85 goto bail;
86
87 /*
88 * Check for output state update
89 *
90 * Note that this is racy since another processor could be updating
91 * the state already. This is why we never clear the interrupt output
92 * here, we only ever set it. The clear only happens prior to doing
93 * an update and only by the processor itself. Currently we do it
94 * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
95 *
96 * We also do not try to figure out whether the EE state has changed,
97 * we unconditionally set it if the new state calls for it. The reason
98 * for that is that we opportunistically remove the pending interrupt
99 * flag when raising CPPR, so we need to set it back here if an
100 * interrupt is still pending.
101 */
102 if (new.out_ee)
103 icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
104
105 /* Expose the state change for debug purposes */
106 this_vcpu->arch.icp->rm_dbgstate = new;
107 this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
108
109 bail:
110 return success;
111}
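For reference, a minimal sketch of the lockless update pattern used by icp_rm_try_update() above, assuming a simplified field layout (the real bitfield widths and ordering live in book3s_xics.h, not here): the whole ICP state fits in one 64-bit word, so a single cmpxchg64() publishes a consistent snapshot and callers simply retry on failure.

        #include <linux/types.h>        /* cmpxchg64() comes from asm/cmpxchg.h */

        /* Sketch only: field widths and ordering are assumptions, not the
         * layout defined in book3s_xics.h */
        union icp_state_sketch {
                u64 raw;
                struct {
                        u64 xisr:24;
                        u64 pending_pri:8;
                        u64 cppr:8;
                        u64 mfrr:8;
                        u64 need_resend:1;
                        u64 out_ee:1;
                };
        };

        /* Publish 'new' only if the state is still 'old'; the caller retries
         * when this returns false, exactly as the do/while loops below do */
        static bool try_update_sketch(union icp_state_sketch *state,
                                      union icp_state_sketch old,
                                      union icp_state_sketch new)
        {
                /* Recompute the derived EE output before attempting the swap */
                new.out_ee = (new.xisr != 0 && new.pending_pri < new.cppr);
                return cmpxchg64(&state->raw, old.raw, new.raw) == old.raw;
        }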
112
113static inline int check_too_hard(struct kvmppc_xics *xics,
114 struct kvmppc_icp *icp)
115{
116 return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
117}
118
119static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
120 u8 new_cppr)
121{
122 union kvmppc_icp_state old_state, new_state;
123 bool resend;
124
125 /*
126 * This handles several related states in one operation:
127 *
128 * ICP State: Down_CPPR
129 *
130 * Load CPPR with new value and if the XISR is 0
131 * then check for resends:
132 *
133 * ICP State: Resend
134 *
135 * If MFRR is more favored than CPPR, check for IPIs
136 * and notify ICS of a potential resend. This is done
137 * asynchronously (when used in real mode, we will have
138 * to exit here).
139 *
140 * We do not handle the complete Check_IPI as documented
141 * here. In the PAPR, this state will be used for both
142 * Set_MFRR and Down_CPPR. However, we know that we aren't
143 * changing the MFRR state here so we don't need to handle
144 * the case of an MFRR causing a reject of a pending irq,
145 * this will have been handled when the MFRR was set in the
146 * first place.
147 *
148 * Thus we don't have to handle rejects, only resends.
149 *
150 * When implementing real mode for HV KVM, resend will lead to
151 * a H_TOO_HARD return and the whole transaction will be handled
152 * in virtual mode.
153 */
154 do {
155 old_state = new_state = ACCESS_ONCE(icp->state);
156
157 /* Down_CPPR */
158 new_state.cppr = new_cppr;
159
160 /*
161 * Cut down Resend / Check_IPI / IPI
162 *
163 * The logic is that we cannot have a pending interrupt
164 * trumped by an IPI at this point (see above), so we
165 * know that either the pending interrupt is already an
166 * IPI (in which case we don't care to override it) or
167 * it's either more favored than us or non existent
168 */
169 if (new_state.mfrr < new_cppr &&
170 new_state.mfrr <= new_state.pending_pri) {
171 new_state.pending_pri = new_state.mfrr;
172 new_state.xisr = XICS_IPI;
173 }
174
175 /* Latch/clear resend bit */
176 resend = new_state.need_resend;
177 new_state.need_resend = 0;
178
179 } while (!icp_rm_try_update(icp, old_state, new_state));
180
181 /*
182 * Now handle resend checks. Those are asynchronous to the ICP
183 * state update in HW (ie bus transactions) so we can handle them
184 * separately here as well.
185 */
186 if (resend)
187 icp->rm_action |= XICS_RM_CHECK_RESEND;
188}
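The "cut down Check_IPI" test inside the loop above is worth seeing in isolation; a small sketch with a worked example (illustration only, not code from the patch):

        /* Sketch: if the stored MFRR is now more favoured than both the new
         * CPPR and whatever is pending, the IPI becomes the pending source */
        static bool ipi_becomes_pending(u8 mfrr, u8 new_cppr, u8 pending_pri)
        {
                return mfrr < new_cppr && mfrr <= pending_pri;
        }

        /* Example: an EOI drops CPPR from 0x00 to 0xff, MFRR is 0x05 and
         * nothing is pending (pending_pri 0xff); the test is true, so XISR
         * becomes XICS_IPI at priority 0x05. */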
189
190
191unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
192{
193 union kvmppc_icp_state old_state, new_state;
194 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
195 struct kvmppc_icp *icp = vcpu->arch.icp;
196 u32 xirr;
197
198 if (!xics || !xics->real_mode)
199 return H_TOO_HARD;
200
201 /* First clear the interrupt */
202 icp_rm_clr_vcpu_irq(icp->vcpu);
203
204 /*
205 * ICP State: Accept_Interrupt
206 *
207 * Return the pending interrupt (if any) along with the
208 * current CPPR, then clear the XISR & set CPPR to the
209 * pending priority
210 */
211 do {
212 old_state = new_state = ACCESS_ONCE(icp->state);
213
214 xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
215 if (!old_state.xisr)
216 break;
217 new_state.cppr = new_state.pending_pri;
218 new_state.pending_pri = 0xff;
219 new_state.xisr = 0;
220
221 } while (!icp_rm_try_update(icp, old_state, new_state));
222
223 /* Return the result in GPR4 */
224 vcpu->arch.gpr[4] = xirr;
225
226 return check_too_hard(xics, icp);
227}
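The XIRR value handed back in GPR4 packs the pre-accept CPPR into the top byte and the interrupt source number into the low 24 bits; a small sketch of that packing (the helpers below are illustration only, not part of the patch):

        #include <linux/types.h>

        /* Sketch: compose/decompose the 32-bit XIRR as the code above does */
        static inline u32 xirr_pack(u8 cppr, u32 xisr)
        {
                return ((u32)cppr << 24) | (xisr & 0x00ffffff);
        }

        static inline u8 xirr_cppr(u32 xirr)    /* H_EOI uses xirr >> 24 */
        {
                return xirr >> 24;
        }

        static inline u32 xirr_source(u32 xirr) /* H_EOI uses the low 24 bits */
        {
                return xirr & 0x00ffffff;
        }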
228
229int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
230 unsigned long mfrr)
231{
232 union kvmppc_icp_state old_state, new_state;
233 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
234 struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
235 u32 reject;
236 bool resend;
237 bool local;
238
239 if (!xics || !xics->real_mode)
240 return H_TOO_HARD;
241
242 local = this_icp->server_num == server;
243 if (local)
244 icp = this_icp;
245 else
246 icp = kvmppc_xics_find_server(vcpu->kvm, server);
247 if (!icp)
248 return H_PARAMETER;
249
250 /*
251 * ICP state: Set_MFRR
252 *
253 * If the CPPR is more favored than the new MFRR, then
254 * nothing needs to be done as there can be no XISR to
255 * reject.
256 *
257 * If the CPPR is less favored, then we might be replacing
258 * an interrupt, and thus need to possibly reject it as in
259 *
260 * ICP state: Check_IPI
261 */
262 do {
263 old_state = new_state = ACCESS_ONCE(icp->state);
264
265 /* Set_MFRR */
266 new_state.mfrr = mfrr;
267
268 /* Check_IPI */
269 reject = 0;
270 resend = false;
271 if (mfrr < new_state.cppr) {
272 /* Reject a pending interrupt if not an IPI */
273 if (mfrr <= new_state.pending_pri)
274 reject = new_state.xisr;
275 new_state.pending_pri = mfrr;
276 new_state.xisr = XICS_IPI;
277 }
278
279 if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
280 resend = new_state.need_resend;
281 new_state.need_resend = 0;
282 }
283 } while (!icp_rm_try_update(icp, old_state, new_state));
284
285 /* Pass rejects to virtual mode */
286 if (reject && reject != XICS_IPI) {
287 this_icp->rm_action |= XICS_RM_REJECT;
288 this_icp->rm_reject = reject;
289 }
290
291 /* Pass resends to virtual mode */
292 if (resend)
293 this_icp->rm_action |= XICS_RM_CHECK_RESEND;
294
295 return check_too_hard(xics, this_icp);
296}
297
298int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
299{
300 union kvmppc_icp_state old_state, new_state;
301 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
302 struct kvmppc_icp *icp = vcpu->arch.icp;
303 u32 reject;
304
305 if (!xics || !xics->real_mode)
306 return H_TOO_HARD;
307
308 /*
309 * ICP State: Set_CPPR
310 *
311 * We can safely compare the new value with the current
312 * value outside of the transaction as the CPPR is only
313 * ever changed by the processor on itself
314 */
315 if (cppr > icp->state.cppr) {
316 icp_rm_down_cppr(xics, icp, cppr);
317 goto bail;
318 } else if (cppr == icp->state.cppr)
319 return H_SUCCESS;
320
321 /*
322 * ICP State: Up_CPPR
323 *
324 * The processor is raising its priority, this can result
325 * in a rejection of a pending interrupt:
326 *
327 * ICP State: Reject_Current
328 *
329 * We can remove EE from the current processor, the update
330 * transaction will set it again if needed
331 */
332 icp_rm_clr_vcpu_irq(icp->vcpu);
333
334 do {
335 old_state = new_state = ACCESS_ONCE(icp->state);
336
337 reject = 0;
338 new_state.cppr = cppr;
339
340 if (cppr <= new_state.pending_pri) {
341 reject = new_state.xisr;
342 new_state.xisr = 0;
343 new_state.pending_pri = 0xff;
344 }
345
346 } while (!icp_rm_try_update(icp, old_state, new_state));
347
348 /* Pass rejects to virtual mode */
349 if (reject && reject != XICS_IPI) {
350 icp->rm_action |= XICS_RM_REJECT;
351 icp->rm_reject = reject;
352 }
353 bail:
354 return check_too_hard(xics, icp);
355}
356
357int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
358{
359 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
360 struct kvmppc_icp *icp = vcpu->arch.icp;
361 struct kvmppc_ics *ics;
362 struct ics_irq_state *state;
363 u32 irq = xirr & 0x00ffffff;
364 u16 src;
365
366 if (!xics || !xics->real_mode)
367 return H_TOO_HARD;
368
369 /*
370 * ICP State: EOI
371 *
372 * Note: If EOI is incorrectly used by SW to lower the CPPR
373 * value (ie more favored), we do not check for rejection of
374 * a pending interrupt, this is a SW error and PAPR sepcifies
375 * that we don't have to deal with it.
376 *
377 * The sending of an EOI to the ICS is handled after the
378 * CPPR update
379 *
380 * ICP State: Down_CPPR which we handle
381 * in a separate function as it's shared with H_CPPR.
382 */
383 icp_rm_down_cppr(xics, icp, xirr >> 24);
384
385 /* IPIs have no EOI */
386 if (irq == XICS_IPI)
387 goto bail;
388 /*
389 * EOI handling: If the interrupt is still asserted, we need to
390 * resend it. We can take a lockless "peek" at the ICS state here.
391 *
392 * "Message" interrupts will never have "asserted" set
393 */
394 ics = kvmppc_xics_find_ics(xics, irq, &src);
395 if (!ics)
396 goto bail;
397 state = &ics->irq_state[src];
398
399 /* Still asserted, resend it, we make it look like a reject */
400 if (state->asserted) {
401 icp->rm_action |= XICS_RM_REJECT;
402 icp->rm_reject = irq;
403 }
404 bail:
405 return check_too_hard(xics, icp);
406}
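Everything in this file follows the same convention: a real-mode handler does whatever can be done with real-mode-safe accesses, records any work that needs the full ICS logic in icp->rm_action, and lets check_too_hard() turn that into H_TOO_HARD so kvmppc_xics_rm_complete() in book3s_xics.c finishes the job in virtual mode. A sketch of the pattern, with names taken from the file above; the need_resend parameter is a hypothetical stand-in for whichever condition a given handler detects:

        /* Sketch of the defer-to-virtual-mode pattern used by every handler */
        static int rm_handler_sketch(struct kvmppc_xics *xics,
                                     struct kvmppc_icp *icp, bool need_resend)
        {
                /* ... fast-path ICP update done with cmpxchg64 as above ... */

                if (need_resend)
                        icp->rm_action |= XICS_RM_CHECK_RESEND;

                /* Any queued action (or the debug knob) bounces to virtual mode */
                return (xics->real_mode_dbg || icp->rm_action) ?
                        H_TOO_HARD : H_SUCCESS;
        }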
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e33d11f1b977..b02f91e4c70d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
79 * * 79 * *
80 *****************************************************************************/ 80 *****************************************************************************/
81 81
82#define XICS_XIRR 4
83#define XICS_QIRR 0xc
84#define XICS_IPI 2 /* interrupt source # for IPIs */
85
86/* 82/*
87 * We come in here when wakened from nap mode on a secondary hw thread. 83 * We come in here when wakened from nap mode on a secondary hw thread.
88 * Relocation is off and most register values are lost. 84 * Relocation is off and most register values are lost.
@@ -101,50 +97,51 @@ kvm_start_guest:
101 li r0,1 97 li r0,1
102 stb r0,PACA_NAPSTATELOST(r13) 98 stb r0,PACA_NAPSTATELOST(r13)
103 99
104 /* get vcpu pointer, NULL if we have no vcpu to run */ 100 /* were we napping due to cede? */
105 ld r4,HSTATE_KVM_VCPU(r13) 101 lbz r0,HSTATE_NAPPING(r13)
106 cmpdi cr1,r4,0 102 cmpwi r0,0
103 bne kvm_end_cede
104
105 /*
106 * We weren't napping due to cede, so this must be a secondary
107 * thread being woken up to run a guest, or being woken up due
108 * to a stray IPI. (Or due to some machine check or hypervisor
109 * maintenance interrupt while the core is in KVM.)
110 */
107 111
108 /* Check the wake reason in SRR1 to see why we got here */ 112 /* Check the wake reason in SRR1 to see why we got here */
109 mfspr r3,SPRN_SRR1 113 mfspr r3,SPRN_SRR1
110 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ 114 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
111 cmpwi r3,4 /* was it an external interrupt? */ 115 cmpwi r3,4 /* was it an external interrupt? */
112 bne 27f 116 bne 27f /* if not */
113 117 ld r5,HSTATE_XICS_PHYS(r13)
114 /* 118 li r7,XICS_XIRR /* if it was an external interrupt, */
115 * External interrupt - for now assume it is an IPI, since we
116 * should never get any other interrupts sent to offline threads.
117 * Only do this for secondary threads.
118 */
119 beq cr1,25f
120 lwz r3,VCPU_PTID(r4)
121 cmpwi r3,0
122 beq 27f
12325: ld r5,HSTATE_XICS_PHYS(r13)
124 li r0,0xff
125 li r6,XICS_QIRR
126 li r7,XICS_XIRR
127 lwzcix r8,r5,r7 /* get and ack the interrupt */ 119 lwzcix r8,r5,r7 /* get and ack the interrupt */
128 sync 120 sync
129 clrldi. r9,r8,40 /* get interrupt source ID. */ 121 clrldi. r9,r8,40 /* get interrupt source ID. */
130 beq 27f /* none there? */ 122 beq 28f /* none there? */
131 cmpwi r9,XICS_IPI 123 cmpwi r9,XICS_IPI /* was it an IPI? */
132 bne 26f 124 bne 29f
125 li r0,0xff
126 li r6,XICS_MFRR
133 stbcix r0,r5,r6 /* clear IPI */ 127 stbcix r0,r5,r6 /* clear IPI */
13426: stwcix r8,r5,r7 /* EOI the interrupt */ 128 stwcix r8,r5,r7 /* EOI the interrupt */
135 129 sync /* order loading of vcpu after that */
13627: /* XXX should handle hypervisor maintenance interrupts etc. here */
137 130
138 /* reload vcpu pointer after clearing the IPI */ 131 /* get vcpu pointer, NULL if we have no vcpu to run */
139 ld r4,HSTATE_KVM_VCPU(r13) 132 ld r4,HSTATE_KVM_VCPU(r13)
140 cmpdi r4,0 133 cmpdi r4,0
141 /* if we have no vcpu to run, go back to sleep */ 134 /* if we have no vcpu to run, go back to sleep */
142 beq kvm_no_guest 135 beq kvm_no_guest
136 b kvmppc_hv_entry
143 137
144 /* were we napping due to cede? */ 13827: /* XXX should handle hypervisor maintenance interrupts etc. here */
145 lbz r0,HSTATE_NAPPING(r13) 139 b kvm_no_guest
146 cmpwi r0,0 14028: /* SRR1 said external but ICP said nope?? */
147 bne kvm_end_cede 141 b kvm_no_guest
14229: /* External non-IPI interrupt to offline secondary thread? help?? */
143 stw r8,HSTATE_SAVED_XIRR(r13)
144 b kvm_no_guest
148 145
149.global kvmppc_hv_entry 146.global kvmppc_hv_entry
150kvmppc_hv_entry: 147kvmppc_hv_entry:
@@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
260 lwz r5, LPPACA_YIELDCOUNT(r3) 257 lwz r5, LPPACA_YIELDCOUNT(r3)
261 addi r5, r5, 1 258 addi r5, r5, 1
262 stw r5, LPPACA_YIELDCOUNT(r3) 259 stw r5, LPPACA_YIELDCOUNT(r3)
260 li r6, 1
261 stb r6, VCPU_VPA_DIRTY(r4)
26325: 26225:
264 /* Load up DAR and DSISR */ 263 /* Load up DAR and DSISR */
265 ld r5, VCPU_DAR(r4) 264 ld r5, VCPU_DAR(r4)
@@ -485,20 +484,20 @@ toc_tlbie_lock:
485 mtctr r6 484 mtctr r6
486 mtxer r7 485 mtxer r7
487 486
487 ld r10, VCPU_PC(r4)
488 ld r11, VCPU_MSR(r4)
488kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ 489kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
489 ld r6, VCPU_SRR0(r4) 490 ld r6, VCPU_SRR0(r4)
490 ld r7, VCPU_SRR1(r4) 491 ld r7, VCPU_SRR1(r4)
491 ld r10, VCPU_PC(r4)
492 ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
493 492
493 /* r11 = vcpu->arch.msr & ~MSR_HV */
494 rldicl r11, r11, 63 - MSR_HV_LG, 1 494 rldicl r11, r11, 63 - MSR_HV_LG, 1
495 rotldi r11, r11, 1 + MSR_HV_LG 495 rotldi r11, r11, 1 + MSR_HV_LG
496 ori r11, r11, MSR_ME 496 ori r11, r11, MSR_ME
497 497
498 /* Check if we can deliver an external or decrementer interrupt now */ 498 /* Check if we can deliver an external or decrementer interrupt now */
499 ld r0,VCPU_PENDING_EXC(r4) 499 ld r0,VCPU_PENDING_EXC(r4)
500 li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) 500 lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
501 oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
502 and r0,r0,r8 501 and r0,r0,r8
503 cmpdi cr1,r0,0 502 cmpdi cr1,r0,0
504 andi. r0,r11,MSR_EE 503 andi. r0,r11,MSR_EE
@@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
526 /* Move SRR0 and SRR1 into the respective regs */ 525 /* Move SRR0 and SRR1 into the respective regs */
5275: mtspr SPRN_SRR0, r6 5265: mtspr SPRN_SRR0, r6
528 mtspr SPRN_SRR1, r7 527 mtspr SPRN_SRR1, r7
529 li r0,0
530 stb r0,VCPU_CEDED(r4) /* cancel cede */
531 528
532fast_guest_return: 529fast_guest_return:
530 li r0,0
531 stb r0,VCPU_CEDED(r4) /* cancel cede */
533 mtspr SPRN_HSRR0,r10 532 mtspr SPRN_HSRR0,r10
534 mtspr SPRN_HSRR1,r11 533 mtspr SPRN_HSRR1,r11
535 534
@@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
676 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL 675 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
677 beq hcall_try_real_mode 676 beq hcall_try_real_mode
678 677
679 /* Check for mediated interrupts (could be done earlier really ...) */ 678 /* Only handle external interrupts here on arch 206 and later */
680BEGIN_FTR_SECTION 679BEGIN_FTR_SECTION
681 cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL 680 b ext_interrupt_to_host
682 bne+ 1f 681END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
683 andi. r0,r11,MSR_EE 682
684 beq 1f 683 /* External interrupt ? */
685 mfspr r5,SPRN_LPCR 684 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
686 andi. r0,r5,LPCR_MER 685 bne+ ext_interrupt_to_host
687 bne bounce_ext_interrupt 686
6881: 687 /* External interrupt, first check for host_ipi. If this is
689END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 688 * set, we know the host wants us out so let's do it now
689 */
690do_ext_interrupt:
691 lbz r0, HSTATE_HOST_IPI(r13)
692 cmpwi r0, 0
693 bne ext_interrupt_to_host
694
695 /* Now read the interrupt from the ICP */
696 ld r5, HSTATE_XICS_PHYS(r13)
697 li r7, XICS_XIRR
698 cmpdi r5, 0
699 beq- ext_interrupt_to_host
700 lwzcix r3, r5, r7
701 rlwinm. r0, r3, 0, 0xffffff
702 sync
703 beq 3f /* if nothing pending in the ICP */
704
705 /* We found something in the ICP...
706 *
707 * If it's not an IPI, stash it in the PACA and return to
708 * the host; we don't (yet) handle directing real external
709 * interrupts directly to the guest
710 */
711 cmpwi r0, XICS_IPI
712 bne ext_stash_for_host
713
714 /* It's an IPI, clear the MFRR and EOI it */
715 li r0, 0xff
716 li r6, XICS_MFRR
717 stbcix r0, r5, r6 /* clear the IPI */
718 stwcix r3, r5, r7 /* EOI it */
719 sync
720
721 /* We need to re-check host IPI now in case it got set in the
722 * meantime. If it's clear, we bounce the interrupt to the
723 * guest
724 */
725 lbz r0, HSTATE_HOST_IPI(r13)
726 cmpwi r0, 0
727 bne- 1f
728
729 /* Alright, looks like an IPI for the guest, we need to set MER */
7303:
731 /* Check if any CPU is heading out to the host, if so head out too */
732 ld r5, HSTATE_KVM_VCORE(r13)
733 lwz r0, VCORE_ENTRY_EXIT(r5)
734 cmpwi r0, 0x100
735 bge ext_interrupt_to_host
736
737 /* See if there is a pending interrupt for the guest */
738 mfspr r8, SPRN_LPCR
739 ld r0, VCPU_PENDING_EXC(r9)
740 /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
741 rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
742 rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
743 beq 2f
744
745 /* And if the guest EE is set, we can deliver immediately, else
746 * we return to the guest with MER set
747 */
748 andi. r0, r11, MSR_EE
749 beq 2f
750 mtspr SPRN_SRR0, r10
751 mtspr SPRN_SRR1, r11
752 li r10, BOOK3S_INTERRUPT_EXTERNAL
753 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
754 rotldi r11, r11, 63
7552: mr r4, r9
756 mtspr SPRN_LPCR, r8
757 b fast_guest_return
758
759 /* We raced with the host, we need to resend that IPI, bummer */
7601: li r0, IPI_PRIORITY
761 stbcix r0, r5, r6 /* set the IPI */
762 sync
763 b ext_interrupt_to_host
764
765ext_stash_for_host:
766 /* It's not an IPI and it's for the host; stash it in the PACA
767 * before exit, where it will be picked up by the host ICP driver
768 */
769 stw r3, HSTATE_SAVED_XIRR(r13)
770ext_interrupt_to_host:
690 771
691guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 772guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
692 /* Save DEC */ 773 /* Save DEC */
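In C-like terms, the do_ext_interrupt path added above makes the following routing decision (a sketch paraphrasing the assembly, not code from the patch; host_ipi, the XIRR read and the MFRR/EOI stores stand for the lbz/lwzcix/stbcix/stwcix sequences):

        /* Sketch only: how an external interrupt taken in real mode is routed */
        enum ext_route { TO_HOST, STASH_FOR_HOST, TO_GUEST };

        static enum ext_route route_external_irq(bool host_ipi, u32 xirr,
                                                 bool host_ipi_raced)
        {
                u32 src = xirr & 0x00ffffff;

                if (host_ipi)
                        return TO_HOST;         /* host asked this thread out */
                if (src == 0)
                        return TO_GUEST;        /* nothing pending in the ICP */
                if (src != XICS_IPI)
                        return STASH_FOR_HOST;  /* device irq: save XIRR      */
                /* it was an IPI: by now the MFRR is cleared and the IPI EOIed */
                if (host_ipi_raced)
                        return TO_HOST;         /* host_ipi got set meanwhile */
                return TO_GUEST;                /* deliver now or set LPCR[MER] */
        }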
@@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
829 beq 44f 910 beq 44f
830 ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ 911 ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
831 li r0,IPI_PRIORITY 912 li r0,IPI_PRIORITY
832 li r7,XICS_QIRR 913 li r7,XICS_MFRR
833 stbcix r0,r7,r8 /* trigger the IPI */ 914 stbcix r0,r7,r8 /* trigger the IPI */
83444: srdi. r3,r3,1 91544: srdi. r3,r3,1
835 addi r6,r6,PACA_SIZE 916 addi r6,r6,PACA_SIZE
@@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1018 lwz r3, LPPACA_YIELDCOUNT(r8) 1099 lwz r3, LPPACA_YIELDCOUNT(r8)
1019 addi r3, r3, 1 1100 addi r3, r3, 1
1020 stw r3, LPPACA_YIELDCOUNT(r8) 1101 stw r3, LPPACA_YIELDCOUNT(r8)
1102 li r3, 1
1103 stb r3, VCPU_VPA_DIRTY(r9)
102125: 110425:
1022 /* Save PMU registers if requested */ 1105 /* Save PMU registers if requested */
1023 /* r8 and cr0.eq are live here */ 1106 /* r8 and cr0.eq are live here */
@@ -1350,11 +1433,19 @@ hcall_real_table:
1350 .long 0 /* 0x58 */ 1433 .long 0 /* 0x58 */
1351 .long 0 /* 0x5c */ 1434 .long 0 /* 0x5c */
1352 .long 0 /* 0x60 */ 1435 .long 0 /* 0x60 */
1353 .long 0 /* 0x64 */ 1436#ifdef CONFIG_KVM_XICS
1354 .long 0 /* 0x68 */ 1437 .long .kvmppc_rm_h_eoi - hcall_real_table
1355 .long 0 /* 0x6c */ 1438 .long .kvmppc_rm_h_cppr - hcall_real_table
1356 .long 0 /* 0x70 */ 1439 .long .kvmppc_rm_h_ipi - hcall_real_table
1357 .long 0 /* 0x74 */ 1440 .long 0 /* 0x70 - H_IPOLL */
1441 .long .kvmppc_rm_h_xirr - hcall_real_table
1442#else
1443 .long 0 /* 0x64 - H_EOI */
1444 .long 0 /* 0x68 - H_CPPR */
1445 .long 0 /* 0x6c - H_IPI */
1446 .long 0 /* 0x70 - H_IPOLL */
1447 .long 0 /* 0x74 - H_XIRR */
1448#endif
1358 .long 0 /* 0x78 */ 1449 .long 0 /* 0x78 */
1359 .long 0 /* 0x7c */ 1450 .long 0 /* 0x7c */
1360 .long 0 /* 0x80 */ 1451 .long 0 /* 0x80 */
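hcall_real_table is effectively indexed by hcall number: the opcodes step by 4 and each entry is a 4-byte offset, so the five XICS slots added above correspond to H_EOI (0x64) through H_XIRR (0x74), with H_IPOLL (0x70) left to virtual mode. A sketch of the lookup, assuming the existing hcall_try_real_mode dispatch (not part of this hunk) uses the hcall number as the byte offset into the table:

        /* Sketch: map an hcall opcode to its hcall_real_table entry; the
         * indexing scheme is an assumption about the dispatch code */
        static long real_mode_handler_offset(const s32 *table,
                                             unsigned long opcode)
        {
                return table[opcode / 4];  /* e.g. H_XIRR (0x74) -> slot 0x1d */
        }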
@@ -1405,15 +1496,6 @@ ignore_hdec:
1405 mr r4,r9 1496 mr r4,r9
1406 b fast_guest_return 1497 b fast_guest_return
1407 1498
1408bounce_ext_interrupt:
1409 mr r4,r9
1410 mtspr SPRN_SRR0,r10
1411 mtspr SPRN_SRR1,r11
1412 li r10,BOOK3S_INTERRUPT_EXTERNAL
1413 li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1414 rotldi r11,r11,63
1415 b fast_guest_return
1416
1417_GLOBAL(kvmppc_h_set_dabr) 1499_GLOBAL(kvmppc_h_set_dabr)
1418 std r4,VCPU_DABR(r3) 1500 std r4,VCPU_DABR(r3)
1419 /* Work around P7 bug where DABR can get corrupted on mtspr */ 1501 /* Work around P7 bug where DABR can get corrupted on mtspr */
@@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1519 b . 1601 b .
1520 1602
1521kvm_end_cede: 1603kvm_end_cede:
1604 /* get vcpu pointer */
1605 ld r4, HSTATE_KVM_VCPU(r13)
1606
1522 /* Woken by external or decrementer interrupt */ 1607 /* Woken by external or decrementer interrupt */
1523 ld r1, HSTATE_HOST_R1(r13) 1608 ld r1, HSTATE_HOST_R1(r13)
1524 1609
@@ -1558,6 +1643,16 @@ kvm_end_cede:
1558 li r0,0 1643 li r0,0
1559 stb r0,HSTATE_NAPPING(r13) 1644 stb r0,HSTATE_NAPPING(r13)
1560 1645
1646 /* Check the wake reason in SRR1 to see why we got here */
1647 mfspr r3, SPRN_SRR1
1648 rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */
1649 cmpwi r3, 4 /* was it an external interrupt? */
1650 li r12, BOOK3S_INTERRUPT_EXTERNAL
1651 mr r9, r4
1652 ld r10, VCPU_PC(r9)
1653 ld r11, VCPU_MSR(r9)
1654 beq do_ext_interrupt /* if so */
1655
1561 /* see if any other thread is already exiting */ 1656 /* see if any other thread is already exiting */
1562 lwz r0,VCORE_ENTRY_EXIT(r5) 1657 lwz r0,VCORE_ENTRY_EXIT(r5)
1563 cmpwi r0,0x100 1658 cmpwi r0,0x100
@@ -1577,8 +1672,7 @@ kvm_cede_prodded:
1577 1672
1578 /* we've ceded but we want to give control to the host */ 1673 /* we've ceded but we want to give control to the host */
1579kvm_cede_exit: 1674kvm_cede_exit:
1580 li r3,H_TOO_HARD 1675 b hcall_real_fallback
1581 blr
1582 1676
1583 /* Try to handle a machine check in real mode */ 1677 /* Try to handle a machine check in real mode */
1584machine_check_realmode: 1678machine_check_realmode:
@@ -1626,7 +1720,7 @@ secondary_nap:
1626 beq 37f 1720 beq 37f
1627 sync 1721 sync
1628 li r0, 0xff 1722 li r0, 0xff
1629 li r6, XICS_QIRR 1723 li r6, XICS_MFRR
1630 stbcix r0, r5, r6 /* clear the IPI */ 1724 stbcix r0, r5, r6 /* clear the IPI */
1631 stwcix r3, r5, r7 /* EOI it */ 1725 stwcix r3, r5, r7 /* EOI it */
163237: sync 172637: sync
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 286e23e6b92d..d09baf143500 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -762,9 +762,7 @@ program_interrupt:
762 run->exit_reason = KVM_EXIT_MMIO; 762 run->exit_reason = KVM_EXIT_MMIO;
763 r = RESUME_HOST_NV; 763 r = RESUME_HOST_NV;
764 break; 764 break;
765 case EMULATE_DO_PAPR: 765 case EMULATE_EXIT_USER:
766 run->exit_reason = KVM_EXIT_PAPR_HCALL;
767 vcpu->arch.hcall_needed = 1;
768 r = RESUME_HOST_NV; 766 r = RESUME_HOST_NV;
769 break; 767 break;
770 default: 768 default:
@@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1298{ 1296{
1299#ifdef CONFIG_PPC64 1297#ifdef CONFIG_PPC64
1300 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1298 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1299 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
1301#endif 1300#endif
1302 1301
1303 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 1302 if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ee02b30878ed..b24309c6c2d5 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
227 return EMULATE_DONE; 227 return EMULATE_DONE;
228} 228}
229 229
230static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
231{
232 long rc = kvmppc_xics_hcall(vcpu, cmd);
233 kvmppc_set_gpr(vcpu, 3, rc);
234 return EMULATE_DONE;
235}
236
230int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) 237int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
231{ 238{
232 switch (cmd) { 239 switch (cmd) {
@@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
246 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 253 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
247 vcpu->stat.halt_wakeup++; 254 vcpu->stat.halt_wakeup++;
248 return EMULATE_DONE; 255 return EMULATE_DONE;
256 case H_XIRR:
257 case H_CPPR:
258 case H_EOI:
259 case H_IPI:
260 if (kvmppc_xics_enabled(vcpu))
261 return kvmppc_h_pr_xics_hcall(vcpu, cmd);
262 break;
263 case H_RTAS:
264 if (list_empty(&vcpu->kvm->arch.rtas_tokens))
265 return RESUME_HOST;
266 if (kvmppc_rtas_hcall(vcpu))
267 break;
268 kvmppc_set_gpr(vcpu, 3, 0);
269 return EMULATE_DONE;
249 } 270 }
250 271
251 return EMULATE_FAIL; 272 return EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 000000000000..3219ba895246
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,274 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/kernel.h>
10#include <linux/kvm_host.h>
11#include <linux/kvm.h>
12#include <linux/err.h>
13
14#include <asm/uaccess.h>
15#include <asm/kvm_book3s.h>
16#include <asm/kvm_ppc.h>
17#include <asm/hvcall.h>
18#include <asm/rtas.h>
19
20#ifdef CONFIG_KVM_XICS
21static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
22{
23 u32 irq, server, priority;
24 int rc;
25
26 if (args->nargs != 3 || args->nret != 1) {
27 rc = -3;
28 goto out;
29 }
30
31 irq = args->args[0];
32 server = args->args[1];
33 priority = args->args[2];
34
35 rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
36 if (rc)
37 rc = -3;
38out:
39 args->rets[0] = rc;
40}
41
42static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
43{
44 u32 irq, server, priority;
45 int rc;
46
47 if (args->nargs != 1 || args->nret != 3) {
48 rc = -3;
49 goto out;
50 }
51
52 irq = args->args[0];
53
54 server = priority = 0;
55 rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
56 if (rc) {
57 rc = -3;
58 goto out;
59 }
60
61 args->rets[1] = server;
62 args->rets[2] = priority;
63out:
64 args->rets[0] = rc;
65}
66
67static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
68{
69 u32 irq;
70 int rc;
71
72 if (args->nargs != 1 || args->nret != 1) {
73 rc = -3;
74 goto out;
75 }
76
77 irq = args->args[0];
78
79 rc = kvmppc_xics_int_off(vcpu->kvm, irq);
80 if (rc)
81 rc = -3;
82out:
83 args->rets[0] = rc;
84}
85
86static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
87{
88 u32 irq;
89 int rc;
90
91 if (args->nargs != 1 || args->nret != 1) {
92 rc = -3;
93 goto out;
94 }
95
96 irq = args->args[0];
97
98 rc = kvmppc_xics_int_on(vcpu->kvm, irq);
99 if (rc)
100 rc = -3;
101out:
102 args->rets[0] = rc;
103}
104#endif /* CONFIG_KVM_XICS */
105
106struct rtas_handler {
107 void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
108 char *name;
109};
110
111static struct rtas_handler rtas_handlers[] = {
112#ifdef CONFIG_KVM_XICS
113 { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
114 { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
115 { .name = "ibm,int-off", .handler = kvm_rtas_int_off },
116 { .name = "ibm,int-on", .handler = kvm_rtas_int_on },
117#endif
118};
119
120struct rtas_token_definition {
121 struct list_head list;
122 struct rtas_handler *handler;
123 u64 token;
124};
125
126static int rtas_name_matches(char *s1, char *s2)
127{
128 struct kvm_rtas_token_args args;
129 return !strncmp(s1, s2, sizeof(args.name));
130}
131
132static int rtas_token_undefine(struct kvm *kvm, char *name)
133{
134 struct rtas_token_definition *d, *tmp;
135
136 lockdep_assert_held(&kvm->lock);
137
138 list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
139 if (rtas_name_matches(d->handler->name, name)) {
140 list_del(&d->list);
141 kfree(d);
142 return 0;
143 }
144 }
145
146 /* It's not an error to undefine an undefined token */
147 return 0;
148}
149
150static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
151{
152 struct rtas_token_definition *d;
153 struct rtas_handler *h = NULL;
154 bool found;
155 int i;
156
157 lockdep_assert_held(&kvm->lock);
158
159 list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
160 if (d->token == token)
161 return -EEXIST;
162 }
163
164 found = false;
165 for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
166 h = &rtas_handlers[i];
167 if (rtas_name_matches(h->name, name)) {
168 found = true;
169 break;
170 }
171 }
172
173 if (!found)
174 return -ENOENT;
175
176 d = kzalloc(sizeof(*d), GFP_KERNEL);
177 if (!d)
178 return -ENOMEM;
179
180 d->handler = h;
181 d->token = token;
182
183 list_add_tail(&d->list, &kvm->arch.rtas_tokens);
184
185 return 0;
186}
187
188int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
189{
190 struct kvm_rtas_token_args args;
191 int rc;
192
193 if (copy_from_user(&args, argp, sizeof(args)))
194 return -EFAULT;
195
196 mutex_lock(&kvm->lock);
197
198 if (args.token)
199 rc = rtas_token_define(kvm, args.name, args.token);
200 else
201 rc = rtas_token_undefine(kvm, args.name);
202
203 mutex_unlock(&kvm->lock);
204
205 return rc;
206}
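From userspace, tokens are installed through this new vm ioctl before the guest boots, once per RTAS service. A sketch, assuming the KVM_PPC_RTAS_DEFINE_TOKEN ioctl and the kvm_rtas_token_args layout (a name string plus a 64-bit token) added elsewhere in this series:

        #include <stdint.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Sketch: tell KVM that guest RTAS token 'token' means "ibm,set-xive".
         * Passing token == 0 undefines the mapping, as in the code above. */
        static int define_set_xive_token(int vm_fd, uint64_t token)
        {
                struct kvm_rtas_token_args args;

                memset(&args, 0, sizeof(args));
                strncpy(args.name, "ibm,set-xive", sizeof(args.name) - 1);
                args.token = token;

                return ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
        }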
207
208int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
209{
210 struct rtas_token_definition *d;
211 struct rtas_args args;
212 rtas_arg_t *orig_rets;
213 gpa_t args_phys;
214 int rc;
215
216 /* r4 contains the guest physical address of the RTAS args */
217 args_phys = kvmppc_get_gpr(vcpu, 4);
218
219 rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
220 if (rc)
221 goto fail;
222
223 /*
224 * args->rets is a pointer into args->args. Now that we've
225 * copied args we need to fix it up to point into our copy,
226 * not the guest args. We also need to save the original
227 * value so we can restore it on the way out.
228 */
229 orig_rets = args.rets;
230 args.rets = &args.args[args.nargs];
231
232 mutex_lock(&vcpu->kvm->lock);
233
234 rc = -ENOENT;
235 list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
236 if (d->token == args.token) {
237 d->handler->handler(vcpu, &args);
238 rc = 0;
239 break;
240 }
241 }
242
243 mutex_unlock(&vcpu->kvm->lock);
244
245 if (rc == 0) {
246 args.rets = orig_rets;
247 rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
248 if (rc)
249 goto fail;
250 }
251
252 return rc;
253
254fail:
255 /*
256 * We only get here if the guest has called RTAS with a bogus
257 * args pointer. That means we can't get to the args, and so we
258 * can't fail the RTAS call. So fail right out to userspace,
259 * which should kill the guest.
260 */
261 return rc;
262}
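A worked example of the rets fix-up described above, for ibm,get-xive (nargs = 1, nret = 3), assuming the usual asm/rtas.h layout (token, nargs, nret, an args[] array, and a rets pointer that aliases the tail of args[]):

        /* Sketch: where ibm,get-xive places its results once rets has been
         * pointed back into the local copy of the buffer */
        static void get_xive_rets_sketch(struct rtas_args *a)
        {
                a->rets = &a->args[a->nargs];   /* the fix-up performed above */
                a->rets[0] = 0;                 /* status: 0 ok, -3 bad param */
                a->rets[1] = 5;                 /* server number (example)    */
                a->rets[2] = 0xff;              /* priority (example: MASKED) */
                /* rets[0..2] alias args[1..3], which is exactly what
                 * kvm_write_guest() above copies back to the guest */
        }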
263
264void kvmppc_rtas_tokens_free(struct kvm *kvm)
265{
266 struct rtas_token_definition *d, *tmp;
267
268 lockdep_assert_held(&kvm->lock);
269
270 list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
271 list_del(&d->list);
272 kfree(d);
273 }
274}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 000000000000..ee841ed8a690
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1130 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/kvm_host.h>
12#include <linux/err.h>
13#include <linux/gfp.h>
14
15#include <asm/uaccess.h>
16#include <asm/kvm_book3s.h>
17#include <asm/kvm_ppc.h>
18#include <asm/hvcall.h>
19#include <asm/xics.h>
20#include <asm/debug.h>
21
22#include <linux/debugfs.h>
23#include <linux/seq_file.h>
24
25#include "book3s_xics.h"
26
27#if 1
28#define XICS_DBG(fmt...) do { } while (0)
29#else
30#define XICS_DBG(fmt...) trace_printk(fmt)
31#endif
32
33#define ENABLE_REALMODE true
34#define DEBUG_REALMODE false
35
36/*
37 * LOCKING
38 * =======
39 *
40 * Each ICS has a mutex protecting the information about the IRQ
41 * sources and avoiding simultaneous deliveries of the same interrupt.
42 *
43 * ICP operations are done via a single compare & swap transaction
44 * (most ICP state fits in the union kvmppc_icp_state)
45 */
46
47/*
48 * TODO
49 * ====
50 *
51 * - To speed up resends, keep a bitmap of "resend" set bits in the
52 * ICS
53 *
54 * - Speed up server# -> ICP lookup (array ? hash table ?)
55 *
56 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
57 * locks array to improve scalability
58 *
59 * - ioctl's to save/restore the entire state for snapshot & migration
60 */
61
62/* -- ICS routines -- */
63
64static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
65 u32 new_irq);
66
67static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
68{
69 struct ics_irq_state *state;
70 struct kvmppc_ics *ics;
71 u16 src;
72
73 XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
74
75 ics = kvmppc_xics_find_ics(xics, irq, &src);
76 if (!ics) {
77 XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
78 return -EINVAL;
79 }
80 state = &ics->irq_state[src];
81 if (!state->exists)
82 return -EINVAL;
83
84 /*
85 * We set state->asserted locklessly. This should be fine as
86 * we are the only setter, thus concurrent access is undefined
87 * to begin with.
88 */
89 if (level == KVM_INTERRUPT_SET_LEVEL)
90 state->asserted = 1;
91 else if (level == KVM_INTERRUPT_UNSET) {
92 state->asserted = 0;
93 return 0;
94 }
95
96 /* Attempt delivery */
97 icp_deliver_irq(xics, NULL, irq);
98
99 return 0;
100}
101
102static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
103 struct kvmppc_icp *icp)
104{
105 int i;
106
107 mutex_lock(&ics->lock);
108
109 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
110 struct ics_irq_state *state = &ics->irq_state[i];
111
112 if (!state->resend)
113 continue;
114
115 XICS_DBG("resend %#x prio %#x\n", state->number,
116 state->priority);
117
118 mutex_unlock(&ics->lock);
119 icp_deliver_irq(xics, icp, state->number);
120 mutex_lock(&ics->lock);
121 }
122
123 mutex_unlock(&ics->lock);
124}
125
126static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
127 struct ics_irq_state *state,
128 u32 server, u32 priority, u32 saved_priority)
129{
130 bool deliver;
131
132 mutex_lock(&ics->lock);
133
134 state->server = server;
135 state->priority = priority;
136 state->saved_priority = saved_priority;
137 deliver = false;
138 if ((state->masked_pending || state->resend) && priority != MASKED) {
139 state->masked_pending = 0;
140 deliver = true;
141 }
142
143 mutex_unlock(&ics->lock);
144
145 return deliver;
146}
147
148int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
149{
150 struct kvmppc_xics *xics = kvm->arch.xics;
151 struct kvmppc_icp *icp;
152 struct kvmppc_ics *ics;
153 struct ics_irq_state *state;
154 u16 src;
155
156 if (!xics)
157 return -ENODEV;
158
159 ics = kvmppc_xics_find_ics(xics, irq, &src);
160 if (!ics)
161 return -EINVAL;
162 state = &ics->irq_state[src];
163
164 icp = kvmppc_xics_find_server(kvm, server);
165 if (!icp)
166 return -EINVAL;
167
168 XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
169 irq, server, priority,
170 state->masked_pending, state->resend);
171
172 if (write_xive(xics, ics, state, server, priority, priority))
173 icp_deliver_irq(xics, icp, irq);
174
175 return 0;
176}
177
178int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
179{
180 struct kvmppc_xics *xics = kvm->arch.xics;
181 struct kvmppc_ics *ics;
182 struct ics_irq_state *state;
183 u16 src;
184
185 if (!xics)
186 return -ENODEV;
187
188 ics = kvmppc_xics_find_ics(xics, irq, &src);
189 if (!ics)
190 return -EINVAL;
191 state = &ics->irq_state[src];
192
193 mutex_lock(&ics->lock);
194 *server = state->server;
195 *priority = state->priority;
196 mutex_unlock(&ics->lock);
197
198 return 0;
199}
200
201int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
202{
203 struct kvmppc_xics *xics = kvm->arch.xics;
204 struct kvmppc_icp *icp;
205 struct kvmppc_ics *ics;
206 struct ics_irq_state *state;
207 u16 src;
208
209 if (!xics)
210 return -ENODEV;
211
212 ics = kvmppc_xics_find_ics(xics, irq, &src);
213 if (!ics)
214 return -EINVAL;
215 state = &ics->irq_state[src];
216
217 icp = kvmppc_xics_find_server(kvm, state->server);
218 if (!icp)
219 return -EINVAL;
220
221 if (write_xive(xics, ics, state, state->server, state->saved_priority,
222 state->saved_priority))
223 icp_deliver_irq(xics, icp, irq);
224
225 return 0;
226}
227
228int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
229{
230 struct kvmppc_xics *xics = kvm->arch.xics;
231 struct kvmppc_ics *ics;
232 struct ics_irq_state *state;
233 u16 src;
234
235 if (!xics)
236 return -ENODEV;
237
238 ics = kvmppc_xics_find_ics(xics, irq, &src);
239 if (!ics)
240 return -EINVAL;
241 state = &ics->irq_state[src];
242
243 write_xive(xics, ics, state, state->server, MASKED, state->priority);
244
245 return 0;
246}
247
248/* -- ICP routines, including hcalls -- */
249
250static inline bool icp_try_update(struct kvmppc_icp *icp,
251 union kvmppc_icp_state old,
252 union kvmppc_icp_state new,
253 bool change_self)
254{
255 bool success;
256
257 /* Calculate new output value */
258 new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
259
260 /* Attempt atomic update */
261 success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
262 if (!success)
263 goto bail;
264
265 XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
266 icp->server_num,
267 old.cppr, old.mfrr, old.pending_pri, old.xisr,
268 old.need_resend, old.out_ee);
269 XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
270 new.cppr, new.mfrr, new.pending_pri, new.xisr,
271 new.need_resend, new.out_ee);
272 /*
273 * Check for output state update
274 *
275 * Note that this is racy since another processor could be updating
276 * the state already. This is why we never clear the interrupt output
277 * here, we only ever set it. The clear only happens prior to doing
278 * an update and only by the processor itself. Currently we do it
279 * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
280 *
281 * We also do not try to figure out whether the EE state has changed,
282 * we unconditionally set it if the new state calls for it. The reason
283 * for that is that we opportunistically remove the pending interrupt
284 * flag when raising CPPR, so we need to set it back here if an
285 * interrupt is still pending.
286 */
287 if (new.out_ee) {
288 kvmppc_book3s_queue_irqprio(icp->vcpu,
289 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
290 if (!change_self)
291 kvmppc_fast_vcpu_kick(icp->vcpu);
292 }
293 bail:
294 return success;
295}
296
297static void icp_check_resend(struct kvmppc_xics *xics,
298 struct kvmppc_icp *icp)
299{
300 u32 icsid;
301
302 /* Order this load with the test for need_resend in the caller */
303 smp_rmb();
304 for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
305 struct kvmppc_ics *ics = xics->ics[icsid];
306
307 if (!test_and_clear_bit(icsid, icp->resend_map))
308 continue;
309 if (!ics)
310 continue;
311 ics_check_resend(xics, ics, icp);
312 }
313}
314
315static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
316 u32 *reject)
317{
318 union kvmppc_icp_state old_state, new_state;
319 bool success;
320
321 XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
322 icp->server_num);
323
324 do {
325 old_state = new_state = ACCESS_ONCE(icp->state);
326
327 *reject = 0;
328
329 /* See if we can deliver */
330 success = new_state.cppr > priority &&
331 new_state.mfrr > priority &&
332 new_state.pending_pri > priority;
333
334 /*
335 * If we can, check for a rejection and perform the
336 * delivery
337 */
338 if (success) {
339 *reject = new_state.xisr;
340 new_state.xisr = irq;
341 new_state.pending_pri = priority;
342 } else {
343 /*
344 * If we failed to deliver we set need_resend
345 * so a subsequent CPPR state change causes us
346 * to try a new delivery.
347 */
348 new_state.need_resend = true;
349 }
350
351 } while (!icp_try_update(icp, old_state, new_state, false));
352
353 return success;
354}
355
356static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
357 u32 new_irq)
358{
359 struct ics_irq_state *state;
360 struct kvmppc_ics *ics;
361 u32 reject;
362 u16 src;
363
364 /*
365 * This is used both for initial delivery of an interrupt and
366 * for subsequent rejection.
367 *
368 * Rejection can be racy vs. resends. We have evaluated the
369 * rejection in an atomic ICP transaction which is now complete,
370 * so potentially the ICP can already accept the interrupt again.
371 *
372 * So we need to retry the delivery. Essentially the reject path
373 * boils down to a failed delivery. Always.
374 *
375 * Now the interrupt could also have moved to a different target,
376 * thus we may need to re-do the ICP lookup as well
377 */
378
379 again:
380 /* Get the ICS state and lock it */
381 ics = kvmppc_xics_find_ics(xics, new_irq, &src);
382 if (!ics) {
383 XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
384 return;
385 }
386 state = &ics->irq_state[src];
387
388 /* Get a lock on the ICS */
389 mutex_lock(&ics->lock);
390
391 /* Get our server */
392 if (!icp || state->server != icp->server_num) {
393 icp = kvmppc_xics_find_server(xics->kvm, state->server);
394 if (!icp) {
395 pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
396 new_irq, state->server);
397 goto out;
398 }
399 }
400
401 /* Clear the resend bit of that interrupt */
402 state->resend = 0;
403
404 /*
405 * If masked, bail out
406 *
407 * Note: PAPR doesn't mention anything about masked pending
408 * when doing a resend, only when doing a delivery.
409 *
410 * However that would have the effect of losing a masked
411 * interrupt that was rejected and isn't consistent with
412 * the whole masked_pending business which is about not
413 * losing interrupts that occur while masked.
414 *
415 * I don't differentiate between normal deliveries and resends, this
416 * implementation will differ from PAPR and not lose such
417 * interrupts.
418 */
419 if (state->priority == MASKED) {
420 XICS_DBG("irq %#x masked pending\n", new_irq);
421 state->masked_pending = 1;
422 goto out;
423 }
424
425 /*
426 * Try the delivery, this will set the need_resend flag
427 * in the ICP as part of the atomic transaction if the
428 * delivery is not possible.
429 *
430 * Note that if successful, the new delivery might have itself
431 * rejected an interrupt that was "delivered" before we took the
432 * icp mutex.
433 *
434 * In this case we do the whole sequence all over again for the
435 * new guy. We cannot assume that the rejected interrupt is less
436 * favored than the new one, and thus doesn't need to be delivered,
437 * because by the time we exit icp_try_to_deliver() the target
438 * processor may well have already consumed & completed it, and thus
439 * the rejected interrupt might actually be already acceptable.
440 */
441 if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
442 /*
443 * Delivery was successful, did we reject somebody else ?
444 */
445 if (reject && reject != XICS_IPI) {
446 mutex_unlock(&ics->lock);
447 new_irq = reject;
448 goto again;
449 }
450 } else {
451 /*
452 * We failed to deliver the interrupt, so we need to set the
453 * resend map bit and mark the ICS state as needing a resend
454 */
455 set_bit(ics->icsid, icp->resend_map);
456 state->resend = 1;
457
458 /*
459 * If the need_resend flag got cleared in the ICP some time
460 * between icp_try_to_deliver()'s atomic update and now, then
461 * we know it might have missed the resend_map bit. So we
462 * retry
463 */
464 smp_mb();
465 if (!icp->state.need_resend) {
466 mutex_unlock(&ics->lock);
467 goto again;
468 }
469 }
470 out:
471 mutex_unlock(&ics->lock);
472}
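The reject-as-retry rule spelled out in the comments above boils down to a loop: a successful delivery may displace a previously pending source, and that displaced source is simply fed back into delivery, possibly landing on a different server after the ICS lookup. A compact sketch, where try_deliver() is a hypothetical stand-in for the ICS lookup plus icp_try_to_deliver() sequence:

        /* Sketch only: rejection and first delivery share one retry loop */
        static void deliver_with_retry(u32 new_irq,
                                       bool (*try_deliver)(u32 irq, u32 *reject))
        {
                u32 irq = new_irq;

                while (irq && irq != XICS_IPI) {
                        u32 reject = 0;

                        if (!try_deliver(irq, &reject))
                                break;  /* failed: need_resend was latched */
                        irq = reject;   /* displaced source, if any, goes
                                         * around again */
                }
        }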
473
474static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
475 u8 new_cppr)
476{
477 union kvmppc_icp_state old_state, new_state;
478 bool resend;
479
480 /*
481 * This handles several related states in one operation:
482 *
483 * ICP State: Down_CPPR
484 *
485 * Load CPPR with new value and if the XISR is 0
486 * then check for resends:
487 *
488 * ICP State: Resend
489 *
490 * If MFRR is more favored than CPPR, check for IPIs
491 * and notify ICS of a potential resend. This is done
492 * asynchronously (when used in real mode, we will have
493 * to exit here).
494 *
495 * We do not handle the complete Check_IPI as documented
496 * here. In the PAPR, this state will be used for both
497 * Set_MFRR and Down_CPPR. However, we know that we aren't
498 * changing the MFRR state here so we don't need to handle
499 * the case of an MFRR causing a reject of a pending irq,
500 * this will have been handled when the MFRR was set in the
501 * first place.
502 *
503 * Thus we don't have to handle rejects, only resends.
504 *
505 * When implementing real mode for HV KVM, resend will lead to
506 * a H_TOO_HARD return and the whole transaction will be handled
507 * in virtual mode.
508 */
509 do {
510 old_state = new_state = ACCESS_ONCE(icp->state);
511
512 /* Down_CPPR */
513 new_state.cppr = new_cppr;
514
515 /*
516 * Cut down Resend / Check_IPI / IPI
517 *
518 * The logic is that we cannot have a pending interrupt
519 * trumped by an IPI at this point (see above), so we
520 * know that either the pending interrupt is already an
521 * IPI (in which case we don't care to override it) or
522 * it's either more favored than us or non-existent
523 */
524 if (new_state.mfrr < new_cppr &&
525 new_state.mfrr <= new_state.pending_pri) {
526 WARN_ON(new_state.xisr != XICS_IPI &&
527 new_state.xisr != 0);
528 new_state.pending_pri = new_state.mfrr;
529 new_state.xisr = XICS_IPI;
530 }
531
532 /* Latch/clear resend bit */
533 resend = new_state.need_resend;
534 new_state.need_resend = 0;
535
536 } while (!icp_try_update(icp, old_state, new_state, true));
537
538 /*
539 * Now handle resend checks. Those are asynchronous to the ICP
540 * state update in HW (ie bus transactions) so we can handle them
541 * separately here too
542 */
543 if (resend)
544 icp_check_resend(xics, icp);
545}
546
547static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
548{
549 union kvmppc_icp_state old_state, new_state;
550 struct kvmppc_icp *icp = vcpu->arch.icp;
551 u32 xirr;
552
553 /* First, remove EE from the processor */
554 kvmppc_book3s_dequeue_irqprio(icp->vcpu,
555 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
556
557 /*
558 * ICP State: Accept_Interrupt
559 *
560 * Return the pending interrupt (if any) along with the
561 * current CPPR, then clear the XISR & set CPPR to the
562 * pending priority
563 */
564 do {
565 old_state = new_state = ACCESS_ONCE(icp->state);
566
567 xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
568 if (!old_state.xisr)
569 break;
570 new_state.cppr = new_state.pending_pri;
571 new_state.pending_pri = 0xff;
572 new_state.xisr = 0;
573
574 } while (!icp_try_update(icp, old_state, new_state, true));
575
576 XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
577
578 return xirr;
579}
580
581static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
582 unsigned long mfrr)
583{
584 union kvmppc_icp_state old_state, new_state;
585 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
586 struct kvmppc_icp *icp;
587 u32 reject;
588 bool resend;
589 bool local;
590
591 XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
592 vcpu->vcpu_id, server, mfrr);
593
594 icp = vcpu->arch.icp;
595 local = icp->server_num == server;
596 if (!local) {
597 icp = kvmppc_xics_find_server(vcpu->kvm, server);
598 if (!icp)
599 return H_PARAMETER;
600 }
601
602 /*
603 * ICP state: Set_MFRR
604 *
605 * If the CPPR is more favored than the new MFRR, then
606 * nothing needs to be rejected as there can be no XISR to
607 * reject. If the MFRR is being made less favored then
608 * there might be a previously-rejected interrupt needing
609 * to be resent.
610 *
611 * If the CPPR is less favored, then we might be replacing
612 * an interrupt, and thus need to possibly reject it as in
613 *
614 * ICP state: Check_IPI
615 */
616 do {
617 old_state = new_state = ACCESS_ONCE(icp->state);
618
619 /* Set_MFRR */
620 new_state.mfrr = mfrr;
621
622 /* Check_IPI */
623 reject = 0;
624 resend = false;
625 if (mfrr < new_state.cppr) {
626 /* Reject a pending interrupt if not an IPI */
627 if (mfrr <= new_state.pending_pri)
628 reject = new_state.xisr;
629 new_state.pending_pri = mfrr;
630 new_state.xisr = XICS_IPI;
631 }
632
633 if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
634 resend = new_state.need_resend;
635 new_state.need_resend = 0;
636 }
637 } while (!icp_try_update(icp, old_state, new_state, local));
638
639 /* Handle reject */
640 if (reject && reject != XICS_IPI)
641 icp_deliver_irq(xics, icp, reject);
642
643 /* Handle resend */
644 if (resend)
645 icp_check_resend(xics, icp);
646
647 return H_SUCCESS;
648}
649
650static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
651{
652 union kvmppc_icp_state old_state, new_state;
653 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
654 struct kvmppc_icp *icp = vcpu->arch.icp;
655 u32 reject;
656
657 XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
658
659 /*
660 * ICP State: Set_CPPR
661 *
662 * We can safely compare the new value with the current
663 * value outside of the transaction as the CPPR is only
664 * ever changed by the processor on itself
665 */
666 if (cppr > icp->state.cppr)
667 icp_down_cppr(xics, icp, cppr);
668 else if (cppr == icp->state.cppr)
669 return;
670
671 /*
672 * ICP State: Up_CPPR
673 *
674 * The processor is raising its priority, this can result
675 * in a rejection of a pending interrupt:
676 *
677 * ICP State: Reject_Current
678 *
679 * We can remove EE from the current processor, the update
680 * transaction will set it again if needed
681 */
682 kvmppc_book3s_dequeue_irqprio(icp->vcpu,
683 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
684
685 do {
686 old_state = new_state = ACCESS_ONCE(icp->state);
687
688 reject = 0;
689 new_state.cppr = cppr;
690
691 if (cppr <= new_state.pending_pri) {
692 reject = new_state.xisr;
693 new_state.xisr = 0;
694 new_state.pending_pri = 0xff;
695 }
696
697 } while (!icp_try_update(icp, old_state, new_state, true));
698
699 /*
700 * Check for rejects. They are handled by doing a new delivery
701 * attempt (see comments in icp_deliver_irq).
702 */
703 if (reject && reject != XICS_IPI)
704 icp_deliver_irq(xics, icp, reject);
705}
706
707static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
708{
709 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
710 struct kvmppc_icp *icp = vcpu->arch.icp;
711 struct kvmppc_ics *ics;
712 struct ics_irq_state *state;
713 u32 irq = xirr & 0x00ffffff;
714 u16 src;
715
716 XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
717
718 /*
719 * ICP State: EOI
720 *
721 * Note: If EOI is incorrectly used by SW to lower the CPPR
722 * value (ie more favored), we do not check for rejection of
723 * a pending interrupt, this is a SW error and PAPR specifies
724 * that we don't have to deal with it.
725 *
726 * The sending of an EOI to the ICS is handled after the
727 * CPPR update
728 *
729 * ICP State: Down_CPPR which we handle
730 * in a separate function as it's shared with H_CPPR.
731 */
732 icp_down_cppr(xics, icp, xirr >> 24);
733
734 /* IPIs have no EOI */
735 if (irq == XICS_IPI)
736 return H_SUCCESS;
737 /*
738 * EOI handling: If the interrupt is still asserted, we need to
739 * resend it. We can take a lockless "peek" at the ICS state here.
740 *
741 * "Message" interrupts will never have "asserted" set
742 */
743 ics = kvmppc_xics_find_ics(xics, irq, &src);
744 if (!ics) {
745 XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
746 return H_PARAMETER;
747 }
748 state = &ics->irq_state[src];
749
750 /* Still asserted, resend it */
751 if (state->asserted)
752 icp_deliver_irq(xics, icp, irq);
753
754 return H_SUCCESS;
755}
756
757static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
758{
759 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
760 struct kvmppc_icp *icp = vcpu->arch.icp;
761
762 XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
763 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
764
765 if (icp->rm_action & XICS_RM_KICK_VCPU)
766 kvmppc_fast_vcpu_kick(icp->rm_kick_target);
767 if (icp->rm_action & XICS_RM_CHECK_RESEND)
768 icp_check_resend(xics, icp);
769 if (icp->rm_action & XICS_RM_REJECT)
770 icp_deliver_irq(xics, icp, icp->rm_reject);
771
772 icp->rm_action = 0;
773
774 return H_SUCCESS;
775}
776
777int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
778{
779 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
780 unsigned long res;
781 int rc = H_SUCCESS;
782
783 /* Check if we have an ICP */
784 if (!xics || !vcpu->arch.icp)
785 return H_HARDWARE;
786
787 /* Check for real mode returning too hard */
788 if (xics->real_mode)
789 return kvmppc_xics_rm_complete(vcpu, req);
790
791 switch (req) {
792 case H_XIRR:
793 res = kvmppc_h_xirr(vcpu);
794 kvmppc_set_gpr(vcpu, 4, res);
795 break;
796 case H_CPPR:
797 kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
798 break;
799 case H_EOI:
800 rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
801 break;
802 case H_IPI:
803 rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
804 kvmppc_get_gpr(vcpu, 5));
805 break;
806 }
807
808 return rc;
809}
810
811
812/* -- Initialisation code etc. -- */
813
814static int xics_debug_show(struct seq_file *m, void *private)
815{
816 struct kvmppc_xics *xics = m->private;
817 struct kvm *kvm = xics->kvm;
818 struct kvm_vcpu *vcpu;
819 int icsid, i;
820
821 if (!kvm)
822 return 0;
823
824 seq_printf(m, "=========\nICP state\n=========\n");
825
826 kvm_for_each_vcpu(i, vcpu, kvm) {
827 struct kvmppc_icp *icp = vcpu->arch.icp;
828 union kvmppc_icp_state state;
829
830 if (!icp)
831 continue;
832
833 state.raw = ACCESS_ONCE(icp->state.raw);
834 seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
835 icp->server_num, state.xisr,
836 state.pending_pri, state.cppr, state.mfrr,
837 state.out_ee, state.need_resend);
838 }
839
840 for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
841 struct kvmppc_ics *ics = xics->ics[icsid];
842
843 if (!ics)
844 continue;
845
846 seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
847 icsid);
848
849 mutex_lock(&ics->lock);
850
851 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
852 struct ics_irq_state *irq = &ics->irq_state[i];
853
854 seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
855 irq->number, irq->server, irq->priority,
856 irq->saved_priority, irq->asserted,
857 irq->resend, irq->masked_pending);
858
859 }
860 mutex_unlock(&ics->lock);
861 }
862 return 0;
863}
864
865static int xics_debug_open(struct inode *inode, struct file *file)
866{
867 return single_open(file, xics_debug_show, inode->i_private);
868}
869
870static const struct file_operations xics_debug_fops = {
871 .open = xics_debug_open,
872 .read = seq_read,
873 .llseek = seq_lseek,
874 .release = single_release,
875};
876
877static void xics_debugfs_init(struct kvmppc_xics *xics)
878{
879 char *name;
880
881 name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
882 if (!name) {
883 pr_err("%s: no memory for name\n", __func__);
884 return;
885 }
886
887 xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
888 xics, &xics_debug_fops);
889
890 pr_debug("%s: created %s\n", __func__, name);
891 kfree(name);
892}
893
894struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
895 struct kvmppc_xics *xics, int irq)
896{
897 struct kvmppc_ics *ics;
898 int i, icsid;
899
900 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
901
902 mutex_lock(&kvm->lock);
903
904 /* ICS already exists - somebody else got here first */
905 if (xics->ics[icsid])
906 goto out;
907
908 /* Create the ICS */
909 ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
910 if (!ics)
911 goto out;
912
913 mutex_init(&ics->lock);
914 ics->icsid = icsid;
915
916 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
917 ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
918 ics->irq_state[i].priority = MASKED;
919 ics->irq_state[i].saved_priority = MASKED;
920 }
921 smp_wmb();
922 xics->ics[icsid] = ics;
923
924 if (icsid > xics->max_icsid)
925 xics->max_icsid = icsid;
926
927 out:
928 mutex_unlock(&kvm->lock);
929 return xics->ics[icsid];
930}
931
932int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
933{
934 struct kvmppc_icp *icp;
935
936 if (!vcpu->kvm->arch.xics)
937 return -ENODEV;
938
939 if (kvmppc_xics_find_server(vcpu->kvm, server_num))
940 return -EEXIST;
941
942 icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
943 if (!icp)
944 return -ENOMEM;
945
946 icp->vcpu = vcpu;
947 icp->server_num = server_num;
948 icp->state.mfrr = MASKED;
949 icp->state.pending_pri = MASKED;
950 vcpu->arch.icp = icp;
951
952 XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
953
954 return 0;
955}
956
957u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
958{
959 struct kvmppc_icp *icp = vcpu->arch.icp;
960 union kvmppc_icp_state state;
961
962 if (!icp)
963 return 0;
964 state = icp->state;
965 return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
966 ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
967 ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
968 ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
969}
970
971int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
972{
973 struct kvmppc_icp *icp = vcpu->arch.icp;
974 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
975 union kvmppc_icp_state old_state, new_state;
976 struct kvmppc_ics *ics;
977 u8 cppr, mfrr, pending_pri;
978 u32 xisr;
979 u16 src;
980 bool resend;
981
982 if (!icp || !xics)
983 return -ENOENT;
984
985 cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
986 xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
987 KVM_REG_PPC_ICP_XISR_MASK;
988 mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
989 pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
990
991 /* Require the new state to be internally consistent */
992 if (xisr == 0) {
993 if (pending_pri != 0xff)
994 return -EINVAL;
995 } else if (xisr == XICS_IPI) {
996 if (pending_pri != mfrr || pending_pri >= cppr)
997 return -EINVAL;
998 } else {
999 if (pending_pri >= mfrr || pending_pri >= cppr)
1000 return -EINVAL;
1001 ics = kvmppc_xics_find_ics(xics, xisr, &src);
1002 if (!ics)
1003 return -EINVAL;
1004 }
1005
1006 new_state.raw = 0;
1007 new_state.cppr = cppr;
1008 new_state.xisr = xisr;
1009 new_state.mfrr = mfrr;
1010 new_state.pending_pri = pending_pri;
1011
1012 /*
1013 * Deassert the CPU interrupt request.
1014 * icp_try_update will reassert it if necessary.
1015 */
1016 kvmppc_book3s_dequeue_irqprio(icp->vcpu,
1017 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
1018
1019 /*
1020 * Note that if we displace an interrupt from old_state.xisr,
1021 * we don't mark it as rejected. We expect userspace to set
1022 * the state of the interrupt sources to be consistent with
1023 * the ICP states (either before or afterwards, which doesn't
1024 * matter). We do handle resends due to CPPR becoming less
1025 * favoured because that is necessary to end up with a
1026 * consistent state in the situation where userspace restores
1027 * the ICS states before the ICP states.
1028 */
1029 do {
1030 old_state = ACCESS_ONCE(icp->state);
1031
1032 if (new_state.mfrr <= old_state.mfrr) {
1033 resend = false;
1034 new_state.need_resend = old_state.need_resend;
1035 } else {
1036 resend = old_state.need_resend;
1037 new_state.need_resend = 0;
1038 }
1039 } while (!icp_try_update(icp, old_state, new_state, false));
1040
1041 if (resend)
1042 icp_check_resend(xics, icp);
1043
1044 return 0;
1045}
1046
1047/* -- ioctls -- */
1048
1049int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
1050{
1051 struct kvmppc_xics *xics;
1052 int r;
1053
1054 /* locking against multiple callers? */
1055
1056 xics = kvm->arch.xics;
1057 if (!xics)
1058 return -ENODEV;
1059
1060 switch (args->level) {
1061 case KVM_INTERRUPT_SET:
1062 case KVM_INTERRUPT_SET_LEVEL:
1063 case KVM_INTERRUPT_UNSET:
1064 r = ics_deliver_irq(xics, args->irq, args->level);
1065 break;
1066 default:
1067 r = -EINVAL;
1068 }
1069
1070 return r;
1071}
1072
1073void kvmppc_xics_free(struct kvmppc_xics *xics)
1074{
1075 int i;
1076 struct kvm *kvm = xics->kvm;
1077
1078 debugfs_remove(xics->dentry);
1079
1080 if (kvm)
1081 kvm->arch.xics = NULL;
1082
1083 for (i = 0; i <= xics->max_icsid; i++)
1084 kfree(xics->ics[i]);
1085 kfree(xics);
1086}
1087
1088int kvm_xics_create(struct kvm *kvm, u32 type)
1089{
1090 struct kvmppc_xics *xics;
1091 int ret = 0;
1092
1093 xics = kzalloc(sizeof(*xics), GFP_KERNEL);
1094 if (!xics)
1095 return -ENOMEM;
1096
1097 xics->kvm = kvm;
1098
1099 /* Already there ? */
1100 mutex_lock(&kvm->lock);
1101 if (kvm->arch.xics)
1102 ret = -EEXIST;
1103 else
1104 kvm->arch.xics = xics;
1105 mutex_unlock(&kvm->lock);
1106
1107 if (ret)
1108 return ret;
1109
1110 xics_debugfs_init(xics);
1111
1112#ifdef CONFIG_KVM_BOOK3S_64_HV
1113 if (cpu_has_feature(CPU_FTR_ARCH_206)) {
1114 /* Enable real mode support */
1115 xics->real_mode = ENABLE_REALMODE;
1116 xics->real_mode_dbg = DEBUG_REALMODE;
1117 }
1118#endif /* CONFIG_KVM_BOOK3S_64_HV */
1119
1120 return 0;
1121}
1122
1123void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
1124{
1125 if (!vcpu->arch.icp)
1126 return;
1127 kfree(vcpu->arch.icp);
1128 vcpu->arch.icp = NULL;
1129 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
1130}
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 000000000000..e4fdec3dde77
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,129 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef _KVM_PPC_BOOK3S_XICS_H
11#define _KVM_PPC_BOOK3S_XICS_H
12
13/*
14 * We use a two-level tree to store interrupt source information.
15 * There are up to 1024 ICS nodes, each of which can represent
16 * 1024 sources.
17 */
18#define KVMPPC_XICS_MAX_ICS_ID 1023
19#define KVMPPC_XICS_ICS_SHIFT 10
20#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT)
21#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1)
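/*
 * Worked example (illustrative sketch using only the macros above):
 * a global interrupt number splits into an ICS index and a per-ICS
 * source offset, which is exactly the decomposition done by
 * kvmppc_xics_find_ics() further down in this header.
 *
 *	irq = 0x1234:
 *		icsid = irq >> KVMPPC_XICS_ICS_SHIFT	-> 0x4
 *		src   = irq & KVMPPC_XICS_SRC_MASK	-> 0x234
 */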
22
23/*
24 * Interrupt source numbers below this are reserved, for example
25 * 0 is "no interrupt", and 2 is used for IPIs.
26 */
27#define KVMPPC_XICS_FIRST_IRQ 16
28#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \
29 KVMPPC_XICS_IRQ_PER_ICS)
30
31/* Priority value to use for disabling an interrupt */
32#define MASKED 0xff
33
34/* State for one irq source */
35struct ics_irq_state {
36 u32 number;
37 u32 server;
38 u8 priority;
39 u8 saved_priority;
40 u8 resend;
41 u8 masked_pending;
42 u8 asserted; /* Only for LSI */
43 u8 exists;
44};
45
46/* Atomic ICP state, updated with a single compare & swap */
47union kvmppc_icp_state {
48 unsigned long raw;
49 struct {
50 u8 out_ee:1;
51 u8 need_resend:1;
52 u8 cppr;
53 u8 mfrr;
54 u8 pending_pri;
55 u32 xisr;
56 };
57};
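/*
 * A minimal sketch of the lock-free update pattern applied to this
 * union (compare the do/while loop in kvmppc_xics_set_icp()): snapshot
 * the whole state through ->raw, build the new value, and commit it
 * only if nobody raced with us.  The icp_try_update() helper that
 * callers actually use wraps this compare-and-swap and also takes care
 * of any resulting interrupt delivery.
 *
 *	union kvmppc_icp_state old, new;
 *
 *	do {
 *		old.raw = ACCESS_ONCE(icp->state.raw);
 *		new = old;
 *		new.mfrr = mfrr;	/- example field change -/
 *	} while (cmpxchg(&icp->state.raw, old.raw, new.raw) != old.raw);
 */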
58
59/* One bit per ICS */
60#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
61
62struct kvmppc_icp {
63 struct kvm_vcpu *vcpu;
64 unsigned long server_num;
65 union kvmppc_icp_state state;
66 unsigned long resend_map[ICP_RESEND_MAP_SIZE];
67
68 /* Real mode might find something too hard, here's the action
69 * it might request from virtual mode
70 */
71#define XICS_RM_KICK_VCPU 0x1
72#define XICS_RM_CHECK_RESEND 0x2
73#define XICS_RM_REJECT 0x4
74 u32 rm_action;
75 struct kvm_vcpu *rm_kick_target;
76 u32 rm_reject;
77
78 /* Debug stuff for real mode */
79 union kvmppc_icp_state rm_dbgstate;
80 struct kvm_vcpu *rm_dbgtgt;
81};
82
83struct kvmppc_ics {
84 struct mutex lock;
85 u16 icsid;
86 struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
87};
88
89struct kvmppc_xics {
90 struct kvm *kvm;
91 struct dentry *dentry;
92 u32 max_icsid;
93 bool real_mode;
94 bool real_mode_dbg;
95 struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
96};
97
98static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
99 u32 nr)
100{
101 struct kvm_vcpu *vcpu = NULL;
102 int i;
103
104 kvm_for_each_vcpu(i, vcpu, kvm) {
105 if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
106 return vcpu->arch.icp;
107 }
108 return NULL;
109}
110
111static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
112 u32 irq, u16 *source)
113{
114 u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
115 u16 src = irq & KVMPPC_XICS_SRC_MASK;
116 struct kvmppc_ics *ics;
117
118 if (source)
119 *source = src;
120 if (icsid > KVMPPC_XICS_MAX_ICS_ID)
121 return NULL;
122 ics = xics->ics[icsid];
123 if (!ics)
124 return NULL;
125 return ics;
126}
127
128
129#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a49a68a25c39..1020119226db 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -346,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
346 keep_irq = true; 346 keep_irq = true;
347 } 347 }
348 348
349 if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) 349 if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
350 update_epr = true; 350 update_epr = true;
351 351
352 switch (priority) { 352 switch (priority) {
@@ -427,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
427 set_guest_esr(vcpu, vcpu->arch.queued_esr); 427 set_guest_esr(vcpu, vcpu->arch.queued_esr);
428 if (update_dear == true) 428 if (update_dear == true)
429 set_guest_dear(vcpu, vcpu->arch.queued_dear); 429 set_guest_dear(vcpu, vcpu->arch.queued_dear);
430 if (update_epr == true) 430 if (update_epr == true) {
431 kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); 431 if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
432 kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
433 else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
434 BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
435 kvmppc_mpic_set_epr(vcpu);
436 }
437 }
432 438
433 new_msr &= msr_mask; 439 new_msr &= msr_mask;
434#if defined(CONFIG_64BIT) 440#if defined(CONFIG_64BIT)
@@ -745,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
745 kvmppc_core_queue_program(vcpu, ESR_PIL); 751 kvmppc_core_queue_program(vcpu, ESR_PIL);
746 return RESUME_HOST; 752 return RESUME_HOST;
747 753
754 case EMULATE_EXIT_USER:
755 return RESUME_HOST;
756
748 default: 757 default:
749 BUG(); 758 BUG();
750 } 759 }
@@ -1412,120 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1412 1421
1413int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1422int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1414{ 1423{
1415 int r = -EINVAL; 1424 int r = 0;
1425 union kvmppc_one_reg val;
1426 int size;
1427 long int i;
1428
1429 size = one_reg_size(reg->id);
1430 if (size > sizeof(val))
1431 return -EINVAL;
1416 1432
1417 switch (reg->id) { 1433 switch (reg->id) {
1418 case KVM_REG_PPC_IAC1: 1434 case KVM_REG_PPC_IAC1:
1419 case KVM_REG_PPC_IAC2: 1435 case KVM_REG_PPC_IAC2:
1420 case KVM_REG_PPC_IAC3: 1436 case KVM_REG_PPC_IAC3:
1421 case KVM_REG_PPC_IAC4: { 1437 case KVM_REG_PPC_IAC4:
1422 int iac = reg->id - KVM_REG_PPC_IAC1; 1438 i = reg->id - KVM_REG_PPC_IAC1;
1423 r = copy_to_user((u64 __user *)(long)reg->addr, 1439 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
1424 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
1425 break; 1440 break;
1426 }
1427 case KVM_REG_PPC_DAC1: 1441 case KVM_REG_PPC_DAC1:
1428 case KVM_REG_PPC_DAC2: { 1442 case KVM_REG_PPC_DAC2:
1429 int dac = reg->id - KVM_REG_PPC_DAC1; 1443 i = reg->id - KVM_REG_PPC_DAC1;
1430 r = copy_to_user((u64 __user *)(long)reg->addr, 1444 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
1431 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
1432 break; 1445 break;
1433 }
1434 case KVM_REG_PPC_EPR: { 1446 case KVM_REG_PPC_EPR: {
1435 u32 epr = get_guest_epr(vcpu); 1447 u32 epr = get_guest_epr(vcpu);
1436 r = put_user(epr, (u32 __user *)(long)reg->addr); 1448 val = get_reg_val(reg->id, epr);
1437 break; 1449 break;
1438 } 1450 }
1439#if defined(CONFIG_64BIT) 1451#if defined(CONFIG_64BIT)
1440 case KVM_REG_PPC_EPCR: 1452 case KVM_REG_PPC_EPCR:
1441 r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); 1453 val = get_reg_val(reg->id, vcpu->arch.epcr);
1442 break; 1454 break;
1443#endif 1455#endif
1444 case KVM_REG_PPC_TCR: 1456 case KVM_REG_PPC_TCR:
1445 r = put_user(vcpu->arch.tcr, (u32 __user *)(long)reg->addr); 1457 val = get_reg_val(reg->id, vcpu->arch.tcr);
1446 break; 1458 break;
1447 case KVM_REG_PPC_TSR: 1459 case KVM_REG_PPC_TSR:
1448 r = put_user(vcpu->arch.tsr, (u32 __user *)(long)reg->addr); 1460 val = get_reg_val(reg->id, vcpu->arch.tsr);
1449 break; 1461 break;
1450 case KVM_REG_PPC_DEBUG_INST: { 1462 case KVM_REG_PPC_DEBUG_INST:
1451 u32 opcode = KVMPPC_INST_EHPRIV; 1463 val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
1452 r = copy_to_user((u32 __user *)(long)reg->addr,
1453 &opcode, sizeof(u32));
1454 break; 1464 break;
1455 }
1456 default: 1465 default:
1466 r = kvmppc_get_one_reg(vcpu, reg->id, &val);
1457 break; 1467 break;
1458 } 1468 }
1469
1470 if (r)
1471 return r;
1472
1473 if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
1474 r = -EFAULT;
1475
1459 return r; 1476 return r;
1460} 1477}
1461 1478
1462int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1479int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1463{ 1480{
1464 int r = -EINVAL; 1481 int r = 0;
1482 union kvmppc_one_reg val;
1483 int size;
1484 long int i;
1485
1486 size = one_reg_size(reg->id);
1487 if (size > sizeof(val))
1488 return -EINVAL;
1489
1490 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
1491 return -EFAULT;
1465 1492
1466 switch (reg->id) { 1493 switch (reg->id) {
1467 case KVM_REG_PPC_IAC1: 1494 case KVM_REG_PPC_IAC1:
1468 case KVM_REG_PPC_IAC2: 1495 case KVM_REG_PPC_IAC2:
1469 case KVM_REG_PPC_IAC3: 1496 case KVM_REG_PPC_IAC3:
1470 case KVM_REG_PPC_IAC4: { 1497 case KVM_REG_PPC_IAC4:
1471 int iac = reg->id - KVM_REG_PPC_IAC1; 1498 i = reg->id - KVM_REG_PPC_IAC1;
1472 r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], 1499 vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
1473 (u64 __user *)(long)reg->addr, sizeof(u64));
1474 break; 1500 break;
1475 }
1476 case KVM_REG_PPC_DAC1: 1501 case KVM_REG_PPC_DAC1:
1477 case KVM_REG_PPC_DAC2: { 1502 case KVM_REG_PPC_DAC2:
1478 int dac = reg->id - KVM_REG_PPC_DAC1; 1503 i = reg->id - KVM_REG_PPC_DAC1;
1479 r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], 1504 vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
1480 (u64 __user *)(long)reg->addr, sizeof(u64));
1481 break; 1505 break;
1482 }
1483 case KVM_REG_PPC_EPR: { 1506 case KVM_REG_PPC_EPR: {
1484 u32 new_epr; 1507 u32 new_epr = set_reg_val(reg->id, val);
1485 r = get_user(new_epr, (u32 __user *)(long)reg->addr); 1508 kvmppc_set_epr(vcpu, new_epr);
1486 if (!r)
1487 kvmppc_set_epr(vcpu, new_epr);
1488 break; 1509 break;
1489 } 1510 }
1490#if defined(CONFIG_64BIT) 1511#if defined(CONFIG_64BIT)
1491 case KVM_REG_PPC_EPCR: { 1512 case KVM_REG_PPC_EPCR: {
1492 u32 new_epcr; 1513 u32 new_epcr = set_reg_val(reg->id, val);
1493 r = get_user(new_epcr, (u32 __user *)(long)reg->addr); 1514 kvmppc_set_epcr(vcpu, new_epcr);
1494 if (r == 0)
1495 kvmppc_set_epcr(vcpu, new_epcr);
1496 break; 1515 break;
1497 } 1516 }
1498#endif 1517#endif
1499 case KVM_REG_PPC_OR_TSR: { 1518 case KVM_REG_PPC_OR_TSR: {
1500 u32 tsr_bits; 1519 u32 tsr_bits = set_reg_val(reg->id, val);
1501 r = get_user(tsr_bits, (u32 __user *)(long)reg->addr);
1502 kvmppc_set_tsr_bits(vcpu, tsr_bits); 1520 kvmppc_set_tsr_bits(vcpu, tsr_bits);
1503 break; 1521 break;
1504 } 1522 }
1505 case KVM_REG_PPC_CLEAR_TSR: { 1523 case KVM_REG_PPC_CLEAR_TSR: {
1506 u32 tsr_bits; 1524 u32 tsr_bits = set_reg_val(reg->id, val);
1507 r = get_user(tsr_bits, (u32 __user *)(long)reg->addr);
1508 kvmppc_clr_tsr_bits(vcpu, tsr_bits); 1525 kvmppc_clr_tsr_bits(vcpu, tsr_bits);
1509 break; 1526 break;
1510 } 1527 }
1511 case KVM_REG_PPC_TSR: { 1528 case KVM_REG_PPC_TSR: {
1512 u32 tsr; 1529 u32 tsr = set_reg_val(reg->id, val);
1513 r = get_user(tsr, (u32 __user *)(long)reg->addr);
1514 kvmppc_set_tsr(vcpu, tsr); 1530 kvmppc_set_tsr(vcpu, tsr);
1515 break; 1531 break;
1516 } 1532 }
1517 case KVM_REG_PPC_TCR: { 1533 case KVM_REG_PPC_TCR: {
1518 u32 tcr; 1534 u32 tcr = set_reg_val(reg->id, val);
1519 r = get_user(tcr, (u32 __user *)(long)reg->addr);
1520 kvmppc_set_tcr(vcpu, tcr); 1535 kvmppc_set_tcr(vcpu, tcr);
1521 break; 1536 break;
1522 } 1537 }
1523 default: 1538 default:
1539 r = kvmppc_set_one_reg(vcpu, reg->id, &val);
1524 break; 1540 break;
1525 } 1541 }
1542
1526 return r; 1543 return r;
1527} 1544}
1528 1545
1546int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1547 struct kvm_guest_debug *dbg)
1548{
1549 return -EINVAL;
1550}
1551
1529int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1552int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1530{ 1553{
1531 return -ENOTSUPP; 1554 return -ENOTSUPP;
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 6dd4de7802bf..ce6b73c29612 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
425 return kvmppc_set_sregs_ivor(vcpu, sregs); 425 return kvmppc_set_sregs_ivor(vcpu, sregs);
426} 426}
427 427
428int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
429 union kvmppc_one_reg *val)
430{
431 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
432 return r;
433}
434
435int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
436 union kvmppc_one_reg *val)
437{
438	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
439 return r;
440}
441
428struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 442struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
429{ 443{
430 struct kvmppc_vcpu_e500 *vcpu_e500; 444 struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a8ce24..c2e5e98453a6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
23#include <asm/mmu-book3e.h> 23#include <asm/mmu-book3e.h>
24#include <asm/tlb.h> 24#include <asm/tlb.h>
25 25
26enum vcpu_ftr {
27 VCPU_FTR_MMU_V2
28};
29
26#define E500_PID_NUM 3 30#define E500_PID_NUM 3
27#define E500_TLB_NUM 2 31#define E500_TLB_NUM 2
28 32
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
131void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 135void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
132int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 136int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
133 137
138int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
139 union kvmppc_one_reg *val);
140int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
141 union kvmppc_one_reg *val);
134 142
135#ifdef CONFIG_KVM_E500V2 143#ifdef CONFIG_KVM_E500V2
136unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, 144unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
295#define get_tlb_sts(gtlbe) (MAS1_TS) 303#define get_tlb_sts(gtlbe) (MAS1_TS)
296#endif /* !BOOKE_HV */ 304#endif /* !BOOKE_HV */
297 305
306static inline bool has_feature(const struct kvm_vcpu *vcpu,
307 enum vcpu_ftr ftr)
308{
309 bool has_ftr;
310 switch (ftr) {
311 case VCPU_FTR_MMU_V2:
312 has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
313 break;
314 default:
315 return false;
316 }
317 return has_ftr;
318}
319
298#endif /* KVM_E500_H */ 320#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353a836a..b10a01243abd 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
284 case SPRN_TLB1CFG: 284 case SPRN_TLB1CFG:
285 *spr_val = vcpu->arch.tlbcfg[1]; 285 *spr_val = vcpu->arch.tlbcfg[1];
286 break; 286 break;
287 case SPRN_TLB0PS:
288 if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
289 return EMULATE_FAIL;
290 *spr_val = vcpu->arch.tlbps[0];
291 break;
292 case SPRN_TLB1PS:
293 if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
294 return EMULATE_FAIL;
295 *spr_val = vcpu->arch.tlbps[1];
296 break;
287 case SPRN_L1CSR0: 297 case SPRN_L1CSR0:
288 *spr_val = vcpu_e500->l1csr0; 298 *spr_val = vcpu_e500->l1csr0;
289 break; 299 break;
@@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
307 case SPRN_MMUCFG: 317 case SPRN_MMUCFG:
308 *spr_val = vcpu->arch.mmucfg; 318 *spr_val = vcpu->arch.mmucfg;
309 break; 319 break;
320 case SPRN_EPTCFG:
321 if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
322 return EMULATE_FAIL;
323 /*
324 * Legacy Linux guests access EPTCFG register even if the E.PT
325 * category is disabled in the VM. Give them a chance to live.
326 */
327 *spr_val = vcpu->arch.eptcfg;
328 break;
310 329
311 /* extra exceptions */ 330 /* extra exceptions */
312 case SPRN_IVOR32: 331 case SPRN_IVOR32:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 5c4475983f78..c41a5a96b558 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
596 return 0; 596 return 0;
597} 597}
598 598
599int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
600 union kvmppc_one_reg *val)
601{
602 int r = 0;
603 long int i;
604
605 switch (id) {
606 case KVM_REG_PPC_MAS0:
607 *val = get_reg_val(id, vcpu->arch.shared->mas0);
608 break;
609 case KVM_REG_PPC_MAS1:
610 *val = get_reg_val(id, vcpu->arch.shared->mas1);
611 break;
612 case KVM_REG_PPC_MAS2:
613 *val = get_reg_val(id, vcpu->arch.shared->mas2);
614 break;
615 case KVM_REG_PPC_MAS7_3:
616 *val = get_reg_val(id, vcpu->arch.shared->mas7_3);
617 break;
618 case KVM_REG_PPC_MAS4:
619 *val = get_reg_val(id, vcpu->arch.shared->mas4);
620 break;
621 case KVM_REG_PPC_MAS6:
622 *val = get_reg_val(id, vcpu->arch.shared->mas6);
623 break;
624 case KVM_REG_PPC_MMUCFG:
625 *val = get_reg_val(id, vcpu->arch.mmucfg);
626 break;
627 case KVM_REG_PPC_EPTCFG:
628 *val = get_reg_val(id, vcpu->arch.eptcfg);
629 break;
630 case KVM_REG_PPC_TLB0CFG:
631 case KVM_REG_PPC_TLB1CFG:
632 case KVM_REG_PPC_TLB2CFG:
633 case KVM_REG_PPC_TLB3CFG:
634 i = id - KVM_REG_PPC_TLB0CFG;
635 *val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
636 break;
637 case KVM_REG_PPC_TLB0PS:
638 case KVM_REG_PPC_TLB1PS:
639 case KVM_REG_PPC_TLB2PS:
640 case KVM_REG_PPC_TLB3PS:
641 i = id - KVM_REG_PPC_TLB0PS;
642 *val = get_reg_val(id, vcpu->arch.tlbps[i]);
643 break;
644 default:
645 r = -EINVAL;
646 break;
647 }
648
649 return r;
650}
651
652int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
653 union kvmppc_one_reg *val)
654{
655 int r = 0;
656 long int i;
657
658 switch (id) {
659 case KVM_REG_PPC_MAS0:
660 vcpu->arch.shared->mas0 = set_reg_val(id, *val);
661 break;
662 case KVM_REG_PPC_MAS1:
663 vcpu->arch.shared->mas1 = set_reg_val(id, *val);
664 break;
665 case KVM_REG_PPC_MAS2:
666 vcpu->arch.shared->mas2 = set_reg_val(id, *val);
667 break;
668 case KVM_REG_PPC_MAS7_3:
669 vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
670 break;
671 case KVM_REG_PPC_MAS4:
672 vcpu->arch.shared->mas4 = set_reg_val(id, *val);
673 break;
674 case KVM_REG_PPC_MAS6:
675 vcpu->arch.shared->mas6 = set_reg_val(id, *val);
676 break;
677 /* Only allow MMU registers to be set to the config supported by KVM */
678 case KVM_REG_PPC_MMUCFG: {
679 u32 reg = set_reg_val(id, *val);
680 if (reg != vcpu->arch.mmucfg)
681 r = -EINVAL;
682 break;
683 }
684 case KVM_REG_PPC_EPTCFG: {
685 u32 reg = set_reg_val(id, *val);
686 if (reg != vcpu->arch.eptcfg)
687 r = -EINVAL;
688 break;
689 }
690 case KVM_REG_PPC_TLB0CFG:
691 case KVM_REG_PPC_TLB1CFG:
692 case KVM_REG_PPC_TLB2CFG:
693 case KVM_REG_PPC_TLB3CFG: {
694 /* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
695 u32 reg = set_reg_val(id, *val);
696 i = id - KVM_REG_PPC_TLB0CFG;
697 if (reg != vcpu->arch.tlbcfg[i])
698 r = -EINVAL;
699 break;
700 }
701 case KVM_REG_PPC_TLB0PS:
702 case KVM_REG_PPC_TLB1PS:
703 case KVM_REG_PPC_TLB2PS:
704 case KVM_REG_PPC_TLB3PS: {
705 u32 reg = set_reg_val(id, *val);
706 i = id - KVM_REG_PPC_TLB0PS;
707 if (reg != vcpu->arch.tlbps[i])
708 r = -EINVAL;
709 break;
710 }
711 default:
712 r = -EINVAL;
713 break;
714 }
715
716 return r;
717}
718
719static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
720 struct kvm_book3e_206_tlb_params *params)
721{
722 vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
723 if (params->tlb_sizes[0] <= 2048)
724 vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
725 vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
726
727 vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
728 vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
729 vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
730 return 0;
731}
732
599int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, 733int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
600 struct kvm_config_tlb *cfg) 734 struct kvm_config_tlb *cfg)
601{ 735{
@@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
692 vcpu_e500->gtlb_offset[0] = 0; 826 vcpu_e500->gtlb_offset[0] = 0;
693 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; 827 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
694 828
695 vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; 829 /* Update vcpu's MMU geometry based on SW_TLB input */
696 830 vcpu_mmu_geometry_update(vcpu, &params);
697 vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
698 if (params.tlb_sizes[0] <= 2048)
699 vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
700 vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
701
702 vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
703 vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
704 vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
705 831
706 vcpu_e500->shared_tlb_pages = pages; 832 vcpu_e500->shared_tlb_pages = pages;
707 vcpu_e500->num_shared_tlb_pages = num_pages; 833 vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
737 return 0; 863 return 0;
738} 864}
739 865
866/* Vcpu's MMU default configuration */
867static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
868 struct kvmppc_e500_tlb_params *params)
869{
870	/* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values */
871 vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
872
873	/* Initialize TLBnCFG fields with host values and SW_TLB geometry */
874 vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
875 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
876 vcpu->arch.tlbcfg[0] |= params[0].entries;
877 vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
878
879 vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
880 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
881 vcpu->arch.tlbcfg[1] |= params[1].entries;
882 vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
883
884 if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
885 vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
886 vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
887
888 vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
889
890 /* Guest mmu emulation currently doesn't handle E.PT */
891 vcpu->arch.eptcfg = 0;
892 vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
893 vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
894 }
895
896 return 0;
897}
898
740int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) 899int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
741{ 900{
742 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; 901 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
781 if (!vcpu_e500->g2h_tlb1_map) 940 if (!vcpu_e500->g2h_tlb1_map)
782 goto err; 941 goto err;
783 942
784 /* Init TLB configuration register */ 943 vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
785 vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
786 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
787 vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
788 vcpu->arch.tlbcfg[0] |=
789 vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
790
791 vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
792 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
793 vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
794 vcpu->arch.tlbcfg[1] |=
795 vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
796 944
797 kvmppc_recalc_tlb1map_range(vcpu_e500); 945 kvmppc_recalc_tlb1map_range(vcpu_e500);
798 return 0; 946 return 0;
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 1f89d26e65fb..c3bdc0aeabe2 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -172,6 +172,8 @@ int kvmppc_core_check_processor_compat(void)
172 r = 0; 172 r = 0;
173 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) 173 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
174 r = 0; 174 r = 0;
175 else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
176 r = 0;
175 else 177 else
176 r = -ENOTSUPP; 178 r = -ENOTSUPP;
177 179
@@ -255,6 +257,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
255 return kvmppc_set_sregs_ivor(vcpu, sregs); 257 return kvmppc_set_sregs_ivor(vcpu, sregs);
256} 258}
257 259
260int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
261 union kvmppc_one_reg *val)
262{
263 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
264 return r;
265}
266
267int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
268 union kvmppc_one_reg *val)
269{
270 int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
271 return r;
272}
273
258struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 274struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
259{ 275{
260 struct kvmppc_vcpu_e500 *vcpu_e500; 276 struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
new file mode 100644
index 000000000000..f1e27fdc8c2e
--- /dev/null
+++ b/arch/powerpc/kvm/irq.h
@@ -0,0 +1,17 @@
1#ifndef __IRQ_H
2#define __IRQ_H
3
4#include <linux/kvm_host.h>
5
6static inline int irqchip_in_kernel(struct kvm *kvm)
7{
8 int ret = 0;
9
10#ifdef CONFIG_KVM_MPIC
11 ret = ret || (kvm->arch.mpic != NULL);
12#endif
13 smp_rmb();
14 return ret;
15}
16
17#endif
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
new file mode 100644
index 000000000000..f3148f8cdc12
--- /dev/null
+++ b/arch/powerpc/kvm/mpic.c
@@ -0,0 +1,1843 @@
1/*
2 * OpenPIC emulation
3 *
4 * Copyright (c) 2004 Jocelyn Mayer
5 * 2011 Alexander Graf
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26#include <linux/slab.h>
27#include <linux/mutex.h>
28#include <linux/kvm_host.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/anon_inodes.h>
32#include <asm/uaccess.h>
33#include <asm/mpic.h>
34#include <asm/kvm_para.h>
35#include <asm/kvm_host.h>
36#include <asm/kvm_ppc.h>
37#include "iodev.h"
38
39#define MAX_CPU 32
40#define MAX_SRC 256
41#define MAX_TMR 4
42#define MAX_IPI 4
43#define MAX_MSI 8
44#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR)
45#define VID 0x03 /* MPIC version ID */
46
47/* OpenPIC capability flags */
48#define OPENPIC_FLAG_IDR_CRIT (1 << 0)
49#define OPENPIC_FLAG_ILR (2 << 0)
50
51/* OpenPIC address map */
52#define OPENPIC_REG_SIZE 0x40000
53#define OPENPIC_GLB_REG_START 0x0
54#define OPENPIC_GLB_REG_SIZE 0x10F0
55#define OPENPIC_TMR_REG_START 0x10F0
56#define OPENPIC_TMR_REG_SIZE 0x220
57#define OPENPIC_MSI_REG_START 0x1600
58#define OPENPIC_MSI_REG_SIZE 0x200
59#define OPENPIC_SUMMARY_REG_START 0x3800
60#define OPENPIC_SUMMARY_REG_SIZE 0x800
61#define OPENPIC_SRC_REG_START 0x10000
62#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20)
63#define OPENPIC_CPU_REG_START 0x20000
64#define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000))
65
66struct fsl_mpic_info {
67 int max_ext;
68};
69
70static struct fsl_mpic_info fsl_mpic_20 = {
71 .max_ext = 12,
72};
73
74static struct fsl_mpic_info fsl_mpic_42 = {
75 .max_ext = 12,
76};
77
78#define FRR_NIRQ_SHIFT 16
79#define FRR_NCPU_SHIFT 8
80#define FRR_VID_SHIFT 0
81
82#define VID_REVISION_1_2 2
83#define VID_REVISION_1_3 3
84
85#define VIR_GENERIC 0x00000000 /* Generic Vendor ID */
86
87#define GCR_RESET 0x80000000
88#define GCR_MODE_PASS 0x00000000
89#define GCR_MODE_MIXED 0x20000000
90#define GCR_MODE_PROXY 0x60000000
91
92#define TBCR_CI 0x80000000 /* count inhibit */
93#define TCCR_TOG 0x80000000 /* toggles when decrement to zero */
94
95#define IDR_EP_SHIFT 31
96#define IDR_EP_MASK (1 << IDR_EP_SHIFT)
97#define IDR_CI0_SHIFT 30
98#define IDR_CI1_SHIFT 29
99#define IDR_P1_SHIFT 1
100#define IDR_P0_SHIFT 0
101
102#define ILR_INTTGT_MASK 0x000000ff
103#define ILR_INTTGT_INT 0x00
104#define ILR_INTTGT_CINT 0x01 /* critical */
105#define ILR_INTTGT_MCP 0x02 /* machine check */
106#define NUM_OUTPUTS 3
107
108#define MSIIR_OFFSET 0x140
109#define MSIIR_SRS_SHIFT 29
110#define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT)
111#define MSIIR_IBS_SHIFT 24
112#define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT)
113
114static int get_current_cpu(void)
115{
116#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
117 struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
118 return vcpu ? vcpu->arch.irq_cpu_id : -1;
119#else
120 /* XXX */
121 return -1;
122#endif
123}
124
125static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
126 u32 val, int idx);
127static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
128 u32 *ptr, int idx);
129
130enum irq_type {
131 IRQ_TYPE_NORMAL = 0,
132 IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */
133 IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */
134};
135
136struct irq_queue {
137 /* Round up to the nearest 64 IRQs so that the queue length
138 * won't change when moving between 32 and 64 bit hosts.
139 */
140 unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
141 int next;
142 int priority;
143};
144
145struct irq_source {
146 uint32_t ivpr; /* IRQ vector/priority register */
147 uint32_t idr; /* IRQ destination register */
148 uint32_t destmask; /* bitmap of CPU destinations */
149 int last_cpu;
150 int output; /* IRQ level, e.g. ILR_INTTGT_INT */
151 int pending; /* TRUE if IRQ is pending */
152 enum irq_type type;
153 bool level:1; /* level-triggered */
154 bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */
155};
156
157#define IVPR_MASK_SHIFT 31
158#define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT)
159#define IVPR_ACTIVITY_SHIFT 30
160#define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT)
161#define IVPR_MODE_SHIFT 29
162#define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT)
163#define IVPR_POLARITY_SHIFT 23
164#define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT)
165#define IVPR_SENSE_SHIFT 22
166#define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT)
167
168#define IVPR_PRIORITY_MASK (0xF << 16)
169#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
170#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
171
172/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
173#define IDR_EP 0x80000000 /* external pin */
174#define IDR_CI 0x40000000 /* critical interrupt */
175
176struct irq_dest {
177 struct kvm_vcpu *vcpu;
178
179 int32_t ctpr; /* CPU current task priority */
180 struct irq_queue raised;
181 struct irq_queue servicing;
182
183 /* Count of IRQ sources asserting on non-INT outputs */
184 uint32_t outputs_active[NUM_OUTPUTS];
185};
186
187struct openpic {
188 struct kvm *kvm;
189 struct kvm_device *dev;
190 struct kvm_io_device mmio;
191 struct list_head mmio_regions;
192 atomic_t users;
193
194 gpa_t reg_base;
195 spinlock_t lock;
196
197 /* Behavior control */
198 struct fsl_mpic_info *fsl;
199 uint32_t model;
200 uint32_t flags;
201 uint32_t nb_irqs;
202 uint32_t vid;
203 uint32_t vir; /* Vendor identification register */
204 uint32_t vector_mask;
205 uint32_t tfrr_reset;
206 uint32_t ivpr_reset;
207 uint32_t idr_reset;
208 uint32_t brr1;
209 uint32_t mpic_mode_mask;
210
211 /* Global registers */
212 uint32_t frr; /* Feature reporting register */
213 uint32_t gcr; /* Global configuration register */
214 uint32_t pir; /* Processor initialization register */
215 uint32_t spve; /* Spurious vector register */
216 uint32_t tfrr; /* Timer frequency reporting register */
217 /* Source registers */
218 struct irq_source src[MAX_IRQ];
219 /* Local registers per output pin */
220 struct irq_dest dst[MAX_CPU];
221 uint32_t nb_cpus;
222 /* Timer registers */
223 struct {
224 uint32_t tccr; /* Global timer current count register */
225 uint32_t tbcr; /* Global timer base count register */
226 } timers[MAX_TMR];
227 /* Shared MSI registers */
228 struct {
229 uint32_t msir; /* Shared Message Signaled Interrupt Register */
230 } msi[MAX_MSI];
231 uint32_t max_irq;
232 uint32_t irq_ipi0;
233 uint32_t irq_tim0;
234 uint32_t irq_msi;
235};
236
237
238static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
239 int output)
240{
241 struct kvm_interrupt irq = {
242 .irq = KVM_INTERRUPT_SET_LEVEL,
243 };
244
245 if (!dst->vcpu) {
246 pr_debug("%s: destination cpu %d does not exist\n",
247 __func__, (int)(dst - &opp->dst[0]));
248 return;
249 }
250
251 pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
252 output);
253
254 if (output != ILR_INTTGT_INT) /* TODO */
255 return;
256
257 kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
258}
259
260static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
261 int output)
262{
263 if (!dst->vcpu) {
264 pr_debug("%s: destination cpu %d does not exist\n",
265 __func__, (int)(dst - &opp->dst[0]));
266 return;
267 }
268
269 pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
270 output);
271
272 if (output != ILR_INTTGT_INT) /* TODO */
273 return;
274
275 kvmppc_core_dequeue_external(dst->vcpu);
276}
277
278static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
279{
280 set_bit(n_IRQ, q->queue);
281}
282
283static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
284{
285 clear_bit(n_IRQ, q->queue);
286}
287
288static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
289{
290 return test_bit(n_IRQ, q->queue);
291}
292
293static void IRQ_check(struct openpic *opp, struct irq_queue *q)
294{
295 int irq = -1;
296 int next = -1;
297 int priority = -1;
298
299 for (;;) {
300 irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
301 if (irq == opp->max_irq)
302 break;
303
304 pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
305 irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
306
307 if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
308 next = irq;
309 priority = IVPR_PRIORITY(opp->src[irq].ivpr);
310 }
311 }
312
313 q->next = next;
314 q->priority = priority;
315}
316
317static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
318{
319 /* XXX: optimize */
320 IRQ_check(opp, q);
321
322 return q->next;
323}
324
325static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
326 bool active, bool was_active)
327{
328 struct irq_dest *dst;
329 struct irq_source *src;
330 int priority;
331
332 dst = &opp->dst[n_CPU];
333 src = &opp->src[n_IRQ];
334
335 pr_debug("%s: IRQ %d active %d was %d\n",
336 __func__, n_IRQ, active, was_active);
337
338 if (src->output != ILR_INTTGT_INT) {
339 pr_debug("%s: output %d irq %d active %d was %d count %d\n",
340 __func__, src->output, n_IRQ, active, was_active,
341 dst->outputs_active[src->output]);
342
343 /* On Freescale MPIC, critical interrupts ignore priority,
344 * IACK, EOI, etc. Before MPIC v4.1 they also ignore
345 * masking.
346 */
347 if (active) {
348 if (!was_active &&
349 dst->outputs_active[src->output]++ == 0) {
350 pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
351 __func__, src->output, n_CPU, n_IRQ);
352 mpic_irq_raise(opp, dst, src->output);
353 }
354 } else {
355 if (was_active &&
356 --dst->outputs_active[src->output] == 0) {
357 pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
358 __func__, src->output, n_CPU, n_IRQ);
359 mpic_irq_lower(opp, dst, src->output);
360 }
361 }
362
363 return;
364 }
365
366 priority = IVPR_PRIORITY(src->ivpr);
367
368 /* Even if the interrupt doesn't have enough priority,
369 * it is still raised, in case ctpr is lowered later.
370 */
371 if (active)
372 IRQ_setbit(&dst->raised, n_IRQ);
373 else
374 IRQ_resetbit(&dst->raised, n_IRQ);
375
376 IRQ_check(opp, &dst->raised);
377
378 if (active && priority <= dst->ctpr) {
379 pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
380 __func__, n_IRQ, priority, dst->ctpr, n_CPU);
381 active = 0;
382 }
383
384 if (active) {
385 if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
386 priority <= dst->servicing.priority) {
387 pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
388 __func__, n_IRQ, dst->servicing.next, n_CPU);
389 } else {
390 pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
391 __func__, n_CPU, n_IRQ, dst->raised.next);
392 mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
393 }
394 } else {
395 IRQ_get_next(opp, &dst->servicing);
396 if (dst->raised.priority > dst->ctpr &&
397 dst->raised.priority > dst->servicing.priority) {
398 pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
399 __func__, n_IRQ, dst->raised.next,
400 dst->raised.priority, dst->ctpr,
401 dst->servicing.priority, n_CPU);
402 /* IRQ line stays asserted */
403 } else {
404 pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
405 __func__, n_IRQ, dst->ctpr,
406 dst->servicing.priority, n_CPU);
407 mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
408 }
409 }
410}
411
412/* update pic state because registers for n_IRQ have changed value */
413static void openpic_update_irq(struct openpic *opp, int n_IRQ)
414{
415 struct irq_source *src;
416 bool active, was_active;
417 int i;
418
419 src = &opp->src[n_IRQ];
420 active = src->pending;
421
422 if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
423 /* Interrupt source is disabled */
424 pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
425 active = false;
426 }
427
428 was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
429
430 /*
431 * We don't have a similar check for already-active because
432 * ctpr may have changed and we need to withdraw the interrupt.
433 */
434 if (!active && !was_active) {
435 pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
436 return;
437 }
438
439 if (active)
440 src->ivpr |= IVPR_ACTIVITY_MASK;
441 else
442 src->ivpr &= ~IVPR_ACTIVITY_MASK;
443
444 if (src->destmask == 0) {
445 /* No target */
446 pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
447 return;
448 }
449
450 if (src->destmask == (1 << src->last_cpu)) {
451 /* Only one CPU is allowed to receive this IRQ */
452 IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
453 } else if (!(src->ivpr & IVPR_MODE_MASK)) {
454 /* Directed delivery mode */
455 for (i = 0; i < opp->nb_cpus; i++) {
456 if (src->destmask & (1 << i)) {
457 IRQ_local_pipe(opp, i, n_IRQ, active,
458 was_active);
459 }
460 }
461 } else {
462 /* Distributed delivery mode */
463 for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
464 if (i == opp->nb_cpus)
465 i = 0;
466
467 if (src->destmask & (1 << i)) {
468 IRQ_local_pipe(opp, i, n_IRQ, active,
469 was_active);
470 src->last_cpu = i;
471 break;
472 }
473 }
474 }
475}
476
477static void openpic_set_irq(void *opaque, int n_IRQ, int level)
478{
479 struct openpic *opp = opaque;
480 struct irq_source *src;
481
482 if (n_IRQ >= MAX_IRQ) {
483 WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
484 return;
485 }
486
487 src = &opp->src[n_IRQ];
488 pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
489 n_IRQ, level, src->ivpr);
490 if (src->level) {
491 /* level-sensitive irq */
492 src->pending = level;
493 openpic_update_irq(opp, n_IRQ);
494 } else {
495 /* edge-sensitive irq */
496 if (level) {
497 src->pending = 1;
498 openpic_update_irq(opp, n_IRQ);
499 }
500
501 if (src->output != ILR_INTTGT_INT) {
502 /* Edge-triggered interrupts shouldn't be used
503 * with non-INT delivery, but just in case,
504 * try to make it do something sane rather than
505 * cause an interrupt storm. This is close to
506 * what you'd probably see happen in real hardware.
507 */
508 src->pending = 0;
509 openpic_update_irq(opp, n_IRQ);
510 }
511 }
512}
513
514static void openpic_reset(struct openpic *opp)
515{
516 int i;
517
518 opp->gcr = GCR_RESET;
519 /* Initialise controller registers */
520 opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
521 (opp->vid << FRR_VID_SHIFT);
522
523 opp->pir = 0;
524 opp->spve = -1 & opp->vector_mask;
525 opp->tfrr = opp->tfrr_reset;
526 /* Initialise IRQ sources */
527 for (i = 0; i < opp->max_irq; i++) {
528 opp->src[i].ivpr = opp->ivpr_reset;
529 opp->src[i].idr = opp->idr_reset;
530
531 switch (opp->src[i].type) {
532 case IRQ_TYPE_NORMAL:
533 opp->src[i].level =
534 !!(opp->ivpr_reset & IVPR_SENSE_MASK);
535 break;
536
537 case IRQ_TYPE_FSLINT:
538 opp->src[i].ivpr |= IVPR_POLARITY_MASK;
539 break;
540
541 case IRQ_TYPE_FSLSPECIAL:
542 break;
543 }
544 }
545 /* Initialise IRQ destinations */
546 for (i = 0; i < MAX_CPU; i++) {
547 opp->dst[i].ctpr = 15;
548 memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
549 opp->dst[i].raised.next = -1;
550 memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
551 opp->dst[i].servicing.next = -1;
552 }
553 /* Initialise timers */
554 for (i = 0; i < MAX_TMR; i++) {
555 opp->timers[i].tccr = 0;
556 opp->timers[i].tbcr = TBCR_CI;
557 }
558 /* Go out of RESET state */
559 opp->gcr = 0;
560}
561
562static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
563{
564 return opp->src[n_IRQ].idr;
565}
566
567static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
568{
569 if (opp->flags & OPENPIC_FLAG_ILR)
570 return opp->src[n_IRQ].output;
571
572 return 0xffffffff;
573}
574
575static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
576{
577 return opp->src[n_IRQ].ivpr;
578}
579
580static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
581 uint32_t val)
582{
583 struct irq_source *src = &opp->src[n_IRQ];
584 uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
585 uint32_t crit_mask = 0;
586 uint32_t mask = normal_mask;
587 int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
588 int i;
589
590 if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
591 crit_mask = mask << crit_shift;
592 mask |= crit_mask | IDR_EP;
593 }
594
595 src->idr = val & mask;
596 pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
597
598 if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
599 if (src->idr & crit_mask) {
600 if (src->idr & normal_mask) {
601 pr_debug("%s: IRQ configured for multiple output types, using critical\n",
602 __func__);
603 }
604
605 src->output = ILR_INTTGT_CINT;
606 src->nomask = true;
607 src->destmask = 0;
608
609 for (i = 0; i < opp->nb_cpus; i++) {
610 int n_ci = IDR_CI0_SHIFT - i;
611
612 if (src->idr & (1UL << n_ci))
613 src->destmask |= 1UL << i;
614 }
615 } else {
616 src->output = ILR_INTTGT_INT;
617 src->nomask = false;
618 src->destmask = src->idr & normal_mask;
619 }
620 } else {
621 src->destmask = src->idr;
622 }
623}
624
625static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
626 uint32_t val)
627{
628 if (opp->flags & OPENPIC_FLAG_ILR) {
629 struct irq_source *src = &opp->src[n_IRQ];
630
631 src->output = val & ILR_INTTGT_MASK;
632 pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
633 src->output);
634
635 /* TODO: on MPIC v4.0 only, set nomask for non-INT */
636 }
637}
638
639static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
640 uint32_t val)
641{
642 uint32_t mask;
643
644 /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
645 * the polarity bit is read-only on internal interrupts.
646 */
647 mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
648 IVPR_POLARITY_MASK | opp->vector_mask;
649
650 /* ACTIVITY bit is read-only */
651 opp->src[n_IRQ].ivpr =
652 (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
653
654	/* For FSL internal interrupts, the sense bit is reserved and zero,
655 * and the interrupt is always level-triggered. Timers and IPIs
656 * have no sense or polarity bits, and are edge-triggered.
657 */
658 switch (opp->src[n_IRQ].type) {
659 case IRQ_TYPE_NORMAL:
660 opp->src[n_IRQ].level =
661 !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
662 break;
663
664 case IRQ_TYPE_FSLINT:
665 opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
666 break;
667
668 case IRQ_TYPE_FSLSPECIAL:
669 opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
670 break;
671 }
672
673 openpic_update_irq(opp, n_IRQ);
674 pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
675 opp->src[n_IRQ].ivpr);
676}
677
678static void openpic_gcr_write(struct openpic *opp, uint64_t val)
679{
680 if (val & GCR_RESET) {
681 openpic_reset(opp);
682 return;
683 }
684
685 opp->gcr &= ~opp->mpic_mode_mask;
686 opp->gcr |= val & opp->mpic_mode_mask;
687}
688
689static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
690{
691 struct openpic *opp = opaque;
692 int err = 0;
693
694 pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
695 if (addr & 0xF)
696 return 0;
697
698 switch (addr) {
699 case 0x00: /* Block Revision Register1 (BRR1) is Readonly */
700 break;
701 case 0x40:
702 case 0x50:
703 case 0x60:
704 case 0x70:
705 case 0x80:
706 case 0x90:
707 case 0xA0:
708 case 0xB0:
709 err = openpic_cpu_write_internal(opp, addr, val,
710 get_current_cpu());
711 break;
712 case 0x1000: /* FRR */
713 break;
714 case 0x1020: /* GCR */
715 openpic_gcr_write(opp, val);
716 break;
717 case 0x1080: /* VIR */
718 break;
719 case 0x1090: /* PIR */
720 /*
721 * This register is used to reset a CPU core --
722 * let userspace handle it.
723 */
724 err = -ENXIO;
725 break;
726 case 0x10A0: /* IPI_IVPR */
727 case 0x10B0:
728 case 0x10C0:
729 case 0x10D0: {
730 int idx;
731 idx = (addr - 0x10A0) >> 4;
732 write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
733 break;
734 }
735 case 0x10E0: /* SPVE */
736 opp->spve = val & opp->vector_mask;
737 break;
738 default:
739 break;
740 }
741
742 return err;
743}
744
745static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
746{
747 struct openpic *opp = opaque;
748 u32 retval;
749 int err = 0;
750
751 pr_debug("%s: addr %#llx\n", __func__, addr);
752 retval = 0xFFFFFFFF;
753 if (addr & 0xF)
754 goto out;
755
756 switch (addr) {
757 case 0x1000: /* FRR */
758 retval = opp->frr;
759 retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
760 break;
761 case 0x1020: /* GCR */
762 retval = opp->gcr;
763 break;
764 case 0x1080: /* VIR */
765 retval = opp->vir;
766 break;
767 case 0x1090: /* PIR */
768 retval = 0x00000000;
769 break;
770 case 0x00: /* Block Revision Register1 (BRR1) */
771 retval = opp->brr1;
772 break;
773 case 0x40:
774 case 0x50:
775 case 0x60:
776 case 0x70:
777 case 0x80:
778 case 0x90:
779 case 0xA0:
780 case 0xB0:
781 err = openpic_cpu_read_internal(opp, addr,
782 &retval, get_current_cpu());
783 break;
784 case 0x10A0: /* IPI_IVPR */
785 case 0x10B0:
786 case 0x10C0:
787 case 0x10D0:
788 {
789 int idx;
790 idx = (addr - 0x10A0) >> 4;
791 retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
792 }
793 break;
794 case 0x10E0: /* SPVE */
795 retval = opp->spve;
796 break;
797 default:
798 break;
799 }
800
801out:
802 pr_debug("%s: => 0x%08x\n", __func__, retval);
803 *ptr = retval;
804 return err;
805}
806
807static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
808{
809 struct openpic *opp = opaque;
810 int idx;
811
812 addr += 0x10f0;
813
814 pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
815 if (addr & 0xF)
816 return 0;
817
818 if (addr == 0x10f0) {
819 /* TFRR */
820 opp->tfrr = val;
821 return 0;
822 }
823
824 idx = (addr >> 6) & 0x3;
825 addr = addr & 0x30;
826
827 switch (addr & 0x30) {
828 case 0x00: /* TCCR */
829 break;
830 case 0x10: /* TBCR */
831 if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
832 (val & TBCR_CI) == 0 &&
833 (opp->timers[idx].tbcr & TBCR_CI) != 0)
834 opp->timers[idx].tccr &= ~TCCR_TOG;
835
836 opp->timers[idx].tbcr = val;
837 break;
838 case 0x20: /* TVPR */
839 write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
840 break;
841 case 0x30: /* TDR */
842 write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
843 break;
844 }
845
846 return 0;
847}
848
849static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
850{
851 struct openpic *opp = opaque;
852 uint32_t retval = -1;
853 int idx;
854
855 pr_debug("%s: addr %#llx\n", __func__, addr);
856 if (addr & 0xF)
857 goto out;
858
859 idx = (addr >> 6) & 0x3;
860 if (addr == 0x0) {
861 /* TFRR */
862 retval = opp->tfrr;
863 goto out;
864 }
865
866 switch (addr & 0x30) {
867 case 0x00: /* TCCR */
868 retval = opp->timers[idx].tccr;
869 break;
870 case 0x10: /* TBCR */
871 retval = opp->timers[idx].tbcr;
872 break;
873 case 0x20: /* TIPV */
874 retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
875 break;
876 case 0x30: /* TIDE (TIDR) */
877 retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
878 break;
879 }
880
881out:
882 pr_debug("%s: => 0x%08x\n", __func__, retval);
883 *ptr = retval;
884 return 0;
885}
886
887static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
888{
889 struct openpic *opp = opaque;
890 int idx;
891
892 pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
893
894 addr = addr & 0xffff;
895 idx = addr >> 5;
896
897 switch (addr & 0x1f) {
898 case 0x00:
899 write_IRQreg_ivpr(opp, idx, val);
900 break;
901 case 0x10:
902 write_IRQreg_idr(opp, idx, val);
903 break;
904 case 0x18:
905 write_IRQreg_ilr(opp, idx, val);
906 break;
907 }
908
909 return 0;
910}
911
912static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
913{
914 struct openpic *opp = opaque;
915 uint32_t retval;
916 int idx;
917
918 pr_debug("%s: addr %#llx\n", __func__, addr);
919 retval = 0xFFFFFFFF;
920
921 addr = addr & 0xffff;
922 idx = addr >> 5;
923
924 switch (addr & 0x1f) {
925 case 0x00:
926 retval = read_IRQreg_ivpr(opp, idx);
927 break;
928 case 0x10:
929 retval = read_IRQreg_idr(opp, idx);
930 break;
931 case 0x18:
932 retval = read_IRQreg_ilr(opp, idx);
933 break;
934 }
935
936 pr_debug("%s: => 0x%08x\n", __func__, retval);
937 *ptr = retval;
938 return 0;
939}
940
941static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
942{
943 struct openpic *opp = opaque;
944 int idx = opp->irq_msi;
945 int srs, ibs;
946
947 pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
948 if (addr & 0xF)
949 return 0;
950
951 switch (addr) {
952 case MSIIR_OFFSET:
953 srs = val >> MSIIR_SRS_SHIFT;
954 idx += srs;
955 ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
956 opp->msi[srs].msir |= 1 << ibs;
957 openpic_set_irq(opp, idx, 1);
958 break;
959 default:
960 /* most registers are read-only, thus ignored */
961 break;
962 }
963
964 return 0;
965}
966
967static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
968{
969 struct openpic *opp = opaque;
970 uint32_t r = 0;
971 int i, srs;
972
973 pr_debug("%s: addr %#llx\n", __func__, addr);
974 if (addr & 0xF)
975 return -ENXIO;
976
977 srs = addr >> 4;
978
979 switch (addr) {
980 case 0x00:
981 case 0x10:
982 case 0x20:
983 case 0x30:
984 case 0x40:
985 case 0x50:
986 case 0x60:
987 case 0x70: /* MSIRs */
988 r = opp->msi[srs].msir;
989 /* Clear on read */
990 opp->msi[srs].msir = 0;
991 openpic_set_irq(opp, opp->irq_msi + srs, 0);
992 break;
993 case 0x120: /* MSISR */
994 for (i = 0; i < MAX_MSI; i++)
995 r |= (opp->msi[i].msir ? 1 : 0) << i;
996 break;
997 }
998
999 pr_debug("%s: => 0x%08x\n", __func__, r);
1000 *ptr = r;
1001 return 0;
1002}
1003
1004static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
1005{
1006 uint32_t r = 0;
1007
1008 pr_debug("%s: addr %#llx\n", __func__, addr);
1009
1010 /* TODO: EISR/EIMR */
1011
1012 *ptr = r;
1013 return 0;
1014}
1015
1016static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
1017{
1018 pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
1019
1020 /* TODO: EISR/EIMR */
1021 return 0;
1022}
1023
1024static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
1025 u32 val, int idx)
1026{
1027 struct openpic *opp = opaque;
1028 struct irq_source *src;
1029 struct irq_dest *dst;
1030 int s_IRQ, n_IRQ;
1031
1032 pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
1033 addr, val);
1034
1035 if (idx < 0)
1036 return 0;
1037
1038 if (addr & 0xF)
1039 return 0;
1040
1041 dst = &opp->dst[idx];
1042 addr &= 0xFF0;
1043 switch (addr) {
1044 case 0x40: /* IPIDR */
1045 case 0x50:
1046 case 0x60:
1047 case 0x70:
1048 idx = (addr - 0x40) >> 4;
1049		/* IDE is used as a mask of which CPUs the IPI still needs to be delivered to. */
1050 opp->src[opp->irq_ipi0 + idx].destmask |= val;
1051 openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
1052 openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
1053 break;
1054 case 0x80: /* CTPR */
1055 dst->ctpr = val & 0x0000000F;
1056
1057 pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
1058 __func__, idx, dst->ctpr, dst->raised.priority,
1059 dst->servicing.priority);
1060
1061 if (dst->raised.priority <= dst->ctpr) {
1062 pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
1063 __func__, idx);
1064 mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1065 } else if (dst->raised.priority > dst->servicing.priority) {
1066 pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
1067 __func__, idx, dst->raised.next);
1068 mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1069 }
1070
1071 break;
1072 case 0x90: /* WHOAMI */
1073 /* Read-only register */
1074 break;
1075 case 0xA0: /* IACK */
1076 /* Read-only register */
1077 break;
1078 case 0xB0: { /* EOI */
1079 int notify_eoi;
1080
1081 pr_debug("EOI\n");
1082 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1083
1084 if (s_IRQ < 0) {
1085 pr_debug("%s: EOI with no interrupt in service\n",
1086 __func__);
1087 break;
1088 }
1089
1090 IRQ_resetbit(&dst->servicing, s_IRQ);
1091 /* Notify listeners that the IRQ is over */
1092 notify_eoi = s_IRQ;
1093 /* Set up next servicing IRQ */
1094 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1095 /* Check queued interrupts. */
1096 n_IRQ = IRQ_get_next(opp, &dst->raised);
1097 src = &opp->src[n_IRQ];
1098 if (n_IRQ != -1 &&
1099 (s_IRQ == -1 ||
1100 IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
1101 pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
1102 idx, n_IRQ);
1103 mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1104 }
1105
1106 spin_unlock(&opp->lock);
1107 kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
1108 spin_lock(&opp->lock);
1109
1110 break;
1111 }
1112 default:
1113 break;
1114 }
1115
1116 return 0;
1117}
1118
1119static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
1120{
1121 struct openpic *opp = opaque;
1122
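	/* Per-CPU register blocks are 0x1000 apart; offset bits 12..16 select the target CPU. */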
1123 return openpic_cpu_write_internal(opp, addr, val,
1124 (addr & 0x1f000) >> 12);
1125}
1126
1127static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
1128 int cpu)
1129{
1130 struct irq_source *src;
1131 int retval, irq;
1132
1133 pr_debug("Lower OpenPIC INT output\n");
1134 mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1135
1136 irq = IRQ_get_next(opp, &dst->raised);
1137 pr_debug("IACK: irq=%d\n", irq);
1138
1139 if (irq == -1)
1140 /* No more interrupt pending */
1141 return opp->spve;
1142
1143 src = &opp->src[irq];
1144 if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
1145 !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
1146 pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
1147 __func__, irq, dst->ctpr, src->ivpr);
1148 openpic_update_irq(opp, irq);
1149 retval = opp->spve;
1150 } else {
1151 /* IRQ enter servicing state */
1152 IRQ_setbit(&dst->servicing, irq);
1153 retval = IVPR_VECTOR(opp, src->ivpr);
1154 }
1155
1156 if (!src->level) {
1157 /* edge-sensitive IRQ */
1158 src->ivpr &= ~IVPR_ACTIVITY_MASK;
1159 src->pending = 0;
1160 IRQ_resetbit(&dst->raised, irq);
1161 }
1162
1163 if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
1164 src->destmask &= ~(1 << cpu);
1165 if (src->destmask && !src->level) {
1166 /* trigger on CPUs that didn't know about it yet */
1167 openpic_set_irq(opp, irq, 1);
1168 openpic_set_irq(opp, irq, 0);
1169 /* if all CPUs knew about it, set active bit again */
1170 src->ivpr |= IVPR_ACTIVITY_MASK;
1171 }
1172 }
1173
1174 return retval;
1175}
1176
1177void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
1178{
1179 struct openpic *opp = vcpu->arch.mpic;
1180 int cpu = vcpu->arch.irq_cpu_id;
1181 unsigned long flags;
1182
1183 spin_lock_irqsave(&opp->lock, flags);
1184
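	/* In proxy (coreint) mode the interrupt is acknowledged in-kernel and its vector is delivered through EPR. */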
1185 if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
1186 kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
1187
1188 spin_unlock_irqrestore(&opp->lock, flags);
1189}
1190
1191static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
1192 u32 *ptr, int idx)
1193{
1194 struct openpic *opp = opaque;
1195 struct irq_dest *dst;
1196 uint32_t retval;
1197
1198 pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
1199 retval = 0xFFFFFFFF;
1200
1201 if (idx < 0)
1202 goto out;
1203
1204 if (addr & 0xF)
1205 goto out;
1206
1207 dst = &opp->dst[idx];
1208 addr &= 0xFF0;
1209 switch (addr) {
1210 case 0x80: /* CTPR */
1211 retval = dst->ctpr;
1212 break;
1213 case 0x90: /* WHOAMI */
1214 retval = idx;
1215 break;
1216 case 0xA0: /* IACK */
1217 retval = openpic_iack(opp, dst, idx);
1218 break;
1219 case 0xB0: /* EOI */
1220 retval = 0;
1221 break;
1222 default:
1223 break;
1224 }
1225 pr_debug("%s: => 0x%08x\n", __func__, retval);
1226
1227out:
1228 *ptr = retval;
1229 return 0;
1230}
1231
1232static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
1233{
1234 struct openpic *opp = opaque;
1235
1236 return openpic_cpu_read_internal(opp, addr, ptr,
1237 (addr & 0x1f000) >> 12);
1238}
1239
1240struct mem_reg {
1241 struct list_head list;
1242 int (*read)(void *opaque, gpa_t addr, u32 *ptr);
1243 int (*write)(void *opaque, gpa_t addr, u32 val);
1244 gpa_t start_addr;
1245 int size;
1246};
1247
1248static struct mem_reg openpic_gbl_mmio = {
1249 .write = openpic_gbl_write,
1250 .read = openpic_gbl_read,
1251 .start_addr = OPENPIC_GLB_REG_START,
1252 .size = OPENPIC_GLB_REG_SIZE,
1253};
1254
1255static struct mem_reg openpic_tmr_mmio = {
1256 .write = openpic_tmr_write,
1257 .read = openpic_tmr_read,
1258 .start_addr = OPENPIC_TMR_REG_START,
1259 .size = OPENPIC_TMR_REG_SIZE,
1260};
1261
1262static struct mem_reg openpic_cpu_mmio = {
1263 .write = openpic_cpu_write,
1264 .read = openpic_cpu_read,
1265 .start_addr = OPENPIC_CPU_REG_START,
1266 .size = OPENPIC_CPU_REG_SIZE,
1267};
1268
1269static struct mem_reg openpic_src_mmio = {
1270 .write = openpic_src_write,
1271 .read = openpic_src_read,
1272 .start_addr = OPENPIC_SRC_REG_START,
1273 .size = OPENPIC_SRC_REG_SIZE,
1274};
1275
1276static struct mem_reg openpic_msi_mmio = {
1277 .read = openpic_msi_read,
1278 .write = openpic_msi_write,
1279 .start_addr = OPENPIC_MSI_REG_START,
1280 .size = OPENPIC_MSI_REG_SIZE,
1281};
1282
1283static struct mem_reg openpic_summary_mmio = {
1284 .read = openpic_summary_read,
1285 .write = openpic_summary_write,
1286 .start_addr = OPENPIC_SUMMARY_REG_START,
1287 .size = OPENPIC_SUMMARY_REG_SIZE,
1288};
1289
1290static void fsl_common_init(struct openpic *opp)
1291{
1292 int i;
1293 int virq = MAX_SRC;
1294
1295 list_add(&openpic_msi_mmio.list, &opp->mmio_regions);
1296 list_add(&openpic_summary_mmio.list, &opp->mmio_regions);
1297
1298 opp->vid = VID_REVISION_1_2;
1299 opp->vir = VIR_GENERIC;
1300 opp->vector_mask = 0xFFFF;
1301 opp->tfrr_reset = 0;
1302 opp->ivpr_reset = IVPR_MASK_MASK;
1303 opp->idr_reset = 1 << 0;
1304 opp->max_irq = MAX_IRQ;
1305
1306 opp->irq_ipi0 = virq;
1307 virq += MAX_IPI;
1308 opp->irq_tim0 = virq;
1309 virq += MAX_TMR;
1310
1311 BUG_ON(virq > MAX_IRQ);
1312
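	/* The eight MSIR banks map to interrupt sources 224..231, one source per bank. */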
1313 opp->irq_msi = 224;
1314
1315 for (i = 0; i < opp->fsl->max_ext; i++)
1316 opp->src[i].level = false;
1317
1318 /* Internal interrupts, including message and MSI */
1319 for (i = 16; i < MAX_SRC; i++) {
1320 opp->src[i].type = IRQ_TYPE_FSLINT;
1321 opp->src[i].level = true;
1322 }
1323
1324 /* timers and IPIs */
1325 for (i = MAX_SRC; i < virq; i++) {
1326 opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
1327 opp->src[i].level = false;
1328 }
1329}
1330
1331static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
1332{
1333 struct list_head *node;
1334
1335 list_for_each(node, &opp->mmio_regions) {
1336 struct mem_reg *mr = list_entry(node, struct mem_reg, list);
1337
1338 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1339 continue;
1340
1341 return mr->read(opp, addr - mr->start_addr, ptr);
1342 }
1343
1344 return -ENXIO;
1345}
1346
1347static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
1348{
1349 struct list_head *node;
1350
1351 list_for_each(node, &opp->mmio_regions) {
1352 struct mem_reg *mr = list_entry(node, struct mem_reg, list);
1353
1354 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1355 continue;
1356
1357 return mr->write(opp, addr - mr->start_addr, val);
1358 }
1359
1360 return -ENXIO;
1361}
1362
1363static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
1364 int len, void *ptr)
1365{
1366 struct openpic *opp = container_of(this, struct openpic, mmio);
1367 int ret;
1368 union {
1369 u32 val;
1370 u8 bytes[4];
1371 } u;
1372
1373 if (addr & (len - 1)) {
1374 pr_debug("%s: bad alignment %llx/%d\n",
1375 __func__, addr, len);
1376 return -EINVAL;
1377 }
1378
1379 spin_lock_irq(&opp->lock);
1380 ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
1381 spin_unlock_irq(&opp->lock);
1382
1383 /*
1384 * Technically only 32-bit accesses are allowed, but be nice to
1385 * people dumping registers a byte at a time -- it works in real
1386 * hardware (reads only, not writes).
1387 */
1388 if (len == 4) {
1389 *(u32 *)ptr = u.val;
1390 pr_debug("%s: addr %llx ret %d len 4 val %x\n",
1391 __func__, addr, ret, u.val);
1392 } else if (len == 1) {
1393 *(u8 *)ptr = u.bytes[addr & 3];
1394 pr_debug("%s: addr %llx ret %d len 1 val %x\n",
1395 __func__, addr, ret, u.bytes[addr & 3]);
1396 } else {
1397 pr_debug("%s: bad length %d\n", __func__, len);
1398 return -EINVAL;
1399 }
1400
1401 return ret;
1402}
1403
1404static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
1405 int len, const void *ptr)
1406{
1407 struct openpic *opp = container_of(this, struct openpic, mmio);
1408 int ret;
1409
1410 if (len != 4) {
1411 pr_debug("%s: bad length %d\n", __func__, len);
1412 return -EOPNOTSUPP;
1413 }
1414 if (addr & 3) {
1415 pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
1416 return -EOPNOTSUPP;
1417 }
1418
1419 spin_lock_irq(&opp->lock);
1420 ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
1421 *(const u32 *)ptr);
1422 spin_unlock_irq(&opp->lock);
1423
1424 pr_debug("%s: addr %llx ret %d val %x\n",
1425 __func__, addr, ret, *(const u32 *)ptr);
1426
1427 return ret;
1428}
1429
1430static const struct kvm_io_device_ops mpic_mmio_ops = {
1431 .read = kvm_mpic_read,
1432 .write = kvm_mpic_write,
1433};
1434
1435static void map_mmio(struct openpic *opp)
1436{
1437 kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
1438
1439 kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
1440 opp->reg_base, OPENPIC_REG_SIZE,
1441 &opp->mmio);
1442}
1443
1444static void unmap_mmio(struct openpic *opp)
1445{
1446 kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
1447}
1448
1449static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
1450{
1451 u64 base;
1452
1453 if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
1454 return -EFAULT;
1455
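	/* Require 256 KiB alignment to match the MPIC register window. */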
1456 if (base & 0x3ffff) {
1457 pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
1458 __func__, base);
1459 return -EINVAL;
1460 }
1461
1462 if (base == opp->reg_base)
1463 return 0;
1464
1465 mutex_lock(&opp->kvm->slots_lock);
1466
1467 unmap_mmio(opp);
1468 opp->reg_base = base;
1469
1470 pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
1471 __func__, base);
1472
1473	if (base == 0)
1474		goto out;
1475
1476	map_mmio(opp);
1477
1478out:
1479	mutex_unlock(&opp->kvm->slots_lock);
1480	return 0;
1481}
1482
1483#define ATTR_SET 0
1484#define ATTR_GET 1
1485
1486static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
1487{
1488 int ret;
1489
1490 if (addr & 3)
1491 return -ENXIO;
1492
1493 spin_lock_irq(&opp->lock);
1494
1495 if (type == ATTR_SET)
1496 ret = kvm_mpic_write_internal(opp, addr, *val);
1497 else
1498 ret = kvm_mpic_read_internal(opp, addr, val);
1499
1500 spin_unlock_irq(&opp->lock);
1501
1502 pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
1503
1504 return ret;
1505}
1506
1507static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1508{
1509 struct openpic *opp = dev->private;
1510 u32 attr32;
1511
1512 switch (attr->group) {
1513 case KVM_DEV_MPIC_GRP_MISC:
1514 switch (attr->attr) {
1515 case KVM_DEV_MPIC_BASE_ADDR:
1516 return set_base_addr(opp, attr);
1517 }
1518
1519 break;
1520
1521 case KVM_DEV_MPIC_GRP_REGISTER:
1522 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1523 return -EFAULT;
1524
1525 return access_reg(opp, attr->attr, &attr32, ATTR_SET);
1526
1527 case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1528 if (attr->attr > MAX_SRC)
1529 return -EINVAL;
1530
1531 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1532 return -EFAULT;
1533
1534 if (attr32 != 0 && attr32 != 1)
1535 return -EINVAL;
1536
1537 spin_lock_irq(&opp->lock);
1538 openpic_set_irq(opp, attr->attr, attr32);
1539 spin_unlock_irq(&opp->lock);
1540 return 0;
1541 }
1542
1543 return -ENXIO;
1544}
1545
1546static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1547{
1548 struct openpic *opp = dev->private;
1549 u64 attr64;
1550 u32 attr32;
1551 int ret;
1552
1553 switch (attr->group) {
1554 case KVM_DEV_MPIC_GRP_MISC:
1555 switch (attr->attr) {
1556 case KVM_DEV_MPIC_BASE_ADDR:
1557 mutex_lock(&opp->kvm->slots_lock);
1558 attr64 = opp->reg_base;
1559 mutex_unlock(&opp->kvm->slots_lock);
1560
1561 if (copy_to_user((u64 __user *)(long)attr->addr,
1562 &attr64, sizeof(u64)))
1563 return -EFAULT;
1564
1565 return 0;
1566 }
1567
1568 break;
1569
1570 case KVM_DEV_MPIC_GRP_REGISTER:
1571 ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
1572 if (ret)
1573 return ret;
1574
1575 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1576 return -EFAULT;
1577
1578 return 0;
1579
1580 case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1581 if (attr->attr > MAX_SRC)
1582 return -EINVAL;
1583
1584 spin_lock_irq(&opp->lock);
1585 attr32 = opp->src[attr->attr].pending;
1586 spin_unlock_irq(&opp->lock);
1587
1588 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1589 return -EFAULT;
1590
1591 return 0;
1592 }
1593
1594 return -ENXIO;
1595}
1596
1597static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1598{
1599 switch (attr->group) {
1600 case KVM_DEV_MPIC_GRP_MISC:
1601 switch (attr->attr) {
1602 case KVM_DEV_MPIC_BASE_ADDR:
1603 return 0;
1604 }
1605
1606 break;
1607
1608 case KVM_DEV_MPIC_GRP_REGISTER:
1609 return 0;
1610
1611 case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1612 if (attr->attr > MAX_SRC)
1613 break;
1614
1615 return 0;
1616 }
1617
1618 return -ENXIO;
1619}
1620
1621static void mpic_destroy(struct kvm_device *dev)
1622{
1623 struct openpic *opp = dev->private;
1624
1625 dev->kvm->arch.mpic = NULL;
1626 kfree(opp);
1627}
1628
1629static int mpic_set_default_irq_routing(struct openpic *opp)
1630{
1631 struct kvm_irq_routing_entry *routing;
1632
1633 /* Create a nop default map, so that dereferencing it still works */
1634	routing = kzalloc(sizeof(*routing), GFP_KERNEL);
1635 if (!routing)
1636 return -ENOMEM;
1637
1638 kvm_set_irq_routing(opp->kvm, routing, 0, 0);
1639
1640 kfree(routing);
1641 return 0;
1642}
1643
1644static int mpic_create(struct kvm_device *dev, u32 type)
1645{
1646 struct openpic *opp;
1647 int ret;
1648
1649 /* We only support one MPIC at a time for now */
1650 if (dev->kvm->arch.mpic)
1651 return -EINVAL;
1652
1653 opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
1654 if (!opp)
1655 return -ENOMEM;
1656
1657 dev->private = opp;
1658 opp->kvm = dev->kvm;
1659 opp->dev = dev;
1660 opp->model = type;
1661 spin_lock_init(&opp->lock);
1662
1663 INIT_LIST_HEAD(&opp->mmio_regions);
1664 list_add(&openpic_gbl_mmio.list, &opp->mmio_regions);
1665 list_add(&openpic_tmr_mmio.list, &opp->mmio_regions);
1666 list_add(&openpic_src_mmio.list, &opp->mmio_regions);
1667 list_add(&openpic_cpu_mmio.list, &opp->mmio_regions);
1668
1669 switch (opp->model) {
1670 case KVM_DEV_TYPE_FSL_MPIC_20:
1671 opp->fsl = &fsl_mpic_20;
1672 opp->brr1 = 0x00400200;
1673 opp->flags |= OPENPIC_FLAG_IDR_CRIT;
1674 opp->nb_irqs = 80;
1675 opp->mpic_mode_mask = GCR_MODE_MIXED;
1676
1677 fsl_common_init(opp);
1678
1679 break;
1680
1681 case KVM_DEV_TYPE_FSL_MPIC_42:
1682 opp->fsl = &fsl_mpic_42;
1683 opp->brr1 = 0x00400402;
1684 opp->flags |= OPENPIC_FLAG_ILR;
1685 opp->nb_irqs = 196;
1686 opp->mpic_mode_mask = GCR_MODE_PROXY;
1687
1688 fsl_common_init(opp);
1689
1690 break;
1691
1692 default:
1693 ret = -ENODEV;
1694 goto err;
1695 }
1696
1697 ret = mpic_set_default_irq_routing(opp);
1698 if (ret)
1699 goto err;
1700
1701 openpic_reset(opp);
1702
1703 smp_wmb();
1704 dev->kvm->arch.mpic = opp;
1705
1706 return 0;
1707
1708err:
1709 kfree(opp);
1710 return ret;
1711}
1712
1713struct kvm_device_ops kvm_mpic_ops = {
1714 .name = "kvm-mpic",
1715 .create = mpic_create,
1716 .destroy = mpic_destroy,
1717 .set_attr = mpic_set_attr,
1718 .get_attr = mpic_get_attr,
1719 .has_attr = mpic_has_attr,
1720};
1721
1722int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
1723 u32 cpu)
1724{
1725 struct openpic *opp = dev->private;
1726 int ret = 0;
1727
1728 if (dev->ops != &kvm_mpic_ops)
1729 return -EPERM;
1730 if (opp->kvm != vcpu->kvm)
1731 return -EPERM;
1732 if (cpu < 0 || cpu >= MAX_CPU)
1733 return -EPERM;
1734
1735 spin_lock_irq(&opp->lock);
1736
1737 if (opp->dst[cpu].vcpu) {
1738 ret = -EEXIST;
1739 goto out;
1740 }
1741 if (vcpu->arch.irq_type) {
1742 ret = -EBUSY;
1743 goto out;
1744 }
1745
1746 opp->dst[cpu].vcpu = vcpu;
1747 opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
1748
1749 vcpu->arch.mpic = opp;
1750 vcpu->arch.irq_cpu_id = cpu;
1751 vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
1752
1753 /* This might need to be changed if GCR gets extended */
1754 if (opp->mpic_mode_mask == GCR_MODE_PROXY)
1755 vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
1756
1757out:
1758 spin_unlock_irq(&opp->lock);
1759 return ret;
1760}
1761
1762/*
1763 * This should only happen immediately before the mpic is destroyed,
1764 * so we shouldn't need to worry about anything still trying to
1765 * access the vcpu pointer.
1766 */
1767void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
1768{
1769 BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
1770
1771 opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
1772}
1773
1774/*
1775 * Return value:
1776 * < 0 Interrupt was ignored (masked or not delivered for other reasons)
1777 * = 0 Interrupt was coalesced (previous irq is still pending)
1778 * > 0 Number of CPUs interrupt was delivered to
1779 */
1780static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
1781 struct kvm *kvm, int irq_source_id, int level,
1782 bool line_status)
1783{
1784 u32 irq = e->irqchip.pin;
1785 struct openpic *opp = kvm->arch.mpic;
1786 unsigned long flags;
1787
1788 spin_lock_irqsave(&opp->lock, flags);
1789 openpic_set_irq(opp, irq, level);
1790 spin_unlock_irqrestore(&opp->lock, flags);
1791
1792	/* None of the code paths we care about check the return value */
1793 return 0;
1794}
1795
1796int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
1797 struct kvm *kvm, int irq_source_id, int level, bool line_status)
1798{
1799 struct openpic *opp = kvm->arch.mpic;
1800 unsigned long flags;
1801
1802 spin_lock_irqsave(&opp->lock, flags);
1803
1804 /*
1805 * XXX We ignore the target address for now, as we only support
1806 * a single MSI bank.
1807 */
1808 openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
1809 spin_unlock_irqrestore(&opp->lock, flags);
1810
1811	/* None of the code paths we care about check the return value */
1812 return 0;
1813}
1814
1815int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
1816 struct kvm_kernel_irq_routing_entry *e,
1817 const struct kvm_irq_routing_entry *ue)
1818{
1819 int r = -EINVAL;
1820
1821 switch (ue->type) {
1822 case KVM_IRQ_ROUTING_IRQCHIP:
1823 e->set = mpic_set_irq;
1824 e->irqchip.irqchip = ue->u.irqchip.irqchip;
1825 e->irqchip.pin = ue->u.irqchip.pin;
1826 if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
1827 goto out;
1828 rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
1829 break;
1830 case KVM_IRQ_ROUTING_MSI:
1831 e->set = kvm_set_msi;
1832 e->msi.address_lo = ue->u.msi.address_lo;
1833 e->msi.address_hi = ue->u.msi.address_hi;
1834 e->msi.data = ue->u.msi.data;
1835 break;
1836 default:
1837 goto out;
1838 }
1839
1840 r = 0;
1841out:
1842 return r;
1843}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a822659db50a..31084c6335c9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -25,6 +25,7 @@
25#include <linux/hrtimer.h> 25#include <linux/hrtimer.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/file.h>
28#include <asm/cputable.h> 29#include <asm/cputable.h>
29#include <asm/uaccess.h> 30#include <asm/uaccess.h>
30#include <asm/kvm_ppc.h> 31#include <asm/kvm_ppc.h>
@@ -32,6 +33,7 @@
32#include <asm/cputhreads.h> 33#include <asm/cputhreads.h>
33#include <asm/irqflags.h> 34#include <asm/irqflags.h>
34#include "timing.h" 35#include "timing.h"
36#include "irq.h"
35#include "../mm/mmu_decl.h" 37#include "../mm/mmu_decl.h"
36 38
37#define CREATE_TRACE_POINTS 39#define CREATE_TRACE_POINTS
@@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext)
317 case KVM_CAP_ENABLE_CAP: 319 case KVM_CAP_ENABLE_CAP:
318 case KVM_CAP_ONE_REG: 320 case KVM_CAP_ONE_REG:
319 case KVM_CAP_IOEVENTFD: 321 case KVM_CAP_IOEVENTFD:
322 case KVM_CAP_DEVICE_CTRL:
320 r = 1; 323 r = 1;
321 break; 324 break;
322#ifndef CONFIG_KVM_BOOK3S_64_HV 325#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -326,6 +329,9 @@ int kvm_dev_ioctl_check_extension(long ext)
326#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) 329#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
327 case KVM_CAP_SW_TLB: 330 case KVM_CAP_SW_TLB:
328#endif 331#endif
332#ifdef CONFIG_KVM_MPIC
333 case KVM_CAP_IRQ_MPIC:
334#endif
329 r = 1; 335 r = 1;
330 break; 336 break;
331 case KVM_CAP_COALESCED_MMIO: 337 case KVM_CAP_COALESCED_MMIO:
@@ -335,6 +341,7 @@ int kvm_dev_ioctl_check_extension(long ext)
335#ifdef CONFIG_PPC_BOOK3S_64 341#ifdef CONFIG_PPC_BOOK3S_64
336 case KVM_CAP_SPAPR_TCE: 342 case KVM_CAP_SPAPR_TCE:
337 case KVM_CAP_PPC_ALLOC_HTAB: 343 case KVM_CAP_PPC_ALLOC_HTAB:
344 case KVM_CAP_PPC_RTAS:
338 r = 1; 345 r = 1;
339 break; 346 break;
340#endif /* CONFIG_PPC_BOOK3S_64 */ 347#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -459,6 +466,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
459 tasklet_kill(&vcpu->arch.tasklet); 466 tasklet_kill(&vcpu->arch.tasklet);
460 467
461 kvmppc_remove_vcpu_debugfs(vcpu); 468 kvmppc_remove_vcpu_debugfs(vcpu);
469
470 switch (vcpu->arch.irq_type) {
471 case KVMPPC_IRQ_MPIC:
472 kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
473 break;
474 case KVMPPC_IRQ_XICS:
475 kvmppc_xics_free_icp(vcpu);
476 break;
477 }
478
462 kvmppc_core_vcpu_free(vcpu); 479 kvmppc_core_vcpu_free(vcpu);
463} 480}
464 481
@@ -531,12 +548,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
531#endif 548#endif
532} 549}
533 550
534int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
535 struct kvm_guest_debug *dbg)
536{
537 return -EINVAL;
538}
539
540static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 551static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
541 struct kvm_run *run) 552 struct kvm_run *run)
542{ 553{
@@ -768,7 +779,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
768 break; 779 break;
769 case KVM_CAP_PPC_EPR: 780 case KVM_CAP_PPC_EPR:
770 r = 0; 781 r = 0;
771 vcpu->arch.epr_enabled = cap->args[0]; 782 if (cap->args[0])
783 vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
784 else
785 vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
772 break; 786 break;
773#ifdef CONFIG_BOOKE 787#ifdef CONFIG_BOOKE
774 case KVM_CAP_PPC_BOOKE_WATCHDOG: 788 case KVM_CAP_PPC_BOOKE_WATCHDOG:
@@ -789,6 +803,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
789 break; 803 break;
790 } 804 }
791#endif 805#endif
806#ifdef CONFIG_KVM_MPIC
807 case KVM_CAP_IRQ_MPIC: {
808 struct file *filp;
809 struct kvm_device *dev;
810
811 r = -EBADF;
812 filp = fget(cap->args[0]);
813 if (!filp)
814 break;
815
816 r = -EPERM;
817 dev = kvm_device_from_filp(filp);
818 if (dev)
819 r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
820
821 fput(filp);
822 break;
823 }
824#endif
792 default: 825 default:
793 r = -EINVAL; 826 r = -EINVAL;
794 break; 827 break;
@@ -911,9 +944,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
911 return 0; 944 return 0;
912} 945}
913 946
947int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
948 bool line_status)
949{
950 if (!irqchip_in_kernel(kvm))
951 return -ENXIO;
952
953 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
954 irq_event->irq, irq_event->level,
955 line_status);
956 return 0;
957}
958
914long kvm_arch_vm_ioctl(struct file *filp, 959long kvm_arch_vm_ioctl(struct file *filp,
915 unsigned int ioctl, unsigned long arg) 960 unsigned int ioctl, unsigned long arg)
916{ 961{
962 struct kvm *kvm __maybe_unused = filp->private_data;
917 void __user *argp = (void __user *)arg; 963 void __user *argp = (void __user *)arg;
918 long r; 964 long r;
919 965
@@ -932,7 +978,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
932#ifdef CONFIG_PPC_BOOK3S_64 978#ifdef CONFIG_PPC_BOOK3S_64
933 case KVM_CREATE_SPAPR_TCE: { 979 case KVM_CREATE_SPAPR_TCE: {
934 struct kvm_create_spapr_tce create_tce; 980 struct kvm_create_spapr_tce create_tce;
935 struct kvm *kvm = filp->private_data;
936 981
937 r = -EFAULT; 982 r = -EFAULT;
938 if (copy_from_user(&create_tce, argp, sizeof(create_tce))) 983 if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@@ -944,8 +989,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
944 989
945#ifdef CONFIG_KVM_BOOK3S_64_HV 990#ifdef CONFIG_KVM_BOOK3S_64_HV
946 case KVM_ALLOCATE_RMA: { 991 case KVM_ALLOCATE_RMA: {
947 struct kvm *kvm = filp->private_data;
948 struct kvm_allocate_rma rma; 992 struct kvm_allocate_rma rma;
993 struct kvm *kvm = filp->private_data;
949 994
950 r = kvm_vm_ioctl_allocate_rma(kvm, &rma); 995 r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
951 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) 996 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -954,7 +999,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
954 } 999 }
955 1000
956 case KVM_PPC_ALLOCATE_HTAB: { 1001 case KVM_PPC_ALLOCATE_HTAB: {
957 struct kvm *kvm = filp->private_data;
958 u32 htab_order; 1002 u32 htab_order;
959 1003
960 r = -EFAULT; 1004 r = -EFAULT;
@@ -971,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
971 } 1015 }
972 1016
973 case KVM_PPC_GET_HTAB_FD: { 1017 case KVM_PPC_GET_HTAB_FD: {
974 struct kvm *kvm = filp->private_data;
975 struct kvm_get_htab_fd ghf; 1018 struct kvm_get_htab_fd ghf;
976 1019
977 r = -EFAULT; 1020 r = -EFAULT;
@@ -984,7 +1027,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
984 1027
985#ifdef CONFIG_PPC_BOOK3S_64 1028#ifdef CONFIG_PPC_BOOK3S_64
986 case KVM_PPC_GET_SMMU_INFO: { 1029 case KVM_PPC_GET_SMMU_INFO: {
987 struct kvm *kvm = filp->private_data;
988 struct kvm_ppc_smmu_info info; 1030 struct kvm_ppc_smmu_info info;
989 1031
990 memset(&info, 0, sizeof(info)); 1032 memset(&info, 0, sizeof(info));
@@ -993,6 +1035,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
993 r = -EFAULT; 1035 r = -EFAULT;
994 break; 1036 break;
995 } 1037 }
1038 case KVM_PPC_RTAS_DEFINE_TOKEN: {
1039 struct kvm *kvm = filp->private_data;
1040
1041 r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
1042 break;
1043 }
996#endif /* CONFIG_PPC_BOOK3S_64 */ 1044#endif /* CONFIG_PPC_BOOK3S_64 */
997 default: 1045 default:
998 r = -ENOTTY; 1046 r = -ENOTTY;
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 48861d3fcd07..20b328bb494d 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
51static inline unsigned int icp_native_get_xirr(void) 51static inline unsigned int icp_native_get_xirr(void)
52{ 52{
53 int cpu = smp_processor_id(); 53 int cpu = smp_processor_id();
54 unsigned int xirr;
55
56 /* Handled an interrupt latched by KVM */
57 xirr = kvmppc_get_xics_latch();
58 if (xirr)
59 return xirr;
54 60
55 return in_be32(&icp_native_regs[cpu]->xirr.word); 61 return in_be32(&icp_native_regs[cpu]->xirr.word);
56} 62}
@@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void)
138 144
139static void icp_native_cause_ipi(int cpu, unsigned long data) 145static void icp_native_cause_ipi(int cpu, unsigned long data)
140{ 146{
147 kvmppc_set_host_ipi(cpu, 1);
141 icp_native_set_qirr(cpu, IPI_PRIORITY); 148 icp_native_set_qirr(cpu, IPI_PRIORITY);
142} 149}
143 150
@@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
151{ 158{
152 int cpu = smp_processor_id(); 159 int cpu = smp_processor_id();
153 160
161 kvmppc_set_host_ipi(cpu, 0);
154 icp_native_set_qirr(cpu, 0xff); 162 icp_native_set_qirr(cpu, 0xff);
155 163
156 return smp_ipi_demux(); 164 return smp_ipi_demux();