aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2013-05-05 17:47:31 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2013-05-05 17:47:31 -0400
commit: 01227a889ed56ae53aeebb9f93be9d54dd8b2de8 (patch)
tree: d5eba9359a9827e84d4112b84d48c54df5c5acde /arch/powerpc/kvm
parent: 9e6879460c8edb0cd3c24c09b83d06541b5af0dc (diff)
parent: db6ae6158186a17165ef990bda2895ae7594b039 (diff)
Merge tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Gleb Natapov:
 "Highlights of the updates are:

  general:
   - new emulated device API
   - legacy device assignment is now optional
   - irqfd interface is more generic and can be shared between arches

  x86:
   - VMCS shadow support and other nested VMX improvements
   - APIC virtualization and Posted Interrupt hardware support
   - Optimize mmio spte zapping

  ppc:
   - BookE: in-kernel MPIC emulation with irqfd support
   - Book3S: in-kernel XICS emulation (incomplete)
   - Book3S: HV: migration fixes
   - BookE: more debug support preparation
   - BookE: e6500 support

  ARM:
   - reworking of Hyp idmaps

  s390:
   - ioeventfd for virtio-ccw

  And many other bug fixes, cleanups and improvements"

* tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
  kvm: Add compat_ioctl for device control API
  KVM: x86: Account for failing enable_irq_window for NMI window request
  KVM: PPC: Book3S: Add API for in-kernel XICS emulation
  kvm/ppc/mpic: fix missing unlock in set_base_addr()
  kvm/ppc: Hold srcu lock when calling kvm_io_bus_read/write
  kvm/ppc/mpic: remove users
  kvm/ppc/mpic: fix mmio region lists when multiple guests used
  kvm/ppc/mpic: remove default routes from documentation
  kvm: KVM_CAP_IOMMU only available with device assignment
  ARM: KVM: iterate over all CPUs for CPU compatibility check
  KVM: ARM: Fix spelling in error message
  ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
  KVM: ARM: Fix API documentation for ONE_REG encoding
  ARM: KVM: promote vfp_host pointer to generic host cpu context
  ARM: KVM: add architecture specific hook for capabilities
  ARM: KVM: perform HYP initilization for hotplugged CPUs
  ARM: KVM: switch to a dual-step HYP init code
  ARM: KVM: rework HYP page table freeing
  ARM: KVM: enforce maximum size for identity mapped code
  ARM: KVM: move to a KVM provided HYP idmap
  ...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x.c12
-rw-r--r--arch/powerpc/kvm/Kconfig26
-rw-r--r--arch/powerpc/kvm/Makefile12
-rw-r--r--arch/powerpc/kvm/book3s.c36
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c120
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c92
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c11
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c406
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S228
-rw-r--r--arch/powerpc/kvm/book3s_pr.c7
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c21
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c274
-rw-r--r--arch/powerpc/kvm/book3s_xics.c1270
-rw-r--r--arch/powerpc/kvm/book3s_xics.h130
-rw-r--r--arch/powerpc/kvm/booke.c158
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S42
-rw-r--r--arch/powerpc/kvm/e500.c14
-rw-r--r--arch/powerpc/kvm/e500.h22
-rw-r--r--arch/powerpc/kvm/e500_emulate.c19
-rw-r--r--arch/powerpc/kvm/e500_mmu.c192
-rw-r--r--arch/powerpc/kvm/e500mc.c16
-rw-r--r--arch/powerpc/kvm/emulate.c2
-rw-r--r--arch/powerpc/kvm/irq.h20
-rw-r--r--arch/powerpc/kvm/mpic.c1853
-rw-r--r--arch/powerpc/kvm/powerpc.c133
26 files changed, 4898 insertions, 222 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 3d7fd21c65f9..2f5c6b6d6877 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
124 return kvmppc_set_sregs_ivor(vcpu, sregs); 124 return kvmppc_set_sregs_ivor(vcpu, sregs);
125} 125}
126 126
127int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
128 union kvmppc_one_reg *val)
129{
130 return -EINVAL;
131}
132
133int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
134 union kvmppc_one_reg *val)
135{
136 return -EINVAL;
137}
138
127struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 139struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
128{ 140{
129 struct kvmppc_vcpu_44x *vcpu_44x; 141 struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec72e43..eb643f862579 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,21 +136,41 @@ config KVM_E500V2
136 If unsure, say N. 136 If unsure, say N.
137 137
138config KVM_E500MC 138config KVM_E500MC
139 bool "KVM support for PowerPC E500MC/E5500 processors" 139 bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
140 depends on PPC_E500MC 140 depends on PPC_E500MC
141 select KVM 141 select KVM
142 select KVM_MMIO 142 select KVM_MMIO
143 select KVM_BOOKE_HV 143 select KVM_BOOKE_HV
144 select MMU_NOTIFIER 144 select MMU_NOTIFIER
145 ---help--- 145 ---help---
146 Support running unmodified E500MC/E5500 (32-bit) guest kernels in 146 Support running unmodified E500MC/E5500/E6500 guest kernels in
147 virtual machines on E500MC/E5500 host processors. 147 virtual machines on E500MC/E5500/E6500 host processors.
148 148
149 This module provides access to the hardware capabilities through 149 This module provides access to the hardware capabilities through
150 a character device node named /dev/kvm. 150 a character device node named /dev/kvm.
151 151
152 If unsure, say N. 152 If unsure, say N.
153 153
154config KVM_MPIC
155 bool "KVM in-kernel MPIC emulation"
156 depends on KVM && E500
157 select HAVE_KVM_IRQCHIP
158 select HAVE_KVM_IRQ_ROUTING
159 select HAVE_KVM_MSI
160 help
161 Enable support for emulating MPIC devices inside the
162 host kernel, rather than relying on userspace to emulate.
163 Currently, support is limited to certain versions of
164 Freescale's MPIC implementation.
165
166config KVM_XICS
167 bool "KVM in-kernel XICS emulation"
168 depends on KVM_BOOK3S_64 && !KVM_MPIC
169 ---help---
170 Include support for the XICS (eXternal Interrupt Controller
171 Specification) interrupt controller architecture used on
172 IBM POWER (pSeries) servers.
173
154source drivers/vhost/Kconfig 174source drivers/vhost/Kconfig
155 175
156endif # VIRTUALIZATION 176endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b772eded8c26..422de3f4d46c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
72 book3s_hv.o \ 72 book3s_hv.o \
73 book3s_hv_interrupts.o \ 73 book3s_hv_interrupts.o \
74 book3s_64_mmu_hv.o 74 book3s_64_mmu_hv.o
75kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
76 book3s_hv_rm_xics.o
75kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 77kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
76 book3s_hv_rmhandlers.o \ 78 book3s_hv_rmhandlers.o \
77 book3s_hv_rm_mmu.o \ 79 book3s_hv_rm_mmu.o \
78 book3s_64_vio_hv.o \ 80 book3s_64_vio_hv.o \
79 book3s_hv_ras.o \ 81 book3s_hv_ras.o \
80 book3s_hv_builtin.o 82 book3s_hv_builtin.o \
83 $(kvm-book3s_64-builtin-xics-objs-y)
84
85kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
86 book3s_xics.o
81 87
82kvm-book3s_64-module-objs := \ 88kvm-book3s_64-module-objs := \
83 ../../../virt/kvm/kvm_main.o \ 89 ../../../virt/kvm/kvm_main.o \
@@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \
86 emulate.o \ 92 emulate.o \
87 book3s.o \ 93 book3s.o \
88 book3s_64_vio.o \ 94 book3s_64_vio.o \
95 book3s_rtas.o \
89 $(kvm-book3s_64-objs-y) 96 $(kvm-book3s_64-objs-y)
90 97
91kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) 98kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -103,6 +110,9 @@ kvm-book3s_32-objs := \
103 book3s_32_mmu.o 110 book3s_32_mmu.o
104kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) 111kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
105 112
113kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
114kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
115
106kvm-objs := $(kvm-objs-m) $(kvm-objs-y) 116kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
107 117
108obj-$(CONFIG_KVM_440) += kvm.o 118obj-$(CONFIG_KVM_440) += kvm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a4b645285240..700df6f1d32c 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
104 return prio; 104 return prio;
105} 105}
106 106
107static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, 107void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
108 unsigned int vec) 108 unsigned int vec)
109{ 109{
110 unsigned long old_pending = vcpu->arch.pending_exceptions; 110 unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
160 kvmppc_book3s_queue_irqprio(vcpu, vec); 160 kvmppc_book3s_queue_irqprio(vcpu, vec);
161} 161}
162 162
163void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, 163void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
164 struct kvm_interrupt *irq)
165{ 164{
166 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 165 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
167 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 166 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
@@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
530 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); 529 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
531 break; 530 break;
532#endif /* CONFIG_ALTIVEC */ 531#endif /* CONFIG_ALTIVEC */
532 case KVM_REG_PPC_DEBUG_INST: {
533 u32 opcode = INS_TW;
534 r = copy_to_user((u32 __user *)(long)reg->addr,
535 &opcode, sizeof(u32));
536 break;
537 }
538#ifdef CONFIG_KVM_XICS
539 case KVM_REG_PPC_ICP_STATE:
540 if (!vcpu->arch.icp) {
541 r = -ENXIO;
542 break;
543 }
544 val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
545 break;
546#endif /* CONFIG_KVM_XICS */
533 default: 547 default:
534 r = -EINVAL; 548 r = -EINVAL;
535 break; 549 break;
@@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
592 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); 606 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
593 break; 607 break;
594#endif /* CONFIG_ALTIVEC */ 608#endif /* CONFIG_ALTIVEC */
609#ifdef CONFIG_KVM_XICS
610 case KVM_REG_PPC_ICP_STATE:
611 if (!vcpu->arch.icp) {
612 r = -ENXIO;
613 break;
614 }
615 r = kvmppc_xics_set_icp(vcpu,
616 set_reg_val(reg->id, val));
617 break;
618#endif /* CONFIG_KVM_XICS */
595 default: 619 default:
596 r = -EINVAL; 620 r = -EINVAL;
597 break; 621 break;
@@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
607 return 0; 631 return 0;
608} 632}
609 633
634int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
635 struct kvm_guest_debug *dbg)
636{
637 return -EINVAL;
638}
639
610void kvmppc_decrementer_func(unsigned long data) 640void kvmppc_decrementer_func(unsigned long data)
611{ 641{
612 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 642 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index da98e26f6e45..5880dfb31074 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
893 /* Harvest R and C */ 893 /* Harvest R and C */
894 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); 894 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
895 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 895 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
896 rev[i].guest_rpte = ptel | rcbits; 896 if (rcbits & ~rev[i].guest_rpte) {
897 rev[i].guest_rpte = ptel | rcbits;
898 note_hpte_modification(kvm, &rev[i]);
899 }
897 } 900 }
898 unlock_rmap(rmapp); 901 unlock_rmap(rmapp);
899 hptep[0] &= ~HPTE_V_HVLOCK; 902 hptep[0] &= ~HPTE_V_HVLOCK;
@@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
976 /* Now check and modify the HPTE */ 979 /* Now check and modify the HPTE */
977 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { 980 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
978 kvmppc_clear_ref_hpte(kvm, hptep, i); 981 kvmppc_clear_ref_hpte(kvm, hptep, i);
979 rev[i].guest_rpte |= HPTE_R_R; 982 if (!(rev[i].guest_rpte & HPTE_R_R)) {
983 rev[i].guest_rpte |= HPTE_R_R;
984 note_hpte_modification(kvm, &rev[i]);
985 }
980 ret = 1; 986 ret = 1;
981 } 987 }
982 hptep[0] &= ~HPTE_V_HVLOCK; 988 hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
1080 hptep[1] &= ~HPTE_R_C; 1086 hptep[1] &= ~HPTE_R_C;
1081 eieio(); 1087 eieio();
1082 hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 1088 hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
1083 rev[i].guest_rpte |= HPTE_R_C; 1089 if (!(rev[i].guest_rpte & HPTE_R_C)) {
1090 rev[i].guest_rpte |= HPTE_R_C;
1091 note_hpte_modification(kvm, &rev[i]);
1092 }
1084 ret = 1; 1093 ret = 1;
1085 } 1094 }
1086 hptep[0] &= ~HPTE_V_HVLOCK; 1095 hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
1090 return ret; 1099 return ret;
1091} 1100}
1092 1101
1102static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
1103 struct kvm_memory_slot *memslot,
1104 unsigned long *map)
1105{
1106 unsigned long gfn;
1107
1108 if (!vpa->dirty || !vpa->pinned_addr)
1109 return;
1110 gfn = vpa->gpa >> PAGE_SHIFT;
1111 if (gfn < memslot->base_gfn ||
1112 gfn >= memslot->base_gfn + memslot->npages)
1113 return;
1114
1115 vpa->dirty = false;
1116 if (map)
1117 __set_bit_le(gfn - memslot->base_gfn, map);
1118}
1119
1093long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, 1120long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1094 unsigned long *map) 1121 unsigned long *map)
1095{ 1122{
1096 unsigned long i; 1123 unsigned long i;
1097 unsigned long *rmapp; 1124 unsigned long *rmapp;
1125 struct kvm_vcpu *vcpu;
1098 1126
1099 preempt_disable(); 1127 preempt_disable();
1100 rmapp = memslot->arch.rmap; 1128 rmapp = memslot->arch.rmap;
@@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1103 __set_bit_le(i, map); 1131 __set_bit_le(i, map);
1104 ++rmapp; 1132 ++rmapp;
1105 } 1133 }
1134
1135 /* Harvest dirty bits from VPA and DTL updates */
1136 /* Note: we never modify the SLB shadow buffer areas */
1137 kvm_for_each_vcpu(i, vcpu, kvm) {
1138 spin_lock(&vcpu->arch.vpa_update_lock);
1139 harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
1140 harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
1141 spin_unlock(&vcpu->arch.vpa_update_lock);
1142 }
1106 preempt_enable(); 1143 preempt_enable();
1107 return 0; 1144 return 0;
1108} 1145}
@@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1114 unsigned long gfn = gpa >> PAGE_SHIFT; 1151 unsigned long gfn = gpa >> PAGE_SHIFT;
1115 struct page *page, *pages[1]; 1152 struct page *page, *pages[1];
1116 int npages; 1153 int npages;
1117 unsigned long hva, psize, offset; 1154 unsigned long hva, offset;
1118 unsigned long pa; 1155 unsigned long pa;
1119 unsigned long *physp; 1156 unsigned long *physp;
1120 int srcu_idx; 1157 int srcu_idx;
@@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1146 } 1183 }
1147 srcu_read_unlock(&kvm->srcu, srcu_idx); 1184 srcu_read_unlock(&kvm->srcu, srcu_idx);
1148 1185
1149 psize = PAGE_SIZE; 1186 offset = gpa & (PAGE_SIZE - 1);
1150 if (PageHuge(page)) {
1151 page = compound_head(page);
1152 psize <<= compound_order(page);
1153 }
1154 offset = gpa & (psize - 1);
1155 if (nb_ret) 1187 if (nb_ret)
1156 *nb_ret = psize - offset; 1188 *nb_ret = PAGE_SIZE - offset;
1157 return page_address(page) + offset; 1189 return page_address(page) + offset;
1158 1190
1159 err: 1191 err:
@@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1161 return NULL; 1193 return NULL;
1162} 1194}
1163 1195
1164void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) 1196void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
1197 bool dirty)
1165{ 1198{
1166 struct page *page = virt_to_page(va); 1199 struct page *page = virt_to_page(va);
1200 struct kvm_memory_slot *memslot;
1201 unsigned long gfn;
1202 unsigned long *rmap;
1203 int srcu_idx;
1167 1204
1168 put_page(page); 1205 put_page(page);
1206
1207 if (!dirty || !kvm->arch.using_mmu_notifiers)
1208 return;
1209
1210 /* We need to mark this page dirty in the rmap chain */
1211 gfn = gpa >> PAGE_SHIFT;
1212 srcu_idx = srcu_read_lock(&kvm->srcu);
1213 memslot = gfn_to_memslot(kvm, gfn);
1214 if (memslot) {
1215 rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
1216 lock_rmap(rmap);
1217 *rmap |= KVMPPC_RMAP_CHANGED;
1218 unlock_rmap(rmap);
1219 }
1220 srcu_read_unlock(&kvm->srcu, srcu_idx);
1169} 1221}
1170 1222
1171/* 1223/*
@@ -1193,16 +1245,36 @@ struct kvm_htab_ctx {
1193 1245
1194#define HPTE_SIZE (2 * sizeof(unsigned long)) 1246#define HPTE_SIZE (2 * sizeof(unsigned long))
1195 1247
1248/*
1249 * Returns 1 if this HPT entry has been modified or has pending
1250 * R/C bit changes.
1251 */
1252static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
1253{
1254 unsigned long rcbits_unset;
1255
1256 if (revp->guest_rpte & HPTE_GR_MODIFIED)
1257 return 1;
1258
1259 /* Also need to consider changes in reference and changed bits */
1260 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
1261 if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
1262 return 1;
1263
1264 return 0;
1265}
1266
1196static long record_hpte(unsigned long flags, unsigned long *hptp, 1267static long record_hpte(unsigned long flags, unsigned long *hptp,
1197 unsigned long *hpte, struct revmap_entry *revp, 1268 unsigned long *hpte, struct revmap_entry *revp,
1198 int want_valid, int first_pass) 1269 int want_valid, int first_pass)
1199{ 1270{
1200 unsigned long v, r; 1271 unsigned long v, r;
1272 unsigned long rcbits_unset;
1201 int ok = 1; 1273 int ok = 1;
1202 int valid, dirty; 1274 int valid, dirty;
1203 1275
1204 /* Unmodified entries are uninteresting except on the first pass */ 1276 /* Unmodified entries are uninteresting except on the first pass */
1205 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); 1277 dirty = hpte_dirty(revp, hptp);
1206 if (!first_pass && !dirty) 1278 if (!first_pass && !dirty)
1207 return 0; 1279 return 0;
1208 1280
@@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
1223 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) 1295 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
1224 cpu_relax(); 1296 cpu_relax();
1225 v = hptp[0]; 1297 v = hptp[0];
1298
1299 /* re-evaluate valid and dirty from synchronized HPTE value */
1300 valid = !!(v & HPTE_V_VALID);
1301 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
1302
1303 /* Harvest R and C into guest view if necessary */
1304 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
1305 if (valid && (rcbits_unset & hptp[1])) {
1306 revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
1307 HPTE_GR_MODIFIED;
1308 dirty = 1;
1309 }
1310
1226 if (v & HPTE_V_ABSENT) { 1311 if (v & HPTE_V_ABSENT) {
1227 v &= ~HPTE_V_ABSENT; 1312 v &= ~HPTE_V_ABSENT;
1228 v |= HPTE_V_VALID; 1313 v |= HPTE_V_VALID;
1314 valid = 1;
1229 } 1315 }
1230 /* re-evaluate valid and dirty from synchronized HPTE value */
1231 valid = !!(v & HPTE_V_VALID);
1232 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) 1316 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
1233 valid = 0; 1317 valid = 0;
1234 r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); 1318
1235 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); 1319 r = revp->guest_rpte;
1236 /* only clear modified if this is the right sort of entry */ 1320 /* only clear modified if this is the right sort of entry */
1237 if (valid == want_valid && dirty) { 1321 if (valid == want_valid && dirty) {
1238 r &= ~HPTE_GR_MODIFIED; 1322 r &= ~HPTE_GR_MODIFIED;
@@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
1288 /* Skip uninteresting entries, i.e. clean on not-first pass */ 1372 /* Skip uninteresting entries, i.e. clean on not-first pass */
1289 if (!first_pass) { 1373 if (!first_pass) {
1290 while (i < kvm->arch.hpt_npte && 1374 while (i < kvm->arch.hpt_npte &&
1291 !(revp->guest_rpte & HPTE_GR_MODIFIED)) { 1375 !hpte_dirty(revp, hptp)) {
1292 ++i; 1376 ++i;
1293 hptp += 2; 1377 hptp += 2;
1294 ++revp; 1378 ++revp;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 836c56975e21..1f6344c4408d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
194 run->papr_hcall.args[i] = gpr; 194 run->papr_hcall.args[i] = gpr;
195 } 195 }
196 196
197 emulated = EMULATE_DO_PAPR; 197 run->exit_reason = KVM_EXIT_PAPR_HCALL;
198 vcpu->arch.hcall_needed = 1;
199 emulated = EMULATE_EXIT_USER;
198 break; 200 break;
199 } 201 }
200#endif 202#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f5416934932b..9de24f8e03c7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -66,6 +66,31 @@
66static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 66static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
67static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 67static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
68 68
69void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
70{
71 int me;
72 int cpu = vcpu->cpu;
73 wait_queue_head_t *wqp;
74
75 wqp = kvm_arch_vcpu_wq(vcpu);
76 if (waitqueue_active(wqp)) {
77 wake_up_interruptible(wqp);
78 ++vcpu->stat.halt_wakeup;
79 }
80
81 me = get_cpu();
82
83 /* CPU points to the first thread of the core */
84 if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
85 int real_cpu = cpu + vcpu->arch.ptid;
86 if (paca[real_cpu].kvm_hstate.xics_phys)
87 xics_wake_cpu(real_cpu);
88 else if (cpu_online(cpu))
89 smp_send_reschedule(cpu);
90 }
91 put_cpu();
92}
93
69/* 94/*
70 * We use the vcpu_load/put functions to measure stolen time. 95 * We use the vcpu_load/put functions to measure stolen time.
71 * Stolen time is counted as time when either the vcpu is able to 96 * Stolen time is counted as time when either the vcpu is able to
@@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
259 len = ((struct reg_vpa *)va)->length.hword; 284 len = ((struct reg_vpa *)va)->length.hword;
260 else 285 else
261 len = ((struct reg_vpa *)va)->length.word; 286 len = ((struct reg_vpa *)va)->length.word;
262 kvmppc_unpin_guest_page(kvm, va); 287 kvmppc_unpin_guest_page(kvm, va, vpa, false);
263 288
264 /* Check length */ 289 /* Check length */
265 if (len > nb || len < sizeof(struct reg_vpa)) 290 if (len > nb || len < sizeof(struct reg_vpa))
@@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
359 va = NULL; 384 va = NULL;
360 nb = 0; 385 nb = 0;
361 if (gpa) 386 if (gpa)
362 va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); 387 va = kvmppc_pin_guest_page(kvm, gpa, &nb);
363 spin_lock(&vcpu->arch.vpa_update_lock); 388 spin_lock(&vcpu->arch.vpa_update_lock);
364 if (gpa == vpap->next_gpa) 389 if (gpa == vpap->next_gpa)
365 break; 390 break;
366 /* sigh... unpin that one and try again */ 391 /* sigh... unpin that one and try again */
367 if (va) 392 if (va)
368 kvmppc_unpin_guest_page(kvm, va); 393 kvmppc_unpin_guest_page(kvm, va, gpa, false);
369 } 394 }
370 395
371 vpap->update_pending = 0; 396 vpap->update_pending = 0;
@@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
375 * has changed the mappings underlying guest memory, 400 * has changed the mappings underlying guest memory,
376 * so unregister the region. 401 * so unregister the region.
377 */ 402 */
378 kvmppc_unpin_guest_page(kvm, va); 403 kvmppc_unpin_guest_page(kvm, va, gpa, false);
379 va = NULL; 404 va = NULL;
380 } 405 }
381 if (vpap->pinned_addr) 406 if (vpap->pinned_addr)
382 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); 407 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
408 vpap->dirty);
409 vpap->gpa = gpa;
383 vpap->pinned_addr = va; 410 vpap->pinned_addr = va;
411 vpap->dirty = false;
384 if (va) 412 if (va)
385 vpap->pinned_end = va + vpap->len; 413 vpap->pinned_end = va + vpap->len;
386} 414}
@@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
472 /* order writing *dt vs. writing vpa->dtl_idx */ 500 /* order writing *dt vs. writing vpa->dtl_idx */
473 smp_wmb(); 501 smp_wmb();
474 vpa->dtl_idx = ++vcpu->arch.dtl_index; 502 vpa->dtl_idx = ++vcpu->arch.dtl_index;
503 vcpu->arch.dtl.dirty = true;
475} 504}
476 505
477int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) 506int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
479 unsigned long req = kvmppc_get_gpr(vcpu, 3); 508 unsigned long req = kvmppc_get_gpr(vcpu, 3);
480 unsigned long target, ret = H_SUCCESS; 509 unsigned long target, ret = H_SUCCESS;
481 struct kvm_vcpu *tvcpu; 510 struct kvm_vcpu *tvcpu;
482 int idx; 511 int idx, rc;
483 512
484 switch (req) { 513 switch (req) {
485 case H_ENTER: 514 case H_ENTER:
@@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
515 kvmppc_get_gpr(vcpu, 5), 544 kvmppc_get_gpr(vcpu, 5),
516 kvmppc_get_gpr(vcpu, 6)); 545 kvmppc_get_gpr(vcpu, 6));
517 break; 546 break;
547 case H_RTAS:
548 if (list_empty(&vcpu->kvm->arch.rtas_tokens))
549 return RESUME_HOST;
550
551 rc = kvmppc_rtas_hcall(vcpu);
552
553 if (rc == -ENOENT)
554 return RESUME_HOST;
555 else if (rc == 0)
556 break;
557
558 /* Send the error out to userspace via KVM_RUN */
559 return rc;
560
561 case H_XIRR:
562 case H_CPPR:
563 case H_EOI:
564 case H_IPI:
565 if (kvmppc_xics_enabled(vcpu)) {
566 ret = kvmppc_xics_hcall(vcpu, req);
567 break;
568 } /* fallthrough */
518 default: 569 default:
519 return RESUME_HOST; 570 return RESUME_HOST;
520 } 571 }
@@ -913,15 +964,19 @@ out:
913 return ERR_PTR(err); 964 return ERR_PTR(err);
914} 965}
915 966
967static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
968{
969 if (vpa->pinned_addr)
970 kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
971 vpa->dirty);
972}
973
916void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 974void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
917{ 975{
918 spin_lock(&vcpu->arch.vpa_update_lock); 976 spin_lock(&vcpu->arch.vpa_update_lock);
919 if (vcpu->arch.dtl.pinned_addr) 977 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
920 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); 978 unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
921 if (vcpu->arch.slb_shadow.pinned_addr) 979 unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
922 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
923 if (vcpu->arch.vpa.pinned_addr)
924 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
925 spin_unlock(&vcpu->arch.vpa_update_lock); 980 spin_unlock(&vcpu->arch.vpa_update_lock);
926 kvm_vcpu_uninit(vcpu); 981 kvm_vcpu_uninit(vcpu);
927 kmem_cache_free(kvm_vcpu_cache, vcpu); 982 kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
955} 1010}
956 1011
957extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 1012extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
958extern void xics_wake_cpu(int cpu);
959 1013
960static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, 1014static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
961 struct kvm_vcpu *vcpu) 1015 struct kvm_vcpu *vcpu)
@@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1330 break; 1384 break;
1331 vc->runner = vcpu; 1385 vc->runner = vcpu;
1332 n_ceded = 0; 1386 n_ceded = 0;
1333 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) 1387 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
1334 if (!v->arch.pending_exceptions) 1388 if (!v->arch.pending_exceptions)
1335 n_ceded += v->arch.ceded; 1389 n_ceded += v->arch.ceded;
1390 else
1391 v->arch.ceded = 0;
1392 }
1336 if (n_ceded == vc->n_runnable) 1393 if (n_ceded == vc->n_runnable)
1337 kvmppc_vcore_blocked(vc); 1394 kvmppc_vcore_blocked(vc);
1338 else 1395 else
@@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1645 1702
1646void kvmppc_core_commit_memory_region(struct kvm *kvm, 1703void kvmppc_core_commit_memory_region(struct kvm *kvm,
1647 struct kvm_userspace_memory_region *mem, 1704 struct kvm_userspace_memory_region *mem,
1648 struct kvm_memory_slot old) 1705 const struct kvm_memory_slot *old)
1649{ 1706{
1650 unsigned long npages = mem->memory_size >> PAGE_SHIFT; 1707 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
1651 struct kvm_memory_slot *memslot; 1708 struct kvm_memory_slot *memslot;
1652 1709
1653 if (npages && old.npages) { 1710 if (npages && old->npages) {
1654 /* 1711 /*
1655 * If modifying a memslot, reset all the rmap dirty bits. 1712 * If modifying a memslot, reset all the rmap dirty bits.
1656 * If this is a new memslot, we don't need to do anything 1713 * If this is a new memslot, we don't need to do anything
@@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1827 cpumask_setall(&kvm->arch.need_tlb_flush); 1884 cpumask_setall(&kvm->arch.need_tlb_flush);
1828 1885
1829 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1886 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1887 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
1830 1888
1831 kvm->arch.rma = NULL; 1889 kvm->arch.rma = NULL;
1832 1890
@@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
1872 kvm->arch.rma = NULL; 1930 kvm->arch.rma = NULL;
1873 } 1931 }
1874 1932
1933 kvmppc_rtas_tokens_free(kvm);
1934
1875 kvmppc_free_hpt(kvm); 1935 kvmppc_free_hpt(kvm);
1876 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 1936 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
1877} 1937}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 19c93bae1aea..6dcbb49105a4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
97} 97}
98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
99 99
100/*
101 * Note modification of an HPTE; set the HPTE modified bit
102 * if anyone is interested.
103 */
104static inline void note_hpte_modification(struct kvm *kvm,
105 struct revmap_entry *rev)
106{
107 if (atomic_read(&kvm->arch.hpte_mod_interest))
108 rev->guest_rpte |= HPTE_GR_MODIFIED;
109}
110
111/* Remove this HPTE from the chain for a real page */ 100/* Remove this HPTE from the chain for a real page */
112static void remove_revmap_chain(struct kvm *kvm, long pte_index, 101static void remove_revmap_chain(struct kvm *kvm, long pte_index,
113 struct revmap_entry *rev, 102 struct revmap_entry *rev,
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 000000000000..b4b0082f761c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,406 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/kvm_host.h>
12#include <linux/err.h>
13
14#include <asm/kvm_book3s.h>
15#include <asm/kvm_ppc.h>
16#include <asm/hvcall.h>
17#include <asm/xics.h>
18#include <asm/debug.h>
19#include <asm/synch.h>
20#include <asm/ppc-opcode.h>
21
22#include "book3s_xics.h"
23
24#define DEBUG_PASSUP
25
26static inline void rm_writeb(unsigned long paddr, u8 val)
27{
28 __asm__ __volatile__("sync; stbcix %0,0,%1"
29 : : "r" (val), "r" (paddr) : "memory");
30}
31
32static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
33 struct kvm_vcpu *this_vcpu)
34{
35 struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
36 unsigned long xics_phys;
37 int cpu;
38
39 /* Mark the target VCPU as having an interrupt pending */
40 vcpu->stat.queue_intr++;
41 set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
42
43 /* Kick self ? Just set MER and return */
44 if (vcpu == this_vcpu) {
45 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
46 return;
47 }
48
49 /* Check if the core is loaded, if not, too hard */
50 cpu = vcpu->cpu;
51 if (cpu < 0 || cpu >= nr_cpu_ids) {
52 this_icp->rm_action |= XICS_RM_KICK_VCPU;
53 this_icp->rm_kick_target = vcpu;
54 return;
55 }
56 /* In SMT cpu will always point to thread 0, we adjust it */
57 cpu += vcpu->arch.ptid;
58
59 /* Not too hard, then poke the target */
60 xics_phys = paca[cpu].kvm_hstate.xics_phys;
61 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
62}
63
64static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
65{
66 /* Note: Only called on self ! */
67 clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
68 &vcpu->arch.pending_exceptions);
69 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
70}
71
72static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
73 union kvmppc_icp_state old,
74 union kvmppc_icp_state new)
75{
76 struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
77 bool success;
78
79 /* Calculate new output value */
80 new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
81
82 /* Attempt atomic update */
83 success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
84 if (!success)
85 goto bail;
86
87 /*
88 * Check for output state update
89 *
90 * Note that this is racy since another processor could be updating
91 * the state already. This is why we never clear the interrupt output
92 * here, we only ever set it. The clear only happens prior to doing
93 * an update and only by the processor itself. Currently we do it
94 * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
95 *
96 * We also do not try to figure out whether the EE state has changed,
97 * we unconditionally set it if the new state calls for it. The reason
98 * for that is that we opportunistically remove the pending interrupt
99 * flag when raising CPPR, so we need to set it back here if an
100 * interrupt is still pending.
101 */
102 if (new.out_ee)
103 icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
104
105 /* Expose the state change for debug purposes */
106 this_vcpu->arch.icp->rm_dbgstate = new;
107 this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
108
109 bail:
110 return success;
111}
112
113static inline int check_too_hard(struct kvmppc_xics *xics,
114 struct kvmppc_icp *icp)
115{
116 return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
117}
118
119static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
120 u8 new_cppr)
121{
122 union kvmppc_icp_state old_state, new_state;
123 bool resend;
124
125 /*
126 * This handles several related states in one operation:
127 *
128 * ICP State: Down_CPPR
129 *
130 * Load CPPR with new value and if the XISR is 0
131 * then check for resends:
132 *
133 * ICP State: Resend
134 *
135 * If MFRR is more favored than CPPR, check for IPIs
136 * and notify ICS of a potential resend. This is done
137 * asynchronously (when used in real mode, we will have
138 * to exit here).
139 *
140 * We do not handle the complete Check_IPI as documented
141 * here. In the PAPR, this state will be used for both
142 * Set_MFRR and Down_CPPR. However, we know that we aren't
143 * changing the MFRR state here so we don't need to handle
144 * the case of an MFRR causing a reject of a pending irq,
145 * this will have been handled when the MFRR was set in the
146 * first place.
147 *
148 * Thus we don't have to handle rejects, only resends.
149 *
150 * When implementing real mode for HV KVM, resend will lead to
151 * a H_TOO_HARD return and the whole transaction will be handled
152 * in virtual mode.
153 */
154 do {
155 old_state = new_state = ACCESS_ONCE(icp->state);
156
157 /* Down_CPPR */
158 new_state.cppr = new_cppr;
159
160 /*
161 * Cut down Resend / Check_IPI / IPI
162 *
163 * The logic is that we cannot have a pending interrupt
164 * trumped by an IPI at this point (see above), so we
165 * know that either the pending interrupt is already an
166 * IPI (in which case we don't care to override it) or
167 * it's either more favored than us or non existent
168 */
169 if (new_state.mfrr < new_cppr &&
170 new_state.mfrr <= new_state.pending_pri) {
171 new_state.pending_pri = new_state.mfrr;
172 new_state.xisr = XICS_IPI;
173 }
174
175 /* Latch/clear resend bit */
176 resend = new_state.need_resend;
177 new_state.need_resend = 0;
178
179 } while (!icp_rm_try_update(icp, old_state, new_state));
180
181 /*
182 * Now handle resend checks. Those are asynchronous to the ICP
183 * state update in HW (ie bus transactions) so we can handle them
184 * separately here as well.
185 */
186 if (resend)
187 icp->rm_action |= XICS_RM_CHECK_RESEND;
188}
189
190
191unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
192{
193 union kvmppc_icp_state old_state, new_state;
194 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
195 struct kvmppc_icp *icp = vcpu->arch.icp;
196 u32 xirr;
197
198 if (!xics || !xics->real_mode)
199 return H_TOO_HARD;
200
201 /* First clear the interrupt */
202 icp_rm_clr_vcpu_irq(icp->vcpu);
203
204 /*
205 * ICP State: Accept_Interrupt
206 *
207 * Return the pending interrupt (if any) along with the
208 * current CPPR, then clear the XISR & set CPPR to the
209 * pending priority
210 */
211 do {
212 old_state = new_state = ACCESS_ONCE(icp->state);
213
214 xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
215 if (!old_state.xisr)
216 break;
217 new_state.cppr = new_state.pending_pri;
218 new_state.pending_pri = 0xff;
219 new_state.xisr = 0;
220
221 } while (!icp_rm_try_update(icp, old_state, new_state));
222
223 /* Return the result in GPR4 */
224 vcpu->arch.gpr[4] = xirr;
225
226 return check_too_hard(xics, icp);
227}
228
229int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
230 unsigned long mfrr)
231{
232 union kvmppc_icp_state old_state, new_state;
233 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
234 struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
235 u32 reject;
236 bool resend;
237 bool local;
238
239 if (!xics || !xics->real_mode)
240 return H_TOO_HARD;
241
242 local = this_icp->server_num == server;
243 if (local)
244 icp = this_icp;
245 else
246 icp = kvmppc_xics_find_server(vcpu->kvm, server);
247 if (!icp)
248 return H_PARAMETER;
249
250 /*
251 * ICP state: Set_MFRR
252 *
253 * If the CPPR is more favored than the new MFRR, then
254 * nothing needs to be done as there can be no XISR to
255 * reject.
256 *
257 * If the CPPR is less favored, then we might be replacing
258 * an interrupt, and thus need to possibly reject it as in
259 *
260 * ICP state: Check_IPI
261 */
262 do {
263 old_state = new_state = ACCESS_ONCE(icp->state);
264
265 /* Set_MFRR */
266 new_state.mfrr = mfrr;
267
268 /* Check_IPI */
269 reject = 0;
270 resend = false;
271 if (mfrr < new_state.cppr) {
272 /* Reject a pending interrupt if not an IPI */
273 if (mfrr <= new_state.pending_pri)
274 reject = new_state.xisr;
275 new_state.pending_pri = mfrr;
276 new_state.xisr = XICS_IPI;
277 }
278
279 if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
280 resend = new_state.need_resend;
281 new_state.need_resend = 0;
282 }
283 } while (!icp_rm_try_update(icp, old_state, new_state));
284
285 /* Pass rejects to virtual mode */
286 if (reject && reject != XICS_IPI) {
287 this_icp->rm_action |= XICS_RM_REJECT;
288 this_icp->rm_reject = reject;
289 }
290
291 /* Pass resends to virtual mode */
292 if (resend)
293 this_icp->rm_action |= XICS_RM_CHECK_RESEND;
294
295 return check_too_hard(xics, this_icp);
296}
297
298int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
299{
300 union kvmppc_icp_state old_state, new_state;
301 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
302 struct kvmppc_icp *icp = vcpu->arch.icp;
303 u32 reject;
304
305 if (!xics || !xics->real_mode)
306 return H_TOO_HARD;
307
308 /*
309 * ICP State: Set_CPPR
310 *
311 * We can safely compare the new value with the current
312 * value outside of the transaction as the CPPR is only
313 * ever changed by the processor on itself
314 */
315 if (cppr > icp->state.cppr) {
316 icp_rm_down_cppr(xics, icp, cppr);
317 goto bail;
318 } else if (cppr == icp->state.cppr)
319 return H_SUCCESS;
320
321 /*
322 * ICP State: Up_CPPR
323 *
324 * The processor is raising its priority, this can result
325 * in a rejection of a pending interrupt:
326 *
327 * ICP State: Reject_Current
328 *
329 * We can remove EE from the current processor, the update
330 * transaction will set it again if needed
331 */
332 icp_rm_clr_vcpu_irq(icp->vcpu);
333
334 do {
335 old_state = new_state = ACCESS_ONCE(icp->state);
336
337 reject = 0;
338 new_state.cppr = cppr;
339
340 if (cppr <= new_state.pending_pri) {
341 reject = new_state.xisr;
342 new_state.xisr = 0;
343 new_state.pending_pri = 0xff;
344 }
345
346 } while (!icp_rm_try_update(icp, old_state, new_state));
347
348 /* Pass rejects to virtual mode */
349 if (reject && reject != XICS_IPI) {
350 icp->rm_action |= XICS_RM_REJECT;
351 icp->rm_reject = reject;
352 }
353 bail:
354 return check_too_hard(xics, icp);
355}
356
357int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
358{
359 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
360 struct kvmppc_icp *icp = vcpu->arch.icp;
361 struct kvmppc_ics *ics;
362 struct ics_irq_state *state;
363 u32 irq = xirr & 0x00ffffff;
364 u16 src;
365
366 if (!xics || !xics->real_mode)
367 return H_TOO_HARD;
368
369 /*
370 * ICP State: EOI
371 *
372 * Note: If EOI is incorrectly used by SW to lower the CPPR
373 * value (ie more favored), we do not check for rejection of
374 * a pending interrupt, this is a SW error and PAPR sepcifies
375 * that we don't have to deal with it.
376 *
377 * The sending of an EOI to the ICS is handled after the
378 * CPPR update
379 *
380 * ICP State: Down_CPPR which we handle
381 * in a separate function as it's shared with H_CPPR.
382 */
383 icp_rm_down_cppr(xics, icp, xirr >> 24);
384
385 /* IPIs have no EOI */
386 if (irq == XICS_IPI)
387 goto bail;
388 /*
389 * EOI handling: If the interrupt is still asserted, we need to
390 * resend it. We can take a lockless "peek" at the ICS state here.
391 *
392 * "Message" interrupts will never have "asserted" set
393 */
394 ics = kvmppc_xics_find_ics(xics, irq, &src);
395 if (!ics)
396 goto bail;
397 state = &ics->irq_state[src];
398
399 /* Still asserted, resend it, we make it look like a reject */
400 if (state->asserted) {
401 icp->rm_action |= XICS_RM_REJECT;
402 icp->rm_reject = irq;
403 }
404 bail:
405 return check_too_hard(xics, icp);
406}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e33d11f1b977..b02f91e4c70d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
79 * * 79 * *
80 *****************************************************************************/ 80 *****************************************************************************/
81 81
82#define XICS_XIRR 4
83#define XICS_QIRR 0xc
84#define XICS_IPI 2 /* interrupt source # for IPIs */
85
86/* 82/*
87 * We come in here when wakened from nap mode on a secondary hw thread. 83 * We come in here when wakened from nap mode on a secondary hw thread.
88 * Relocation is off and most register values are lost. 84 * Relocation is off and most register values are lost.
@@ -101,50 +97,51 @@ kvm_start_guest:
101 li r0,1 97 li r0,1
102 stb r0,PACA_NAPSTATELOST(r13) 98 stb r0,PACA_NAPSTATELOST(r13)
103 99
104 /* get vcpu pointer, NULL if we have no vcpu to run */ 100 /* were we napping due to cede? */
105 ld r4,HSTATE_KVM_VCPU(r13) 101 lbz r0,HSTATE_NAPPING(r13)
106 cmpdi cr1,r4,0 102 cmpwi r0,0
103 bne kvm_end_cede
104
105 /*
106 * We weren't napping due to cede, so this must be a secondary
107 * thread being woken up to run a guest, or being woken up due
108 * to a stray IPI. (Or due to some machine check or hypervisor
109 * maintenance interrupt while the core is in KVM.)
110 */
107 111
108 /* Check the wake reason in SRR1 to see why we got here */ 112 /* Check the wake reason in SRR1 to see why we got here */
109 mfspr r3,SPRN_SRR1 113 mfspr r3,SPRN_SRR1
110 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ 114 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
111 cmpwi r3,4 /* was it an external interrupt? */ 115 cmpwi r3,4 /* was it an external interrupt? */
112 bne 27f 116 bne 27f /* if not */
113 117 ld r5,HSTATE_XICS_PHYS(r13)
114 /* 118 li r7,XICS_XIRR /* if it was an external interrupt, */
115 * External interrupt - for now assume it is an IPI, since we
116 * should never get any other interrupts sent to offline threads.
117 * Only do this for secondary threads.
118 */
119 beq cr1,25f
120 lwz r3,VCPU_PTID(r4)
121 cmpwi r3,0
122 beq 27f
12325: ld r5,HSTATE_XICS_PHYS(r13)
124 li r0,0xff
125 li r6,XICS_QIRR
126 li r7,XICS_XIRR
127 lwzcix r8,r5,r7 /* get and ack the interrupt */ 119 lwzcix r8,r5,r7 /* get and ack the interrupt */
128 sync 120 sync
129 clrldi. r9,r8,40 /* get interrupt source ID. */ 121 clrldi. r9,r8,40 /* get interrupt source ID. */
130 beq 27f /* none there? */ 122 beq 28f /* none there? */
131 cmpwi r9,XICS_IPI 123 cmpwi r9,XICS_IPI /* was it an IPI? */
132 bne 26f 124 bne 29f
125 li r0,0xff
126 li r6,XICS_MFRR
133 stbcix r0,r5,r6 /* clear IPI */ 127 stbcix r0,r5,r6 /* clear IPI */
13426: stwcix r8,r5,r7 /* EOI the interrupt */ 128 stwcix r8,r5,r7 /* EOI the interrupt */
135 129 sync /* order loading of vcpu after that */
13627: /* XXX should handle hypervisor maintenance interrupts etc. here */
137 130
138 /* reload vcpu pointer after clearing the IPI */ 131 /* get vcpu pointer, NULL if we have no vcpu to run */
139 ld r4,HSTATE_KVM_VCPU(r13) 132 ld r4,HSTATE_KVM_VCPU(r13)
140 cmpdi r4,0 133 cmpdi r4,0
141 /* if we have no vcpu to run, go back to sleep */ 134 /* if we have no vcpu to run, go back to sleep */
142 beq kvm_no_guest 135 beq kvm_no_guest
136 b kvmppc_hv_entry
143 137
144 /* were we napping due to cede? */ 13827: /* XXX should handle hypervisor maintenance interrupts etc. here */
145 lbz r0,HSTATE_NAPPING(r13) 139 b kvm_no_guest
146 cmpwi r0,0 14028: /* SRR1 said external but ICP said nope?? */
147 bne kvm_end_cede 141 b kvm_no_guest
14229: /* External non-IPI interrupt to offline secondary thread? help?? */
143 stw r8,HSTATE_SAVED_XIRR(r13)
144 b kvm_no_guest
148 145
149.global kvmppc_hv_entry 146.global kvmppc_hv_entry
150kvmppc_hv_entry: 147kvmppc_hv_entry:
@@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
260 lwz r5, LPPACA_YIELDCOUNT(r3) 257 lwz r5, LPPACA_YIELDCOUNT(r3)
261 addi r5, r5, 1 258 addi r5, r5, 1
262 stw r5, LPPACA_YIELDCOUNT(r3) 259 stw r5, LPPACA_YIELDCOUNT(r3)
260 li r6, 1
261 stb r6, VCPU_VPA_DIRTY(r4)
26325: 26225:
264 /* Load up DAR and DSISR */ 263 /* Load up DAR and DSISR */
265 ld r5, VCPU_DAR(r4) 264 ld r5, VCPU_DAR(r4)
@@ -485,20 +484,20 @@ toc_tlbie_lock:
485 mtctr r6 484 mtctr r6
486 mtxer r7 485 mtxer r7
487 486
487 ld r10, VCPU_PC(r4)
488 ld r11, VCPU_MSR(r4)
488kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ 489kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
489 ld r6, VCPU_SRR0(r4) 490 ld r6, VCPU_SRR0(r4)
490 ld r7, VCPU_SRR1(r4) 491 ld r7, VCPU_SRR1(r4)
491 ld r10, VCPU_PC(r4)
492 ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
493 492
493 /* r11 = vcpu->arch.msr & ~MSR_HV */
494 rldicl r11, r11, 63 - MSR_HV_LG, 1 494 rldicl r11, r11, 63 - MSR_HV_LG, 1
495 rotldi r11, r11, 1 + MSR_HV_LG 495 rotldi r11, r11, 1 + MSR_HV_LG
496 ori r11, r11, MSR_ME 496 ori r11, r11, MSR_ME
497 497
498 /* Check if we can deliver an external or decrementer interrupt now */ 498 /* Check if we can deliver an external or decrementer interrupt now */
499 ld r0,VCPU_PENDING_EXC(r4) 499 ld r0,VCPU_PENDING_EXC(r4)
500 li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) 500 lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
501 oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
502 and r0,r0,r8 501 and r0,r0,r8
503 cmpdi cr1,r0,0 502 cmpdi cr1,r0,0
504 andi. r0,r11,MSR_EE 503 andi. r0,r11,MSR_EE
@@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
526 /* Move SRR0 and SRR1 into the respective regs */ 525 /* Move SRR0 and SRR1 into the respective regs */
5275: mtspr SPRN_SRR0, r6 5265: mtspr SPRN_SRR0, r6
528 mtspr SPRN_SRR1, r7 527 mtspr SPRN_SRR1, r7
529 li r0,0
530 stb r0,VCPU_CEDED(r4) /* cancel cede */
531 528
532fast_guest_return: 529fast_guest_return:
530 li r0,0
531 stb r0,VCPU_CEDED(r4) /* cancel cede */
533 mtspr SPRN_HSRR0,r10 532 mtspr SPRN_HSRR0,r10
534 mtspr SPRN_HSRR1,r11 533 mtspr SPRN_HSRR1,r11
535 534
@@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
676 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL 675 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
677 beq hcall_try_real_mode 676 beq hcall_try_real_mode
678 677
679 /* Check for mediated interrupts (could be done earlier really ...) */ 678 /* Only handle external interrupts here on arch 206 and later */
680BEGIN_FTR_SECTION 679BEGIN_FTR_SECTION
681 cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL 680 b ext_interrupt_to_host
682 bne+ 1f 681END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
683 andi. r0,r11,MSR_EE 682
684 beq 1f 683 /* External interrupt ? */
685 mfspr r5,SPRN_LPCR 684 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
686 andi. r0,r5,LPCR_MER 685 bne+ ext_interrupt_to_host
687 bne bounce_ext_interrupt 686
6881: 687 /* External interrupt, first check for host_ipi. If this is
689END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 688 * set, we know the host wants us out so let's do it now
689 */
690do_ext_interrupt:
691 lbz r0, HSTATE_HOST_IPI(r13)
692 cmpwi r0, 0
693 bne ext_interrupt_to_host
694
695 /* Now read the interrupt from the ICP */
696 ld r5, HSTATE_XICS_PHYS(r13)
697 li r7, XICS_XIRR
698 cmpdi r5, 0
699 beq- ext_interrupt_to_host
700 lwzcix r3, r5, r7
701 rlwinm. r0, r3, 0, 0xffffff
702 sync
703 beq 3f /* if nothing pending in the ICP */
704
705 /* We found something in the ICP...
706 *
707 * If it's not an IPI, stash it in the PACA and return to
708 * the host, we don't (yet) handle directing real external
709 * interrupts directly to the guest
710 */
711 cmpwi r0, XICS_IPI
712 bne ext_stash_for_host
713
714 /* It's an IPI, clear the MFRR and EOI it */
715 li r0, 0xff
716 li r6, XICS_MFRR
717 stbcix r0, r5, r6 /* clear the IPI */
718 stwcix r3, r5, r7 /* EOI it */
719 sync
720
721 /* We need to re-check host IPI now in case it got set in the
722 * meantime. If it's clear, we bounce the interrupt to the
723 * guest
724 */
725 lbz r0, HSTATE_HOST_IPI(r13)
726 cmpwi r0, 0
727 bne- 1f
728
729 /* Allright, looks like an IPI for the guest, we need to set MER */
7303:
731 /* Check if any CPU is heading out to the host, if so head out too */
732 ld r5, HSTATE_KVM_VCORE(r13)
733 lwz r0, VCORE_ENTRY_EXIT(r5)
734 cmpwi r0, 0x100
735 bge ext_interrupt_to_host
736
737 /* See if there is a pending interrupt for the guest */
738 mfspr r8, SPRN_LPCR
739 ld r0, VCPU_PENDING_EXC(r9)
740 /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
741 rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
742 rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
743 beq 2f
744
745 /* And if the guest EE is set, we can deliver immediately, else
746 * we return to the guest with MER set
747 */
748 andi. r0, r11, MSR_EE
749 beq 2f
750 mtspr SPRN_SRR0, r10
751 mtspr SPRN_SRR1, r11
752 li r10, BOOK3S_INTERRUPT_EXTERNAL
753 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
754 rotldi r11, r11, 63
7552: mr r4, r9
756 mtspr SPRN_LPCR, r8
757 b fast_guest_return
758
759 /* We raced with the host, we need to resend that IPI, bummer */
7601: li r0, IPI_PRIORITY
761 stbcix r0, r5, r6 /* set the IPI */
762 sync
763 b ext_interrupt_to_host
764
765ext_stash_for_host:
766 /* It's not an IPI and it's for the host, stash it in the PACA
767 * before exit, it will be picked up by the host ICP driver
768 */
769 stw r3, HSTATE_SAVED_XIRR(r13)
770ext_interrupt_to_host:
690 771
691guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 772guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
692 /* Save DEC */ 773 /* Save DEC */
@@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
829 beq 44f 910 beq 44f
830 ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ 911 ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
831 li r0,IPI_PRIORITY 912 li r0,IPI_PRIORITY
832 li r7,XICS_QIRR 913 li r7,XICS_MFRR
833 stbcix r0,r7,r8 /* trigger the IPI */ 914 stbcix r0,r7,r8 /* trigger the IPI */
83444: srdi. r3,r3,1 91544: srdi. r3,r3,1
835 addi r6,r6,PACA_SIZE 916 addi r6,r6,PACA_SIZE
@@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1018 lwz r3, LPPACA_YIELDCOUNT(r8) 1099 lwz r3, LPPACA_YIELDCOUNT(r8)
1019 addi r3, r3, 1 1100 addi r3, r3, 1
1020 stw r3, LPPACA_YIELDCOUNT(r8) 1101 stw r3, LPPACA_YIELDCOUNT(r8)
1102 li r3, 1
1103 stb r3, VCPU_VPA_DIRTY(r9)
102125: 110425:
1022 /* Save PMU registers if requested */ 1105 /* Save PMU registers if requested */
1023 /* r8 and cr0.eq are live here */ 1106 /* r8 and cr0.eq are live here */
@@ -1350,11 +1433,19 @@ hcall_real_table:
1350 .long 0 /* 0x58 */ 1433 .long 0 /* 0x58 */
1351 .long 0 /* 0x5c */ 1434 .long 0 /* 0x5c */
1352 .long 0 /* 0x60 */ 1435 .long 0 /* 0x60 */
1353 .long 0 /* 0x64 */ 1436#ifdef CONFIG_KVM_XICS
1354 .long 0 /* 0x68 */ 1437 .long .kvmppc_rm_h_eoi - hcall_real_table
1355 .long 0 /* 0x6c */ 1438 .long .kvmppc_rm_h_cppr - hcall_real_table
1356 .long 0 /* 0x70 */ 1439 .long .kvmppc_rm_h_ipi - hcall_real_table
1357 .long 0 /* 0x74 */ 1440 .long 0 /* 0x70 - H_IPOLL */
1441 .long .kvmppc_rm_h_xirr - hcall_real_table
1442#else
1443 .long 0 /* 0x64 - H_EOI */
1444 .long 0 /* 0x68 - H_CPPR */
1445 .long 0 /* 0x6c - H_IPI */
1446 .long 0 /* 0x70 - H_IPOLL */
1447 .long 0 /* 0x74 - H_XIRR */
1448#endif
1358 .long 0 /* 0x78 */ 1449 .long 0 /* 0x78 */
1359 .long 0 /* 0x7c */ 1450 .long 0 /* 0x7c */
1360 .long 0 /* 0x80 */ 1451 .long 0 /* 0x80 */
@@ -1405,15 +1496,6 @@ ignore_hdec:
1405 mr r4,r9 1496 mr r4,r9
1406 b fast_guest_return 1497 b fast_guest_return
1407 1498
1408bounce_ext_interrupt:
1409 mr r4,r9
1410 mtspr SPRN_SRR0,r10
1411 mtspr SPRN_SRR1,r11
1412 li r10,BOOK3S_INTERRUPT_EXTERNAL
1413 li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1414 rotldi r11,r11,63
1415 b fast_guest_return
1416
1417_GLOBAL(kvmppc_h_set_dabr) 1499_GLOBAL(kvmppc_h_set_dabr)
1418 std r4,VCPU_DABR(r3) 1500 std r4,VCPU_DABR(r3)
1419 /* Work around P7 bug where DABR can get corrupted on mtspr */ 1501 /* Work around P7 bug where DABR can get corrupted on mtspr */
@@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1519 b . 1601 b .
1520 1602
1521kvm_end_cede: 1603kvm_end_cede:
1604 /* get vcpu pointer */
1605 ld r4, HSTATE_KVM_VCPU(r13)
1606
1522 /* Woken by external or decrementer interrupt */ 1607 /* Woken by external or decrementer interrupt */
1523 ld r1, HSTATE_HOST_R1(r13) 1608 ld r1, HSTATE_HOST_R1(r13)
1524 1609
@@ -1558,6 +1643,16 @@ kvm_end_cede:
1558 li r0,0 1643 li r0,0
1559 stb r0,HSTATE_NAPPING(r13) 1644 stb r0,HSTATE_NAPPING(r13)
1560 1645
1646 /* Check the wake reason in SRR1 to see why we got here */
1647 mfspr r3, SPRN_SRR1
1648 rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */
1649 cmpwi r3, 4 /* was it an external interrupt? */
1650 li r12, BOOK3S_INTERRUPT_EXTERNAL
1651 mr r9, r4
1652 ld r10, VCPU_PC(r9)
1653 ld r11, VCPU_MSR(r9)
1654 beq do_ext_interrupt /* if so */
1655
1561 /* see if any other thread is already exiting */ 1656 /* see if any other thread is already exiting */
1562 lwz r0,VCORE_ENTRY_EXIT(r5) 1657 lwz r0,VCORE_ENTRY_EXIT(r5)
1563 cmpwi r0,0x100 1658 cmpwi r0,0x100
@@ -1577,8 +1672,7 @@ kvm_cede_prodded:
1577 1672
1578 /* we've ceded but we want to give control to the host */ 1673 /* we've ceded but we want to give control to the host */
1579kvm_cede_exit: 1674kvm_cede_exit:
1580 li r3,H_TOO_HARD 1675 b hcall_real_fallback
1581 blr
1582 1676
1583 /* Try to handle a machine check in real mode */ 1677 /* Try to handle a machine check in real mode */
1584machine_check_realmode: 1678machine_check_realmode:
@@ -1626,7 +1720,7 @@ secondary_nap:
1626 beq 37f 1720 beq 37f
1627 sync 1721 sync
1628 li r0, 0xff 1722 li r0, 0xff
1629 li r6, XICS_QIRR 1723 li r6, XICS_MFRR
1630 stbcix r0, r5, r6 /* clear the IPI */ 1724 stbcix r0, r5, r6 /* clear the IPI */
1631 stwcix r3, r5, r7 /* EOI it */ 1725 stwcix r3, r5, r7 /* EOI it */
163237: sync 172637: sync
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index dbdc15aa8127..bdc40b8e77d9 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -762,9 +762,7 @@ program_interrupt:
762 run->exit_reason = KVM_EXIT_MMIO; 762 run->exit_reason = KVM_EXIT_MMIO;
763 r = RESUME_HOST_NV; 763 r = RESUME_HOST_NV;
764 break; 764 break;
765 case EMULATE_DO_PAPR: 765 case EMULATE_EXIT_USER:
766 run->exit_reason = KVM_EXIT_PAPR_HCALL;
767 vcpu->arch.hcall_needed = 1;
768 r = RESUME_HOST_NV; 766 r = RESUME_HOST_NV;
769 break; 767 break;
770 default: 768 default:
@@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1283 1281
1284void kvmppc_core_commit_memory_region(struct kvm *kvm, 1282void kvmppc_core_commit_memory_region(struct kvm *kvm,
1285 struct kvm_userspace_memory_region *mem, 1283 struct kvm_userspace_memory_region *mem,
1286 struct kvm_memory_slot old) 1284 const struct kvm_memory_slot *old)
1287{ 1285{
1288} 1286}
1289 1287
@@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1298{ 1296{
1299#ifdef CONFIG_PPC64 1297#ifdef CONFIG_PPC64
1300 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1298 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1299 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
1301#endif 1300#endif
1302 1301
1303 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 1302 if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ee02b30878ed..b24309c6c2d5 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
227 return EMULATE_DONE; 227 return EMULATE_DONE;
228} 228}
229 229
230static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
231{
232 long rc = kvmppc_xics_hcall(vcpu, cmd);
233 kvmppc_set_gpr(vcpu, 3, rc);
234 return EMULATE_DONE;
235}
236
230int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) 237int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
231{ 238{
232 switch (cmd) { 239 switch (cmd) {
@@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
246 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 253 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
247 vcpu->stat.halt_wakeup++; 254 vcpu->stat.halt_wakeup++;
248 return EMULATE_DONE; 255 return EMULATE_DONE;
256 case H_XIRR:
257 case H_CPPR:
258 case H_EOI:
259 case H_IPI:
260 if (kvmppc_xics_enabled(vcpu))
261 return kvmppc_h_pr_xics_hcall(vcpu, cmd);
262 break;
263 case H_RTAS:
264 if (list_empty(&vcpu->kvm->arch.rtas_tokens))
265 return RESUME_HOST;
266 if (kvmppc_rtas_hcall(vcpu))
267 break;
268 kvmppc_set_gpr(vcpu, 3, 0);
269 return EMULATE_DONE;
249 } 270 }
250 271
251 return EMULATE_FAIL; 272 return EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 000000000000..3219ba895246
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,274 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/kernel.h>
10#include <linux/kvm_host.h>
11#include <linux/kvm.h>
12#include <linux/err.h>
13
14#include <asm/uaccess.h>
15#include <asm/kvm_book3s.h>
16#include <asm/kvm_ppc.h>
17#include <asm/hvcall.h>
18#include <asm/rtas.h>
19
20#ifdef CONFIG_KVM_XICS
21static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
22{
23 u32 irq, server, priority;
24 int rc;
25
26 if (args->nargs != 3 || args->nret != 1) {
27 rc = -3;
28 goto out;
29 }
30
31 irq = args->args[0];
32 server = args->args[1];
33 priority = args->args[2];
34
35 rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
36 if (rc)
37 rc = -3;
38out:
39 args->rets[0] = rc;
40}
41
42static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
43{
44 u32 irq, server, priority;
45 int rc;
46
47 if (args->nargs != 1 || args->nret != 3) {
48 rc = -3;
49 goto out;
50 }
51
52 irq = args->args[0];
53
54 server = priority = 0;
55 rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
56 if (rc) {
57 rc = -3;
58 goto out;
59 }
60
61 args->rets[1] = server;
62 args->rets[2] = priority;
63out:
64 args->rets[0] = rc;
65}
66
67static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
68{
69 u32 irq;
70 int rc;
71
72 if (args->nargs != 1 || args->nret != 1) {
73 rc = -3;
74 goto out;
75 }
76
77 irq = args->args[0];
78
79 rc = kvmppc_xics_int_off(vcpu->kvm, irq);
80 if (rc)
81 rc = -3;
82out:
83 args->rets[0] = rc;
84}
85
86static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
87{
88 u32 irq;
89 int rc;
90
91 if (args->nargs != 1 || args->nret != 1) {
92 rc = -3;
93 goto out;
94 }
95
96 irq = args->args[0];
97
98 rc = kvmppc_xics_int_on(vcpu->kvm, irq);
99 if (rc)
100 rc = -3;
101out:
102 args->rets[0] = rc;
103}
104#endif /* CONFIG_KVM_XICS */
105
/* An RTAS service implemented in the kernel, looked up by name. */
struct rtas_handler {
	/* Performs the call; results are written through args->rets */
	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);

	/* Service name that userspace binds a token to */
	char *name;
};
110
111static struct rtas_handler rtas_handlers[] = {
112#ifdef CONFIG_KVM_XICS
113 { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
114 { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
115 { .name = "ibm,int-off", .handler = kvm_rtas_int_off },
116 { .name = "ibm,int-on", .handler = kvm_rtas_int_on },
117#endif
118};
119
120struct rtas_token_definition {
121 struct list_head list;
122 struct rtas_handler *handler;
123 u64 token;
124};
125
126static int rtas_name_matches(char *s1, char *s2)
127{
128 struct kvm_rtas_token_args args;
129 return !strncmp(s1, s2, sizeof(args.name));
130}
131
132static int rtas_token_undefine(struct kvm *kvm, char *name)
133{
134 struct rtas_token_definition *d, *tmp;
135
136 lockdep_assert_held(&kvm->lock);
137
138 list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
139 if (rtas_name_matches(d->handler->name, name)) {
140 list_del(&d->list);
141 kfree(d);
142 return 0;
143 }
144 }
145
146 /* It's not an error to undefine an undefined token */
147 return 0;
148}
149
150static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
151{
152 struct rtas_token_definition *d;
153 struct rtas_handler *h = NULL;
154 bool found;
155 int i;
156
157 lockdep_assert_held(&kvm->lock);
158
159 list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
160 if (d->token == token)
161 return -EEXIST;
162 }
163
164 found = false;
165 for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
166 h = &rtas_handlers[i];
167 if (rtas_name_matches(h->name, name)) {
168 found = true;
169 break;
170 }
171 }
172
173 if (!found)
174 return -ENOENT;
175
176 d = kzalloc(sizeof(*d), GFP_KERNEL);
177 if (!d)
178 return -ENOMEM;
179
180 d->handler = h;
181 d->token = token;
182
183 list_add_tail(&d->list, &kvm->arch.rtas_tokens);
184
185 return 0;
186}
187
188int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
189{
190 struct kvm_rtas_token_args args;
191 int rc;
192
193 if (copy_from_user(&args, argp, sizeof(args)))
194 return -EFAULT;
195
196 mutex_lock(&kvm->lock);
197
198 if (args.token)
199 rc = rtas_token_define(kvm, args.name, args.token);
200 else
201 rc = rtas_token_undefine(kvm, args.name);
202
203 mutex_unlock(&kvm->lock);
204
205 return rc;
206}
207
208int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
209{
210 struct rtas_token_definition *d;
211 struct rtas_args args;
212 rtas_arg_t *orig_rets;
213 gpa_t args_phys;
214 int rc;
215
216 /* r4 contains the guest physical address of the RTAS args */
217 args_phys = kvmppc_get_gpr(vcpu, 4);
218
219 rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
220 if (rc)
221 goto fail;
222
223 /*
224 * args->rets is a pointer into args->args. Now that we've
225 * copied args we need to fix it up to point into our copy,
226 * not the guest args. We also need to save the original
227 * value so we can restore it on the way out.
228 */
229 orig_rets = args.rets;
230 args.rets = &args.args[args.nargs];
231
232 mutex_lock(&vcpu->kvm->lock);
233
234 rc = -ENOENT;
235 list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
236 if (d->token == args.token) {
237 d->handler->handler(vcpu, &args);
238 rc = 0;
239 break;
240 }
241 }
242
243 mutex_unlock(&vcpu->kvm->lock);
244
245 if (rc == 0) {
246 args.rets = orig_rets;
247 rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
248 if (rc)
249 goto fail;
250 }
251
252 return rc;
253
254fail:
255 /*
256 * We only get here if the guest has called RTAS with a bogus
257 * args pointer. That means we can't get to the args, and so we
258 * can't fail the RTAS call. So fail right out to userspace,
259 * which should kill the guest.
260 */
261 return rc;
262}
263
264void kvmppc_rtas_tokens_free(struct kvm *kvm)
265{
266 struct rtas_token_definition *d, *tmp;
267
268 lockdep_assert_held(&kvm->lock);
269
270 list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
271 list_del(&d->list);
272 kfree(d);
273 }
274}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 000000000000..f7a103756618
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1270 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/kvm_host.h>
12#include <linux/err.h>
13#include <linux/gfp.h>
14#include <linux/anon_inodes.h>
15
16#include <asm/uaccess.h>
17#include <asm/kvm_book3s.h>
18#include <asm/kvm_ppc.h>
19#include <asm/hvcall.h>
20#include <asm/xics.h>
21#include <asm/debug.h>
22
23#include <linux/debugfs.h>
24#include <linux/seq_file.h>
25
26#include "book3s_xics.h"
27
/*
 * Debug tracing: while the condition below is 1, XICS_DBG() expands to an
 * empty statement. Change it to 0 to route the messages to trace_printk().
 */
28#if 1
29#define XICS_DBG(fmt...) do { } while (0)
30#else
31#define XICS_DBG(fmt...) trace_printk(fmt)
32#endif
33
/*
 * Values copied into xics->real_mode and xics->real_mode_dbg by
 * kvmppc_xics_create() when real mode support is enabled.
 */
34#define ENABLE_REALMODE true
35#define DEBUG_REALMODE false
36
37/*
38 * LOCKING
39 * =======
40 *
41 * Each ICS has a mutex protecting the information about the IRQ
42 * sources and avoiding simultaneous deliveries of the same interrupt.
43 *
44 * ICP operations are done via a single compare & swap transaction
45 * (most ICP state fits in the union kvmppc_icp_state)
46 */
47
48/*
49 * TODO
50 * ====
51 *
52 * - To speed up resends, keep a bitmap of "resend" set bits in the
53 * ICS
54 *
55 * - Speed up server# -> ICP lookup (array ? hash table ?)
56 *
57 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
58 * locks array to improve scalability
59 */
60
61/* -- ICS routines -- */
62
63static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
64 u32 new_irq);
65
66static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
67 bool report_status)
68{
69 struct ics_irq_state *state;
70 struct kvmppc_ics *ics;
71 u16 src;
72
73 XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
74
75 ics = kvmppc_xics_find_ics(xics, irq, &src);
76 if (!ics) {
77 XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
78 return -EINVAL;
79 }
80 state = &ics->irq_state[src];
81 if (!state->exists)
82 return -EINVAL;
83
84 if (report_status)
85 return state->asserted;
86
87 /*
88 * We set state->asserted locklessly. This should be fine as
89 * we are the only setter, thus concurrent access is undefined
90 * to begin with.
91 */
92 if (level == KVM_INTERRUPT_SET_LEVEL)
93 state->asserted = 1;
94 else if (level == KVM_INTERRUPT_UNSET) {
95 state->asserted = 0;
96 return 0;
97 }
98
99 /* Attempt delivery */
100 icp_deliver_irq(xics, NULL, irq);
101
102 return state->asserted;
103}
104
105static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
106 struct kvmppc_icp *icp)
107{
108 int i;
109
110 mutex_lock(&ics->lock);
111
112 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
113 struct ics_irq_state *state = &ics->irq_state[i];
114
115 if (!state->resend)
116 continue;
117
118 XICS_DBG("resend %#x prio %#x\n", state->number,
119 state->priority);
120
121 mutex_unlock(&ics->lock);
122 icp_deliver_irq(xics, icp, state->number);
123 mutex_lock(&ics->lock);
124 }
125
126 mutex_unlock(&ics->lock);
127}
128
129static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
130 struct ics_irq_state *state,
131 u32 server, u32 priority, u32 saved_priority)
132{
133 bool deliver;
134
135 mutex_lock(&ics->lock);
136
137 state->server = server;
138 state->priority = priority;
139 state->saved_priority = saved_priority;
140 deliver = false;
141 if ((state->masked_pending || state->resend) && priority != MASKED) {
142 state->masked_pending = 0;
143 deliver = true;
144 }
145
146 mutex_unlock(&ics->lock);
147
148 return deliver;
149}
150
151int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
152{
153 struct kvmppc_xics *xics = kvm->arch.xics;
154 struct kvmppc_icp *icp;
155 struct kvmppc_ics *ics;
156 struct ics_irq_state *state;
157 u16 src;
158
159 if (!xics)
160 return -ENODEV;
161
162 ics = kvmppc_xics_find_ics(xics, irq, &src);
163 if (!ics)
164 return -EINVAL;
165 state = &ics->irq_state[src];
166
167 icp = kvmppc_xics_find_server(kvm, server);
168 if (!icp)
169 return -EINVAL;
170
171 XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
172 irq, server, priority,
173 state->masked_pending, state->resend);
174
175 if (write_xive(xics, ics, state, server, priority, priority))
176 icp_deliver_irq(xics, icp, irq);
177
178 return 0;
179}
180
181int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
182{
183 struct kvmppc_xics *xics = kvm->arch.xics;
184 struct kvmppc_ics *ics;
185 struct ics_irq_state *state;
186 u16 src;
187
188 if (!xics)
189 return -ENODEV;
190
191 ics = kvmppc_xics_find_ics(xics, irq, &src);
192 if (!ics)
193 return -EINVAL;
194 state = &ics->irq_state[src];
195
196 mutex_lock(&ics->lock);
197 *server = state->server;
198 *priority = state->priority;
199 mutex_unlock(&ics->lock);
200
201 return 0;
202}
203
204int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
205{
206 struct kvmppc_xics *xics = kvm->arch.xics;
207 struct kvmppc_icp *icp;
208 struct kvmppc_ics *ics;
209 struct ics_irq_state *state;
210 u16 src;
211
212 if (!xics)
213 return -ENODEV;
214
215 ics = kvmppc_xics_find_ics(xics, irq, &src);
216 if (!ics)
217 return -EINVAL;
218 state = &ics->irq_state[src];
219
220 icp = kvmppc_xics_find_server(kvm, state->server);
221 if (!icp)
222 return -EINVAL;
223
224 if (write_xive(xics, ics, state, state->server, state->saved_priority,
225 state->saved_priority))
226 icp_deliver_irq(xics, icp, irq);
227
228 return 0;
229}
230
231int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
232{
233 struct kvmppc_xics *xics = kvm->arch.xics;
234 struct kvmppc_ics *ics;
235 struct ics_irq_state *state;
236 u16 src;
237
238 if (!xics)
239 return -ENODEV;
240
241 ics = kvmppc_xics_find_ics(xics, irq, &src);
242 if (!ics)
243 return -EINVAL;
244 state = &ics->irq_state[src];
245
246 write_xive(xics, ics, state, state->server, MASKED, state->priority);
247
248 return 0;
249}
250
251/* -- ICP routines, including hcalls -- */
252
253static inline bool icp_try_update(struct kvmppc_icp *icp,
254 union kvmppc_icp_state old,
255 union kvmppc_icp_state new,
256 bool change_self)
257{
258 bool success;
259
260 /* Calculate new output value */
261 new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
262
263 /* Attempt atomic update */
264 success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
265 if (!success)
266 goto bail;
267
268 XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
269 icp->server_num,
270 old.cppr, old.mfrr, old.pending_pri, old.xisr,
271 old.need_resend, old.out_ee);
272 XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
273 new.cppr, new.mfrr, new.pending_pri, new.xisr,
274 new.need_resend, new.out_ee);
275 /*
276 * Check for output state update
277 *
278 * Note that this is racy since another processor could be updating
279 * the state already. This is why we never clear the interrupt output
280 * here, we only ever set it. The clear only happens prior to doing
281 * an update and only by the processor itself. Currently we do it
282 * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
283 *
284 * We also do not try to figure out whether the EE state has changed,
285 * we unconditionally set it if the new state calls for it. The reason
286 * for that is that we opportunistically remove the pending interrupt
287 * flag when raising CPPR, so we need to set it back here if an
288 * interrupt is still pending.
289 */
290 if (new.out_ee) {
291 kvmppc_book3s_queue_irqprio(icp->vcpu,
292 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
293 if (!change_self)
294 kvmppc_fast_vcpu_kick(icp->vcpu);
295 }
296 bail:
297 return success;
298}
299
300static void icp_check_resend(struct kvmppc_xics *xics,
301 struct kvmppc_icp *icp)
302{
303 u32 icsid;
304
305 /* Order this load with the test for need_resend in the caller */
306 smp_rmb();
307 for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
308 struct kvmppc_ics *ics = xics->ics[icsid];
309
310 if (!test_and_clear_bit(icsid, icp->resend_map))
311 continue;
312 if (!ics)
313 continue;
314 ics_check_resend(xics, ics, icp);
315 }
316}
317
318static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
319 u32 *reject)
320{
321 union kvmppc_icp_state old_state, new_state;
322 bool success;
323
324 XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
325 icp->server_num);
326
327 do {
328 old_state = new_state = ACCESS_ONCE(icp->state);
329
330 *reject = 0;
331
332 /* See if we can deliver */
333 success = new_state.cppr > priority &&
334 new_state.mfrr > priority &&
335 new_state.pending_pri > priority;
336
337 /*
338 * If we can, check for a rejection and perform the
339 * delivery
340 */
341 if (success) {
342 *reject = new_state.xisr;
343 new_state.xisr = irq;
344 new_state.pending_pri = priority;
345 } else {
346 /*
347 * If we failed to deliver we set need_resend
348 * so a subsequent CPPR state change causes us
349 * to try a new delivery.
350 */
351 new_state.need_resend = true;
352 }
353
354 } while (!icp_try_update(icp, old_state, new_state, false));
355
356 return success;
357}
358
359static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
360 u32 new_irq)
361{
362 struct ics_irq_state *state;
363 struct kvmppc_ics *ics;
364 u32 reject;
365 u16 src;
366
367 /*
368 * This is used both for initial delivery of an interrupt and
369 * for subsequent rejection.
370 *
371 * Rejection can be racy vs. resends. We have evaluated the
372 * rejection in an atomic ICP transaction which is now complete,
373 * so potentially the ICP can already accept the interrupt again.
374 *
375 * So we need to retry the delivery. Essentially the reject path
376 * boils down to a failed delivery. Always.
377 *
378 * Now the interrupt could also have moved to a different target,
379 * thus we may need to re-do the ICP lookup as well
380 */
381
382 again:
383 /* Get the ICS state and lock it */
384 ics = kvmppc_xics_find_ics(xics, new_irq, &src);
385 if (!ics) {
386 XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
387 return;
388 }
389 state = &ics->irq_state[src];
390
391 /* Get a lock on the ICS */
392 mutex_lock(&ics->lock);
393
394 /* Get our server */
395 if (!icp || state->server != icp->server_num) {
396 icp = kvmppc_xics_find_server(xics->kvm, state->server);
397 if (!icp) {
398 pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
399 new_irq, state->server);
400 goto out;
401 }
402 }
403
404 /* Clear the resend bit of that interrupt */
405 state->resend = 0;
406
407 /*
408 * If masked, bail out
409 *
410 * Note: PAPR doesn't mention anything about masked pending
411 * when doing a resend, only when doing a delivery.
412 *
413 * However that would have the effect of losing a masked
414 * interrupt that was rejected and isn't consistent with
415 * the whole masked_pending business which is about not
416 * losing interrupts that occur while masked.
417 *
418 * I don't differenciate normal deliveries and resends, this
419 * implementation will differ from PAPR and not lose such
420 * interrupts.
421 */
422 if (state->priority == MASKED) {
423 XICS_DBG("irq %#x masked pending\n", new_irq);
424 state->masked_pending = 1;
425 goto out;
426 }
427
428 /*
429 * Try the delivery, this will set the need_resend flag
430 * in the ICP as part of the atomic transaction if the
431 * delivery is not possible.
432 *
433 * Note that if successful, the new delivery might have itself
434 * rejected an interrupt that was "delivered" before we took the
435 * icp mutex.
436 *
437 * In this case we do the whole sequence all over again for the
438 * new guy. We cannot assume that the rejected interrupt is less
439 * favored than the new one, and thus doesn't need to be delivered,
440 * because by the time we exit icp_try_to_deliver() the target
441 * processor may well have alrady consumed & completed it, and thus
442 * the rejected interrupt might actually be already acceptable.
443 */
444 if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
445 /*
446 * Delivery was successful, did we reject somebody else ?
447 */
448 if (reject && reject != XICS_IPI) {
449 mutex_unlock(&ics->lock);
450 new_irq = reject;
451 goto again;
452 }
453 } else {
454 /*
455 * We failed to deliver the interrupt we need to set the
456 * resend map bit and mark the ICS state as needing a resend
457 */
458 set_bit(ics->icsid, icp->resend_map);
459 state->resend = 1;
460
461 /*
462 * If the need_resend flag got cleared in the ICP some time
463 * between icp_try_to_deliver() atomic update and now, then
464 * we know it might have missed the resend_map bit. So we
465 * retry
466 */
467 smp_mb();
468 if (!icp->state.need_resend) {
469 mutex_unlock(&ics->lock);
470 goto again;
471 }
472 }
473 out:
474 mutex_unlock(&ics->lock);
475}
476
477static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
478 u8 new_cppr)
479{
480 union kvmppc_icp_state old_state, new_state;
481 bool resend;
482
483 /*
484 * This handles several related states in one operation:
485 *
486 * ICP State: Down_CPPR
487 *
488 * Load CPPR with new value and if the XISR is 0
489 * then check for resends:
490 *
491 * ICP State: Resend
492 *
493 * If MFRR is more favored than CPPR, check for IPIs
494 * and notify ICS of a potential resend. This is done
495 * asynchronously (when used in real mode, we will have
496 * to exit here).
497 *
498 * We do not handle the complete Check_IPI as documented
499 * here. In the PAPR, this state will be used for both
500 * Set_MFRR and Down_CPPR. However, we know that we aren't
501 * changing the MFRR state here so we don't need to handle
502 * the case of an MFRR causing a reject of a pending irq,
503 * this will have been handled when the MFRR was set in the
504 * first place.
505 *
506 * Thus we don't have to handle rejects, only resends.
507 *
508 * When implementing real mode for HV KVM, resend will lead to
509 * a H_TOO_HARD return and the whole transaction will be handled
510 * in virtual mode.
511 */
512 do {
513 old_state = new_state = ACCESS_ONCE(icp->state);
514
515 /* Down_CPPR */
516 new_state.cppr = new_cppr;
517
518 /*
519 * Cut down Resend / Check_IPI / IPI
520 *
521 * The logic is that we cannot have a pending interrupt
522 * trumped by an IPI at this point (see above), so we
523 * know that either the pending interrupt is already an
524 * IPI (in which case we don't care to override it) or
525 * it's either more favored than us or non existent
526 */
527 if (new_state.mfrr < new_cppr &&
528 new_state.mfrr <= new_state.pending_pri) {
529 WARN_ON(new_state.xisr != XICS_IPI &&
530 new_state.xisr != 0);
531 new_state.pending_pri = new_state.mfrr;
532 new_state.xisr = XICS_IPI;
533 }
534
535 /* Latch/clear resend bit */
536 resend = new_state.need_resend;
537 new_state.need_resend = 0;
538
539 } while (!icp_try_update(icp, old_state, new_state, true));
540
541 /*
542 * Now handle resend checks. Those are asynchronous to the ICP
543 * state update in HW (ie bus transactions) so we can handle them
544 * separately here too
545 */
546 if (resend)
547 icp_check_resend(xics, icp);
548}
549
550static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
551{
552 union kvmppc_icp_state old_state, new_state;
553 struct kvmppc_icp *icp = vcpu->arch.icp;
554 u32 xirr;
555
556 /* First, remove EE from the processor */
557 kvmppc_book3s_dequeue_irqprio(icp->vcpu,
558 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
559
560 /*
561 * ICP State: Accept_Interrupt
562 *
563 * Return the pending interrupt (if any) along with the
564 * current CPPR, then clear the XISR & set CPPR to the
565 * pending priority
566 */
567 do {
568 old_state = new_state = ACCESS_ONCE(icp->state);
569
570 xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
571 if (!old_state.xisr)
572 break;
573 new_state.cppr = new_state.pending_pri;
574 new_state.pending_pri = 0xff;
575 new_state.xisr = 0;
576
577 } while (!icp_try_update(icp, old_state, new_state, true));
578
579 XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
580
581 return xirr;
582}
583
584static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
585 unsigned long mfrr)
586{
587 union kvmppc_icp_state old_state, new_state;
588 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
589 struct kvmppc_icp *icp;
590 u32 reject;
591 bool resend;
592 bool local;
593
594 XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
595 vcpu->vcpu_id, server, mfrr);
596
597 icp = vcpu->arch.icp;
598 local = icp->server_num == server;
599 if (!local) {
600 icp = kvmppc_xics_find_server(vcpu->kvm, server);
601 if (!icp)
602 return H_PARAMETER;
603 }
604
605 /*
606 * ICP state: Set_MFRR
607 *
608 * If the CPPR is more favored than the new MFRR, then
609 * nothing needs to be rejected as there can be no XISR to
610 * reject. If the MFRR is being made less favored then
611 * there might be a previously-rejected interrupt needing
612 * to be resent.
613 *
614 * If the CPPR is less favored, then we might be replacing
615 * an interrupt, and thus need to possibly reject it as in
616 *
617 * ICP state: Check_IPI
618 */
619 do {
620 old_state = new_state = ACCESS_ONCE(icp->state);
621
622 /* Set_MFRR */
623 new_state.mfrr = mfrr;
624
625 /* Check_IPI */
626 reject = 0;
627 resend = false;
628 if (mfrr < new_state.cppr) {
629 /* Reject a pending interrupt if not an IPI */
630 if (mfrr <= new_state.pending_pri)
631 reject = new_state.xisr;
632 new_state.pending_pri = mfrr;
633 new_state.xisr = XICS_IPI;
634 }
635
636 if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
637 resend = new_state.need_resend;
638 new_state.need_resend = 0;
639 }
640 } while (!icp_try_update(icp, old_state, new_state, local));
641
642 /* Handle reject */
643 if (reject && reject != XICS_IPI)
644 icp_deliver_irq(xics, icp, reject);
645
646 /* Handle resend */
647 if (resend)
648 icp_check_resend(xics, icp);
649
650 return H_SUCCESS;
651}
652
653static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
654{
655 union kvmppc_icp_state old_state, new_state;
656 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
657 struct kvmppc_icp *icp = vcpu->arch.icp;
658 u32 reject;
659
660 XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
661
662 /*
663 * ICP State: Set_CPPR
664 *
665 * We can safely compare the new value with the current
666 * value outside of the transaction as the CPPR is only
667 * ever changed by the processor on itself
668 */
669 if (cppr > icp->state.cppr)
670 icp_down_cppr(xics, icp, cppr);
671 else if (cppr == icp->state.cppr)
672 return;
673
674 /*
675 * ICP State: Up_CPPR
676 *
677 * The processor is raising its priority, this can result
678 * in a rejection of a pending interrupt:
679 *
680 * ICP State: Reject_Current
681 *
682 * We can remove EE from the current processor, the update
683 * transaction will set it again if needed
684 */
685 kvmppc_book3s_dequeue_irqprio(icp->vcpu,
686 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
687
688 do {
689 old_state = new_state = ACCESS_ONCE(icp->state);
690
691 reject = 0;
692 new_state.cppr = cppr;
693
694 if (cppr <= new_state.pending_pri) {
695 reject = new_state.xisr;
696 new_state.xisr = 0;
697 new_state.pending_pri = 0xff;
698 }
699
700 } while (!icp_try_update(icp, old_state, new_state, true));
701
702 /*
703 * Check for rejects. They are handled by doing a new delivery
704 * attempt (see comments in icp_deliver_irq).
705 */
706 if (reject && reject != XICS_IPI)
707 icp_deliver_irq(xics, icp, reject);
708}
709
710static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
711{
712 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
713 struct kvmppc_icp *icp = vcpu->arch.icp;
714 struct kvmppc_ics *ics;
715 struct ics_irq_state *state;
716 u32 irq = xirr & 0x00ffffff;
717 u16 src;
718
719 XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
720
721 /*
722 * ICP State: EOI
723 *
724 * Note: If EOI is incorrectly used by SW to lower the CPPR
725 * value (ie more favored), we do not check for rejection of
726 * a pending interrupt, this is a SW error and PAPR sepcifies
727 * that we don't have to deal with it.
728 *
729 * The sending of an EOI to the ICS is handled after the
730 * CPPR update
731 *
732 * ICP State: Down_CPPR which we handle
733 * in a separate function as it's shared with H_CPPR.
734 */
735 icp_down_cppr(xics, icp, xirr >> 24);
736
737 /* IPIs have no EOI */
738 if (irq == XICS_IPI)
739 return H_SUCCESS;
740 /*
741 * EOI handling: If the interrupt is still asserted, we need to
742 * resend it. We can take a lockless "peek" at the ICS state here.
743 *
744 * "Message" interrupts will never have "asserted" set
745 */
746 ics = kvmppc_xics_find_ics(xics, irq, &src);
747 if (!ics) {
748 XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
749 return H_PARAMETER;
750 }
751 state = &ics->irq_state[src];
752
753 /* Still asserted, resend it */
754 if (state->asserted)
755 icp_deliver_irq(xics, icp, irq);
756
757 return H_SUCCESS;
758}
759
760static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
761{
762 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
763 struct kvmppc_icp *icp = vcpu->arch.icp;
764
765 XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
766 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
767
768 if (icp->rm_action & XICS_RM_KICK_VCPU)
769 kvmppc_fast_vcpu_kick(icp->rm_kick_target);
770 if (icp->rm_action & XICS_RM_CHECK_RESEND)
771 icp_check_resend(xics, icp);
772 if (icp->rm_action & XICS_RM_REJECT)
773 icp_deliver_irq(xics, icp, icp->rm_reject);
774
775 icp->rm_action = 0;
776
777 return H_SUCCESS;
778}
779
780int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
781{
782 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
783 unsigned long res;
784 int rc = H_SUCCESS;
785
786 /* Check if we have an ICP */
787 if (!xics || !vcpu->arch.icp)
788 return H_HARDWARE;
789
790 /* Check for real mode returning too hard */
791 if (xics->real_mode)
792 return kvmppc_xics_rm_complete(vcpu, req);
793
794 switch (req) {
795 case H_XIRR:
796 res = kvmppc_h_xirr(vcpu);
797 kvmppc_set_gpr(vcpu, 4, res);
798 break;
799 case H_CPPR:
800 kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
801 break;
802 case H_EOI:
803 rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
804 break;
805 case H_IPI:
806 rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
807 kvmppc_get_gpr(vcpu, 5));
808 break;
809 }
810
811 return rc;
812}
813
814
815/* -- Initialisation code etc. -- */
816
817static int xics_debug_show(struct seq_file *m, void *private)
818{
819 struct kvmppc_xics *xics = m->private;
820 struct kvm *kvm = xics->kvm;
821 struct kvm_vcpu *vcpu;
822 int icsid, i;
823
824 if (!kvm)
825 return 0;
826
827 seq_printf(m, "=========\nICP state\n=========\n");
828
829 kvm_for_each_vcpu(i, vcpu, kvm) {
830 struct kvmppc_icp *icp = vcpu->arch.icp;
831 union kvmppc_icp_state state;
832
833 if (!icp)
834 continue;
835
836 state.raw = ACCESS_ONCE(icp->state.raw);
837 seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
838 icp->server_num, state.xisr,
839 state.pending_pri, state.cppr, state.mfrr,
840 state.out_ee, state.need_resend);
841 }
842
843 for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
844 struct kvmppc_ics *ics = xics->ics[icsid];
845
846 if (!ics)
847 continue;
848
849 seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
850 icsid);
851
852 mutex_lock(&ics->lock);
853
854 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
855 struct ics_irq_state *irq = &ics->irq_state[i];
856
857 seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
858 irq->number, irq->server, irq->priority,
859 irq->saved_priority, irq->asserted,
860 irq->resend, irq->masked_pending);
861
862 }
863 mutex_unlock(&ics->lock);
864 }
865 return 0;
866}
867
868static int xics_debug_open(struct inode *inode, struct file *file)
869{
870 return single_open(file, xics_debug_show, inode->i_private);
871}
872
873static const struct file_operations xics_debug_fops = {
874 .open = xics_debug_open,
875 .read = seq_read,
876 .llseek = seq_lseek,
877 .release = single_release,
878};
879
880static void xics_debugfs_init(struct kvmppc_xics *xics)
881{
882 char *name;
883
884 name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
885 if (!name) {
886 pr_err("%s: no memory for name\n", __func__);
887 return;
888 }
889
890 xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
891 xics, &xics_debug_fops);
892
893 pr_debug("%s: created %s\n", __func__, name);
894 kfree(name);
895}
896
897static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
898 struct kvmppc_xics *xics, int irq)
899{
900 struct kvmppc_ics *ics;
901 int i, icsid;
902
903 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
904
905 mutex_lock(&kvm->lock);
906
907 /* ICS already exists - somebody else got here first */
908 if (xics->ics[icsid])
909 goto out;
910
911 /* Create the ICS */
912 ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
913 if (!ics)
914 goto out;
915
916 mutex_init(&ics->lock);
917 ics->icsid = icsid;
918
919 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
920 ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
921 ics->irq_state[i].priority = MASKED;
922 ics->irq_state[i].saved_priority = MASKED;
923 }
924 smp_wmb();
925 xics->ics[icsid] = ics;
926
927 if (icsid > xics->max_icsid)
928 xics->max_icsid = icsid;
929
930 out:
931 mutex_unlock(&kvm->lock);
932 return xics->ics[icsid];
933}
934
935int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
936{
937 struct kvmppc_icp *icp;
938
939 if (!vcpu->kvm->arch.xics)
940 return -ENODEV;
941
942 if (kvmppc_xics_find_server(vcpu->kvm, server_num))
943 return -EEXIST;
944
945 icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
946 if (!icp)
947 return -ENOMEM;
948
949 icp->vcpu = vcpu;
950 icp->server_num = server_num;
951 icp->state.mfrr = MASKED;
952 icp->state.pending_pri = MASKED;
953 vcpu->arch.icp = icp;
954
955 XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
956
957 return 0;
958}
959
960u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
961{
962 struct kvmppc_icp *icp = vcpu->arch.icp;
963 union kvmppc_icp_state state;
964
965 if (!icp)
966 return 0;
967 state = icp->state;
968 return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
969 ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
970 ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
971 ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
972}
973
974int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
975{
976 struct kvmppc_icp *icp = vcpu->arch.icp;
977 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
978 union kvmppc_icp_state old_state, new_state;
979 struct kvmppc_ics *ics;
980 u8 cppr, mfrr, pending_pri;
981 u32 xisr;
982 u16 src;
983 bool resend;
984
985 if (!icp || !xics)
986 return -ENOENT;
987
988 cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
989 xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
990 KVM_REG_PPC_ICP_XISR_MASK;
991 mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
992 pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
993
994 /* Require the new state to be internally consistent */
995 if (xisr == 0) {
996 if (pending_pri != 0xff)
997 return -EINVAL;
998 } else if (xisr == XICS_IPI) {
999 if (pending_pri != mfrr || pending_pri >= cppr)
1000 return -EINVAL;
1001 } else {
1002 if (pending_pri >= mfrr || pending_pri >= cppr)
1003 return -EINVAL;
1004 ics = kvmppc_xics_find_ics(xics, xisr, &src);
1005 if (!ics)
1006 return -EINVAL;
1007 }
1008
1009 new_state.raw = 0;
1010 new_state.cppr = cppr;
1011 new_state.xisr = xisr;
1012 new_state.mfrr = mfrr;
1013 new_state.pending_pri = pending_pri;
1014
1015 /*
1016 * Deassert the CPU interrupt request.
1017 * icp_try_update will reassert it if necessary.
1018 */
1019 kvmppc_book3s_dequeue_irqprio(icp->vcpu,
1020 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
1021
1022 /*
1023 * Note that if we displace an interrupt from old_state.xisr,
1024 * we don't mark it as rejected. We expect userspace to set
1025 * the state of the interrupt sources to be consistent with
1026 * the ICP states (either before or afterwards, which doesn't
1027 * matter). We do handle resends due to CPPR becoming less
1028 * favoured because that is necessary to end up with a
1029 * consistent state in the situation where userspace restores
1030 * the ICS states before the ICP states.
1031 */
1032 do {
1033 old_state = ACCESS_ONCE(icp->state);
1034
1035 if (new_state.mfrr <= old_state.mfrr) {
1036 resend = false;
1037 new_state.need_resend = old_state.need_resend;
1038 } else {
1039 resend = old_state.need_resend;
1040 new_state.need_resend = 0;
1041 }
1042 } while (!icp_try_update(icp, old_state, new_state, false));
1043
1044 if (resend)
1045 icp_check_resend(xics, icp);
1046
1047 return 0;
1048}
1049
1050static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
1051{
1052 int ret;
1053 struct kvmppc_ics *ics;
1054 struct ics_irq_state *irqp;
1055 u64 __user *ubufp = (u64 __user *) addr;
1056 u16 idx;
1057 u64 val, prio;
1058
1059 ics = kvmppc_xics_find_ics(xics, irq, &idx);
1060 if (!ics)
1061 return -ENOENT;
1062
1063 irqp = &ics->irq_state[idx];
1064 mutex_lock(&ics->lock);
1065 ret = -ENOENT;
1066 if (irqp->exists) {
1067 val = irqp->server;
1068 prio = irqp->priority;
1069 if (prio == MASKED) {
1070 val |= KVM_XICS_MASKED;
1071 prio = irqp->saved_priority;
1072 }
1073 val |= prio << KVM_XICS_PRIORITY_SHIFT;
1074 if (irqp->asserted)
1075 val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
1076 else if (irqp->masked_pending || irqp->resend)
1077 val |= KVM_XICS_PENDING;
1078 ret = 0;
1079 }
1080 mutex_unlock(&ics->lock);
1081
1082 if (!ret && put_user(val, ubufp))
1083 ret = -EFAULT;
1084
1085 return ret;
1086}
1087
1088static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
1089{
1090 struct kvmppc_ics *ics;
1091 struct ics_irq_state *irqp;
1092 u64 __user *ubufp = (u64 __user *) addr;
1093 u16 idx;
1094 u64 val;
1095 u8 prio;
1096 u32 server;
1097
1098 if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
1099 return -ENOENT;
1100
1101 ics = kvmppc_xics_find_ics(xics, irq, &idx);
1102 if (!ics) {
1103 ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
1104 if (!ics)
1105 return -ENOMEM;
1106 }
1107 irqp = &ics->irq_state[idx];
1108 if (get_user(val, ubufp))
1109 return -EFAULT;
1110
1111 server = val & KVM_XICS_DESTINATION_MASK;
1112 prio = val >> KVM_XICS_PRIORITY_SHIFT;
1113 if (prio != MASKED &&
1114 kvmppc_xics_find_server(xics->kvm, server) == NULL)
1115 return -EINVAL;
1116
1117 mutex_lock(&ics->lock);
1118 irqp->server = server;
1119 irqp->saved_priority = prio;
1120 if (val & KVM_XICS_MASKED)
1121 prio = MASKED;
1122 irqp->priority = prio;
1123 irqp->resend = 0;
1124 irqp->masked_pending = 0;
1125 irqp->asserted = 0;
1126 if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
1127 irqp->asserted = 1;
1128 irqp->exists = 1;
1129 mutex_unlock(&ics->lock);
1130
1131 if (val & KVM_XICS_PENDING)
1132 icp_deliver_irq(xics, NULL, irqp->number);
1133
1134 return 0;
1135}
1136
1137int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1138 bool line_status)
1139{
1140 struct kvmppc_xics *xics = kvm->arch.xics;
1141
1142 return ics_deliver_irq(xics, irq, level, line_status);
1143}
1144
1145static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1146{
1147 struct kvmppc_xics *xics = dev->private;
1148
1149 switch (attr->group) {
1150 case KVM_DEV_XICS_GRP_SOURCES:
1151 return xics_set_source(xics, attr->attr, attr->addr);
1152 }
1153 return -ENXIO;
1154}
1155
1156static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1157{
1158 struct kvmppc_xics *xics = dev->private;
1159
1160 switch (attr->group) {
1161 case KVM_DEV_XICS_GRP_SOURCES:
1162 return xics_get_source(xics, attr->attr, attr->addr);
1163 }
1164 return -ENXIO;
1165}
1166
1167static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1168{
1169 switch (attr->group) {
1170 case KVM_DEV_XICS_GRP_SOURCES:
1171 if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
1172 attr->attr < KVMPPC_XICS_NR_IRQS)
1173 return 0;
1174 break;
1175 }
1176 return -ENXIO;
1177}
1178
1179static void kvmppc_xics_free(struct kvm_device *dev)
1180{
1181 struct kvmppc_xics *xics = dev->private;
1182 int i;
1183 struct kvm *kvm = xics->kvm;
1184
1185 debugfs_remove(xics->dentry);
1186
1187 if (kvm)
1188 kvm->arch.xics = NULL;
1189
1190 for (i = 0; i <= xics->max_icsid; i++)
1191 kfree(xics->ics[i]);
1192 kfree(xics);
1193 kfree(dev);
1194}
1195
1196static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
1197{
1198 struct kvmppc_xics *xics;
1199 struct kvm *kvm = dev->kvm;
1200 int ret = 0;
1201
1202 xics = kzalloc(sizeof(*xics), GFP_KERNEL);
1203 if (!xics)
1204 return -ENOMEM;
1205
1206 dev->private = xics;
1207 xics->dev = dev;
1208 xics->kvm = kvm;
1209
1210 /* Already there ? */
1211 mutex_lock(&kvm->lock);
1212 if (kvm->arch.xics)
1213 ret = -EEXIST;
1214 else
1215 kvm->arch.xics = xics;
1216 mutex_unlock(&kvm->lock);
1217
1218 if (ret)
1219 return ret;
1220
1221 xics_debugfs_init(xics);
1222
1223#ifdef CONFIG_KVM_BOOK3S_64_HV
1224 if (cpu_has_feature(CPU_FTR_ARCH_206)) {
1225 /* Enable real mode support */
1226 xics->real_mode = ENABLE_REALMODE;
1227 xics->real_mode_dbg = DEBUG_REALMODE;
1228 }
1229#endif /* CONFIG_KVM_BOOK3S_64_HV */
1230
1231 return 0;
1232}
1233
1234struct kvm_device_ops kvm_xics_ops = {
1235 .name = "kvm-xics",
1236 .create = kvmppc_xics_create,
1237 .destroy = kvmppc_xics_free,
1238 .set_attr = xics_set_attr,
1239 .get_attr = xics_get_attr,
1240 .has_attr = xics_has_attr,
1241};
1242
1243int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
1244 u32 xcpu)
1245{
1246 struct kvmppc_xics *xics = dev->private;
1247 int r = -EBUSY;
1248
1249 if (dev->ops != &kvm_xics_ops)
1250 return -EPERM;
1251 if (xics->kvm != vcpu->kvm)
1252 return -EPERM;
1253 if (vcpu->arch.irq_type)
1254 return -EBUSY;
1255
1256 r = kvmppc_xics_create_icp(vcpu, xcpu);
1257 if (!r)
1258 vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
1259
1260 return r;
1261}
1262
1263void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
1264{
1265 if (!vcpu->arch.icp)
1266 return;
1267 kfree(vcpu->arch.icp);
1268 vcpu->arch.icp = NULL;
1269 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
1270}
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 000000000000..dd9326c5c19b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,130 @@
1/*
2 * Copyright 2012 Michael Ellerman, IBM Corporation.
3 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef _KVM_PPC_BOOK3S_XICS_H
11#define _KVM_PPC_BOOK3S_XICS_H
12
13/*
14 * We use a two-level tree to store interrupt source information.
15 * There are up to 1024 ICS nodes, each of which can represent
16 * 1024 sources.
17 */
18#define KVMPPC_XICS_MAX_ICS_ID 1023
19#define KVMPPC_XICS_ICS_SHIFT 10
20#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT)
21#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1)
22
23/*
24 * Interrupt source numbers below this are reserved, for example
25 * 0 is "no interrupt", and 2 is used for IPIs.
26 */
27#define KVMPPC_XICS_FIRST_IRQ 16
28#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \
29 KVMPPC_XICS_IRQ_PER_ICS)
30
31/* Priority value to use for disabling an interrupt */
32#define MASKED 0xff
33
34/* State for one irq source */
/*
 * One entry of the irq_state[] array in struct kvmppc_ics.
 * NOTE(review): the field meanings are not visible in this header beyond
 * their names; confirm against the users in book3s_xics.c before relying
 * on them.
 */
35struct ics_irq_state {
36 u32 number;
37 u32 server;
38 u8 priority;
39 u8 saved_priority;
40 u8 resend;
41 u8 masked_pending;
42 u8 asserted; /* Only for LSI */
43 u8 exists;
44};
45
46/* Atomic ICP state, updated with a single compare & swap */
/*
 * The individual fields are overlaid on a single unsigned long ("raw") so
 * that the whole state can be read or replaced as one word.
 * NOTE(review): this only works if the anonymous struct is no larger than
 * an unsigned long on the target - confirm for any new configuration.
 */
47union kvmppc_icp_state {
48 unsigned long raw;
49 struct {
50 u8 out_ee:1;
51 u8 need_resend:1;
52 u8 cppr;
53 u8 mfrr;
54 u8 pending_pri;
55 u32 xisr;
56 };
57};
58
59/* One bit per ICS */
60#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
61
/*
 * Per-vcpu state, reached through vcpu->arch.icp.
 */
62struct kvmppc_icp {
63 struct kvm_vcpu *vcpu;
/* Compared against the requested number in kvmppc_xics_find_server() */
64 unsigned long server_num;
65 union kvmppc_icp_state state;
/* ICP_RESEND_MAP_SIZE longs: one bit per ICS */
66 unsigned long resend_map[ICP_RESEND_MAP_SIZE];
67
68 /* Real mode might find something too hard, here's the action
69 * it might request from virtual mode
70 */
/* NOTE(review): values look like OR-able flag bits for rm_action - confirm */
71#define XICS_RM_KICK_VCPU 0x1
72#define XICS_RM_CHECK_RESEND 0x2
73#define XICS_RM_REJECT 0x4
74 u32 rm_action;
75 struct kvm_vcpu *rm_kick_target;
76 u32 rm_reject;
77
78 /* Debug stuff for real mode */
79 union kvmppc_icp_state rm_dbgstate;
80 struct kvm_vcpu *rm_dbgtgt;
81};
82
/*
 * One ICS node: a block of KVMPPC_XICS_IRQ_PER_ICS interrupt sources.
 * Instances are referenced from the ics[] array of struct kvmppc_xics.
 */
83struct kvmppc_ics {
/* NOTE(review): presumably guards irq_state[] - confirm against the users */
84 struct mutex lock;
85 u16 icsid;
86 struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
87};
88
/*
 * Per-VM XICS instance; installed in kvm->arch.xics and in dev->private by
 * kvmppc_xics_create().
 */
89struct kvmppc_xics {
/* Owning VM and device, both set at creation time */
90 struct kvm *kvm;
91 struct kvm_device *dev;
/* NOTE(review): presumably the debugfs entry made by xics_debugfs_init() - confirm */
92 struct dentry *dentry;
93 u32 max_icsid;
/* Set at creation only under CONFIG_KVM_BOOK3S_64_HV with CPU_FTR_ARCH_206 */
94 bool real_mode;
95 bool real_mode_dbg;
/* Indexed by ICS id (irq >> KVMPPC_XICS_ICS_SHIFT); entries may be NULL */
96 struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
97};
98
99static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
100 u32 nr)
101{
102 struct kvm_vcpu *vcpu = NULL;
103 int i;
104
105 kvm_for_each_vcpu(i, vcpu, kvm) {
106 if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
107 return vcpu->arch.icp;
108 }
109 return NULL;
110}
111
112static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
113 u32 irq, u16 *source)
114{
115 u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
116 u16 src = irq & KVMPPC_XICS_SRC_MASK;
117 struct kvmppc_ics *ics;
118
119 if (source)
120 *source = src;
121 if (icsid > KVMPPC_XICS_MAX_ICS_ID)
122 return NULL;
123 ics = xics->ics[icsid];
124 if (!ics)
125 return NULL;
126 return ics;
127}
128
129
130#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 020923e43134..1020119226db 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
222 kvmppc_booke_queue_irqprio(vcpu, prio); 222 kvmppc_booke_queue_irqprio(vcpu, prio);
223} 223}
224 224
225void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, 225void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
226 struct kvm_interrupt *irq)
227{ 226{
228 clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); 227 clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
229 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); 228 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
@@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
347 keep_irq = true; 346 keep_irq = true;
348 } 347 }
349 348
350 if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) 349 if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
351 update_epr = true; 350 update_epr = true;
352 351
353 switch (priority) { 352 switch (priority) {
@@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
428 set_guest_esr(vcpu, vcpu->arch.queued_esr); 427 set_guest_esr(vcpu, vcpu->arch.queued_esr);
429 if (update_dear == true) 428 if (update_dear == true)
430 set_guest_dear(vcpu, vcpu->arch.queued_dear); 429 set_guest_dear(vcpu, vcpu->arch.queued_dear);
431 if (update_epr == true) 430 if (update_epr == true) {
432 kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); 431 if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
432 kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
433 else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
434 BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
435 kvmppc_mpic_set_epr(vcpu);
436 }
437 }
433 438
434 new_msr &= msr_mask; 439 new_msr &= msr_mask;
435#if defined(CONFIG_64BIT) 440#if defined(CONFIG_64BIT)
@@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
746 kvmppc_core_queue_program(vcpu, ESR_PIL); 751 kvmppc_core_queue_program(vcpu, ESR_PIL);
747 return RESUME_HOST; 752 return RESUME_HOST;
748 753
754 case EMULATE_EXIT_USER:
755 return RESUME_HOST;
756
749 default: 757 default:
750 BUG(); 758 BUG();
751 } 759 }
@@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
1148 return r; 1156 return r;
1149} 1157}
1150 1158
1159static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
1160{
1161 u32 old_tsr = vcpu->arch.tsr;
1162
1163 vcpu->arch.tsr = new_tsr;
1164
1165 if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
1166 arm_next_watchdog(vcpu);
1167
1168 update_timer_ints(vcpu);
1169}
1170
1151/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ 1171/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
1152int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 1172int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1153{ 1173{
@@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
1287 kvmppc_emulate_dec(vcpu); 1307 kvmppc_emulate_dec(vcpu);
1288 } 1308 }
1289 1309
1290 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { 1310 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR)
1291 u32 old_tsr = vcpu->arch.tsr; 1311 kvmppc_set_tsr(vcpu, sregs->u.e.tsr);
1292
1293 vcpu->arch.tsr = sregs->u.e.tsr;
1294
1295 if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
1296 arm_next_watchdog(vcpu);
1297
1298 update_timer_ints(vcpu);
1299 }
1300 1312
1301 return 0; 1313 return 0;
1302} 1314}
@@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1409 1421
1410int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1422int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1411{ 1423{
1412 int r = -EINVAL; 1424 int r = 0;
1425 union kvmppc_one_reg val;
1426 int size;
1427 long int i;
1428
1429 size = one_reg_size(reg->id);
1430 if (size > sizeof(val))
1431 return -EINVAL;
1413 1432
1414 switch (reg->id) { 1433 switch (reg->id) {
1415 case KVM_REG_PPC_IAC1: 1434 case KVM_REG_PPC_IAC1:
1416 case KVM_REG_PPC_IAC2: 1435 case KVM_REG_PPC_IAC2:
1417 case KVM_REG_PPC_IAC3: 1436 case KVM_REG_PPC_IAC3:
1418 case KVM_REG_PPC_IAC4: { 1437 case KVM_REG_PPC_IAC4:
1419 int iac = reg->id - KVM_REG_PPC_IAC1; 1438 i = reg->id - KVM_REG_PPC_IAC1;
1420 r = copy_to_user((u64 __user *)(long)reg->addr, 1439 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
1421 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
1422 break; 1440 break;
1423 }
1424 case KVM_REG_PPC_DAC1: 1441 case KVM_REG_PPC_DAC1:
1425 case KVM_REG_PPC_DAC2: { 1442 case KVM_REG_PPC_DAC2:
1426 int dac = reg->id - KVM_REG_PPC_DAC1; 1443 i = reg->id - KVM_REG_PPC_DAC1;
1427 r = copy_to_user((u64 __user *)(long)reg->addr, 1444 val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
1428 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
1429 break; 1445 break;
1430 }
1431 case KVM_REG_PPC_EPR: { 1446 case KVM_REG_PPC_EPR: {
1432 u32 epr = get_guest_epr(vcpu); 1447 u32 epr = get_guest_epr(vcpu);
1433 r = put_user(epr, (u32 __user *)(long)reg->addr); 1448 val = get_reg_val(reg->id, epr);
1434 break; 1449 break;
1435 } 1450 }
1436#if defined(CONFIG_64BIT) 1451#if defined(CONFIG_64BIT)
1437 case KVM_REG_PPC_EPCR: 1452 case KVM_REG_PPC_EPCR:
1438 r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); 1453 val = get_reg_val(reg->id, vcpu->arch.epcr);
1439 break; 1454 break;
1440#endif 1455#endif
1456 case KVM_REG_PPC_TCR:
1457 val = get_reg_val(reg->id, vcpu->arch.tcr);
1458 break;
1459 case KVM_REG_PPC_TSR:
1460 val = get_reg_val(reg->id, vcpu->arch.tsr);
1461 break;
1462 case KVM_REG_PPC_DEBUG_INST:
1463 val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
1464 break;
1441 default: 1465 default:
1466 r = kvmppc_get_one_reg(vcpu, reg->id, &val);
1442 break; 1467 break;
1443 } 1468 }
1469
1470 if (r)
1471 return r;
1472
1473 if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
1474 r = -EFAULT;
1475
1444 return r; 1476 return r;
1445} 1477}
1446 1478
1447int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1479int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1448{ 1480{
1449 int r = -EINVAL; 1481 int r = 0;
1482 union kvmppc_one_reg val;
1483 int size;
1484 long int i;
1485
1486 size = one_reg_size(reg->id);
1487 if (size > sizeof(val))
1488 return -EINVAL;
1489
1490 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
1491 return -EFAULT;
1450 1492
1451 switch (reg->id) { 1493 switch (reg->id) {
1452 case KVM_REG_PPC_IAC1: 1494 case KVM_REG_PPC_IAC1:
1453 case KVM_REG_PPC_IAC2: 1495 case KVM_REG_PPC_IAC2:
1454 case KVM_REG_PPC_IAC3: 1496 case KVM_REG_PPC_IAC3:
1455 case KVM_REG_PPC_IAC4: { 1497 case KVM_REG_PPC_IAC4:
1456 int iac = reg->id - KVM_REG_PPC_IAC1; 1498 i = reg->id - KVM_REG_PPC_IAC1;
1457 r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], 1499 vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
1458 (u64 __user *)(long)reg->addr, sizeof(u64));
1459 break; 1500 break;
1460 }
1461 case KVM_REG_PPC_DAC1: 1501 case KVM_REG_PPC_DAC1:
1462 case KVM_REG_PPC_DAC2: { 1502 case KVM_REG_PPC_DAC2:
1463 int dac = reg->id - KVM_REG_PPC_DAC1; 1503 i = reg->id - KVM_REG_PPC_DAC1;
1464 r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], 1504 vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
1465 (u64 __user *)(long)reg->addr, sizeof(u64));
1466 break; 1505 break;
1467 }
1468 case KVM_REG_PPC_EPR: { 1506 case KVM_REG_PPC_EPR: {
1469 u32 new_epr; 1507 u32 new_epr = set_reg_val(reg->id, val);
1470 r = get_user(new_epr, (u32 __user *)(long)reg->addr); 1508 kvmppc_set_epr(vcpu, new_epr);
1471 if (!r)
1472 kvmppc_set_epr(vcpu, new_epr);
1473 break; 1509 break;
1474 } 1510 }
1475#if defined(CONFIG_64BIT) 1511#if defined(CONFIG_64BIT)
1476 case KVM_REG_PPC_EPCR: { 1512 case KVM_REG_PPC_EPCR: {
1477 u32 new_epcr; 1513 u32 new_epcr = set_reg_val(reg->id, val);
1478 r = get_user(new_epcr, (u32 __user *)(long)reg->addr); 1514 kvmppc_set_epcr(vcpu, new_epcr);
1479 if (r == 0)
1480 kvmppc_set_epcr(vcpu, new_epcr);
1481 break; 1515 break;
1482 } 1516 }
1483#endif 1517#endif
1518 case KVM_REG_PPC_OR_TSR: {
1519 u32 tsr_bits = set_reg_val(reg->id, val);
1520 kvmppc_set_tsr_bits(vcpu, tsr_bits);
1521 break;
1522 }
1523 case KVM_REG_PPC_CLEAR_TSR: {
1524 u32 tsr_bits = set_reg_val(reg->id, val);
1525 kvmppc_clr_tsr_bits(vcpu, tsr_bits);
1526 break;
1527 }
1528 case KVM_REG_PPC_TSR: {
1529 u32 tsr = set_reg_val(reg->id, val);
1530 kvmppc_set_tsr(vcpu, tsr);
1531 break;
1532 }
1533 case KVM_REG_PPC_TCR: {
1534 u32 tcr = set_reg_val(reg->id, val);
1535 kvmppc_set_tcr(vcpu, tcr);
1536 break;
1537 }
1484 default: 1538 default:
1539 r = kvmppc_set_one_reg(vcpu, reg->id, &val);
1485 break; 1540 break;
1486 } 1541 }
1542
1487 return r; 1543 return r;
1488} 1544}
1489 1545
/*
 * Guest-debug ioctl hook: this implementation always fails with -EINVAL and
 * examines neither argument.
 */
1546int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1547 struct kvm_guest_debug *dbg)
1548{
1549 return -EINVAL;
1550}
1551
1490int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1552int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1491{ 1553{
1492 return -ENOTSUPP; 1554 return -ENOTSUPP;
@@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1531 1593
1532void kvmppc_core_commit_memory_region(struct kvm *kvm, 1594void kvmppc_core_commit_memory_region(struct kvm *kvm,
1533 struct kvm_userspace_memory_region *mem, 1595 struct kvm_userspace_memory_region *mem,
1534 struct kvm_memory_slot old) 1596 const struct kvm_memory_slot *old)
1535{ 1597{
1536} 1598}
1537 1599
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index f4bb55c96517..2c6deb5ef2fe 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -54,8 +54,7 @@
54 (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ 54 (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
55 (1<<BOOKE_INTERRUPT_ALIGNMENT)) 55 (1<<BOOKE_INTERRUPT_ALIGNMENT))
56 56
57.macro KVM_HANDLER ivor_nr scratch srr0 57.macro __KVM_HANDLER ivor_nr scratch srr0
58_GLOBAL(kvmppc_handler_\ivor_nr)
59 /* Get pointer to vcpu and record exit number. */ 58 /* Get pointer to vcpu and record exit number. */
60 mtspr \scratch , r4 59 mtspr \scratch , r4
61 mfspr r4, SPRN_SPRG_THREAD 60 mfspr r4, SPRN_SPRG_THREAD
@@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
76 bctr 75 bctr
77.endm 76.endm
78 77
/*
 * KVM_HANDLER: emit the global entry point kvmppc_handler_<ivor_nr>,
 * immediately followed by the shared __KVM_HANDLER body expanded with the
 * same three arguments.
 */
78.macro KVM_HANDLER ivor_nr scratch srr0
79_GLOBAL(kvmppc_handler_\ivor_nr)
80 __KVM_HANDLER \ivor_nr \scratch \srr0
81.endm
82
/*
 * KVM_DBG_HANDLER: entry point kvmppc_handler_<ivor_nr> with a pre-check in
 * front of the shared __KVM_HANDLER body.
 *
 * MSR_PR in CSRR1 decides the path.  When it is clear, the interrupt is
 * dismissed here (MSR_DE cleared in CSRR1, DBSR written, registers restored,
 * rfci) and __KVM_HANDLER is never reached.  When it is set, r3, r4 and CR
 * are restored to their entry values and control falls into __KVM_HANDLER.
 */
83.macro KVM_DBG_HANDLER ivor_nr scratch srr0
84_GLOBAL(kvmppc_handler_\ivor_nr)
/* Park r4 in \scratch, locate the vcpu, then park r3 there and CR in r3 */
85 mtspr \scratch, r4
86 mfspr r4, SPRN_SPRG_THREAD
87 lwz r4, THREAD_KVM_VCPU(r4)
88 stw r3, VCPU_CRIT_SAVE(r4)
89 mfcr r3
/* Branch to 1: when MSR_PR is set in CSRR1 */
90 mfspr r4, SPRN_CSRR1
91 andi. r4, r4, MSR_PR
92 bne 1f
93 /* debug interrupt happened in enter/exit path */
/* Clear MSR_DE in CSRR1 */
94 mfspr r4, SPRN_CSRR1
95 rlwinm r4, r4, 0, ~MSR_DE
96 mtspr SPRN_CSRR1, r4
/* Write 0xffffffff to DBSR. NOTE(review): presumably clears all status bits - confirm */
97 lis r4, 0xffff
98 ori r4, r4, 0xffff
99 mtspr SPRN_DBSR, r4
/* Restore CR, r3 and r4 from where they were parked, then return */
100 mfspr r4, SPRN_SPRG_THREAD
101 lwz r4, THREAD_KVM_VCPU(r4)
102 mtcr r3
103 lwz r3, VCPU_CRIT_SAVE(r4)
104 mfspr r4, \scratch
105 rfci
1061: /* debug interrupt happened in guest */
/* Undo the pre-check's register use so __KVM_HANDLER sees the entry state */
107 mtcr r3
108 mfspr r4, SPRN_SPRG_THREAD
109 lwz r4, THREAD_KVM_VCPU(r4)
110 lwz r3, VCPU_CRIT_SAVE(r4)
111 mfspr r4, \scratch
112 __KVM_HANDLER \ivor_nr \scratch \srr0
113.endm
114
79.macro KVM_HANDLER_ADDR ivor_nr 115.macro KVM_HANDLER_ADDR ivor_nr
80 .long kvmppc_handler_\ivor_nr 116 .long kvmppc_handler_\ivor_nr
81.endm 117.endm
@@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
100KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 136KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
101KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 137KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
102KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 138KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
103KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 139KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
104KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 140KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
105KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 141KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
106KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 142KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 6dd4de7802bf..ce6b73c29612 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
425 return kvmppc_set_sregs_ivor(vcpu, sregs); 425 return kvmppc_set_sregs_ivor(vcpu, sregs);
426} 426}
427 427
428int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
429 union kvmppc_one_reg *val)
430{
431 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
432 return r;
433}
434
435int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
436 union kvmppc_one_reg *val)
437{
438 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
439 return r;
440}
441
428struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 442struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
429{ 443{
430 struct kvmppc_vcpu_e500 *vcpu_e500; 444 struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a8ce24..c2e5e98453a6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
23#include <asm/mmu-book3e.h> 23#include <asm/mmu-book3e.h>
24#include <asm/tlb.h> 24#include <asm/tlb.h>
25 25
/* Feature identifiers accepted by has_feature() */
26enum vcpu_ftr {
27 VCPU_FTR_MMU_V2
28};
29
26#define E500_PID_NUM 3 30#define E500_PID_NUM 3
27#define E500_TLB_NUM 2 31#define E500_TLB_NUM 2
28 32
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
131void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 135void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
132int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 136int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
133 137
138int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
139 union kvmppc_one_reg *val);
140int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
141 union kvmppc_one_reg *val);
134 142
135#ifdef CONFIG_KVM_E500V2 143#ifdef CONFIG_KVM_E500V2
136unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, 144unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
295#define get_tlb_sts(gtlbe) (MAS1_TS) 303#define get_tlb_sts(gtlbe) (MAS1_TS)
296#endif /* !BOOKE_HV */ 304#endif /* !BOOKE_HV */
297 305
306static inline bool has_feature(const struct kvm_vcpu *vcpu,
307 enum vcpu_ftr ftr)
308{
309 bool has_ftr;
310 switch (ftr) {
311 case VCPU_FTR_MMU_V2:
312 has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
313 break;
314 default:
315 return false;
316 }
317 return has_ftr;
318}
319
298#endif /* KVM_E500_H */ 320#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353a836a..b10a01243abd 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
284 case SPRN_TLB1CFG: 284 case SPRN_TLB1CFG:
285 *spr_val = vcpu->arch.tlbcfg[1]; 285 *spr_val = vcpu->arch.tlbcfg[1];
286 break; 286 break;
287 case SPRN_TLB0PS:
288 if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
289 return EMULATE_FAIL;
290 *spr_val = vcpu->arch.tlbps[0];
291 break;
292 case SPRN_TLB1PS:
293 if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
294 return EMULATE_FAIL;
295 *spr_val = vcpu->arch.tlbps[1];
296 break;
287 case SPRN_L1CSR0: 297 case SPRN_L1CSR0:
288 *spr_val = vcpu_e500->l1csr0; 298 *spr_val = vcpu_e500->l1csr0;
289 break; 299 break;
@@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
307 case SPRN_MMUCFG: 317 case SPRN_MMUCFG:
308 *spr_val = vcpu->arch.mmucfg; 318 *spr_val = vcpu->arch.mmucfg;
309 break; 319 break;
320 case SPRN_EPTCFG:
321 if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
322 return EMULATE_FAIL;
323 /*
324 * Legacy Linux guests access EPTCFG register even if the E.PT
325 * category is disabled in the VM. Give them a chance to live.
326 */
327 *spr_val = vcpu->arch.eptcfg;
328 break;
310 329
311 /* extra exceptions */ 330 /* extra exceptions */
312 case SPRN_IVOR32: 331 case SPRN_IVOR32:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 5c4475983f78..c41a5a96b558 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
596 return 0; 596 return 0;
597} 597}
598 598
599int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
600 union kvmppc_one_reg *val)
601{
602 int r = 0;
603 long int i;
604
605 switch (id) {
606 case KVM_REG_PPC_MAS0:
607 *val = get_reg_val(id, vcpu->arch.shared->mas0);
608 break;
609 case KVM_REG_PPC_MAS1:
610 *val = get_reg_val(id, vcpu->arch.shared->mas1);
611 break;
612 case KVM_REG_PPC_MAS2:
613 *val = get_reg_val(id, vcpu->arch.shared->mas2);
614 break;
615 case KVM_REG_PPC_MAS7_3:
616 *val = get_reg_val(id, vcpu->arch.shared->mas7_3);
617 break;
618 case KVM_REG_PPC_MAS4:
619 *val = get_reg_val(id, vcpu->arch.shared->mas4);
620 break;
621 case KVM_REG_PPC_MAS6:
622 *val = get_reg_val(id, vcpu->arch.shared->mas6);
623 break;
624 case KVM_REG_PPC_MMUCFG:
625 *val = get_reg_val(id, vcpu->arch.mmucfg);
626 break;
627 case KVM_REG_PPC_EPTCFG:
628 *val = get_reg_val(id, vcpu->arch.eptcfg);
629 break;
630 case KVM_REG_PPC_TLB0CFG:
631 case KVM_REG_PPC_TLB1CFG:
632 case KVM_REG_PPC_TLB2CFG:
633 case KVM_REG_PPC_TLB3CFG:
634 i = id - KVM_REG_PPC_TLB0CFG;
635 *val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
636 break;
637 case KVM_REG_PPC_TLB0PS:
638 case KVM_REG_PPC_TLB1PS:
639 case KVM_REG_PPC_TLB2PS:
640 case KVM_REG_PPC_TLB3PS:
641 i = id - KVM_REG_PPC_TLB0PS;
642 *val = get_reg_val(id, vcpu->arch.tlbps[i]);
643 break;
644 default:
645 r = -EINVAL;
646 break;
647 }
648
649 return r;
650}
651
652int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
653 union kvmppc_one_reg *val)
654{
655 int r = 0;
656 long int i;
657
658 switch (id) {
659 case KVM_REG_PPC_MAS0:
660 vcpu->arch.shared->mas0 = set_reg_val(id, *val);
661 break;
662 case KVM_REG_PPC_MAS1:
663 vcpu->arch.shared->mas1 = set_reg_val(id, *val);
664 break;
665 case KVM_REG_PPC_MAS2:
666 vcpu->arch.shared->mas2 = set_reg_val(id, *val);
667 break;
668 case KVM_REG_PPC_MAS7_3:
669 vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
670 break;
671 case KVM_REG_PPC_MAS4:
672 vcpu->arch.shared->mas4 = set_reg_val(id, *val);
673 break;
674 case KVM_REG_PPC_MAS6:
675 vcpu->arch.shared->mas6 = set_reg_val(id, *val);
676 break;
677 /* Only allow MMU registers to be set to the config supported by KVM */
678 case KVM_REG_PPC_MMUCFG: {
679 u32 reg = set_reg_val(id, *val);
680 if (reg != vcpu->arch.mmucfg)
681 r = -EINVAL;
682 break;
683 }
684 case KVM_REG_PPC_EPTCFG: {
685 u32 reg = set_reg_val(id, *val);
686 if (reg != vcpu->arch.eptcfg)
687 r = -EINVAL;
688 break;
689 }
690 case KVM_REG_PPC_TLB0CFG:
691 case KVM_REG_PPC_TLB1CFG:
692 case KVM_REG_PPC_TLB2CFG:
693 case KVM_REG_PPC_TLB3CFG: {
694 /* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
695 u32 reg = set_reg_val(id, *val);
696 i = id - KVM_REG_PPC_TLB0CFG;
697 if (reg != vcpu->arch.tlbcfg[i])
698 r = -EINVAL;
699 break;
700 }
701 case KVM_REG_PPC_TLB0PS:
702 case KVM_REG_PPC_TLB1PS:
703 case KVM_REG_PPC_TLB2PS:
704 case KVM_REG_PPC_TLB3PS: {
705 u32 reg = set_reg_val(id, *val);
706 i = id - KVM_REG_PPC_TLB0PS;
707 if (reg != vcpu->arch.tlbps[i])
708 r = -EINVAL;
709 break;
710 }
711 default:
712 r = -EINVAL;
713 break;
714 }
715
716 return r;
717}
718
719static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
720 struct kvm_book3e_206_tlb_params *params)
721{
722 vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
723 if (params->tlb_sizes[0] <= 2048)
724 vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
725 vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
726
727 vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
728 vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
729 vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
730 return 0;
731}
732
599int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, 733int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
600 struct kvm_config_tlb *cfg) 734 struct kvm_config_tlb *cfg)
601{ 735{
@@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
692 vcpu_e500->gtlb_offset[0] = 0; 826 vcpu_e500->gtlb_offset[0] = 0;
693 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; 827 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
694 828
695 vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; 829 /* Update vcpu's MMU geometry based on SW_TLB input */
696 830 vcpu_mmu_geometry_update(vcpu, &params);
697 vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
698 if (params.tlb_sizes[0] <= 2048)
699 vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
700 vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
701
702 vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
703 vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
704 vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
705 831
706 vcpu_e500->shared_tlb_pages = pages; 832 vcpu_e500->shared_tlb_pages = pages;
707 vcpu_e500->num_shared_tlb_pages = num_pages; 833 vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
737 return 0; 863 return 0;
738} 864}
739 865
866/* Vcpu's MMU default configuration */
867static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
868 struct kvmppc_e500_tlb_params *params)
869{
870 /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
871 vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
872
873 /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
874 vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
875 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
876 vcpu->arch.tlbcfg[0] |= params[0].entries;
877 vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
878
879 vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
880 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
881 vcpu->arch.tlbcfg[1] |= params[1].entries;
882 vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
883
884 if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
885 vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
886 vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
887
888 vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
889
890 /* Guest mmu emulation currently doesn't handle E.PT */
891 vcpu->arch.eptcfg = 0;
892 vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
893 vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
894 }
895
896 return 0;
897}
898
740int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) 899int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
741{ 900{
742 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; 901 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
781 if (!vcpu_e500->g2h_tlb1_map) 940 if (!vcpu_e500->g2h_tlb1_map)
782 goto err; 941 goto err;
783 942
784 /* Init TLB configuration register */ 943 vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
785 vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
786 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
787 vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
788 vcpu->arch.tlbcfg[0] |=
789 vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
790
791 vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
792 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
793 vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
794 vcpu->arch.tlbcfg[1] |=
795 vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
796 944
797 kvmppc_recalc_tlb1map_range(vcpu_e500); 945 kvmppc_recalc_tlb1map_range(vcpu_e500);
798 return 0; 946 return 0;
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 2f4baa074b2e..753cc99eff2b 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void)
177 r = 0; 177 r = 0;
178 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) 178 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
179 r = 0; 179 r = 0;
180 else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
181 r = 0;
180 else 182 else
181 r = -ENOTSUPP; 183 r = -ENOTSUPP;
182 184
@@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
260 return kvmppc_set_sregs_ivor(vcpu, sregs); 262 return kvmppc_set_sregs_ivor(vcpu, sregs);
261} 263}
262 264
265int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
266 union kvmppc_one_reg *val)
267{
268 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
269 return r;
270}
271
272int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
273 union kvmppc_one_reg *val)
274{
275 int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
276 return r;
277}
278
263struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 279struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
264{ 280{
265 struct kvmppc_vcpu_e500 *vcpu_e500; 281 struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 7a73b6f72a8b..631a2650e4e4 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -38,6 +38,7 @@
38 38
39#define OP_31_XOP_TRAP 4 39#define OP_31_XOP_TRAP 4
40#define OP_31_XOP_LWZX 23 40#define OP_31_XOP_LWZX 23
41#define OP_31_XOP_DCBST 54
41#define OP_31_XOP_TRAP_64 68 42#define OP_31_XOP_TRAP_64 68
42#define OP_31_XOP_DCBF 86 43#define OP_31_XOP_DCBF 86
43#define OP_31_XOP_LBZX 87 44#define OP_31_XOP_LBZX 87
@@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
370 emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); 371 emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
371 break; 372 break;
372 373
374 case OP_31_XOP_DCBST:
373 case OP_31_XOP_DCBF: 375 case OP_31_XOP_DCBF:
374 case OP_31_XOP_DCBI: 376 case OP_31_XOP_DCBI:
375 /* Do nothing. The guest is performing dcbi because 377 /* Do nothing. The guest is performing dcbi because
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
new file mode 100644
index 000000000000..5a9a10b90762
--- /dev/null
+++ b/arch/powerpc/kvm/irq.h
@@ -0,0 +1,20 @@
1#ifndef __IRQ_H
2#define __IRQ_H
3
4#include <linux/kvm_host.h>
5
/*
 * Return non-zero when the VM has an in-kernel interrupt controller, i.e. an
 * MPIC (CONFIG_KVM_MPIC) or an XICS (CONFIG_KVM_XICS) pointer is set.  A
 * read barrier is issued after the checks, before returning.
 */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
	int present = 0;

#ifdef CONFIG_KVM_MPIC
	if (kvm->arch.mpic)
		present = 1;
#endif
#ifdef CONFIG_KVM_XICS
	if (!present && kvm->arch.xics)
		present = 1;
#endif
	smp_rmb();
	return present;
}
19
20#endif
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
new file mode 100644
index 000000000000..2861ae9eaae6
--- /dev/null
+++ b/arch/powerpc/kvm/mpic.c
@@ -0,0 +1,1853 @@
1/*
2 * OpenPIC emulation
3 *
4 * Copyright (c) 2004 Jocelyn Mayer
5 * 2011 Alexander Graf
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26#include <linux/slab.h>
27#include <linux/mutex.h>
28#include <linux/kvm_host.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/anon_inodes.h>
32#include <asm/uaccess.h>
33#include <asm/mpic.h>
34#include <asm/kvm_para.h>
35#include <asm/kvm_host.h>
36#include <asm/kvm_ppc.h>
37#include "iodev.h"
38
39#define MAX_CPU 32
40#define MAX_SRC 256
41#define MAX_TMR 4
42#define MAX_IPI 4
43#define MAX_MSI 8
44#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR)
45#define VID 0x03 /* MPIC version ID */
46
47/* OpenPIC capability flags */
48#define OPENPIC_FLAG_IDR_CRIT (1 << 0)
49#define OPENPIC_FLAG_ILR (2 << 0)
50
51/* OpenPIC address map */
52#define OPENPIC_REG_SIZE 0x40000
53#define OPENPIC_GLB_REG_START 0x0
54#define OPENPIC_GLB_REG_SIZE 0x10F0
55#define OPENPIC_TMR_REG_START 0x10F0
56#define OPENPIC_TMR_REG_SIZE 0x220
57#define OPENPIC_MSI_REG_START 0x1600
58#define OPENPIC_MSI_REG_SIZE 0x200
59#define OPENPIC_SUMMARY_REG_START 0x3800
60#define OPENPIC_SUMMARY_REG_SIZE 0x800
61#define OPENPIC_SRC_REG_START 0x10000
62#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20)
63#define OPENPIC_CPU_REG_START 0x20000
64#define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000))
65
66struct fsl_mpic_info {
67 int max_ext;
68};
69
70static struct fsl_mpic_info fsl_mpic_20 = {
71 .max_ext = 12,
72};
73
74static struct fsl_mpic_info fsl_mpic_42 = {
75 .max_ext = 12,
76};
77
78#define FRR_NIRQ_SHIFT 16
79#define FRR_NCPU_SHIFT 8
80#define FRR_VID_SHIFT 0
81
82#define VID_REVISION_1_2 2
83#define VID_REVISION_1_3 3
84
85#define VIR_GENERIC 0x00000000 /* Generic Vendor ID */
86
87#define GCR_RESET 0x80000000
88#define GCR_MODE_PASS 0x00000000
89#define GCR_MODE_MIXED 0x20000000
90#define GCR_MODE_PROXY 0x60000000
91
92#define TBCR_CI 0x80000000 /* count inhibit */
93#define TCCR_TOG 0x80000000 /* toggles when decrement to zero */
94
95#define IDR_EP_SHIFT 31
96#define IDR_EP_MASK (1 << IDR_EP_SHIFT)
97#define IDR_CI0_SHIFT 30
98#define IDR_CI1_SHIFT 29
99#define IDR_P1_SHIFT 1
100#define IDR_P0_SHIFT 0
101
102#define ILR_INTTGT_MASK 0x000000ff
103#define ILR_INTTGT_INT 0x00
104#define ILR_INTTGT_CINT 0x01 /* critical */
105#define ILR_INTTGT_MCP 0x02 /* machine check */
106#define NUM_OUTPUTS 3
107
108#define MSIIR_OFFSET 0x140
109#define MSIIR_SRS_SHIFT 29
110#define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT)
111#define MSIIR_IBS_SHIFT 24
112#define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT)
113
/*
 * Return the MPIC cpu id of the vcpu bound to the current task,
 * or -1 when there is none (or when not built for KVM on BookE).
 */
static int get_current_cpu(void)
{
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
	struct kvm_vcpu *running = current->thread.kvm_vcpu;

	if (!running)
		return -1;

	return running->arch.irq_cpu_id;
#else
	/* XXX */
	return -1;
#endif
}
124
125static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
126 u32 val, int idx);
127static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
128 u32 *ptr, int idx);
129
/*
 * Kind of interrupt source. openpic_reset() and write_IRQreg_ivpr() switch
 * on this to decide how the sense/polarity bits of IVPR are treated.
 */
130enum irq_type {
131 IRQ_TYPE_NORMAL = 0,
132 IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */
133 IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */
134};
135
/*
 * Bitmap of IRQ numbers plus the result of the last IRQ_check() scan:
 * @next is the highest-priority set IRQ (-1 if none) and @priority is
 * that IRQ's IVPR priority (-1 if none).
 */
136struct irq_queue {
137 /* Round up to the nearest 64 IRQs so that the queue length
138 * won't change when moving between 32 and 64 bit hosts.
139 */
140 unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
141 int next;
142 int priority;
143};
144
/*
 * State of one interrupt source (external, IPI, timer or MSI).
 * @destmask is derived from @idr by write_IRQreg_idr(); IPI sources
 * additionally have bits OR-ed in on IPIDR writes and cleared on IACK.
 */
145struct irq_source {
146 uint32_t ivpr; /* IRQ vector/priority register */
147 uint32_t idr; /* IRQ destination register */
148 uint32_t destmask; /* bitmap of CPU destinations */
149 int last_cpu;
150 int output; /* IRQ level, e.g. ILR_INTTGT_INT */
151 int pending; /* TRUE if IRQ is pending */
152 enum irq_type type;
153 bool level:1; /* level-triggered */
154 bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */
155};
156
157#define IVPR_MASK_SHIFT 31
158#define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT)
159#define IVPR_ACTIVITY_SHIFT 30
160#define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT)
161#define IVPR_MODE_SHIFT 29
162#define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT)
163#define IVPR_POLARITY_SHIFT 23
164#define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT)
165#define IVPR_SENSE_SHIFT 22
166#define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT)
167
168#define IVPR_PRIORITY_MASK (0xF << 16)
169#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
170#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
171
172/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
173#define IDR_EP 0x80000000 /* external pin */
174#define IDR_CI 0x40000000 /* critical interrupt */
175
/*
 * Per-CPU interrupt destination: the attached vcpu (NULL when no vcpu is
 * connected; mpic_irq_raise()/mpic_irq_lower() then do nothing), its task
 * priority and the raised / in-service IRQ queues.
 */
176struct irq_dest {
177 struct kvm_vcpu *vcpu;
178
179 int32_t ctpr; /* CPU current task priority */
180 struct irq_queue raised;
181 struct irq_queue servicing;
182
183 /* Count of IRQ sources asserting on non-INT outputs */
184 uint32_t outputs_active[NUM_OUTPUTS];
185};
186
187#define MAX_MMIO_REGIONS 10
188
/*
 * Complete state of one emulated OpenPIC/MPIC instance: KVM linkage,
 * model-dependent behaviour knobs, the global registers, and the
 * per-source, per-CPU, timer and MSI register banks.
 * kvmppc_mpic_set_epr() takes @lock around its access to this state.
 */
189struct openpic {
190 struct kvm *kvm;
191 struct kvm_device *dev;
192 struct kvm_io_device mmio;
193 const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
194 int num_mmio_regions;
195
196 gpa_t reg_base;
197 spinlock_t lock;
198
199 /* Behavior control */
200 struct fsl_mpic_info *fsl;
201 uint32_t model;
202 uint32_t flags;
203 uint32_t nb_irqs;
204 uint32_t vid;
205 uint32_t vir; /* Vendor identification register */
206 uint32_t vector_mask;
207 uint32_t tfrr_reset;
208 uint32_t ivpr_reset;
209 uint32_t idr_reset;
210 uint32_t brr1;
211 uint32_t mpic_mode_mask;
212
213 /* Global registers */
214 uint32_t frr; /* Feature reporting register */
215 uint32_t gcr; /* Global configuration register */
216 uint32_t pir; /* Processor initialization register */
217 uint32_t spve; /* Spurious vector register */
218 uint32_t tfrr; /* Timer frequency reporting register */
219 /* Source registers */
220 struct irq_source src[MAX_IRQ];
221 /* Local registers per output pin */
222 struct irq_dest dst[MAX_CPU];
223 uint32_t nb_cpus;
224 /* Timer registers */
225 struct {
226 uint32_t tccr; /* Global timer current count register */
227 uint32_t tbcr; /* Global timer base count register */
228 } timers[MAX_TMR];
229 /* Shared MSI registers */
230 struct {
231 uint32_t msir; /* Shared Message Signaled Interrupt Register */
232 } msi[MAX_MSI];
233 uint32_t max_irq;
234 uint32_t irq_ipi0;
235 uint32_t irq_tim0;
236 uint32_t irq_msi;
237};
238
239
240static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
241 int output)
242{
243 struct kvm_interrupt irq = {
244 .irq = KVM_INTERRUPT_SET_LEVEL,
245 };
246
247 if (!dst->vcpu) {
248 pr_debug("%s: destination cpu %d does not exist\n",
249 __func__, (int)(dst - &opp->dst[0]));
250 return;
251 }
252
253 pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
254 output);
255
256 if (output != ILR_INTTGT_INT) /* TODO */
257 return;
258
259 kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
260}
261
262static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
263 int output)
264{
265 if (!dst->vcpu) {
266 pr_debug("%s: destination cpu %d does not exist\n",
267 __func__, (int)(dst - &opp->dst[0]));
268 return;
269 }
270
271 pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
272 output);
273
274 if (output != ILR_INTTGT_INT) /* TODO */
275 return;
276
277 kvmppc_core_dequeue_external(dst->vcpu);
278}
279
280static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
281{
282 set_bit(n_IRQ, q->queue);
283}
284
285static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
286{
287 clear_bit(n_IRQ, q->queue);
288}
289
290static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
291{
292 return test_bit(n_IRQ, q->queue);
293}
294
295static void IRQ_check(struct openpic *opp, struct irq_queue *q)
296{
297 int irq = -1;
298 int next = -1;
299 int priority = -1;
300
301 for (;;) {
302 irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
303 if (irq == opp->max_irq)
304 break;
305
306 pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
307 irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
308
309 if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
310 next = irq;
311 priority = IVPR_PRIORITY(opp->src[irq].ivpr);
312 }
313 }
314
315 q->next = next;
316 q->priority = priority;
317}
318
319static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
320{
321 /* XXX: optimize */
322 IRQ_check(opp, q);
323
324 return q->next;
325}
326
327static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
328 bool active, bool was_active)
329{
330 struct irq_dest *dst;
331 struct irq_source *src;
332 int priority;
333
334 dst = &opp->dst[n_CPU];
335 src = &opp->src[n_IRQ];
336
337 pr_debug("%s: IRQ %d active %d was %d\n",
338 __func__, n_IRQ, active, was_active);
339
340 if (src->output != ILR_INTTGT_INT) {
341 pr_debug("%s: output %d irq %d active %d was %d count %d\n",
342 __func__, src->output, n_IRQ, active, was_active,
343 dst->outputs_active[src->output]);
344
345 /* On Freescale MPIC, critical interrupts ignore priority,
346 * IACK, EOI, etc. Before MPIC v4.1 they also ignore
347 * masking.
348 */
349 if (active) {
350 if (!was_active &&
351 dst->outputs_active[src->output]++ == 0) {
352 pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
353 __func__, src->output, n_CPU, n_IRQ);
354 mpic_irq_raise(opp, dst, src->output);
355 }
356 } else {
357 if (was_active &&
358 --dst->outputs_active[src->output] == 0) {
359 pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
360 __func__, src->output, n_CPU, n_IRQ);
361 mpic_irq_lower(opp, dst, src->output);
362 }
363 }
364
365 return;
366 }
367
368 priority = IVPR_PRIORITY(src->ivpr);
369
370 /* Even if the interrupt doesn't have enough priority,
371 * it is still raised, in case ctpr is lowered later.
372 */
373 if (active)
374 IRQ_setbit(&dst->raised, n_IRQ);
375 else
376 IRQ_resetbit(&dst->raised, n_IRQ);
377
378 IRQ_check(opp, &dst->raised);
379
380 if (active && priority <= dst->ctpr) {
381 pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
382 __func__, n_IRQ, priority, dst->ctpr, n_CPU);
383 active = 0;
384 }
385
386 if (active) {
387 if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
388 priority <= dst->servicing.priority) {
389 pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
390 __func__, n_IRQ, dst->servicing.next, n_CPU);
391 } else {
392 pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
393 __func__, n_CPU, n_IRQ, dst->raised.next);
394 mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
395 }
396 } else {
397 IRQ_get_next(opp, &dst->servicing);
398 if (dst->raised.priority > dst->ctpr &&
399 dst->raised.priority > dst->servicing.priority) {
400 pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
401 __func__, n_IRQ, dst->raised.next,
402 dst->raised.priority, dst->ctpr,
403 dst->servicing.priority, n_CPU);
404 /* IRQ line stays asserted */
405 } else {
406 pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
407 __func__, n_IRQ, dst->ctpr,
408 dst->servicing.priority, n_CPU);
409 mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
410 }
411 }
412}
413
414/* update pic state because registers for n_IRQ have changed value */
415static void openpic_update_irq(struct openpic *opp, int n_IRQ)
416{
417 struct irq_source *src;
418 bool active, was_active;
419 int i;
420
421 src = &opp->src[n_IRQ];
422 active = src->pending;
423
424 if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
425 /* Interrupt source is disabled */
426 pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
427 active = false;
428 }
429
430 was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
431
432 /*
433 * We don't have a similar check for already-active because
434 * ctpr may have changed and we need to withdraw the interrupt.
435 */
436 if (!active && !was_active) {
437 pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
438 return;
439 }
440
441 if (active)
442 src->ivpr |= IVPR_ACTIVITY_MASK;
443 else
444 src->ivpr &= ~IVPR_ACTIVITY_MASK;
445
446 if (src->destmask == 0) {
447 /* No target */
448 pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
449 return;
450 }
451
452 if (src->destmask == (1 << src->last_cpu)) {
453 /* Only one CPU is allowed to receive this IRQ */
454 IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
455 } else if (!(src->ivpr & IVPR_MODE_MASK)) {
456 /* Directed delivery mode */
457 for (i = 0; i < opp->nb_cpus; i++) {
458 if (src->destmask & (1 << i)) {
459 IRQ_local_pipe(opp, i, n_IRQ, active,
460 was_active);
461 }
462 }
463 } else {
464 /* Distributed delivery mode */
465 for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
466 if (i == opp->nb_cpus)
467 i = 0;
468
469 if (src->destmask & (1 << i)) {
470 IRQ_local_pipe(opp, i, n_IRQ, active,
471 was_active);
472 src->last_cpu = i;
473 break;
474 }
475 }
476 }
477}
478
479static void openpic_set_irq(void *opaque, int n_IRQ, int level)
480{
481 struct openpic *opp = opaque;
482 struct irq_source *src;
483
484 if (n_IRQ >= MAX_IRQ) {
485 WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
486 return;
487 }
488
489 src = &opp->src[n_IRQ];
490 pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
491 n_IRQ, level, src->ivpr);
492 if (src->level) {
493 /* level-sensitive irq */
494 src->pending = level;
495 openpic_update_irq(opp, n_IRQ);
496 } else {
497 /* edge-sensitive irq */
498 if (level) {
499 src->pending = 1;
500 openpic_update_irq(opp, n_IRQ);
501 }
502
503 if (src->output != ILR_INTTGT_INT) {
504 /* Edge-triggered interrupts shouldn't be used
505 * with non-INT delivery, but just in case,
506 * try to make it do something sane rather than
507 * cause an interrupt storm. This is close to
508 * what you'd probably see happen in real hardware.
509 */
510 src->pending = 0;
511 openpic_update_irq(opp, n_IRQ);
512 }
513 }
514}
515
516static void openpic_reset(struct openpic *opp)
517{
518 int i;
519
520 opp->gcr = GCR_RESET;
521 /* Initialise controller registers */
522 opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
523 (opp->vid << FRR_VID_SHIFT);
524
525 opp->pir = 0;
526 opp->spve = -1 & opp->vector_mask;
527 opp->tfrr = opp->tfrr_reset;
528 /* Initialise IRQ sources */
529 for (i = 0; i < opp->max_irq; i++) {
530 opp->src[i].ivpr = opp->ivpr_reset;
531 opp->src[i].idr = opp->idr_reset;
532
533 switch (opp->src[i].type) {
534 case IRQ_TYPE_NORMAL:
535 opp->src[i].level =
536 !!(opp->ivpr_reset & IVPR_SENSE_MASK);
537 break;
538
539 case IRQ_TYPE_FSLINT:
540 opp->src[i].ivpr |= IVPR_POLARITY_MASK;
541 break;
542
543 case IRQ_TYPE_FSLSPECIAL:
544 break;
545 }
546 }
547 /* Initialise IRQ destinations */
548 for (i = 0; i < MAX_CPU; i++) {
549 opp->dst[i].ctpr = 15;
550 memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
551 opp->dst[i].raised.next = -1;
552 memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
553 opp->dst[i].servicing.next = -1;
554 }
555 /* Initialise timers */
556 for (i = 0; i < MAX_TMR; i++) {
557 opp->timers[i].tccr = 0;
558 opp->timers[i].tbcr = TBCR_CI;
559 }
560 /* Go out of RESET state */
561 opp->gcr = 0;
562}
563
564static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
565{
566 return opp->src[n_IRQ].idr;
567}
568
569static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
570{
571 if (opp->flags & OPENPIC_FLAG_ILR)
572 return opp->src[n_IRQ].output;
573
574 return 0xffffffff;
575}
576
577static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
578{
579 return opp->src[n_IRQ].ivpr;
580}
581
582static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
583 uint32_t val)
584{
585 struct irq_source *src = &opp->src[n_IRQ];
586 uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
587 uint32_t crit_mask = 0;
588 uint32_t mask = normal_mask;
589 int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
590 int i;
591
592 if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
593 crit_mask = mask << crit_shift;
594 mask |= crit_mask | IDR_EP;
595 }
596
597 src->idr = val & mask;
598 pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
599
600 if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
601 if (src->idr & crit_mask) {
602 if (src->idr & normal_mask) {
603 pr_debug("%s: IRQ configured for multiple output types, using critical\n",
604 __func__);
605 }
606
607 src->output = ILR_INTTGT_CINT;
608 src->nomask = true;
609 src->destmask = 0;
610
611 for (i = 0; i < opp->nb_cpus; i++) {
612 int n_ci = IDR_CI0_SHIFT - i;
613
614 if (src->idr & (1UL << n_ci))
615 src->destmask |= 1UL << i;
616 }
617 } else {
618 src->output = ILR_INTTGT_INT;
619 src->nomask = false;
620 src->destmask = src->idr & normal_mask;
621 }
622 } else {
623 src->destmask = src->idr;
624 }
625}
626
627static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
628 uint32_t val)
629{
630 if (opp->flags & OPENPIC_FLAG_ILR) {
631 struct irq_source *src = &opp->src[n_IRQ];
632
633 src->output = val & ILR_INTTGT_MASK;
634 pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
635 src->output);
636
637 /* TODO: on MPIC v4.0 only, set nomask for non-INT */
638 }
639}
640
641static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
642 uint32_t val)
643{
644 uint32_t mask;
645
646 /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
647 * the polarity bit is read-only on internal interrupts.
648 */
649 mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
650 IVPR_POLARITY_MASK | opp->vector_mask;
651
652 /* ACTIVITY bit is read-only */
653 opp->src[n_IRQ].ivpr =
654 (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
655
656 /* For FSL internal interrupts, The sense bit is reserved and zero,
657 * and the interrupt is always level-triggered. Timers and IPIs
658 * have no sense or polarity bits, and are edge-triggered.
659 */
660 switch (opp->src[n_IRQ].type) {
661 case IRQ_TYPE_NORMAL:
662 opp->src[n_IRQ].level =
663 !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
664 break;
665
666 case IRQ_TYPE_FSLINT:
667 opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
668 break;
669
670 case IRQ_TYPE_FSLSPECIAL:
671 opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
672 break;
673 }
674
675 openpic_update_irq(opp, n_IRQ);
676 pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
677 opp->src[n_IRQ].ivpr);
678}
679
680static void openpic_gcr_write(struct openpic *opp, uint64_t val)
681{
682 if (val & GCR_RESET) {
683 openpic_reset(opp);
684 return;
685 }
686
687 opp->gcr &= ~opp->mpic_mode_mask;
688 opp->gcr |= val & opp->mpic_mode_mask;
689}
690
691static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
692{
693 struct openpic *opp = opaque;
694 int err = 0;
695
696 pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
697 if (addr & 0xF)
698 return 0;
699
700 switch (addr) {
701 case 0x00: /* Block Revision Register1 (BRR1) is Readonly */
702 break;
703 case 0x40:
704 case 0x50:
705 case 0x60:
706 case 0x70:
707 case 0x80:
708 case 0x90:
709 case 0xA0:
710 case 0xB0:
711 err = openpic_cpu_write_internal(opp, addr, val,
712 get_current_cpu());
713 break;
714 case 0x1000: /* FRR */
715 break;
716 case 0x1020: /* GCR */
717 openpic_gcr_write(opp, val);
718 break;
719 case 0x1080: /* VIR */
720 break;
721 case 0x1090: /* PIR */
722 /*
723 * This register is used to reset a CPU core --
724 * let userspace handle it.
725 */
726 err = -ENXIO;
727 break;
728 case 0x10A0: /* IPI_IVPR */
729 case 0x10B0:
730 case 0x10C0:
731 case 0x10D0: {
732 int idx;
733 idx = (addr - 0x10A0) >> 4;
734 write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
735 break;
736 }
737 case 0x10E0: /* SPVE */
738 opp->spve = val & opp->vector_mask;
739 break;
740 default:
741 break;
742 }
743
744 return err;
745}
746
747static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
748{
749 struct openpic *opp = opaque;
750 u32 retval;
751 int err = 0;
752
753 pr_debug("%s: addr %#llx\n", __func__, addr);
754 retval = 0xFFFFFFFF;
755 if (addr & 0xF)
756 goto out;
757
758 switch (addr) {
759 case 0x1000: /* FRR */
760 retval = opp->frr;
761 retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
762 break;
763 case 0x1020: /* GCR */
764 retval = opp->gcr;
765 break;
766 case 0x1080: /* VIR */
767 retval = opp->vir;
768 break;
769 case 0x1090: /* PIR */
770 retval = 0x00000000;
771 break;
772 case 0x00: /* Block Revision Register1 (BRR1) */
773 retval = opp->brr1;
774 break;
775 case 0x40:
776 case 0x50:
777 case 0x60:
778 case 0x70:
779 case 0x80:
780 case 0x90:
781 case 0xA0:
782 case 0xB0:
783 err = openpic_cpu_read_internal(opp, addr,
784 &retval, get_current_cpu());
785 break;
786 case 0x10A0: /* IPI_IVPR */
787 case 0x10B0:
788 case 0x10C0:
789 case 0x10D0:
790 {
791 int idx;
792 idx = (addr - 0x10A0) >> 4;
793 retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
794 }
795 break;
796 case 0x10E0: /* SPVE */
797 retval = opp->spve;
798 break;
799 default:
800 break;
801 }
802
803out:
804 pr_debug("%s: => 0x%08x\n", __func__, retval);
805 *ptr = retval;
806 return err;
807}
808
809static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
810{
811 struct openpic *opp = opaque;
812 int idx;
813
814 addr += 0x10f0;
815
816 pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
817 if (addr & 0xF)
818 return 0;
819
820 if (addr == 0x10f0) {
821 /* TFRR */
822 opp->tfrr = val;
823 return 0;
824 }
825
826 idx = (addr >> 6) & 0x3;
827 addr = addr & 0x30;
828
829 switch (addr & 0x30) {
830 case 0x00: /* TCCR */
831 break;
832 case 0x10: /* TBCR */
833 if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
834 (val & TBCR_CI) == 0 &&
835 (opp->timers[idx].tbcr & TBCR_CI) != 0)
836 opp->timers[idx].tccr &= ~TCCR_TOG;
837
838 opp->timers[idx].tbcr = val;
839 break;
840 case 0x20: /* TVPR */
841 write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
842 break;
843 case 0x30: /* TDR */
844 write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
845 break;
846 }
847
848 return 0;
849}
850
851static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
852{
853 struct openpic *opp = opaque;
854 uint32_t retval = -1;
855 int idx;
856
857 pr_debug("%s: addr %#llx\n", __func__, addr);
858 if (addr & 0xF)
859 goto out;
860
861 idx = (addr >> 6) & 0x3;
862 if (addr == 0x0) {
863 /* TFRR */
864 retval = opp->tfrr;
865 goto out;
866 }
867
868 switch (addr & 0x30) {
869 case 0x00: /* TCCR */
870 retval = opp->timers[idx].tccr;
871 break;
872 case 0x10: /* TBCR */
873 retval = opp->timers[idx].tbcr;
874 break;
875 case 0x20: /* TIPV */
876 retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
877 break;
878 case 0x30: /* TIDE (TIDR) */
879 retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
880 break;
881 }
882
883out:
884 pr_debug("%s: => 0x%08x\n", __func__, retval);
885 *ptr = retval;
886 return 0;
887}
888
889static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
890{
891 struct openpic *opp = opaque;
892 int idx;
893
894 pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
895
896 addr = addr & 0xffff;
897 idx = addr >> 5;
898
899 switch (addr & 0x1f) {
900 case 0x00:
901 write_IRQreg_ivpr(opp, idx, val);
902 break;
903 case 0x10:
904 write_IRQreg_idr(opp, idx, val);
905 break;
906 case 0x18:
907 write_IRQreg_ilr(opp, idx, val);
908 break;
909 }
910
911 return 0;
912}
913
914static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
915{
916 struct openpic *opp = opaque;
917 uint32_t retval;
918 int idx;
919
920 pr_debug("%s: addr %#llx\n", __func__, addr);
921 retval = 0xFFFFFFFF;
922
923 addr = addr & 0xffff;
924 idx = addr >> 5;
925
926 switch (addr & 0x1f) {
927 case 0x00:
928 retval = read_IRQreg_ivpr(opp, idx);
929 break;
930 case 0x10:
931 retval = read_IRQreg_idr(opp, idx);
932 break;
933 case 0x18:
934 retval = read_IRQreg_ilr(opp, idx);
935 break;
936 }
937
938 pr_debug("%s: => 0x%08x\n", __func__, retval);
939 *ptr = retval;
940 return 0;
941}
942
943static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
944{
945 struct openpic *opp = opaque;
946 int idx = opp->irq_msi;
947 int srs, ibs;
948
949 pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
950 if (addr & 0xF)
951 return 0;
952
953 switch (addr) {
954 case MSIIR_OFFSET:
955 srs = val >> MSIIR_SRS_SHIFT;
956 idx += srs;
957 ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
958 opp->msi[srs].msir |= 1 << ibs;
959 openpic_set_irq(opp, idx, 1);
960 break;
961 default:
962 /* most registers are read-only, thus ignored */
963 break;
964 }
965
966 return 0;
967}
968
969static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
970{
971 struct openpic *opp = opaque;
972 uint32_t r = 0;
973 int i, srs;
974
975 pr_debug("%s: addr %#llx\n", __func__, addr);
976 if (addr & 0xF)
977 return -ENXIO;
978
979 srs = addr >> 4;
980
981 switch (addr) {
982 case 0x00:
983 case 0x10:
984 case 0x20:
985 case 0x30:
986 case 0x40:
987 case 0x50:
988 case 0x60:
989 case 0x70: /* MSIRs */
990 r = opp->msi[srs].msir;
991 /* Clear on read */
992 opp->msi[srs].msir = 0;
993 openpic_set_irq(opp, opp->irq_msi + srs, 0);
994 break;
995 case 0x120: /* MSISR */
996 for (i = 0; i < MAX_MSI; i++)
997 r |= (opp->msi[i].msir ? 1 : 0) << i;
998 break;
999 }
1000
1001 pr_debug("%s: => 0x%08x\n", __func__, r);
1002 *ptr = r;
1003 return 0;
1004}
1005
1006static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
1007{
1008 uint32_t r = 0;
1009
1010 pr_debug("%s: addr %#llx\n", __func__, addr);
1011
1012 /* TODO: EISR/EIMR */
1013
1014 *ptr = r;
1015 return 0;
1016}
1017
1018static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
1019{
1020 pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
1021
1022 /* TODO: EISR/EIMR */
1023 return 0;
1024}
1025
1026static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
1027 u32 val, int idx)
1028{
1029 struct openpic *opp = opaque;
1030 struct irq_source *src;
1031 struct irq_dest *dst;
1032 int s_IRQ, n_IRQ;
1033
1034 pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
1035 addr, val);
1036
1037 if (idx < 0)
1038 return 0;
1039
1040 if (addr & 0xF)
1041 return 0;
1042
1043 dst = &opp->dst[idx];
1044 addr &= 0xFF0;
1045 switch (addr) {
1046 case 0x40: /* IPIDR */
1047 case 0x50:
1048 case 0x60:
1049 case 0x70:
1050 idx = (addr - 0x40) >> 4;
1051 /* we use IDE as mask which CPUs to deliver the IPI to still. */
1052 opp->src[opp->irq_ipi0 + idx].destmask |= val;
1053 openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
1054 openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
1055 break;
1056 case 0x80: /* CTPR */
1057 dst->ctpr = val & 0x0000000F;
1058
1059 pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
1060 __func__, idx, dst->ctpr, dst->raised.priority,
1061 dst->servicing.priority);
1062
1063 if (dst->raised.priority <= dst->ctpr) {
1064 pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
1065 __func__, idx);
1066 mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1067 } else if (dst->raised.priority > dst->servicing.priority) {
1068 pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
1069 __func__, idx, dst->raised.next);
1070 mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1071 }
1072
1073 break;
1074 case 0x90: /* WHOAMI */
1075 /* Read-only register */
1076 break;
1077 case 0xA0: /* IACK */
1078 /* Read-only register */
1079 break;
1080 case 0xB0: { /* EOI */
1081 int notify_eoi;
1082
1083 pr_debug("EOI\n");
1084 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1085
1086 if (s_IRQ < 0) {
1087 pr_debug("%s: EOI with no interrupt in service\n",
1088 __func__);
1089 break;
1090 }
1091
1092 IRQ_resetbit(&dst->servicing, s_IRQ);
1093 /* Notify listeners that the IRQ is over */
1094 notify_eoi = s_IRQ;
1095 /* Set up next servicing IRQ */
1096 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1097 /* Check queued interrupts. */
1098 n_IRQ = IRQ_get_next(opp, &dst->raised);
1099 src = &opp->src[n_IRQ];
1100 if (n_IRQ != -1 &&
1101 (s_IRQ == -1 ||
1102 IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
1103 pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
1104 idx, n_IRQ);
1105 mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1106 }
1107
1108 spin_unlock(&opp->lock);
1109 kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
1110 spin_lock(&opp->lock);
1111
1112 break;
1113 }
1114 default:
1115 break;
1116 }
1117
1118 return 0;
1119}
1120
1121static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
1122{
1123 struct openpic *opp = opaque;
1124
1125 return openpic_cpu_write_internal(opp, addr, val,
1126 (addr & 0x1f000) >> 12);
1127}
1128
1129static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
1130 int cpu)
1131{
1132 struct irq_source *src;
1133 int retval, irq;
1134
1135 pr_debug("Lower OpenPIC INT output\n");
1136 mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1137
1138 irq = IRQ_get_next(opp, &dst->raised);
1139 pr_debug("IACK: irq=%d\n", irq);
1140
1141 if (irq == -1)
1142 /* No more interrupt pending */
1143 return opp->spve;
1144
1145 src = &opp->src[irq];
1146 if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
1147 !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
1148 pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
1149 __func__, irq, dst->ctpr, src->ivpr);
1150 openpic_update_irq(opp, irq);
1151 retval = opp->spve;
1152 } else {
1153 /* IRQ enter servicing state */
1154 IRQ_setbit(&dst->servicing, irq);
1155 retval = IVPR_VECTOR(opp, src->ivpr);
1156 }
1157
1158 if (!src->level) {
1159 /* edge-sensitive IRQ */
1160 src->ivpr &= ~IVPR_ACTIVITY_MASK;
1161 src->pending = 0;
1162 IRQ_resetbit(&dst->raised, irq);
1163 }
1164
1165 if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
1166 src->destmask &= ~(1 << cpu);
1167 if (src->destmask && !src->level) {
1168 /* trigger on CPUs that didn't know about it yet */
1169 openpic_set_irq(opp, irq, 1);
1170 openpic_set_irq(opp, irq, 0);
1171 /* if all CPUs knew about it, set active bit again */
1172 src->ivpr |= IVPR_ACTIVITY_MASK;
1173 }
1174 }
1175
1176 return retval;
1177}
1178
1179void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
1180{
1181 struct openpic *opp = vcpu->arch.mpic;
1182 int cpu = vcpu->arch.irq_cpu_id;
1183 unsigned long flags;
1184
1185 spin_lock_irqsave(&opp->lock, flags);
1186
1187 if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
1188 kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
1189
1190 spin_unlock_irqrestore(&opp->lock, flags);
1191}
1192
1193static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
1194 u32 *ptr, int idx)
1195{
1196 struct openpic *opp = opaque;
1197 struct irq_dest *dst;
1198 uint32_t retval;
1199
1200 pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
1201 retval = 0xFFFFFFFF;
1202
1203 if (idx < 0)
1204 goto out;
1205
1206 if (addr & 0xF)
1207 goto out;
1208
1209 dst = &opp->dst[idx];
1210 addr &= 0xFF0;
1211 switch (addr) {
1212 case 0x80: /* CTPR */
1213 retval = dst->ctpr;
1214 break;
1215 case 0x90: /* WHOAMI */
1216 retval = idx;
1217 break;
1218 case 0xA0: /* IACK */
1219 retval = openpic_iack(opp, dst, idx);
1220 break;
1221 case 0xB0: /* EOI */
1222 retval = 0;
1223 break;
1224 default:
1225 break;
1226 }
1227 pr_debug("%s: => 0x%08x\n", __func__, retval);
1228
1229out:
1230 *ptr = retval;
1231 return 0;
1232}
1233
1234static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
1235{
1236 struct openpic *opp = opaque;
1237
1238 return openpic_cpu_read_internal(opp, addr, ptr,
1239 (addr & 0x1f000) >> 12);
1240}
1241
/*
 * Descriptor for one sub-region of the MPIC register window.
 * The dispatchers match an offset against [start_addr, start_addr + size)
 * and call the handler with the offset made relative to start_addr.
 */
struct mem_reg {
	/* handlers; @opaque is the struct openpic instance */
	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
	int (*write)(void *opaque, gpa_t addr, u32 val);
	/* first offset covered by this region */
	gpa_t start_addr;
	/* length of the region in bytes */
	int size;
};
1248
1249static const struct mem_reg openpic_gbl_mmio = {
1250 .write = openpic_gbl_write,
1251 .read = openpic_gbl_read,
1252 .start_addr = OPENPIC_GLB_REG_START,
1253 .size = OPENPIC_GLB_REG_SIZE,
1254};
1255
1256static const struct mem_reg openpic_tmr_mmio = {
1257 .write = openpic_tmr_write,
1258 .read = openpic_tmr_read,
1259 .start_addr = OPENPIC_TMR_REG_START,
1260 .size = OPENPIC_TMR_REG_SIZE,
1261};
1262
1263static const struct mem_reg openpic_cpu_mmio = {
1264 .write = openpic_cpu_write,
1265 .read = openpic_cpu_read,
1266 .start_addr = OPENPIC_CPU_REG_START,
1267 .size = OPENPIC_CPU_REG_SIZE,
1268};
1269
1270static const struct mem_reg openpic_src_mmio = {
1271 .write = openpic_src_write,
1272 .read = openpic_src_read,
1273 .start_addr = OPENPIC_SRC_REG_START,
1274 .size = OPENPIC_SRC_REG_SIZE,
1275};
1276
1277static const struct mem_reg openpic_msi_mmio = {
1278 .read = openpic_msi_read,
1279 .write = openpic_msi_write,
1280 .start_addr = OPENPIC_MSI_REG_START,
1281 .size = OPENPIC_MSI_REG_SIZE,
1282};
1283
1284static const struct mem_reg openpic_summary_mmio = {
1285 .read = openpic_summary_read,
1286 .write = openpic_summary_write,
1287 .start_addr = OPENPIC_SUMMARY_REG_START,
1288 .size = OPENPIC_SUMMARY_REG_SIZE,
1289};
1290
1291static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
1292{
1293 if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
1294 WARN(1, "kvm mpic: too many mmio regions\n");
1295 return;
1296 }
1297
1298 opp->mmio_regions[opp->num_mmio_regions++] = mr;
1299}
1300
1301static void fsl_common_init(struct openpic *opp)
1302{
1303 int i;
1304 int virq = MAX_SRC;
1305
1306 add_mmio_region(opp, &openpic_msi_mmio);
1307 add_mmio_region(opp, &openpic_summary_mmio);
1308
1309 opp->vid = VID_REVISION_1_2;
1310 opp->vir = VIR_GENERIC;
1311 opp->vector_mask = 0xFFFF;
1312 opp->tfrr_reset = 0;
1313 opp->ivpr_reset = IVPR_MASK_MASK;
1314 opp->idr_reset = 1 << 0;
1315 opp->max_irq = MAX_IRQ;
1316
1317 opp->irq_ipi0 = virq;
1318 virq += MAX_IPI;
1319 opp->irq_tim0 = virq;
1320 virq += MAX_TMR;
1321
1322 BUG_ON(virq > MAX_IRQ);
1323
1324 opp->irq_msi = 224;
1325
1326 for (i = 0; i < opp->fsl->max_ext; i++)
1327 opp->src[i].level = false;
1328
1329 /* Internal interrupts, including message and MSI */
1330 for (i = 16; i < MAX_SRC; i++) {
1331 opp->src[i].type = IRQ_TYPE_FSLINT;
1332 opp->src[i].level = true;
1333 }
1334
1335 /* timers and IPIs */
1336 for (i = MAX_SRC; i < virq; i++) {
1337 opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
1338 opp->src[i].level = false;
1339 }
1340}
1341
1342static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
1343{
1344 int i;
1345
1346 for (i = 0; i < opp->num_mmio_regions; i++) {
1347 const struct mem_reg *mr = opp->mmio_regions[i];
1348
1349 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1350 continue;
1351
1352 return mr->read(opp, addr - mr->start_addr, ptr);
1353 }
1354
1355 return -ENXIO;
1356}
1357
1358static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
1359{
1360 int i;
1361
1362 for (i = 0; i < opp->num_mmio_regions; i++) {
1363 const struct mem_reg *mr = opp->mmio_regions[i];
1364
1365 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1366 continue;
1367
1368 return mr->write(opp, addr - mr->start_addr, val);
1369 }
1370
1371 return -ENXIO;
1372}
1373
1374static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
1375 int len, void *ptr)
1376{
1377 struct openpic *opp = container_of(this, struct openpic, mmio);
1378 int ret;
1379 union {
1380 u32 val;
1381 u8 bytes[4];
1382 } u;
1383
1384 if (addr & (len - 1)) {
1385 pr_debug("%s: bad alignment %llx/%d\n",
1386 __func__, addr, len);
1387 return -EINVAL;
1388 }
1389
1390 spin_lock_irq(&opp->lock);
1391 ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
1392 spin_unlock_irq(&opp->lock);
1393
1394 /*
1395 * Technically only 32-bit accesses are allowed, but be nice to
1396 * people dumping registers a byte at a time -- it works in real
1397 * hardware (reads only, not writes).
1398 */
1399 if (len == 4) {
1400 *(u32 *)ptr = u.val;
1401 pr_debug("%s: addr %llx ret %d len 4 val %x\n",
1402 __func__, addr, ret, u.val);
1403 } else if (len == 1) {
1404 *(u8 *)ptr = u.bytes[addr & 3];
1405 pr_debug("%s: addr %llx ret %d len 1 val %x\n",
1406 __func__, addr, ret, u.bytes[addr & 3]);
1407 } else {
1408 pr_debug("%s: bad length %d\n", __func__, len);
1409 return -EINVAL;
1410 }
1411
1412 return ret;
1413}
1414
1415static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
1416 int len, const void *ptr)
1417{
1418 struct openpic *opp = container_of(this, struct openpic, mmio);
1419 int ret;
1420
1421 if (len != 4) {
1422 pr_debug("%s: bad length %d\n", __func__, len);
1423 return -EOPNOTSUPP;
1424 }
1425 if (addr & 3) {
1426 pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
1427 return -EOPNOTSUPP;
1428 }
1429
1430 spin_lock_irq(&opp->lock);
1431 ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
1432 *(const u32 *)ptr);
1433 spin_unlock_irq(&opp->lock);
1434
1435 pr_debug("%s: addr %llx ret %d val %x\n",
1436 __func__, addr, ret, *(const u32 *)ptr);
1437
1438 return ret;
1439}
1440
1441static const struct kvm_io_device_ops mpic_mmio_ops = {
1442 .read = kvm_mpic_read,
1443 .write = kvm_mpic_write,
1444};
1445
1446static void map_mmio(struct openpic *opp)
1447{
1448 kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
1449
1450 kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
1451 opp->reg_base, OPENPIC_REG_SIZE,
1452 &opp->mmio);
1453}
1454
1455static void unmap_mmio(struct openpic *opp)
1456{
1457 kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
1458}
1459
1460static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
1461{
1462 u64 base;
1463
1464 if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
1465 return -EFAULT;
1466
1467 if (base & 0x3ffff) {
1468 pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
1469 __func__, base);
1470 return -EINVAL;
1471 }
1472
1473 if (base == opp->reg_base)
1474 return 0;
1475
1476 mutex_lock(&opp->kvm->slots_lock);
1477
1478 unmap_mmio(opp);
1479 opp->reg_base = base;
1480
1481 pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
1482 __func__, base);
1483
1484 if (base == 0)
1485 goto out;
1486
1487 map_mmio(opp);
1488
1489out:
1490 mutex_unlock(&opp->kvm->slots_lock);
1491 return 0;
1492}
1493
1494#define ATTR_SET 0
1495#define ATTR_GET 1
1496
1497static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
1498{
1499 int ret;
1500
1501 if (addr & 3)
1502 return -ENXIO;
1503
1504 spin_lock_irq(&opp->lock);
1505
1506 if (type == ATTR_SET)
1507 ret = kvm_mpic_write_internal(opp, addr, *val);
1508 else
1509 ret = kvm_mpic_read_internal(opp, addr, val);
1510
1511 spin_unlock_irq(&opp->lock);
1512
1513 pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
1514
1515 return ret;
1516}
1517
1518static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1519{
1520 struct openpic *opp = dev->private;
1521 u32 attr32;
1522
1523 switch (attr->group) {
1524 case KVM_DEV_MPIC_GRP_MISC:
1525 switch (attr->attr) {
1526 case KVM_DEV_MPIC_BASE_ADDR:
1527 return set_base_addr(opp, attr);
1528 }
1529
1530 break;
1531
1532 case KVM_DEV_MPIC_GRP_REGISTER:
1533 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1534 return -EFAULT;
1535
1536 return access_reg(opp, attr->attr, &attr32, ATTR_SET);
1537
1538 case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1539 if (attr->attr > MAX_SRC)
1540 return -EINVAL;
1541
1542 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1543 return -EFAULT;
1544
1545 if (attr32 != 0 && attr32 != 1)
1546 return -EINVAL;
1547
1548 spin_lock_irq(&opp->lock);
1549 openpic_set_irq(opp, attr->attr, attr32);
1550 spin_unlock_irq(&opp->lock);
1551 return 0;
1552 }
1553
1554 return -ENXIO;
1555}
1556
1557static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1558{
1559 struct openpic *opp = dev->private;
1560 u64 attr64;
1561 u32 attr32;
1562 int ret;
1563
1564 switch (attr->group) {
1565 case KVM_DEV_MPIC_GRP_MISC:
1566 switch (attr->attr) {
1567 case KVM_DEV_MPIC_BASE_ADDR:
1568 mutex_lock(&opp->kvm->slots_lock);
1569 attr64 = opp->reg_base;
1570 mutex_unlock(&opp->kvm->slots_lock);
1571
1572 if (copy_to_user((u64 __user *)(long)attr->addr,
1573 &attr64, sizeof(u64)))
1574 return -EFAULT;
1575
1576 return 0;
1577 }
1578
1579 break;
1580
1581 case KVM_DEV_MPIC_GRP_REGISTER:
1582 ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
1583 if (ret)
1584 return ret;
1585
1586 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1587 return -EFAULT;
1588
1589 return 0;
1590
1591 case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1592 if (attr->attr > MAX_SRC)
1593 return -EINVAL;
1594
1595 spin_lock_irq(&opp->lock);
1596 attr32 = opp->src[attr->attr].pending;
1597 spin_unlock_irq(&opp->lock);
1598
1599 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1600 return -EFAULT;
1601
1602 return 0;
1603 }
1604
1605 return -ENXIO;
1606}
1607
1608static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1609{
1610 switch (attr->group) {
1611 case KVM_DEV_MPIC_GRP_MISC:
1612 switch (attr->attr) {
1613 case KVM_DEV_MPIC_BASE_ADDR:
1614 return 0;
1615 }
1616
1617 break;
1618
1619 case KVM_DEV_MPIC_GRP_REGISTER:
1620 return 0;
1621
1622 case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1623 if (attr->attr > MAX_SRC)
1624 break;
1625
1626 return 0;
1627 }
1628
1629 return -ENXIO;
1630}
1631
1632static void mpic_destroy(struct kvm_device *dev)
1633{
1634 struct openpic *opp = dev->private;
1635
1636 dev->kvm->arch.mpic = NULL;
1637 kfree(opp);
1638}
1639
1640static int mpic_set_default_irq_routing(struct openpic *opp)
1641{
1642 struct kvm_irq_routing_entry *routing;
1643
1644 /* Create a nop default map, so that dereferencing it still works */
1645 routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
1646 if (!routing)
1647 return -ENOMEM;
1648
1649 kvm_set_irq_routing(opp->kvm, routing, 0, 0);
1650
1651 kfree(routing);
1652 return 0;
1653}
1654
1655static int mpic_create(struct kvm_device *dev, u32 type)
1656{
1657 struct openpic *opp;
1658 int ret;
1659
1660 /* We only support one MPIC at a time for now */
1661 if (dev->kvm->arch.mpic)
1662 return -EINVAL;
1663
1664 opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
1665 if (!opp)
1666 return -ENOMEM;
1667
1668 dev->private = opp;
1669 opp->kvm = dev->kvm;
1670 opp->dev = dev;
1671 opp->model = type;
1672 spin_lock_init(&opp->lock);
1673
1674 add_mmio_region(opp, &openpic_gbl_mmio);
1675 add_mmio_region(opp, &openpic_tmr_mmio);
1676 add_mmio_region(opp, &openpic_src_mmio);
1677 add_mmio_region(opp, &openpic_cpu_mmio);
1678
1679 switch (opp->model) {
1680 case KVM_DEV_TYPE_FSL_MPIC_20:
1681 opp->fsl = &fsl_mpic_20;
1682 opp->brr1 = 0x00400200;
1683 opp->flags |= OPENPIC_FLAG_IDR_CRIT;
1684 opp->nb_irqs = 80;
1685 opp->mpic_mode_mask = GCR_MODE_MIXED;
1686
1687 fsl_common_init(opp);
1688
1689 break;
1690
1691 case KVM_DEV_TYPE_FSL_MPIC_42:
1692 opp->fsl = &fsl_mpic_42;
1693 opp->brr1 = 0x00400402;
1694 opp->flags |= OPENPIC_FLAG_ILR;
1695 opp->nb_irqs = 196;
1696 opp->mpic_mode_mask = GCR_MODE_PROXY;
1697
1698 fsl_common_init(opp);
1699
1700 break;
1701
1702 default:
1703 ret = -ENODEV;
1704 goto err;
1705 }
1706
1707 ret = mpic_set_default_irq_routing(opp);
1708 if (ret)
1709 goto err;
1710
1711 openpic_reset(opp);
1712
1713 smp_wmb();
1714 dev->kvm->arch.mpic = opp;
1715
1716 return 0;
1717
1718err:
1719 kfree(opp);
1720 return ret;
1721}
1722
1723struct kvm_device_ops kvm_mpic_ops = {
1724 .name = "kvm-mpic",
1725 .create = mpic_create,
1726 .destroy = mpic_destroy,
1727 .set_attr = mpic_set_attr,
1728 .get_attr = mpic_get_attr,
1729 .has_attr = mpic_has_attr,
1730};
1731
1732int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
1733 u32 cpu)
1734{
1735 struct openpic *opp = dev->private;
1736 int ret = 0;
1737
1738 if (dev->ops != &kvm_mpic_ops)
1739 return -EPERM;
1740 if (opp->kvm != vcpu->kvm)
1741 return -EPERM;
1742 if (cpu < 0 || cpu >= MAX_CPU)
1743 return -EPERM;
1744
1745 spin_lock_irq(&opp->lock);
1746
1747 if (opp->dst[cpu].vcpu) {
1748 ret = -EEXIST;
1749 goto out;
1750 }
1751 if (vcpu->arch.irq_type) {
1752 ret = -EBUSY;
1753 goto out;
1754 }
1755
1756 opp->dst[cpu].vcpu = vcpu;
1757 opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
1758
1759 vcpu->arch.mpic = opp;
1760 vcpu->arch.irq_cpu_id = cpu;
1761 vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
1762
1763 /* This might need to be changed if GCR gets extended */
1764 if (opp->mpic_mode_mask == GCR_MODE_PROXY)
1765 vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
1766
1767out:
1768 spin_unlock_irq(&opp->lock);
1769 return ret;
1770}
1771
1772/*
1773 * This should only happen immediately before the mpic is destroyed,
1774 * so we shouldn't need to worry about anything still trying to
1775 * access the vcpu pointer.
1776 */
1777void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
1778{
1779 BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
1780
1781 opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
1782}
1783
1784/*
1785 * Return value:
1786 * < 0 Interrupt was ignored (masked or not delivered for other reasons)
1787 * = 0 Interrupt was coalesced (previous irq is still pending)
1788 * > 0 Number of CPUs interrupt was delivered to
1789 */
1790static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
1791 struct kvm *kvm, int irq_source_id, int level,
1792 bool line_status)
1793{
1794 u32 irq = e->irqchip.pin;
1795 struct openpic *opp = kvm->arch.mpic;
1796 unsigned long flags;
1797
1798 spin_lock_irqsave(&opp->lock, flags);
1799 openpic_set_irq(opp, irq, level);
1800 spin_unlock_irqrestore(&opp->lock, flags);
1801
1802 /* All code paths we care about don't check for the return value */
1803 return 0;
1804}
1805
1806int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
1807 struct kvm *kvm, int irq_source_id, int level, bool line_status)
1808{
1809 struct openpic *opp = kvm->arch.mpic;
1810 unsigned long flags;
1811
1812 spin_lock_irqsave(&opp->lock, flags);
1813
1814 /*
1815 * XXX We ignore the target address for now, as we only support
1816 * a single MSI bank.
1817 */
1818 openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
1819 spin_unlock_irqrestore(&opp->lock, flags);
1820
1821 /* All code paths we care about don't check for the return value */
1822 return 0;
1823}
1824
1825int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
1826 struct kvm_kernel_irq_routing_entry *e,
1827 const struct kvm_irq_routing_entry *ue)
1828{
1829 int r = -EINVAL;
1830
1831 switch (ue->type) {
1832 case KVM_IRQ_ROUTING_IRQCHIP:
1833 e->set = mpic_set_irq;
1834 e->irqchip.irqchip = ue->u.irqchip.irqchip;
1835 e->irqchip.pin = ue->u.irqchip.pin;
1836 if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
1837 goto out;
1838 rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
1839 break;
1840 case KVM_IRQ_ROUTING_MSI:
1841 e->set = kvm_set_msi;
1842 e->msi.address_lo = ue->u.msi.address_lo;
1843 e->msi.address_hi = ue->u.msi.address_hi;
1844 e->msi.data = ue->u.msi.data;
1845 break;
1846 default:
1847 goto out;
1848 }
1849
1850 r = 0;
1851out:
1852 return r;
1853}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 934413cd3a1b..6316ee336e88 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -25,6 +25,7 @@
25#include <linux/hrtimer.h> 25#include <linux/hrtimer.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/file.h>
28#include <asm/cputable.h> 29#include <asm/cputable.h>
29#include <asm/uaccess.h> 30#include <asm/uaccess.h>
30#include <asm/kvm_ppc.h> 31#include <asm/kvm_ppc.h>
@@ -32,6 +33,7 @@
32#include <asm/cputhreads.h> 33#include <asm/cputhreads.h>
33#include <asm/irqflags.h> 34#include <asm/irqflags.h>
34#include "timing.h" 35#include "timing.h"
36#include "irq.h"
35#include "../mm/mmu_decl.h" 37#include "../mm/mmu_decl.h"
36 38
37#define CREATE_TRACE_POINTS 39#define CREATE_TRACE_POINTS
@@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext)
317 case KVM_CAP_ENABLE_CAP: 319 case KVM_CAP_ENABLE_CAP:
318 case KVM_CAP_ONE_REG: 320 case KVM_CAP_ONE_REG:
319 case KVM_CAP_IOEVENTFD: 321 case KVM_CAP_IOEVENTFD:
322 case KVM_CAP_DEVICE_CTRL:
320 r = 1; 323 r = 1;
321 break; 324 break;
322#ifndef CONFIG_KVM_BOOK3S_64_HV 325#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -326,6 +329,9 @@ int kvm_dev_ioctl_check_extension(long ext)
326#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) 329#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
327 case KVM_CAP_SW_TLB: 330 case KVM_CAP_SW_TLB:
328#endif 331#endif
332#ifdef CONFIG_KVM_MPIC
333 case KVM_CAP_IRQ_MPIC:
334#endif
329 r = 1; 335 r = 1;
330 break; 336 break;
331 case KVM_CAP_COALESCED_MMIO: 337 case KVM_CAP_COALESCED_MMIO:
@@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext)
335#ifdef CONFIG_PPC_BOOK3S_64 341#ifdef CONFIG_PPC_BOOK3S_64
336 case KVM_CAP_SPAPR_TCE: 342 case KVM_CAP_SPAPR_TCE:
337 case KVM_CAP_PPC_ALLOC_HTAB: 343 case KVM_CAP_PPC_ALLOC_HTAB:
344 case KVM_CAP_PPC_RTAS:
345#ifdef CONFIG_KVM_XICS
346 case KVM_CAP_IRQ_XICS:
347#endif
338 r = 1; 348 r = 1;
339 break; 349 break;
340#endif /* CONFIG_PPC_BOOK3S_64 */ 350#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -411,18 +421,17 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
411} 421}
412 422
413int kvm_arch_prepare_memory_region(struct kvm *kvm, 423int kvm_arch_prepare_memory_region(struct kvm *kvm,
414 struct kvm_memory_slot *memslot, 424 struct kvm_memory_slot *memslot,
415 struct kvm_memory_slot old, 425 struct kvm_userspace_memory_region *mem,
416 struct kvm_userspace_memory_region *mem, 426 enum kvm_mr_change change)
417 bool user_alloc)
418{ 427{
419 return kvmppc_core_prepare_memory_region(kvm, memslot, mem); 428 return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
420} 429}
421 430
422void kvm_arch_commit_memory_region(struct kvm *kvm, 431void kvm_arch_commit_memory_region(struct kvm *kvm,
423 struct kvm_userspace_memory_region *mem, 432 struct kvm_userspace_memory_region *mem,
424 struct kvm_memory_slot old, 433 const struct kvm_memory_slot *old,
425 bool user_alloc) 434 enum kvm_mr_change change)
426{ 435{
427 kvmppc_core_commit_memory_region(kvm, mem, old); 436 kvmppc_core_commit_memory_region(kvm, mem, old);
428} 437}
@@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
460 tasklet_kill(&vcpu->arch.tasklet); 469 tasklet_kill(&vcpu->arch.tasklet);
461 470
462 kvmppc_remove_vcpu_debugfs(vcpu); 471 kvmppc_remove_vcpu_debugfs(vcpu);
472
473 switch (vcpu->arch.irq_type) {
474 case KVMPPC_IRQ_MPIC:
475 kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
476 break;
477 case KVMPPC_IRQ_XICS:
478 kvmppc_xics_free_icp(vcpu);
479 break;
480 }
481
463 kvmppc_core_vcpu_free(vcpu); 482 kvmppc_core_vcpu_free(vcpu);
464} 483}
465 484
@@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
532#endif 551#endif
533} 552}
534 553
535int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
536 struct kvm_guest_debug *dbg)
537{
538 return -EINVAL;
539}
540
541static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 554static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
542 struct kvm_run *run) 555 struct kvm_run *run)
543{ 556{
@@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
612int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 625int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
613 unsigned int rt, unsigned int bytes, int is_bigendian) 626 unsigned int rt, unsigned int bytes, int is_bigendian)
614{ 627{
628 int idx, ret;
629
615 if (bytes > sizeof(run->mmio.data)) { 630 if (bytes > sizeof(run->mmio.data)) {
616 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, 631 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
617 run->mmio.len); 632 run->mmio.len);
@@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
627 vcpu->mmio_is_write = 0; 642 vcpu->mmio_is_write = 0;
628 vcpu->arch.mmio_sign_extend = 0; 643 vcpu->arch.mmio_sign_extend = 0;
629 644
630 if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, 645 idx = srcu_read_lock(&vcpu->kvm->srcu);
631 bytes, &run->mmio.data)) { 646
647 ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
648 bytes, &run->mmio.data);
649
650 srcu_read_unlock(&vcpu->kvm->srcu, idx);
651
652 if (!ret) {
632 kvmppc_complete_mmio_load(vcpu, run); 653 kvmppc_complete_mmio_load(vcpu, run);
633 vcpu->mmio_needed = 0; 654 vcpu->mmio_needed = 0;
634 return EMULATE_DONE; 655 return EMULATE_DONE;
@@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
653 u64 val, unsigned int bytes, int is_bigendian) 674 u64 val, unsigned int bytes, int is_bigendian)
654{ 675{
655 void *data = run->mmio.data; 676 void *data = run->mmio.data;
677 int idx, ret;
656 678
657 if (bytes > sizeof(run->mmio.data)) { 679 if (bytes > sizeof(run->mmio.data)) {
658 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, 680 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
682 } 704 }
683 } 705 }
684 706
685 if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, 707 idx = srcu_read_lock(&vcpu->kvm->srcu);
686 bytes, &run->mmio.data)) { 708
687 kvmppc_complete_mmio_load(vcpu, run); 709 ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
710 bytes, &run->mmio.data);
711
712 srcu_read_unlock(&vcpu->kvm->srcu, idx);
713
714 if (!ret) {
688 vcpu->mmio_needed = 0; 715 vcpu->mmio_needed = 0;
689 return EMULATE_DONE; 716 return EMULATE_DONE;
690 } 717 }
@@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
740int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 767int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
741{ 768{
742 if (irq->irq == KVM_INTERRUPT_UNSET) { 769 if (irq->irq == KVM_INTERRUPT_UNSET) {
743 kvmppc_core_dequeue_external(vcpu, irq); 770 kvmppc_core_dequeue_external(vcpu);
744 return 0; 771 return 0;
745 } 772 }
746 773
@@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
770 break; 797 break;
771 case KVM_CAP_PPC_EPR: 798 case KVM_CAP_PPC_EPR:
772 r = 0; 799 r = 0;
773 vcpu->arch.epr_enabled = cap->args[0]; 800 if (cap->args[0])
801 vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
802 else
803 vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
774 break; 804 break;
775#ifdef CONFIG_BOOKE 805#ifdef CONFIG_BOOKE
776 case KVM_CAP_PPC_BOOKE_WATCHDOG: 806 case KVM_CAP_PPC_BOOKE_WATCHDOG:
@@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
791 break; 821 break;
792 } 822 }
793#endif 823#endif
824#ifdef CONFIG_KVM_MPIC
825 case KVM_CAP_IRQ_MPIC: {
826 struct file *filp;
827 struct kvm_device *dev;
828
829 r = -EBADF;
830 filp = fget(cap->args[0]);
831 if (!filp)
832 break;
833
834 r = -EPERM;
835 dev = kvm_device_from_filp(filp);
836 if (dev)
837 r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
838
839 fput(filp);
840 break;
841 }
842#endif
843#ifdef CONFIG_KVM_XICS
844 case KVM_CAP_IRQ_XICS: {
845 struct file *filp;
846 struct kvm_device *dev;
847
848 r = -EBADF;
849 filp = fget(cap->args[0]);
850 if (!filp)
851 break;
852
853 r = -EPERM;
854 dev = kvm_device_from_filp(filp);
855 if (dev)
856 r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
857
858 fput(filp);
859 break;
860 }
861#endif /* CONFIG_KVM_XICS */
794 default: 862 default:
795 r = -EINVAL; 863 r = -EINVAL;
796 break; 864 break;
@@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
913 return 0; 981 return 0;
914} 982}
915 983
984int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
985 bool line_status)
986{
987 if (!irqchip_in_kernel(kvm))
988 return -ENXIO;
989
990 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
991 irq_event->irq, irq_event->level,
992 line_status);
993 return 0;
994}
995
916long kvm_arch_vm_ioctl(struct file *filp, 996long kvm_arch_vm_ioctl(struct file *filp,
917 unsigned int ioctl, unsigned long arg) 997 unsigned int ioctl, unsigned long arg)
918{ 998{
999 struct kvm *kvm __maybe_unused = filp->private_data;
919 void __user *argp = (void __user *)arg; 1000 void __user *argp = (void __user *)arg;
920 long r; 1001 long r;
921 1002
@@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
934#ifdef CONFIG_PPC_BOOK3S_64 1015#ifdef CONFIG_PPC_BOOK3S_64
935 case KVM_CREATE_SPAPR_TCE: { 1016 case KVM_CREATE_SPAPR_TCE: {
936 struct kvm_create_spapr_tce create_tce; 1017 struct kvm_create_spapr_tce create_tce;
937 struct kvm *kvm = filp->private_data;
938 1018
939 r = -EFAULT; 1019 r = -EFAULT;
940 if (copy_from_user(&create_tce, argp, sizeof(create_tce))) 1020 if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
946 1026
947#ifdef CONFIG_KVM_BOOK3S_64_HV 1027#ifdef CONFIG_KVM_BOOK3S_64_HV
948 case KVM_ALLOCATE_RMA: { 1028 case KVM_ALLOCATE_RMA: {
949 struct kvm *kvm = filp->private_data;
950 struct kvm_allocate_rma rma; 1029 struct kvm_allocate_rma rma;
1030 struct kvm *kvm = filp->private_data;
951 1031
952 r = kvm_vm_ioctl_allocate_rma(kvm, &rma); 1032 r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
953 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) 1033 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
956 } 1036 }
957 1037
958 case KVM_PPC_ALLOCATE_HTAB: { 1038 case KVM_PPC_ALLOCATE_HTAB: {
959 struct kvm *kvm = filp->private_data;
960 u32 htab_order; 1039 u32 htab_order;
961 1040
962 r = -EFAULT; 1041 r = -EFAULT;
@@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
973 } 1052 }
974 1053
975 case KVM_PPC_GET_HTAB_FD: { 1054 case KVM_PPC_GET_HTAB_FD: {
976 struct kvm *kvm = filp->private_data;
977 struct kvm_get_htab_fd ghf; 1055 struct kvm_get_htab_fd ghf;
978 1056
979 r = -EFAULT; 1057 r = -EFAULT;
@@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
986 1064
987#ifdef CONFIG_PPC_BOOK3S_64 1065#ifdef CONFIG_PPC_BOOK3S_64
988 case KVM_PPC_GET_SMMU_INFO: { 1066 case KVM_PPC_GET_SMMU_INFO: {
989 struct kvm *kvm = filp->private_data;
990 struct kvm_ppc_smmu_info info; 1067 struct kvm_ppc_smmu_info info;
991 1068
992 memset(&info, 0, sizeof(info)); 1069 memset(&info, 0, sizeof(info));
@@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
995 r = -EFAULT; 1072 r = -EFAULT;
996 break; 1073 break;
997 } 1074 }
1075 case KVM_PPC_RTAS_DEFINE_TOKEN: {
1076 struct kvm *kvm = filp->private_data;
1077
1078 r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
1079 break;
1080 }
998#endif /* CONFIG_PPC_BOOK3S_64 */ 1081#endif /* CONFIG_PPC_BOOK3S_64 */
999 default: 1082 default:
1000 r = -ENOTTY; 1083 r = -ENOTTY;