author     Linus Torvalds <torvalds@linux-foundation.org>    2015-11-05 19:26:26 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2015-11-05 19:26:26 -0500
commit     933425fb0010bd02bd459b41e63082756818ffce (patch)
tree       1cbc6c2035b9dcff8cb265c9ac562cbee7c6bb82 /virt
parent     a3e7531535a0c6e5acbaa5436f37933bb471aa95 (diff)
parent     a3eaa8649e4c6a6afdafaa04b9114fb230617bb1 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.4.

  s390:
     A bunch of fixes and optimizations for interrupt and time handling.

  PPC:
     Mostly bug fixes.

  ARM:
     No big features, but many small fixes and prerequisites including:
      - a number of fixes for the arch-timer
      - introducing proper level-triggered semantics for the arch-timers
      - a series of patches to synchronously halt a guest (prerequisite
        for IRQ forwarding)
      - some tracepoint improvements
      - a tweak for the EL2 panic handlers
      - some more VGIC cleanups getting rid of redundant state

  x86:
     Quite a few changes:
      - support for VT-d posted interrupts (i.e. PCI devices can inject
        interrupts directly into vCPUs). This introduces a new component
        (in virt/lib/) that connects VFIO and KVM together. The same
        infrastructure will be used for ARM interrupt forwarding as well.
      - more Hyper-V features, though the main one, the Hyper-V synthetic
        interrupt controller, will have to wait for 4.5. These will let
        KVM expose Hyper-V devices.
      - nested virtualization now supports VPID (same as PCID but for
        vCPUs), which makes it quite a bit faster
      - for future hardware that supports NVDIMM, there is support for
        clflushopt, clwb, pcommit
      - support for "split irqchip", i.e. LAPIC in kernel +
        IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
        the hypervisor
      - obligatory smattering of SMM fixes
      - on the guest side, stable scheduler clock support was rewritten
        to not require help from the hypervisor"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
  KVM: VMX: Fix commit which broke PML
  KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
  KVM: x86: allow RSM from 64-bit mode
  KVM: VMX: fix SMEP and SMAP without EPT
  KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
  KVM: device assignment: remove pointless #ifdefs
  KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
  KVM: x86: zero apic_arb_prio on reset
  drivers/hv: share Hyper-V SynIC constants with userspace
  KVM: x86: handle SMBASE as physical address in RSM
  KVM: x86: add read_phys to x86_emulate_ops
  KVM: x86: removing unused variable
  KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
  KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
  KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
  KVM: arm/arm64: Optimize away redundant LR tracking
  KVM: s390: use simple switch statement as multiplexer
  KVM: s390: drop useless newline in debugging data
  KVM: s390: SCA must not cross page boundaries
  KVM: arm: Do not indent the arguments of DECLARE_BITMAP
  ...
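The "split irqchip" item above is driven from userspace through the KVM_ENABLE_CAP vm ioctl rather than by anything in this virt/ diff. As a rough, hypothetical sketch only (the helper name and the 24-pin IOAPIC routing count are assumptions, not taken from this merge), a VMM would enable it on the VM fd before creating any vCPUs:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Hypothetical userspace sketch: keep only the LAPIC emulated in the
 * kernel and leave IOAPIC/PIC/PIT emulation to userspace
 * (KVM_CAP_SPLIT_IRQCHIP). Must be issued on the VM fd before any
 * vCPU is created.
 */
static int enable_split_irqchip(int vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_SPLIT_IRQCHIP;
	cap.args[0] = 24;	/* assumed number of IOAPIC input pins to route */

	/* returns 0 on success, -1 with errno set on failure */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

A VMM would normally probe KVM_CHECK_EXTENSION for KVM_CAP_SPLIT_IRQCHIP first and fall back to the full in-kernel irqchip when the capability is absent.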
Diffstat (limited to 'virt')
 -rw-r--r--  virt/Makefile               1
 -rw-r--r--  virt/kvm/Kconfig            5
 -rw-r--r--  virt/kvm/arm/arch_timer.c   173
 -rw-r--r--  virt/kvm/arm/trace.h        63
 -rw-r--r--  virt/kvm/arm/vgic-v2.c      6
 -rw-r--r--  virt/kvm/arm/vgic-v3.c      6
 -rw-r--r--  virt/kvm/arm/vgic.c         308
 -rw-r--r--  virt/kvm/async_pf.c         4
 -rw-r--r--  virt/kvm/eventfd.c          190
 -rw-r--r--  virt/kvm/irqchip.c          18
 -rw-r--r--  virt/kvm/kvm_main.c         11
 -rw-r--r--  virt/lib/Kconfig            2
 -rw-r--r--  virt/lib/Makefile           1
 -rw-r--r--  virt/lib/irqbypass.c        257
14 files changed, 681 insertions, 364 deletions
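The new virt/lib/irqbypass.c listed above is the small registry that pairs an interrupt producer (e.g. a VFIO device) with a consumer (KVM's irqfd) by matching tokens, so a device interrupt can be posted straight into a vCPU. A minimal, hypothetical consumer registration, using only the fields and the irq_bypass_register_consumer() call visible in the virt/kvm/eventfd.c hunk further down (the demo_* names are made up for illustration), might look roughly like this:

#include <linux/irqbypass.h>

/*
 * Hypothetical no-op callbacks; the real irqfd consumer in the
 * eventfd.c hunk below wires these to
 * kvm_arch_irq_bypass_add_producer() and friends.
 */
static int demo_add_producer(struct irq_bypass_consumer *cons,
			     struct irq_bypass_producer *prod)
{
	return 0;	/* e.g. program a VT-d posted-interrupt entry */
}

static void demo_del_producer(struct irq_bypass_consumer *cons,
			      struct irq_bypass_producer *prod) { }
static void demo_stop(struct irq_bypass_consumer *cons)  { }
static void demo_start(struct irq_bypass_consumer *cons) { }

static struct irq_bypass_consumer demo_consumer;

static int demo_register_consumer(void *token)
{
	/*
	 * The token is what pairs this consumer with a producer that
	 * registered the same token; the irqfd below uses its
	 * eventfd context as the token.
	 */
	demo_consumer.token        = token;
	demo_consumer.add_producer = demo_add_producer;
	demo_consumer.del_producer = demo_del_producer;
	demo_consumer.stop         = demo_stop;
	demo_consumer.start        = demo_start;

	return irq_bypass_register_consumer(&demo_consumer);
}

Unregistering goes through irq_bypass_unregister_consumer(), which the irqfd shutdown path in the eventfd.c hunk below also calls.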
diff --git a/virt/Makefile b/virt/Makefile
new file mode 100644
index 000000000000..be783472ac81
--- /dev/null
+++ b/virt/Makefile
@@ -0,0 +1 @@
obj-y += lib/
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index e2c876d5a03b..7a79b6853583 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -46,4 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
46 46
47config KVM_COMPAT 47config KVM_COMPAT
48 def_bool y 48 def_bool y
49 depends on COMPAT && !S390 49 depends on KVM && COMPAT && !S390
50
51config HAVE_KVM_IRQ_BYPASS
52 bool
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b9d3a32cbc04..21a0ab2d8919 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -28,6 +28,8 @@
28#include <kvm/arm_vgic.h> 28#include <kvm/arm_vgic.h>
29#include <kvm/arm_arch_timer.h> 29#include <kvm/arm_arch_timer.h>
30 30
31#include "trace.h"
32
31static struct timecounter *timecounter; 33static struct timecounter *timecounter;
32static struct workqueue_struct *wqueue; 34static struct workqueue_struct *wqueue;
33static unsigned int host_vtimer_irq; 35static unsigned int host_vtimer_irq;
@@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
59 } 61 }
60} 62}
61 63
62static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
63{
64 int ret;
65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
66
67 kvm_vgic_set_phys_irq_active(timer->map, true);
68 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
69 timer->map,
70 timer->irq->level);
71 WARN_ON(ret);
72}
73
74static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 64static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
75{ 65{
76 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 66 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
111 return HRTIMER_NORESTART; 101 return HRTIMER_NORESTART;
112} 102}
113 103
104static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
105{
106 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
107
108 return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
109 (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
110}
111
114bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) 112bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
115{ 113{
116 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 114 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
117 cycle_t cval, now; 115 cycle_t cval, now;
118 116
119 if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || 117 if (!kvm_timer_irq_can_fire(vcpu))
120 !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
121 kvm_vgic_get_phys_irq_active(timer->map))
122 return false; 118 return false;
123 119
124 cval = timer->cntv_cval; 120 cval = timer->cntv_cval;
@@ -127,12 +123,94 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
127 return cval <= now; 123 return cval <= now;
128} 124}
129 125
126static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
127{
128 int ret;
129 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
130
131 BUG_ON(!vgic_initialized(vcpu->kvm));
132
133 timer->irq.level = new_level;
134 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
135 timer->irq.level);
136 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
137 timer->map,
138 timer->irq.level);
139 WARN_ON(ret);
140}
141
142/*
143 * Check if there was a change in the timer state (should we raise or lower
144 * the line level to the GIC).
145 */
146static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
147{
148 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
149
150 /*
151 * If userspace modified the timer registers via SET_ONE_REG before
152 * the vgic was initialized, we mustn't set the timer->irq.level value
153 * because the guest would never see the interrupt. Instead wait
154 * until we call this function from kvm_timer_flush_hwstate.
155 */
156 if (!vgic_initialized(vcpu->kvm))
157 return;
158
159 if (kvm_timer_should_fire(vcpu) != timer->irq.level)
160 kvm_timer_update_irq(vcpu, !timer->irq.level);
161}
162
163/*
164 * Schedule the background timer before calling kvm_vcpu_block, so that this
165 * thread is removed from its waitqueue and made runnable when there's a timer
166 * interrupt to handle.
167 */
168void kvm_timer_schedule(struct kvm_vcpu *vcpu)
169{
170 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
171 u64 ns;
172 cycle_t cval, now;
173
174 BUG_ON(timer_is_armed(timer));
175
176 /*
177 * No need to schedule a background timer if the guest timer has
178 * already expired, because kvm_vcpu_block will return before putting
179 * the thread to sleep.
180 */
181 if (kvm_timer_should_fire(vcpu))
182 return;
183
184 /*
185 * If the timer is not capable of raising interrupts (disabled or
186 * masked), then there's no more work for us to do.
187 */
188 if (!kvm_timer_irq_can_fire(vcpu))
189 return;
190
191 /* The timer has not yet expired, schedule a background timer */
192 cval = timer->cntv_cval;
193 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
194
195 ns = cyclecounter_cyc2ns(timecounter->cc,
196 cval - now,
197 timecounter->mask,
198 &timecounter->frac);
199 timer_arm(timer, ns);
200}
201
202void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
203{
204 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
205 timer_disarm(timer);
206}
207
130/** 208/**
131 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu 209 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
132 * @vcpu: The vcpu pointer 210 * @vcpu: The vcpu pointer
133 * 211 *
134 * Disarm any pending soft timers, since the world-switch code will write the 212 * Check if the virtual timer has expired while we were running in the host,
135 * virtual timer state back to the physical CPU. 213 * and inject an interrupt if that was the case.
136 */ 214 */
137void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) 215void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
138{ 216{
@@ -140,28 +218,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
140 bool phys_active; 218 bool phys_active;
141 int ret; 219 int ret;
142 220
143 /* 221 kvm_timer_update_state(vcpu);
144 * We're about to run this vcpu again, so there is no need to
145 * keep the background timer running, as we're about to
146 * populate the CPU timer again.
147 */
148 timer_disarm(timer);
149 222
150 /* 223 /*
151 * If the timer expired while we were not scheduled, now is the time 224 * If we enter the guest with the virtual input level to the VGIC
152 * to inject it. 225 * asserted, then we have already told the VGIC what we need to, and
226 * we don't need to exit from the guest until the guest deactivates
227 * the already injected interrupt, so therefore we should set the
228 * hardware active state to prevent unnecessary exits from the guest.
229 *
230 * Conversely, if the virtual input level is deasserted, then always
231 * clear the hardware active state to ensure that hardware interrupts
232 * from the timer triggers a guest exit.
153 */ 233 */
154 if (kvm_timer_should_fire(vcpu)) 234 if (timer->irq.level)
155 kvm_timer_inject_irq(vcpu);
156
157 /*
158 * We keep track of whether the edge-triggered interrupt has been
159 * signalled to the vgic/guest, and if so, we mask the interrupt and
160 * the physical distributor to prevent the timer from raising a
161 * physical interrupt whenever we run a guest, preventing forward
162 * VCPU progress.
163 */
164 if (kvm_vgic_get_phys_irq_active(timer->map))
165 phys_active = true; 235 phys_active = true;
166 else 236 else
167 phys_active = false; 237 phys_active = false;
@@ -176,32 +246,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
176 * kvm_timer_sync_hwstate - sync timer state from cpu 246 * kvm_timer_sync_hwstate - sync timer state from cpu
177 * @vcpu: The vcpu pointer 247 * @vcpu: The vcpu pointer
178 * 248 *
179 * Check if the virtual timer was armed and either schedule a corresponding 249 * Check if the virtual timer has expired while we were running in the guest,
180 * soft timer or inject directly if already expired. 250 * and inject an interrupt if that was the case.
181 */ 251 */
182void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 252void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
183{ 253{
184 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 254 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
185 cycle_t cval, now;
186 u64 ns;
187 255
188 BUG_ON(timer_is_armed(timer)); 256 BUG_ON(timer_is_armed(timer));
189 257
190 if (kvm_timer_should_fire(vcpu)) { 258 /*
191 /* 259 * The guest could have modified the timer registers or the timer
192 * Timer has already expired while we were not 260 * could have expired, update the timer state.
193 * looking. Inject the interrupt and carry on. 261 */
194 */ 262 kvm_timer_update_state(vcpu);
195 kvm_timer_inject_irq(vcpu);
196 return;
197 }
198
199 cval = timer->cntv_cval;
200 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
201
202 ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
203 &timecounter->frac);
204 timer_arm(timer, ns);
205} 263}
206 264
207int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 265int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
@@ -216,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
216 * kvm_vcpu_set_target(). To handle this, we determine 274 * kvm_vcpu_set_target(). To handle this, we determine
217 * vcpu timer irq number when the vcpu is reset. 275 * vcpu timer irq number when the vcpu is reset.
218 */ 276 */
219 timer->irq = irq; 277 timer->irq.irq = irq->irq;
220 278
221 /* 279 /*
222 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 280 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -225,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
225 * the ARMv7 architecture. 283 * the ARMv7 architecture.
226 */ 284 */
227 timer->cntv_ctl = 0; 285 timer->cntv_ctl = 0;
286 kvm_timer_update_state(vcpu);
228 287
229 /* 288 /*
230 * Tell the VGIC that the virtual interrupt is tied to a 289 * Tell the VGIC that the virtual interrupt is tied to a
@@ -269,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
269 default: 328 default:
270 return -1; 329 return -1;
271 } 330 }
331
332 kvm_timer_update_state(vcpu);
272 return 0; 333 return 0;
273} 334}
274 335
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
new file mode 100644
index 000000000000..37d8b98867d5
--- /dev/null
+++ b/virt/kvm/arm/trace.h
@@ -0,0 +1,63 @@
1#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_KVM_H
3
4#include <linux/tracepoint.h>
5
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm
8
9/*
10 * Tracepoints for vgic
11 */
12TRACE_EVENT(vgic_update_irq_pending,
13 TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
14 TP_ARGS(vcpu_id, irq, level),
15
16 TP_STRUCT__entry(
17 __field( unsigned long, vcpu_id )
18 __field( __u32, irq )
19 __field( bool, level )
20 ),
21
22 TP_fast_assign(
23 __entry->vcpu_id = vcpu_id;
24 __entry->irq = irq;
25 __entry->level = level;
26 ),
27
28 TP_printk("VCPU: %ld, IRQ %d, level: %d",
29 __entry->vcpu_id, __entry->irq, __entry->level)
30);
31
32/*
33 * Tracepoints for arch_timer
34 */
35TRACE_EVENT(kvm_timer_update_irq,
36 TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
37 TP_ARGS(vcpu_id, irq, level),
38
39 TP_STRUCT__entry(
40 __field( unsigned long, vcpu_id )
41 __field( __u32, irq )
42 __field( int, level )
43 ),
44
45 TP_fast_assign(
46 __entry->vcpu_id = vcpu_id;
47 __entry->irq = irq;
48 __entry->level = level;
49 ),
50
51 TP_printk("VCPU: %ld, IRQ %d, level %d",
52 __entry->vcpu_id, __entry->irq, __entry->level)
53);
54
55#endif /* _TRACE_KVM_H */
56
57#undef TRACE_INCLUDE_PATH
58#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
59#undef TRACE_INCLUDE_FILE
60#define TRACE_INCLUDE_FILE trace
61
62/* This part must be outside protection */
63#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 8d7b04db8471..ff02f08df74d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
79 lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); 79 lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
80 80
81 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; 81 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
82}
83 82
84static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
85 struct vgic_lr lr_desc)
86{
87 if (!(lr_desc.state & LR_STATE_MASK)) 83 if (!(lr_desc.state & LR_STATE_MASK))
88 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); 84 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
89 else 85 else
@@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
158 * anyway. 154 * anyway.
159 */ 155 */
160 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; 156 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
157 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
161 158
162 /* Get the show on the road... */ 159 /* Get the show on the road... */
163 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; 160 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
@@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
166static const struct vgic_ops vgic_v2_ops = { 163static const struct vgic_ops vgic_v2_ops = {
167 .get_lr = vgic_v2_get_lr, 164 .get_lr = vgic_v2_get_lr,
168 .set_lr = vgic_v2_set_lr, 165 .set_lr = vgic_v2_set_lr,
169 .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
170 .get_elrsr = vgic_v2_get_elrsr, 166 .get_elrsr = vgic_v2_get_elrsr,
171 .get_eisr = vgic_v2_get_eisr, 167 .get_eisr = vgic_v2_get_eisr,
172 .clear_eisr = vgic_v2_clear_eisr, 168 .clear_eisr = vgic_v2_clear_eisr,
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 7dd5d62f10a1..487d6357b7e7 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
112 } 112 }
113 113
114 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; 114 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
115}
116 115
117static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
118 struct vgic_lr lr_desc)
119{
120 if (!(lr_desc.state & LR_STATE_MASK)) 116 if (!(lr_desc.state & LR_STATE_MASK))
121 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); 117 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
122 else 118 else
@@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
193 * anyway. 189 * anyway.
194 */ 190 */
195 vgic_v3->vgic_vmcr = 0; 191 vgic_v3->vgic_vmcr = 0;
192 vgic_v3->vgic_elrsr = ~0;
196 193
197 /* 194 /*
198 * If we are emulating a GICv3, we do it in an non-GICv2-compatible 195 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
@@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
211static const struct vgic_ops vgic_v3_ops = { 208static const struct vgic_ops vgic_v3_ops = {
212 .get_lr = vgic_v3_get_lr, 209 .get_lr = vgic_v3_get_lr,
213 .set_lr = vgic_v3_set_lr, 210 .set_lr = vgic_v3_set_lr,
214 .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
215 .get_elrsr = vgic_v3_get_elrsr, 211 .get_elrsr = vgic_v3_get_elrsr,
216 .get_eisr = vgic_v3_get_eisr, 212 .get_eisr = vgic_v3_get_eisr,
217 .clear_eisr = vgic_v3_clear_eisr, 213 .clear_eisr = vgic_v3_clear_eisr,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 30489181922d..533538385d5d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -34,6 +34,9 @@
34#include <asm/kvm.h> 34#include <asm/kvm.h>
35#include <kvm/iodev.h> 35#include <kvm/iodev.h>
36 36
37#define CREATE_TRACE_POINTS
38#include "trace.h"
39
37/* 40/*
38 * How the whole thing works (courtesy of Christoffer Dall): 41 * How the whole thing works (courtesy of Christoffer Dall):
39 * 42 *
@@ -102,11 +105,13 @@
102#include "vgic.h" 105#include "vgic.h"
103 106
104static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); 107static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
105static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 108static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
106static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 109static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
107static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); 110static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
111static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
108static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, 112static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
109 int virt_irq); 113 int virt_irq);
114static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
110 115
111static const struct vgic_ops *vgic_ops; 116static const struct vgic_ops *vgic_ops;
112static const struct vgic_params *vgic; 117static const struct vgic_params *vgic;
@@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
357 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 362 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
358 363
359 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); 364 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
365 if (!vgic_dist_irq_get_level(vcpu, irq)) {
366 vgic_dist_irq_clear_pending(vcpu, irq);
367 if (!compute_pending_for_cpu(vcpu))
368 clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
369 }
360} 370}
361 371
362static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) 372static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -531,34 +541,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm,
531 return false; 541 return false;
532} 542}
533 543
534/*
535 * If a mapped interrupt's state has been modified by the guest such that it
536 * is no longer active or pending, without it have gone through the sync path,
537 * then the map->active field must be cleared so the interrupt can be taken
538 * again.
539 */
540static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
541{
542 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
543 struct list_head *root;
544 struct irq_phys_map_entry *entry;
545 struct irq_phys_map *map;
546
547 rcu_read_lock();
548
549 /* Check for PPIs */
550 root = &vgic_cpu->irq_phys_map_list;
551 list_for_each_entry_rcu(entry, root, entry) {
552 map = &entry->map;
553
554 if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
555 !vgic_irq_is_active(vcpu, map->virt_irq))
556 map->active = false;
557 }
558
559 rcu_read_unlock();
560}
561
562bool vgic_handle_clear_pending_reg(struct kvm *kvm, 544bool vgic_handle_clear_pending_reg(struct kvm *kvm,
563 struct kvm_exit_mmio *mmio, 545 struct kvm_exit_mmio *mmio,
564 phys_addr_t offset, int vcpu_id) 546 phys_addr_t offset, int vcpu_id)
@@ -589,7 +571,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
589 vcpu_id, offset); 571 vcpu_id, offset);
590 vgic_reg_access(mmio, reg, offset, mode); 572 vgic_reg_access(mmio, reg, offset, mode);
591 573
592 vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
593 vgic_update_state(kvm); 574 vgic_update_state(kvm);
594 return true; 575 return true;
595 } 576 }
@@ -627,7 +608,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm,
627 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); 608 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
628 609
629 if (mmio->is_write) { 610 if (mmio->is_write) {
630 vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
631 vgic_update_state(kvm); 611 vgic_update_state(kvm);
632 return true; 612 return true;
633 } 613 }
@@ -684,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
684 vgic_reg_access(mmio, &val, offset, 664 vgic_reg_access(mmio, &val, offset,
685 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); 665 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
686 if (mmio->is_write) { 666 if (mmio->is_write) {
687 if (offset < 8) { 667 /* Ignore writes to read-only SGI and PPI bits */
688 *reg = ~0U; /* Force PPIs/SGIs to 1 */ 668 if (offset < 8)
689 return false; 669 return false;
690 }
691 670
692 val = vgic_cfg_compress(val); 671 val = vgic_cfg_compress(val);
693 if (offset & 4) { 672 if (offset & 4) {
@@ -713,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
713void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) 692void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
714{ 693{
715 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 694 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
695 u64 elrsr = vgic_get_elrsr(vcpu);
696 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
716 int i; 697 int i;
717 698
718 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { 699 for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
719 struct vgic_lr lr = vgic_get_lr(vcpu, i); 700 struct vgic_lr lr = vgic_get_lr(vcpu, i);
720 701
721 /* 702 /*
@@ -736,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
736 * interrupt then move the active state to the 717 * interrupt then move the active state to the
737 * distributor tracking bit. 718 * distributor tracking bit.
738 */ 719 */
739 if (lr.state & LR_STATE_ACTIVE) { 720 if (lr.state & LR_STATE_ACTIVE)
740 vgic_irq_set_active(vcpu, lr.irq); 721 vgic_irq_set_active(vcpu, lr.irq);
741 lr.state &= ~LR_STATE_ACTIVE;
742 }
743 722
744 /* 723 /*
745 * Reestablish the pending state on the distributor and the 724 * Reestablish the pending state on the distributor and the
746 * CPU interface. It may have already been pending, but that 725 * CPU interface and mark the LR as free for other use.
747 * is fine, then we are only setting a few bits that were
748 * already set.
749 */ 726 */
750 if (lr.state & LR_STATE_PENDING) { 727 vgic_retire_lr(i, vcpu);
751 vgic_dist_irq_set_pending(vcpu, lr.irq);
752 lr.state &= ~LR_STATE_PENDING;
753 }
754
755 vgic_set_lr(vcpu, i, lr);
756
757 /*
758 * Mark the LR as free for other use.
759 */
760 BUG_ON(lr.state & LR_STATE_MASK);
761 vgic_retire_lr(i, lr.irq, vcpu);
762 vgic_irq_clear_queued(vcpu, lr.irq);
763 728
764 /* Finally update the VGIC state. */ 729 /* Finally update the VGIC state. */
765 vgic_update_state(vcpu->kvm); 730 vgic_update_state(vcpu->kvm);
@@ -1067,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
1067 vgic_ops->set_lr(vcpu, lr, vlr); 1032 vgic_ops->set_lr(vcpu, lr, vlr);
1068} 1033}
1069 1034
1070static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
1071 struct vgic_lr vlr)
1072{
1073 vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
1074}
1075
1076static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) 1035static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
1077{ 1036{
1078 return vgic_ops->get_elrsr(vcpu); 1037 return vgic_ops->get_elrsr(vcpu);
@@ -1118,25 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
1118 vgic_ops->enable(vcpu); 1077 vgic_ops->enable(vcpu);
1119} 1078}
1120 1079
1121static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) 1080static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
1122{ 1081{
1123 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1124 struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); 1082 struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
1125 1083
1084 vgic_irq_clear_queued(vcpu, vlr.irq);
1085
1126 /* 1086 /*
1127 * We must transfer the pending state back to the distributor before 1087 * We must transfer the pending state back to the distributor before
1128 * retiring the LR, otherwise we may loose edge-triggered interrupts. 1088 * retiring the LR, otherwise we may loose edge-triggered interrupts.
1129 */ 1089 */
1130 if (vlr.state & LR_STATE_PENDING) { 1090 if (vlr.state & LR_STATE_PENDING) {
1131 vgic_dist_irq_set_pending(vcpu, irq); 1091 vgic_dist_irq_set_pending(vcpu, vlr.irq);
1132 vlr.hwirq = 0; 1092 vlr.hwirq = 0;
1133 } 1093 }
1134 1094
1135 vlr.state = 0; 1095 vlr.state = 0;
1136 vgic_set_lr(vcpu, lr_nr, vlr); 1096 vgic_set_lr(vcpu, lr_nr, vlr);
1137 clear_bit(lr_nr, vgic_cpu->lr_used);
1138 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
1139 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1140} 1097}
1141 1098
1142/* 1099/*
@@ -1150,17 +1107,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
1150 */ 1107 */
1151static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) 1108static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1152{ 1109{
1153 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1110 u64 elrsr = vgic_get_elrsr(vcpu);
1111 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
1154 int lr; 1112 int lr;
1155 1113
1156 for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { 1114 for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
1157 struct vgic_lr vlr = vgic_get_lr(vcpu, lr); 1115 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1158 1116
1159 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { 1117 if (!vgic_irq_is_enabled(vcpu, vlr.irq))
1160 vgic_retire_lr(lr, vlr.irq, vcpu); 1118 vgic_retire_lr(lr, vcpu);
1161 if (vgic_irq_is_queued(vcpu, vlr.irq))
1162 vgic_irq_clear_queued(vcpu, vlr.irq);
1163 }
1164 } 1119 }
1165} 1120}
1166 1121
@@ -1200,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1200 } 1155 }
1201 1156
1202 vgic_set_lr(vcpu, lr_nr, vlr); 1157 vgic_set_lr(vcpu, lr_nr, vlr);
1203 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1204} 1158}
1205 1159
1206/* 1160/*
@@ -1210,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1210 */ 1164 */
1211bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) 1165bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1212{ 1166{
1213 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1214 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1167 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1168 u64 elrsr = vgic_get_elrsr(vcpu);
1169 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
1215 struct vgic_lr vlr; 1170 struct vgic_lr vlr;
1216 int lr; 1171 int lr;
1217 1172
@@ -1222,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1222 1177
1223 kvm_debug("Queue IRQ%d\n", irq); 1178 kvm_debug("Queue IRQ%d\n", irq);
1224 1179
1225 lr = vgic_cpu->vgic_irq_lr_map[irq];
1226
1227 /* Do we have an active interrupt for the same CPUID? */ 1180 /* Do we have an active interrupt for the same CPUID? */
1228 if (lr != LR_EMPTY) { 1181 for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
1229 vlr = vgic_get_lr(vcpu, lr); 1182 vlr = vgic_get_lr(vcpu, lr);
1230 if (vlr.source == sgi_source_id) { 1183 if (vlr.irq == irq && vlr.source == sgi_source_id) {
1231 kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); 1184 kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
1232 BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
1233 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); 1185 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
1234 return true; 1186 return true;
1235 } 1187 }
1236 } 1188 }
1237 1189
1238 /* Try to use another LR for this interrupt */ 1190 /* Try to use another LR for this interrupt */
1239 lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, 1191 lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
1240 vgic->nr_lr);
1241 if (lr >= vgic->nr_lr) 1192 if (lr >= vgic->nr_lr)
1242 return false; 1193 return false;
1243 1194
1244 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); 1195 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
1245 vgic_cpu->vgic_irq_lr_map[irq] = lr;
1246 set_bit(lr, vgic_cpu->lr_used);
1247 1196
1248 vlr.irq = irq; 1197 vlr.irq = irq;
1249 vlr.source = sgi_source_id; 1198 vlr.source = sgi_source_id;
@@ -1338,12 +1287,60 @@ epilog:
1338 } 1287 }
1339} 1288}
1340 1289
1290static int process_queued_irq(struct kvm_vcpu *vcpu,
1291 int lr, struct vgic_lr vlr)
1292{
1293 int pending = 0;
1294
1295 /*
1296 * If the IRQ was EOIed (called from vgic_process_maintenance) or it
1297 * went from active to non-active (called from vgic_sync_hwirq) it was
1298 * also ACKed and we we therefore assume we can clear the soft pending
1299 * state (should it had been set) for this interrupt.
1300 *
1301 * Note: if the IRQ soft pending state was set after the IRQ was
1302 * acked, it actually shouldn't be cleared, but we have no way of
1303 * knowing that unless we start trapping ACKs when the soft-pending
1304 * state is set.
1305 */
1306 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1307
1308 /*
1309 * Tell the gic to start sampling this interrupt again.
1310 */
1311 vgic_irq_clear_queued(vcpu, vlr.irq);
1312
1313 /* Any additional pending interrupt? */
1314 if (vgic_irq_is_edge(vcpu, vlr.irq)) {
1315 BUG_ON(!(vlr.state & LR_HW));
1316 pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
1317 } else {
1318 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1319 vgic_cpu_irq_set(vcpu, vlr.irq);
1320 pending = 1;
1321 } else {
1322 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1323 vgic_cpu_irq_clear(vcpu, vlr.irq);
1324 }
1325 }
1326
1327 /*
1328 * Despite being EOIed, the LR may not have
1329 * been marked as empty.
1330 */
1331 vlr.state = 0;
1332 vlr.hwirq = 0;
1333 vgic_set_lr(vcpu, lr, vlr);
1334
1335 return pending;
1336}
1337
1341static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) 1338static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1342{ 1339{
1343 u32 status = vgic_get_interrupt_status(vcpu); 1340 u32 status = vgic_get_interrupt_status(vcpu);
1344 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1341 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1345 bool level_pending = false;
1346 struct kvm *kvm = vcpu->kvm; 1342 struct kvm *kvm = vcpu->kvm;
1343 int level_pending = 0;
1347 1344
1348 kvm_debug("STATUS = %08x\n", status); 1345 kvm_debug("STATUS = %08x\n", status);
1349 1346
@@ -1358,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1358 1355
1359 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { 1356 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1360 struct vgic_lr vlr = vgic_get_lr(vcpu, lr); 1357 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1361 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1362 1358
1363 spin_lock(&dist->lock); 1359 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1364 vgic_irq_clear_queued(vcpu, vlr.irq);
1365 WARN_ON(vlr.state & LR_STATE_MASK); 1360 WARN_ON(vlr.state & LR_STATE_MASK);
1366 vlr.state = 0;
1367 vgic_set_lr(vcpu, lr, vlr);
1368 1361
1369 /*
1370 * If the IRQ was EOIed it was also ACKed and we we
1371 * therefore assume we can clear the soft pending
1372 * state (should it had been set) for this interrupt.
1373 *
1374 * Note: if the IRQ soft pending state was set after
1375 * the IRQ was acked, it actually shouldn't be
1376 * cleared, but we have no way of knowing that unless
1377 * we start trapping ACKs when the soft-pending state
1378 * is set.
1379 */
1380 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1381 1362
1382 /* 1363 /*
1383 * kvm_notify_acked_irq calls kvm_set_irq() 1364 * kvm_notify_acked_irq calls kvm_set_irq()
1384 * to reset the IRQ level. Need to release the 1365 * to reset the IRQ level, which grabs the dist->lock
1385 * lock for kvm_set_irq to grab it. 1366 * so we call this before taking the dist->lock.
1386 */ 1367 */
1387 spin_unlock(&dist->lock);
1388
1389 kvm_notify_acked_irq(kvm, 0, 1368 kvm_notify_acked_irq(kvm, 0,
1390 vlr.irq - VGIC_NR_PRIVATE_IRQS); 1369 vlr.irq - VGIC_NR_PRIVATE_IRQS);
1391 spin_lock(&dist->lock);
1392
1393 /* Any additional pending interrupt? */
1394 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1395 vgic_cpu_irq_set(vcpu, vlr.irq);
1396 level_pending = true;
1397 } else {
1398 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1399 vgic_cpu_irq_clear(vcpu, vlr.irq);
1400 }
1401 1370
1371 spin_lock(&dist->lock);
1372 level_pending |= process_queued_irq(vcpu, lr, vlr);
1402 spin_unlock(&dist->lock); 1373 spin_unlock(&dist->lock);
1403
1404 /*
1405 * Despite being EOIed, the LR may not have
1406 * been marked as empty.
1407 */
1408 vgic_sync_lr_elrsr(vcpu, lr, vlr);
1409 } 1374 }
1410 } 1375 }
1411 1376
@@ -1426,35 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1426/* 1391/*
1427 * Save the physical active state, and reset it to inactive. 1392 * Save the physical active state, and reset it to inactive.
1428 * 1393 *
1429 * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. 1394 * Return true if there's a pending forwarded interrupt to queue.
1430 */ 1395 */
1431static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) 1396static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
1432{ 1397{
1398 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1433 struct irq_phys_map *map; 1399 struct irq_phys_map *map;
1400 bool phys_active;
1401 bool level_pending;
1434 int ret; 1402 int ret;
1435 1403
1436 if (!(vlr.state & LR_HW)) 1404 if (!(vlr.state & LR_HW))
1437 return 0; 1405 return false;
1438 1406
1439 map = vgic_irq_map_search(vcpu, vlr.irq); 1407 map = vgic_irq_map_search(vcpu, vlr.irq);
1440 BUG_ON(!map); 1408 BUG_ON(!map);
1441 1409
1442 ret = irq_get_irqchip_state(map->irq, 1410 ret = irq_get_irqchip_state(map->irq,
1443 IRQCHIP_STATE_ACTIVE, 1411 IRQCHIP_STATE_ACTIVE,
1444 &map->active); 1412 &phys_active);
1445 1413
1446 WARN_ON(ret); 1414 WARN_ON(ret);
1447 1415
1448 if (map->active) 1416 if (phys_active)
1449 return 0; 1417 return 0;
1450 1418
1451 return 1; 1419 spin_lock(&dist->lock);
1420 level_pending = process_queued_irq(vcpu, lr, vlr);
1421 spin_unlock(&dist->lock);
1422 return level_pending;
1452} 1423}
1453 1424
1454/* Sync back the VGIC state after a guest run */ 1425/* Sync back the VGIC state after a guest run */
1455static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1426static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1456{ 1427{
1457 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1458 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1428 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1459 u64 elrsr; 1429 u64 elrsr;
1460 unsigned long *elrsr_ptr; 1430 unsigned long *elrsr_ptr;
@@ -1462,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1462 bool level_pending; 1432 bool level_pending;
1463 1433
1464 level_pending = vgic_process_maintenance(vcpu); 1434 level_pending = vgic_process_maintenance(vcpu);
1465 elrsr = vgic_get_elrsr(vcpu);
1466 elrsr_ptr = u64_to_bitmask(&elrsr);
1467 1435
1468 /* Deal with HW interrupts, and clear mappings for empty LRs */ 1436 /* Deal with HW interrupts, and clear mappings for empty LRs */
1469 for (lr = 0; lr < vgic->nr_lr; lr++) { 1437 for (lr = 0; lr < vgic->nr_lr; lr++) {
1470 struct vgic_lr vlr; 1438 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1471
1472 if (!test_bit(lr, vgic_cpu->lr_used))
1473 continue;
1474
1475 vlr = vgic_get_lr(vcpu, lr);
1476 if (vgic_sync_hwirq(vcpu, vlr)) {
1477 /*
1478 * So this is a HW interrupt that the guest
1479 * EOI-ed. Clean the LR state and allow the
1480 * interrupt to be sampled again.
1481 */
1482 vlr.state = 0;
1483 vlr.hwirq = 0;
1484 vgic_set_lr(vcpu, lr, vlr);
1485 vgic_irq_clear_queued(vcpu, vlr.irq);
1486 set_bit(lr, elrsr_ptr);
1487 }
1488
1489 if (!test_bit(lr, elrsr_ptr))
1490 continue;
1491
1492 clear_bit(lr, vgic_cpu->lr_used);
1493 1439
1440 level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
1494 BUG_ON(vlr.irq >= dist->nr_irqs); 1441 BUG_ON(vlr.irq >= dist->nr_irqs);
1495 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
1496 } 1442 }
1497 1443
1498 /* Check if we still have something up our sleeve... */ 1444 /* Check if we still have something up our sleeve... */
1445 elrsr = vgic_get_elrsr(vcpu);
1446 elrsr_ptr = u64_to_bitmask(&elrsr);
1499 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); 1447 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1500 if (level_pending || pending < vgic->nr_lr) 1448 if (level_pending || pending < vgic->nr_lr)
1501 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); 1449 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
@@ -1585,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1585 int enabled; 1533 int enabled;
1586 bool ret = true, can_inject = true; 1534 bool ret = true, can_inject = true;
1587 1535
1536 trace_vgic_update_irq_pending(cpuid, irq_num, level);
1537
1588 if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) 1538 if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
1589 return -EINVAL; 1539 return -EINVAL;
1590 1540
@@ -1864,30 +1814,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
1864} 1814}
1865 1815
1866/** 1816/**
1867 * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
1868 *
1869 * Return the logical active state of a mapped interrupt. This doesn't
1870 * necessarily reflects the current HW state.
1871 */
1872bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
1873{
1874 BUG_ON(!map);
1875 return map->active;
1876}
1877
1878/**
1879 * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
1880 *
1881 * Set the logical active state of a mapped interrupt. This doesn't
1882 * immediately affects the HW state.
1883 */
1884void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
1885{
1886 BUG_ON(!map);
1887 map->active = active;
1888}
1889
1890/**
1891 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping 1817 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
1892 * @vcpu: The VCPU pointer 1818 * @vcpu: The VCPU pointer
1893 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq 1819 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
@@ -1942,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1942 kfree(vgic_cpu->pending_shared); 1868 kfree(vgic_cpu->pending_shared);
1943 kfree(vgic_cpu->active_shared); 1869 kfree(vgic_cpu->active_shared);
1944 kfree(vgic_cpu->pend_act_shared); 1870 kfree(vgic_cpu->pend_act_shared);
1945 kfree(vgic_cpu->vgic_irq_lr_map);
1946 vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); 1871 vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
1947 vgic_cpu->pending_shared = NULL; 1872 vgic_cpu->pending_shared = NULL;
1948 vgic_cpu->active_shared = NULL; 1873 vgic_cpu->active_shared = NULL;
1949 vgic_cpu->pend_act_shared = NULL; 1874 vgic_cpu->pend_act_shared = NULL;
1950 vgic_cpu->vgic_irq_lr_map = NULL;
1951} 1875}
1952 1876
1953static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) 1877static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
@@ -1958,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1958 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); 1882 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1959 vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); 1883 vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
1960 vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); 1884 vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
1961 vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
1962 1885
1963 if (!vgic_cpu->pending_shared 1886 if (!vgic_cpu->pending_shared
1964 || !vgic_cpu->active_shared 1887 || !vgic_cpu->active_shared
1965 || !vgic_cpu->pend_act_shared 1888 || !vgic_cpu->pend_act_shared) {
1966 || !vgic_cpu->vgic_irq_lr_map) {
1967 kvm_vgic_vcpu_destroy(vcpu); 1889 kvm_vgic_vcpu_destroy(vcpu);
1968 return -ENOMEM; 1890 return -ENOMEM;
1969 } 1891 }
1970 1892
1971 memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
1972
1973 /* 1893 /*
1974 * Store the number of LRs per vcpu, so we don't have to go 1894 * Store the number of LRs per vcpu, so we don't have to go
1975 * all the way to the distributor structure to find out. Only 1895 * all the way to the distributor structure to find out. Only
@@ -2111,14 +2031,24 @@ int vgic_init(struct kvm *kvm)
2111 break; 2031 break;
2112 } 2032 }
2113 2033
2114 for (i = 0; i < dist->nr_irqs; i++) { 2034 /*
2115 if (i < VGIC_NR_PPIS) 2035 * Enable and configure all SGIs to be edge-triggere and
2036 * configure all PPIs as level-triggered.
2037 */
2038 for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
2039 if (i < VGIC_NR_SGIS) {
2040 /* SGIs */
2116 vgic_bitmap_set_irq_val(&dist->irq_enabled, 2041 vgic_bitmap_set_irq_val(&dist->irq_enabled,
2117 vcpu->vcpu_id, i, 1); 2042 vcpu->vcpu_id, i, 1);
2118 if (i < VGIC_NR_PRIVATE_IRQS)
2119 vgic_bitmap_set_irq_val(&dist->irq_cfg, 2043 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2120 vcpu->vcpu_id, i, 2044 vcpu->vcpu_id, i,
2121 VGIC_CFG_EDGE); 2045 VGIC_CFG_EDGE);
2046 } else if (i < VGIC_NR_PRIVATE_IRQS) {
2047 /* PPIs */
2048 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2049 vcpu->vcpu_id, i,
2050 VGIC_CFG_LEVEL);
2051 }
2122 } 2052 }
2123 2053
2124 vgic_enable(vcpu); 2054 vgic_enable(vcpu);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 44660aee335f..77d42be6970e 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work)
94 94
95 trace_kvm_async_pf_completed(addr, gva); 95 trace_kvm_async_pf_completed(addr, gva);
96 96
97 /*
98 * This memory barrier pairs with prepare_to_wait's set_current_state()
99 */
100 smp_mb();
97 if (waitqueue_active(&vcpu->wq)) 101 if (waitqueue_active(&vcpu->wq))
98 wake_up_interruptible(&vcpu->wq); 102 wake_up_interruptible(&vcpu->wq);
99 103
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 79db45336e3a..46dbc0a7dfc1 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -23,6 +23,7 @@
23 23
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25#include <linux/kvm.h> 25#include <linux/kvm.h>
26#include <linux/kvm_irqfd.h>
26#include <linux/workqueue.h> 27#include <linux/workqueue.h>
27#include <linux/syscalls.h> 28#include <linux/syscalls.h>
28#include <linux/wait.h> 29#include <linux/wait.h>
@@ -34,73 +35,20 @@
34#include <linux/srcu.h> 35#include <linux/srcu.h>
35#include <linux/slab.h> 36#include <linux/slab.h>
36#include <linux/seqlock.h> 37#include <linux/seqlock.h>
38#include <linux/irqbypass.h>
37#include <trace/events/kvm.h> 39#include <trace/events/kvm.h>
38 40
39#include <kvm/iodev.h> 41#include <kvm/iodev.h>
40 42
41#ifdef CONFIG_HAVE_KVM_IRQFD 43#ifdef CONFIG_HAVE_KVM_IRQFD
42/*
43 * --------------------------------------------------------------------
44 * irqfd: Allows an fd to be used to inject an interrupt to the guest
45 *
46 * Credit goes to Avi Kivity for the original idea.
47 * --------------------------------------------------------------------
48 */
49
50/*
51 * Resampling irqfds are a special variety of irqfds used to emulate
52 * level triggered interrupts. The interrupt is asserted on eventfd
53 * trigger. On acknowledgement through the irq ack notifier, the
54 * interrupt is de-asserted and userspace is notified through the
55 * resamplefd. All resamplers on the same gsi are de-asserted
56 * together, so we don't need to track the state of each individual
57 * user. We can also therefore share the same irq source ID.
58 */
59struct _irqfd_resampler {
60 struct kvm *kvm;
61 /*
62 * List of resampling struct _irqfd objects sharing this gsi.
63 * RCU list modified under kvm->irqfds.resampler_lock
64 */
65 struct list_head list;
66 struct kvm_irq_ack_notifier notifier;
67 /*
68 * Entry in list of kvm->irqfd.resampler_list. Use for sharing
69 * resamplers among irqfds on the same gsi.
70 * Accessed and modified under kvm->irqfds.resampler_lock
71 */
72 struct list_head link;
73};
74
75struct _irqfd {
76 /* Used for MSI fast-path */
77 struct kvm *kvm;
78 wait_queue_t wait;
79 /* Update side is protected by irqfds.lock */
80 struct kvm_kernel_irq_routing_entry irq_entry;
81 seqcount_t irq_entry_sc;
82 /* Used for level IRQ fast-path */
83 int gsi;
84 struct work_struct inject;
85 /* The resampler used by this irqfd (resampler-only) */
86 struct _irqfd_resampler *resampler;
87 /* Eventfd notified on resample (resampler-only) */
88 struct eventfd_ctx *resamplefd;
89 /* Entry in list of irqfds for a resampler (resampler-only) */
90 struct list_head resampler_link;
91 /* Used for setup/shutdown */
92 struct eventfd_ctx *eventfd;
93 struct list_head list;
94 poll_table pt;
95 struct work_struct shutdown;
96};
97 44
98static struct workqueue_struct *irqfd_cleanup_wq; 45static struct workqueue_struct *irqfd_cleanup_wq;
99 46
100static void 47static void
101irqfd_inject(struct work_struct *work) 48irqfd_inject(struct work_struct *work)
102{ 49{
103 struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); 50 struct kvm_kernel_irqfd *irqfd =
51 container_of(work, struct kvm_kernel_irqfd, inject);
104 struct kvm *kvm = irqfd->kvm; 52 struct kvm *kvm = irqfd->kvm;
105 53
106 if (!irqfd->resampler) { 54 if (!irqfd->resampler) {
@@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work)
121static void 69static void
122irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) 70irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
123{ 71{
124 struct _irqfd_resampler *resampler; 72 struct kvm_kernel_irqfd_resampler *resampler;
125 struct kvm *kvm; 73 struct kvm *kvm;
126 struct _irqfd *irqfd; 74 struct kvm_kernel_irqfd *irqfd;
127 int idx; 75 int idx;
128 76
129 resampler = container_of(kian, struct _irqfd_resampler, notifier); 77 resampler = container_of(kian,
78 struct kvm_kernel_irqfd_resampler, notifier);
130 kvm = resampler->kvm; 79 kvm = resampler->kvm;
131 80
132 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, 81 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
@@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
141} 90}
142 91
143static void 92static void
144irqfd_resampler_shutdown(struct _irqfd *irqfd) 93irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
145{ 94{
146 struct _irqfd_resampler *resampler = irqfd->resampler; 95 struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
147 struct kvm *kvm = resampler->kvm; 96 struct kvm *kvm = resampler->kvm;
148 97
149 mutex_lock(&kvm->irqfds.resampler_lock); 98 mutex_lock(&kvm->irqfds.resampler_lock);
@@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
168static void 117static void
169irqfd_shutdown(struct work_struct *work) 118irqfd_shutdown(struct work_struct *work)
170{ 119{
171 struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); 120 struct kvm_kernel_irqfd *irqfd =
121 container_of(work, struct kvm_kernel_irqfd, shutdown);
172 u64 cnt; 122 u64 cnt;
173 123
174 /* 124 /*
@@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work)
191 /* 141 /*
192 * It is now safe to release the object's resources 142 * It is now safe to release the object's resources
193 */ 143 */
144#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
145 irq_bypass_unregister_consumer(&irqfd->consumer);
146#endif
194 eventfd_ctx_put(irqfd->eventfd); 147 eventfd_ctx_put(irqfd->eventfd);
195 kfree(irqfd); 148 kfree(irqfd);
196} 149}
@@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work)
198 151
199/* assumes kvm->irqfds.lock is held */ 152/* assumes kvm->irqfds.lock is held */
200static bool 153static bool
201irqfd_is_active(struct _irqfd *irqfd) 154irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
202{ 155{
203 return list_empty(&irqfd->list) ? false : true; 156 return list_empty(&irqfd->list) ? false : true;
204} 157}
@@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd)
209 * assumes kvm->irqfds.lock is held 162 * assumes kvm->irqfds.lock is held
210 */ 163 */
211static void 164static void
212irqfd_deactivate(struct _irqfd *irqfd) 165irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
213{ 166{
214 BUG_ON(!irqfd_is_active(irqfd)); 167 BUG_ON(!irqfd_is_active(irqfd));
215 168
@@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd)
218 queue_work(irqfd_cleanup_wq, &irqfd->shutdown); 171 queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
219} 172}
220 173
174int __attribute__((weak)) kvm_arch_set_irq_inatomic(
175 struct kvm_kernel_irq_routing_entry *irq,
176 struct kvm *kvm, int irq_source_id,
177 int level,
178 bool line_status)
179{
180 return -EWOULDBLOCK;
181}
182
221/* 183/*
222 * Called with wqh->lock held and interrupts disabled 184 * Called with wqh->lock held and interrupts disabled
223 */ 185 */
224static int 186static int
225irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) 187irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
226{ 188{
227 struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); 189 struct kvm_kernel_irqfd *irqfd =
190 container_of(wait, struct kvm_kernel_irqfd, wait);
228 unsigned long flags = (unsigned long)key; 191 unsigned long flags = (unsigned long)key;
229 struct kvm_kernel_irq_routing_entry irq; 192 struct kvm_kernel_irq_routing_entry irq;
230 struct kvm *kvm = irqfd->kvm; 193 struct kvm *kvm = irqfd->kvm;
@@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
238 irq = irqfd->irq_entry; 201 irq = irqfd->irq_entry;
239 } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); 202 } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
240 /* An event has been signaled, inject an interrupt */ 203 /* An event has been signaled, inject an interrupt */
241 if (irq.type == KVM_IRQ_ROUTING_MSI) 204 if (kvm_arch_set_irq_inatomic(&irq, kvm,
242 kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, 205 KVM_USERSPACE_IRQ_SOURCE_ID, 1,
243 false); 206 false) == -EWOULDBLOCK)
244 else
245 schedule_work(&irqfd->inject); 207 schedule_work(&irqfd->inject);
246 srcu_read_unlock(&kvm->irq_srcu, idx); 208 srcu_read_unlock(&kvm->irq_srcu, idx);
247 } 209 }
@@ -274,37 +236,54 @@ static void
274irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, 236irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
275 poll_table *pt) 237 poll_table *pt)
276{ 238{
-        struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
+        struct kvm_kernel_irqfd *irqfd =
+                container_of(pt, struct kvm_kernel_irqfd, pt);
         add_wait_queue(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
+static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 {
         struct kvm_kernel_irq_routing_entry *e;
         struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
-        int i, n_entries;
+        int n_entries;
 
         n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
         write_seqcount_begin(&irqfd->irq_entry_sc);
 
-        irqfd->irq_entry.type = 0;
-
         e = entries;
-        for (i = 0; i < n_entries; ++i, ++e) {
-                /* Only fast-path MSI. */
-                if (e->type == KVM_IRQ_ROUTING_MSI)
-                        irqfd->irq_entry = *e;
-        }
+        if (n_entries == 1)
+                irqfd->irq_entry = *e;
+        else
+                irqfd->irq_entry.type = 0;
 
         write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+void __attribute__((weak)) kvm_arch_irq_bypass_stop(
+                                struct irq_bypass_consumer *cons)
+{
+}
+
+void __attribute__((weak)) kvm_arch_irq_bypass_start(
+                                struct irq_bypass_consumer *cons)
+{
+}
+
+int __attribute__((weak)) kvm_arch_update_irqfd_routing(
+                                struct kvm *kvm, unsigned int host_irq,
+                                uint32_t guest_irq, bool set)
+{
+        return 0;
+}
+#endif
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-        struct _irqfd *irqfd, *tmp;
+        struct kvm_kernel_irqfd *irqfd, *tmp;
         struct fd f;
         struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
         int ret;
@@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
         irqfd->eventfd = eventfd;
 
         if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
-                struct _irqfd_resampler *resampler;
+                struct kvm_kernel_irqfd_resampler *resampler;
 
                 resamplefd = eventfd_ctx_fdget(args->resamplefd);
                 if (IS_ERR(resamplefd)) {
@@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
          * we might race against the POLLHUP
          */
         fdput(f);
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+        irqfd->consumer.token = (void *)irqfd->eventfd;
+        irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+        irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+        irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+        irqfd->consumer.start = kvm_arch_irq_bypass_start;
+        ret = irq_bypass_register_consumer(&irqfd->consumer);
+        if (ret)
+                pr_info("irq bypass consumer (token %p) registration fails: %d\n",
+                                irqfd->consumer.token, ret);
+#endif
 
         return 0;
 
@@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 }
 EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
 {
         struct kvm_irq_ack_notifier *kian;
+
+        hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+                                 link)
+                if (kian->gsi == gsi)
+                        kian->irq_acked(kian);
+}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
         int gsi, idx;
 
         trace_kvm_ack_irq(irqchip, pin);
@@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
         idx = srcu_read_lock(&kvm->irq_srcu);
         gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
         if (gsi != -1)
-                hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-                                         link)
-                        if (kian->gsi == gsi)
-                                kian->irq_acked(kian);
+                kvm_notify_acked_gsi(kvm, gsi);
         srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
@@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm)
 static int
 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-        struct _irqfd *irqfd, *tmp;
+        struct kvm_kernel_irqfd *irqfd, *tmp;
         struct eventfd_ctx *eventfd;
 
         eventfd = eventfd_ctx_fdget(args->fd);
@@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 void
 kvm_irqfd_release(struct kvm *kvm)
 {
-        struct _irqfd *irqfd, *tmp;
+        struct kvm_kernel_irqfd *irqfd, *tmp;
 
         spin_lock_irq(&kvm->irqfds.lock);
 
@@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
-        struct _irqfd *irqfd;
+        struct kvm_kernel_irqfd *irqfd;
 
         spin_lock_irq(&kvm->irqfds.lock);
 
-        list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+        list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
                 irqfd_update(kvm, irqfd);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+                if (irqfd->producer) {
+                        int ret = kvm_arch_update_irqfd_routing(
+                                        irqfd->kvm, irqfd->producer->irq,
+                                        irqfd->gsi, 1);
+                        WARN_ON(ret);
+                }
+#endif
+        }
+
         spin_unlock_irq(&kvm->irqfds.lock);
 }
 
@@ -914,9 +920,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
                 return -EINVAL;
 
         /* ioeventfd with no length can't be combined with DATAMATCH */
-        if (!args->len &&
-            args->flags & (KVM_IOEVENTFD_FLAG_PIO |
-                           KVM_IOEVENTFD_FLAG_DATAMATCH))
+        if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
                 return -EINVAL;
 
         ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index d7ea8e20dae4..f0b08a2a48ba 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-        int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-        u32 nr_rt_entries;
-        /*
-         * Array indexed by gsi. Each entry contains list of irq chips
-         * the gsi is connected to.
-         */
-        struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
                     struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
@@ -154,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 
         /*
          * Do not allow GSI to be mapped to the same irqchip more than once.
-         * Allow only one to one mapping between GSI and MSI.
+         * Allow only one to one mapping between GSI and non-irqchip routing.
          */
         hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
-                if (ei->type == KVM_IRQ_ROUTING_MSI ||
-                    ue->type == KVM_IRQ_ROUTING_MSI ||
+                if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
+                    ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
                     ue->u.irqchip.irqchip == ei->irqchip.irqchip)
                         return r;
 
@@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
         kvm_irq_routing_update(kvm);
         mutex_unlock(&kvm->irq_lock);
 
+        kvm_arch_irq_routing_update(kvm);
+
         synchronize_srcu_expedited(&kvm->irq_srcu);
 
         new = old;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8db1d9361993..484079efea5b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
         init_waitqueue_head(&vcpu->wq);
         kvm_async_pf_vcpu_init(vcpu);
 
+        vcpu->pre_pcpu = -1;
+        INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
+
         page = alloc_page(GFP_KERNEL | __GFP_ZERO);
         if (!page) {
                 r = -ENOMEM;
@@ -2018,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                 } while (single_task_running() && ktime_before(cur, stop));
         }
 
+        kvm_arch_vcpu_blocking(vcpu);
+
         for (;;) {
                 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
@@ -2031,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
         finish_wait(&vcpu->wq, &wait);
         cur = ktime_get();
 
+        kvm_arch_vcpu_unblocking(vcpu);
 out:
         block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
 
@@ -2718,6 +2724,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
         case KVM_CAP_IRQFD:
         case KVM_CAP_IRQFD_RESAMPLE:
 #endif
+        case KVM_CAP_IOEVENTFD_ANY_LENGTH:
         case KVM_CAP_CHECK_EXTENSION_VM:
                 return 1;
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
@@ -3341,7 +3348,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
         if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
                 return -ENOSPC;
 
-        new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
+        new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) *
                           sizeof(struct kvm_io_range)), GFP_KERNEL);
         if (!new_bus)
                 return -ENOMEM;
@@ -3373,7 +3380,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
         if (r)
                 return r;
 
-        new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+        new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
                           sizeof(struct kvm_io_range)), GFP_KERNEL);
         if (!new_bus)
                 return -ENOMEM;
diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig
new file mode 100644
index 000000000000..89a414f815d2
--- /dev/null
+++ b/virt/lib/Kconfig
@@ -0,0 +1,2 @@
config IRQ_BYPASS_MANAGER
        tristate
diff --git a/virt/lib/Makefile b/virt/lib/Makefile
new file mode 100644
index 000000000000..901228d1ffbc
--- /dev/null
+++ b/virt/lib/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
new file mode 100644
index 000000000000..09a03b5a21ff
--- /dev/null
+++ b/virt/lib/irqbypass.c
@@ -0,0 +1,257 @@
/*
 * IRQ offload/bypass manager
 *
 * Copyright (C) 2015 Red Hat, Inc.
 * Copyright (c) 2015 Linaro Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Various virtualization hardware acceleration techniques allow bypassing or
 * offloading interrupts received from devices around the host kernel. Posted
 * Interrupts on Intel VT-d systems can allow interrupts to be received
 * directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical
 * interrupts to be directly deactivated by the guest. This manager allows
 * interrupt producers and consumers to find each other to enable this sort of
 * bypass.
 */

#include <linux/irqbypass.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("IRQ bypass manager utility module");

static LIST_HEAD(producers);
static LIST_HEAD(consumers);
static DEFINE_MUTEX(lock);

/* @lock must be held when calling connect */
static int __connect(struct irq_bypass_producer *prod,
                     struct irq_bypass_consumer *cons)
{
        int ret = 0;

        if (prod->stop)
                prod->stop(prod);
        if (cons->stop)
                cons->stop(cons);

        if (prod->add_consumer)
                ret = prod->add_consumer(prod, cons);

        if (!ret) {
                ret = cons->add_producer(cons, prod);
                if (ret && prod->del_consumer)
                        prod->del_consumer(prod, cons);
        }

        if (cons->start)
                cons->start(cons);
        if (prod->start)
                prod->start(prod);

        return ret;
}

/* @lock must be held when calling disconnect */
static void __disconnect(struct irq_bypass_producer *prod,
                         struct irq_bypass_consumer *cons)
{
        if (prod->stop)
                prod->stop(prod);
        if (cons->stop)
                cons->stop(cons);

        cons->del_producer(cons, prod);

        if (prod->del_consumer)
                prod->del_consumer(prod, cons);

        if (cons->start)
                cons->start(cons);
        if (prod->start)
                prod->start(prod);
}

/**
 * irq_bypass_register_producer - register IRQ bypass producer
 * @producer: pointer to producer structure
 *
 * Add the provided IRQ producer to the list of producers and connect
 * with any matching token found on the IRQ consumers list.
 */
int irq_bypass_register_producer(struct irq_bypass_producer *producer)
{
        struct irq_bypass_producer *tmp;
        struct irq_bypass_consumer *consumer;

        might_sleep();

        if (!try_module_get(THIS_MODULE))
                return -ENODEV;

        mutex_lock(&lock);

        list_for_each_entry(tmp, &producers, node) {
                if (tmp->token == producer->token) {
                        mutex_unlock(&lock);
                        module_put(THIS_MODULE);
                        return -EBUSY;
                }
        }

        list_for_each_entry(consumer, &consumers, node) {
                if (consumer->token == producer->token) {
                        int ret = __connect(producer, consumer);
                        if (ret) {
                                mutex_unlock(&lock);
                                module_put(THIS_MODULE);
                                return ret;
                        }
                        break;
                }
        }

        list_add(&producer->node, &producers);

        mutex_unlock(&lock);

        return 0;
}
EXPORT_SYMBOL_GPL(irq_bypass_register_producer);

/**
 * irq_bypass_unregister_producer - unregister IRQ bypass producer
 * @producer: pointer to producer structure
 *
 * Remove a previously registered IRQ producer from the list of producers
 * and disconnect it from any connected IRQ consumer.
 */
void irq_bypass_unregister_producer(struct irq_bypass_producer *producer)
{
        struct irq_bypass_producer *tmp;
        struct irq_bypass_consumer *consumer;

        might_sleep();

        if (!try_module_get(THIS_MODULE))
                return; /* nothing in the list anyway */

        mutex_lock(&lock);

        list_for_each_entry(tmp, &producers, node) {
                if (tmp->token != producer->token)
                        continue;

                list_for_each_entry(consumer, &consumers, node) {
                        if (consumer->token == producer->token) {
                                __disconnect(producer, consumer);
                                break;
                        }
                }

                list_del(&producer->node);
                module_put(THIS_MODULE);
                break;
        }

        mutex_unlock(&lock);

        module_put(THIS_MODULE);
}
EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer);

/**
 * irq_bypass_register_consumer - register IRQ bypass consumer
 * @consumer: pointer to consumer structure
 *
 * Add the provided IRQ consumer to the list of consumers and connect
 * with any matching token found on the IRQ producer list.
 */
int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
{
        struct irq_bypass_consumer *tmp;
        struct irq_bypass_producer *producer;

        if (!consumer->add_producer || !consumer->del_producer)
                return -EINVAL;

        might_sleep();

        if (!try_module_get(THIS_MODULE))
                return -ENODEV;

        mutex_lock(&lock);

        list_for_each_entry(tmp, &consumers, node) {
                if (tmp->token == consumer->token) {
                        mutex_unlock(&lock);
                        module_put(THIS_MODULE);
                        return -EBUSY;
                }
        }

        list_for_each_entry(producer, &producers, node) {
                if (producer->token == consumer->token) {
                        int ret = __connect(producer, consumer);
                        if (ret) {
                                mutex_unlock(&lock);
                                module_put(THIS_MODULE);
                                return ret;
                        }
                        break;
                }
        }

        list_add(&consumer->node, &consumers);

        mutex_unlock(&lock);

        return 0;
}
EXPORT_SYMBOL_GPL(irq_bypass_register_consumer);

/**
 * irq_bypass_unregister_consumer - unregister IRQ bypass consumer
 * @consumer: pointer to consumer structure
 *
 * Remove a previously registered IRQ consumer from the list of consumers
 * and disconnect it from any connected IRQ producer.
 */
void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
{
        struct irq_bypass_consumer *tmp;
        struct irq_bypass_producer *producer;

        might_sleep();

        if (!try_module_get(THIS_MODULE))
                return; /* nothing in the list anyway */

        mutex_lock(&lock);

        list_for_each_entry(tmp, &consumers, node) {
                if (tmp->token != consumer->token)
                        continue;

                list_for_each_entry(producer, &producers, node) {
                        if (producer->token == consumer->token) {
                                __disconnect(producer, consumer);
                                break;
                        }
                }

                list_del(&consumer->node);
                module_put(THIS_MODULE);
                break;
        }

        mutex_unlock(&lock);

        module_put(THIS_MODULE);
}
EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer);
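
For orientation only, and not part of the commit: a minimal sketch of how a producer (for example a VFIO-style device driver) and a consumer (such as the irqfd consumer registered by kvm_irqfd_assign() above) might pair up through this manager. The demo_* names are hypothetical; the token is just a shared lookup key, here assumed to be the eventfd context pointer, matching the convention the irqfd code uses.

/*
 * Hypothetical usage sketch (not from the patch).  Whichever side
 * registers second triggers __connect(), because both sides carry
 * the same token.
 */
#include <linux/irqbypass.h>

static int demo_add_producer(struct irq_bypass_consumer *cons,
                             struct irq_bypass_producer *prod)
{
        /* arch code would wire up posted interrupts / IRQ forwarding here */
        return 0;
}

static void demo_del_producer(struct irq_bypass_consumer *cons,
                              struct irq_bypass_producer *prod)
{
        /* and tear the bypass down again here */
}

static struct irq_bypass_producer demo_producer;
static struct irq_bypass_consumer demo_consumer;

static int demo_pair_up(void *eventfd_token, int host_irq)
{
        int ret;

        demo_producer.token = eventfd_token;    /* shared lookup key */
        demo_producer.irq = host_irq;           /* host IRQ being bypassed */

        demo_consumer.token = eventfd_token;
        demo_consumer.add_producer = demo_add_producer; /* mandatory */
        demo_consumer.del_producer = demo_del_producer; /* mandatory */

        ret = irq_bypass_register_producer(&demo_producer);
        if (ret)
                return ret;

        ret = irq_bypass_register_consumer(&demo_consumer);
        if (ret)
                irq_bypass_unregister_producer(&demo_producer);

        return ret;
}

The consumer callbacks are required because irq_bypass_register_consumer() rejects a consumer without add_producer/del_producer; the producer-side callbacks (add_consumer, del_consumer, stop, start) are optional, as the NULL checks in __connect()/__disconnect() show.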