author    | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 19:26:26 -0500
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 19:26:26 -0500
commit    | 933425fb0010bd02bd459b41e63082756818ffce
tree      | 1cbc6c2035b9dcff8cb265c9ac562cbee7c6bb82 /virt
parent    | a3e7531535a0c6e5acbaa5436f37933bb471aa95
parent    | a3eaa8649e4c6a6afdafaa04b9114fb230617bb1
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"First batch of KVM changes for 4.4.
s390:
A bunch of fixes and optimizations for interrupt and time handling.
PPC:
Mostly bug fixes.
ARM:
No big features, but many small fixes and prerequisites including:
- a number of fixes for the arch-timer
- introducing proper level-triggered semantics for the arch-timers
- a series of patches to synchronously halt a guest (prerequisite
for IRQ forwarding)
- some tracepoint improvements
- a tweak for the EL2 panic handlers
- some more VGIC cleanups getting rid of redundant state
x86:
Quite a few changes:
- support for VT-d posted interrupts (i.e. PCI devices can inject
interrupts directly into vCPUs). This introduces a new
component (in virt/lib/) that connects VFIO and KVM together.
The same infrastructure will be used for ARM interrupt
forwarding as well.
- more Hyper-V features, though the main one (the Hyper-V synthetic
interrupt controller) will have to wait for 4.5. These will let
KVM expose Hyper-V devices.
- nested virtualization now supports VPID (same as PCID but for
vCPUs) which makes it quite a bit faster
- for future hardware that supports NVDIMM, there is support for
clflushopt, clwb, pcommit
- support for "split irqchip", i.e. LAPIC in kernel +
IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
the hypervisor
- obligatory smattering of SMM fixes
- on the guest side, stable scheduler clock support was rewritten
to not require help from the hypervisor"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
KVM: VMX: Fix commit which broke PML
KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
KVM: x86: allow RSM from 64-bit mode
KVM: VMX: fix SMEP and SMAP without EPT
KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
KVM: device assignment: remove pointless #ifdefs
KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
KVM: x86: zero apic_arb_prio on reset
drivers/hv: share Hyper-V SynIC constants with userspace
KVM: x86: handle SMBASE as physical address in RSM
KVM: x86: add read_phys to x86_emulate_ops
KVM: x86: removing unused variable
KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
KVM: arm/arm64: Optimize away redundant LR tracking
KVM: s390: use simple switch statement as multiplexer
KVM: s390: drop useless newline in debugging data
KVM: s390: SCA must not cross page boundaries
KVM: arm: Do not indent the arguments of DECLARE_BITMAP
...
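The "split irqchip" item in the message above is a userspace-visible capability. As a rough sketch (not part of this diff, which is limited to virt/, and assuming the 4.4 semantics where KVM_CAP_SPLIT_IRQCHIP is enabled per VM and args[0] holds the number of routes reserved for the userspace IOAPIC), a VMM would turn it on roughly like this:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical VMM helper: keep the LAPIC in the kernel, emulate the
 * IOAPIC/PIC/PIT in userspace.  24 assumes a standard IOAPIC with 24
 * input pins; adjust for the emulated board. */
static int enable_split_irqchip(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_SPLIT_IRQCHIP,
		.args = { 24 },	/* routes reserved for the userspace IOAPIC */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

With the split irqchip, EOI notifications for level-triggered IOAPIC pins have to be forwarded back to the VMM; that plumbing lives in the x86 side of the series, outside the virt/ diffstat below.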
Diffstat (limited to 'virt')
-rw-r--r-- | virt/Makefile             |   1
-rw-r--r-- | virt/kvm/Kconfig          |   5
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 173
-rw-r--r-- | virt/kvm/arm/trace.h      |  63
-rw-r--r-- | virt/kvm/arm/vgic-v2.c    |   6
-rw-r--r-- | virt/kvm/arm/vgic-v3.c    |   6
-rw-r--r-- | virt/kvm/arm/vgic.c       | 308
-rw-r--r-- | virt/kvm/async_pf.c       |   4
-rw-r--r-- | virt/kvm/eventfd.c        | 190
-rw-r--r-- | virt/kvm/irqchip.c        |  18
-rw-r--r-- | virt/kvm/kvm_main.c       |  11
-rw-r--r-- | virt/lib/Kconfig          |   2
-rw-r--r-- | virt/lib/Makefile         |   1
-rw-r--r-- | virt/lib/irqbypass.c      | 257
14 files changed, 681 insertions, 364 deletions
diff --git a/virt/Makefile b/virt/Makefile
new file mode 100644
index 000000000000..be783472ac81
--- /dev/null
+++ b/virt/Makefile
@@ -0,0 +1 @@ | |||
obj-y += lib/ | |||
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index e2c876d5a03b..7a79b6853583 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -46,4 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT | |||
46 | 46 | ||
47 | config KVM_COMPAT | 47 | config KVM_COMPAT |
48 | def_bool y | 48 | def_bool y |
49 | depends on COMPAT && !S390 | 49 | depends on KVM && COMPAT && !S390 |
50 | |||
51 | config HAVE_KVM_IRQ_BYPASS | ||
52 | bool | ||
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b9d3a32cbc04..21a0ab2d8919 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -28,6 +28,8 @@ | |||
28 | #include <kvm/arm_vgic.h> | 28 | #include <kvm/arm_vgic.h> |
29 | #include <kvm/arm_arch_timer.h> | 29 | #include <kvm/arm_arch_timer.h> |
30 | 30 | ||
31 | #include "trace.h" | ||
32 | |||
31 | static struct timecounter *timecounter; | 33 | static struct timecounter *timecounter; |
32 | static struct workqueue_struct *wqueue; | 34 | static struct workqueue_struct *wqueue; |
33 | static unsigned int host_vtimer_irq; | 35 | static unsigned int host_vtimer_irq; |
@@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer) | |||
59 | } | 61 | } |
60 | } | 62 | } |
61 | 63 | ||
62 | static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) | ||
63 | { | ||
64 | int ret; | ||
65 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
66 | |||
67 | kvm_vgic_set_phys_irq_active(timer->map, true); | ||
68 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | ||
69 | timer->map, | ||
70 | timer->irq->level); | ||
71 | WARN_ON(ret); | ||
72 | } | ||
73 | |||
74 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | 64 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) |
75 | { | 65 | { |
76 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; | 66 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; |
@@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) | |||
111 | return HRTIMER_NORESTART; | 101 | return HRTIMER_NORESTART; |
112 | } | 102 | } |
113 | 103 | ||
104 | static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) | ||
105 | { | ||
106 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
107 | |||
108 | return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && | ||
109 | (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); | ||
110 | } | ||
111 | |||
114 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | 112 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) |
115 | { | 113 | { |
116 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 114 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
117 | cycle_t cval, now; | 115 | cycle_t cval, now; |
118 | 116 | ||
119 | if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || | 117 | if (!kvm_timer_irq_can_fire(vcpu)) |
120 | !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || | ||
121 | kvm_vgic_get_phys_irq_active(timer->map)) | ||
122 | return false; | 118 | return false; |
123 | 119 | ||
124 | cval = timer->cntv_cval; | 120 | cval = timer->cntv_cval; |
@@ -127,12 +123,94 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | |||
127 | return cval <= now; | 123 | return cval <= now; |
128 | } | 124 | } |
129 | 125 | ||
126 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | ||
127 | { | ||
128 | int ret; | ||
129 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
130 | |||
131 | BUG_ON(!vgic_initialized(vcpu->kvm)); | ||
132 | |||
133 | timer->irq.level = new_level; | ||
134 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, | ||
135 | timer->irq.level); | ||
136 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | ||
137 | timer->map, | ||
138 | timer->irq.level); | ||
139 | WARN_ON(ret); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Check if there was a change in the timer state (should we raise or lower | ||
144 | * the line level to the GIC). | ||
145 | */ | ||
146 | static void kvm_timer_update_state(struct kvm_vcpu *vcpu) | ||
147 | { | ||
148 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
149 | |||
150 | /* | ||
151 | * If userspace modified the timer registers via SET_ONE_REG before | ||
152 | * the vgic was initialized, we mustn't set the timer->irq.level value | ||
153 | * because the guest would never see the interrupt. Instead wait | ||
154 | * until we call this function from kvm_timer_flush_hwstate. | ||
155 | */ | ||
156 | if (!vgic_initialized(vcpu->kvm)) | ||
157 | return; | ||
158 | |||
159 | if (kvm_timer_should_fire(vcpu) != timer->irq.level) | ||
160 | kvm_timer_update_irq(vcpu, !timer->irq.level); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Schedule the background timer before calling kvm_vcpu_block, so that this | ||
165 | * thread is removed from its waitqueue and made runnable when there's a timer | ||
166 | * interrupt to handle. | ||
167 | */ | ||
168 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) | ||
169 | { | ||
170 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
171 | u64 ns; | ||
172 | cycle_t cval, now; | ||
173 | |||
174 | BUG_ON(timer_is_armed(timer)); | ||
175 | |||
176 | /* | ||
177 | * No need to schedule a background timer if the guest timer has | ||
178 | * already expired, because kvm_vcpu_block will return before putting | ||
179 | * the thread to sleep. | ||
180 | */ | ||
181 | if (kvm_timer_should_fire(vcpu)) | ||
182 | return; | ||
183 | |||
184 | /* | ||
185 | * If the timer is not capable of raising interrupts (disabled or | ||
186 | * masked), then there's no more work for us to do. | ||
187 | */ | ||
188 | if (!kvm_timer_irq_can_fire(vcpu)) | ||
189 | return; | ||
190 | |||
191 | /* The timer has not yet expired, schedule a background timer */ | ||
192 | cval = timer->cntv_cval; | ||
193 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | ||
194 | |||
195 | ns = cyclecounter_cyc2ns(timecounter->cc, | ||
196 | cval - now, | ||
197 | timecounter->mask, | ||
198 | &timecounter->frac); | ||
199 | timer_arm(timer, ns); | ||
200 | } | ||
201 | |||
202 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | ||
203 | { | ||
204 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
205 | timer_disarm(timer); | ||
206 | } | ||
207 | |||
130 | /** | 208 | /** |
131 | * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu | 209 | * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu |
132 | * @vcpu: The vcpu pointer | 210 | * @vcpu: The vcpu pointer |
133 | * | 211 | * |
134 | * Disarm any pending soft timers, since the world-switch code will write the | 212 | * Check if the virtual timer has expired while we were running in the host, |
135 | * virtual timer state back to the physical CPU. | 213 | * and inject an interrupt if that was the case. |
136 | */ | 214 | */ |
137 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | 215 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) |
138 | { | 216 | { |
@@ -140,28 +218,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
140 | bool phys_active; | 218 | bool phys_active; |
141 | int ret; | 219 | int ret; |
142 | 220 | ||
143 | /* | 221 | kvm_timer_update_state(vcpu); |
144 | * We're about to run this vcpu again, so there is no need to | ||
145 | * keep the background timer running, as we're about to | ||
146 | * populate the CPU timer again. | ||
147 | */ | ||
148 | timer_disarm(timer); | ||
149 | 222 | ||
150 | /* | 223 | /* |
151 | * If the timer expired while we were not scheduled, now is the time | 224 | * If we enter the guest with the virtual input level to the VGIC |
152 | * to inject it. | 225 | * asserted, then we have already told the VGIC what we need to, and |
226 | * we don't need to exit from the guest until the guest deactivates | ||
227 | * the already injected interrupt, so therefore we should set the | ||
228 | * hardware active state to prevent unnecessary exits from the guest. | ||
229 | * | ||
230 | * Conversely, if the virtual input level is deasserted, then always | ||
231 | * clear the hardware active state to ensure that hardware interrupts | ||
232 | * from the timer triggers a guest exit. | ||
153 | */ | 233 | */ |
154 | if (kvm_timer_should_fire(vcpu)) | 234 | if (timer->irq.level) |
155 | kvm_timer_inject_irq(vcpu); | ||
156 | |||
157 | /* | ||
158 | * We keep track of whether the edge-triggered interrupt has been | ||
159 | * signalled to the vgic/guest, and if so, we mask the interrupt and | ||
160 | * the physical distributor to prevent the timer from raising a | ||
161 | * physical interrupt whenever we run a guest, preventing forward | ||
162 | * VCPU progress. | ||
163 | */ | ||
164 | if (kvm_vgic_get_phys_irq_active(timer->map)) | ||
165 | phys_active = true; | 235 | phys_active = true; |
166 | else | 236 | else |
167 | phys_active = false; | 237 | phys_active = false; |
@@ -176,32 +246,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
176 | * kvm_timer_sync_hwstate - sync timer state from cpu | 246 | * kvm_timer_sync_hwstate - sync timer state from cpu |
177 | * @vcpu: The vcpu pointer | 247 | * @vcpu: The vcpu pointer |
178 | * | 248 | * |
179 | * Check if the virtual timer was armed and either schedule a corresponding | 249 | * Check if the virtual timer has expired while we were running in the guest, |
180 | * soft timer or inject directly if already expired. | 250 | * and inject an interrupt if that was the case. |
181 | */ | 251 | */ |
182 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | 252 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) |
183 | { | 253 | { |
184 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 254 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
185 | cycle_t cval, now; | ||
186 | u64 ns; | ||
187 | 255 | ||
188 | BUG_ON(timer_is_armed(timer)); | 256 | BUG_ON(timer_is_armed(timer)); |
189 | 257 | ||
190 | if (kvm_timer_should_fire(vcpu)) { | 258 | /* |
191 | /* | 259 | * The guest could have modified the timer registers or the timer |
192 | * Timer has already expired while we were not | 260 | * could have expired, update the timer state. |
193 | * looking. Inject the interrupt and carry on. | 261 | */ |
194 | */ | 262 | kvm_timer_update_state(vcpu); |
195 | kvm_timer_inject_irq(vcpu); | ||
196 | return; | ||
197 | } | ||
198 | |||
199 | cval = timer->cntv_cval; | ||
200 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | ||
201 | |||
202 | ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, | ||
203 | &timecounter->frac); | ||
204 | timer_arm(timer, ns); | ||
205 | } | 263 | } |
206 | 264 | ||
207 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 265 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, |
@@ -216,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
216 | * kvm_vcpu_set_target(). To handle this, we determine | 274 | * kvm_vcpu_set_target(). To handle this, we determine |
217 | * vcpu timer irq number when the vcpu is reset. | 275 | * vcpu timer irq number when the vcpu is reset. |
218 | */ | 276 | */ |
219 | timer->irq = irq; | 277 | timer->irq.irq = irq->irq; |
220 | 278 | ||
221 | /* | 279 | /* |
222 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | 280 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 |
@@ -225,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
225 | * the ARMv7 architecture. | 283 | * the ARMv7 architecture. |
226 | */ | 284 | */ |
227 | timer->cntv_ctl = 0; | 285 | timer->cntv_ctl = 0; |
286 | kvm_timer_update_state(vcpu); | ||
228 | 287 | ||
229 | /* | 288 | /* |
230 | * Tell the VGIC that the virtual interrupt is tied to a | 289 | * Tell the VGIC that the virtual interrupt is tied to a |
@@ -269,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | |||
269 | default: | 328 | default: |
270 | return -1; | 329 | return -1; |
271 | } | 330 | } |
331 | |||
332 | kvm_timer_update_state(vcpu); | ||
272 | return 0; | 333 | return 0; |
273 | } | 334 | } |
274 | 335 | ||
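kvm_timer_schedule() and kvm_timer_unschedule() above are only half of the picture; their callers live outside the virt/ diffstat (in arch/arm/kvm/arm.c). A minimal sketch of how this series is expected to wire them into the new blocking hooks around kvm_vcpu_block(), shown here as an assumption rather than part of this diff:

#include <linux/kvm_host.h>
#include <kvm/arm_arch_timer.h>

/* Sketch of the arch-side hooks: the background soft timer is only armed
 * while the VCPU is blocked, instead of across every world switch. */
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	kvm_timer_schedule(vcpu);	/* arm an hrtimer if the guest timer can fire */
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_timer_unschedule(vcpu);	/* cancel it before entering the guest again */
}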
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
new file mode 100644
index 000000000000..37d8b98867d5
--- /dev/null
+++ b/virt/kvm/arm/trace.h
@@ -0,0 +1,63 @@ | |||
1 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) | ||
2 | #define _TRACE_KVM_H | ||
3 | |||
4 | #include <linux/tracepoint.h> | ||
5 | |||
6 | #undef TRACE_SYSTEM | ||
7 | #define TRACE_SYSTEM kvm | ||
8 | |||
9 | /* | ||
10 | * Tracepoints for vgic | ||
11 | */ | ||
12 | TRACE_EVENT(vgic_update_irq_pending, | ||
13 | TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), | ||
14 | TP_ARGS(vcpu_id, irq, level), | ||
15 | |||
16 | TP_STRUCT__entry( | ||
17 | __field( unsigned long, vcpu_id ) | ||
18 | __field( __u32, irq ) | ||
19 | __field( bool, level ) | ||
20 | ), | ||
21 | |||
22 | TP_fast_assign( | ||
23 | __entry->vcpu_id = vcpu_id; | ||
24 | __entry->irq = irq; | ||
25 | __entry->level = level; | ||
26 | ), | ||
27 | |||
28 | TP_printk("VCPU: %ld, IRQ %d, level: %d", | ||
29 | __entry->vcpu_id, __entry->irq, __entry->level) | ||
30 | ); | ||
31 | |||
32 | /* | ||
33 | * Tracepoints for arch_timer | ||
34 | */ | ||
35 | TRACE_EVENT(kvm_timer_update_irq, | ||
36 | TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), | ||
37 | TP_ARGS(vcpu_id, irq, level), | ||
38 | |||
39 | TP_STRUCT__entry( | ||
40 | __field( unsigned long, vcpu_id ) | ||
41 | __field( __u32, irq ) | ||
42 | __field( int, level ) | ||
43 | ), | ||
44 | |||
45 | TP_fast_assign( | ||
46 | __entry->vcpu_id = vcpu_id; | ||
47 | __entry->irq = irq; | ||
48 | __entry->level = level; | ||
49 | ), | ||
50 | |||
51 | TP_printk("VCPU: %ld, IRQ %d, level %d", | ||
52 | __entry->vcpu_id, __entry->irq, __entry->level) | ||
53 | ); | ||
54 | |||
55 | #endif /* _TRACE_KVM_H */ | ||
56 | |||
57 | #undef TRACE_INCLUDE_PATH | ||
58 | #define TRACE_INCLUDE_PATH ../../../virt/kvm/arm | ||
59 | #undef TRACE_INCLUDE_FILE | ||
60 | #define TRACE_INCLUDE_FILE trace | ||
61 | |||
62 | /* This part must be outside protection */ | ||
63 | #include <trace/define_trace.h> | ||
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 8d7b04db8471..ff02f08df74d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
79 | lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); | 79 | lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); |
80 | 80 | ||
81 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; | 81 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; |
82 | } | ||
83 | 82 | ||
84 | static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
85 | struct vgic_lr lr_desc) | ||
86 | { | ||
87 | if (!(lr_desc.state & LR_STATE_MASK)) | 83 | if (!(lr_desc.state & LR_STATE_MASK)) |
88 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); | 84 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); |
89 | else | 85 | else |
@@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) | |||
158 | * anyway. | 154 | * anyway. |
159 | */ | 155 | */ |
160 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; | 156 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; |
157 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; | ||
161 | 158 | ||
162 | /* Get the show on the road... */ | 159 | /* Get the show on the road... */ |
163 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; | 160 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; |
@@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) | |||
166 | static const struct vgic_ops vgic_v2_ops = { | 163 | static const struct vgic_ops vgic_v2_ops = { |
167 | .get_lr = vgic_v2_get_lr, | 164 | .get_lr = vgic_v2_get_lr, |
168 | .set_lr = vgic_v2_set_lr, | 165 | .set_lr = vgic_v2_set_lr, |
169 | .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, | ||
170 | .get_elrsr = vgic_v2_get_elrsr, | 166 | .get_elrsr = vgic_v2_get_elrsr, |
171 | .get_eisr = vgic_v2_get_eisr, | 167 | .get_eisr = vgic_v2_get_eisr, |
172 | .clear_eisr = vgic_v2_clear_eisr, | 168 | .clear_eisr = vgic_v2_clear_eisr, |
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 7dd5d62f10a1..487d6357b7e7 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
112 | } | 112 | } |
113 | 113 | ||
114 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; | 114 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; |
115 | } | ||
116 | 115 | ||
117 | static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
118 | struct vgic_lr lr_desc) | ||
119 | { | ||
120 | if (!(lr_desc.state & LR_STATE_MASK)) | 116 | if (!(lr_desc.state & LR_STATE_MASK)) |
121 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); | 117 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); |
122 | else | 118 | else |
@@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
193 | * anyway. | 189 | * anyway. |
194 | */ | 190 | */ |
195 | vgic_v3->vgic_vmcr = 0; | 191 | vgic_v3->vgic_vmcr = 0; |
192 | vgic_v3->vgic_elrsr = ~0; | ||
196 | 193 | ||
197 | /* | 194 | /* |
198 | * If we are emulating a GICv3, we do it in an non-GICv2-compatible | 195 | * If we are emulating a GICv3, we do it in an non-GICv2-compatible |
@@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
211 | static const struct vgic_ops vgic_v3_ops = { | 208 | static const struct vgic_ops vgic_v3_ops = { |
212 | .get_lr = vgic_v3_get_lr, | 209 | .get_lr = vgic_v3_get_lr, |
213 | .set_lr = vgic_v3_set_lr, | 210 | .set_lr = vgic_v3_set_lr, |
214 | .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, | ||
215 | .get_elrsr = vgic_v3_get_elrsr, | 211 | .get_elrsr = vgic_v3_get_elrsr, |
216 | .get_eisr = vgic_v3_get_eisr, | 212 | .get_eisr = vgic_v3_get_eisr, |
217 | .clear_eisr = vgic_v3_clear_eisr, | 213 | .clear_eisr = vgic_v3_clear_eisr, |
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 30489181922d..533538385d5d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -34,6 +34,9 @@ | |||
34 | #include <asm/kvm.h> | 34 | #include <asm/kvm.h> |
35 | #include <kvm/iodev.h> | 35 | #include <kvm/iodev.h> |
36 | 36 | ||
37 | #define CREATE_TRACE_POINTS | ||
38 | #include "trace.h" | ||
39 | |||
37 | /* | 40 | /* |
38 | * How the whole thing works (courtesy of Christoffer Dall): | 41 | * How the whole thing works (courtesy of Christoffer Dall): |
39 | * | 42 | * |
@@ -102,11 +105,13 @@ | |||
102 | #include "vgic.h" | 105 | #include "vgic.h" |
103 | 106 | ||
104 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); | 107 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); |
105 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); | 108 | static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu); |
106 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); | 109 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); |
107 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); | 110 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); |
111 | static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); | ||
108 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, | 112 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, |
109 | int virt_irq); | 113 | int virt_irq); |
114 | static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); | ||
110 | 115 | ||
111 | static const struct vgic_ops *vgic_ops; | 116 | static const struct vgic_ops *vgic_ops; |
112 | static const struct vgic_params *vgic; | 117 | static const struct vgic_params *vgic; |
@@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) | |||
357 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 362 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
358 | 363 | ||
359 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); | 364 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); |
365 | if (!vgic_dist_irq_get_level(vcpu, irq)) { | ||
366 | vgic_dist_irq_clear_pending(vcpu, irq); | ||
367 | if (!compute_pending_for_cpu(vcpu)) | ||
368 | clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); | ||
369 | } | ||
360 | } | 370 | } |
361 | 371 | ||
362 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) | 372 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) |
@@ -531,34 +541,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, | |||
531 | return false; | 541 | return false; |
532 | } | 542 | } |
533 | 543 | ||
534 | /* | ||
535 | * If a mapped interrupt's state has been modified by the guest such that it | ||
536 | * is no longer active or pending, without it have gone through the sync path, | ||
537 | * then the map->active field must be cleared so the interrupt can be taken | ||
538 | * again. | ||
539 | */ | ||
540 | static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu) | ||
541 | { | ||
542 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
543 | struct list_head *root; | ||
544 | struct irq_phys_map_entry *entry; | ||
545 | struct irq_phys_map *map; | ||
546 | |||
547 | rcu_read_lock(); | ||
548 | |||
549 | /* Check for PPIs */ | ||
550 | root = &vgic_cpu->irq_phys_map_list; | ||
551 | list_for_each_entry_rcu(entry, root, entry) { | ||
552 | map = &entry->map; | ||
553 | |||
554 | if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) && | ||
555 | !vgic_irq_is_active(vcpu, map->virt_irq)) | ||
556 | map->active = false; | ||
557 | } | ||
558 | |||
559 | rcu_read_unlock(); | ||
560 | } | ||
561 | |||
562 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, | 544 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, |
563 | struct kvm_exit_mmio *mmio, | 545 | struct kvm_exit_mmio *mmio, |
564 | phys_addr_t offset, int vcpu_id) | 546 | phys_addr_t offset, int vcpu_id) |
@@ -589,7 +571,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, | |||
589 | vcpu_id, offset); | 571 | vcpu_id, offset); |
590 | vgic_reg_access(mmio, reg, offset, mode); | 572 | vgic_reg_access(mmio, reg, offset, mode); |
591 | 573 | ||
592 | vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); | ||
593 | vgic_update_state(kvm); | 574 | vgic_update_state(kvm); |
594 | return true; | 575 | return true; |
595 | } | 576 | } |
@@ -627,7 +608,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm, | |||
627 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | 608 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); |
628 | 609 | ||
629 | if (mmio->is_write) { | 610 | if (mmio->is_write) { |
630 | vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); | ||
631 | vgic_update_state(kvm); | 611 | vgic_update_state(kvm); |
632 | return true; | 612 | return true; |
633 | } | 613 | } |
@@ -684,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | |||
684 | vgic_reg_access(mmio, &val, offset, | 664 | vgic_reg_access(mmio, &val, offset, |
685 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | 665 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); |
686 | if (mmio->is_write) { | 666 | if (mmio->is_write) { |
687 | if (offset < 8) { | 667 | /* Ignore writes to read-only SGI and PPI bits */ |
688 | *reg = ~0U; /* Force PPIs/SGIs to 1 */ | 668 | if (offset < 8) |
689 | return false; | 669 | return false; |
690 | } | ||
691 | 670 | ||
692 | val = vgic_cfg_compress(val); | 671 | val = vgic_cfg_compress(val); |
693 | if (offset & 4) { | 672 | if (offset & 4) { |
@@ -713,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | |||
713 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | 692 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) |
714 | { | 693 | { |
715 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 694 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
695 | u64 elrsr = vgic_get_elrsr(vcpu); | ||
696 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
716 | int i; | 697 | int i; |
717 | 698 | ||
718 | for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { | 699 | for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { |
719 | struct vgic_lr lr = vgic_get_lr(vcpu, i); | 700 | struct vgic_lr lr = vgic_get_lr(vcpu, i); |
720 | 701 | ||
721 | /* | 702 | /* |
@@ -736,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
736 | * interrupt then move the active state to the | 717 | * interrupt then move the active state to the |
737 | * distributor tracking bit. | 718 | * distributor tracking bit. |
738 | */ | 719 | */ |
739 | if (lr.state & LR_STATE_ACTIVE) { | 720 | if (lr.state & LR_STATE_ACTIVE) |
740 | vgic_irq_set_active(vcpu, lr.irq); | 721 | vgic_irq_set_active(vcpu, lr.irq); |
741 | lr.state &= ~LR_STATE_ACTIVE; | ||
742 | } | ||
743 | 722 | ||
744 | /* | 723 | /* |
745 | * Reestablish the pending state on the distributor and the | 724 | * Reestablish the pending state on the distributor and the |
746 | * CPU interface. It may have already been pending, but that | 725 | * CPU interface and mark the LR as free for other use. |
747 | * is fine, then we are only setting a few bits that were | ||
748 | * already set. | ||
749 | */ | 726 | */ |
750 | if (lr.state & LR_STATE_PENDING) { | 727 | vgic_retire_lr(i, vcpu); |
751 | vgic_dist_irq_set_pending(vcpu, lr.irq); | ||
752 | lr.state &= ~LR_STATE_PENDING; | ||
753 | } | ||
754 | |||
755 | vgic_set_lr(vcpu, i, lr); | ||
756 | |||
757 | /* | ||
758 | * Mark the LR as free for other use. | ||
759 | */ | ||
760 | BUG_ON(lr.state & LR_STATE_MASK); | ||
761 | vgic_retire_lr(i, lr.irq, vcpu); | ||
762 | vgic_irq_clear_queued(vcpu, lr.irq); | ||
763 | 728 | ||
764 | /* Finally update the VGIC state. */ | 729 | /* Finally update the VGIC state. */ |
765 | vgic_update_state(vcpu->kvm); | 730 | vgic_update_state(vcpu->kvm); |
@@ -1067,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
1067 | vgic_ops->set_lr(vcpu, lr, vlr); | 1032 | vgic_ops->set_lr(vcpu, lr, vlr); |
1068 | } | 1033 | } |
1069 | 1034 | ||
1070 | static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
1071 | struct vgic_lr vlr) | ||
1072 | { | ||
1073 | vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); | ||
1074 | } | ||
1075 | |||
1076 | static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) | 1035 | static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) |
1077 | { | 1036 | { |
1078 | return vgic_ops->get_elrsr(vcpu); | 1037 | return vgic_ops->get_elrsr(vcpu); |
@@ -1118,25 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) | |||
1118 | vgic_ops->enable(vcpu); | 1077 | vgic_ops->enable(vcpu); |
1119 | } | 1078 | } |
1120 | 1079 | ||
1121 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) | 1080 | static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) |
1122 | { | 1081 | { |
1123 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1124 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); | 1082 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); |
1125 | 1083 | ||
1084 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1085 | |||
1126 | /* | 1086 | /* |
1127 | * We must transfer the pending state back to the distributor before | 1087 | * We must transfer the pending state back to the distributor before |
1128 | * retiring the LR, otherwise we may loose edge-triggered interrupts. | 1088 | * retiring the LR, otherwise we may loose edge-triggered interrupts. |
1129 | */ | 1089 | */ |
1130 | if (vlr.state & LR_STATE_PENDING) { | 1090 | if (vlr.state & LR_STATE_PENDING) { |
1131 | vgic_dist_irq_set_pending(vcpu, irq); | 1091 | vgic_dist_irq_set_pending(vcpu, vlr.irq); |
1132 | vlr.hwirq = 0; | 1092 | vlr.hwirq = 0; |
1133 | } | 1093 | } |
1134 | 1094 | ||
1135 | vlr.state = 0; | 1095 | vlr.state = 0; |
1136 | vgic_set_lr(vcpu, lr_nr, vlr); | 1096 | vgic_set_lr(vcpu, lr_nr, vlr); |
1137 | clear_bit(lr_nr, vgic_cpu->lr_used); | ||
1138 | vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; | ||
1139 | vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); | ||
1140 | } | 1097 | } |
1141 | 1098 | ||
1142 | /* | 1099 | /* |
@@ -1150,17 +1107,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) | |||
1150 | */ | 1107 | */ |
1151 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | 1108 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) |
1152 | { | 1109 | { |
1153 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 1110 | u64 elrsr = vgic_get_elrsr(vcpu); |
1111 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1154 | int lr; | 1112 | int lr; |
1155 | 1113 | ||
1156 | for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { | 1114 | for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { |
1157 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1115 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1158 | 1116 | ||
1159 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { | 1117 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) |
1160 | vgic_retire_lr(lr, vlr.irq, vcpu); | 1118 | vgic_retire_lr(lr, vcpu); |
1161 | if (vgic_irq_is_queued(vcpu, vlr.irq)) | ||
1162 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1163 | } | ||
1164 | } | 1119 | } |
1165 | } | 1120 | } |
1166 | 1121 | ||
@@ -1200,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, | |||
1200 | } | 1155 | } |
1201 | 1156 | ||
1202 | vgic_set_lr(vcpu, lr_nr, vlr); | 1157 | vgic_set_lr(vcpu, lr_nr, vlr); |
1203 | vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); | ||
1204 | } | 1158 | } |
1205 | 1159 | ||
1206 | /* | 1160 | /* |
@@ -1210,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, | |||
1210 | */ | 1164 | */ |
1211 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | 1165 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) |
1212 | { | 1166 | { |
1213 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1214 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1167 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1168 | u64 elrsr = vgic_get_elrsr(vcpu); | ||
1169 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1215 | struct vgic_lr vlr; | 1170 | struct vgic_lr vlr; |
1216 | int lr; | 1171 | int lr; |
1217 | 1172 | ||
@@ -1222,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | |||
1222 | 1177 | ||
1223 | kvm_debug("Queue IRQ%d\n", irq); | 1178 | kvm_debug("Queue IRQ%d\n", irq); |
1224 | 1179 | ||
1225 | lr = vgic_cpu->vgic_irq_lr_map[irq]; | ||
1226 | |||
1227 | /* Do we have an active interrupt for the same CPUID? */ | 1180 | /* Do we have an active interrupt for the same CPUID? */ |
1228 | if (lr != LR_EMPTY) { | 1181 | for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { |
1229 | vlr = vgic_get_lr(vcpu, lr); | 1182 | vlr = vgic_get_lr(vcpu, lr); |
1230 | if (vlr.source == sgi_source_id) { | 1183 | if (vlr.irq == irq && vlr.source == sgi_source_id) { |
1231 | kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); | 1184 | kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); |
1232 | BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); | ||
1233 | vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); | 1185 | vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); |
1234 | return true; | 1186 | return true; |
1235 | } | 1187 | } |
1236 | } | 1188 | } |
1237 | 1189 | ||
1238 | /* Try to use another LR for this interrupt */ | 1190 | /* Try to use another LR for this interrupt */ |
1239 | lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, | 1191 | lr = find_first_bit(elrsr_ptr, vgic->nr_lr); |
1240 | vgic->nr_lr); | ||
1241 | if (lr >= vgic->nr_lr) | 1192 | if (lr >= vgic->nr_lr) |
1242 | return false; | 1193 | return false; |
1243 | 1194 | ||
1244 | kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); | 1195 | kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); |
1245 | vgic_cpu->vgic_irq_lr_map[irq] = lr; | ||
1246 | set_bit(lr, vgic_cpu->lr_used); | ||
1247 | 1196 | ||
1248 | vlr.irq = irq; | 1197 | vlr.irq = irq; |
1249 | vlr.source = sgi_source_id; | 1198 | vlr.source = sgi_source_id; |
@@ -1338,12 +1287,60 @@ epilog: | |||
1338 | } | 1287 | } |
1339 | } | 1288 | } |
1340 | 1289 | ||
1290 | static int process_queued_irq(struct kvm_vcpu *vcpu, | ||
1291 | int lr, struct vgic_lr vlr) | ||
1292 | { | ||
1293 | int pending = 0; | ||
1294 | |||
1295 | /* | ||
1296 | * If the IRQ was EOIed (called from vgic_process_maintenance) or it | ||
1297 | * went from active to non-active (called from vgic_sync_hwirq) it was | ||
1298 | * also ACKed and we we therefore assume we can clear the soft pending | ||
1299 | * state (should it had been set) for this interrupt. | ||
1300 | * | ||
1301 | * Note: if the IRQ soft pending state was set after the IRQ was | ||
1302 | * acked, it actually shouldn't be cleared, but we have no way of | ||
1303 | * knowing that unless we start trapping ACKs when the soft-pending | ||
1304 | * state is set. | ||
1305 | */ | ||
1306 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
1307 | |||
1308 | /* | ||
1309 | * Tell the gic to start sampling this interrupt again. | ||
1310 | */ | ||
1311 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1312 | |||
1313 | /* Any additional pending interrupt? */ | ||
1314 | if (vgic_irq_is_edge(vcpu, vlr.irq)) { | ||
1315 | BUG_ON(!(vlr.state & LR_HW)); | ||
1316 | pending = vgic_dist_irq_is_pending(vcpu, vlr.irq); | ||
1317 | } else { | ||
1318 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { | ||
1319 | vgic_cpu_irq_set(vcpu, vlr.irq); | ||
1320 | pending = 1; | ||
1321 | } else { | ||
1322 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
1323 | vgic_cpu_irq_clear(vcpu, vlr.irq); | ||
1324 | } | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Despite being EOIed, the LR may not have | ||
1329 | * been marked as empty. | ||
1330 | */ | ||
1331 | vlr.state = 0; | ||
1332 | vlr.hwirq = 0; | ||
1333 | vgic_set_lr(vcpu, lr, vlr); | ||
1334 | |||
1335 | return pending; | ||
1336 | } | ||
1337 | |||
1341 | static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | 1338 | static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) |
1342 | { | 1339 | { |
1343 | u32 status = vgic_get_interrupt_status(vcpu); | 1340 | u32 status = vgic_get_interrupt_status(vcpu); |
1344 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1341 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1345 | bool level_pending = false; | ||
1346 | struct kvm *kvm = vcpu->kvm; | 1342 | struct kvm *kvm = vcpu->kvm; |
1343 | int level_pending = 0; | ||
1347 | 1344 | ||
1348 | kvm_debug("STATUS = %08x\n", status); | 1345 | kvm_debug("STATUS = %08x\n", status); |
1349 | 1346 | ||
@@ -1358,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
1358 | 1355 | ||
1359 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { | 1356 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { |
1360 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1357 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1361 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); | ||
1362 | 1358 | ||
1363 | spin_lock(&dist->lock); | 1359 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); |
1364 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1365 | WARN_ON(vlr.state & LR_STATE_MASK); | 1360 | WARN_ON(vlr.state & LR_STATE_MASK); |
1366 | vlr.state = 0; | ||
1367 | vgic_set_lr(vcpu, lr, vlr); | ||
1368 | 1361 | ||
1369 | /* | ||
1370 | * If the IRQ was EOIed it was also ACKed and we we | ||
1371 | * therefore assume we can clear the soft pending | ||
1372 | * state (should it had been set) for this interrupt. | ||
1373 | * | ||
1374 | * Note: if the IRQ soft pending state was set after | ||
1375 | * the IRQ was acked, it actually shouldn't be | ||
1376 | * cleared, but we have no way of knowing that unless | ||
1377 | * we start trapping ACKs when the soft-pending state | ||
1378 | * is set. | ||
1379 | */ | ||
1380 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
1381 | 1362 | ||
1382 | /* | 1363 | /* |
1383 | * kvm_notify_acked_irq calls kvm_set_irq() | 1364 | * kvm_notify_acked_irq calls kvm_set_irq() |
1384 | * to reset the IRQ level. Need to release the | 1365 | * to reset the IRQ level, which grabs the dist->lock |
1385 | * lock for kvm_set_irq to grab it. | 1366 | * so we call this before taking the dist->lock. |
1386 | */ | 1367 | */ |
1387 | spin_unlock(&dist->lock); | ||
1388 | |||
1389 | kvm_notify_acked_irq(kvm, 0, | 1368 | kvm_notify_acked_irq(kvm, 0, |
1390 | vlr.irq - VGIC_NR_PRIVATE_IRQS); | 1369 | vlr.irq - VGIC_NR_PRIVATE_IRQS); |
1391 | spin_lock(&dist->lock); | ||
1392 | |||
1393 | /* Any additional pending interrupt? */ | ||
1394 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { | ||
1395 | vgic_cpu_irq_set(vcpu, vlr.irq); | ||
1396 | level_pending = true; | ||
1397 | } else { | ||
1398 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
1399 | vgic_cpu_irq_clear(vcpu, vlr.irq); | ||
1400 | } | ||
1401 | 1370 | ||
1371 | spin_lock(&dist->lock); | ||
1372 | level_pending |= process_queued_irq(vcpu, lr, vlr); | ||
1402 | spin_unlock(&dist->lock); | 1373 | spin_unlock(&dist->lock); |
1403 | |||
1404 | /* | ||
1405 | * Despite being EOIed, the LR may not have | ||
1406 | * been marked as empty. | ||
1407 | */ | ||
1408 | vgic_sync_lr_elrsr(vcpu, lr, vlr); | ||
1409 | } | 1374 | } |
1410 | } | 1375 | } |
1411 | 1376 | ||
@@ -1426,35 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
1426 | /* | 1391 | /* |
1427 | * Save the physical active state, and reset it to inactive. | 1392 | * Save the physical active state, and reset it to inactive. |
1428 | * | 1393 | * |
1429 | * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. | 1394 | * Return true if there's a pending forwarded interrupt to queue. |
1430 | */ | 1395 | */ |
1431 | static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) | 1396 | static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) |
1432 | { | 1397 | { |
1398 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
1433 | struct irq_phys_map *map; | 1399 | struct irq_phys_map *map; |
1400 | bool phys_active; | ||
1401 | bool level_pending; | ||
1434 | int ret; | 1402 | int ret; |
1435 | 1403 | ||
1436 | if (!(vlr.state & LR_HW)) | 1404 | if (!(vlr.state & LR_HW)) |
1437 | return 0; | 1405 | return false; |
1438 | 1406 | ||
1439 | map = vgic_irq_map_search(vcpu, vlr.irq); | 1407 | map = vgic_irq_map_search(vcpu, vlr.irq); |
1440 | BUG_ON(!map); | 1408 | BUG_ON(!map); |
1441 | 1409 | ||
1442 | ret = irq_get_irqchip_state(map->irq, | 1410 | ret = irq_get_irqchip_state(map->irq, |
1443 | IRQCHIP_STATE_ACTIVE, | 1411 | IRQCHIP_STATE_ACTIVE, |
1444 | &map->active); | 1412 | &phys_active); |
1445 | 1413 | ||
1446 | WARN_ON(ret); | 1414 | WARN_ON(ret); |
1447 | 1415 | ||
1448 | if (map->active) | 1416 | if (phys_active) |
1449 | return 0; | 1417 | return 0; |
1450 | 1418 | ||
1451 | return 1; | 1419 | spin_lock(&dist->lock); |
1420 | level_pending = process_queued_irq(vcpu, lr, vlr); | ||
1421 | spin_unlock(&dist->lock); | ||
1422 | return level_pending; | ||
1452 | } | 1423 | } |
1453 | 1424 | ||
1454 | /* Sync back the VGIC state after a guest run */ | 1425 | /* Sync back the VGIC state after a guest run */ |
1455 | static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | 1426 | static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) |
1456 | { | 1427 | { |
1457 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1458 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1428 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1459 | u64 elrsr; | 1429 | u64 elrsr; |
1460 | unsigned long *elrsr_ptr; | 1430 | unsigned long *elrsr_ptr; |
@@ -1462,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | |||
1462 | bool level_pending; | 1432 | bool level_pending; |
1463 | 1433 | ||
1464 | level_pending = vgic_process_maintenance(vcpu); | 1434 | level_pending = vgic_process_maintenance(vcpu); |
1465 | elrsr = vgic_get_elrsr(vcpu); | ||
1466 | elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1467 | 1435 | ||
1468 | /* Deal with HW interrupts, and clear mappings for empty LRs */ | 1436 | /* Deal with HW interrupts, and clear mappings for empty LRs */ |
1469 | for (lr = 0; lr < vgic->nr_lr; lr++) { | 1437 | for (lr = 0; lr < vgic->nr_lr; lr++) { |
1470 | struct vgic_lr vlr; | 1438 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1471 | |||
1472 | if (!test_bit(lr, vgic_cpu->lr_used)) | ||
1473 | continue; | ||
1474 | |||
1475 | vlr = vgic_get_lr(vcpu, lr); | ||
1476 | if (vgic_sync_hwirq(vcpu, vlr)) { | ||
1477 | /* | ||
1478 | * So this is a HW interrupt that the guest | ||
1479 | * EOI-ed. Clean the LR state and allow the | ||
1480 | * interrupt to be sampled again. | ||
1481 | */ | ||
1482 | vlr.state = 0; | ||
1483 | vlr.hwirq = 0; | ||
1484 | vgic_set_lr(vcpu, lr, vlr); | ||
1485 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1486 | set_bit(lr, elrsr_ptr); | ||
1487 | } | ||
1488 | |||
1489 | if (!test_bit(lr, elrsr_ptr)) | ||
1490 | continue; | ||
1491 | |||
1492 | clear_bit(lr, vgic_cpu->lr_used); | ||
1493 | 1439 | ||
1440 | level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); | ||
1494 | BUG_ON(vlr.irq >= dist->nr_irqs); | 1441 | BUG_ON(vlr.irq >= dist->nr_irqs); |
1495 | vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; | ||
1496 | } | 1442 | } |
1497 | 1443 | ||
1498 | /* Check if we still have something up our sleeve... */ | 1444 | /* Check if we still have something up our sleeve... */ |
1445 | elrsr = vgic_get_elrsr(vcpu); | ||
1446 | elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1499 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); | 1447 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); |
1500 | if (level_pending || pending < vgic->nr_lr) | 1448 | if (level_pending || pending < vgic->nr_lr) |
1501 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); | 1449 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
@@ -1585,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
1585 | int enabled; | 1533 | int enabled; |
1586 | bool ret = true, can_inject = true; | 1534 | bool ret = true, can_inject = true; |
1587 | 1535 | ||
1536 | trace_vgic_update_irq_pending(cpuid, irq_num, level); | ||
1537 | |||
1588 | if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) | 1538 | if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) |
1589 | return -EINVAL; | 1539 | return -EINVAL; |
1590 | 1540 | ||
@@ -1864,30 +1814,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) | |||
1864 | } | 1814 | } |
1865 | 1815 | ||
1866 | /** | 1816 | /** |
1867 | * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ | ||
1868 | * | ||
1869 | * Return the logical active state of a mapped interrupt. This doesn't | ||
1870 | * necessarily reflects the current HW state. | ||
1871 | */ | ||
1872 | bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) | ||
1873 | { | ||
1874 | BUG_ON(!map); | ||
1875 | return map->active; | ||
1876 | } | ||
1877 | |||
1878 | /** | ||
1879 | * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ | ||
1880 | * | ||
1881 | * Set the logical active state of a mapped interrupt. This doesn't | ||
1882 | * immediately affects the HW state. | ||
1883 | */ | ||
1884 | void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) | ||
1885 | { | ||
1886 | BUG_ON(!map); | ||
1887 | map->active = active; | ||
1888 | } | ||
1889 | |||
1890 | /** | ||
1891 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping | 1817 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping |
1892 | * @vcpu: The VCPU pointer | 1818 | * @vcpu: The VCPU pointer |
1893 | * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq | 1819 | * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq |
@@ -1942,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
1942 | kfree(vgic_cpu->pending_shared); | 1868 | kfree(vgic_cpu->pending_shared); |
1943 | kfree(vgic_cpu->active_shared); | 1869 | kfree(vgic_cpu->active_shared); |
1944 | kfree(vgic_cpu->pend_act_shared); | 1870 | kfree(vgic_cpu->pend_act_shared); |
1945 | kfree(vgic_cpu->vgic_irq_lr_map); | ||
1946 | vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); | 1871 | vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); |
1947 | vgic_cpu->pending_shared = NULL; | 1872 | vgic_cpu->pending_shared = NULL; |
1948 | vgic_cpu->active_shared = NULL; | 1873 | vgic_cpu->active_shared = NULL; |
1949 | vgic_cpu->pend_act_shared = NULL; | 1874 | vgic_cpu->pend_act_shared = NULL; |
1950 | vgic_cpu->vgic_irq_lr_map = NULL; | ||
1951 | } | 1875 | } |
1952 | 1876 | ||
1953 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | 1877 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) |
@@ -1958,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | |||
1958 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); | 1882 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); |
1959 | vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); | 1883 | vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); |
1960 | vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); | 1884 | vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); |
1961 | vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); | ||
1962 | 1885 | ||
1963 | if (!vgic_cpu->pending_shared | 1886 | if (!vgic_cpu->pending_shared |
1964 | || !vgic_cpu->active_shared | 1887 | || !vgic_cpu->active_shared |
1965 | || !vgic_cpu->pend_act_shared | 1888 | || !vgic_cpu->pend_act_shared) { |
1966 | || !vgic_cpu->vgic_irq_lr_map) { | ||
1967 | kvm_vgic_vcpu_destroy(vcpu); | 1889 | kvm_vgic_vcpu_destroy(vcpu); |
1968 | return -ENOMEM; | 1890 | return -ENOMEM; |
1969 | } | 1891 | } |
1970 | 1892 | ||
1971 | memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); | ||
1972 | |||
1973 | /* | 1893 | /* |
1974 | * Store the number of LRs per vcpu, so we don't have to go | 1894 | * Store the number of LRs per vcpu, so we don't have to go |
1975 | * all the way to the distributor structure to find out. Only | 1895 | * all the way to the distributor structure to find out. Only |
@@ -2111,14 +2031,24 @@ int vgic_init(struct kvm *kvm) | |||
2111 | break; | 2031 | break; |
2112 | } | 2032 | } |
2113 | 2033 | ||
2114 | for (i = 0; i < dist->nr_irqs; i++) { | 2034 | /* |
2115 | if (i < VGIC_NR_PPIS) | 2035 | * Enable and configure all SGIs to be edge-triggere and |
2036 | * configure all PPIs as level-triggered. | ||
2037 | */ | ||
2038 | for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { | ||
2039 | if (i < VGIC_NR_SGIS) { | ||
2040 | /* SGIs */ | ||
2116 | vgic_bitmap_set_irq_val(&dist->irq_enabled, | 2041 | vgic_bitmap_set_irq_val(&dist->irq_enabled, |
2117 | vcpu->vcpu_id, i, 1); | 2042 | vcpu->vcpu_id, i, 1); |
2118 | if (i < VGIC_NR_PRIVATE_IRQS) | ||
2119 | vgic_bitmap_set_irq_val(&dist->irq_cfg, | 2043 | vgic_bitmap_set_irq_val(&dist->irq_cfg, |
2120 | vcpu->vcpu_id, i, | 2044 | vcpu->vcpu_id, i, |
2121 | VGIC_CFG_EDGE); | 2045 | VGIC_CFG_EDGE); |
2046 | } else if (i < VGIC_NR_PRIVATE_IRQS) { | ||
2047 | /* PPIs */ | ||
2048 | vgic_bitmap_set_irq_val(&dist->irq_cfg, | ||
2049 | vcpu->vcpu_id, i, | ||
2050 | VGIC_CFG_LEVEL); | ||
2051 | } | ||
2122 | } | 2052 | } |
2123 | 2053 | ||
2124 | vgic_enable(vcpu); | 2054 | vgic_enable(vcpu); |
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 44660aee335f..77d42be6970e 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work) | |||
94 | 94 | ||
95 | trace_kvm_async_pf_completed(addr, gva); | 95 | trace_kvm_async_pf_completed(addr, gva); |
96 | 96 | ||
97 | /* | ||
98 | * This memory barrier pairs with prepare_to_wait's set_current_state() | ||
99 | */ | ||
100 | smp_mb(); | ||
97 | if (waitqueue_active(&vcpu->wq)) | 101 | if (waitqueue_active(&vcpu->wq)) |
98 | wake_up_interruptible(&vcpu->wq); | 102 | wake_up_interruptible(&vcpu->wq); |
99 | 103 | ||
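The smp_mb() added above pairs with the barrier implied by set_current_state() inside prepare_to_wait() on the sleeping VCPU thread: either the sleeper sees the completed async_pf work before going to sleep, or the waker sees it on the waitqueue. A simplified sleeper sketch, loosely modelled on kvm_vcpu_block() (the wake-up check is a stand-in, not a real KVM helper):

#include <linux/kvm_host.h>
#include <linux/wait.h>
#include <linux/sched.h>

/* Simplified sleeper: prepare_to_wait() sets current->state with barrier
 * semantics before the condition is re-checked, which is what makes the
 * waitqueue_active()/wake_up_interruptible() fast path above safe. */
static void vcpu_block_sketch(struct kvm_vcpu *vcpu,
			      bool (*work_done)(struct kvm_vcpu *))
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
		if (work_done(vcpu))	/* stand-in for the real wake-up condition */
			break;
		schedule();
	}
	finish_wait(&vcpu->wq, &wait);
}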
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 79db45336e3a..46dbc0a7dfc1 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
25 | #include <linux/kvm.h> | 25 | #include <linux/kvm.h> |
26 | #include <linux/kvm_irqfd.h> | ||
26 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
27 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
28 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
@@ -34,73 +35,20 @@ | |||
34 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
36 | #include <linux/seqlock.h> | 37 | #include <linux/seqlock.h> |
38 | #include <linux/irqbypass.h> | ||
37 | #include <trace/events/kvm.h> | 39 | #include <trace/events/kvm.h> |
38 | 40 | ||
39 | #include <kvm/iodev.h> | 41 | #include <kvm/iodev.h> |
40 | 42 | ||
41 | #ifdef CONFIG_HAVE_KVM_IRQFD | 43 | #ifdef CONFIG_HAVE_KVM_IRQFD |
42 | /* | ||
43 | * -------------------------------------------------------------------- | ||
44 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | ||
45 | * | ||
46 | * Credit goes to Avi Kivity for the original idea. | ||
47 | * -------------------------------------------------------------------- | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
52 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
53 | * trigger. On acknowledgement through the irq ack notifier, the | ||
54 | * interrupt is de-asserted and userspace is notified through the | ||
55 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
56 | * together, so we don't need to track the state of each individual | ||
57 | * user. We can also therefore share the same irq source ID. | ||
58 | */ | ||
59 | struct _irqfd_resampler { | ||
60 | struct kvm *kvm; | ||
61 | /* | ||
62 | * List of resampling struct _irqfd objects sharing this gsi. | ||
63 | * RCU list modified under kvm->irqfds.resampler_lock | ||
64 | */ | ||
65 | struct list_head list; | ||
66 | struct kvm_irq_ack_notifier notifier; | ||
67 | /* | ||
68 | * Entry in list of kvm->irqfd.resampler_list. Use for sharing | ||
69 | * resamplers among irqfds on the same gsi. | ||
70 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
71 | */ | ||
72 | struct list_head link; | ||
73 | }; | ||
74 | |||
75 | struct _irqfd { | ||
76 | /* Used for MSI fast-path */ | ||
77 | struct kvm *kvm; | ||
78 | wait_queue_t wait; | ||
79 | /* Update side is protected by irqfds.lock */ | ||
80 | struct kvm_kernel_irq_routing_entry irq_entry; | ||
81 | seqcount_t irq_entry_sc; | ||
82 | /* Used for level IRQ fast-path */ | ||
83 | int gsi; | ||
84 | struct work_struct inject; | ||
85 | /* The resampler used by this irqfd (resampler-only) */ | ||
86 | struct _irqfd_resampler *resampler; | ||
87 | /* Eventfd notified on resample (resampler-only) */ | ||
88 | struct eventfd_ctx *resamplefd; | ||
89 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
90 | struct list_head resampler_link; | ||
91 | /* Used for setup/shutdown */ | ||
92 | struct eventfd_ctx *eventfd; | ||
93 | struct list_head list; | ||
94 | poll_table pt; | ||
95 | struct work_struct shutdown; | ||
96 | }; | ||
97 | 44 | ||
98 | static struct workqueue_struct *irqfd_cleanup_wq; | 45 | static struct workqueue_struct *irqfd_cleanup_wq; |
99 | 46 | ||
100 | static void | 47 | static void |
101 | irqfd_inject(struct work_struct *work) | 48 | irqfd_inject(struct work_struct *work) |
102 | { | 49 | { |
103 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 50 | struct kvm_kernel_irqfd *irqfd = |
51 | container_of(work, struct kvm_kernel_irqfd, inject); | ||
104 | struct kvm *kvm = irqfd->kvm; | 52 | struct kvm *kvm = irqfd->kvm; |
105 | 53 | ||
106 | if (!irqfd->resampler) { | 54 | if (!irqfd->resampler) { |
@@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work) | |||
121 | static void | 69 | static void |
122 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | 70 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) |
123 | { | 71 | { |
124 | struct _irqfd_resampler *resampler; | 72 | struct kvm_kernel_irqfd_resampler *resampler; |
125 | struct kvm *kvm; | 73 | struct kvm *kvm; |
126 | struct _irqfd *irqfd; | 74 | struct kvm_kernel_irqfd *irqfd; |
127 | int idx; | 75 | int idx; |
128 | 76 | ||
129 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | 77 | resampler = container_of(kian, |
78 | struct kvm_kernel_irqfd_resampler, notifier); | ||
130 | kvm = resampler->kvm; | 79 | kvm = resampler->kvm; |
131 | 80 | ||
132 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | 81 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, |
@@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | |||
141 | } | 90 | } |
142 | 91 | ||
143 | static void | 92 | static void |
144 | irqfd_resampler_shutdown(struct _irqfd *irqfd) | 93 | irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) |
145 | { | 94 | { |
146 | struct _irqfd_resampler *resampler = irqfd->resampler; | 95 | struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; |
147 | struct kvm *kvm = resampler->kvm; | 96 | struct kvm *kvm = resampler->kvm; |
148 | 97 | ||
149 | mutex_lock(&kvm->irqfds.resampler_lock); | 98 | mutex_lock(&kvm->irqfds.resampler_lock); |
@@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) | |||
168 | static void | 117 | static void |
169 | irqfd_shutdown(struct work_struct *work) | 118 | irqfd_shutdown(struct work_struct *work) |
170 | { | 119 | { |
171 | struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); | 120 | struct kvm_kernel_irqfd *irqfd = |
121 | container_of(work, struct kvm_kernel_irqfd, shutdown); | ||
172 | u64 cnt; | 122 | u64 cnt; |
173 | 123 | ||
174 | /* | 124 | /* |
@@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) | |||
191 | /* | 141 | /* |
192 | * It is now safe to release the object's resources | 142 | * It is now safe to release the object's resources |
193 | */ | 143 | */ |
144 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
145 | irq_bypass_unregister_consumer(&irqfd->consumer); | ||
146 | #endif | ||
194 | eventfd_ctx_put(irqfd->eventfd); | 147 | eventfd_ctx_put(irqfd->eventfd); |
195 | kfree(irqfd); | 148 | kfree(irqfd); |
196 | } | 149 | } |
@@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work) | |||
198 | 151 | ||
199 | /* assumes kvm->irqfds.lock is held */ | 152 | /* assumes kvm->irqfds.lock is held */ |
200 | static bool | 153 | static bool |
201 | irqfd_is_active(struct _irqfd *irqfd) | 154 | irqfd_is_active(struct kvm_kernel_irqfd *irqfd) |
202 | { | 155 | { |
203 | return list_empty(&irqfd->list) ? false : true; | 156 | return list_empty(&irqfd->list) ? false : true; |
204 | } | 157 | } |
@@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd) | |||
209 | * assumes kvm->irqfds.lock is held | 162 | * assumes kvm->irqfds.lock is held |
210 | */ | 163 | */ |
211 | static void | 164 | static void |
212 | irqfd_deactivate(struct _irqfd *irqfd) | 165 | irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) |
213 | { | 166 | { |
214 | BUG_ON(!irqfd_is_active(irqfd)); | 167 | BUG_ON(!irqfd_is_active(irqfd)); |
215 | 168 | ||
@@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd) | |||
218 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); | 171 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); |
219 | } | 172 | } |
220 | 173 | ||
174 | int __attribute__((weak)) kvm_arch_set_irq_inatomic( | ||
175 | struct kvm_kernel_irq_routing_entry *irq, | ||
176 | struct kvm *kvm, int irq_source_id, | ||
177 | int level, | ||
178 | bool line_status) | ||
179 | { | ||
180 | return -EWOULDBLOCK; | ||
181 | } | ||
182 | |||
221 | /* | 183 | /* |
222 | * Called with wqh->lock held and interrupts disabled | 184 | * Called with wqh->lock held and interrupts disabled |
223 | */ | 185 | */ |
224 | static int | 186 | static int |
225 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | 187 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) |
226 | { | 188 | { |
227 | struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); | 189 | struct kvm_kernel_irqfd *irqfd = |
190 | container_of(wait, struct kvm_kernel_irqfd, wait); | ||
228 | unsigned long flags = (unsigned long)key; | 191 | unsigned long flags = (unsigned long)key; |
229 | struct kvm_kernel_irq_routing_entry irq; | 192 | struct kvm_kernel_irq_routing_entry irq; |
230 | struct kvm *kvm = irqfd->kvm; | 193 | struct kvm *kvm = irqfd->kvm; |
@@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | |||
238 | irq = irqfd->irq_entry; | 201 | irq = irqfd->irq_entry; |
239 | } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); | 202 | } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); |
240 | /* An event has been signaled, inject an interrupt */ | 203 | /* An event has been signaled, inject an interrupt */ |
241 | if (irq.type == KVM_IRQ_ROUTING_MSI) | 204 | if (kvm_arch_set_irq_inatomic(&irq, kvm, |
242 | kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, | 205 | KVM_USERSPACE_IRQ_SOURCE_ID, 1, |
243 | false); | 206 | false) == -EWOULDBLOCK) |
244 | else | ||
245 | schedule_work(&irqfd->inject); | 207 | schedule_work(&irqfd->inject); |
246 | srcu_read_unlock(&kvm->irq_srcu, idx); | 208 | srcu_read_unlock(&kvm->irq_srcu, idx); |
247 | } | 209 | } |
@@ -274,37 +236,54 @@ static void | |||
274 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | 236 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, |
275 | poll_table *pt) | 237 | poll_table *pt) |
276 | { | 238 | { |
277 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | 239 | struct kvm_kernel_irqfd *irqfd = |
240 | container_of(pt, struct kvm_kernel_irqfd, pt); | ||
278 | add_wait_queue(wqh, &irqfd->wait); | 241 | add_wait_queue(wqh, &irqfd->wait); |
279 | } | 242 | } |
280 | 243 | ||
281 | /* Must be called under irqfds.lock */ | 244 | /* Must be called under irqfds.lock */ |
282 | static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) | 245 | static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) |
283 | { | 246 | { |
284 | struct kvm_kernel_irq_routing_entry *e; | 247 | struct kvm_kernel_irq_routing_entry *e; |
285 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | 248 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; |
286 | int i, n_entries; | 249 | int n_entries; |
287 | 250 | ||
288 | n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); | 251 | n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); |
289 | 252 | ||
290 | write_seqcount_begin(&irqfd->irq_entry_sc); | 253 | write_seqcount_begin(&irqfd->irq_entry_sc); |
291 | 254 | ||
292 | irqfd->irq_entry.type = 0; | ||
293 | |||
294 | e = entries; | 255 | e = entries; |
295 | for (i = 0; i < n_entries; ++i, ++e) { | 256 | if (n_entries == 1) |
296 | /* Only fast-path MSI. */ | 257 | irqfd->irq_entry = *e; |
297 | if (e->type == KVM_IRQ_ROUTING_MSI) | 258 | else |
298 | irqfd->irq_entry = *e; | 259 | irqfd->irq_entry.type = 0; |
299 | } | ||
300 | 260 | ||
301 | write_seqcount_end(&irqfd->irq_entry_sc); | 261 | write_seqcount_end(&irqfd->irq_entry_sc); |
302 | } | 262 | } |
303 | 263 | ||
264 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
265 | void __attribute__((weak)) kvm_arch_irq_bypass_stop( | ||
266 | struct irq_bypass_consumer *cons) | ||
267 | { | ||
268 | } | ||
269 | |||
270 | void __attribute__((weak)) kvm_arch_irq_bypass_start( | ||
271 | struct irq_bypass_consumer *cons) | ||
272 | { | ||
273 | } | ||
274 | |||
275 | int __attribute__((weak)) kvm_arch_update_irqfd_routing( | ||
276 | struct kvm *kvm, unsigned int host_irq, | ||
277 | uint32_t guest_irq, bool set) | ||
278 | { | ||
279 | return 0; | ||
280 | } | ||
281 | #endif | ||
282 | |||
304 | static int | 283 | static int |
305 | kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | 284 | kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) |
306 | { | 285 | { |
307 | struct _irqfd *irqfd, *tmp; | 286 | struct kvm_kernel_irqfd *irqfd, *tmp; |
308 | struct fd f; | 287 | struct fd f; |
309 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; | 288 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; |
310 | int ret; | 289 | int ret; |
@@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
340 | irqfd->eventfd = eventfd; | 319 | irqfd->eventfd = eventfd; |
341 | 320 | ||
342 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { | 321 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { |
343 | struct _irqfd_resampler *resampler; | 322 | struct kvm_kernel_irqfd_resampler *resampler; |
344 | 323 | ||
345 | resamplefd = eventfd_ctx_fdget(args->resamplefd); | 324 | resamplefd = eventfd_ctx_fdget(args->resamplefd); |
346 | if (IS_ERR(resamplefd)) { | 325 | if (IS_ERR(resamplefd)) { |
@@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
428 | * we might race against the POLLHUP | 407 | * we might race against the POLLHUP |
429 | */ | 408 | */ |
430 | fdput(f); | 409 | fdput(f); |
410 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
411 | irqfd->consumer.token = (void *)irqfd->eventfd; | ||
412 | irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; | ||
413 | irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; | ||
414 | irqfd->consumer.stop = kvm_arch_irq_bypass_stop; | ||
415 | irqfd->consumer.start = kvm_arch_irq_bypass_start; | ||
416 | ret = irq_bypass_register_consumer(&irqfd->consumer); | ||
417 | if (ret) | ||
418 | pr_info("irq bypass consumer (token %p) registration fails: %d\n", | ||
419 | irqfd->consumer.token, ret); | ||
420 | #endif | ||
431 | 421 | ||
432 | return 0; | 422 | return 0; |
433 | 423 | ||
@@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
469 | } | 459 | } |
470 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); | 460 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); |
471 | 461 | ||
472 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 462 | void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) |
473 | { | 463 | { |
474 | struct kvm_irq_ack_notifier *kian; | 464 | struct kvm_irq_ack_notifier *kian; |
465 | |||
466 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | ||
467 | link) | ||
468 | if (kian->gsi == gsi) | ||
469 | kian->irq_acked(kian); | ||
470 | } | ||
471 | |||
472 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
473 | { | ||
475 | int gsi, idx; | 474 | int gsi, idx; |
476 | 475 | ||
477 | trace_kvm_ack_irq(irqchip, pin); | 476 | trace_kvm_ack_irq(irqchip, pin); |
@@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
479 | idx = srcu_read_lock(&kvm->irq_srcu); | 478 | idx = srcu_read_lock(&kvm->irq_srcu); |
480 | gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); | 479 | gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); |
481 | if (gsi != -1) | 480 | if (gsi != -1) |
482 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | 481 | kvm_notify_acked_gsi(kvm, gsi); |
483 | link) | ||
484 | if (kian->gsi == gsi) | ||
485 | kian->irq_acked(kian); | ||
486 | srcu_read_unlock(&kvm->irq_srcu, idx); | 482 | srcu_read_unlock(&kvm->irq_srcu, idx); |
487 | } | 483 | } |
488 | 484 | ||
@@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm) | |||
525 | static int | 521 | static int |
526 | kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) | 522 | kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) |
527 | { | 523 | { |
528 | struct _irqfd *irqfd, *tmp; | 524 | struct kvm_kernel_irqfd *irqfd, *tmp; |
529 | struct eventfd_ctx *eventfd; | 525 | struct eventfd_ctx *eventfd; |
530 | 526 | ||
531 | eventfd = eventfd_ctx_fdget(args->fd); | 527 | eventfd = eventfd_ctx_fdget(args->fd); |
@@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) | |||
581 | void | 577 | void |
582 | kvm_irqfd_release(struct kvm *kvm) | 578 | kvm_irqfd_release(struct kvm *kvm) |
583 | { | 579 | { |
584 | struct _irqfd *irqfd, *tmp; | 580 | struct kvm_kernel_irqfd *irqfd, *tmp; |
585 | 581 | ||
586 | spin_lock_irq(&kvm->irqfds.lock); | 582 | spin_lock_irq(&kvm->irqfds.lock); |
587 | 583 | ||
@@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm) | |||
604 | */ | 600 | */ |
605 | void kvm_irq_routing_update(struct kvm *kvm) | 601 | void kvm_irq_routing_update(struct kvm *kvm) |
606 | { | 602 | { |
607 | struct _irqfd *irqfd; | 603 | struct kvm_kernel_irqfd *irqfd; |
608 | 604 | ||
609 | spin_lock_irq(&kvm->irqfds.lock); | 605 | spin_lock_irq(&kvm->irqfds.lock); |
610 | 606 | ||
611 | list_for_each_entry(irqfd, &kvm->irqfds.items, list) | 607 | list_for_each_entry(irqfd, &kvm->irqfds.items, list) { |
612 | irqfd_update(kvm, irqfd); | 608 | irqfd_update(kvm, irqfd); |
613 | 609 | ||
610 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
611 | if (irqfd->producer) { | ||
612 | int ret = kvm_arch_update_irqfd_routing( | ||
613 | irqfd->kvm, irqfd->producer->irq, | ||
614 | irqfd->gsi, 1); | ||
615 | WARN_ON(ret); | ||
616 | } | ||
617 | #endif | ||
618 | } | ||
619 | |||
614 | spin_unlock_irq(&kvm->irqfds.lock); | 620 | spin_unlock_irq(&kvm->irqfds.lock); |
615 | } | 621 | } |
616 | 622 | ||
@@ -914,9 +920,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
914 | return -EINVAL; | 920 | return -EINVAL; |
915 | 921 | ||
916 | /* ioeventfd with no length can't be combined with DATAMATCH */ | 922 | /* ioeventfd with no length can't be combined with DATAMATCH */ |
917 | if (!args->len && | 923 | if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) |
918 | args->flags & (KVM_IOEVENTFD_FLAG_PIO | | ||
919 | KVM_IOEVENTFD_FLAG_DATAMATCH)) | ||
920 | return -EINVAL; | 924 | return -EINVAL; |
921 | 925 | ||
922 | ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); | 926 | ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); |
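The relaxed check above, together with KVM_CAP_IOEVENTFD_ANY_LENGTH advertised in kvm_main.c below, lets userspace register a zero-length ioeventfd that matches writes of any size to the address; only DATAMATCH still requires a length. A hedged userspace sketch, with vm_fd and the port address as illustrative assumptions:

#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Assumes vm_fd is an open KVM VM descriptor; port is a guest PIO address. */
static int assign_any_length_ioeventfd(int vm_fd, uint64_t port)
{
	int efd = eventfd(0, EFD_CLOEXEC);
	struct kvm_ioeventfd ioeventfd;

	if (efd < 0)
		return -1;

	ioeventfd = (struct kvm_ioeventfd) {
		.addr = port,
		.len = 0,                        /* match writes of any length */
		.fd = efd,
		.flags = KVM_IOEVENTFD_FLAG_PIO, /* DATAMATCH still needs a length */
	};

	return ioctl(vm_fd, KVM_IOEVENTFD, &ioeventfd);
}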
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index d7ea8e20dae4..f0b08a2a48ba 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
@@ -31,16 +31,6 @@ | |||
31 | #include <trace/events/kvm.h> | 31 | #include <trace/events/kvm.h> |
32 | #include "irq.h" | 32 | #include "irq.h" |
33 | 33 | ||
34 | struct kvm_irq_routing_table { | ||
35 | int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; | ||
36 | u32 nr_rt_entries; | ||
37 | /* | ||
38 | * Array indexed by gsi. Each entry contains list of irq chips | ||
39 | * the gsi is connected to. | ||
40 | */ | ||
41 | struct hlist_head map[0]; | ||
42 | }; | ||
43 | |||
44 | int kvm_irq_map_gsi(struct kvm *kvm, | 34 | int kvm_irq_map_gsi(struct kvm *kvm, |
45 | struct kvm_kernel_irq_routing_entry *entries, int gsi) | 35 | struct kvm_kernel_irq_routing_entry *entries, int gsi) |
46 | { | 36 | { |
@@ -154,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
154 | 144 | ||
155 | /* | 145 | /* |
156 | * Do not allow GSI to be mapped to the same irqchip more than once. | 146 | * Do not allow GSI to be mapped to the same irqchip more than once. |
157 | * Allow only one to one mapping between GSI and MSI. | 147 | * Allow only one to one mapping between GSI and non-irqchip routing. |
158 | */ | 148 | */ |
159 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) | 149 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) |
160 | if (ei->type == KVM_IRQ_ROUTING_MSI || | 150 | if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || |
161 | ue->type == KVM_IRQ_ROUTING_MSI || | 151 | ue->type != KVM_IRQ_ROUTING_IRQCHIP || |
162 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | 152 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) |
163 | return r; | 153 | return r; |
164 | 154 | ||
@@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
231 | kvm_irq_routing_update(kvm); | 221 | kvm_irq_routing_update(kvm); |
232 | mutex_unlock(&kvm->irq_lock); | 222 | mutex_unlock(&kvm->irq_lock); |
233 | 223 | ||
224 | kvm_arch_irq_routing_update(kvm); | ||
225 | |||
234 | synchronize_srcu_expedited(&kvm->irq_srcu); | 226 | synchronize_srcu_expedited(&kvm->irq_srcu); |
235 | 227 | ||
236 | new = old; | 228 | new = old; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..484079efea5b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
230 | init_waitqueue_head(&vcpu->wq); | 230 | init_waitqueue_head(&vcpu->wq); |
231 | kvm_async_pf_vcpu_init(vcpu); | 231 | kvm_async_pf_vcpu_init(vcpu); |
232 | 232 | ||
233 | vcpu->pre_pcpu = -1; | ||
234 | INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); | ||
235 | |||
233 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 236 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
234 | if (!page) { | 237 | if (!page) { |
235 | r = -ENOMEM; | 238 | r = -ENOMEM; |
@@ -2018,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
2018 | } while (single_task_running() && ktime_before(cur, stop)); | 2021 | } while (single_task_running() && ktime_before(cur, stop)); |
2019 | } | 2022 | } |
2020 | 2023 | ||
2024 | kvm_arch_vcpu_blocking(vcpu); | ||
2025 | |||
2021 | for (;;) { | 2026 | for (;;) { |
2022 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 2027 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
2023 | 2028 | ||
@@ -2031,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
2031 | finish_wait(&vcpu->wq, &wait); | 2036 | finish_wait(&vcpu->wq, &wait); |
2032 | cur = ktime_get(); | 2037 | cur = ktime_get(); |
2033 | 2038 | ||
2039 | kvm_arch_vcpu_unblocking(vcpu); | ||
2034 | out: | 2040 | out: |
2035 | block_ns = ktime_to_ns(cur) - ktime_to_ns(start); | 2041 | block_ns = ktime_to_ns(cur) - ktime_to_ns(start); |
2036 | 2042 | ||
@@ -2718,6 +2724,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) | |||
2718 | case KVM_CAP_IRQFD: | 2724 | case KVM_CAP_IRQFD: |
2719 | case KVM_CAP_IRQFD_RESAMPLE: | 2725 | case KVM_CAP_IRQFD_RESAMPLE: |
2720 | #endif | 2726 | #endif |
2727 | case KVM_CAP_IOEVENTFD_ANY_LENGTH: | ||
2721 | case KVM_CAP_CHECK_EXTENSION_VM: | 2728 | case KVM_CAP_CHECK_EXTENSION_VM: |
2722 | return 1; | 2729 | return 1; |
2723 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | 2730 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
@@ -3341,7 +3348,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
3341 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | 3348 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
3342 | return -ENOSPC; | 3349 | return -ENOSPC; |
3343 | 3350 | ||
3344 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 3351 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * |
3345 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3352 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
3346 | if (!new_bus) | 3353 | if (!new_bus) |
3347 | return -ENOMEM; | 3354 | return -ENOMEM; |
@@ -3373,7 +3380,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
3373 | if (r) | 3380 | if (r) |
3374 | return r; | 3381 | return r; |
3375 | 3382 | ||
3376 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * | 3383 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * |
3377 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3384 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
3378 | if (!new_bus) | 3385 | if (!new_bus) |
3379 | return -ENOMEM; | 3386 | return -ENOMEM; |
diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig new file mode 100644 index 000000000000..89a414f815d2 --- /dev/null +++ b/virt/lib/Kconfig | |||
@@ -0,0 +1,2 @@ | |||
1 | config IRQ_BYPASS_MANAGER | ||
2 | tristate | ||
diff --git a/virt/lib/Makefile b/virt/lib/Makefile new file mode 100644 index 000000000000..901228d1ffbc --- /dev/null +++ b/virt/lib/Makefile | |||
@@ -0,0 +1 @@ | |||
1 | obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o | ||
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c new file mode 100644 index 000000000000..09a03b5a21ff --- /dev/null +++ b/virt/lib/irqbypass.c | |||
@@ -0,0 +1,257 @@ | |||
1 | /* | ||
2 | * IRQ offload/bypass manager | ||
3 | * | ||
4 | * Copyright (C) 2015 Red Hat, Inc. | ||
5 | * Copyright (c) 2015 Linaro Ltd. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * Various virtualization hardware acceleration techniques allow bypassing or | ||
12 | * offloading interrupts received from devices around the host kernel. Posted | ||
13 | * Interrupts on Intel VT-d systems can allow interrupts to be received | ||
14 | * directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical | ||
15 | * interrupts to be directly deactivated by the guest. This manager allows | ||
16 | * interrupt producers and consumers to find each other to enable this sort of | ||
17 | * bypass. | ||
18 | */ | ||
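The registration functions below connect producers and consumers that register with the same opaque token; either side may register first. A minimal sketch of a pairing under assumed names (the demo_* identifiers and callback bodies are illustrative, not part of this file), mirroring how a device driver would expose its host IRQ and a hypervisor-side consumer would claim it:

#include <linux/irqbypass.h>

/* Hypothetical consumer callback: wire the producer's host IRQ to the guest. */
static int demo_add_producer(struct irq_bypass_consumer *cons,
			     struct irq_bypass_producer *prod)
{
	/* e.g. program a posted-interrupt or forwarding entry for prod->irq */
	return 0;
}

static void demo_del_producer(struct irq_bypass_consumer *cons,
			      struct irq_bypass_producer *prod)
{
	/* tear the mapping down again */
}

static struct irq_bypass_producer demo_prod;  /* owned by the device driver */
static struct irq_bypass_consumer demo_cons;  /* owned by the hypervisor side */

static int demo_connect(void *shared_token, int host_irq)
{
	int ret;

	demo_prod.token = shared_token;	/* same token on both sides */
	demo_prod.irq = host_irq;

	demo_cons.token = shared_token;
	demo_cons.add_producer = demo_add_producer;
	demo_cons.del_producer = demo_del_producer;

	ret = irq_bypass_register_producer(&demo_prod);
	if (ret)
		return ret;

	/* Registration order does not matter; matching tokens connect. */
	return irq_bypass_register_consumer(&demo_cons);
}

In this series the consumer side is KVM's irqfd (see the irq_bypass_register_consumer() call added to eventfd.c above), which uses the eventfd context as the shared token.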
19 | |||
20 | #include <linux/irqbypass.h> | ||
21 | #include <linux/list.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/mutex.h> | ||
24 | |||
25 | MODULE_LICENSE("GPL v2"); | ||
26 | MODULE_DESCRIPTION("IRQ bypass manager utility module"); | ||
27 | |||
28 | static LIST_HEAD(producers); | ||
29 | static LIST_HEAD(consumers); | ||
30 | static DEFINE_MUTEX(lock); | ||
31 | |||
32 | /* @lock must be held when calling connect */ | ||
33 | static int __connect(struct irq_bypass_producer *prod, | ||
34 | struct irq_bypass_consumer *cons) | ||
35 | { | ||
36 | int ret = 0; | ||
37 | |||
38 | if (prod->stop) | ||
39 | prod->stop(prod); | ||
40 | if (cons->stop) | ||
41 | cons->stop(cons); | ||
42 | |||
43 | if (prod->add_consumer) | ||
44 | ret = prod->add_consumer(prod, cons); | ||
45 | |||
46 | if (!ret) { | ||
47 | ret = cons->add_producer(cons, prod); | ||
48 | if (ret && prod->del_consumer) | ||
49 | prod->del_consumer(prod, cons); | ||
50 | } | ||
51 | |||
52 | if (cons->start) | ||
53 | cons->start(cons); | ||
54 | if (prod->start) | ||
55 | prod->start(prod); | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
60 | /* @lock must be held when calling disconnect */ | ||
61 | static void __disconnect(struct irq_bypass_producer *prod, | ||
62 | struct irq_bypass_consumer *cons) | ||
63 | { | ||
64 | if (prod->stop) | ||
65 | prod->stop(prod); | ||
66 | if (cons->stop) | ||
67 | cons->stop(cons); | ||
68 | |||
69 | cons->del_producer(cons, prod); | ||
70 | |||
71 | if (prod->del_consumer) | ||
72 | prod->del_consumer(prod, cons); | ||
73 | |||
74 | if (cons->start) | ||
75 | cons->start(cons); | ||
76 | if (prod->start) | ||
77 | prod->start(prod); | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | * irq_bypass_register_producer - register IRQ bypass producer | ||
82 | * @producer: pointer to producer structure | ||
83 | * | ||
84 | * Add the provided IRQ producer to the list of producers and connect | ||
85 | * with any matching token found on the IRQ consumers list. | ||
86 | */ | ||
87 | int irq_bypass_register_producer(struct irq_bypass_producer *producer) | ||
88 | { | ||
89 | struct irq_bypass_producer *tmp; | ||
90 | struct irq_bypass_consumer *consumer; | ||
91 | |||
92 | might_sleep(); | ||
93 | |||
94 | if (!try_module_get(THIS_MODULE)) | ||
95 | return -ENODEV; | ||
96 | |||
97 | mutex_lock(&lock); | ||
98 | |||
99 | list_for_each_entry(tmp, &producers, node) { | ||
100 | if (tmp->token == producer->token) { | ||
101 | mutex_unlock(&lock); | ||
102 | module_put(THIS_MODULE); | ||
103 | return -EBUSY; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | list_for_each_entry(consumer, &consumers, node) { | ||
108 | if (consumer->token == producer->token) { | ||
109 | int ret = __connect(producer, consumer); | ||
110 | if (ret) { | ||
111 | mutex_unlock(&lock); | ||
112 | module_put(THIS_MODULE); | ||
113 | return ret; | ||
114 | } | ||
115 | break; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | list_add(&producer->node, &producers); | ||
120 | |||
121 | mutex_unlock(&lock); | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | EXPORT_SYMBOL_GPL(irq_bypass_register_producer); | ||
126 | |||
127 | /** | ||
128 | * irq_bypass_unregister_producer - unregister IRQ bypass producer | ||
129 | * @producer: pointer to producer structure | ||
130 | * | ||
131 | * Remove a previously registered IRQ producer from the list of producers | ||
132 | * and disconnect it from any connected IRQ consumer. | ||
133 | */ | ||
134 | void irq_bypass_unregister_producer(struct irq_bypass_producer *producer) | ||
135 | { | ||
136 | struct irq_bypass_producer *tmp; | ||
137 | struct irq_bypass_consumer *consumer; | ||
138 | |||
139 | might_sleep(); | ||
140 | |||
141 | if (!try_module_get(THIS_MODULE)) | ||
142 | return; /* nothing in the list anyway */ | ||
143 | |||
144 | mutex_lock(&lock); | ||
145 | |||
146 | list_for_each_entry(tmp, &producers, node) { | ||
147 | if (tmp->token != producer->token) | ||
148 | continue; | ||
149 | |||
150 | list_for_each_entry(consumer, &consumers, node) { | ||
151 | if (consumer->token == producer->token) { | ||
152 | __disconnect(producer, consumer); | ||
153 | break; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | list_del(&producer->node); | ||
158 | module_put(THIS_MODULE); | ||
159 | break; | ||
160 | } | ||
161 | |||
162 | mutex_unlock(&lock); | ||
163 | |||
164 | module_put(THIS_MODULE); | ||
165 | } | ||
166 | EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer); | ||
167 | |||
168 | /** | ||
169 | * irq_bypass_register_consumer - register IRQ bypass consumer | ||
170 | * @consumer: pointer to consumer structure | ||
171 | * | ||
172 | * Add the provided IRQ consumer to the list of consumers and connect | ||
173 | * with any matching token found on the IRQ producer list. | ||
174 | */ | ||
175 | int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) | ||
176 | { | ||
177 | struct irq_bypass_consumer *tmp; | ||
178 | struct irq_bypass_producer *producer; | ||
179 | |||
180 | if (!consumer->add_producer || !consumer->del_producer) | ||
181 | return -EINVAL; | ||
182 | |||
183 | might_sleep(); | ||
184 | |||
185 | if (!try_module_get(THIS_MODULE)) | ||
186 | return -ENODEV; | ||
187 | |||
188 | mutex_lock(&lock); | ||
189 | |||
190 | list_for_each_entry(tmp, &consumers, node) { | ||
191 | if (tmp->token == consumer->token) { | ||
192 | mutex_unlock(&lock); | ||
193 | module_put(THIS_MODULE); | ||
194 | return -EBUSY; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | list_for_each_entry(producer, &producers, node) { | ||
199 | if (producer->token == consumer->token) { | ||
200 | int ret = __connect(producer, consumer); | ||
201 | if (ret) { | ||
202 | mutex_unlock(&lock); | ||
203 | module_put(THIS_MODULE); | ||
204 | return ret; | ||
205 | } | ||
206 | break; | ||
207 | } | ||
208 | } | ||
209 | |||
210 | list_add(&consumer->node, &consumers); | ||
211 | |||
212 | mutex_unlock(&lock); | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | EXPORT_SYMBOL_GPL(irq_bypass_register_consumer); | ||
217 | |||
218 | /** | ||
219 | * irq_bypass_unregister_consumer - unregister IRQ bypass consumer | ||
220 | * @consumer: pointer to consumer structure | ||
221 | * | ||
222 | * Remove a previously registered IRQ consumer from the list of consumers | ||
223 | * and disconnect it from any connected IRQ producer. | ||
224 | */ | ||
225 | void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) | ||
226 | { | ||
227 | struct irq_bypass_consumer *tmp; | ||
228 | struct irq_bypass_producer *producer; | ||
229 | |||
230 | might_sleep(); | ||
231 | |||
232 | if (!try_module_get(THIS_MODULE)) | ||
233 | return; /* nothing in the list anyway */ | ||
234 | |||
235 | mutex_lock(&lock); | ||
236 | |||
237 | list_for_each_entry(tmp, &consumers, node) { | ||
238 | if (tmp->token != consumer->token) | ||
239 | continue; | ||
240 | |||
241 | list_for_each_entry(producer, &producers, node) { | ||
242 | if (producer->token == consumer->token) { | ||
243 | __disconnect(producer, consumer); | ||
244 | break; | ||
245 | } | ||
246 | } | ||
247 | |||
248 | list_del(&consumer->node); | ||
249 | module_put(THIS_MODULE); | ||
250 | break; | ||
251 | } | ||
252 | |||
253 | mutex_unlock(&lock); | ||
254 | |||
255 | module_put(THIS_MODULE); | ||
256 | } | ||
257 | EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer); | ||