diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 19:26:26 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 19:26:26 -0500 |
| commit | 933425fb0010bd02bd459b41e63082756818ffce (patch) | |
| tree | 1cbc6c2035b9dcff8cb265c9ac562cbee7c6bb82 /virt | |
| parent | a3e7531535a0c6e5acbaa5436f37933bb471aa95 (diff) | |
| parent | a3eaa8649e4c6a6afdafaa04b9114fb230617bb1 (diff) | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"First batch of KVM changes for 4.4.
s390:
A bunch of fixes and optimizations for interrupt and time handling.
PPC:
Mostly bug fixes.
ARM:
No big features, but many small fixes and prerequisites including:
- a number of fixes for the arch-timer
- introducing proper level-triggered semantics for the arch-timers
- a series of patches to synchronously halt a guest (prerequisite
for IRQ forwarding)
- some tracepoint improvements
- a tweak for the EL2 panic handlers
- some more VGIC cleanups getting rid of redundant state
x86:
Quite a few changes:
- support for VT-d posted interrupts (i.e. PCI devices can inject
interrupts directly into vCPUs). This introduces a new
component (in virt/lib/) that connects VFIO and KVM together.
The same infrastructure will be used for ARM interrupt
forwarding as well.
- more Hyper-V features, though the main one (the Hyper-V synthetic
interrupt controller) will have to wait for 4.5. These will let
KVM expose Hyper-V devices.
- nested virtualization now supports VPID (same as PCID but for
vCPUs) which makes it quite a bit faster
- for future hardware that supports NVDIMM, there is support for
clflushopt, clwb, pcommit
- support for "split irqchip", i.e. LAPIC in kernel +
IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
the hypervisor
- obligatory smattering of SMM fixes
- on the guest side, stable scheduler clock support was rewritten
to not require help from the hypervisor"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
KVM: VMX: Fix commit which broke PML
KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
KVM: x86: allow RSM from 64-bit mode
KVM: VMX: fix SMEP and SMAP without EPT
KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
KVM: device assignment: remove pointless #ifdefs
KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
KVM: x86: zero apic_arb_prio on reset
drivers/hv: share Hyper-V SynIC constants with userspace
KVM: x86: handle SMBASE as physical address in RSM
KVM: x86: add read_phys to x86_emulate_ops
KVM: x86: removing unused variable
KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
KVM: arm/arm64: Optimize away redundant LR tracking
KVM: s390: use simple switch statement as multiplexer
KVM: s390: drop useless newline in debugging data
KVM: s390: SCA must not cross page boundaries
KVM: arm: Do not indent the arguments of DECLARE_BITMAP
...
Diffstat (limited to 'virt')
| -rw-r--r-- | virt/Makefile | 1 | ||||
| -rw-r--r-- | virt/kvm/Kconfig | 5 | ||||
| -rw-r--r-- | virt/kvm/arm/arch_timer.c | 173 | ||||
| -rw-r--r-- | virt/kvm/arm/trace.h | 63 | ||||
| -rw-r--r-- | virt/kvm/arm/vgic-v2.c | 6 | ||||
| -rw-r--r-- | virt/kvm/arm/vgic-v3.c | 6 | ||||
| -rw-r--r-- | virt/kvm/arm/vgic.c | 308 | ||||
| -rw-r--r-- | virt/kvm/async_pf.c | 4 | ||||
| -rw-r--r-- | virt/kvm/eventfd.c | 190 | ||||
| -rw-r--r-- | virt/kvm/irqchip.c | 18 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 11 | ||||
| -rw-r--r-- | virt/lib/Kconfig | 2 | ||||
| -rw-r--r-- | virt/lib/Makefile | 1 | ||||
| -rw-r--r-- | virt/lib/irqbypass.c | 257 |
14 files changed, 681 insertions, 364 deletions
diff --git a/virt/Makefile b/virt/Makefile new file mode 100644 index 000000000000..be783472ac81 --- /dev/null +++ b/virt/Makefile | |||
| @@ -0,0 +1 @@ | |||
| obj-y += lib/ | |||
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e2c876d5a03b..7a79b6853583 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig | |||
| @@ -46,4 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT | |||
| 46 | 46 | ||
| 47 | config KVM_COMPAT | 47 | config KVM_COMPAT |
| 48 | def_bool y | 48 | def_bool y |
| 49 | depends on COMPAT && !S390 | 49 | depends on KVM && COMPAT && !S390 |
| 50 | |||
| 51 | config HAVE_KVM_IRQ_BYPASS | ||
| 52 | bool | ||
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b9d3a32cbc04..21a0ab2d8919 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
| @@ -28,6 +28,8 @@ | |||
| 28 | #include <kvm/arm_vgic.h> | 28 | #include <kvm/arm_vgic.h> |
| 29 | #include <kvm/arm_arch_timer.h> | 29 | #include <kvm/arm_arch_timer.h> |
| 30 | 30 | ||
| 31 | #include "trace.h" | ||
| 32 | |||
| 31 | static struct timecounter *timecounter; | 33 | static struct timecounter *timecounter; |
| 32 | static struct workqueue_struct *wqueue; | 34 | static struct workqueue_struct *wqueue; |
| 33 | static unsigned int host_vtimer_irq; | 35 | static unsigned int host_vtimer_irq; |
| @@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer) | |||
| 59 | } | 61 | } |
| 60 | } | 62 | } |
| 61 | 63 | ||
| 62 | static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) | ||
| 63 | { | ||
| 64 | int ret; | ||
| 65 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
| 66 | |||
| 67 | kvm_vgic_set_phys_irq_active(timer->map, true); | ||
| 68 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | ||
| 69 | timer->map, | ||
| 70 | timer->irq->level); | ||
| 71 | WARN_ON(ret); | ||
| 72 | } | ||
| 73 | |||
| 74 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | 64 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) |
| 75 | { | 65 | { |
| 76 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; | 66 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; |
| @@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) | |||
| 111 | return HRTIMER_NORESTART; | 101 | return HRTIMER_NORESTART; |
| 112 | } | 102 | } |
| 113 | 103 | ||
| 104 | static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) | ||
| 105 | { | ||
| 106 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
| 107 | |||
| 108 | return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && | ||
| 109 | (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); | ||
| 110 | } | ||
| 111 | |||
| 114 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | 112 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) |
| 115 | { | 113 | { |
| 116 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 114 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
| 117 | cycle_t cval, now; | 115 | cycle_t cval, now; |
| 118 | 116 | ||
| 119 | if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || | 117 | if (!kvm_timer_irq_can_fire(vcpu)) |
| 120 | !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || | ||
| 121 | kvm_vgic_get_phys_irq_active(timer->map)) | ||
| 122 | return false; | 118 | return false; |
| 123 | 119 | ||
| 124 | cval = timer->cntv_cval; | 120 | cval = timer->cntv_cval; |
| @@ -127,12 +123,94 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | |||
| 127 | return cval <= now; | 123 | return cval <= now; |
| 128 | } | 124 | } |
| 129 | 125 | ||
| 126 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | ||
| 127 | { | ||
| 128 | int ret; | ||
| 129 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
| 130 | |||
| 131 | BUG_ON(!vgic_initialized(vcpu->kvm)); | ||
| 132 | |||
| 133 | timer->irq.level = new_level; | ||
| 134 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, | ||
| 135 | timer->irq.level); | ||
| 136 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | ||
| 137 | timer->map, | ||
| 138 | timer->irq.level); | ||
| 139 | WARN_ON(ret); | ||
| 140 | } | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Check if there was a change in the timer state (should we raise or lower | ||
| 144 | * the line level to the GIC). | ||
| 145 | */ | ||
| 146 | static void kvm_timer_update_state(struct kvm_vcpu *vcpu) | ||
| 147 | { | ||
| 148 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
| 149 | |||
| 150 | /* | ||
| 151 | * If userspace modified the timer registers via SET_ONE_REG before | ||
| 152 | * the vgic was initialized, we mustn't set the timer->irq.level value | ||
| 153 | * because the guest would never see the interrupt. Instead wait | ||
| 154 | * until we call this function from kvm_timer_flush_hwstate. | ||
| 155 | */ | ||
| 156 | if (!vgic_initialized(vcpu->kvm)) | ||
| 157 | return; | ||
| 158 | |||
| 159 | if (kvm_timer_should_fire(vcpu) != timer->irq.level) | ||
| 160 | kvm_timer_update_irq(vcpu, !timer->irq.level); | ||
| 161 | } | ||
| 162 | |||
| 163 | /* | ||
| 164 | * Schedule the background timer before calling kvm_vcpu_block, so that this | ||
| 165 | * thread is removed from its waitqueue and made runnable when there's a timer | ||
| 166 | * interrupt to handle. | ||
| 167 | */ | ||
| 168 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) | ||
| 169 | { | ||
| 170 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
| 171 | u64 ns; | ||
| 172 | cycle_t cval, now; | ||
| 173 | |||
| 174 | BUG_ON(timer_is_armed(timer)); | ||
| 175 | |||
| 176 | /* | ||
| 177 | * No need to schedule a background timer if the guest timer has | ||
| 178 | * already expired, because kvm_vcpu_block will return before putting | ||
| 179 | * the thread to sleep. | ||
| 180 | */ | ||
| 181 | if (kvm_timer_should_fire(vcpu)) | ||
| 182 | return; | ||
| 183 | |||
| 184 | /* | ||
| 185 | * If the timer is not capable of raising interrupts (disabled or | ||
| 186 | * masked), then there's no more work for us to do. | ||
| 187 | */ | ||
| 188 | if (!kvm_timer_irq_can_fire(vcpu)) | ||
| 189 | return; | ||
| 190 | |||
| 191 | /* The timer has not yet expired, schedule a background timer */ | ||
| 192 | cval = timer->cntv_cval; | ||
| 193 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | ||
| 194 | |||
| 195 | ns = cyclecounter_cyc2ns(timecounter->cc, | ||
| 196 | cval - now, | ||
| 197 | timecounter->mask, | ||
| 198 | &timecounter->frac); | ||
| 199 | timer_arm(timer, ns); | ||
| 200 | } | ||
| 201 | |||
| 202 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | ||
| 203 | { | ||
| 204 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
| 205 | timer_disarm(timer); | ||
| 206 | } | ||
| 207 | |||
| 130 | /** | 208 | /** |
| 131 | * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu | 209 | * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu |
| 132 | * @vcpu: The vcpu pointer | 210 | * @vcpu: The vcpu pointer |
| 133 | * | 211 | * |
| 134 | * Disarm any pending soft timers, since the world-switch code will write the | 212 | * Check if the virtual timer has expired while we were running in the host, |
| 135 | * virtual timer state back to the physical CPU. | 213 | * and inject an interrupt if that was the case. |
| 136 | */ | 214 | */ |
| 137 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | 215 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) |
| 138 | { | 216 | { |
| @@ -140,28 +218,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
| 140 | bool phys_active; | 218 | bool phys_active; |
| 141 | int ret; | 219 | int ret; |
| 142 | 220 | ||
| 143 | /* | 221 | kvm_timer_update_state(vcpu); |
| 144 | * We're about to run this vcpu again, so there is no need to | ||
| 145 | * keep the background timer running, as we're about to | ||
| 146 | * populate the CPU timer again. | ||
| 147 | */ | ||
| 148 | timer_disarm(timer); | ||
| 149 | 222 | ||
| 150 | /* | 223 | /* |
| 151 | * If the timer expired while we were not scheduled, now is the time | 224 | * If we enter the guest with the virtual input level to the VGIC |
| 152 | * to inject it. | 225 | * asserted, then we have already told the VGIC what we need to, and |
| 226 | * we don't need to exit from the guest until the guest deactivates | ||
| 227 | * the already injected interrupt, so therefore we should set the | ||
| 228 | * hardware active state to prevent unnecessary exits from the guest. | ||
| 229 | * | ||
| 230 | * Conversely, if the virtual input level is deasserted, then always | ||
| 231 | * clear the hardware active state to ensure that hardware interrupts | ||
| 232 | * from the timer trigger a guest exit. | ||
| 153 | */ | 233 | */ |
| 154 | if (kvm_timer_should_fire(vcpu)) | 234 | if (timer->irq.level) |
| 155 | kvm_timer_inject_irq(vcpu); | ||
| 156 | |||
| 157 | /* | ||
| 158 | * We keep track of whether the edge-triggered interrupt has been | ||
| 159 | * signalled to the vgic/guest, and if so, we mask the interrupt and | ||
| 160 | * the physical distributor to prevent the timer from raising a | ||
| 161 | * physical interrupt whenever we run a guest, preventing forward | ||
| 162 | * VCPU progress. | ||
| 163 | */ | ||
| 164 | if (kvm_vgic_get_phys_irq_active(timer->map)) | ||
| 165 | phys_active = true; | 235 | phys_active = true; |
| 166 | else | 236 | else |
| 167 | phys_active = false; | 237 | phys_active = false; |
| @@ -176,32 +246,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
| 176 | * kvm_timer_sync_hwstate - sync timer state from cpu | 246 | * kvm_timer_sync_hwstate - sync timer state from cpu |
| 177 | * @vcpu: The vcpu pointer | 247 | * @vcpu: The vcpu pointer |
| 178 | * | 248 | * |
| 179 | * Check if the virtual timer was armed and either schedule a corresponding | 249 | * Check if the virtual timer has expired while we were running in the guest, |
| 180 | * soft timer or inject directly if already expired. | 250 | * and inject an interrupt if that was the case. |
| 181 | */ | 251 | */ |
| 182 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | 252 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) |
| 183 | { | 253 | { |
| 184 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 254 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
| 185 | cycle_t cval, now; | ||
| 186 | u64 ns; | ||
| 187 | 255 | ||
| 188 | BUG_ON(timer_is_armed(timer)); | 256 | BUG_ON(timer_is_armed(timer)); |
| 189 | 257 | ||
| 190 | if (kvm_timer_should_fire(vcpu)) { | 258 | /* |
| 191 | /* | 259 | * The guest could have modified the timer registers or the timer |
| 192 | * Timer has already expired while we were not | 260 | * could have expired, update the timer state. |
| 193 | * looking. Inject the interrupt and carry on. | 261 | */ |
| 194 | */ | 262 | kvm_timer_update_state(vcpu); |
| 195 | kvm_timer_inject_irq(vcpu); | ||
| 196 | return; | ||
| 197 | } | ||
| 198 | |||
| 199 | cval = timer->cntv_cval; | ||
| 200 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | ||
| 201 | |||
| 202 | ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, | ||
| 203 | &timecounter->frac); | ||
| 204 | timer_arm(timer, ns); | ||
| 205 | } | 263 | } |
| 206 | 264 | ||
| 207 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 265 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, |
| @@ -216,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
| 216 | * kvm_vcpu_set_target(). To handle this, we determine | 274 | * kvm_vcpu_set_target(). To handle this, we determine |
| 217 | * vcpu timer irq number when the vcpu is reset. | 275 | * vcpu timer irq number when the vcpu is reset. |
| 218 | */ | 276 | */ |
| 219 | timer->irq = irq; | 277 | timer->irq.irq = irq->irq; |
| 220 | 278 | ||
| 221 | /* | 279 | /* |
| 222 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | 280 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 |
| @@ -225,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
| 225 | * the ARMv7 architecture. | 283 | * the ARMv7 architecture. |
| 226 | */ | 284 | */ |
| 227 | timer->cntv_ctl = 0; | 285 | timer->cntv_ctl = 0; |
| 286 | kvm_timer_update_state(vcpu); | ||
| 228 | 287 | ||
| 229 | /* | 288 | /* |
| 230 | * Tell the VGIC that the virtual interrupt is tied to a | 289 | * Tell the VGIC that the virtual interrupt is tied to a |
| @@ -269,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | |||
| 269 | default: | 328 | default: |
| 270 | return -1; | 329 | return -1; |
| 271 | } | 330 | } |
| 331 | |||
| 332 | kvm_timer_update_state(vcpu); | ||
| 272 | return 0; | 333 | return 0; |
| 273 | } | 334 | } |
| 274 | 335 | ||
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h new file mode 100644 index 000000000000..37d8b98867d5 --- /dev/null +++ b/virt/kvm/arm/trace.h | |||
| @@ -0,0 +1,63 @@ | |||
| 1 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 2 | #define _TRACE_KVM_H | ||
| 3 | |||
| 4 | #include <linux/tracepoint.h> | ||
| 5 | |||
| 6 | #undef TRACE_SYSTEM | ||
| 7 | #define TRACE_SYSTEM kvm | ||
| 8 | |||
| 9 | /* | ||
| 10 | * Tracepoints for vgic | ||
| 11 | */ | ||
| 12 | TRACE_EVENT(vgic_update_irq_pending, | ||
| 13 | TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), | ||
| 14 | TP_ARGS(vcpu_id, irq, level), | ||
| 15 | |||
| 16 | TP_STRUCT__entry( | ||
| 17 | __field( unsigned long, vcpu_id ) | ||
| 18 | __field( __u32, irq ) | ||
| 19 | __field( bool, level ) | ||
| 20 | ), | ||
| 21 | |||
| 22 | TP_fast_assign( | ||
| 23 | __entry->vcpu_id = vcpu_id; | ||
| 24 | __entry->irq = irq; | ||
| 25 | __entry->level = level; | ||
| 26 | ), | ||
| 27 | |||
| 28 | TP_printk("VCPU: %ld, IRQ %d, level: %d", | ||
| 29 | __entry->vcpu_id, __entry->irq, __entry->level) | ||
| 30 | ); | ||
| 31 | |||
| 32 | /* | ||
| 33 | * Tracepoints for arch_timer | ||
| 34 | */ | ||
| 35 | TRACE_EVENT(kvm_timer_update_irq, | ||
| 36 | TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), | ||
| 37 | TP_ARGS(vcpu_id, irq, level), | ||
| 38 | |||
| 39 | TP_STRUCT__entry( | ||
| 40 | __field( unsigned long, vcpu_id ) | ||
| 41 | __field( __u32, irq ) | ||
| 42 | __field( int, level ) | ||
| 43 | ), | ||
| 44 | |||
| 45 | TP_fast_assign( | ||
| 46 | __entry->vcpu_id = vcpu_id; | ||
| 47 | __entry->irq = irq; | ||
| 48 | __entry->level = level; | ||
| 49 | ), | ||
| 50 | |||
| 51 | TP_printk("VCPU: %ld, IRQ %d, level %d", | ||
| 52 | __entry->vcpu_id, __entry->irq, __entry->level) | ||
| 53 | ); | ||
| 54 | |||
| 55 | #endif /* _TRACE_KVM_H */ | ||
| 56 | |||
| 57 | #undef TRACE_INCLUDE_PATH | ||
| 58 | #define TRACE_INCLUDE_PATH ../../../virt/kvm/arm | ||
| 59 | #undef TRACE_INCLUDE_FILE | ||
| 60 | #define TRACE_INCLUDE_FILE trace | ||
| 61 | |||
| 62 | /* This part must be outside protection */ | ||
| 63 | #include <trace/define_trace.h> | ||
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 8d7b04db8471..ff02f08df74d 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c | |||
| @@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
| 79 | lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); | 79 | lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); |
| 80 | 80 | ||
| 81 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; | 81 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; |
| 82 | } | ||
| 83 | 82 | ||
| 84 | static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
| 85 | struct vgic_lr lr_desc) | ||
| 86 | { | ||
| 87 | if (!(lr_desc.state & LR_STATE_MASK)) | 83 | if (!(lr_desc.state & LR_STATE_MASK)) |
| 88 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); | 84 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); |
| 89 | else | 85 | else |
| @@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) | |||
| 158 | * anyway. | 154 | * anyway. |
| 159 | */ | 155 | */ |
| 160 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; | 156 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; |
| 157 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; | ||
| 161 | 158 | ||
| 162 | /* Get the show on the road... */ | 159 | /* Get the show on the road... */ |
| 163 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; | 160 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; |
| @@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) | |||
| 166 | static const struct vgic_ops vgic_v2_ops = { | 163 | static const struct vgic_ops vgic_v2_ops = { |
| 167 | .get_lr = vgic_v2_get_lr, | 164 | .get_lr = vgic_v2_get_lr, |
| 168 | .set_lr = vgic_v2_set_lr, | 165 | .set_lr = vgic_v2_set_lr, |
| 169 | .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, | ||
| 170 | .get_elrsr = vgic_v2_get_elrsr, | 166 | .get_elrsr = vgic_v2_get_elrsr, |
| 171 | .get_eisr = vgic_v2_get_eisr, | 167 | .get_eisr = vgic_v2_get_eisr, |
| 172 | .clear_eisr = vgic_v2_clear_eisr, | 168 | .clear_eisr = vgic_v2_clear_eisr, |
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 7dd5d62f10a1..487d6357b7e7 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c | |||
| @@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; | 114 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; |
| 115 | } | ||
| 116 | 115 | ||
| 117 | static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
| 118 | struct vgic_lr lr_desc) | ||
| 119 | { | ||
| 120 | if (!(lr_desc.state & LR_STATE_MASK)) | 116 | if (!(lr_desc.state & LR_STATE_MASK)) |
| 121 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); | 117 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); |
| 122 | else | 118 | else |
| @@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
| 193 | * anyway. | 189 | * anyway. |
| 194 | */ | 190 | */ |
| 195 | vgic_v3->vgic_vmcr = 0; | 191 | vgic_v3->vgic_vmcr = 0; |
| 192 | vgic_v3->vgic_elrsr = ~0; | ||
| 196 | 193 | ||
| 197 | /* | 194 | /* |
| 198 | * If we are emulating a GICv3, we do it in a non-GICv2-compatible | 195 | * If we are emulating a GICv3, we do it in a non-GICv2-compatible |
| @@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
| 211 | static const struct vgic_ops vgic_v3_ops = { | 208 | static const struct vgic_ops vgic_v3_ops = { |
| 212 | .get_lr = vgic_v3_get_lr, | 209 | .get_lr = vgic_v3_get_lr, |
| 213 | .set_lr = vgic_v3_set_lr, | 210 | .set_lr = vgic_v3_set_lr, |
| 214 | .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, | ||
| 215 | .get_elrsr = vgic_v3_get_elrsr, | 211 | .get_elrsr = vgic_v3_get_elrsr, |
| 216 | .get_eisr = vgic_v3_get_eisr, | 212 | .get_eisr = vgic_v3_get_eisr, |
| 217 | .clear_eisr = vgic_v3_clear_eisr, | 213 | .clear_eisr = vgic_v3_clear_eisr, |
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 30489181922d..533538385d5d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
| @@ -34,6 +34,9 @@ | |||
| 34 | #include <asm/kvm.h> | 34 | #include <asm/kvm.h> |
| 35 | #include <kvm/iodev.h> | 35 | #include <kvm/iodev.h> |
| 36 | 36 | ||
| 37 | #define CREATE_TRACE_POINTS | ||
| 38 | #include "trace.h" | ||
| 39 | |||
| 37 | /* | 40 | /* |
| 38 | * How the whole thing works (courtesy of Christoffer Dall): | 41 | * How the whole thing works (courtesy of Christoffer Dall): |
| 39 | * | 42 | * |
| @@ -102,11 +105,13 @@ | |||
| 102 | #include "vgic.h" | 105 | #include "vgic.h" |
| 103 | 106 | ||
| 104 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); | 107 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); |
| 105 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); | 108 | static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu); |
| 106 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); | 109 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); |
| 107 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); | 110 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); |
| 111 | static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); | ||
| 108 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, | 112 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, |
| 109 | int virt_irq); | 113 | int virt_irq); |
| 114 | static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); | ||
| 110 | 115 | ||
| 111 | static const struct vgic_ops *vgic_ops; | 116 | static const struct vgic_ops *vgic_ops; |
| 112 | static const struct vgic_params *vgic; | 117 | static const struct vgic_params *vgic; |
| @@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) | |||
| 357 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 362 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
| 358 | 363 | ||
| 359 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); | 364 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); |
| 365 | if (!vgic_dist_irq_get_level(vcpu, irq)) { | ||
| 366 | vgic_dist_irq_clear_pending(vcpu, irq); | ||
| 367 | if (!compute_pending_for_cpu(vcpu)) | ||
| 368 | clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); | ||
| 369 | } | ||
| 360 | } | 370 | } |
| 361 | 371 | ||
| 362 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) | 372 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) |
| @@ -531,34 +541,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, | |||
| 531 | return false; | 541 | return false; |
| 532 | } | 542 | } |
| 533 | 543 | ||
| 534 | /* | ||
| 535 | * If a mapped interrupt's state has been modified by the guest such that it | ||
| 536 | * is no longer active or pending, without it having gone through the sync path, | ||
| 537 | * then the map->active field must be cleared so the interrupt can be taken | ||
| 538 | * again. | ||
| 539 | */ | ||
| 540 | static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu) | ||
| 541 | { | ||
| 542 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
| 543 | struct list_head *root; | ||
| 544 | struct irq_phys_map_entry *entry; | ||
| 545 | struct irq_phys_map *map; | ||
| 546 | |||
| 547 | rcu_read_lock(); | ||
| 548 | |||
| 549 | /* Check for PPIs */ | ||
| 550 | root = &vgic_cpu->irq_phys_map_list; | ||
| 551 | list_for_each_entry_rcu(entry, root, entry) { | ||
| 552 | map = &entry->map; | ||
| 553 | |||
| 554 | if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) && | ||
| 555 | !vgic_irq_is_active(vcpu, map->virt_irq)) | ||
| 556 | map->active = false; | ||
| 557 | } | ||
| 558 | |||
| 559 | rcu_read_unlock(); | ||
| 560 | } | ||
| 561 | |||
| 562 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, | 544 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, |
| 563 | struct kvm_exit_mmio *mmio, | 545 | struct kvm_exit_mmio *mmio, |
| 564 | phys_addr_t offset, int vcpu_id) | 546 | phys_addr_t offset, int vcpu_id) |
| @@ -589,7 +571,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, | |||
| 589 | vcpu_id, offset); | 571 | vcpu_id, offset); |
| 590 | vgic_reg_access(mmio, reg, offset, mode); | 572 | vgic_reg_access(mmio, reg, offset, mode); |
| 591 | 573 | ||
| 592 | vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); | ||
| 593 | vgic_update_state(kvm); | 574 | vgic_update_state(kvm); |
| 594 | return true; | 575 | return true; |
| 595 | } | 576 | } |
| @@ -627,7 +608,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm, | |||
| 627 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | 608 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); |
| 628 | 609 | ||
| 629 | if (mmio->is_write) { | 610 | if (mmio->is_write) { |
| 630 | vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); | ||
| 631 | vgic_update_state(kvm); | 611 | vgic_update_state(kvm); |
| 632 | return true; | 612 | return true; |
| 633 | } | 613 | } |
| @@ -684,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | |||
| 684 | vgic_reg_access(mmio, &val, offset, | 664 | vgic_reg_access(mmio, &val, offset, |
| 685 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | 665 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); |
| 686 | if (mmio->is_write) { | 666 | if (mmio->is_write) { |
| 687 | if (offset < 8) { | 667 | /* Ignore writes to read-only SGI and PPI bits */ |
| 688 | *reg = ~0U; /* Force PPIs/SGIs to 1 */ | 668 | if (offset < 8) |
| 689 | return false; | 669 | return false; |
| 690 | } | ||
| 691 | 670 | ||
| 692 | val = vgic_cfg_compress(val); | 671 | val = vgic_cfg_compress(val); |
| 693 | if (offset & 4) { | 672 | if (offset & 4) { |
| @@ -713,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | |||
| 713 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | 692 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) |
| 714 | { | 693 | { |
| 715 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 694 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
| 695 | u64 elrsr = vgic_get_elrsr(vcpu); | ||
| 696 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
| 716 | int i; | 697 | int i; |
| 717 | 698 | ||
| 718 | for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { | 699 | for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { |
| 719 | struct vgic_lr lr = vgic_get_lr(vcpu, i); | 700 | struct vgic_lr lr = vgic_get_lr(vcpu, i); |
| 720 | 701 | ||
| 721 | /* | 702 | /* |
| @@ -736,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
| 736 | * interrupt then move the active state to the | 717 | * interrupt then move the active state to the |
| 737 | * distributor tracking bit. | 718 | * distributor tracking bit. |
| 738 | */ | 719 | */ |
| 739 | if (lr.state & LR_STATE_ACTIVE) { | 720 | if (lr.state & LR_STATE_ACTIVE) |
| 740 | vgic_irq_set_active(vcpu, lr.irq); | 721 | vgic_irq_set_active(vcpu, lr.irq); |
| 741 | lr.state &= ~LR_STATE_ACTIVE; | ||
| 742 | } | ||
| 743 | 722 | ||
| 744 | /* | 723 | /* |
| 745 | * Reestablish the pending state on the distributor and the | 724 | * Reestablish the pending state on the distributor and the |
| 746 | * CPU interface. It may have already been pending, but that | 725 | * CPU interface and mark the LR as free for other use. |
| 747 | * is fine, then we are only setting a few bits that were | ||
| 748 | * already set. | ||
| 749 | */ | 726 | */ |
| 750 | if (lr.state & LR_STATE_PENDING) { | 727 | vgic_retire_lr(i, vcpu); |
| 751 | vgic_dist_irq_set_pending(vcpu, lr.irq); | ||
| 752 | lr.state &= ~LR_STATE_PENDING; | ||
| 753 | } | ||
| 754 | |||
| 755 | vgic_set_lr(vcpu, i, lr); | ||
| 756 | |||
| 757 | /* | ||
| 758 | * Mark the LR as free for other use. | ||
| 759 | */ | ||
| 760 | BUG_ON(lr.state & LR_STATE_MASK); | ||
| 761 | vgic_retire_lr(i, lr.irq, vcpu); | ||
| 762 | vgic_irq_clear_queued(vcpu, lr.irq); | ||
| 763 | 728 | ||
| 764 | /* Finally update the VGIC state. */ | 729 | /* Finally update the VGIC state. */ |
| 765 | vgic_update_state(vcpu->kvm); | 730 | vgic_update_state(vcpu->kvm); |
| @@ -1067,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
| 1067 | vgic_ops->set_lr(vcpu, lr, vlr); | 1032 | vgic_ops->set_lr(vcpu, lr, vlr); |
| 1068 | } | 1033 | } |
| 1069 | 1034 | ||
| 1070 | static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
| 1071 | struct vgic_lr vlr) | ||
| 1072 | { | ||
| 1073 | vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) | 1035 | static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) |
| 1077 | { | 1036 | { |
| 1078 | return vgic_ops->get_elrsr(vcpu); | 1037 | return vgic_ops->get_elrsr(vcpu); |
| @@ -1118,25 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) | |||
| 1118 | vgic_ops->enable(vcpu); | 1077 | vgic_ops->enable(vcpu); |
| 1119 | } | 1078 | } |
| 1120 | 1079 | ||
| 1121 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) | 1080 | static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) |
| 1122 | { | 1081 | { |
| 1123 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
| 1124 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); | 1082 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); |
| 1125 | 1083 | ||
| 1084 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
| 1085 | |||
| 1126 | /* | 1086 | /* |
| 1127 | * We must transfer the pending state back to the distributor before | 1087 | * We must transfer the pending state back to the distributor before |
| 1128 | * retiring the LR, otherwise we may loose edge-triggered interrupts. | 1088 | * retiring the LR, otherwise we may loose edge-triggered interrupts. |
| 1129 | */ | 1089 | */ |
| 1130 | if (vlr.state & LR_STATE_PENDING) { | 1090 | if (vlr.state & LR_STATE_PENDING) { |
| 1131 | vgic_dist_irq_set_pending(vcpu, irq); | 1091 | vgic_dist_irq_set_pending(vcpu, vlr.irq); |
| 1132 | vlr.hwirq = 0; | 1092 | vlr.hwirq = 0; |
| 1133 | } | 1093 | } |
| 1134 | 1094 | ||
| 1135 | vlr.state = 0; | 1095 | vlr.state = 0; |
| 1136 | vgic_set_lr(vcpu, lr_nr, vlr); | 1096 | vgic_set_lr(vcpu, lr_nr, vlr); |
| 1137 | clear_bit(lr_nr, vgic_cpu->lr_used); | ||
| 1138 | vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; | ||
| 1139 | vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); | ||
| 1140 | } | 1097 | } |
| 1141 | 1098 | ||
| 1142 | /* | 1099 | /* |
| @@ -1150,17 +1107,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) | |||
| 1150 | */ | 1107 | */ |
| 1151 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | 1108 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) |
| 1152 | { | 1109 | { |
| 1153 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 1110 | u64 elrsr = vgic_get_elrsr(vcpu); |
| 1111 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
| 1154 | int lr; | 1112 | int lr; |
| 1155 | 1113 | ||
| 1156 | for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { | 1114 | for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { |
| 1157 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1115 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
| 1158 | 1116 | ||
| 1159 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { | 1117 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) |
| 1160 | vgic_retire_lr(lr, vlr.irq, vcpu); | 1118 | vgic_retire_lr(lr, vcpu); |
| 1161 | if (vgic_irq_is_queued(vcpu, vlr.irq)) | ||
| 1162 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
| 1163 | } | ||
| 1164 | } | 1119 | } |
| 1165 | } | 1120 | } |
| 1166 | 1121 | ||
| @@ -1200,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, | |||
| 1200 | } | 1155 | } |
| 1201 | 1156 | ||
| 1202 | vgic_set_lr(vcpu, lr_nr, vlr); | 1157 | vgic_set_lr(vcpu, lr_nr, vlr); |
| 1203 | vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); | ||
| 1204 | } | 1158 | } |
| 1205 | 1159 | ||
| 1206 | /* | 1160 | /* |
| @@ -1210,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, | |||
| 1210 | */ | 1164 | */ |
| 1211 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | 1165 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) |
| 1212 | { | 1166 | { |
| 1213 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
| 1214 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1167 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
| 1168 | u64 elrsr = vgic_get_elrsr(vcpu); | ||
| 1169 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
| 1215 | struct vgic_lr vlr; | 1170 | struct vgic_lr vlr; |
| 1216 | int lr; | 1171 | int lr; |
| 1217 | 1172 | ||
| @@ -1222,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | |||
| 1222 | 1177 | ||
| 1223 | kvm_debug("Queue IRQ%d\n", irq); | 1178 | kvm_debug("Queue IRQ%d\n", irq); |
| 1224 | 1179 | ||
| 1225 | lr = vgic_cpu->vgic_irq_lr_map[irq]; | ||
| 1226 | |||
| 1227 | /* Do we have an active interrupt for the same CPUID? */ | 1180 | /* Do we have an active interrupt for the same CPUID? */ |
| 1228 | if (lr != LR_EMPTY) { | 1181 | for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { |
| 1229 | vlr = vgic_get_lr(vcpu, lr); | 1182 | vlr = vgic_get_lr(vcpu, lr); |
| 1230 | if (vlr.source == sgi_source_id) { | 1183 | if (vlr.irq == irq && vlr.source == sgi_source_id) { |
| 1231 | kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); | 1184 | kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); |
| 1232 | BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); | ||
| 1233 | vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); | 1185 | vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); |
| 1234 | return true; | 1186 | return true; |
| 1235 | } | 1187 | } |
| 1236 | } | 1188 | } |
| 1237 | 1189 | ||
| 1238 | /* Try to use another LR for this interrupt */ | 1190 | /* Try to use another LR for this interrupt */ |
| 1239 | lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, | 1191 | lr = find_first_bit(elrsr_ptr, vgic->nr_lr); |
| 1240 | vgic->nr_lr); | ||
| 1241 | if (lr >= vgic->nr_lr) | 1192 | if (lr >= vgic->nr_lr) |
| 1242 | return false; | 1193 | return false; |
| 1243 | 1194 | ||
| 1244 | kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); | 1195 | kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); |
| 1245 | vgic_cpu->vgic_irq_lr_map[irq] = lr; | ||
| 1246 | set_bit(lr, vgic_cpu->lr_used); | ||
| 1247 | 1196 | ||
| 1248 | vlr.irq = irq; | 1197 | vlr.irq = irq; |
| 1249 | vlr.source = sgi_source_id; | 1198 | vlr.source = sgi_source_id; |
| @@ -1338,12 +1287,60 @@ epilog: | |||
| 1338 | } | 1287 | } |
| 1339 | } | 1288 | } |
| 1340 | 1289 | ||
| 1290 | static int process_queued_irq(struct kvm_vcpu *vcpu, | ||
| 1291 | int lr, struct vgic_lr vlr) | ||
| 1292 | { | ||
| 1293 | int pending = 0; | ||
| 1294 | |||
| 1295 | /* | ||
| 1296 | * If the IRQ was EOIed (called from vgic_process_maintenance) or it | ||
| 1297 | * went from active to non-active (called from vgic_sync_hwirq) it was | ||
| 1298 | * also ACKed and we we therefore assume we can clear the soft pending | ||
| 1299 | * state (should it had been set) for this interrupt. | ||
| 1300 | * | ||
| 1301 | * Note: if the IRQ soft pending state was set after the IRQ was | ||
| 1302 | * acked, it actually shouldn't be cleared, but we have no way of | ||
| 1303 | * knowing that unless we start trapping ACKs when the soft-pending | ||
| 1304 | * state is set. | ||
| 1305 | */ | ||
| 1306 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
| 1307 | |||
| 1308 | /* | ||
| 1309 | * Tell the gic to start sampling this interrupt again. | ||
| 1310 | */ | ||
| 1311 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
| 1312 | |||
| 1313 | /* Any additional pending interrupt? */ | ||
| 1314 | if (vgic_irq_is_edge(vcpu, vlr.irq)) { | ||
| 1315 | BUG_ON(!(vlr.state & LR_HW)); | ||
| 1316 | pending = vgic_dist_irq_is_pending(vcpu, vlr.irq); | ||
| 1317 | } else { | ||
| 1318 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { | ||
| 1319 | vgic_cpu_irq_set(vcpu, vlr.irq); | ||
| 1320 | pending = 1; | ||
| 1321 | } else { | ||
| 1322 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
| 1323 | vgic_cpu_irq_clear(vcpu, vlr.irq); | ||
| 1324 | } | ||
| 1325 | } | ||
| 1326 | |||
| 1327 | /* | ||
| 1328 | * Despite being EOIed, the LR may not have | ||
| 1329 | * been marked as empty. | ||
| 1330 | */ | ||
| 1331 | vlr.state = 0; | ||
| 1332 | vlr.hwirq = 0; | ||
| 1333 | vgic_set_lr(vcpu, lr, vlr); | ||
| 1334 | |||
| 1335 | return pending; | ||
| 1336 | } | ||
| 1337 | |||
| 1341 | static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | 1338 | static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) |
| 1342 | { | 1339 | { |
| 1343 | u32 status = vgic_get_interrupt_status(vcpu); | 1340 | u32 status = vgic_get_interrupt_status(vcpu); |
| 1344 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1341 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
| 1345 | bool level_pending = false; | ||
| 1346 | struct kvm *kvm = vcpu->kvm; | 1342 | struct kvm *kvm = vcpu->kvm; |
| 1343 | int level_pending = 0; | ||
| 1347 | 1344 | ||
| 1348 | kvm_debug("STATUS = %08x\n", status); | 1345 | kvm_debug("STATUS = %08x\n", status); |
| 1349 | 1346 | ||
| @@ -1358,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
| 1358 | 1355 | ||
| 1359 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { | 1356 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { |
| 1360 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1357 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
| 1361 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); | ||
| 1362 | 1358 | ||
| 1363 | spin_lock(&dist->lock); | 1359 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); |
| 1364 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
| 1365 | WARN_ON(vlr.state & LR_STATE_MASK); | 1360 | WARN_ON(vlr.state & LR_STATE_MASK); |
| 1366 | vlr.state = 0; | ||
| 1367 | vgic_set_lr(vcpu, lr, vlr); | ||
| 1368 | 1361 | ||
| 1369 | /* | ||
| 1370 | * If the IRQ was EOIed it was also ACKed and we we | ||
| 1371 | * therefore assume we can clear the soft pending | ||
| 1372 | * state (should it had been set) for this interrupt. | ||
| 1373 | * | ||
| 1374 | * Note: if the IRQ soft pending state was set after | ||
| 1375 | * the IRQ was acked, it actually shouldn't be | ||
| 1376 | * cleared, but we have no way of knowing that unless | ||
| 1377 | * we start trapping ACKs when the soft-pending state | ||
| 1378 | * is set. | ||
| 1379 | */ | ||
| 1380 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
| 1381 | 1362 | ||
| 1382 | /* | 1363 | /* |
| 1383 | * kvm_notify_acked_irq calls kvm_set_irq() | 1364 | * kvm_notify_acked_irq calls kvm_set_irq() |
| 1384 | * to reset the IRQ level. Need to release the | 1365 | * to reset the IRQ level, which grabs the dist->lock |
| 1385 | * lock for kvm_set_irq to grab it. | 1366 | * so we call this before taking the dist->lock. |
| 1386 | */ | 1367 | */ |
| 1387 | spin_unlock(&dist->lock); | ||
| 1388 | |||
| 1389 | kvm_notify_acked_irq(kvm, 0, | 1368 | kvm_notify_acked_irq(kvm, 0, |
| 1390 | vlr.irq - VGIC_NR_PRIVATE_IRQS); | 1369 | vlr.irq - VGIC_NR_PRIVATE_IRQS); |
| 1391 | spin_lock(&dist->lock); | ||
| 1392 | |||
| 1393 | /* Any additional pending interrupt? */ | ||
| 1394 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { | ||
| 1395 | vgic_cpu_irq_set(vcpu, vlr.irq); | ||
| 1396 | level_pending = true; | ||
| 1397 | } else { | ||
| 1398 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
| 1399 | vgic_cpu_irq_clear(vcpu, vlr.irq); | ||
| 1400 | } | ||
| 1401 | 1370 | ||
| 1371 | spin_lock(&dist->lock); | ||
| 1372 | level_pending |= process_queued_irq(vcpu, lr, vlr); | ||
| 1402 | spin_unlock(&dist->lock); | 1373 | spin_unlock(&dist->lock); |
| 1403 | |||
| 1404 | /* | ||
| 1405 | * Despite being EOIed, the LR may not have | ||
| 1406 | * been marked as empty. | ||
| 1407 | */ | ||
| 1408 | vgic_sync_lr_elrsr(vcpu, lr, vlr); | ||
| 1409 | } | 1374 | } |
| 1410 | } | 1375 | } |
| 1411 | 1376 | ||
| @@ -1426,35 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
| 1426 | /* | 1391 | /* |
| 1427 | * Save the physical active state, and reset it to inactive. | 1392 | * Save the physical active state, and reset it to inactive. |
| 1428 | * | 1393 | * |
| 1429 | * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. | 1394 | * Return true if there's a pending forwarded interrupt to queue. |
| 1430 | */ | 1395 | */ |
| 1431 | static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) | 1396 | static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) |
| 1432 | { | 1397 | { |
| 1398 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
| 1433 | struct irq_phys_map *map; | 1399 | struct irq_phys_map *map; |
| 1400 | bool phys_active; | ||
| 1401 | bool level_pending; | ||
| 1434 | int ret; | 1402 | int ret; |
| 1435 | 1403 | ||
| 1436 | if (!(vlr.state & LR_HW)) | 1404 | if (!(vlr.state & LR_HW)) |
| 1437 | return 0; | 1405 | return false; |
| 1438 | 1406 | ||
| 1439 | map = vgic_irq_map_search(vcpu, vlr.irq); | 1407 | map = vgic_irq_map_search(vcpu, vlr.irq); |
| 1440 | BUG_ON(!map); | 1408 | BUG_ON(!map); |
| 1441 | 1409 | ||
| 1442 | ret = irq_get_irqchip_state(map->irq, | 1410 | ret = irq_get_irqchip_state(map->irq, |
| 1443 | IRQCHIP_STATE_ACTIVE, | 1411 | IRQCHIP_STATE_ACTIVE, |
| 1444 | &map->active); | 1412 | &phys_active); |
| 1445 | 1413 | ||
| 1446 | WARN_ON(ret); | 1414 | WARN_ON(ret); |
| 1447 | 1415 | ||
| 1448 | if (map->active) | 1416 | if (phys_active) |
| 1449 | return 0; | 1417 | return 0; |
| 1450 | 1418 | ||
| 1451 | return 1; | 1419 | spin_lock(&dist->lock); |
| 1420 | level_pending = process_queued_irq(vcpu, lr, vlr); | ||
| 1421 | spin_unlock(&dist->lock); | ||
| 1422 | return level_pending; | ||
| 1452 | } | 1423 | } |
| 1453 | 1424 | ||
| 1454 | /* Sync back the VGIC state after a guest run */ | 1425 | /* Sync back the VGIC state after a guest run */ |
| 1455 | static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | 1426 | static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) |
| 1456 | { | 1427 | { |
| 1457 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
| 1458 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1428 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
| 1459 | u64 elrsr; | 1429 | u64 elrsr; |
| 1460 | unsigned long *elrsr_ptr; | 1430 | unsigned long *elrsr_ptr; |
| @@ -1462,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | |||
| 1462 | bool level_pending; | 1432 | bool level_pending; |
| 1463 | 1433 | ||
| 1464 | level_pending = vgic_process_maintenance(vcpu); | 1434 | level_pending = vgic_process_maintenance(vcpu); |
| 1465 | elrsr = vgic_get_elrsr(vcpu); | ||
| 1466 | elrsr_ptr = u64_to_bitmask(&elrsr); | ||
| 1467 | 1435 | ||
| 1468 | /* Deal with HW interrupts, and clear mappings for empty LRs */ | 1436 | /* Deal with HW interrupts, and clear mappings for empty LRs */ |
| 1469 | for (lr = 0; lr < vgic->nr_lr; lr++) { | 1437 | for (lr = 0; lr < vgic->nr_lr; lr++) { |
| 1470 | struct vgic_lr vlr; | 1438 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
| 1471 | |||
| 1472 | if (!test_bit(lr, vgic_cpu->lr_used)) | ||
| 1473 | continue; | ||
| 1474 | |||
| 1475 | vlr = vgic_get_lr(vcpu, lr); | ||
| 1476 | if (vgic_sync_hwirq(vcpu, vlr)) { | ||
| 1477 | /* | ||
| 1478 | * So this is a HW interrupt that the guest | ||
| 1479 | * EOI-ed. Clean the LR state and allow the | ||
| 1480 | * interrupt to be sampled again. | ||
| 1481 | */ | ||
| 1482 | vlr.state = 0; | ||
| 1483 | vlr.hwirq = 0; | ||
| 1484 | vgic_set_lr(vcpu, lr, vlr); | ||
| 1485 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
| 1486 | set_bit(lr, elrsr_ptr); | ||
| 1487 | } | ||
| 1488 | |||
| 1489 | if (!test_bit(lr, elrsr_ptr)) | ||
| 1490 | continue; | ||
| 1491 | |||
| 1492 | clear_bit(lr, vgic_cpu->lr_used); | ||
| 1493 | 1439 | ||
| 1440 | level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); | ||
| 1494 | BUG_ON(vlr.irq >= dist->nr_irqs); | 1441 | BUG_ON(vlr.irq >= dist->nr_irqs); |
| 1495 | vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; | ||
| 1496 | } | 1442 | } |
| 1497 | 1443 | ||
| 1498 | /* Check if we still have something up our sleeve... */ | 1444 | /* Check if we still have something up our sleeve... */ |
| 1445 | elrsr = vgic_get_elrsr(vcpu); | ||
| 1446 | elrsr_ptr = u64_to_bitmask(&elrsr); | ||
| 1499 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); | 1447 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); |
| 1500 | if (level_pending || pending < vgic->nr_lr) | 1448 | if (level_pending || pending < vgic->nr_lr) |
| 1501 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); | 1449 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
| @@ -1585,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
| 1585 | int enabled; | 1533 | int enabled; |
| 1586 | bool ret = true, can_inject = true; | 1534 | bool ret = true, can_inject = true; |
| 1587 | 1535 | ||
| 1536 | trace_vgic_update_irq_pending(cpuid, irq_num, level); | ||
| 1537 | |||
| 1588 | if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) | 1538 | if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) |
| 1589 | return -EINVAL; | 1539 | return -EINVAL; |
| 1590 | 1540 | ||
| @@ -1864,30 +1814,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) | |||
| 1864 | } | 1814 | } |
| 1865 | 1815 | ||
| 1866 | /** | 1816 | /** |
| 1867 | * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ | ||
| 1868 | * | ||
| 1869 | * Return the logical active state of a mapped interrupt. This doesn't | ||
| 1870 | * necessarily reflects the current HW state. | ||
| 1871 | */ | ||
| 1872 | bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) | ||
| 1873 | { | ||
| 1874 | BUG_ON(!map); | ||
| 1875 | return map->active; | ||
| 1876 | } | ||
| 1877 | |||
| 1878 | /** | ||
| 1879 | * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ | ||
| 1880 | * | ||
| 1881 | * Set the logical active state of a mapped interrupt. This doesn't | ||
| 1882 | * immediately affects the HW state. | ||
| 1883 | */ | ||
| 1884 | void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) | ||
| 1885 | { | ||
| 1886 | BUG_ON(!map); | ||
| 1887 | map->active = active; | ||
| 1888 | } | ||
| 1889 | |||
| 1890 | /** | ||
| 1891 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping | 1817 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping |
| 1892 | * @vcpu: The VCPU pointer | 1818 | * @vcpu: The VCPU pointer |
| 1893 | * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq | 1819 | * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq |
| @@ -1942,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
| 1942 | kfree(vgic_cpu->pending_shared); | 1868 | kfree(vgic_cpu->pending_shared); |
| 1943 | kfree(vgic_cpu->active_shared); | 1869 | kfree(vgic_cpu->active_shared); |
| 1944 | kfree(vgic_cpu->pend_act_shared); | 1870 | kfree(vgic_cpu->pend_act_shared); |
| 1945 | kfree(vgic_cpu->vgic_irq_lr_map); | ||
| 1946 | vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); | 1871 | vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); |
| 1947 | vgic_cpu->pending_shared = NULL; | 1872 | vgic_cpu->pending_shared = NULL; |
| 1948 | vgic_cpu->active_shared = NULL; | 1873 | vgic_cpu->active_shared = NULL; |
| 1949 | vgic_cpu->pend_act_shared = NULL; | 1874 | vgic_cpu->pend_act_shared = NULL; |
| 1950 | vgic_cpu->vgic_irq_lr_map = NULL; | ||
| 1951 | } | 1875 | } |
| 1952 | 1876 | ||
| 1953 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | 1877 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) |
| @@ -1958,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | |||
| 1958 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); | 1882 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); |
| 1959 | vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); | 1883 | vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); |
| 1960 | vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); | 1884 | vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); |
| 1961 | vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); | ||
| 1962 | 1885 | ||
| 1963 | if (!vgic_cpu->pending_shared | 1886 | if (!vgic_cpu->pending_shared |
| 1964 | || !vgic_cpu->active_shared | 1887 | || !vgic_cpu->active_shared |
| 1965 | || !vgic_cpu->pend_act_shared | 1888 | || !vgic_cpu->pend_act_shared) { |
| 1966 | || !vgic_cpu->vgic_irq_lr_map) { | ||
| 1967 | kvm_vgic_vcpu_destroy(vcpu); | 1889 | kvm_vgic_vcpu_destroy(vcpu); |
| 1968 | return -ENOMEM; | 1890 | return -ENOMEM; |
| 1969 | } | 1891 | } |
| 1970 | 1892 | ||
| 1971 | memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); | ||
| 1972 | |||
| 1973 | /* | 1893 | /* |
| 1974 | * Store the number of LRs per vcpu, so we don't have to go | 1894 | * Store the number of LRs per vcpu, so we don't have to go |
| 1975 | * all the way to the distributor structure to find out. Only | 1895 | * all the way to the distributor structure to find out. Only |
| @@ -2111,14 +2031,24 @@ int vgic_init(struct kvm *kvm) | |||
| 2111 | break; | 2031 | break; |
| 2112 | } | 2032 | } |
| 2113 | 2033 | ||
| 2114 | for (i = 0; i < dist->nr_irqs; i++) { | 2034 | /* |
| 2115 | if (i < VGIC_NR_PPIS) | 2035 | * Enable and configure all SGIs to be edge-triggere and |
| 2036 | * configure all PPIs as level-triggered. | ||
| 2037 | */ | ||
| 2038 | for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { | ||
| 2039 | if (i < VGIC_NR_SGIS) { | ||
| 2040 | /* SGIs */ | ||
| 2116 | vgic_bitmap_set_irq_val(&dist->irq_enabled, | 2041 | vgic_bitmap_set_irq_val(&dist->irq_enabled, |
| 2117 | vcpu->vcpu_id, i, 1); | 2042 | vcpu->vcpu_id, i, 1); |
| 2118 | if (i < VGIC_NR_PRIVATE_IRQS) | ||
| 2119 | vgic_bitmap_set_irq_val(&dist->irq_cfg, | 2043 | vgic_bitmap_set_irq_val(&dist->irq_cfg, |
| 2120 | vcpu->vcpu_id, i, | 2044 | vcpu->vcpu_id, i, |
| 2121 | VGIC_CFG_EDGE); | 2045 | VGIC_CFG_EDGE); |
| 2046 | } else if (i < VGIC_NR_PRIVATE_IRQS) { | ||
| 2047 | /* PPIs */ | ||
| 2048 | vgic_bitmap_set_irq_val(&dist->irq_cfg, | ||
| 2049 | vcpu->vcpu_id, i, | ||
| 2050 | VGIC_CFG_LEVEL); | ||
| 2051 | } | ||
| 2122 | } | 2052 | } |
| 2123 | 2053 | ||
| 2124 | vgic_enable(vcpu); | 2054 | vgic_enable(vcpu); |
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 44660aee335f..77d42be6970e 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c | |||
| @@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work) | |||
| 94 | 94 | ||
| 95 | trace_kvm_async_pf_completed(addr, gva); | 95 | trace_kvm_async_pf_completed(addr, gva); |
| 96 | 96 | ||
| 97 | /* | ||
| 98 | * This memory barrier pairs with prepare_to_wait's set_current_state() | ||
| 99 | */ | ||
| 100 | smp_mb(); | ||
| 97 | if (waitqueue_active(&vcpu->wq)) | 101 | if (waitqueue_active(&vcpu->wq)) |
| 98 | wake_up_interruptible(&vcpu->wq); | 102 | wake_up_interruptible(&vcpu->wq); |
| 99 | 103 | ||
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 79db45336e3a..46dbc0a7dfc1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | 23 | ||
| 24 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
| 25 | #include <linux/kvm.h> | 25 | #include <linux/kvm.h> |
| 26 | #include <linux/kvm_irqfd.h> | ||
| 26 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
| 27 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
| 28 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
| @@ -34,73 +35,20 @@ | |||
| 34 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
| 35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
| 36 | #include <linux/seqlock.h> | 37 | #include <linux/seqlock.h> |
| 38 | #include <linux/irqbypass.h> | ||
| 37 | #include <trace/events/kvm.h> | 39 | #include <trace/events/kvm.h> |
| 38 | 40 | ||
| 39 | #include <kvm/iodev.h> | 41 | #include <kvm/iodev.h> |
| 40 | 42 | ||
| 41 | #ifdef CONFIG_HAVE_KVM_IRQFD | 43 | #ifdef CONFIG_HAVE_KVM_IRQFD |
| 42 | /* | ||
| 43 | * -------------------------------------------------------------------- | ||
| 44 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | ||
| 45 | * | ||
| 46 | * Credit goes to Avi Kivity for the original idea. | ||
| 47 | * -------------------------------------------------------------------- | ||
| 48 | */ | ||
| 49 | |||
| 50 | /* | ||
| 51 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
| 52 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
| 53 | * trigger. On acknowledgement through the irq ack notifier, the | ||
| 54 | * interrupt is de-asserted and userspace is notified through the | ||
| 55 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
| 56 | * together, so we don't need to track the state of each individual | ||
| 57 | * user. We can also therefore share the same irq source ID. | ||
| 58 | */ | ||
| 59 | struct _irqfd_resampler { | ||
| 60 | struct kvm *kvm; | ||
| 61 | /* | ||
| 62 | * List of resampling struct _irqfd objects sharing this gsi. | ||
| 63 | * RCU list modified under kvm->irqfds.resampler_lock | ||
| 64 | */ | ||
| 65 | struct list_head list; | ||
| 66 | struct kvm_irq_ack_notifier notifier; | ||
| 67 | /* | ||
| 68 | * Entry in list of kvm->irqfd.resampler_list. Use for sharing | ||
| 69 | * resamplers among irqfds on the same gsi. | ||
| 70 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
| 71 | */ | ||
| 72 | struct list_head link; | ||
| 73 | }; | ||
| 74 | |||
| 75 | struct _irqfd { | ||
| 76 | /* Used for MSI fast-path */ | ||
| 77 | struct kvm *kvm; | ||
| 78 | wait_queue_t wait; | ||
| 79 | /* Update side is protected by irqfds.lock */ | ||
| 80 | struct kvm_kernel_irq_routing_entry irq_entry; | ||
| 81 | seqcount_t irq_entry_sc; | ||
| 82 | /* Used for level IRQ fast-path */ | ||
| 83 | int gsi; | ||
| 84 | struct work_struct inject; | ||
| 85 | /* The resampler used by this irqfd (resampler-only) */ | ||
| 86 | struct _irqfd_resampler *resampler; | ||
| 87 | /* Eventfd notified on resample (resampler-only) */ | ||
| 88 | struct eventfd_ctx *resamplefd; | ||
| 89 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
| 90 | struct list_head resampler_link; | ||
| 91 | /* Used for setup/shutdown */ | ||
| 92 | struct eventfd_ctx *eventfd; | ||
| 93 | struct list_head list; | ||
| 94 | poll_table pt; | ||
| 95 | struct work_struct shutdown; | ||
| 96 | }; | ||
| 97 | 44 | ||
| 98 | static struct workqueue_struct *irqfd_cleanup_wq; | 45 | static struct workqueue_struct *irqfd_cleanup_wq; |
| 99 | 46 | ||
| 100 | static void | 47 | static void |
| 101 | irqfd_inject(struct work_struct *work) | 48 | irqfd_inject(struct work_struct *work) |
| 102 | { | 49 | { |
| 103 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 50 | struct kvm_kernel_irqfd *irqfd = |
| 51 | container_of(work, struct kvm_kernel_irqfd, inject); | ||
| 104 | struct kvm *kvm = irqfd->kvm; | 52 | struct kvm *kvm = irqfd->kvm; |
| 105 | 53 | ||
| 106 | if (!irqfd->resampler) { | 54 | if (!irqfd->resampler) { |
| @@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work) | |||
| 121 | static void | 69 | static void |
| 122 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | 70 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) |
| 123 | { | 71 | { |
| 124 | struct _irqfd_resampler *resampler; | 72 | struct kvm_kernel_irqfd_resampler *resampler; |
| 125 | struct kvm *kvm; | 73 | struct kvm *kvm; |
| 126 | struct _irqfd *irqfd; | 74 | struct kvm_kernel_irqfd *irqfd; |
| 127 | int idx; | 75 | int idx; |
| 128 | 76 | ||
| 129 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | 77 | resampler = container_of(kian, |
| 78 | struct kvm_kernel_irqfd_resampler, notifier); | ||
| 130 | kvm = resampler->kvm; | 79 | kvm = resampler->kvm; |
| 131 | 80 | ||
| 132 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | 81 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, |
| @@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | |||
| 141 | } | 90 | } |
| 142 | 91 | ||
| 143 | static void | 92 | static void |
| 144 | irqfd_resampler_shutdown(struct _irqfd *irqfd) | 93 | irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) |
| 145 | { | 94 | { |
| 146 | struct _irqfd_resampler *resampler = irqfd->resampler; | 95 | struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; |
| 147 | struct kvm *kvm = resampler->kvm; | 96 | struct kvm *kvm = resampler->kvm; |
| 148 | 97 | ||
| 149 | mutex_lock(&kvm->irqfds.resampler_lock); | 98 | mutex_lock(&kvm->irqfds.resampler_lock); |
| @@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) | |||
| 168 | static void | 117 | static void |
| 169 | irqfd_shutdown(struct work_struct *work) | 118 | irqfd_shutdown(struct work_struct *work) |
| 170 | { | 119 | { |
| 171 | struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); | 120 | struct kvm_kernel_irqfd *irqfd = |
| 121 | container_of(work, struct kvm_kernel_irqfd, shutdown); | ||
| 172 | u64 cnt; | 122 | u64 cnt; |
| 173 | 123 | ||
| 174 | /* | 124 | /* |
| @@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) | |||
| 191 | /* | 141 | /* |
| 192 | * It is now safe to release the object's resources | 142 | * It is now safe to release the object's resources |
| 193 | */ | 143 | */ |
| 144 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
| 145 | irq_bypass_unregister_consumer(&irqfd->consumer); | ||
| 146 | #endif | ||
| 194 | eventfd_ctx_put(irqfd->eventfd); | 147 | eventfd_ctx_put(irqfd->eventfd); |
| 195 | kfree(irqfd); | 148 | kfree(irqfd); |
| 196 | } | 149 | } |
| @@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work) | |||
| 198 | 151 | ||
| 199 | /* assumes kvm->irqfds.lock is held */ | 152 | /* assumes kvm->irqfds.lock is held */ |
| 200 | static bool | 153 | static bool |
| 201 | irqfd_is_active(struct _irqfd *irqfd) | 154 | irqfd_is_active(struct kvm_kernel_irqfd *irqfd) |
| 202 | { | 155 | { |
| 203 | return list_empty(&irqfd->list) ? false : true; | 156 | return list_empty(&irqfd->list) ? false : true; |
| 204 | } | 157 | } |
| @@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd) | |||
| 209 | * assumes kvm->irqfds.lock is held | 162 | * assumes kvm->irqfds.lock is held |
| 210 | */ | 163 | */ |
| 211 | static void | 164 | static void |
| 212 | irqfd_deactivate(struct _irqfd *irqfd) | 165 | irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) |
| 213 | { | 166 | { |
| 214 | BUG_ON(!irqfd_is_active(irqfd)); | 167 | BUG_ON(!irqfd_is_active(irqfd)); |
| 215 | 168 | ||
| @@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd) | |||
| 218 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); | 171 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); |
| 219 | } | 172 | } |
| 220 | 173 | ||
| 174 | int __attribute__((weak)) kvm_arch_set_irq_inatomic( | ||
| 175 | struct kvm_kernel_irq_routing_entry *irq, | ||
| 176 | struct kvm *kvm, int irq_source_id, | ||
| 177 | int level, | ||
| 178 | bool line_status) | ||
| 179 | { | ||
| 180 | return -EWOULDBLOCK; | ||
| 181 | } | ||
| 182 | |||
| 221 | /* | 183 | /* |
| 222 | * Called with wqh->lock held and interrupts disabled | 184 | * Called with wqh->lock held and interrupts disabled |
| 223 | */ | 185 | */ |
| 224 | static int | 186 | static int |
| 225 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | 187 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) |
| 226 | { | 188 | { |
| 227 | struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); | 189 | struct kvm_kernel_irqfd *irqfd = |
| 190 | container_of(wait, struct kvm_kernel_irqfd, wait); | ||
| 228 | unsigned long flags = (unsigned long)key; | 191 | unsigned long flags = (unsigned long)key; |
| 229 | struct kvm_kernel_irq_routing_entry irq; | 192 | struct kvm_kernel_irq_routing_entry irq; |
| 230 | struct kvm *kvm = irqfd->kvm; | 193 | struct kvm *kvm = irqfd->kvm; |
| @@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | |||
| 238 | irq = irqfd->irq_entry; | 201 | irq = irqfd->irq_entry; |
| 239 | } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); | 202 | } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); |
| 240 | /* An event has been signaled, inject an interrupt */ | 203 | /* An event has been signaled, inject an interrupt */ |
| 241 | if (irq.type == KVM_IRQ_ROUTING_MSI) | 204 | if (kvm_arch_set_irq_inatomic(&irq, kvm, |
| 242 | kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, | 205 | KVM_USERSPACE_IRQ_SOURCE_ID, 1, |
| 243 | false); | 206 | false) == -EWOULDBLOCK) |
| 244 | else | ||
| 245 | schedule_work(&irqfd->inject); | 207 | schedule_work(&irqfd->inject); |
| 246 | srcu_read_unlock(&kvm->irq_srcu, idx); | 208 | srcu_read_unlock(&kvm->irq_srcu, idx); |
| 247 | } | 209 | } |
| @@ -274,37 +236,54 @@ static void | |||
| 274 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | 236 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, |
| 275 | poll_table *pt) | 237 | poll_table *pt) |
| 276 | { | 238 | { |
| 277 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | 239 | struct kvm_kernel_irqfd *irqfd = |
| 240 | container_of(pt, struct kvm_kernel_irqfd, pt); | ||
| 278 | add_wait_queue(wqh, &irqfd->wait); | 241 | add_wait_queue(wqh, &irqfd->wait); |
| 279 | } | 242 | } |
| 280 | 243 | ||
| 281 | /* Must be called under irqfds.lock */ | 244 | /* Must be called under irqfds.lock */ |
| 282 | static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) | 245 | static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) |
| 283 | { | 246 | { |
| 284 | struct kvm_kernel_irq_routing_entry *e; | 247 | struct kvm_kernel_irq_routing_entry *e; |
| 285 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | 248 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; |
| 286 | int i, n_entries; | 249 | int n_entries; |
| 287 | 250 | ||
| 288 | n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); | 251 | n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); |
| 289 | 252 | ||
| 290 | write_seqcount_begin(&irqfd->irq_entry_sc); | 253 | write_seqcount_begin(&irqfd->irq_entry_sc); |
| 291 | 254 | ||
| 292 | irqfd->irq_entry.type = 0; | ||
| 293 | |||
| 294 | e = entries; | 255 | e = entries; |
| 295 | for (i = 0; i < n_entries; ++i, ++e) { | 256 | if (n_entries == 1) |
| 296 | /* Only fast-path MSI. */ | 257 | irqfd->irq_entry = *e; |
| 297 | if (e->type == KVM_IRQ_ROUTING_MSI) | 258 | else |
| 298 | irqfd->irq_entry = *e; | 259 | irqfd->irq_entry.type = 0; |
| 299 | } | ||
| 300 | 260 | ||
| 301 | write_seqcount_end(&irqfd->irq_entry_sc); | 261 | write_seqcount_end(&irqfd->irq_entry_sc); |
| 302 | } | 262 | } |
| 303 | 263 | ||
| 264 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
| 265 | void __attribute__((weak)) kvm_arch_irq_bypass_stop( | ||
| 266 | struct irq_bypass_consumer *cons) | ||
| 267 | { | ||
| 268 | } | ||
| 269 | |||
| 270 | void __attribute__((weak)) kvm_arch_irq_bypass_start( | ||
| 271 | struct irq_bypass_consumer *cons) | ||
| 272 | { | ||
| 273 | } | ||
| 274 | |||
| 275 | int __attribute__((weak)) kvm_arch_update_irqfd_routing( | ||
| 276 | struct kvm *kvm, unsigned int host_irq, | ||
| 277 | uint32_t guest_irq, bool set) | ||
| 278 | { | ||
| 279 | return 0; | ||
| 280 | } | ||
| 281 | #endif | ||
| 282 | |||
| 304 | static int | 283 | static int |
| 305 | kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | 284 | kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) |
| 306 | { | 285 | { |
| 307 | struct _irqfd *irqfd, *tmp; | 286 | struct kvm_kernel_irqfd *irqfd, *tmp; |
| 308 | struct fd f; | 287 | struct fd f; |
| 309 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; | 288 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; |
| 310 | int ret; | 289 | int ret; |
| @@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 340 | irqfd->eventfd = eventfd; | 319 | irqfd->eventfd = eventfd; |
| 341 | 320 | ||
| 342 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { | 321 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { |
| 343 | struct _irqfd_resampler *resampler; | 322 | struct kvm_kernel_irqfd_resampler *resampler; |
| 344 | 323 | ||
| 345 | resamplefd = eventfd_ctx_fdget(args->resamplefd); | 324 | resamplefd = eventfd_ctx_fdget(args->resamplefd); |
| 346 | if (IS_ERR(resamplefd)) { | 325 | if (IS_ERR(resamplefd)) { |
| @@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 428 | * we might race against the POLLHUP | 407 | * we might race against the POLLHUP |
| 429 | */ | 408 | */ |
| 430 | fdput(f); | 409 | fdput(f); |
| 410 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
| 411 | irqfd->consumer.token = (void *)irqfd->eventfd; | ||
| 412 | irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; | ||
| 413 | irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; | ||
| 414 | irqfd->consumer.stop = kvm_arch_irq_bypass_stop; | ||
| 415 | irqfd->consumer.start = kvm_arch_irq_bypass_start; | ||
| 416 | ret = irq_bypass_register_consumer(&irqfd->consumer); | ||
| 417 | if (ret) | ||
| 418 | pr_info("irq bypass consumer (token %p) registration fails: %d\n", | ||
| 419 | irqfd->consumer.token, ret); | ||
| 420 | #endif | ||
| 431 | 421 | ||
| 432 | return 0; | 422 | return 0; |
| 433 | 423 | ||
| @@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
| 469 | } | 459 | } |
| 470 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); | 460 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); |
| 471 | 461 | ||
| 472 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 462 | void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) |
| 473 | { | 463 | { |
| 474 | struct kvm_irq_ack_notifier *kian; | 464 | struct kvm_irq_ack_notifier *kian; |
| 465 | |||
| 466 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | ||
| 467 | link) | ||
| 468 | if (kian->gsi == gsi) | ||
| 469 | kian->irq_acked(kian); | ||
| 470 | } | ||
| 471 | |||
| 472 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
| 473 | { | ||
| 475 | int gsi, idx; | 474 | int gsi, idx; |
| 476 | 475 | ||
| 477 | trace_kvm_ack_irq(irqchip, pin); | 476 | trace_kvm_ack_irq(irqchip, pin); |
| @@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
| 479 | idx = srcu_read_lock(&kvm->irq_srcu); | 478 | idx = srcu_read_lock(&kvm->irq_srcu); |
| 480 | gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); | 479 | gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); |
| 481 | if (gsi != -1) | 480 | if (gsi != -1) |
| 482 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | 481 | kvm_notify_acked_gsi(kvm, gsi); |
| 483 | link) | ||
| 484 | if (kian->gsi == gsi) | ||
| 485 | kian->irq_acked(kian); | ||
| 486 | srcu_read_unlock(&kvm->irq_srcu, idx); | 482 | srcu_read_unlock(&kvm->irq_srcu, idx); |
| 487 | } | 483 | } |
| 488 | 484 | ||
| @@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm) | |||
| 525 | static int | 521 | static int |
| 526 | kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) | 522 | kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) |
| 527 | { | 523 | { |
| 528 | struct _irqfd *irqfd, *tmp; | 524 | struct kvm_kernel_irqfd *irqfd, *tmp; |
| 529 | struct eventfd_ctx *eventfd; | 525 | struct eventfd_ctx *eventfd; |
| 530 | 526 | ||
| 531 | eventfd = eventfd_ctx_fdget(args->fd); | 527 | eventfd = eventfd_ctx_fdget(args->fd); |
| @@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 581 | void | 577 | void |
| 582 | kvm_irqfd_release(struct kvm *kvm) | 578 | kvm_irqfd_release(struct kvm *kvm) |
| 583 | { | 579 | { |
| 584 | struct _irqfd *irqfd, *tmp; | 580 | struct kvm_kernel_irqfd *irqfd, *tmp; |
| 585 | 581 | ||
| 586 | spin_lock_irq(&kvm->irqfds.lock); | 582 | spin_lock_irq(&kvm->irqfds.lock); |
| 587 | 583 | ||
| @@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm) | |||
| 604 | */ | 600 | */ |
| 605 | void kvm_irq_routing_update(struct kvm *kvm) | 601 | void kvm_irq_routing_update(struct kvm *kvm) |
| 606 | { | 602 | { |
| 607 | struct _irqfd *irqfd; | 603 | struct kvm_kernel_irqfd *irqfd; |
| 608 | 604 | ||
| 609 | spin_lock_irq(&kvm->irqfds.lock); | 605 | spin_lock_irq(&kvm->irqfds.lock); |
| 610 | 606 | ||
| 611 | list_for_each_entry(irqfd, &kvm->irqfds.items, list) | 607 | list_for_each_entry(irqfd, &kvm->irqfds.items, list) { |
| 612 | irqfd_update(kvm, irqfd); | 608 | irqfd_update(kvm, irqfd); |
| 613 | 609 | ||
| 610 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
| 611 | if (irqfd->producer) { | ||
| 612 | int ret = kvm_arch_update_irqfd_routing( | ||
| 613 | irqfd->kvm, irqfd->producer->irq, | ||
| 614 | irqfd->gsi, 1); | ||
| 615 | WARN_ON(ret); | ||
| 616 | } | ||
| 617 | #endif | ||
| 618 | } | ||
| 619 | |||
| 614 | spin_unlock_irq(&kvm->irqfds.lock); | 620 | spin_unlock_irq(&kvm->irqfds.lock); |
| 615 | } | 621 | } |
| 616 | 622 | ||
| @@ -914,9 +920,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
| 914 | return -EINVAL; | 920 | return -EINVAL; |
| 915 | 921 | ||
| 916 | /* ioeventfd with no length can't be combined with DATAMATCH */ | 922 | /* ioeventfd with no length can't be combined with DATAMATCH */ |
| 917 | if (!args->len && | 923 | if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) |
| 918 | args->flags & (KVM_IOEVENTFD_FLAG_PIO | | ||
| 919 | KVM_IOEVENTFD_FLAG_DATAMATCH)) | ||
| 920 | return -EINVAL; | 924 | return -EINVAL; |
| 921 | 925 | ||
| 922 | ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); | 926 | ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); |
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index d7ea8e20dae4..f0b08a2a48ba 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
| @@ -31,16 +31,6 @@ | |||
| 31 | #include <trace/events/kvm.h> | 31 | #include <trace/events/kvm.h> |
| 32 | #include "irq.h" | 32 | #include "irq.h" |
| 33 | 33 | ||
| 34 | struct kvm_irq_routing_table { | ||
| 35 | int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; | ||
| 36 | u32 nr_rt_entries; | ||
| 37 | /* | ||
| 38 | * Array indexed by gsi. Each entry contains list of irq chips | ||
| 39 | * the gsi is connected to. | ||
| 40 | */ | ||
| 41 | struct hlist_head map[0]; | ||
| 42 | }; | ||
| 43 | |||
| 44 | int kvm_irq_map_gsi(struct kvm *kvm, | 34 | int kvm_irq_map_gsi(struct kvm *kvm, |
| 45 | struct kvm_kernel_irq_routing_entry *entries, int gsi) | 35 | struct kvm_kernel_irq_routing_entry *entries, int gsi) |
| 46 | { | 36 | { |
| @@ -154,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
| 154 | 144 | ||
| 155 | /* | 145 | /* |
| 156 | * Do not allow GSI to be mapped to the same irqchip more than once. | 146 | * Do not allow GSI to be mapped to the same irqchip more than once. |
| 157 | * Allow only one to one mapping between GSI and MSI. | 147 | * Allow only one to one mapping between GSI and non-irqchip routing. |
| 158 | */ | 148 | */ |
| 159 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) | 149 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) |
| 160 | if (ei->type == KVM_IRQ_ROUTING_MSI || | 150 | if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || |
| 161 | ue->type == KVM_IRQ_ROUTING_MSI || | 151 | ue->type != KVM_IRQ_ROUTING_IRQCHIP || |
| 162 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | 152 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) |
| 163 | return r; | 153 | return r; |
| 164 | 154 | ||
| @@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
| 231 | kvm_irq_routing_update(kvm); | 221 | kvm_irq_routing_update(kvm); |
| 232 | mutex_unlock(&kvm->irq_lock); | 222 | mutex_unlock(&kvm->irq_lock); |
| 233 | 223 | ||
| 224 | kvm_arch_irq_routing_update(kvm); | ||
| 225 | |||
| 234 | synchronize_srcu_expedited(&kvm->irq_srcu); | 226 | synchronize_srcu_expedited(&kvm->irq_srcu); |
| 235 | 227 | ||
| 236 | new = old; | 228 | new = old; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..484079efea5b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
| 230 | init_waitqueue_head(&vcpu->wq); | 230 | init_waitqueue_head(&vcpu->wq); |
| 231 | kvm_async_pf_vcpu_init(vcpu); | 231 | kvm_async_pf_vcpu_init(vcpu); |
| 232 | 232 | ||
| 233 | vcpu->pre_pcpu = -1; | ||
| 234 | INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); | ||
| 235 | |||
| 233 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 236 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
| 234 | if (!page) { | 237 | if (!page) { |
| 235 | r = -ENOMEM; | 238 | r = -ENOMEM; |
| @@ -2018,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
| 2018 | } while (single_task_running() && ktime_before(cur, stop)); | 2021 | } while (single_task_running() && ktime_before(cur, stop)); |
| 2019 | } | 2022 | } |
| 2020 | 2023 | ||
| 2024 | kvm_arch_vcpu_blocking(vcpu); | ||
| 2025 | |||
| 2021 | for (;;) { | 2026 | for (;;) { |
| 2022 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 2027 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
| 2023 | 2028 | ||
| @@ -2031,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
| 2031 | finish_wait(&vcpu->wq, &wait); | 2036 | finish_wait(&vcpu->wq, &wait); |
| 2032 | cur = ktime_get(); | 2037 | cur = ktime_get(); |
| 2033 | 2038 | ||
| 2039 | kvm_arch_vcpu_unblocking(vcpu); | ||
| 2034 | out: | 2040 | out: |
| 2035 | block_ns = ktime_to_ns(cur) - ktime_to_ns(start); | 2041 | block_ns = ktime_to_ns(cur) - ktime_to_ns(start); |
| 2036 | 2042 | ||
| @@ -2718,6 +2724,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) | |||
| 2718 | case KVM_CAP_IRQFD: | 2724 | case KVM_CAP_IRQFD: |
| 2719 | case KVM_CAP_IRQFD_RESAMPLE: | 2725 | case KVM_CAP_IRQFD_RESAMPLE: |
| 2720 | #endif | 2726 | #endif |
| 2727 | case KVM_CAP_IOEVENTFD_ANY_LENGTH: | ||
| 2721 | case KVM_CAP_CHECK_EXTENSION_VM: | 2728 | case KVM_CAP_CHECK_EXTENSION_VM: |
| 2722 | return 1; | 2729 | return 1; |
| 2723 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | 2730 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
| @@ -3341,7 +3348,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 3341 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | 3348 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
| 3342 | return -ENOSPC; | 3349 | return -ENOSPC; |
| 3343 | 3350 | ||
| 3344 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 3351 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * |
| 3345 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3352 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
| 3346 | if (!new_bus) | 3353 | if (!new_bus) |
| 3347 | return -ENOMEM; | 3354 | return -ENOMEM; |
| @@ -3373,7 +3380,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
| 3373 | if (r) | 3380 | if (r) |
| 3374 | return r; | 3381 | return r; |
| 3375 | 3382 | ||
| 3376 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * | 3383 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * |
| 3377 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3384 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
| 3378 | if (!new_bus) | 3385 | if (!new_bus) |
| 3379 | return -ENOMEM; | 3386 | return -ENOMEM; |
diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig new file mode 100644 index 000000000000..89a414f815d2 --- /dev/null +++ b/virt/lib/Kconfig | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | config IRQ_BYPASS_MANAGER | ||
| 2 | tristate | ||
diff --git a/virt/lib/Makefile b/virt/lib/Makefile new file mode 100644 index 000000000000..901228d1ffbc --- /dev/null +++ b/virt/lib/Makefile | |||
| @@ -0,0 +1 @@ | |||
| obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o | |||
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c new file mode 100644 index 000000000000..09a03b5a21ff --- /dev/null +++ b/virt/lib/irqbypass.c | |||
| @@ -0,0 +1,257 @@ | |||
| 1 | /* | ||
| 2 | * IRQ offload/bypass manager | ||
| 3 | * | ||
| 4 | * Copyright (C) 2015 Red Hat, Inc. | ||
| 5 | * Copyright (c) 2015 Linaro Ltd. | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License version 2 as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * Various virtualization hardware acceleration techniques allow bypassing or | ||
| 12 | * offloading interrupts received from devices around the host kernel. Posted | ||
| 13 | * Interrupts on Intel VT-d systems can allow interrupts to be received | ||
| 14 | * directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical | ||
| 15 | * interrupts to be directly deactivated by the guest. This manager allows | ||
| 16 | * interrupt producers and consumers to find each other to enable this sort of | ||
| 17 | * bypass. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <linux/irqbypass.h> | ||
| 21 | #include <linux/list.h> | ||
| 22 | #include <linux/module.h> | ||
| 23 | #include <linux/mutex.h> | ||
| 24 | |||
| 25 | MODULE_LICENSE("GPL v2"); | ||
| 26 | MODULE_DESCRIPTION("IRQ bypass manager utility module"); | ||
| 27 | |||
| 28 | static LIST_HEAD(producers); | ||
| 29 | static LIST_HEAD(consumers); | ||
| 30 | static DEFINE_MUTEX(lock); | ||
| 31 | |||
| 32 | /* @lock must be held when calling connect */ | ||
| 33 | static int __connect(struct irq_bypass_producer *prod, | ||
| 34 | struct irq_bypass_consumer *cons) | ||
| 35 | { | ||
| 36 | int ret = 0; | ||
| 37 | |||
| 38 | if (prod->stop) | ||
| 39 | prod->stop(prod); | ||
| 40 | if (cons->stop) | ||
| 41 | cons->stop(cons); | ||
| 42 | |||
| 43 | if (prod->add_consumer) | ||
| 44 | ret = prod->add_consumer(prod, cons); | ||
| 45 | |||
| 46 | if (!ret) { | ||
| 47 | ret = cons->add_producer(cons, prod); | ||
| 48 | if (ret && prod->del_consumer) | ||
| 49 | prod->del_consumer(prod, cons); | ||
| 50 | } | ||
| 51 | |||
| 52 | if (cons->start) | ||
| 53 | cons->start(cons); | ||
| 54 | if (prod->start) | ||
| 55 | prod->start(prod); | ||
| 56 | |||
| 57 | return ret; | ||
| 58 | } | ||
| 59 | |||
| 60 | /* @lock must be held when calling disconnect */ | ||
| 61 | static void __disconnect(struct irq_bypass_producer *prod, | ||
| 62 | struct irq_bypass_consumer *cons) | ||
| 63 | { | ||
| 64 | if (prod->stop) | ||
| 65 | prod->stop(prod); | ||
| 66 | if (cons->stop) | ||
| 67 | cons->stop(cons); | ||
| 68 | |||
| 69 | cons->del_producer(cons, prod); | ||
| 70 | |||
| 71 | if (prod->del_consumer) | ||
| 72 | prod->del_consumer(prod, cons); | ||
| 73 | |||
| 74 | if (cons->start) | ||
| 75 | cons->start(cons); | ||
| 76 | if (prod->start) | ||
| 77 | prod->start(prod); | ||
| 78 | } | ||
| 79 | |||
| 80 | /** | ||
| 81 | * irq_bypass_register_producer - register IRQ bypass producer | ||
| 82 | * @producer: pointer to producer structure | ||
| 83 | * | ||
| 84 | * Add the provided IRQ producer to the list of producers and connect | ||
| 85 | * with any matching token found on the IRQ consumers list. | ||
| 86 | */ | ||
| 87 | int irq_bypass_register_producer(struct irq_bypass_producer *producer) | ||
| 88 | { | ||
| 89 | struct irq_bypass_producer *tmp; | ||
| 90 | struct irq_bypass_consumer *consumer; | ||
| 91 | |||
| 92 | might_sleep(); | ||
| 93 | |||
| 94 | if (!try_module_get(THIS_MODULE)) | ||
| 95 | return -ENODEV; | ||
| 96 | |||
| 97 | mutex_lock(&lock); | ||
| 98 | |||
| 99 | list_for_each_entry(tmp, &producers, node) { | ||
| 100 | if (tmp->token == producer->token) { | ||
| 101 | mutex_unlock(&lock); | ||
| 102 | module_put(THIS_MODULE); | ||
| 103 | return -EBUSY; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 107 | list_for_each_entry(consumer, &consumers, node) { | ||
| 108 | if (consumer->token == producer->token) { | ||
| 109 | int ret = __connect(producer, consumer); | ||
| 110 | if (ret) { | ||
| 111 | mutex_unlock(&lock); | ||
| 112 | module_put(THIS_MODULE); | ||
| 113 | return ret; | ||
| 114 | } | ||
| 115 | break; | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | list_add(&producer->node, &producers); | ||
| 120 | |||
| 121 | mutex_unlock(&lock); | ||
| 122 | |||
| 123 | return 0; | ||
| 124 | } | ||
| 125 | EXPORT_SYMBOL_GPL(irq_bypass_register_producer); | ||
| 126 | |||
| 127 | /** | ||
| 128 | * irq_bypass_unregister_producer - unregister IRQ bypass producer | ||
| 129 | * @producer: pointer to producer structure | ||
| 130 | * | ||
| 131 | * Remove a previously registered IRQ producer from the list of producers | ||
| 132 | * and disconnect it from any connected IRQ consumer. | ||
| 133 | */ | ||
| 134 | void irq_bypass_unregister_producer(struct irq_bypass_producer *producer) | ||
| 135 | { | ||
| 136 | struct irq_bypass_producer *tmp; | ||
| 137 | struct irq_bypass_consumer *consumer; | ||
| 138 | |||
| 139 | might_sleep(); | ||
| 140 | |||
| 141 | if (!try_module_get(THIS_MODULE)) | ||
| 142 | return; /* nothing in the list anyway */ | ||
| 143 | |||
| 144 | mutex_lock(&lock); | ||
| 145 | |||
| 146 | list_for_each_entry(tmp, &producers, node) { | ||
| 147 | if (tmp->token != producer->token) | ||
| 148 | continue; | ||
| 149 | |||
| 150 | list_for_each_entry(consumer, &consumers, node) { | ||
| 151 | if (consumer->token == producer->token) { | ||
| 152 | __disconnect(producer, consumer); | ||
| 153 | break; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | |||
| 157 | list_del(&producer->node); | ||
| 158 | module_put(THIS_MODULE); | ||
| 159 | break; | ||
| 160 | } | ||
| 161 | |||
| 162 | mutex_unlock(&lock); | ||
| 163 | |||
| 164 | module_put(THIS_MODULE); | ||
| 165 | } | ||
| 166 | EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer); | ||
| 167 | |||
| 168 | /** | ||
| 169 | * irq_bypass_register_consumer - register IRQ bypass consumer | ||
| 170 | * @consumer: pointer to consumer structure | ||
| 171 | * | ||
| 172 | * Add the provided IRQ consumer to the list of consumers and connect | ||
| 173 | * with any matching token found on the IRQ producer list. | ||
| 174 | */ | ||
| 175 | int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) | ||
| 176 | { | ||
| 177 | struct irq_bypass_consumer *tmp; | ||
| 178 | struct irq_bypass_producer *producer; | ||
| 179 | |||
| 180 | if (!consumer->add_producer || !consumer->del_producer) | ||
| 181 | return -EINVAL; | ||
| 182 | |||
| 183 | might_sleep(); | ||
| 184 | |||
| 185 | if (!try_module_get(THIS_MODULE)) | ||
| 186 | return -ENODEV; | ||
| 187 | |||
| 188 | mutex_lock(&lock); | ||
| 189 | |||
| 190 | list_for_each_entry(tmp, &consumers, node) { | ||
| 191 | if (tmp->token == consumer->token) { | ||
| 192 | mutex_unlock(&lock); | ||
| 193 | module_put(THIS_MODULE); | ||
| 194 | return -EBUSY; | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | list_for_each_entry(producer, &producers, node) { | ||
| 199 | if (producer->token == consumer->token) { | ||
| 200 | int ret = __connect(producer, consumer); | ||
| 201 | if (ret) { | ||
| 202 | mutex_unlock(&lock); | ||
| 203 | module_put(THIS_MODULE); | ||
| 204 | return ret; | ||
| 205 | } | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | list_add(&consumer->node, &consumers); | ||
| 211 | |||
| 212 | mutex_unlock(&lock); | ||
| 213 | |||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | EXPORT_SYMBOL_GPL(irq_bypass_register_consumer); | ||
| 217 | |||
| 218 | /** | ||
| 219 | * irq_bypass_unregister_consumer - unregister IRQ bypass consumer | ||
| 220 | * @consumer: pointer to consumer structure | ||
| 221 | * | ||
| 222 | * Remove a previously registered IRQ consumer from the list of consumers | ||
| 223 | * and disconnect it from any connected IRQ producer. | ||
| 224 | */ | ||
| 225 | void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) | ||
| 226 | { | ||
| 227 | struct irq_bypass_consumer *tmp; | ||
| 228 | struct irq_bypass_producer *producer; | ||
| 229 | |||
| 230 | might_sleep(); | ||
| 231 | |||
| 232 | if (!try_module_get(THIS_MODULE)) | ||
| 233 | return; /* nothing in the list anyway */ | ||
| 234 | |||
| 235 | mutex_lock(&lock); | ||
| 236 | |||
| 237 | list_for_each_entry(tmp, &consumers, node) { | ||
| 238 | if (tmp->token != consumer->token) | ||
| 239 | continue; | ||
| 240 | |||
| 241 | list_for_each_entry(producer, &producers, node) { | ||
| 242 | if (producer->token == consumer->token) { | ||
| 243 | __disconnect(producer, consumer); | ||
| 244 | break; | ||
| 245 | } | ||
| 246 | } | ||
| 247 | |||
| 248 | list_del(&consumer->node); | ||
| 249 | module_put(THIS_MODULE); | ||
| 250 | break; | ||
| 251 | } | ||
| 252 | |||
| 253 | mutex_unlock(&lock); | ||
| 254 | |||
| 255 | module_put(THIS_MODULE); | ||
| 256 | } | ||
| 257 | EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer); | ||
