author    Paolo Bonzini <pbonzini@redhat.com>  2015-11-04 10:24:17 -0500
committer Paolo Bonzini <pbonzini@redhat.com>  2015-11-04 10:24:17 -0500
commit    197a4f4b063e4e7a603ff1de56b3cf0400fabc30 (patch)
tree      36a3d057cec3aff49cf2fe9df3e63218595dd68b
parent    d6cf98e06ea4c4071596bc28f2a0f21412d5c6dc (diff)
parent    26caea7693cb99833fe4ecc544c842289d6b3f69 (diff)
Merge tag 'kvm-arm-for-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/ARM Changes for v4.4-rc1

Includes a number of fixes for the arch-timer, introducing proper
level-triggered semantics for the arch-timers, a series of patches to
synchronously halt a guest (prerequisite for IRQ forwarding), some
tracepoint improvements, a tweak for the EL2 panic handlers, some more
VGIC cleanups getting rid of redundant state, and finally a stylistic
change that gets rid of some ctags warnings.

Conflicts:
	arch/x86/include/asm/kvm_host.h
-rw-r--r--  Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt | 187
-rw-r--r--  Documentation/virtual/kvm/devices/arm-vgic.txt      |  18
-rw-r--r--  arch/arm/include/asm/kvm_arm.h                      |  20
-rw-r--r--  arch/arm/include/asm/kvm_host.h                     |   5
-rw-r--r--  arch/arm/kvm/Kconfig                                |   3
-rw-r--r--  arch/arm/kvm/arm.c                                  |  78
-rw-r--r--  arch/arm/kvm/psci.c                                 |  10
-rw-r--r--  arch/arm/kvm/trace.h                                |  10
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h                    |  16
-rw-r--r--  arch/arm64/include/asm/kvm_host.h                   |   5
-rw-r--r--  arch/arm64/kvm/Kconfig                              |   2
-rw-r--r--  arch/arm64/kvm/hyp.S                                |   8
-rw-r--r--  arch/mips/include/asm/kvm_host.h                    |   2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h                 |   2
-rw-r--r--  arch/s390/include/asm/kvm_host.h                    |   2
-rw-r--r--  arch/x86/include/asm/kvm_host.h                     |   4
-rw-r--r--  include/kvm/arm_arch_timer.h                        |   4
-rw-r--r--  include/kvm/arm_vgic.h                              |  16
-rw-r--r--  include/linux/kvm_host.h                            |   2
-rw-r--r--  virt/kvm/arm/arch_timer.c                           | 174
-rw-r--r--  virt/kvm/arm/trace.h                                |  63
-rw-r--r--  virt/kvm/arm/vgic-v2.c                              |   6
-rw-r--r--  virt/kvm/arm/vgic-v3.c                              |   6
-rw-r--r--  virt/kvm/arm/vgic.c                                 | 327
-rw-r--r--  virt/kvm/kvm_main.c                                 |   3
25 files changed, 676 insertions, 297 deletions
diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
new file mode 100644
index 000000000000..38bca2835278
--- /dev/null
+++ b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
@@ -0,0 +1,187 @@
KVM/ARM VGIC Forwarded Physical Interrupts
==========================================

The KVM/ARM code implements software support for the ARM Generic
Interrupt Controller's (GIC's) hardware support for virtualization by
allowing software to inject virtual interrupts to a VM, which the guest
OS sees as regular interrupts. The code is famously known as the VGIC.

Some of these virtual interrupts, however, correspond to physical
interrupts from real physical devices. One example could be the
architected timer, which itself supports virtualization, and therefore
lets a guest OS program the hardware device directly to raise an
interrupt at some point in time. When such an interrupt is raised, the
host OS initially handles the interrupt and must somehow signal this
event as a virtual interrupt to the guest. Another example could be a
passthrough device, where the physical interrupts are initially handled
by the host, but the device driver for the device lives in the guest OS
and KVM must therefore somehow inject a virtual interrupt on behalf of
the physical one to the guest OS.

These virtual interrupts corresponding to a physical interrupt on the
host are called forwarded physical interrupts, but are also sometimes
referred to as 'virtualized physical interrupts' and 'mapped interrupts'.

Forwarded physical interrupts are handled slightly differently compared
to virtual interrupts generated purely by a software emulated device.


The HW bit
----------
Virtual interrupts are signalled to the guest by programming the List
Registers (LRs) on the GIC before running a VCPU. The LR is programmed
with the virtual IRQ number and the state of the interrupt (Pending,
Active, or Pending+Active). When the guest ACKs and EOIs a virtual
interrupt, the LR state moves from Pending to Active, and finally to
inactive.

The LRs include an extra bit, called the HW bit. When this bit is set,
KVM must also program an additional field in the LR, the physical IRQ
number, to link the virtual with the physical IRQ.

When the HW bit is set, KVM must EITHER set the Pending OR the Active
bit, never both at the same time.

Setting the HW bit causes the hardware to deactivate the physical
interrupt on the physical distributor when the guest deactivates the
corresponding virtual interrupt.


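As a rough sketch (not actual KVM code; the structure and helper below are
invented purely for illustration), programming an LR for a forwarded
interrupt would look roughly like this:

    #include <linux/types.h>

    /* Hypothetical LR descriptor, for illustration only. */
    struct example_lr {
            u32  virt_irq;  /* virtual IRQ number presented to the guest */
            u32  phys_irq;  /* physical IRQ number, only used when hw is set */
            bool hw;        /* HW bit: guest deactivation also deactivates phys_irq */
            bool pending;
            bool active;
    };

    /* Present a forwarded (mapped) interrupt to the guest as Pending. */
    static void example_program_forwarded_lr(struct example_lr *lr,
                                             u32 virt_irq, u32 phys_irq)
    {
            lr->virt_irq = virt_irq;
            lr->phys_irq = phys_irq;
            lr->hw       = true;
            /* With the HW bit set, only one of Pending/Active may be set. */
            lr->pending  = true;
            lr->active   = false;
    }

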
Forwarded Physical Interrupts Life Cycle
----------------------------------------

The state of forwarded physical interrupts is managed in the following way:

 - The physical interrupt is acked by the host, and becomes active on
   the physical distributor (*).
 - KVM sets the LR.Pending bit, because this is the only way the GICV
   interface is going to present it to the guest.
 - LR.Pending will stay set as long as the guest has not acked the interrupt.
 - LR.Pending transitions to LR.Active on the guest read of the IAR, as
   expected.
 - On guest EOI, the *physical distributor* active bit gets cleared,
   but the LR.Active is left untouched (set).
 - KVM clears the LR on VM exits when the physical distributor
   active state has been cleared.

(*): The host handling is slightly more complicated. For some forwarded
interrupts (shared), KVM directly sets the active state on the physical
distributor before entering the guest, because the interrupt is never actually
handled on the host (see details on the timer as an example below). For other
forwarded interrupts (non-shared) the host does not deactivate the interrupt
when the host ISR completes, but leaves the interrupt active until the guest
deactivates it. Leaving the interrupt active is allowed, because Linux
configures the physical GIC with EOIMode=1, which causes EOI operations to
perform a priority drop allowing the GIC to receive other interrupts of the
default priority.


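The same life cycle can be sketched in code; the helpers below are
hypothetical and only mirror the steps listed above
(example_program_forwarded_lr is the sketch from the previous section):

    /* Hypothetical helpers, for illustration only. */
    void example_dist_set_active(u32 phys_irq, bool active);
    bool example_dist_is_active(u32 phys_irq);
    void example_clear_lr(struct example_lr *lr);

    /*
     * Before entering the guest: the forwarded IRQ is active on the
     * physical distributor and presented to the guest as Pending via the LR.
     */
    static void example_inject_forwarded(struct example_lr *lr,
                                         u32 virt_irq, u32 phys_irq)
    {
            example_dist_set_active(phys_irq, true);
            example_program_forwarded_lr(lr, virt_irq, phys_irq);
    }

    /*
     * On VM exit: recycle the LR only once the guest EOI/deactivate has
     * cleared the active state on the physical distributor.
     */
    static void example_sync_forwarded(struct example_lr *lr)
    {
            if (!example_dist_is_active(lr->phys_irq))
                    example_clear_lr(lr);
    }

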
Forwarded Edge and Level Triggered PPIs and SPIs
------------------------------------------------
A forwarded physical interrupt should always be active on the
physical distributor when it is injected to a guest.

Level-triggered interrupts will keep the interrupt line to the GIC
asserted, typically until the guest programs the device to deassert the
line. This means that the interrupt will remain pending on the physical
distributor until the guest has reprogrammed the device. Since we
always run the VM with interrupts enabled on the CPU, a pending
interrupt will exit the guest as soon as we switch into the guest,
preventing the guest from ever making progress as the process repeats
over and over. Therefore, the active state on the physical distributor
must be set when entering the guest, preventing the GIC from forwarding
the pending interrupt to the CPU. As soon as the guest deactivates the
interrupt, the physical line is sampled by the hardware again and the host
takes a new interrupt if and only if the physical line is still asserted.

Edge-triggered interrupts do not exhibit the same problem with
preventing guest execution that level-triggered interrupts do. One
option is to not use the HW bit at all, and inject edge-triggered interrupts
from a physical device as pure virtual interrupts. But that would
potentially slow down handling of the interrupt in the guest, because a
physical interrupt occurring in the middle of the guest ISR would
preempt the guest for the host to handle the interrupt. Additionally,
if you configure the system to handle interrupts on a separate physical
core from that running your VCPU, you still have to interrupt the VCPU
to queue the pending state onto the LR, even though the guest won't use
this information until the guest ISR completes. Therefore, the HW
bit should always be set for forwarded edge-triggered interrupts. With
the HW bit set, the virtual interrupt is injected and additional
physical interrupts occurring before the guest deactivates the interrupt
simply mark the state on the physical distributor as Pending+Active. As
soon as the guest deactivates the interrupt, the host takes another
interrupt if and only if there was a physical interrupt between injecting
the forwarded interrupt to the guest and the guest deactivating the
interrupt.

Consequently, whenever we schedule a VCPU with one or more LRs with the
HW bit set, the interrupt must also be active on the physical
distributor.


Forwarded LPIs
--------------
LPIs, introduced in GICv3, are always edge-triggered and do not have an
active state. They become pending when a device signals them, and as
soon as they are acked by the CPU, they are inactive again.

It therefore doesn't make sense, and is not supported, to set the HW bit
for physical LPIs that are forwarded to a VM as virtual interrupts,
typically virtual SPIs.

For LPIs, there is no other choice than to preempt the VCPU thread if
necessary, and queue the pending state onto the LR.


Putting It Together: The Architected Timer
------------------------------------------
The architected timer is a device that signals interrupts with level
triggered semantics. The timer hardware is directly accessed by VCPUs
which program the timer to fire at some point in time. Each VCPU on a
system programs the timer to fire at different times, and therefore the
hardware is multiplexed between multiple VCPUs. This is implemented by
context-switching the timer state along with each VCPU thread.

However, this means that a scenario like the following is entirely
possible, and in fact, typical:

1.  KVM runs the VCPU
2.  The guest programs the timer to fire in T+100
3.  The guest is idle and calls WFI (wait-for-interrupt)
4.  The hardware traps to the host
5.  KVM stores the timer state to memory and disables the hardware timer
6.  KVM schedules a soft timer to fire in T+(100 - time since step 2)
7.  KVM puts the VCPU thread to sleep (on a waitqueue)
8.  The soft timer fires, waking up the VCPU thread
9.  KVM reprograms the timer hardware with the VCPU's values
10. KVM marks the timer interrupt as active on the physical distributor
11. KVM injects a forwarded physical interrupt to the guest
12. KVM runs the VCPU

Notice that KVM injects a forwarded physical interrupt in step 11 without
the corresponding interrupt having actually fired on the host. That is
exactly why we mark the timer interrupt as active in step 10, because
the active state on the physical distributor is part of the state
belonging to the timer hardware, which is context-switched along with
the VCPU thread.

If the guest does not idle because it is busy, the flow looks like this
instead:

1.  KVM runs the VCPU
2.  The guest programs the timer to fire in T+100
3.  At T+100 the timer fires and a physical IRQ causes the VM to exit
    (note that this initially only traps to EL2 and does not run the host ISR
    until KVM has returned to the host).
4.  With interrupts still disabled on the CPU coming back from the guest, KVM
    stores the virtual timer state to memory and disables the virtual hw timer.
5.  KVM looks at the timer state (in memory) and injects a forwarded physical
    interrupt because it concludes the timer has expired.
6.  KVM marks the timer interrupt as active on the physical distributor
7.  KVM enables the timer, enables interrupts, and runs the VCPU

Notice that again the forwarded physical interrupt is injected to the
guest without having actually been handled on the host. In this case it
is because the physical interrupt is never actually seen by the host: the
timer is disabled upon guest return, and the virtual forwarded interrupt is
injected on the KVM guest entry path.
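
In code terms, the level-triggered semantics introduced here boil down to
computing the timer's output line level from its registers. A minimal
sketch (the function name is invented; the control bits are the ones tested
by kvm_timer_irq_can_fire()/kvm_timer_should_fire() in the arch_timer.c
changes below):

    #include <linux/types.h>
    #include <clocksource/arm_arch_timer.h>

    /* Conceptual line level of the virtual timer, for illustration only. */
    static bool example_timer_line_level(u32 cntv_ctl, u64 cntv_cval, u64 now)
    {
            bool can_fire = (cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
                            !(cntv_ctl & ARCH_TIMER_CTRL_IT_MASK);

            return can_fire && cntv_cval <= now;
    }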
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 3fb905429e8a..59541d49e15c 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -44,28 +44,29 @@ Groups:
   Attributes:
     The attr field of kvm_device_attr encodes two values:
     bits:   | 63 .... 40 | 39 ..  32  | 31 .... 0 |
-    values: |  reserved  |   cpu id   |   offset  |
+    values: |  reserved  | vcpu_index |   offset  |
 
   All distributor regs are (rw, 32-bit)
 
   The offset is relative to the "Distributor base address" as defined in the
   GICv2 specs. Getting or setting such a register has the same effect as
-  reading or writing the register on the actual hardware from the cpu
-  specified with cpu id field. Note that most distributor fields are not
-  banked, but return the same value regardless of the cpu id used to access
-  the register.
+  reading or writing the register on the actual hardware from the cpu whose
+  index is specified with the vcpu_index field. Note that most distributor
+  fields are not banked, but return the same value regardless of the
+  vcpu_index used to access the register.
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
     - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
-    -ENODEV: Getting or setting this register is not yet supported
+    -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
 
   KVM_DEV_ARM_VGIC_GRP_CPU_REGS
   Attributes:
     The attr field of kvm_device_attr encodes two values:
     bits:   | 63 .... 40 | 39 ..  32  | 31 .... 0 |
-    values: |  reserved  |   cpu id   |   offset  |
+    values: |  reserved  | vcpu_index |   offset  |
 
   All CPU interface regs are (rw, 32-bit)
 
@@ -91,8 +92,9 @@ Groups:
     - Priorities are not implemented, and registers are RAZ/WI
     - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
-    -ENODEV: Getting or setting this register is not yet supported
+    -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
 
   KVM_DEV_ARM_VGIC_GRP_NR_IRQS
   Attributes:
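
To illustrate the attr encoding described above from userspace, a minimal
sketch (the helper name is invented; the bit layout is the one documented in
this file):

    #include <stdint.h>

    /*
     * Pack a vcpu_index and register offset into the attr field:
     *   bits 63..40  reserved (zero)
     *   bits 39..32  vcpu_index
     *   bits 31..0   register offset
     */
    static uint64_t vgic_reg_attr(uint8_t vcpu_index, uint32_t offset)
    {
            return ((uint64_t)vcpu_index << 32) | offset;
    }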
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index d995821f1698..dc641ddf0784 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -218,4 +218,24 @@
218#define HSR_DABT_CM (1U << 8) 218#define HSR_DABT_CM (1U << 8)
219#define HSR_DABT_EA (1U << 9) 219#define HSR_DABT_EA (1U << 9)
220 220
221#define kvm_arm_exception_type \
222 {0, "RESET" }, \
223 {1, "UNDEFINED" }, \
224 {2, "SOFTWARE" }, \
225 {3, "PREF_ABORT" }, \
226 {4, "DATA_ABORT" }, \
227 {5, "IRQ" }, \
228 {6, "FIQ" }, \
229 {7, "HVC" }
230
231#define HSRECN(x) { HSR_EC_##x, #x }
232
233#define kvm_arm_exception_class \
234 HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
235 HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
236 HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
237 HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
238 HSRECN(DABT), HSRECN(DABT_HYP)
239
240
221#endif /* __ARM_KVM_ARM_H__ */ 241#endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c4072d9f32c7..6692982c9b57 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -126,7 +126,10 @@ struct kvm_vcpu_arch {
126 * here. 126 * here.
127 */ 127 */
128 128
129 /* Don't run the guest on this vcpu */ 129 /* vcpu power-off state */
130 bool power_off;
131
132 /* Don't run the guest (internal implementation need) */
130 bool pause; 133 bool pause;
131 134
132 /* IO related fields */ 135 /* IO related fields */
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 210eccadb69a..95a000515e43 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
21 depends on MMU && OF 21 depends on MMU && OF
22 select PREEMPT_NOTIFIERS 22 select PREEMPT_NOTIFIERS
23 select ANON_INODES 23 select ANON_INODES
24 select ARM_GIC
24 select HAVE_KVM_CPU_RELAX_INTERCEPT 25 select HAVE_KVM_CPU_RELAX_INTERCEPT
25 select HAVE_KVM_ARCH_TLB_FLUSH_ALL 26 select HAVE_KVM_ARCH_TLB_FLUSH_ALL
26 select KVM_MMIO 27 select KVM_MMIO
@@ -45,4 +46,6 @@ config KVM_ARM_HOST
45 ---help--- 46 ---help---
46 Provides host support for ARM processors. 47 Provides host support for ARM processors.
47 48
49source drivers/vhost/Kconfig
50
48endif # VIRTUALIZATION 51endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dc017adfddc8..eab83b2435b8 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
271 return kvm_timer_should_fire(vcpu); 271 return kvm_timer_should_fire(vcpu);
272} 272}
273 273
274void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
275{
276 kvm_timer_schedule(vcpu);
277}
278
279void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
280{
281 kvm_timer_unschedule(vcpu);
282}
283
274int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 284int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
275{ 285{
276 /* Force users to call KVM_ARM_VCPU_INIT */ 286 /* Force users to call KVM_ARM_VCPU_INIT */
@@ -308,7 +318,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
308int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 318int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
309 struct kvm_mp_state *mp_state) 319 struct kvm_mp_state *mp_state)
310{ 320{
311 if (vcpu->arch.pause) 321 if (vcpu->arch.power_off)
312 mp_state->mp_state = KVM_MP_STATE_STOPPED; 322 mp_state->mp_state = KVM_MP_STATE_STOPPED;
313 else 323 else
314 mp_state->mp_state = KVM_MP_STATE_RUNNABLE; 324 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
@@ -321,10 +331,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
321{ 331{
322 switch (mp_state->mp_state) { 332 switch (mp_state->mp_state) {
323 case KVM_MP_STATE_RUNNABLE: 333 case KVM_MP_STATE_RUNNABLE:
324 vcpu->arch.pause = false; 334 vcpu->arch.power_off = false;
325 break; 335 break;
326 case KVM_MP_STATE_STOPPED: 336 case KVM_MP_STATE_STOPPED:
327 vcpu->arch.pause = true; 337 vcpu->arch.power_off = true;
328 break; 338 break;
329 default: 339 default:
330 return -EINVAL; 340 return -EINVAL;
@@ -342,7 +352,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
342 */ 352 */
343int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 353int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
344{ 354{
345 return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v); 355 return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
356 && !v->arch.power_off && !v->arch.pause);
346} 357}
347 358
348/* Just ensure a guest exit from a particular CPU */ 359/* Just ensure a guest exit from a particular CPU */
@@ -468,11 +479,38 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
468 return vgic_initialized(kvm); 479 return vgic_initialized(kvm);
469} 480}
470 481
471static void vcpu_pause(struct kvm_vcpu *vcpu) 482static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
483static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
484
485static void kvm_arm_halt_guest(struct kvm *kvm)
486{
487 int i;
488 struct kvm_vcpu *vcpu;
489
490 kvm_for_each_vcpu(i, vcpu, kvm)
491 vcpu->arch.pause = true;
492 force_vm_exit(cpu_all_mask);
493}
494
495static void kvm_arm_resume_guest(struct kvm *kvm)
496{
497 int i;
498 struct kvm_vcpu *vcpu;
499
500 kvm_for_each_vcpu(i, vcpu, kvm) {
501 wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
502
503 vcpu->arch.pause = false;
504 wake_up_interruptible(wq);
505 }
506}
507
508static void vcpu_sleep(struct kvm_vcpu *vcpu)
472{ 509{
473 wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); 510 wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
474 511
475 wait_event_interruptible(*wq, !vcpu->arch.pause); 512 wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
513 (!vcpu->arch.pause)));
476} 514}
477 515
478static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) 516static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -522,8 +560,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
522 560
523 update_vttbr(vcpu->kvm); 561 update_vttbr(vcpu->kvm);
524 562
525 if (vcpu->arch.pause) 563 if (vcpu->arch.power_off || vcpu->arch.pause)
526 vcpu_pause(vcpu); 564 vcpu_sleep(vcpu);
527 565
528 /* 566 /*
529 * Disarming the background timer must be done in a 567 * Disarming the background timer must be done in a
@@ -549,11 +587,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
549 run->exit_reason = KVM_EXIT_INTR; 587 run->exit_reason = KVM_EXIT_INTR;
550 } 588 }
551 589
552 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { 590 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
591 vcpu->arch.power_off || vcpu->arch.pause) {
553 local_irq_enable(); 592 local_irq_enable();
593 kvm_timer_sync_hwstate(vcpu);
554 kvm_vgic_sync_hwstate(vcpu); 594 kvm_vgic_sync_hwstate(vcpu);
555 preempt_enable(); 595 preempt_enable();
556 kvm_timer_sync_hwstate(vcpu);
557 continue; 596 continue;
558 } 597 }
559 598
@@ -596,14 +635,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
596 * guest time. 635 * guest time.
597 */ 636 */
598 kvm_guest_exit(); 637 kvm_guest_exit();
599 trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); 638 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
639
640 /*
641 * We must sync the timer state before the vgic state so that
642 * the vgic can properly sample the updated state of the
643 * interrupt line.
644 */
645 kvm_timer_sync_hwstate(vcpu);
600 646
601 kvm_vgic_sync_hwstate(vcpu); 647 kvm_vgic_sync_hwstate(vcpu);
602 648
603 preempt_enable(); 649 preempt_enable();
604 650
605 kvm_timer_sync_hwstate(vcpu);
606
607 ret = handle_exit(vcpu, run, ret); 651 ret = handle_exit(vcpu, run, ret);
608 } 652 }
609 653
@@ -765,12 +809,12 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
765 vcpu_reset_hcr(vcpu); 809 vcpu_reset_hcr(vcpu);
766 810
767 /* 811 /*
768 * Handle the "start in power-off" case by marking the VCPU as paused. 812 * Handle the "start in power-off" case.
769 */ 813 */
770 if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) 814 if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
771 vcpu->arch.pause = true; 815 vcpu->arch.power_off = true;
772 else 816 else
773 vcpu->arch.pause = false; 817 vcpu->arch.power_off = false;
774 818
775 return 0; 819 return 0;
776} 820}
@@ -1080,7 +1124,7 @@ static int init_hyp_mode(void)
1080 */ 1124 */
1081 err = kvm_timer_hyp_init(); 1125 err = kvm_timer_hyp_init();
1082 if (err) 1126 if (err)
1083 goto out_free_mappings; 1127 goto out_free_context;
1084 1128
1085#ifndef CONFIG_HOTPLUG_CPU 1129#ifndef CONFIG_HOTPLUG_CPU
1086 free_boot_hyp_pgd(); 1130 free_boot_hyp_pgd();
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index ad6f6424f1d1..0b556968a6da 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -63,7 +63,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
63 63
64static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) 64static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
65{ 65{
66 vcpu->arch.pause = true; 66 vcpu->arch.power_off = true;
67} 67}
68 68
69static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 69static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -87,7 +87,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
87 */ 87 */
88 if (!vcpu) 88 if (!vcpu)
89 return PSCI_RET_INVALID_PARAMS; 89 return PSCI_RET_INVALID_PARAMS;
90 if (!vcpu->arch.pause) { 90 if (!vcpu->arch.power_off) {
91 if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) 91 if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
92 return PSCI_RET_ALREADY_ON; 92 return PSCI_RET_ALREADY_ON;
93 else 93 else
@@ -115,7 +115,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
115 * the general puspose registers are undefined upon CPU_ON. 115 * the general puspose registers are undefined upon CPU_ON.
116 */ 116 */
117 *vcpu_reg(vcpu, 0) = context_id; 117 *vcpu_reg(vcpu, 0) = context_id;
118 vcpu->arch.pause = false; 118 vcpu->arch.power_off = false;
119 smp_mb(); /* Make sure the above is visible */ 119 smp_mb(); /* Make sure the above is visible */
120 120
121 wq = kvm_arch_vcpu_wq(vcpu); 121 wq = kvm_arch_vcpu_wq(vcpu);
@@ -153,7 +153,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
153 mpidr = kvm_vcpu_get_mpidr_aff(tmp); 153 mpidr = kvm_vcpu_get_mpidr_aff(tmp);
154 if ((mpidr & target_affinity_mask) == target_affinity) { 154 if ((mpidr & target_affinity_mask) == target_affinity) {
155 matching_cpus++; 155 matching_cpus++;
156 if (!tmp->arch.pause) 156 if (!tmp->arch.power_off)
157 return PSCI_0_2_AFFINITY_LEVEL_ON; 157 return PSCI_0_2_AFFINITY_LEVEL_ON;
158 } 158 }
159 } 159 }
@@ -179,7 +179,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
179 * re-initialized. 179 * re-initialized.
180 */ 180 */
181 kvm_for_each_vcpu(i, tmp, vcpu->kvm) { 181 kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
182 tmp->arch.pause = true; 182 tmp->arch.power_off = true;
183 kvm_vcpu_kick(tmp); 183 kvm_vcpu_kick(tmp);
184 } 184 }
185 185
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 0ec35392d208..c25a88598eb0 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry,
25); 25);
26 26
27TRACE_EVENT(kvm_exit, 27TRACE_EVENT(kvm_exit,
28 TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc), 28 TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
29 TP_ARGS(exit_reason, vcpu_pc), 29 TP_ARGS(idx, exit_reason, vcpu_pc),
30 30
31 TP_STRUCT__entry( 31 TP_STRUCT__entry(
32 __field( int, idx )
32 __field( unsigned int, exit_reason ) 33 __field( unsigned int, exit_reason )
33 __field( unsigned long, vcpu_pc ) 34 __field( unsigned long, vcpu_pc )
34 ), 35 ),
35 36
36 TP_fast_assign( 37 TP_fast_assign(
38 __entry->idx = idx;
37 __entry->exit_reason = exit_reason; 39 __entry->exit_reason = exit_reason;
38 __entry->vcpu_pc = vcpu_pc; 40 __entry->vcpu_pc = vcpu_pc;
39 ), 41 ),
40 42
41 TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx", 43 TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
44 __print_symbolic(__entry->idx, kvm_arm_exception_type),
42 __entry->exit_reason, 45 __entry->exit_reason,
46 __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
43 __entry->vcpu_pc) 47 __entry->vcpu_pc)
44); 48);
45 49
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 9694f2654593..5e6857b6bdc4 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -200,4 +200,20 @@
200/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ 200/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
201#define HPFAR_MASK (~UL(0xf)) 201#define HPFAR_MASK (~UL(0xf))
202 202
203#define kvm_arm_exception_type \
204 {0, "IRQ" }, \
205 {1, "TRAP" }
206
207#define ECN(x) { ESR_ELx_EC_##x, #x }
208
209#define kvm_arm_exception_class \
210 ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
211 ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
212 ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
213 ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
214 ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
215 ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
216 ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
217 ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
218
203#endif /* __ARM64_KVM_ARM_H__ */ 219#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ed039688c221..a35ce7266aac 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -149,7 +149,10 @@ struct kvm_vcpu_arch {
149 u32 mdscr_el1; 149 u32 mdscr_el1;
150 } guest_debug_preserved; 150 } guest_debug_preserved;
151 151
152 /* Don't run the guest */ 152 /* vcpu power-off state */
153 bool power_off;
154
155 /* Don't run the guest (internal implementation need) */
153 bool pause; 156 bool pause;
154 157
155 /* IO related fields */ 158 /* IO related fields */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 5c7e920e4861..38102f5d3cbb 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -41,4 +41,6 @@ config KVM_ARM_HOST
41 ---help--- 41 ---help---
42 Provides host support for ARM processors. 42 Provides host support for ARM processors.
43 43
44source drivers/vhost/Kconfig
45
44endif # VIRTUALIZATION 46endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index e5836138ec42..1599701ef044 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -880,6 +880,14 @@ __kvm_hyp_panic:
880 880
881 bl __restore_sysregs 881 bl __restore_sysregs
882 882
883 /*
884 * Make sure we have a valid host stack, and don't leave junk in the
885 * frame pointer that will give us a misleading host stack unwinding.
886 */
887 ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
888 msr sp_el1, x22
889 mov x29, xzr
890
8831: adr x0, __hyp_panic_str 8911: adr x0, __hyp_panic_str
884 adr x1, 2f 892 adr x1, 2f
885 ldp x2, x3, [x1] 893 ldp x2, x3, [x1]
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 5a1a882e0a75..6ded8d347af9 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
847 struct kvm_memory_slot *slot) {} 847 struct kvm_memory_slot *slot) {}
848static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} 848static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
849static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 849static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
850static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
851static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
850 852
851#endif /* __MIPS_KVM_HOST_H__ */ 853#endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 827a38d7a9db..c9f122d00920 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -718,5 +718,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
718static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} 718static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
719static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 719static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
720static inline void kvm_arch_exit(void) {} 720static inline void kvm_arch_exit(void) {}
721static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
722static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
721 723
722#endif /* __POWERPC_KVM_HOST_H__ */ 724#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8ced426091e1..72a614c68ed8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
644static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} 644static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
645static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, 645static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
646 struct kvm_memory_slot *slot) {} 646 struct kvm_memory_slot *slot) {}
647static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
648static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
647 649
648#endif 650#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53deb2750bf6..9265196e877f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1261,4 +1261,8 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
1261 1261
1262void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, 1262void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
1263 struct kvm_lapic_irq *irq); 1263 struct kvm_lapic_irq *irq);
1264
1265static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
1266static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
1267
1264#endif /* _ASM_X86_KVM_HOST_H */ 1268#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index e1e4d7c38dda..1800227af9d6 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -51,7 +51,7 @@ struct arch_timer_cpu {
51 bool armed; 51 bool armed;
52 52
53 /* Timer IRQ */ 53 /* Timer IRQ */
54 const struct kvm_irq_level *irq; 54 struct kvm_irq_level irq;
55 55
56 /* VGIC mapping */ 56 /* VGIC mapping */
57 struct irq_phys_map *map; 57 struct irq_phys_map *map;
@@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
71int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); 71int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
72 72
73bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); 73bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
74void kvm_timer_schedule(struct kvm_vcpu *vcpu);
75void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
74 76
75#endif 77#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4e14dac282bb..f62addc17dcf 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -112,7 +112,6 @@ struct vgic_vmcr {
112struct vgic_ops { 112struct vgic_ops {
113 struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); 113 struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int);
114 void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); 114 void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
115 void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
116 u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); 115 u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
117 u64 (*get_eisr)(const struct kvm_vcpu *vcpu); 116 u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
118 void (*clear_eisr)(struct kvm_vcpu *vcpu); 117 void (*clear_eisr)(struct kvm_vcpu *vcpu);
@@ -159,7 +158,6 @@ struct irq_phys_map {
159 u32 virt_irq; 158 u32 virt_irq;
160 u32 phys_irq; 159 u32 phys_irq;
161 u32 irq; 160 u32 irq;
162 bool active;
163}; 161};
164 162
165struct irq_phys_map_entry { 163struct irq_phys_map_entry {
@@ -296,22 +294,16 @@ struct vgic_v3_cpu_if {
296}; 294};
297 295
298struct vgic_cpu { 296struct vgic_cpu {
299 /* per IRQ to LR mapping */
300 u8 *vgic_irq_lr_map;
301
302 /* Pending/active/both interrupts on this VCPU */ 297 /* Pending/active/both interrupts on this VCPU */
303 DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); 298 DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
304 DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS); 299 DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS);
305 DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS); 300 DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
306 301
307 /* Pending/active/both shared interrupts, dynamically sized */ 302 /* Pending/active/both shared interrupts, dynamically sized */
308 unsigned long *pending_shared; 303 unsigned long *pending_shared;
309 unsigned long *active_shared; 304 unsigned long *active_shared;
310 unsigned long *pend_act_shared; 305 unsigned long *pend_act_shared;
311 306
312 /* Bitmap of used/free list registers */
313 DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
314
315 /* Number of list registers on this CPU */ 307 /* Number of list registers on this CPU */
316 int nr_lr; 308 int nr_lr;
317 309
@@ -354,8 +346,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
354struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, 346struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
355 int virt_irq, int irq); 347 int virt_irq, int irq);
356int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); 348int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
357bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
358void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
359 349
360#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 350#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
361#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) 351#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eba9caebc9c1..87189a41d904 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -647,6 +647,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
647void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); 647void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
648 648
649void kvm_vcpu_block(struct kvm_vcpu *vcpu); 649void kvm_vcpu_block(struct kvm_vcpu *vcpu);
650void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
651void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
650void kvm_vcpu_kick(struct kvm_vcpu *vcpu); 652void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
651int kvm_vcpu_yield_to(struct kvm_vcpu *target); 653int kvm_vcpu_yield_to(struct kvm_vcpu *target);
652void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); 654void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
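
For context, these hooks let an architecture react when a VCPU blocks; a
simplified sketch of how kvm_vcpu_block() is expected to call them (the
actual virt/kvm/kvm_main.c change is the small diff counted in the stats
above; the wait loop is elided here):

    void kvm_vcpu_block(struct kvm_vcpu *vcpu)
    {
            kvm_arch_vcpu_blocking(vcpu);    /* arm/arm64: kvm_timer_schedule() */

            /* ... existing wait loop that puts the VCPU thread to sleep ... */

            kvm_arch_vcpu_unblocking(vcpu);  /* arm/arm64: kvm_timer_unschedule() */
    }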
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 48c6e1ac6827..21a0ab2d8919 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -28,6 +28,8 @@
28#include <kvm/arm_vgic.h> 28#include <kvm/arm_vgic.h>
29#include <kvm/arm_arch_timer.h> 29#include <kvm/arm_arch_timer.h>
30 30
31#include "trace.h"
32
31static struct timecounter *timecounter; 33static struct timecounter *timecounter;
32static struct workqueue_struct *wqueue; 34static struct workqueue_struct *wqueue;
33static unsigned int host_vtimer_irq; 35static unsigned int host_vtimer_irq;
@@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
59 } 61 }
60} 62}
61 63
62static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
63{
64 int ret;
65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
66
67 kvm_vgic_set_phys_irq_active(timer->map, true);
68 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
69 timer->map,
70 timer->irq->level);
71 WARN_ON(ret);
72}
73
74static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 64static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
75{ 65{
76 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 66 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
111 return HRTIMER_NORESTART; 101 return HRTIMER_NORESTART;
112} 102}
113 103
104static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
105{
106 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
107
108 return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
109 (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
110}
111
114bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) 112bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
115{ 113{
116 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 114 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
117 cycle_t cval, now; 115 cycle_t cval, now;
118 116
119 if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || 117 if (!kvm_timer_irq_can_fire(vcpu))
120 !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
121 kvm_vgic_get_phys_irq_active(timer->map))
122 return false; 118 return false;
123 119
124 cval = timer->cntv_cval; 120 cval = timer->cntv_cval;
@@ -127,62 +123,143 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
127 return cval <= now; 123 return cval <= now;
128} 124}
129 125
126static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
127{
128 int ret;
129 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
130
131 BUG_ON(!vgic_initialized(vcpu->kvm));
132
133 timer->irq.level = new_level;
134 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
135 timer->irq.level);
136 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
137 timer->map,
138 timer->irq.level);
139 WARN_ON(ret);
140}
141
142/*
143 * Check if there was a change in the timer state (should we raise or lower
144 * the line level to the GIC).
145 */
146static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
147{
148 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
149
150 /*
151 * If userspace modified the timer registers via SET_ONE_REG before
152 * the vgic was initialized, we mustn't set the timer->irq.level value
153 * because the guest would never see the interrupt. Instead wait
154 * until we call this function from kvm_timer_flush_hwstate.
155 */
156 if (!vgic_initialized(vcpu->kvm))
157 return;
158
159 if (kvm_timer_should_fire(vcpu) != timer->irq.level)
160 kvm_timer_update_irq(vcpu, !timer->irq.level);
161}
162
163/*
164 * Schedule the background timer before calling kvm_vcpu_block, so that this
165 * thread is removed from its waitqueue and made runnable when there's a timer
166 * interrupt to handle.
167 */
168void kvm_timer_schedule(struct kvm_vcpu *vcpu)
169{
170 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
171 u64 ns;
172 cycle_t cval, now;
173
174 BUG_ON(timer_is_armed(timer));
175
176 /*
177 * No need to schedule a background timer if the guest timer has
178 * already expired, because kvm_vcpu_block will return before putting
179 * the thread to sleep.
180 */
181 if (kvm_timer_should_fire(vcpu))
182 return;
183
184 /*
185 * If the timer is not capable of raising interrupts (disabled or
186 * masked), then there's no more work for us to do.
187 */
188 if (!kvm_timer_irq_can_fire(vcpu))
189 return;
190
191 /* The timer has not yet expired, schedule a background timer */
192 cval = timer->cntv_cval;
193 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
194
195 ns = cyclecounter_cyc2ns(timecounter->cc,
196 cval - now,
197 timecounter->mask,
198 &timecounter->frac);
199 timer_arm(timer, ns);
200}
201
202void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
203{
204 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
205 timer_disarm(timer);
206}
207
130/** 208/**
131 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu 209 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
132 * @vcpu: The vcpu pointer 210 * @vcpu: The vcpu pointer
133 * 211 *
134 * Disarm any pending soft timers, since the world-switch code will write the 212 * Check if the virtual timer has expired while we were running in the host,
135 * virtual timer state back to the physical CPU. 213 * and inject an interrupt if that was the case.
136 */ 214 */
137void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) 215void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
138{ 216{
139 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 217 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
218 bool phys_active;
219 int ret;
140 220
141 /* 221 kvm_timer_update_state(vcpu);
142 * We're about to run this vcpu again, so there is no need to
143 * keep the background timer running, as we're about to
144 * populate the CPU timer again.
145 */
146 timer_disarm(timer);
147 222
148 /* 223 /*
149 * If the timer expired while we were not scheduled, now is the time 224 * If we enter the guest with the virtual input level to the VGIC
150 * to inject it. 225 * asserted, then we have already told the VGIC what we need to, and
226 * we don't need to exit from the guest until the guest deactivates
227 * the already injected interrupt, so therefore we should set the
228 * hardware active state to prevent unnecessary exits from the guest.
229 *
230 * Conversely, if the virtual input level is deasserted, then always
231 * clear the hardware active state to ensure that hardware interrupts
232 * from the timer triggers a guest exit.
151 */ 233 */
152 if (kvm_timer_should_fire(vcpu)) 234 if (timer->irq.level)
153 kvm_timer_inject_irq(vcpu); 235 phys_active = true;
236 else
237 phys_active = false;
238
239 ret = irq_set_irqchip_state(timer->map->irq,
240 IRQCHIP_STATE_ACTIVE,
241 phys_active);
242 WARN_ON(ret);
154} 243}
155 244
156/** 245/**
157 * kvm_timer_sync_hwstate - sync timer state from cpu 246 * kvm_timer_sync_hwstate - sync timer state from cpu
158 * @vcpu: The vcpu pointer 247 * @vcpu: The vcpu pointer
159 * 248 *
160 * Check if the virtual timer was armed and either schedule a corresponding 249 * Check if the virtual timer has expired while we were running in the guest,
161 * soft timer or inject directly if already expired. 250 * and inject an interrupt if that was the case.
162 */ 251 */
163void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 252void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
164{ 253{
165 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 254 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
166 cycle_t cval, now;
167 u64 ns;
168 255
169 BUG_ON(timer_is_armed(timer)); 256 BUG_ON(timer_is_armed(timer));
170 257
171 if (kvm_timer_should_fire(vcpu)) { 258 /*
172 /* 259 * The guest could have modified the timer registers or the timer
173 * Timer has already expired while we were not 260 * could have expired, update the timer state.
174 * looking. Inject the interrupt and carry on. 261 */
175 */ 262 kvm_timer_update_state(vcpu);
176 kvm_timer_inject_irq(vcpu);
177 return;
178 }
179
180 cval = timer->cntv_cval;
181 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
182
183 ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
184 &timecounter->frac);
185 timer_arm(timer, ns);
186} 263}
187 264
188int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 265int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
@@ -197,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
197 * kvm_vcpu_set_target(). To handle this, we determine 274 * kvm_vcpu_set_target(). To handle this, we determine
198 * vcpu timer irq number when the vcpu is reset. 275 * vcpu timer irq number when the vcpu is reset.
199 */ 276 */
200 timer->irq = irq; 277 timer->irq.irq = irq->irq;
201 278
202 /* 279 /*
203 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 280 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -206,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
206 * the ARMv7 architecture. 283 * the ARMv7 architecture.
207 */ 284 */
208 timer->cntv_ctl = 0; 285 timer->cntv_ctl = 0;
286 kvm_timer_update_state(vcpu);
209 287
210 /* 288 /*
211 * Tell the VGIC that the virtual interrupt is tied to a 289 * Tell the VGIC that the virtual interrupt is tied to a
@@ -250,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
250 default: 328 default:
251 return -1; 329 return -1;
252 } 330 }
331
332 kvm_timer_update_state(vcpu);
253 return 0; 333 return 0;
254} 334}
255 335
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
new file mode 100644
index 000000000000..37d8b98867d5
--- /dev/null
+++ b/virt/kvm/arm/trace.h
@@ -0,0 +1,63 @@
1#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_KVM_H
3
4#include <linux/tracepoint.h>
5
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm
8
9/*
10 * Tracepoints for vgic
11 */
12TRACE_EVENT(vgic_update_irq_pending,
13 TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
14 TP_ARGS(vcpu_id, irq, level),
15
16 TP_STRUCT__entry(
17 __field( unsigned long, vcpu_id )
18 __field( __u32, irq )
19 __field( bool, level )
20 ),
21
22 TP_fast_assign(
23 __entry->vcpu_id = vcpu_id;
24 __entry->irq = irq;
25 __entry->level = level;
26 ),
27
28 TP_printk("VCPU: %ld, IRQ %d, level: %d",
29 __entry->vcpu_id, __entry->irq, __entry->level)
30);
31
32/*
33 * Tracepoints for arch_timer
34 */
35TRACE_EVENT(kvm_timer_update_irq,
36 TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
37 TP_ARGS(vcpu_id, irq, level),
38
39 TP_STRUCT__entry(
40 __field( unsigned long, vcpu_id )
41 __field( __u32, irq )
42 __field( int, level )
43 ),
44
45 TP_fast_assign(
46 __entry->vcpu_id = vcpu_id;
47 __entry->irq = irq;
48 __entry->level = level;
49 ),
50
51 TP_printk("VCPU: %ld, IRQ %d, level %d",
52 __entry->vcpu_id, __entry->irq, __entry->level)
53);
54
55#endif /* _TRACE_KVM_H */
56
57#undef TRACE_INCLUDE_PATH
58#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
59#undef TRACE_INCLUDE_FILE
60#define TRACE_INCLUDE_FILE trace
61
62/* This part must be outside protection */
63#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 8d7b04db8471..ff02f08df74d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
79 lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); 79 lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
80 80
81 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; 81 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
82}
83 82
84static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
85 struct vgic_lr lr_desc)
86{
87 if (!(lr_desc.state & LR_STATE_MASK)) 83 if (!(lr_desc.state & LR_STATE_MASK))
88 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); 84 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
89 else 85 else
@@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
158 * anyway. 154 * anyway.
159 */ 155 */
160 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; 156 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
157 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
161 158
162 /* Get the show on the road... */ 159 /* Get the show on the road... */
163 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; 160 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
@@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
166static const struct vgic_ops vgic_v2_ops = { 163static const struct vgic_ops vgic_v2_ops = {
167 .get_lr = vgic_v2_get_lr, 164 .get_lr = vgic_v2_get_lr,
168 .set_lr = vgic_v2_set_lr, 165 .set_lr = vgic_v2_set_lr,
169 .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
170 .get_elrsr = vgic_v2_get_elrsr, 166 .get_elrsr = vgic_v2_get_elrsr,
171 .get_eisr = vgic_v2_get_eisr, 167 .get_eisr = vgic_v2_get_eisr,
172 .clear_eisr = vgic_v2_clear_eisr, 168 .clear_eisr = vgic_v2_clear_eisr,
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 7dd5d62f10a1..487d6357b7e7 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
112 } 112 }
113 113
114 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; 114 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
115}
116 115
117static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
118 struct vgic_lr lr_desc)
119{
120 if (!(lr_desc.state & LR_STATE_MASK)) 116 if (!(lr_desc.state & LR_STATE_MASK))
121 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); 117 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
122 else 118 else
@@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
193 * anyway. 189 * anyway.
194 */ 190 */
195 vgic_v3->vgic_vmcr = 0; 191 vgic_v3->vgic_vmcr = 0;
192 vgic_v3->vgic_elrsr = ~0;
196 193
197 /* 194 /*
198 * If we are emulating a GICv3, we do it in an non-GICv2-compatible 195 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
@@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
211static const struct vgic_ops vgic_v3_ops = { 208static const struct vgic_ops vgic_v3_ops = {
212 .get_lr = vgic_v3_get_lr, 209 .get_lr = vgic_v3_get_lr,
213 .set_lr = vgic_v3_set_lr, 210 .set_lr = vgic_v3_set_lr,
214 .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
215 .get_elrsr = vgic_v3_get_elrsr, 211 .get_elrsr = vgic_v3_get_elrsr,
216 .get_eisr = vgic_v3_get_eisr, 212 .get_eisr = vgic_v3_get_eisr,
217 .clear_eisr = vgic_v3_clear_eisr, 213 .clear_eisr = vgic_v3_clear_eisr,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6bd1c9bf7ae7..fe451d4885ae 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -34,6 +34,9 @@
34#include <asm/kvm.h> 34#include <asm/kvm.h>
35#include <kvm/iodev.h> 35#include <kvm/iodev.h>
36 36
37#define CREATE_TRACE_POINTS
38#include "trace.h"
39
37/* 40/*
38 * How the whole thing works (courtesy of Christoffer Dall): 41 * How the whole thing works (courtesy of Christoffer Dall):
39 * 42 *
@@ -102,11 +105,13 @@
102#include "vgic.h" 105#include "vgic.h"
103 106
104static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); 107static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
105static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 108static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
106static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 109static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
107static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); 110static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
111static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
108static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, 112static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
109 int virt_irq); 113 int virt_irq);
114static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
110 115
111static const struct vgic_ops *vgic_ops; 116static const struct vgic_ops *vgic_ops;
112static const struct vgic_params *vgic; 117static const struct vgic_params *vgic;
@@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
357 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 362 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
358 363
359 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); 364 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
365 if (!vgic_dist_irq_get_level(vcpu, irq)) {
366 vgic_dist_irq_clear_pending(vcpu, irq);
367 if (!compute_pending_for_cpu(vcpu))
368 clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
369 }
360} 370}
361 371
362static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) 372static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -654,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
654 vgic_reg_access(mmio, &val, offset, 664 vgic_reg_access(mmio, &val, offset,
655 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); 665 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
656 if (mmio->is_write) { 666 if (mmio->is_write) {
657 if (offset < 8) { 667 /* Ignore writes to read-only SGI and PPI bits */
658 *reg = ~0U; /* Force PPIs/SGIs to 1 */ 668 if (offset < 8)
659 return false; 669 return false;
660 }
661 670
662 val = vgic_cfg_compress(val); 671 val = vgic_cfg_compress(val);
663 if (offset & 4) { 672 if (offset & 4) {
@@ -683,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
683void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) 692void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
684{ 693{
685 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 694 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
695 u64 elrsr = vgic_get_elrsr(vcpu);
696 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
686 int i; 697 int i;
687 698
688 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { 699 for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
689 struct vgic_lr lr = vgic_get_lr(vcpu, i); 700 struct vgic_lr lr = vgic_get_lr(vcpu, i);
690 701
691 /* 702 /*
@@ -706,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
706 * interrupt then move the active state to the 717 * interrupt then move the active state to the
707 * distributor tracking bit. 718 * distributor tracking bit.
708 */ 719 */
709 if (lr.state & LR_STATE_ACTIVE) { 720 if (lr.state & LR_STATE_ACTIVE)
710 vgic_irq_set_active(vcpu, lr.irq); 721 vgic_irq_set_active(vcpu, lr.irq);
711 lr.state &= ~LR_STATE_ACTIVE;
712 }
713 722
714 /* 723 /*
715 * Reestablish the pending state on the distributor and the 724 * Reestablish the pending state on the distributor and the
716 * CPU interface. It may have already been pending, but that 725 * CPU interface and mark the LR as free for other use.
717 * is fine, then we are only setting a few bits that were
718 * already set.
719 */
720 if (lr.state & LR_STATE_PENDING) {
721 vgic_dist_irq_set_pending(vcpu, lr.irq);
722 lr.state &= ~LR_STATE_PENDING;
723 }
724
725 vgic_set_lr(vcpu, i, lr);
726
727 /*
728 * Mark the LR as free for other use.
729 */ 726 */
730 BUG_ON(lr.state & LR_STATE_MASK); 727 vgic_retire_lr(i, vcpu);
731 vgic_retire_lr(i, lr.irq, vcpu);
732 vgic_irq_clear_queued(vcpu, lr.irq);
733 728
734 /* Finally update the VGIC state. */ 729 /* Finally update the VGIC state. */
735 vgic_update_state(vcpu->kvm); 730 vgic_update_state(vcpu->kvm);
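Note on vgic_unqueue_irqs() above: with the lr_used bitmap on its way out, list-register occupancy is derived from the ELRSR, where a set bit marks an empty LR; the kernel wraps this as u64_to_bitmask() plus for_each_clear_bit(). A plain-C sketch of the same inverted iteration, with made-up names:

#include <stdint.h>
#include <stdio.h>

#define NR_LR 4

/* Visit the occupied list registers: ELRSR reports empty slots, so the
 * in-use LRs are the clear bits. */
static void for_each_used_lr(uint64_t elrsr, int nr_lr, void (*fn)(int lr))
{
	for (int lr = 0; lr < nr_lr; lr++)
		if (!(elrsr & (1ULL << lr)))
			fn(lr);
}

static void dump_lr(int lr)
{
	printf("LR%d holds an interrupt\n", lr);
}

int main(void)
{
	uint64_t elrsr = 0xa;	/* LR1 and LR3 empty, LR0 and LR2 in use */

	for_each_used_lr(elrsr, NR_LR, dump_lr);
	return 0;
}
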
@@ -982,6 +977,12 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
982 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; 977 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
983 pend_shared = vcpu->arch.vgic_cpu.pending_shared; 978 pend_shared = vcpu->arch.vgic_cpu.pending_shared;
984 979
980 if (!dist->enabled) {
981 bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS);
982 bitmap_zero(pend_shared, nr_shared);
983 return 0;
984 }
985
985 pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); 986 pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
986 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); 987 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
987 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); 988 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
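Note on the early-out added above: a disabled distributor now zeroes the per-vcpu pending sets instead of relying on vgic_update_state() unconditionally setting bit 0, as the old code did. A reduced model of that check, with plain arrays standing in for the kernel bitmaps (only the private IRQs are shown):

#include <stdbool.h>
#include <string.h>

#define NR_PRIVATE_IRQS 32

struct model_cpu {
	bool pend_percpu[NR_PRIVATE_IRQS];
};

/* Returns true if anything is left pending for this vcpu.  With the
 * distributor disabled nothing can be delivered, so both the result and
 * the cached pending set are cleared. */
static bool compute_pending(struct model_cpu *cpu, bool dist_enabled,
			    const bool pending[NR_PRIVATE_IRQS],
			    const bool enabled[NR_PRIVATE_IRQS])
{
	bool any = false;

	if (!dist_enabled) {
		memset(cpu->pend_percpu, 0, sizeof(cpu->pend_percpu));
		return false;
	}

	for (int i = 0; i < NR_PRIVATE_IRQS; i++) {
		cpu->pend_percpu[i] = pending[i] && enabled[i];
		if (cpu->pend_percpu[i])
			any = true;
	}

	return any;
}
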
@@ -1009,11 +1010,6 @@ void vgic_update_state(struct kvm *kvm)
1009 struct kvm_vcpu *vcpu; 1010 struct kvm_vcpu *vcpu;
1010 int c; 1011 int c;
1011 1012
1012 if (!dist->enabled) {
1013 set_bit(0, dist->irq_pending_on_cpu);
1014 return;
1015 }
1016
1017 kvm_for_each_vcpu(c, vcpu, kvm) { 1013 kvm_for_each_vcpu(c, vcpu, kvm) {
1018 if (compute_pending_for_cpu(vcpu)) 1014 if (compute_pending_for_cpu(vcpu))
1019 set_bit(c, dist->irq_pending_on_cpu); 1015 set_bit(c, dist->irq_pending_on_cpu);
@@ -1036,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
1036 vgic_ops->set_lr(vcpu, lr, vlr); 1032 vgic_ops->set_lr(vcpu, lr, vlr);
1037} 1033}
1038 1034
1039static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
1040 struct vgic_lr vlr)
1041{
1042 vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
1043}
1044
1045static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) 1035static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
1046{ 1036{
1047 return vgic_ops->get_elrsr(vcpu); 1037 return vgic_ops->get_elrsr(vcpu);
@@ -1087,16 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
1087 vgic_ops->enable(vcpu); 1077 vgic_ops->enable(vcpu);
1088} 1078}
1089 1079
1090static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) 1080static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
1091{ 1081{
1092 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1093 struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); 1082 struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
1094 1083
1084 vgic_irq_clear_queued(vcpu, vlr.irq);
1085
1086 /*
1087 * We must transfer the pending state back to the distributor before
1088 * retiring the LR, otherwise we may lose edge-triggered interrupts.
1089 */
1090 if (vlr.state & LR_STATE_PENDING) {
1091 vgic_dist_irq_set_pending(vcpu, vlr.irq);
1092 vlr.hwirq = 0;
1093 }
1094
1095 vlr.state = 0; 1095 vlr.state = 0;
1096 vgic_set_lr(vcpu, lr_nr, vlr); 1096 vgic_set_lr(vcpu, lr_nr, vlr);
1097 clear_bit(lr_nr, vgic_cpu->lr_used);
1098 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
1099 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1100} 1097}
1101 1098
1102/* 1099/*
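Note on the reworked vgic_retire_lr() above: it now folds a still-pending state back into the distributor before wiping the list register, so an edge that was latched but never presented to the guest is not dropped when its LR is reclaimed. A simplified model of that ordering (structures and names are illustrative, not the kernel's):

#include <stdbool.h>

#define NR_IRQS 64

struct model_lr {
	int irq;
	bool pending;	/* LR_STATE_PENDING equivalent */
	bool active;	/* LR_STATE_ACTIVE equivalent  */
};

struct model_dist {
	bool pending[NR_IRQS];
	bool queued[NR_IRQS];
};

/* Retire an LR: transfer the pending state back to the distributor
 * first, then clear the LR and allow the interrupt to be queued again. */
static void retire_lr(struct model_dist *d, struct model_lr *lr)
{
	d->queued[lr->irq] = false;

	if (lr->pending)
		d->pending[lr->irq] = true;

	lr->pending = false;
	lr->active = false;
}
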
@@ -1110,17 +1107,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
1110 */ 1107 */
1111static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) 1108static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1112{ 1109{
1113 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1110 u64 elrsr = vgic_get_elrsr(vcpu);
1111 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
1114 int lr; 1112 int lr;
1115 1113
1116 for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { 1114 for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
1117 struct vgic_lr vlr = vgic_get_lr(vcpu, lr); 1115 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1118 1116
1119 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { 1117 if (!vgic_irq_is_enabled(vcpu, vlr.irq))
1120 vgic_retire_lr(lr, vlr.irq, vcpu); 1118 vgic_retire_lr(lr, vcpu);
1121 if (vgic_irq_is_queued(vcpu, vlr.irq))
1122 vgic_irq_clear_queued(vcpu, vlr.irq);
1123 }
1124 } 1119 }
1125} 1120}
1126 1121
@@ -1132,7 +1127,8 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1132 kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); 1127 kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
1133 vgic_irq_clear_active(vcpu, irq); 1128 vgic_irq_clear_active(vcpu, irq);
1134 vgic_update_state(vcpu->kvm); 1129 vgic_update_state(vcpu->kvm);
1135 } else if (vgic_dist_irq_is_pending(vcpu, irq)) { 1130 } else {
1131 WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq));
1136 vlr.state |= LR_STATE_PENDING; 1132 vlr.state |= LR_STATE_PENDING;
1137 kvm_debug("Set pending: 0x%x\n", vlr.state); 1133 kvm_debug("Set pending: 0x%x\n", vlr.state);
1138 } 1134 }
@@ -1159,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1159 } 1155 }
1160 1156
1161 vgic_set_lr(vcpu, lr_nr, vlr); 1157 vgic_set_lr(vcpu, lr_nr, vlr);
1162 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1163} 1158}
1164 1159
1165/* 1160/*
@@ -1169,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1169 */ 1164 */
1170bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) 1165bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1171{ 1166{
1172 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1173 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1167 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1168 u64 elrsr = vgic_get_elrsr(vcpu);
1169 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
1174 struct vgic_lr vlr; 1170 struct vgic_lr vlr;
1175 int lr; 1171 int lr;
1176 1172
@@ -1181,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1181 1177
1182 kvm_debug("Queue IRQ%d\n", irq); 1178 kvm_debug("Queue IRQ%d\n", irq);
1183 1179
1184 lr = vgic_cpu->vgic_irq_lr_map[irq];
1185
1186 /* Do we have an active interrupt for the same CPUID? */ 1180 /* Do we have an active interrupt for the same CPUID? */
1187 if (lr != LR_EMPTY) { 1181 for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
1188 vlr = vgic_get_lr(vcpu, lr); 1182 vlr = vgic_get_lr(vcpu, lr);
1189 if (vlr.source == sgi_source_id) { 1183 if (vlr.irq == irq && vlr.source == sgi_source_id) {
1190 kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); 1184 kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
1191 BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
1192 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); 1185 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
1193 return true; 1186 return true;
1194 } 1187 }
1195 } 1188 }
1196 1189
1197 /* Try to use another LR for this interrupt */ 1190 /* Try to use another LR for this interrupt */
1198 lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, 1191 lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
1199 vgic->nr_lr);
1200 if (lr >= vgic->nr_lr) 1192 if (lr >= vgic->nr_lr)
1201 return false; 1193 return false;
1202 1194
1203 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); 1195 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
1204 vgic_cpu->vgic_irq_lr_map[irq] = lr;
1205 set_bit(lr, vgic_cpu->lr_used);
1206 1196
1207 vlr.irq = irq; 1197 vlr.irq = irq;
1208 vlr.source = sgi_source_id; 1198 vlr.source = sgi_source_id;
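Note on vgic_queue_irq() above: with the per-IRQ LR map gone, it first scans the occupied LRs for one already holding the same interrupt and SGI source (the piggyback case) and otherwise takes the first empty slot reported by the ELRSR. A self-contained sketch of that selection, with illustrative names and a fixed LR count:

#include <stdint.h>

#define NR_LR	4
#define LR_NONE	(-1)

struct model_lr {
	int irq;
	int source;
};

/* Pick a list register for (irq, source): reuse an occupied LR that
 * already carries the same interrupt from the same source, otherwise
 * allocate the first empty LR (set bit in elrsr).  Returns LR_NONE if
 * everything is full, which the caller treats as overflow. */
static int pick_lr(const struct model_lr lrs[NR_LR], uint64_t elrsr,
		   int irq, int source)
{
	for (int lr = 0; lr < NR_LR; lr++) {
		if (elrsr & (1ULL << lr))
			continue;	/* empty slot, check later */
		if (lrs[lr].irq == irq && lrs[lr].source == source)
			return lr;	/* piggyback on the existing LR */
	}

	for (int lr = 0; lr < NR_LR; lr++)
		if (elrsr & (1ULL << lr))
			return lr;	/* first free LR */

	return LR_NONE;
}
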
@@ -1240,7 +1230,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1240 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1230 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1241 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1231 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1242 unsigned long *pa_percpu, *pa_shared; 1232 unsigned long *pa_percpu, *pa_shared;
1243 int i, vcpu_id, lr, ret; 1233 int i, vcpu_id;
1244 int overflow = 0; 1234 int overflow = 0;
1245 int nr_shared = vgic_nr_shared_irqs(dist); 1235 int nr_shared = vgic_nr_shared_irqs(dist);
1246 1236
@@ -1295,39 +1285,62 @@ epilog:
1295 */ 1285 */
1296 clear_bit(vcpu_id, dist->irq_pending_on_cpu); 1286 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
1297 } 1287 }
1288}
1298 1289
1299 for (lr = 0; lr < vgic->nr_lr; lr++) { 1290static int process_queued_irq(struct kvm_vcpu *vcpu,
1300 struct vgic_lr vlr; 1291 int lr, struct vgic_lr vlr)
1292{
1293 int pending = 0;
1301 1294
1302 if (!test_bit(lr, vgic_cpu->lr_used)) 1295 /*
1303 continue; 1296 * If the IRQ was EOIed (called from vgic_process_maintenance) or it
1297 * went from active to non-active (called from vgic_sync_hwirq), it was
1298 * also ACKed, and we therefore assume we can clear the soft pending
1299 * state (should it have been set) for this interrupt.
1300 *
1301 * Note: if the IRQ soft pending state was set after the IRQ was
1302 * acked, it actually shouldn't be cleared, but we have no way of
1303 * knowing that unless we start trapping ACKs when the soft-pending
1304 * state is set.
1305 */
1306 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1304 1307
1305 vlr = vgic_get_lr(vcpu, lr); 1308 /*
1309 * Tell the gic to start sampling this interrupt again.
1310 */
1311 vgic_irq_clear_queued(vcpu, vlr.irq);
1306 1312
1307 /* 1313 /* Any additional pending interrupt? */
1308 * If we have a mapping, and the virtual interrupt is 1314 if (vgic_irq_is_edge(vcpu, vlr.irq)) {
1309 * presented to the guest (as pending or active), then we must 1315 BUG_ON(!(vlr.state & LR_HW));
1310 * set the state to active in the physical world. See 1316 pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
1311 * Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt. 1317 } else {
1312 */ 1318 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1313 if (vlr.state & LR_HW) { 1319 vgic_cpu_irq_set(vcpu, vlr.irq);
1314 struct irq_phys_map *map; 1320 pending = 1;
1315 map = vgic_irq_map_search(vcpu, vlr.irq); 1321 } else {
1316 1322 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1317 ret = irq_set_irqchip_state(map->irq, 1323 vgic_cpu_irq_clear(vcpu, vlr.irq);
1318 IRQCHIP_STATE_ACTIVE,
1319 true);
1320 WARN_ON(ret);
1321 } 1324 }
1322 } 1325 }
1326
1327 /*
1328 * Despite being EOIed, the LR may not have
1329 * been marked as empty.
1330 */
1331 vlr.state = 0;
1332 vlr.hwirq = 0;
1333 vgic_set_lr(vcpu, lr, vlr);
1334
1335 return pending;
1323} 1336}
1324 1337
1325static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) 1338static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1326{ 1339{
1327 u32 status = vgic_get_interrupt_status(vcpu); 1340 u32 status = vgic_get_interrupt_status(vcpu);
1328 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1341 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1329 bool level_pending = false;
1330 struct kvm *kvm = vcpu->kvm; 1342 struct kvm *kvm = vcpu->kvm;
1343 int level_pending = 0;
1331 1344
1332 kvm_debug("STATUS = %08x\n", status); 1345 kvm_debug("STATUS = %08x\n", status);
1333 1346
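Note on the new process_queued_irq() above: it centralizes the "is this interrupt still pending after it was EOIed or deactivated?" decision for both the maintenance path and the HW-sync path below. A reduced model of just that decision, assuming simplified per-IRQ state:

#include <stdbool.h>

struct model_irq {
	bool edge;	/* edge-triggered, else level-triggered	*/
	bool pending;	/* distributor pending state		*/
	bool level;	/* sampled line level			*/
};

/* After EOI/deactivation: an edge interrupt stays pending only if
 * another edge was latched in the meantime; a level interrupt stays
 * pending only while the line is still high, otherwise its pending
 * state is cleared. */
static bool still_pending(struct model_irq *irq)
{
	if (irq->edge)
		return irq->pending;

	if (irq->level)
		return true;

	irq->pending = false;
	return false;
}
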
@@ -1342,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1342 1355
1343 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { 1356 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1344 struct vgic_lr vlr = vgic_get_lr(vcpu, lr); 1357 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1345 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1346 1358
1347 spin_lock(&dist->lock); 1359 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1348 vgic_irq_clear_queued(vcpu, vlr.irq);
1349 WARN_ON(vlr.state & LR_STATE_MASK); 1360 WARN_ON(vlr.state & LR_STATE_MASK);
1350 vlr.state = 0;
1351 vgic_set_lr(vcpu, lr, vlr);
1352 1361
1353 /*
1354 * If the IRQ was EOIed it was also ACKed and we we
1355 * therefore assume we can clear the soft pending
1356 * state (should it had been set) for this interrupt.
1357 *
1358 * Note: if the IRQ soft pending state was set after
1359 * the IRQ was acked, it actually shouldn't be
1360 * cleared, but we have no way of knowing that unless
1361 * we start trapping ACKs when the soft-pending state
1362 * is set.
1363 */
1364 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1365 1362
1366 /* 1363 /*
1367 * kvm_notify_acked_irq calls kvm_set_irq() 1364 * kvm_notify_acked_irq calls kvm_set_irq()
1368 * to reset the IRQ level. Need to release the 1365 * to reset the IRQ level, which grabs the dist->lock,
1369 * lock for kvm_set_irq to grab it. 1366 * so we call this before taking the dist->lock.
1370 */ 1367 */
1371 spin_unlock(&dist->lock);
1372
1373 kvm_notify_acked_irq(kvm, 0, 1368 kvm_notify_acked_irq(kvm, 0,
1374 vlr.irq - VGIC_NR_PRIVATE_IRQS); 1369 vlr.irq - VGIC_NR_PRIVATE_IRQS);
1375 spin_lock(&dist->lock);
1376
1377 /* Any additional pending interrupt? */
1378 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1379 vgic_cpu_irq_set(vcpu, vlr.irq);
1380 level_pending = true;
1381 } else {
1382 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1383 vgic_cpu_irq_clear(vcpu, vlr.irq);
1384 }
1385 1370
1371 spin_lock(&dist->lock);
1372 level_pending |= process_queued_irq(vcpu, lr, vlr);
1386 spin_unlock(&dist->lock); 1373 spin_unlock(&dist->lock);
1387
1388 /*
1389 * Despite being EOIed, the LR may not have
1390 * been marked as empty.
1391 */
1392 vgic_sync_lr_elrsr(vcpu, lr, vlr);
1393 } 1374 }
1394 } 1375 }
1395 1376
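Note on the maintenance loop above: kvm_notify_acked_irq() is now called before taking dist->lock, because the notification ends up in kvm_set_irq(), which needs that same lock; the old code had to drop and re-take the lock around the call instead. A generic illustration of that ordering, with a pthread mutex standing in for the distributor lock:

#include <pthread.h>

static pthread_mutex_t dist_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for kvm_set_irq(): needs the distributor lock itself. */
static void reset_irq_level(int irq)
{
	(void)irq;
	pthread_mutex_lock(&dist_lock);
	/* ... update the recorded line level for irq ... */
	pthread_mutex_unlock(&dist_lock);
}

/* Run the callback that re-acquires the lock first, then take the lock
 * for the local bookkeeping, instead of unlocking and relocking around
 * the callback as the old code did. */
static void handle_eoi(int irq)
{
	reset_irq_level(irq);		/* takes dist_lock internally */

	pthread_mutex_lock(&dist_lock);
	/* ... process_queued_irq() equivalent ... */
	pthread_mutex_unlock(&dist_lock);
}
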
@@ -1410,40 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1410/* 1391/*
1411 * Save the physical active state, and reset it to inactive. 1392 * Save the physical active state, and reset it to inactive.
1412 * 1393 *
1413 * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. 1394 * Return true if there's a pending forwarded interrupt to queue.
1414 */ 1395 */
1415static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) 1396static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
1416{ 1397{
1398 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1417 struct irq_phys_map *map; 1399 struct irq_phys_map *map;
1400 bool phys_active;
1401 bool level_pending;
1418 int ret; 1402 int ret;
1419 1403
1420 if (!(vlr.state & LR_HW)) 1404 if (!(vlr.state & LR_HW))
1421 return 0; 1405 return false;
1422 1406
1423 map = vgic_irq_map_search(vcpu, vlr.irq); 1407 map = vgic_irq_map_search(vcpu, vlr.irq);
1424 BUG_ON(!map || !map->active); 1408 BUG_ON(!map);
1425 1409
1426 ret = irq_get_irqchip_state(map->irq, 1410 ret = irq_get_irqchip_state(map->irq,
1427 IRQCHIP_STATE_ACTIVE, 1411 IRQCHIP_STATE_ACTIVE,
1428 &map->active); 1412 &phys_active);
1429 1413
1430 WARN_ON(ret); 1414 WARN_ON(ret);
1431 1415
1432 if (map->active) { 1416 if (phys_active)
1433 ret = irq_set_irqchip_state(map->irq,
1434 IRQCHIP_STATE_ACTIVE,
1435 false);
1436 WARN_ON(ret);
1437 return 0; 1417 return 0;
1438 }
1439 1418
1440 return 1; 1419 spin_lock(&dist->lock);
1420 level_pending = process_queued_irq(vcpu, lr, vlr);
1421 spin_unlock(&dist->lock);
1422 return level_pending;
1441} 1423}
1442 1424
1443/* Sync back the VGIC state after a guest run */ 1425/* Sync back the VGIC state after a guest run */
1444static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1426static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1445{ 1427{
1446 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1447 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1428 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1448 u64 elrsr; 1429 u64 elrsr;
1449 unsigned long *elrsr_ptr; 1430 unsigned long *elrsr_ptr;
@@ -1451,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1451 bool level_pending; 1432 bool level_pending;
1452 1433
1453 level_pending = vgic_process_maintenance(vcpu); 1434 level_pending = vgic_process_maintenance(vcpu);
1454 elrsr = vgic_get_elrsr(vcpu);
1455 elrsr_ptr = u64_to_bitmask(&elrsr);
1456 1435
1457 /* Deal with HW interrupts, and clear mappings for empty LRs */ 1436 /* Deal with HW interrupts, and clear mappings for empty LRs */
1458 for (lr = 0; lr < vgic->nr_lr; lr++) { 1437 for (lr = 0; lr < vgic->nr_lr; lr++) {
1459 struct vgic_lr vlr; 1438 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1460
1461 if (!test_bit(lr, vgic_cpu->lr_used))
1462 continue;
1463
1464 vlr = vgic_get_lr(vcpu, lr);
1465 if (vgic_sync_hwirq(vcpu, vlr)) {
1466 /*
1467 * So this is a HW interrupt that the guest
1468 * EOI-ed. Clean the LR state and allow the
1469 * interrupt to be sampled again.
1470 */
1471 vlr.state = 0;
1472 vlr.hwirq = 0;
1473 vgic_set_lr(vcpu, lr, vlr);
1474 vgic_irq_clear_queued(vcpu, vlr.irq);
1475 set_bit(lr, elrsr_ptr);
1476 }
1477
1478 if (!test_bit(lr, elrsr_ptr))
1479 continue;
1480
1481 clear_bit(lr, vgic_cpu->lr_used);
1482 1439
1440 level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
1483 BUG_ON(vlr.irq >= dist->nr_irqs); 1441 BUG_ON(vlr.irq >= dist->nr_irqs);
1484 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
1485 } 1442 }
1486 1443
1487 /* Check if we still have something up our sleeve... */ 1444 /* Check if we still have something up our sleeve... */
1445 elrsr = vgic_get_elrsr(vcpu);
1446 elrsr_ptr = u64_to_bitmask(&elrsr);
1488 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); 1447 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1489 if (level_pending || pending < vgic->nr_lr) 1448 if (level_pending || pending < vgic->nr_lr)
1490 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); 1449 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
@@ -1574,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1574 int enabled; 1533 int enabled;
1575 bool ret = true, can_inject = true; 1534 bool ret = true, can_inject = true;
1576 1535
1536 trace_vgic_update_irq_pending(cpuid, irq_num, level);
1537
1577 if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) 1538 if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
1578 return -EINVAL; 1539 return -EINVAL;
1579 1540
@@ -1607,8 +1568,12 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1607 } else { 1568 } else {
1608 if (level_triggered) { 1569 if (level_triggered) {
1609 vgic_dist_irq_clear_level(vcpu, irq_num); 1570 vgic_dist_irq_clear_level(vcpu, irq_num);
1610 if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) 1571 if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) {
1611 vgic_dist_irq_clear_pending(vcpu, irq_num); 1572 vgic_dist_irq_clear_pending(vcpu, irq_num);
1573 vgic_cpu_irq_clear(vcpu, irq_num);
1574 if (!compute_pending_for_cpu(vcpu))
1575 clear_bit(cpuid, dist->irq_pending_on_cpu);
1576 }
1612 } 1577 }
1613 1578
1614 ret = false; 1579 ret = false;
@@ -1849,30 +1814,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
1849} 1814}
1850 1815
1851/** 1816/**
1852 * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
1853 *
1854 * Return the logical active state of a mapped interrupt. This doesn't
1855 * necessarily reflects the current HW state.
1856 */
1857bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
1858{
1859 BUG_ON(!map);
1860 return map->active;
1861}
1862
1863/**
1864 * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
1865 *
1866 * Set the logical active state of a mapped interrupt. This doesn't
1867 * immediately affects the HW state.
1868 */
1869void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
1870{
1871 BUG_ON(!map);
1872 map->active = active;
1873}
1874
1875/**
1876 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping 1817 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
1877 * @vcpu: The VCPU pointer 1818 * @vcpu: The VCPU pointer
1878 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq 1819 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
@@ -1927,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1927 kfree(vgic_cpu->pending_shared); 1868 kfree(vgic_cpu->pending_shared);
1928 kfree(vgic_cpu->active_shared); 1869 kfree(vgic_cpu->active_shared);
1929 kfree(vgic_cpu->pend_act_shared); 1870 kfree(vgic_cpu->pend_act_shared);
1930 kfree(vgic_cpu->vgic_irq_lr_map);
1931 vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); 1871 vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
1932 vgic_cpu->pending_shared = NULL; 1872 vgic_cpu->pending_shared = NULL;
1933 vgic_cpu->active_shared = NULL; 1873 vgic_cpu->active_shared = NULL;
1934 vgic_cpu->pend_act_shared = NULL; 1874 vgic_cpu->pend_act_shared = NULL;
1935 vgic_cpu->vgic_irq_lr_map = NULL;
1936} 1875}
1937 1876
1938static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) 1877static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
@@ -1943,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1943 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); 1882 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1944 vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); 1883 vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
1945 vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); 1884 vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
1946 vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
1947 1885
1948 if (!vgic_cpu->pending_shared 1886 if (!vgic_cpu->pending_shared
1949 || !vgic_cpu->active_shared 1887 || !vgic_cpu->active_shared
1950 || !vgic_cpu->pend_act_shared 1888 || !vgic_cpu->pend_act_shared) {
1951 || !vgic_cpu->vgic_irq_lr_map) {
1952 kvm_vgic_vcpu_destroy(vcpu); 1889 kvm_vgic_vcpu_destroy(vcpu);
1953 return -ENOMEM; 1890 return -ENOMEM;
1954 } 1891 }
1955 1892
1956 memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
1957
1958 /* 1893 /*
1959 * Store the number of LRs per vcpu, so we don't have to go 1894 * Store the number of LRs per vcpu, so we don't have to go
1960 * all the way to the distributor structure to find out. Only 1895 * all the way to the distributor structure to find out. Only
@@ -2096,14 +2031,24 @@ int vgic_init(struct kvm *kvm)
2096 break; 2031 break;
2097 } 2032 }
2098 2033
2099 for (i = 0; i < dist->nr_irqs; i++) { 2034 /*
2100 if (i < VGIC_NR_PPIS) 2035 * Enable and configure all SGIs to be edge-triggered and
2036 * configure all PPIs as level-triggered.
2037 */
2038 for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
2039 if (i < VGIC_NR_SGIS) {
2040 /* SGIs */
2101 vgic_bitmap_set_irq_val(&dist->irq_enabled, 2041 vgic_bitmap_set_irq_val(&dist->irq_enabled,
2102 vcpu->vcpu_id, i, 1); 2042 vcpu->vcpu_id, i, 1);
2103 if (i < VGIC_NR_PRIVATE_IRQS)
2104 vgic_bitmap_set_irq_val(&dist->irq_cfg, 2043 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2105 vcpu->vcpu_id, i, 2044 vcpu->vcpu_id, i,
2106 VGIC_CFG_EDGE); 2045 VGIC_CFG_EDGE);
2046 } else if (i < VGIC_NR_PRIVATE_IRQS) {
2047 /* PPIs */
2048 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2049 vcpu->vcpu_id, i,
2050 VGIC_CFG_LEVEL);
2051 }
2107 } 2052 }
2108 2053
2109 vgic_enable(vcpu); 2054 vgic_enable(vcpu);
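Note on the init loop above: private interrupts now get their architectural defaults up front, with SGIs (0-15) enabled and edge-triggered and PPIs (16-31) level-triggered. A compact model of the same loop over the first 32 interrupts, with illustrative names:

#include <stdbool.h>

#define NR_SGIS		16
#define NR_PRIVATE_IRQS	32

enum irq_cfg { CFG_EDGE, CFG_LEVEL };

struct private_irq {
	bool enabled;
	enum irq_cfg cfg;
};

/* SGIs are always enabled and edge-triggered; PPIs default to
 * level-triggered and stay disabled until the guest enables them. */
static void init_private_irqs(struct private_irq irqs[NR_PRIVATE_IRQS])
{
	for (int i = 0; i < NR_PRIVATE_IRQS; i++) {
		if (i < NR_SGIS) {
			irqs[i].enabled = true;
			irqs[i].cfg = CFG_EDGE;
		} else {
			irqs[i].enabled = false;
			irqs[i].cfg = CFG_LEVEL;
		}
	}
}
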
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a75502c93c3e..484079efea5b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2021,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
2021 } while (single_task_running() && ktime_before(cur, stop)); 2021 } while (single_task_running() && ktime_before(cur, stop));
2022 } 2022 }
2023 2023
2024 kvm_arch_vcpu_blocking(vcpu);
2025
2024 for (;;) { 2026 for (;;) {
2025 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 2027 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
2026 2028
@@ -2034,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
2034 finish_wait(&vcpu->wq, &wait); 2036 finish_wait(&vcpu->wq, &wait);
2035 cur = ktime_get(); 2037 cur = ktime_get();
2036 2038
2039 kvm_arch_vcpu_unblocking(vcpu);
2037out: 2040out:
2038 block_ns = ktime_to_ns(cur) - ktime_to_ns(start); 2041 block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
2039 2042
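Note on the kvm_vcpu_block() change above: the new kvm_arch_vcpu_blocking()/kvm_arch_vcpu_unblocking() hooks bracket the whole wait loop; on ARM this gives the arch timer a chance to hand off to a software timer while the vcpu sleeps. A stub-level sketch of the resulting control flow, not the kernel implementation:

#include <stdbool.h>

struct model_vcpu {
	bool has_pending_event;
};

static void arch_vcpu_blocking(struct model_vcpu *v)
{
	/* e.g. arm: schedule a software timer to stand in for the
	 * architected timer while the vcpu is not running */
	(void)v;
}

static void arch_vcpu_unblocking(struct model_vcpu *v)
{
	/* e.g. arm: cancel the software timer again */
	(void)v;
}

static bool wakeup_condition(struct model_vcpu *v)
{
	return v->has_pending_event;
}

static void wait_for_event(struct model_vcpu *v)
{
	v->has_pending_event = true;	/* stand-in for schedule() */
}

/* The arch hooks bracket the entire wait loop, so arch code runs once
 * before the vcpu can sleep and once after it is runnable again. */
static void vcpu_block(struct model_vcpu *v)
{
	arch_vcpu_blocking(v);

	while (!wakeup_condition(v))
		wait_for_event(v);

	arch_vcpu_unblocking(v);
}
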