aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRadim Krčmář <rkrcmar@redhat.com>2018-01-31 07:34:41 -0500
committerRadim Krčmář <rkrcmar@redhat.com>2018-01-31 07:34:41 -0500
commite53175395d7e12d8474707271bc02a2814279843 (patch)
treeca6a0fc846cffb1b6db999a4595998c160333cf0
parent810f4600ec5ee79c68dcbb136ed26a652df46348 (diff)
parentcd15d2050c044ca9525ba165e9073ac8e036b8d0 (diff)
Merge tag 'kvm-arm-for-v4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
KVM/ARM Changes for v4.16 The changes for this version include icache invalidation optimizations (improving VM startup time), support for forwarded level-triggered interrupts (improved performance for timers and passthrough platform devices), a small fix for power-management notifiers, and some cosmetic changes.
-rw-r--r--Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt187
-rw-r--r--arch/arm/include/asm/kvm_emulate.h2
-rw-r--r--arch/arm/include/asm/kvm_host.h2
-rw-r--r--arch/arm/include/asm/kvm_hyp.h3
-rw-r--r--arch/arm/include/asm/kvm_mmu.h99
-rw-r--r--arch/arm/include/asm/pgtable.h4
-rw-r--r--arch/arm/kvm/hyp/switch.c1
-rw-r--r--arch/arm/kvm/hyp/tlb.c1
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h12
-rw-r--r--arch/arm64/include/asm/assembler.h21
-rw-r--r--arch/arm64/include/asm/cacheflush.h7
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h1
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h36
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h4
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c1
-rw-r--r--arch/arm64/kvm/hyp/switch.c1
-rw-r--r--arch/arm64/kvm/hyp/tlb.c1
-rw-r--r--arch/arm64/lib/clear_user.S2
-rw-r--r--arch/arm64/lib/copy_from_user.S2
-rw-r--r--arch/arm64/lib/copy_in_user.S2
-rw-r--r--arch/arm64/lib/copy_to_user.S2
-rw-r--r--arch/arm64/mm/cache.S36
-rw-r--r--arch/arm64/xen/hypercall.S4
-rw-r--r--include/kvm/arm_arch_timer.h2
-rw-r--r--include/kvm/arm_vgic.h13
-rw-r--r--virt/kvm/arm/arch_timer.c137
-rw-r--r--virt/kvm/arm/arm.c63
-rw-r--r--virt/kvm/arm/hyp/vgic-v2-sr.c1
-rw-r--r--virt/kvm/arm/mmu.c64
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c4
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c115
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c29
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c29
-rw-r--r--virt/kvm/arm/vgic/vgic.c41
-rw-r--r--virt/kvm/arm/vgic/vgic.h8
37 files changed, 579 insertions, 362 deletions
diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
deleted file mode 100644
index 38bca2835278..000000000000
--- a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
+++ /dev/null
@@ -1,187 +0,0 @@
1KVM/ARM VGIC Forwarded Physical Interrupts
2==========================================
3
4The KVM/ARM code implements software support for the ARM Generic
5Interrupt Controller's (GIC's) hardware support for virtualization by
6allowing software to inject virtual interrupts to a VM, which the guest
7OS sees as regular interrupts. The code is famously known as the VGIC.
8
9Some of these virtual interrupts, however, correspond to physical
10interrupts from real physical devices. One example could be the
11architected timer, which itself supports virtualization, and therefore
12lets a guest OS program the hardware device directly to raise an
13interrupt at some point in time. When such an interrupt is raised, the
14host OS initially handles the interrupt and must somehow signal this
15event as a virtual interrupt to the guest. Another example could be a
16passthrough device, where the physical interrupts are initially handled
17by the host, but the device driver for the device lives in the guest OS
18and KVM must therefore somehow inject a virtual interrupt on behalf of
19the physical one to the guest OS.
20
21These virtual interrupts corresponding to a physical interrupt on the
22host are called forwarded physical interrupts, but are also sometimes
23referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
24
25Forwarded physical interrupts are handled slightly differently compared
26to virtual interrupts generated purely by a software emulated device.
27
28
29The HW bit
30----------
31Virtual interrupts are signalled to the guest by programming the List
32Registers (LRs) on the GIC before running a VCPU. The LR is programmed
33with the virtual IRQ number and the state of the interrupt (Pending,
34Active, or Pending+Active). When the guest ACKs and EOIs a virtual
35interrupt, the LR state moves from Pending to Active, and finally to
36inactive.
37
38The LRs include an extra bit, called the HW bit. When this bit is set,
39KVM must also program an additional field in the LR, the physical IRQ
40number, to link the virtual with the physical IRQ.
41
42When the HW bit is set, KVM must EITHER set the Pending OR the Active
43bit, never both at the same time.
44
45Setting the HW bit causes the hardware to deactivate the physical
46interrupt on the physical distributor when the guest deactivates the
47corresponding virtual interrupt.
48
49
50Forwarded Physical Interrupts Life Cycle
51----------------------------------------
52
53The state of forwarded physical interrupts is managed in the following way:
54
55 - The physical interrupt is acked by the host, and becomes active on
56 the physical distributor (*).
57 - KVM sets the LR.Pending bit, because this is the only way the GICV
58 interface is going to present it to the guest.
59 - LR.Pending will stay set as long as the guest has not acked the interrupt.
60 - LR.Pending transitions to LR.Active on the guest read of the IAR, as
61 expected.
62 - On guest EOI, the *physical distributor* active bit gets cleared,
63 but the LR.Active is left untouched (set).
64 - KVM clears the LR on VM exits when the physical distributor
65 active state has been cleared.
66
67(*): The host handling is slightly more complicated. For some forwarded
68interrupts (shared), KVM directly sets the active state on the physical
69distributor before entering the guest, because the interrupt is never actually
70handled on the host (see details on the timer as an example below). For other
71forwarded interrupts (non-shared) the host does not deactivate the interrupt
72when the host ISR completes, but leaves the interrupt active until the guest
73deactivates it. Leaving the interrupt active is allowed, because Linux
74configures the physical GIC with EOIMode=1, which causes EOI operations to
75perform a priority drop allowing the GIC to receive other interrupts of the
76default priority.
77
78
79Forwarded Edge and Level Triggered PPIs and SPIs
80------------------------------------------------
81Forwarded physical interrupts injected should always be active on the
82physical distributor when injected to a guest.
83
84Level-triggered interrupts will keep the interrupt line to the GIC
85asserted, typically until the guest programs the device to deassert the
86line. This means that the interrupt will remain pending on the physical
87distributor until the guest has reprogrammed the device. Since we
88always run the VM with interrupts enabled on the CPU, a pending
89interrupt will exit the guest as soon as we switch into the guest,
90preventing the guest from ever making progress as the process repeats
91over and over. Therefore, the active state on the physical distributor
92must be set when entering the guest, preventing the GIC from forwarding
93the pending interrupt to the CPU. As soon as the guest deactivates the
94interrupt, the physical line is sampled by the hardware again and the host
95takes a new interrupt if and only if the physical line is still asserted.
96
97Edge-triggered interrupts do not exhibit the same problem with
98preventing guest execution that level-triggered interrupts do. One
99option is to not use the HW bit at all, and inject edge-triggered interrupts
100from a physical device as pure virtual interrupts. But that would
101potentially slow down handling of the interrupt in the guest, because a
102physical interrupt occurring in the middle of the guest ISR would
103preempt the guest for the host to handle the interrupt. Additionally,
104if you configure the system to handle interrupts on a separate physical
105core from that running your VCPU, you still have to interrupt the VCPU
106to queue the pending state onto the LR, even though the guest won't use
107this information until the guest ISR completes. Therefore, the HW
108bit should always be set for forwarded edge-triggered interrupts. With
109the HW bit set, the virtual interrupt is injected and additional
110physical interrupts occurring before the guest deactivates the interrupt
111simply mark the state on the physical distributor as Pending+Active. As
112soon as the guest deactivates the interrupt, the host takes another
113interrupt if and only if there was a physical interrupt between injecting
114the forwarded interrupt to the guest and the guest deactivating the
115interrupt.
116
117Consequently, whenever we schedule a VCPU with one or more LRs with the
118HW bit set, the interrupt must also be active on the physical
119distributor.
120
121
122Forwarded LPIs
123--------------
124LPIs, introduced in GICv3, are always edge-triggered and do not have an
125active state. They become pending when a device signals them, and as
126soon as they are acked by the CPU, they are inactive again.
127
128It therefore doesn't make sense, and is not supported, to set the HW bit
129for physical LPIs that are forwarded to a VM as virtual interrupts,
130typically virtual SPIs.
131
132For LPIs, there is no other choice than to preempt the VCPU thread if
133necessary, and queue the pending state onto the LR.
134
135
136Putting It Together: The Architected Timer
137------------------------------------------
138The architected timer is a device that signals interrupts with level
139triggered semantics. The timer hardware is directly accessed by VCPUs
140which program the timer to fire at some point in time. Each VCPU on a
141system programs the timer to fire at different times, and therefore the
142hardware is multiplexed between multiple VCPUs. This is implemented by
143context-switching the timer state along with each VCPU thread.
144
145However, this means that a scenario like the following is entirely
146possible, and in fact, typical:
147
1481. KVM runs the VCPU
1492. The guest programs the timer to fire in T+100
1503. The guest is idle and calls WFI (wait-for-interrupts)
1514. The hardware traps to the host
1525. KVM stores the timer state to memory and disables the hardware timer
1536. KVM schedules a soft timer to fire in T+(100 - time since step 2)
1547. KVM puts the VCPU thread to sleep (on a waitqueue)
1558. The soft timer fires, waking up the VCPU thread
1569. KVM reprograms the timer hardware with the VCPU's values
15710. KVM marks the timer interrupt as active on the physical distributor
15811. KVM injects a forwarded physical interrupt to the guest
15912. KVM runs the VCPU
160
161Notice that KVM injects a forwarded physical interrupt in step 11 without
162the corresponding interrupt having actually fired on the host. That is
163exactly why we mark the timer interrupt as active in step 10, because
164the active state on the physical distributor is part of the state
165belonging to the timer hardware, which is context-switched along with
166the VCPU thread.
167
168If the guest does not idle because it is busy, the flow looks like this
169instead:
170
1711. KVM runs the VCPU
1722. The guest programs the timer to fire in T+100
1733. At T+100 the timer fires and a physical IRQ causes the VM to exit
174 (note that this initially only traps to EL2 and does not run the host ISR
175 until KVM has returned to the host).
1764. With interrupts still disabled on the CPU coming back from the guest, KVM
177 stores the virtual timer state to memory and disables the virtual hw timer.
1785. KVM looks at the timer state (in memory) and injects a forwarded physical
179 interrupt because it concludes the timer has expired.
1806. KVM marks the timer interrupt as active on the physical distributor
1817. KVM enables the timer, enables interrupts, and runs the VCPU
182
183Notice that again the forwarded physical interrupt is injected to the
184guest without having actually been handled on the host. In this case it
185is because the physical interrupt is never actually seen by the host because the
186timer is disabled upon guest return, and the virtual forwarded interrupt is
187injected on the KVM guest entry path.
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 3d22eb87f919..9003bd19cb70 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -131,7 +131,7 @@ static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
131static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) 131static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
132{ 132{
133 unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK; 133 unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
134 return cpsr_mode > USR_MODE;; 134 return cpsr_mode > USR_MODE;
135} 135}
136 136
137static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) 137static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index a9f7d3f47134..6394fb99da7f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -48,6 +48,8 @@
48 KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 48 KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
49#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) 49#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
50 50
51DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
52
51u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); 53u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
52int __attribute_const__ kvm_target_cpu(void); 54int __attribute_const__ kvm_target_cpu(void);
53int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 55int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index ab20ffa8b9e7..1ab8329e9ff7 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -21,7 +21,6 @@
21#include <linux/compiler.h> 21#include <linux/compiler.h>
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23#include <asm/cp15.h> 23#include <asm/cp15.h>
24#include <asm/kvm_mmu.h>
25#include <asm/vfp.h> 24#include <asm/vfp.h>
26 25
27#define __hyp_text __section(.hyp.text) notrace 26#define __hyp_text __section(.hyp.text) notrace
@@ -69,6 +68,8 @@
69#define HIFAR __ACCESS_CP15(c6, 4, c0, 2) 68#define HIFAR __ACCESS_CP15(c6, 4, c0, 2)
70#define HPFAR __ACCESS_CP15(c6, 4, c0, 4) 69#define HPFAR __ACCESS_CP15(c6, 4, c0, 4)
71#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0) 70#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
71#define BPIALLIS __ACCESS_CP15(c7, 0, c1, 6)
72#define ICIMVAU __ACCESS_CP15(c7, 0, c5, 1)
72#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0) 73#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
73#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0) 74#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
74#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0) 75#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index fa6f2174276b..bc70a1f0f42d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -37,6 +37,8 @@
37 37
38#include <linux/highmem.h> 38#include <linux/highmem.h>
39#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
40#include <asm/cputype.h>
41#include <asm/kvm_hyp.h>
40#include <asm/pgalloc.h> 42#include <asm/pgalloc.h>
41#include <asm/stage2_pgtable.h> 43#include <asm/stage2_pgtable.h>
42 44
@@ -83,6 +85,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
83 return pmd; 85 return pmd;
84} 86}
85 87
88static inline pte_t kvm_s2pte_mkexec(pte_t pte)
89{
90 pte_val(pte) &= ~L_PTE_XN;
91 return pte;
92}
93
94static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
95{
96 pmd_val(pmd) &= ~PMD_SECT_XN;
97 return pmd;
98}
99
86static inline void kvm_set_s2pte_readonly(pte_t *pte) 100static inline void kvm_set_s2pte_readonly(pte_t *pte)
87{ 101{
88 pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; 102 pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
@@ -93,6 +107,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
93 return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; 107 return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
94} 108}
95 109
110static inline bool kvm_s2pte_exec(pte_t *pte)
111{
112 return !(pte_val(*pte) & L_PTE_XN);
113}
114
96static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) 115static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
97{ 116{
98 pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; 117 pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
@@ -103,6 +122,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
103 return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; 122 return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
104} 123}
105 124
125static inline bool kvm_s2pmd_exec(pmd_t *pmd)
126{
127 return !(pmd_val(*pmd) & PMD_SECT_XN);
128}
129
106static inline bool kvm_page_empty(void *ptr) 130static inline bool kvm_page_empty(void *ptr)
107{ 131{
108 struct page *ptr_page = virt_to_page(ptr); 132 struct page *ptr_page = virt_to_page(ptr);
@@ -126,10 +150,36 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
126 return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101; 150 return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
127} 151}
128 152
129static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, 153static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
130 kvm_pfn_t pfn, 154{
131 unsigned long size) 155 /*
156 * Clean the dcache to the Point of Coherency.
157 *
158 * We need to do this through a kernel mapping (using the
159 * user-space mapping has proved to be the wrong
160 * solution). For that, we need to kmap one page at a time,
161 * and iterate over the range.
162 */
163
164 VM_BUG_ON(size & ~PAGE_MASK);
165
166 while (size) {
167 void *va = kmap_atomic_pfn(pfn);
168
169 kvm_flush_dcache_to_poc(va, PAGE_SIZE);
170
171 size -= PAGE_SIZE;
172 pfn++;
173
174 kunmap_atomic(va);
175 }
176}
177
178static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
179 unsigned long size)
132{ 180{
181 u32 iclsz;
182
133 /* 183 /*
134 * If we are going to insert an instruction page and the icache is 184 * If we are going to insert an instruction page and the icache is
135 * either VIPT or PIPT, there is a potential problem where the host 185 * either VIPT or PIPT, there is a potential problem where the host
@@ -141,23 +191,40 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
141 * 191 *
142 * VIVT caches are tagged using both the ASID and the VMID and doesn't 192 * VIVT caches are tagged using both the ASID and the VMID and doesn't
143 * need any kind of flushing (DDI 0406C.b - Page B3-1392). 193 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
144 *
145 * We need to do this through a kernel mapping (using the
146 * user-space mapping has proved to be the wrong
147 * solution). For that, we need to kmap one page at a time,
148 * and iterate over the range.
149 */ 194 */
150 195
151 VM_BUG_ON(size & ~PAGE_MASK); 196 VM_BUG_ON(size & ~PAGE_MASK);
152 197
198 if (icache_is_vivt_asid_tagged())
199 return;
200
201 if (!icache_is_pipt()) {
202 /* any kind of VIPT cache */
203 __flush_icache_all();
204 return;
205 }
206
207 /*
208 * CTR IminLine contains Log2 of the number of words in the
209 * cache line, so we can get the number of words as
210 * 2 << (IminLine - 1). To get the number of bytes, we
211 * multiply by 4 (the number of bytes in a 32-bit word), and
212 * get 4 << (IminLine).
213 */
214 iclsz = 4 << (read_cpuid(CPUID_CACHETYPE) & 0xf);
215
153 while (size) { 216 while (size) {
154 void *va = kmap_atomic_pfn(pfn); 217 void *va = kmap_atomic_pfn(pfn);
218 void *end = va + PAGE_SIZE;
219 void *addr = va;
155 220
156 kvm_flush_dcache_to_poc(va, PAGE_SIZE); 221 do {
222 write_sysreg(addr, ICIMVAU);
223 addr += iclsz;
224 } while (addr < end);
157 225
158 if (icache_is_pipt()) 226 dsb(ishst);
159 __cpuc_coherent_user_range((unsigned long)va, 227 isb();
160 (unsigned long)va + PAGE_SIZE);
161 228
162 size -= PAGE_SIZE; 229 size -= PAGE_SIZE;
163 pfn++; 230 pfn++;
@@ -165,9 +232,11 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
165 kunmap_atomic(va); 232 kunmap_atomic(va);
166 } 233 }
167 234
168 if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { 235 /* Check if we need to invalidate the BTB */
169 /* any kind of VIPT cache */ 236 if ((read_cpuid_ext(CPUID_EXT_MMFR1) >> 28) != 4) {
170 __flush_icache_all(); 237 write_sysreg(0, BPIALLIS);
238 dsb(ishst);
239 isb();
171 } 240 }
172} 241}
173 242
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 150ece66ddf3..a757401129f9 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -102,8 +102,8 @@ extern pgprot_t pgprot_s2_device;
102#define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY) 102#define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY)
103#define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN) 103#define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN)
104#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) 104#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
105#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) 105#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY | L_PTE_XN)
106#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY) 106#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY | L_PTE_XN)
107 107
108#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) 108#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
109#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) 109#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 330c9ce34ba5..ae45ae96aac2 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -18,6 +18,7 @@
18 18
19#include <asm/kvm_asm.h> 19#include <asm/kvm_asm.h>
20#include <asm/kvm_hyp.h> 20#include <asm/kvm_hyp.h>
21#include <asm/kvm_mmu.h>
21 22
22__asm__(".arch_extension virt"); 23__asm__(".arch_extension virt");
23 24
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 6d810af2d9fd..c0edd450e104 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <asm/kvm_hyp.h> 21#include <asm/kvm_hyp.h>
22#include <asm/kvm_mmu.h>
22 23
23/** 24/**
24 * Flush per-VMID TLBs 25 * Flush per-VMID TLBs
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index b3da6c886835..03064261ee0b 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -25,13 +25,13 @@
25 isb 25 isb
26 .endm 26 .endm
27 27
28 .macro uaccess_ttbr0_disable, tmp1 28 .macro uaccess_ttbr0_disable, tmp1, tmp2
29alternative_if_not ARM64_HAS_PAN 29alternative_if_not ARM64_HAS_PAN
30 __uaccess_ttbr0_disable \tmp1 30 __uaccess_ttbr0_disable \tmp1
31alternative_else_nop_endif 31alternative_else_nop_endif
32 .endm 32 .endm
33 33
34 .macro uaccess_ttbr0_enable, tmp1, tmp2 34 .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
35alternative_if_not ARM64_HAS_PAN 35alternative_if_not ARM64_HAS_PAN
36 save_and_disable_irq \tmp2 // avoid preemption 36 save_and_disable_irq \tmp2 // avoid preemption
37 __uaccess_ttbr0_enable \tmp1 37 __uaccess_ttbr0_enable \tmp1
@@ -39,18 +39,18 @@ alternative_if_not ARM64_HAS_PAN
39alternative_else_nop_endif 39alternative_else_nop_endif
40 .endm 40 .endm
41#else 41#else
42 .macro uaccess_ttbr0_disable, tmp1 42 .macro uaccess_ttbr0_disable, tmp1, tmp2
43 .endm 43 .endm
44 44
45 .macro uaccess_ttbr0_enable, tmp1, tmp2 45 .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
46 .endm 46 .endm
47#endif 47#endif
48 48
49/* 49/*
50 * These macros are no-ops when UAO is present. 50 * These macros are no-ops when UAO is present.
51 */ 51 */
52 .macro uaccess_disable_not_uao, tmp1 52 .macro uaccess_disable_not_uao, tmp1, tmp2
53 uaccess_ttbr0_disable \tmp1 53 uaccess_ttbr0_disable \tmp1, \tmp2
54alternative_if ARM64_ALT_PAN_NOT_UAO 54alternative_if ARM64_ALT_PAN_NOT_UAO
55 SET_PSTATE_PAN(1) 55 SET_PSTATE_PAN(1)
56alternative_else_nop_endif 56alternative_else_nop_endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index aef72d886677..0884e1fdfd30 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -388,6 +388,27 @@ alternative_endif
388 .endm 388 .endm
389 389
390/* 390/*
391 * Macro to perform an instruction cache maintenance for the interval
392 * [start, end)
393 *
394 * start, end: virtual addresses describing the region
395 * label: A label to branch to on user fault.
396 * Corrupts: tmp1, tmp2
397 */
398 .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
399 icache_line_size \tmp1, \tmp2
400 sub \tmp2, \tmp1, #1
401 bic \tmp2, \start, \tmp2
4029997:
403USER(\label, ic ivau, \tmp2) // invalidate I line PoU
404 add \tmp2, \tmp2, \tmp1
405 cmp \tmp2, \end
406 b.lo 9997b
407 dsb ish
408 isb
409 .endm
410
411/*
391 * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present 412 * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
392 */ 413 */
393 .macro reset_pmuserenr_el0, tmpreg 414 .macro reset_pmuserenr_el0, tmpreg
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 955130762a3c..bef9f418f089 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -52,6 +52,12 @@
52 * - start - virtual start address 52 * - start - virtual start address
53 * - end - virtual end address 53 * - end - virtual end address
54 * 54 *
55 * invalidate_icache_range(start, end)
56 *
57 * Invalidate the I-cache in the region described by start, end.
58 * - start - virtual start address
59 * - end - virtual end address
60 *
55 * __flush_cache_user_range(start, end) 61 * __flush_cache_user_range(start, end)
56 * 62 *
57 * Ensure coherency between the I-cache and the D-cache in the 63 * Ensure coherency between the I-cache and the D-cache in the
@@ -66,6 +72,7 @@
66 * - size - region size 72 * - size - region size
67 */ 73 */
68extern void flush_icache_range(unsigned long start, unsigned long end); 74extern void flush_icache_range(unsigned long start, unsigned long end);
75extern int invalidate_icache_range(unsigned long start, unsigned long end);
69extern void __flush_dcache_area(void *addr, size_t len); 76extern void __flush_dcache_area(void *addr, size_t len);
70extern void __inval_dcache_area(void *addr, size_t len); 77extern void __inval_dcache_area(void *addr, size_t len);
71extern void __clean_dcache_area_poc(void *addr, size_t len); 78extern void __clean_dcache_area_poc(void *addr, size_t len);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ea6cb5b24258..e7218cf7df2a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -47,6 +47,8 @@
47 KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 47 KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
48#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) 48#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
49 49
50DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
51
50int __attribute_const__ kvm_target_cpu(void); 52int __attribute_const__ kvm_target_cpu(void);
51int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 53int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
52int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext); 54int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 08d3bb66c8b7..f26f9cd70c72 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -20,7 +20,6 @@
20 20
21#include <linux/compiler.h> 21#include <linux/compiler.h>
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23#include <asm/kvm_mmu.h>
24#include <asm/sysreg.h> 23#include <asm/sysreg.h>
25 24
26#define __hyp_text __section(.hyp.text) notrace 25#define __hyp_text __section(.hyp.text) notrace
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 672c8684d5c2..06f1f9794679 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -173,6 +173,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
173 return pmd; 173 return pmd;
174} 174}
175 175
176static inline pte_t kvm_s2pte_mkexec(pte_t pte)
177{
178 pte_val(pte) &= ~PTE_S2_XN;
179 return pte;
180}
181
182static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
183{
184 pmd_val(pmd) &= ~PMD_S2_XN;
185 return pmd;
186}
187
176static inline void kvm_set_s2pte_readonly(pte_t *pte) 188static inline void kvm_set_s2pte_readonly(pte_t *pte)
177{ 189{
178 pteval_t old_pteval, pteval; 190 pteval_t old_pteval, pteval;
@@ -191,6 +203,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
191 return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY; 203 return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
192} 204}
193 205
206static inline bool kvm_s2pte_exec(pte_t *pte)
207{
208 return !(pte_val(*pte) & PTE_S2_XN);
209}
210
194static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) 211static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
195{ 212{
196 kvm_set_s2pte_readonly((pte_t *)pmd); 213 kvm_set_s2pte_readonly((pte_t *)pmd);
@@ -201,6 +218,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
201 return kvm_s2pte_readonly((pte_t *)pmd); 218 return kvm_s2pte_readonly((pte_t *)pmd);
202} 219}
203 220
221static inline bool kvm_s2pmd_exec(pmd_t *pmd)
222{
223 return !(pmd_val(*pmd) & PMD_S2_XN);
224}
225
204static inline bool kvm_page_empty(void *ptr) 226static inline bool kvm_page_empty(void *ptr)
205{ 227{
206 struct page *ptr_page = virt_to_page(ptr); 228 struct page *ptr_page = virt_to_page(ptr);
@@ -230,21 +252,25 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
230 return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; 252 return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
231} 253}
232 254
233static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, 255static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
234 kvm_pfn_t pfn,
235 unsigned long size)
236{ 256{
237 void *va = page_address(pfn_to_page(pfn)); 257 void *va = page_address(pfn_to_page(pfn));
238 258
239 kvm_flush_dcache_to_poc(va, size); 259 kvm_flush_dcache_to_poc(va, size);
260}
240 261
262static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
263 unsigned long size)
264{
241 if (icache_is_aliasing()) { 265 if (icache_is_aliasing()) {
242 /* any kind of VIPT cache */ 266 /* any kind of VIPT cache */
243 __flush_icache_all(); 267 __flush_icache_all();
244 } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { 268 } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
245 /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ 269 /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
246 flush_icache_range((unsigned long)va, 270 void *va = page_address(pfn_to_page(pfn));
247 (unsigned long)va + size); 271
272 invalidate_icache_range((unsigned long)va,
273 (unsigned long)va + size);
248 } 274 }
249} 275}
250 276
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index eb0c2bd90de9..af035331fb09 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -177,9 +177,11 @@
177 */ 177 */
178#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ 178#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
179#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 179#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
180#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */
180 181
181#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */ 182#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
182#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ 183#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
184#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
183 185
184/* 186/*
185 * Memory Attribute override for Stage-2 (MemAttr[3:0]) 187 * Memory Attribute override for Stage-2 (MemAttr[3:0])
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 0a5635fb0ef9..4e12dabd342b 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -60,8 +60,8 @@
60#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) 60#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
61#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) 61#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
62 62
63#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) 63#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
64#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) 64#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
65 65
66#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) 66#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
67#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) 67#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..360455f86346 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -21,6 +21,7 @@
21#include <asm/debug-monitors.h> 21#include <asm/debug-monitors.h>
22#include <asm/kvm_asm.h> 22#include <asm/kvm_asm.h>
23#include <asm/kvm_hyp.h> 23#include <asm/kvm_hyp.h>
24#include <asm/kvm_mmu.h>
24 25
25#define read_debug(r,n) read_sysreg(r##n##_el1) 26#define read_debug(r,n) read_sysreg(r##n##_el1)
26#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) 27#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index f7c651f3a8c0..f3d8bed096f5 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -21,6 +21,7 @@
21#include <asm/kvm_asm.h> 21#include <asm/kvm_asm.h>
22#include <asm/kvm_emulate.h> 22#include <asm/kvm_emulate.h>
23#include <asm/kvm_hyp.h> 23#include <asm/kvm_hyp.h>
24#include <asm/kvm_mmu.h>
24#include <asm/fpsimd.h> 25#include <asm/fpsimd.h>
25#include <asm/debug-monitors.h> 26#include <asm/debug-monitors.h>
26 27
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 73464a96c365..131c7772703c 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -16,6 +16,7 @@
16 */ 16 */
17 17
18#include <asm/kvm_hyp.h> 18#include <asm/kvm_hyp.h>
19#include <asm/kvm_mmu.h>
19#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
20 21
21static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) 22static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index e88fb99c1561..8932e5f7a6f3 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
50 b.mi 5f 50 b.mi 5f
51uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 51uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
525: mov x0, #0 525: mov x0, #0
53 uaccess_disable_not_uao x2 53 uaccess_disable_not_uao x2, x3
54 ret 54 ret
55ENDPROC(__clear_user) 55ENDPROC(__clear_user)
56 56
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4b5d826895ff..bc108634992c 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -67,7 +67,7 @@ ENTRY(__arch_copy_from_user)
67 uaccess_enable_not_uao x3, x4 67 uaccess_enable_not_uao x3, x4
68 add end, x0, x2 68 add end, x0, x2
69#include "copy_template.S" 69#include "copy_template.S"
70 uaccess_disable_not_uao x3 70 uaccess_disable_not_uao x3, x4
71 mov x0, #0 // Nothing to copy 71 mov x0, #0 // Nothing to copy
72 ret 72 ret
73ENDPROC(__arch_copy_from_user) 73ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index b24a830419ad..e6dd59dd4053 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -68,7 +68,7 @@ ENTRY(raw_copy_in_user)
68 uaccess_enable_not_uao x3, x4 68 uaccess_enable_not_uao x3, x4
69 add end, x0, x2 69 add end, x0, x2
70#include "copy_template.S" 70#include "copy_template.S"
71 uaccess_disable_not_uao x3 71 uaccess_disable_not_uao x3, x4
72 mov x0, #0 72 mov x0, #0
73 ret 73 ret
74ENDPROC(raw_copy_in_user) 74ENDPROC(raw_copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 351f0766f7a6..bd20f9f7dd84 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -66,7 +66,7 @@ ENTRY(__arch_copy_to_user)
66 uaccess_enable_not_uao x3, x4 66 uaccess_enable_not_uao x3, x4
67 add end, x0, x2 67 add end, x0, x2
68#include "copy_template.S" 68#include "copy_template.S"
69 uaccess_disable_not_uao x3 69 uaccess_disable_not_uao x3, x4
70 mov x0, #0 70 mov x0, #0
71 ret 71 ret
72ENDPROC(__arch_copy_to_user) 72ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7f1dbe962cf5..758bde7e2fa6 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -49,7 +49,7 @@ ENTRY(flush_icache_range)
49 * - end - virtual end address of region 49 * - end - virtual end address of region
50 */ 50 */
51ENTRY(__flush_cache_user_range) 51ENTRY(__flush_cache_user_range)
52 uaccess_ttbr0_enable x2, x3 52 uaccess_ttbr0_enable x2, x3, x4
53 dcache_line_size x2, x3 53 dcache_line_size x2, x3
54 sub x3, x2, #1 54 sub x3, x2, #1
55 bic x4, x0, x3 55 bic x4, x0, x3
@@ -60,19 +60,10 @@ user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
60 b.lo 1b 60 b.lo 1b
61 dsb ish 61 dsb ish
62 62
63 icache_line_size x2, x3 63 invalidate_icache_by_line x0, x1, x2, x3, 9f
64 sub x3, x2, #1
65 bic x4, x0, x3
661:
67USER(9f, ic ivau, x4 ) // invalidate I line PoU
68 add x4, x4, x2
69 cmp x4, x1
70 b.lo 1b
71 dsb ish
72 isb
73 mov x0, #0 64 mov x0, #0
741: 651:
75 uaccess_ttbr0_disable x1 66 uaccess_ttbr0_disable x1, x2
76 ret 67 ret
779: 689:
78 mov x0, #-EFAULT 69 mov x0, #-EFAULT
@@ -81,6 +72,27 @@ ENDPROC(flush_icache_range)
81ENDPROC(__flush_cache_user_range) 72ENDPROC(__flush_cache_user_range)
82 73
83/* 74/*
75 * invalidate_icache_range(start,end)
76 *
77 * Ensure that the I cache is invalid within specified region.
78 *
79 * - start - virtual start address of region
80 * - end - virtual end address of region
81 */
82ENTRY(invalidate_icache_range)
83 uaccess_ttbr0_enable x2, x3, x4
84
85 invalidate_icache_by_line x0, x1, x2, x3, 2f
86 mov x0, xzr
871:
88 uaccess_ttbr0_disable x1, x2
89 ret
902:
91 mov x0, #-EFAULT
92 b 1b
93ENDPROC(invalidate_icache_range)
94
95/*
84 * __flush_dcache_area(kaddr, size) 96 * __flush_dcache_area(kaddr, size)
85 * 97 *
86 * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) 98 * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 401ceb71540c..c5f05c4a4d00 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -101,12 +101,12 @@ ENTRY(privcmd_call)
101 * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation 101 * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
102 * is enabled (it implies that hardware UAO and PAN disabled). 102 * is enabled (it implies that hardware UAO and PAN disabled).
103 */ 103 */
104 uaccess_ttbr0_enable x6, x7 104 uaccess_ttbr0_enable x6, x7, x8
105 hvc XEN_IMM 105 hvc XEN_IMM
106 106
107 /* 107 /*
108 * Disable userspace access from kernel once the hyp call completed. 108 * Disable userspace access from kernel once the hyp call completed.
109 */ 109 */
110 uaccess_ttbr0_disable x6 110 uaccess_ttbr0_disable x6, x7
111 ret 111 ret
112ENDPROC(privcmd_call); 112ENDPROC(privcmd_call);
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6e45608b2399..b1dcfde0a3ef 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -90,6 +90,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
90 90
91void kvm_timer_init_vhe(void); 91void kvm_timer_init_vhe(void);
92 92
93bool kvm_arch_timer_get_input_level(int vintid);
94
93#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) 95#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer)
94#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) 96#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer)
95 97
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 8c896540a72c..cdbd142ca7f2 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -130,6 +130,17 @@ struct vgic_irq {
130 u8 priority; 130 u8 priority;
131 enum vgic_irq_config config; /* Level or edge */ 131 enum vgic_irq_config config; /* Level or edge */
132 132
133 /*
134 * Callback function pointer to in-kernel devices that can tell us the
135 * state of the input level of mapped level-triggered IRQ faster than
136 * peaking into the physical GIC.
137 *
138 * Always called in non-preemptible section and the functions can use
139 * kvm_arm_get_running_vcpu() to get the vcpu pointer for private
140 * IRQs.
141 */
142 bool (*get_input_level)(int vintid);
143
133 void *owner; /* Opaque pointer to reserve an interrupt 144 void *owner; /* Opaque pointer to reserve an interrupt
134 for in-kernel devices. */ 145 for in-kernel devices. */
135}; 146};
@@ -331,7 +342,7 @@ void kvm_vgic_init_cpu_hardware(void);
331int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, 342int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
332 bool level, void *owner); 343 bool level, void *owner);
333int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, 344int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
334 u32 vintid); 345 u32 vintid, bool (*get_input_level)(int vindid));
335int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); 346int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
336bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); 347bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
337 348
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f9555b1e7f15..fb6bd9b9845e 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -97,15 +97,13 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
97 pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n"); 97 pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
98 return IRQ_NONE; 98 return IRQ_NONE;
99 } 99 }
100 vtimer = vcpu_vtimer(vcpu);
101 100
102 if (!vtimer->irq.level) { 101 vtimer = vcpu_vtimer(vcpu);
103 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); 102 if (kvm_timer_should_fire(vtimer))
104 if (kvm_timer_irq_can_fire(vtimer)) 103 kvm_timer_update_irq(vcpu, true, vtimer);
105 kvm_timer_update_irq(vcpu, true, vtimer);
106 }
107 104
108 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 105 if (static_branch_unlikely(&userspace_irqchip_in_use) &&
106 unlikely(!irqchip_in_kernel(vcpu->kvm)))
109 kvm_vtimer_update_mask_user(vcpu); 107 kvm_vtimer_update_mask_user(vcpu);
110 108
111 return IRQ_HANDLED; 109 return IRQ_HANDLED;
@@ -231,6 +229,16 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
231{ 229{
232 u64 cval, now; 230 u64 cval, now;
233 231
232 if (timer_ctx->loaded) {
233 u32 cnt_ctl;
234
235 /* Only the virtual timer can be loaded so far */
236 cnt_ctl = read_sysreg_el0(cntv_ctl);
237 return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
238 (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
239 !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
240 }
241
234 if (!kvm_timer_irq_can_fire(timer_ctx)) 242 if (!kvm_timer_irq_can_fire(timer_ctx))
235 return false; 243 return false;
236 244
@@ -245,15 +253,7 @@ bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
245 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 253 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
246 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 254 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
247 255
248 if (vtimer->irq.level || ptimer->irq.level) 256 if (kvm_timer_should_fire(vtimer))
249 return true;
250
251 /*
252 * When this is called from withing the wait loop of kvm_vcpu_block(),
253 * the software view of the timer state is up to date (timer->loaded
254 * is false), and so we can simply check if the timer should fire now.
255 */
256 if (!vtimer->loaded && kvm_timer_should_fire(vtimer))
257 return true; 257 return true;
258 258
259 return kvm_timer_should_fire(ptimer); 259 return kvm_timer_should_fire(ptimer);
@@ -271,9 +271,9 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
271 /* Populate the device bitmap with the timer states */ 271 /* Populate the device bitmap with the timer states */
272 regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | 272 regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
273 KVM_ARM_DEV_EL1_PTIMER); 273 KVM_ARM_DEV_EL1_PTIMER);
274 if (vtimer->irq.level) 274 if (kvm_timer_should_fire(vtimer))
275 regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; 275 regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
276 if (ptimer->irq.level) 276 if (kvm_timer_should_fire(ptimer))
277 regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; 277 regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
278} 278}
279 279
@@ -286,7 +286,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
286 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, 286 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
287 timer_ctx->irq.level); 287 timer_ctx->irq.level);
288 288
289 if (likely(irqchip_in_kernel(vcpu->kvm))) { 289 if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
290 likely(irqchip_in_kernel(vcpu->kvm))) {
290 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 291 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
291 timer_ctx->irq.irq, 292 timer_ctx->irq.irq,
292 timer_ctx->irq.level, 293 timer_ctx->irq.level,
@@ -324,12 +325,20 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
324 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 325 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
325 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 326 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
326 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 327 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
328 bool level;
327 329
328 if (unlikely(!timer->enabled)) 330 if (unlikely(!timer->enabled))
329 return; 331 return;
330 332
331 if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) 333 /*
332 kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); 334 * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
335 * of its lifecycle is offloaded to the hardware, and we therefore may
336 * not have lowered the irq.level value before having to signal a new
337 * interrupt, but have to signal an interrupt every time the level is
338 * asserted.
339 */
340 level = kvm_timer_should_fire(vtimer);
341 kvm_timer_update_irq(vcpu, level, vtimer);
333 342
334 if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) 343 if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
335 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); 344 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
@@ -337,6 +346,12 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
337 phys_timer_emulate(vcpu); 346 phys_timer_emulate(vcpu);
338} 347}
339 348
349static void __timer_snapshot_state(struct arch_timer_context *timer)
350{
351 timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
352 timer->cnt_cval = read_sysreg_el0(cntv_cval);
353}
354
340static void vtimer_save_state(struct kvm_vcpu *vcpu) 355static void vtimer_save_state(struct kvm_vcpu *vcpu)
341{ 356{
342 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 357 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -348,10 +363,8 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
348 if (!vtimer->loaded) 363 if (!vtimer->loaded)
349 goto out; 364 goto out;
350 365
351 if (timer->enabled) { 366 if (timer->enabled)
352 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); 367 __timer_snapshot_state(vtimer);
353 vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
354 }
355 368
356 /* Disable the virtual timer */ 369 /* Disable the virtual timer */
357 write_sysreg_el0(0, cntv_ctl); 370 write_sysreg_el0(0, cntv_ctl);
@@ -448,8 +461,7 @@ static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
448 bool phys_active; 461 bool phys_active;
449 int ret; 462 int ret;
450 463
451 phys_active = vtimer->irq.level || 464 phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
452 kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
453 465
454 ret = irq_set_irqchip_state(host_vtimer_irq, 466 ret = irq_set_irqchip_state(host_vtimer_irq,
455 IRQCHIP_STATE_ACTIVE, 467 IRQCHIP_STATE_ACTIVE,
@@ -496,8 +508,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
496 vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; 508 vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
497 plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; 509 plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
498 510
499 return vtimer->irq.level != vlevel || 511 return kvm_timer_should_fire(vtimer) != vlevel ||
500 ptimer->irq.level != plevel; 512 kvm_timer_should_fire(ptimer) != plevel;
501} 513}
502 514
503void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) 515void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
@@ -529,54 +541,27 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
529 set_cntvoff(0); 541 set_cntvoff(0);
530} 542}
531 543
532static void unmask_vtimer_irq(struct kvm_vcpu *vcpu) 544/*
545 * With a userspace irqchip we have to check if the guest de-asserted the
546 * timer and if so, unmask the timer irq signal on the host interrupt
547 * controller to ensure that we see future timer signals.
548 */
549static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
533{ 550{
534 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 551 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
535 552
536 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { 553 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
537 kvm_vtimer_update_mask_user(vcpu); 554 __timer_snapshot_state(vtimer);
538 return; 555 if (!kvm_timer_should_fire(vtimer)) {
539 } 556 kvm_timer_update_irq(vcpu, false, vtimer);
540 557 kvm_vtimer_update_mask_user(vcpu);
541 /* 558 }
542 * If the guest disabled the timer without acking the interrupt, then
543 * we must make sure the physical and virtual active states are in
544 * sync by deactivating the physical interrupt, because otherwise we
545 * wouldn't see the next timer interrupt in the host.
546 */
547 if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
548 int ret;
549 ret = irq_set_irqchip_state(host_vtimer_irq,
550 IRQCHIP_STATE_ACTIVE,
551 false);
552 WARN_ON(ret);
553 } 559 }
554} 560}
555 561
556/**
557 * kvm_timer_sync_hwstate - sync timer state from cpu
558 * @vcpu: The vcpu pointer
559 *
560 * Check if any of the timers have expired while we were running in the guest,
561 * and inject an interrupt if that was the case.
562 */
563void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 562void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
564{ 563{
565 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 564 unmask_vtimer_irq_user(vcpu);
566
567 /*
568 * If we entered the guest with the vtimer output asserted we have to
569 * check if the guest has modified the timer so that we should lower
570 * the line at this point.
571 */
572 if (vtimer->irq.level) {
573 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
574 vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
575 if (!kvm_timer_should_fire(vtimer)) {
576 kvm_timer_update_irq(vcpu, false, vtimer);
577 unmask_vtimer_irq(vcpu);
578 }
579 }
580} 565}
581 566
582int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) 567int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -807,6 +792,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
807 return true; 792 return true;
808} 793}
809 794
795bool kvm_arch_timer_get_input_level(int vintid)
796{
797 struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
798 struct arch_timer_context *timer;
799
800 if (vintid == vcpu_vtimer(vcpu)->irq.irq)
801 timer = vcpu_vtimer(vcpu);
802 else
803 BUG(); /* We only map the vtimer so far */
804
805 return kvm_timer_should_fire(timer);
806}
807
810int kvm_timer_enable(struct kvm_vcpu *vcpu) 808int kvm_timer_enable(struct kvm_vcpu *vcpu)
811{ 809{
812 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 810 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -828,7 +826,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
828 return -EINVAL; 826 return -EINVAL;
829 } 827 }
830 828
831 ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq); 829 ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
830 kvm_arch_timer_get_input_level);
832 if (ret) 831 if (ret)
833 return ret; 832 return ret;
834 833
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index cd7d90c9f644..92b95ae9a2ca 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -71,17 +71,17 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
71 71
72static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) 72static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
73{ 73{
74 BUG_ON(preemptible());
75 __this_cpu_write(kvm_arm_running_vcpu, vcpu); 74 __this_cpu_write(kvm_arm_running_vcpu, vcpu);
76} 75}
77 76
77DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
78
78/** 79/**
79 * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU. 80 * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
80 * Must be called from non-preemptible context 81 * Must be called from non-preemptible context
81 */ 82 */
82struct kvm_vcpu *kvm_arm_get_running_vcpu(void) 83struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
83{ 84{
84 BUG_ON(preemptible());
85 return __this_cpu_read(kvm_arm_running_vcpu); 85 return __this_cpu_read(kvm_arm_running_vcpu);
86} 86}
87 87
@@ -295,6 +295,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
295 295
296void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 296void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
297{ 297{
298 if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
299 static_branch_dec(&userspace_irqchip_in_use);
300
298 kvm_mmu_free_memory_caches(vcpu); 301 kvm_mmu_free_memory_caches(vcpu);
299 kvm_timer_vcpu_terminate(vcpu); 302 kvm_timer_vcpu_terminate(vcpu);
300 kvm_pmu_vcpu_destroy(vcpu); 303 kvm_pmu_vcpu_destroy(vcpu);
@@ -532,14 +535,22 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
532 535
533 vcpu->arch.has_run_once = true; 536 vcpu->arch.has_run_once = true;
534 537
535 /* 538 if (likely(irqchip_in_kernel(kvm))) {
536 * Map the VGIC hardware resources before running a vcpu the first 539 /*
537 * time on this VM. 540 * Map the VGIC hardware resources before running a vcpu the
538 */ 541 * first time on this VM.
539 if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { 542 */
540 ret = kvm_vgic_map_resources(kvm); 543 if (unlikely(!vgic_ready(kvm))) {
541 if (ret) 544 ret = kvm_vgic_map_resources(kvm);
542 return ret; 545 if (ret)
546 return ret;
547 }
548 } else {
549 /*
550 * Tell the rest of the code that there are userspace irqchip
551 * VMs in the wild.
552 */
553 static_branch_inc(&userspace_irqchip_in_use);
543 } 554 }
544 555
545 ret = kvm_timer_enable(vcpu); 556 ret = kvm_timer_enable(vcpu);
@@ -680,19 +691,30 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
680 kvm_vgic_flush_hwstate(vcpu); 691 kvm_vgic_flush_hwstate(vcpu);
681 692
682 /* 693 /*
683 * If we have a singal pending, or need to notify a userspace 694 * Exit if we have a signal pending so that we can deliver the
684 * irqchip about timer or PMU level changes, then we exit (and 695 * signal to user space.
685 * update the timer level state in kvm_timer_update_run
686 * below).
687 */ 696 */
688 if (signal_pending(current) || 697 if (signal_pending(current)) {
689 kvm_timer_should_notify_user(vcpu) ||
690 kvm_pmu_should_notify_user(vcpu)) {
691 ret = -EINTR; 698 ret = -EINTR;
692 run->exit_reason = KVM_EXIT_INTR; 699 run->exit_reason = KVM_EXIT_INTR;
693 } 700 }
694 701
695 /* 702 /*
703 * If we're using a userspace irqchip, then check if we need
704 * to tell a userspace irqchip about timer or PMU level
705 * changes and if so, exit to userspace (the actual level
706 * state gets updated in kvm_timer_update_run and
707 * kvm_pmu_update_run below).
708 */
709 if (static_branch_unlikely(&userspace_irqchip_in_use)) {
710 if (kvm_timer_should_notify_user(vcpu) ||
711 kvm_pmu_should_notify_user(vcpu)) {
712 ret = -EINTR;
713 run->exit_reason = KVM_EXIT_INTR;
714 }
715 }
716
717 /*
696 * Ensure we set mode to IN_GUEST_MODE after we disable 718 * Ensure we set mode to IN_GUEST_MODE after we disable
697 * interrupts and before the final VCPU requests check. 719 * interrupts and before the final VCPU requests check.
698 * See the comment in kvm_vcpu_exiting_guest_mode() and 720 * See the comment in kvm_vcpu_exiting_guest_mode() and
@@ -704,7 +726,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
704 kvm_request_pending(vcpu)) { 726 kvm_request_pending(vcpu)) {
705 vcpu->mode = OUTSIDE_GUEST_MODE; 727 vcpu->mode = OUTSIDE_GUEST_MODE;
706 kvm_pmu_sync_hwstate(vcpu); 728 kvm_pmu_sync_hwstate(vcpu);
707 kvm_timer_sync_hwstate(vcpu); 729 if (static_branch_unlikely(&userspace_irqchip_in_use))
730 kvm_timer_sync_hwstate(vcpu);
708 kvm_vgic_sync_hwstate(vcpu); 731 kvm_vgic_sync_hwstate(vcpu);
709 local_irq_enable(); 732 local_irq_enable();
710 preempt_enable(); 733 preempt_enable();
@@ -748,7 +771,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
748 * we don't want vtimer interrupts to race with syncing the 771 * we don't want vtimer interrupts to race with syncing the
749 * timer virtual interrupt state. 772 * timer virtual interrupt state.
750 */ 773 */
751 kvm_timer_sync_hwstate(vcpu); 774 if (static_branch_unlikely(&userspace_irqchip_in_use))
775 kvm_timer_sync_hwstate(vcpu);
752 776
753 /* 777 /*
754 * We may have taken a host interrupt in HYP mode (ie 778 * We may have taken a host interrupt in HYP mode (ie
@@ -1277,6 +1301,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
1277 cpu_hyp_reset(); 1301 cpu_hyp_reset();
1278 1302
1279 return NOTIFY_OK; 1303 return NOTIFY_OK;
1304 case CPU_PM_ENTER_FAILED:
1280 case CPU_PM_EXIT: 1305 case CPU_PM_EXIT:
1281 if (__this_cpu_read(kvm_arm_hardware_enabled)) 1306 if (__this_cpu_read(kvm_arm_hardware_enabled))
1282 /* The hardware was enabled before suspend. */ 1307 /* The hardware was enabled before suspend. */
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index d7fd46fe9efb..4fe6e797e8b3 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -21,6 +21,7 @@
21 21
22#include <asm/kvm_emulate.h> 22#include <asm/kvm_emulate.h>
23#include <asm/kvm_hyp.h> 23#include <asm/kvm_hyp.h>
24#include <asm/kvm_mmu.h>
24 25
25static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) 26static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
26{ 27{
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b36945d49986..a1ea43fa75cf 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -926,6 +926,25 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
926 return 0; 926 return 0;
927} 927}
928 928
929static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
930{
931 pmd_t *pmdp;
932 pte_t *ptep;
933
934 pmdp = stage2_get_pmd(kvm, NULL, addr);
935 if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
936 return false;
937
938 if (pmd_thp_or_huge(*pmdp))
939 return kvm_s2pmd_exec(pmdp);
940
941 ptep = pte_offset_kernel(pmdp, addr);
942 if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
943 return false;
944
945 return kvm_s2pte_exec(ptep);
946}
947
929static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 948static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
930 phys_addr_t addr, const pte_t *new_pte, 949 phys_addr_t addr, const pte_t *new_pte,
931 unsigned long flags) 950 unsigned long flags)
@@ -1257,10 +1276,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
1257 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); 1276 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1258} 1277}
1259 1278
1260static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, 1279static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
1261 unsigned long size)
1262{ 1280{
1263 __coherent_cache_guest_page(vcpu, pfn, size); 1281 __clean_dcache_guest_page(pfn, size);
1282}
1283
1284static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
1285{
1286 __invalidate_icache_guest_page(pfn, size);
1264} 1287}
1265 1288
1266static void kvm_send_hwpoison_signal(unsigned long address, 1289static void kvm_send_hwpoison_signal(unsigned long address,
@@ -1286,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1286 unsigned long fault_status) 1309 unsigned long fault_status)
1287{ 1310{
1288 int ret; 1311 int ret;
1289 bool write_fault, writable, hugetlb = false, force_pte = false; 1312 bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false;
1290 unsigned long mmu_seq; 1313 unsigned long mmu_seq;
1291 gfn_t gfn = fault_ipa >> PAGE_SHIFT; 1314 gfn_t gfn = fault_ipa >> PAGE_SHIFT;
1292 struct kvm *kvm = vcpu->kvm; 1315 struct kvm *kvm = vcpu->kvm;
@@ -1298,7 +1321,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1298 unsigned long flags = 0; 1321 unsigned long flags = 0;
1299 1322
1300 write_fault = kvm_is_write_fault(vcpu); 1323 write_fault = kvm_is_write_fault(vcpu);
1301 if (fault_status == FSC_PERM && !write_fault) { 1324 exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
1325 VM_BUG_ON(write_fault && exec_fault);
1326
1327 if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
1302 kvm_err("Unexpected L2 read permission error\n"); 1328 kvm_err("Unexpected L2 read permission error\n");
1303 return -EFAULT; 1329 return -EFAULT;
1304 } 1330 }
@@ -1391,7 +1417,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1391 new_pmd = kvm_s2pmd_mkwrite(new_pmd); 1417 new_pmd = kvm_s2pmd_mkwrite(new_pmd);
1392 kvm_set_pfn_dirty(pfn); 1418 kvm_set_pfn_dirty(pfn);
1393 } 1419 }
1394 coherent_cache_guest_page(vcpu, pfn, PMD_SIZE); 1420
1421 if (fault_status != FSC_PERM)
1422 clean_dcache_guest_page(pfn, PMD_SIZE);
1423
1424 if (exec_fault) {
1425 new_pmd = kvm_s2pmd_mkexec(new_pmd);
1426 invalidate_icache_guest_page(pfn, PMD_SIZE);
1427 } else if (fault_status == FSC_PERM) {
1428 /* Preserve execute if XN was already cleared */
1429 if (stage2_is_exec(kvm, fault_ipa))
1430 new_pmd = kvm_s2pmd_mkexec(new_pmd);
1431 }
1432
1395 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 1433 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1396 } else { 1434 } else {
1397 pte_t new_pte = pfn_pte(pfn, mem_type); 1435 pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1401,7 +1439,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1401 kvm_set_pfn_dirty(pfn); 1439 kvm_set_pfn_dirty(pfn);
1402 mark_page_dirty(kvm, gfn); 1440 mark_page_dirty(kvm, gfn);
1403 } 1441 }
1404 coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE); 1442
1443 if (fault_status != FSC_PERM)
1444 clean_dcache_guest_page(pfn, PAGE_SIZE);
1445
1446 if (exec_fault) {
1447 new_pte = kvm_s2pte_mkexec(new_pte);
1448 invalidate_icache_guest_page(pfn, PAGE_SIZE);
1449 } else if (fault_status == FSC_PERM) {
1450 /* Preserve execute if XN was already cleared */
1451 if (stage2_is_exec(kvm, fault_ipa))
1452 new_pte = kvm_s2pte_mkexec(new_pte);
1453 }
1454
1405 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); 1455 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
1406 } 1456 }
1407 1457
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8e633bd9cc1e..465095355666 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1034,10 +1034,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
1034 1034
1035 device = vgic_its_alloc_device(its, device_id, itt_addr, 1035 device = vgic_its_alloc_device(its, device_id, itt_addr,
1036 num_eventid_bits); 1036 num_eventid_bits);
1037 if (IS_ERR(device))
1038 return PTR_ERR(device);
1039 1037
1040 return 0; 1038 return PTR_ERR_OR_ZERO(device);
1041} 1039}
1042 1040
1043/* 1041/*
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index deb51ee16a3d..83d82bd7dc4e 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -16,6 +16,7 @@
16#include <linux/kvm.h> 16#include <linux/kvm.h>
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include <kvm/iodev.h> 18#include <kvm/iodev.h>
19#include <kvm/arm_arch_timer.h>
19#include <kvm/arm_vgic.h> 20#include <kvm/arm_vgic.h>
20 21
21#include "vgic.h" 22#include "vgic.h"
@@ -122,10 +123,43 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
122 return value; 123 return value;
123} 124}
124 125
126/*
127 * This function will return the VCPU that performed the MMIO access and
128 * trapped from within the VM, and will return NULL if this is a userspace
129 * access.
130 *
131 * We can disable preemption locally around accessing the per-CPU variable,
132 * and use the resolved vcpu pointer after enabling preemption again, because
133 * even if the current thread is migrated to another CPU, reading the per-CPU
134 * value later will give us the same value as we update the per-CPU variable
135 * in the preempt notifier handlers.
136 */
137static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
138{
139 struct kvm_vcpu *vcpu;
140
141 preempt_disable();
142 vcpu = kvm_arm_get_running_vcpu();
143 preempt_enable();
144 return vcpu;
145}
146
147/* Must be called with irq->irq_lock held */
148static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
149 bool is_uaccess)
150{
151 if (is_uaccess)
152 return;
153
154 irq->pending_latch = true;
155 vgic_irq_set_phys_active(irq, true);
156}
157
125void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, 158void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
126 gpa_t addr, unsigned int len, 159 gpa_t addr, unsigned int len,
127 unsigned long val) 160 unsigned long val)
128{ 161{
162 bool is_uaccess = !vgic_get_mmio_requester_vcpu();
129 u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 163 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
130 int i; 164 int i;
131 unsigned long flags; 165 unsigned long flags;
@@ -134,17 +168,45 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
134 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 168 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
135 169
136 spin_lock_irqsave(&irq->irq_lock, flags); 170 spin_lock_irqsave(&irq->irq_lock, flags);
137 irq->pending_latch = true; 171 if (irq->hw)
138 172 vgic_hw_irq_spending(vcpu, irq, is_uaccess);
173 else
174 irq->pending_latch = true;
139 vgic_queue_irq_unlock(vcpu->kvm, irq, flags); 175 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
140 vgic_put_irq(vcpu->kvm, irq); 176 vgic_put_irq(vcpu->kvm, irq);
141 } 177 }
142} 178}
143 179
180/* Must be called with irq->irq_lock held */
181static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
182 bool is_uaccess)
183{
184 if (is_uaccess)
185 return;
186
187 irq->pending_latch = false;
188
189 /*
190 * We don't want the guest to effectively mask the physical
191 * interrupt by doing a write to SPENDR followed by a write to
192 * CPENDR for HW interrupts, so we clear the active state on
193 * the physical side if the virtual interrupt is not active.
194 * This may lead to taking an additional interrupt on the
195 * host, but that should not be a problem as the worst that
196 * can happen is an additional vgic injection. We also clear
197 * the pending state to maintain proper semantics for edge HW
198 * interrupts.
199 */
200 vgic_irq_set_phys_pending(irq, false);
201 if (!irq->active)
202 vgic_irq_set_phys_active(irq, false);
203}
204
144void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, 205void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
145 gpa_t addr, unsigned int len, 206 gpa_t addr, unsigned int len,
146 unsigned long val) 207 unsigned long val)
147{ 208{
209 bool is_uaccess = !vgic_get_mmio_requester_vcpu();
148 u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 210 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
149 int i; 211 int i;
150 unsigned long flags; 212 unsigned long flags;
@@ -154,7 +216,10 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
154 216
155 spin_lock_irqsave(&irq->irq_lock, flags); 217 spin_lock_irqsave(&irq->irq_lock, flags);
156 218
157 irq->pending_latch = false; 219 if (irq->hw)
220 vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
221 else
222 irq->pending_latch = false;
158 223
159 spin_unlock_irqrestore(&irq->irq_lock, flags); 224 spin_unlock_irqrestore(&irq->irq_lock, flags);
160 vgic_put_irq(vcpu->kvm, irq); 225 vgic_put_irq(vcpu->kvm, irq);
@@ -181,27 +246,24 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
181 return value; 246 return value;
182} 247}
183 248
249/* Must be called with irq->irq_lock held */
250static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
251 bool active, bool is_uaccess)
252{
253 if (is_uaccess)
254 return;
255
256 irq->active = active;
257 vgic_irq_set_phys_active(irq, active);
258}
259
184static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, 260static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
185 bool new_active_state) 261 bool active)
186{ 262{
187 struct kvm_vcpu *requester_vcpu;
188 unsigned long flags; 263 unsigned long flags;
189 spin_lock_irqsave(&irq->irq_lock, flags); 264 struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
190 265
191 /* 266 spin_lock_irqsave(&irq->irq_lock, flags);
192 * The vcpu parameter here can mean multiple things depending on how
193 * this function is called; when handling a trap from the kernel it
194 * depends on the GIC version, and these functions are also called as
195 * part of save/restore from userspace.
196 *
197 * Therefore, we have to figure out the requester in a reliable way.
198 *
199 * When accessing VGIC state from user space, the requester_vcpu is
200 * NULL, which is fine, because we guarantee that no VCPUs are running
201 * when accessing VGIC state from user space so irq->vcpu->cpu is
202 * always -1.
203 */
204 requester_vcpu = kvm_arm_get_running_vcpu();
205 267
206 /* 268 /*
207 * If this virtual IRQ was written into a list register, we 269 * If this virtual IRQ was written into a list register, we
@@ -213,14 +275,23 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
213 * vgic_change_active_prepare) and still has to sync back this IRQ, 275 * vgic_change_active_prepare) and still has to sync back this IRQ,
214 * so we release and re-acquire the spin_lock to let the other thread 276 * so we release and re-acquire the spin_lock to let the other thread
215 * sync back the IRQ. 277 * sync back the IRQ.
278 *
279 * When accessing VGIC state from user space, requester_vcpu is
280 * NULL, which is fine, because we guarantee that no VCPUs are running
281 * when accessing VGIC state from user space so irq->vcpu->cpu is
282 * always -1.
216 */ 283 */
217 while (irq->vcpu && /* IRQ may have state in an LR somewhere */ 284 while (irq->vcpu && /* IRQ may have state in an LR somewhere */
218 irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ 285 irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
219 irq->vcpu->cpu != -1) /* VCPU thread is running */ 286 irq->vcpu->cpu != -1) /* VCPU thread is running */
220 cond_resched_lock(&irq->irq_lock); 287 cond_resched_lock(&irq->irq_lock);
221 288
222 irq->active = new_active_state; 289 if (irq->hw)
223 if (new_active_state) 290 vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
291 else
292 irq->active = active;
293
294 if (irq->active)
224 vgic_queue_irq_unlock(vcpu->kvm, irq, flags); 295 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
225 else 296 else
226 spin_unlock_irqrestore(&irq->irq_lock, flags); 297 spin_unlock_irqrestore(&irq->irq_lock, flags);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 80897102da26..c32d7b93ffd1 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -105,6 +105,26 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
105 irq->pending_latch = false; 105 irq->pending_latch = false;
106 } 106 }
107 107
108 /*
109 * Level-triggered mapped IRQs are special because we only
110 * observe rising edges as input to the VGIC.
111 *
112 * If the guest never acked the interrupt we have to sample
113 * the physical line and set the line level, because the
114 * device state could have changed or we simply need to
115 * process the still pending interrupt later.
116 *
117 * If this causes us to lower the level, we have to also clear
118 * the physical active state, since we will otherwise never be
119 * told when the interrupt becomes asserted again.
120 */
121 if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
122 irq->line_level = vgic_get_phys_line_level(irq);
123
124 if (!irq->line_level)
125 vgic_irq_set_phys_active(irq, false);
126 }
127
108 spin_unlock_irqrestore(&irq->irq_lock, flags); 128 spin_unlock_irqrestore(&irq->irq_lock, flags);
109 vgic_put_irq(vcpu->kvm, irq); 129 vgic_put_irq(vcpu->kvm, irq);
110 } 130 }
@@ -162,6 +182,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
162 val |= GICH_LR_EOI; 182 val |= GICH_LR_EOI;
163 } 183 }
164 184
185 /*
186 * Level-triggered mapped IRQs are special because we only observe
187 * rising edges as input to the VGIC. We therefore lower the line
188 * level here, so that we can take new virtual IRQs. See
189 * vgic_v2_fold_lr_state for more info.
190 */
191 if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT))
192 irq->line_level = false;
193
165 /* The GICv2 LR only holds five bits of priority. */ 194 /* The GICv2 LR only holds five bits of priority. */
166 val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; 195 val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
167 196
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index f47e8481fa45..6b329414e57a 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -96,6 +96,26 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
96 irq->pending_latch = false; 96 irq->pending_latch = false;
97 } 97 }
98 98
99 /*
100 * Level-triggered mapped IRQs are special because we only
101 * observe rising edges as input to the VGIC.
102 *
103 * If the guest never acked the interrupt we have to sample
104 * the physical line and set the line level, because the
105 * device state could have changed or we simply need to
106 * process the still pending interrupt later.
107 *
108 * If this causes us to lower the level, we have to also clear
109 * the physical active state, since we will otherwise never be
110 * told when the interrupt becomes asserted again.
111 */
112 if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
113 irq->line_level = vgic_get_phys_line_level(irq);
114
115 if (!irq->line_level)
116 vgic_irq_set_phys_active(irq, false);
117 }
118
99 spin_unlock_irqrestore(&irq->irq_lock, flags); 119 spin_unlock_irqrestore(&irq->irq_lock, flags);
100 vgic_put_irq(vcpu->kvm, irq); 120 vgic_put_irq(vcpu->kvm, irq);
101 } 121 }
@@ -146,6 +166,15 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
146 } 166 }
147 167
148 /* 168 /*
169 * Level-triggered mapped IRQs are special because we only observe
170 * rising edges as input to the VGIC. We therefore lower the line
171 * level here, so that we can take new virtual IRQs. See
172 * vgic_v3_fold_lr_state for more info.
173 */
174 if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
175 irq->line_level = false;
176
177 /*
149 * We currently only support Group1 interrupts, which is a 178 * We currently only support Group1 interrupts, which is a
150 * known defect. This needs to be addressed at some point. 179 * known defect. This needs to be addressed at some point.
151 */ 180 */
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index ecb8e25f5fe5..c7c5ef190afa 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -144,6 +144,38 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
144 kfree(irq); 144 kfree(irq);
145} 145}
146 146
147void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
148{
149 WARN_ON(irq_set_irqchip_state(irq->host_irq,
150 IRQCHIP_STATE_PENDING,
151 pending));
152}
153
154bool vgic_get_phys_line_level(struct vgic_irq *irq)
155{
156 bool line_level;
157
158 BUG_ON(!irq->hw);
159
160 if (irq->get_input_level)
161 return irq->get_input_level(irq->intid);
162
163 WARN_ON(irq_get_irqchip_state(irq->host_irq,
164 IRQCHIP_STATE_PENDING,
165 &line_level));
166 return line_level;
167}
168
169/* Set/Clear the physical active state */
170void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
171{
172
173 BUG_ON(!irq->hw);
174 WARN_ON(irq_set_irqchip_state(irq->host_irq,
175 IRQCHIP_STATE_ACTIVE,
176 active));
177}
178
147/** 179/**
148 * kvm_vgic_target_oracle - compute the target vcpu for an irq 180 * kvm_vgic_target_oracle - compute the target vcpu for an irq
149 * 181 *
@@ -413,7 +445,8 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
413 445
414/* @irq->irq_lock must be held */ 446/* @irq->irq_lock must be held */
415static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, 447static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
416 unsigned int host_irq) 448 unsigned int host_irq,
449 bool (*get_input_level)(int vindid))
417{ 450{
418 struct irq_desc *desc; 451 struct irq_desc *desc;
419 struct irq_data *data; 452 struct irq_data *data;
@@ -433,6 +466,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
433 irq->hw = true; 466 irq->hw = true;
434 irq->host_irq = host_irq; 467 irq->host_irq = host_irq;
435 irq->hwintid = data->hwirq; 468 irq->hwintid = data->hwirq;
469 irq->get_input_level = get_input_level;
436 return 0; 470 return 0;
437} 471}
438 472
@@ -441,10 +475,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
441{ 475{
442 irq->hw = false; 476 irq->hw = false;
443 irq->hwintid = 0; 477 irq->hwintid = 0;
478 irq->get_input_level = NULL;
444} 479}
445 480
446int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, 481int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
447 u32 vintid) 482 u32 vintid, bool (*get_input_level)(int vindid))
448{ 483{
449 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); 484 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
450 unsigned long flags; 485 unsigned long flags;
@@ -453,7 +488,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
453 BUG_ON(!irq); 488 BUG_ON(!irq);
454 489
455 spin_lock_irqsave(&irq->irq_lock, flags); 490 spin_lock_irqsave(&irq->irq_lock, flags);
456 ret = kvm_vgic_map_irq(vcpu, irq, host_irq); 491 ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
457 spin_unlock_irqrestore(&irq->irq_lock, flags); 492 spin_unlock_irqrestore(&irq->irq_lock, flags);
458 vgic_put_irq(vcpu->kvm, irq); 493 vgic_put_irq(vcpu->kvm, irq);
459 494
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index efbcf8f96f9c..12c37b89f7a3 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -104,6 +104,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
104 return irq->pending_latch || irq->line_level; 104 return irq->pending_latch || irq->line_level;
105} 105}
106 106
107static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
108{
109 return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
110}
111
107/* 112/*
108 * This struct provides an intermediate representation of the fields contained 113 * This struct provides an intermediate representation of the fields contained
109 * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC 114 * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
@@ -140,6 +145,9 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
140struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, 145struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
141 u32 intid); 146 u32 intid);
142void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); 147void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
148bool vgic_get_phys_line_level(struct vgic_irq *irq);
149void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
150void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
143bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, 151bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
144 unsigned long flags); 152 unsigned long flags);
145void vgic_kick_vcpus(struct kvm *kvm); 153void vgic_kick_vcpus(struct kvm *kvm);