aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-01-30 13:45:24 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-01-30 13:45:24 -0500
commit1f59fe76678b49fc406c6d7b63dec4abe60a0547 (patch)
treea96429e2a8471a49d885947bd74d931e659c3304
parentf3a3404162774770ded2ba1494a4c8ba27cd553e (diff)
parentdf04d1d191a5fea628981067e7cb7da33b246e89 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini: "The ARM changes are largish, but not too scary. And a simple fix for x86 (bug introduced in 3.19)" (Paolo sayus these are the "Final" fixes. We'll see). * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: check LAPIC presence when building apic_map arm/arm64: KVM: Use kernel mapping to perform invalidation on page fault arm/arm64: KVM: Invalidate data cache on unmap arm/arm64: KVM: Use set/way op trapping to track the state of the caches
-rw-r--r--arch/arm/include/asm/kvm_emulate.h10
-rw-r--r--arch/arm/include/asm/kvm_host.h3
-rw-r--r--arch/arm/include/asm/kvm_mmu.h77
-rw-r--r--arch/arm/kvm/arm.c10
-rw-r--r--arch/arm/kvm/coproc.c70
-rw-r--r--arch/arm/kvm/coproc.h6
-rw-r--r--arch/arm/kvm/coproc_a15.c2
-rw-r--r--arch/arm/kvm/coproc_a7.c2
-rw-r--r--arch/arm/kvm/mmu.c164
-rw-r--r--arch/arm/kvm/trace.h39
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h10
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h34
-rw-r--r--arch/arm64/kvm/sys_regs.c75
-rw-r--r--arch/x86/kvm/lapic.c3
15 files changed, 330 insertions, 178 deletions
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 66ce17655bb9..7b0152321b20 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -38,6 +38,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
38 vcpu->arch.hcr = HCR_GUEST_MASK; 38 vcpu->arch.hcr = HCR_GUEST_MASK;
39} 39}
40 40
41static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
42{
43 return vcpu->arch.hcr;
44}
45
46static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
47{
48 vcpu->arch.hcr = hcr;
49}
50
41static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) 51static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
42{ 52{
43 return 1; 53 return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 254e0650e48b..04b4ea0b550a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -125,9 +125,6 @@ struct kvm_vcpu_arch {
125 * Anything that is not used directly from assembly code goes 125 * Anything that is not used directly from assembly code goes
126 * here. 126 * here.
127 */ 127 */
128 /* dcache set/way operation pending */
129 int last_pcpu;
130 cpumask_t require_dcache_flush;
131 128
132 /* Don't run the guest on this vcpu */ 129 /* Don't run the guest on this vcpu */
133 bool pause; 130 bool pause;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 63e0ecc04901..1bca8f8af442 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -44,6 +44,7 @@
44 44
45#ifndef __ASSEMBLY__ 45#ifndef __ASSEMBLY__
46 46
47#include <linux/highmem.h>
47#include <asm/cacheflush.h> 48#include <asm/cacheflush.h>
48#include <asm/pgalloc.h> 49#include <asm/pgalloc.h>
49 50
@@ -161,13 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
161 return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; 162 return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
162} 163}
163 164
164static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, 165static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
165 unsigned long size, 166 unsigned long size,
166 bool ipa_uncached) 167 bool ipa_uncached)
167{ 168{
168 if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
169 kvm_flush_dcache_to_poc((void *)hva, size);
170
171 /* 169 /*
172 * If we are going to insert an instruction page and the icache is 170 * If we are going to insert an instruction page and the icache is
173 * either VIPT or PIPT, there is a potential problem where the host 171 * either VIPT or PIPT, there is a potential problem where the host
@@ -179,18 +177,77 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
179 * 177 *
180 * VIVT caches are tagged using both the ASID and the VMID and doesn't 178 * VIVT caches are tagged using both the ASID and the VMID and doesn't
181 * need any kind of flushing (DDI 0406C.b - Page B3-1392). 179 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
180 *
181 * We need to do this through a kernel mapping (using the
182 * user-space mapping has proved to be the wrong
183 * solution). For that, we need to kmap one page at a time,
184 * and iterate over the range.
182 */ 185 */
183 if (icache_is_pipt()) { 186
184 __cpuc_coherent_user_range(hva, hva + size); 187 bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
185 } else if (!icache_is_vivt_asid_tagged()) { 188
189 VM_BUG_ON(size & PAGE_MASK);
190
191 if (!need_flush && !icache_is_pipt())
192 goto vipt_cache;
193
194 while (size) {
195 void *va = kmap_atomic_pfn(pfn);
196
197 if (need_flush)
198 kvm_flush_dcache_to_poc(va, PAGE_SIZE);
199
200 if (icache_is_pipt())
201 __cpuc_coherent_user_range((unsigned long)va,
202 (unsigned long)va + PAGE_SIZE);
203
204 size -= PAGE_SIZE;
205 pfn++;
206
207 kunmap_atomic(va);
208 }
209
210vipt_cache:
211 if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
186 /* any kind of VIPT cache */ 212 /* any kind of VIPT cache */
187 __flush_icache_all(); 213 __flush_icache_all();
188 } 214 }
189} 215}
190 216
217static inline void __kvm_flush_dcache_pte(pte_t pte)
218{
219 void *va = kmap_atomic(pte_page(pte));
220
221 kvm_flush_dcache_to_poc(va, PAGE_SIZE);
222
223 kunmap_atomic(va);
224}
225
226static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
227{
228 unsigned long size = PMD_SIZE;
229 pfn_t pfn = pmd_pfn(pmd);
230
231 while (size) {
232 void *va = kmap_atomic_pfn(pfn);
233
234 kvm_flush_dcache_to_poc(va, PAGE_SIZE);
235
236 pfn++;
237 size -= PAGE_SIZE;
238
239 kunmap_atomic(va);
240 }
241}
242
243static inline void __kvm_flush_dcache_pud(pud_t pud)
244{
245}
246
191#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) 247#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x))
192 248
193void stage2_flush_vm(struct kvm *kvm); 249void kvm_set_way_flush(struct kvm_vcpu *vcpu);
250void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
194 251
195#endif /* !__ASSEMBLY__ */ 252#endif /* !__ASSEMBLY__ */
196 253
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2d6d91001062..0b0d58a905c4 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -281,15 +281,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
281 vcpu->cpu = cpu; 281 vcpu->cpu = cpu;
282 vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); 282 vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
283 283
284 /*
285 * Check whether this vcpu requires the cache to be flushed on
286 * this physical CPU. This is a consequence of doing dcache
287 * operations by set/way on this vcpu. We do it here to be in
288 * a non-preemptible section.
289 */
290 if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
291 flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
292
293 kvm_arm_set_running_vcpu(vcpu); 284 kvm_arm_set_running_vcpu(vcpu);
294} 285}
295 286
@@ -541,7 +532,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
541 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); 532 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
542 533
543 vcpu->mode = OUTSIDE_GUEST_MODE; 534 vcpu->mode = OUTSIDE_GUEST_MODE;
544 vcpu->arch.last_pcpu = smp_processor_id();
545 kvm_guest_exit(); 535 kvm_guest_exit();
546 trace_kvm_exit(*vcpu_pc(vcpu)); 536 trace_kvm_exit(*vcpu_pc(vcpu));
547 /* 537 /*
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 7928dbdf2102..f3d88dc388bc 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -189,82 +189,40 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
189 return true; 189 return true;
190} 190}
191 191
192/* See note at ARM ARM B1.14.4 */ 192/*
193 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
194 */
193static bool access_dcsw(struct kvm_vcpu *vcpu, 195static bool access_dcsw(struct kvm_vcpu *vcpu,
194 const struct coproc_params *p, 196 const struct coproc_params *p,
195 const struct coproc_reg *r) 197 const struct coproc_reg *r)
196{ 198{
197 unsigned long val;
198 int cpu;
199
200 if (!p->is_write) 199 if (!p->is_write)
201 return read_from_write_only(vcpu, p); 200 return read_from_write_only(vcpu, p);
202 201
203 cpu = get_cpu(); 202 kvm_set_way_flush(vcpu);
204
205 cpumask_setall(&vcpu->arch.require_dcache_flush);
206 cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
207
208 /* If we were already preempted, take the long way around */
209 if (cpu != vcpu->arch.last_pcpu) {
210 flush_cache_all();
211 goto done;
212 }
213
214 val = *vcpu_reg(vcpu, p->Rt1);
215
216 switch (p->CRm) {
217 case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
218 case 14: /* DCCISW */
219 asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
220 break;
221
222 case 10: /* DCCSW */
223 asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
224 break;
225 }
226
227done:
228 put_cpu();
229
230 return true; 203 return true;
231} 204}
232 205
233/* 206/*
234 * Generic accessor for VM registers. Only called as long as HCR_TVM 207 * Generic accessor for VM registers. Only called as long as HCR_TVM
235 * is set. 208 * is set. If the guest enables the MMU, we stop trapping the VM
209 * sys_regs and leave it in complete control of the caches.
210 *
211 * Used by the cpu-specific code.
236 */ 212 */
237static bool access_vm_reg(struct kvm_vcpu *vcpu, 213bool access_vm_reg(struct kvm_vcpu *vcpu,
238 const struct coproc_params *p, 214 const struct coproc_params *p,
239 const struct coproc_reg *r) 215 const struct coproc_reg *r)
240{ 216{
217 bool was_enabled = vcpu_has_cache_enabled(vcpu);
218
241 BUG_ON(!p->is_write); 219 BUG_ON(!p->is_write);
242 220
243 vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); 221 vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
244 if (p->is_64bit) 222 if (p->is_64bit)
245 vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); 223 vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
246 224
247 return true; 225 kvm_toggle_cache(vcpu, was_enabled);
248}
249
250/*
251 * SCTLR accessor. Only called as long as HCR_TVM is set. If the
252 * guest enables the MMU, we stop trapping the VM sys_regs and leave
253 * it in complete control of the caches.
254 *
255 * Used by the cpu-specific code.
256 */
257bool access_sctlr(struct kvm_vcpu *vcpu,
258 const struct coproc_params *p,
259 const struct coproc_reg *r)
260{
261 access_vm_reg(vcpu, p, r);
262
263 if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
264 vcpu->arch.hcr &= ~HCR_TVM;
265 stage2_flush_vm(vcpu->kvm);
266 }
267
268 return true; 226 return true;
269} 227}
270 228
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 1a44bbe39643..88d24a3a9778 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -153,8 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
153#define is64 .is_64 = true 153#define is64 .is_64 = true
154#define is32 .is_64 = false 154#define is32 .is_64 = false
155 155
156bool access_sctlr(struct kvm_vcpu *vcpu, 156bool access_vm_reg(struct kvm_vcpu *vcpu,
157 const struct coproc_params *p, 157 const struct coproc_params *p,
158 const struct coproc_reg *r); 158 const struct coproc_reg *r);
159 159
160#endif /* __ARM_KVM_COPROC_LOCAL_H__ */ 160#endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index e6f4ae48bda9..a7136757d373 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
34static const struct coproc_reg a15_regs[] = { 34static const struct coproc_reg a15_regs[] = {
35 /* SCTLR: swapped by interrupt.S. */ 35 /* SCTLR: swapped by interrupt.S. */
36 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 36 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
37 access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, 37 access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 },
38}; 38};
39 39
40static struct kvm_coproc_target_table a15_target_table = { 40static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 17fc7cd479d3..b19e46d1b2c0 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
37static const struct coproc_reg a7_regs[] = { 37static const struct coproc_reg a7_regs[] = {
38 /* SCTLR: swapped by interrupt.S. */ 38 /* SCTLR: swapped by interrupt.S. */
39 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 39 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
40 access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, 40 access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 },
41}; 41};
42 42
43static struct kvm_coproc_target_table a7_target_table = { 43static struct kvm_coproc_target_table a7_target_table = {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1dc9778a00af..136662547ca6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
58 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 58 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
59} 59}
60 60
61/*
62 * D-Cache management functions. They take the page table entries by
63 * value, as they are flushing the cache using the kernel mapping (or
64 * kmap on 32bit).
65 */
66static void kvm_flush_dcache_pte(pte_t pte)
67{
68 __kvm_flush_dcache_pte(pte);
69}
70
71static void kvm_flush_dcache_pmd(pmd_t pmd)
72{
73 __kvm_flush_dcache_pmd(pmd);
74}
75
76static void kvm_flush_dcache_pud(pud_t pud)
77{
78 __kvm_flush_dcache_pud(pud);
79}
80
61static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 81static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
62 int min, int max) 82 int min, int max)
63{ 83{
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
119 put_page(virt_to_page(pmd)); 139 put_page(virt_to_page(pmd));
120} 140}
121 141
142/*
143 * Unmapping vs dcache management:
144 *
145 * If a guest maps certain memory pages as uncached, all writes will
146 * bypass the data cache and go directly to RAM. However, the CPUs
147 * can still speculate reads (not writes) and fill cache lines with
148 * data.
149 *
150 * Those cache lines will be *clean* cache lines though, so a
151 * clean+invalidate operation is equivalent to an invalidate
152 * operation, because no cache lines are marked dirty.
153 *
154 * Those clean cache lines could be filled prior to an uncached write
155 * by the guest, and the cache coherent IO subsystem would therefore
156 * end up writing old data to disk.
157 *
158 * This is why right after unmapping a page/section and invalidating
159 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
160 * the IO subsystem will never hit in the cache.
161 */
122static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, 162static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
123 phys_addr_t addr, phys_addr_t end) 163 phys_addr_t addr, phys_addr_t end)
124{ 164{
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
128 start_pte = pte = pte_offset_kernel(pmd, addr); 168 start_pte = pte = pte_offset_kernel(pmd, addr);
129 do { 169 do {
130 if (!pte_none(*pte)) { 170 if (!pte_none(*pte)) {
171 pte_t old_pte = *pte;
172
131 kvm_set_pte(pte, __pte(0)); 173 kvm_set_pte(pte, __pte(0));
132 put_page(virt_to_page(pte));
133 kvm_tlb_flush_vmid_ipa(kvm, addr); 174 kvm_tlb_flush_vmid_ipa(kvm, addr);
175
176 /* No need to invalidate the cache for device mappings */
177 if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
178 kvm_flush_dcache_pte(old_pte);
179
180 put_page(virt_to_page(pte));
134 } 181 }
135 } while (pte++, addr += PAGE_SIZE, addr != end); 182 } while (pte++, addr += PAGE_SIZE, addr != end);
136 183
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
149 next = kvm_pmd_addr_end(addr, end); 196 next = kvm_pmd_addr_end(addr, end);
150 if (!pmd_none(*pmd)) { 197 if (!pmd_none(*pmd)) {
151 if (kvm_pmd_huge(*pmd)) { 198 if (kvm_pmd_huge(*pmd)) {
199 pmd_t old_pmd = *pmd;
200
152 pmd_clear(pmd); 201 pmd_clear(pmd);
153 kvm_tlb_flush_vmid_ipa(kvm, addr); 202 kvm_tlb_flush_vmid_ipa(kvm, addr);
203
204 kvm_flush_dcache_pmd(old_pmd);
205
154 put_page(virt_to_page(pmd)); 206 put_page(virt_to_page(pmd));
155 } else { 207 } else {
156 unmap_ptes(kvm, pmd, addr, next); 208 unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
173 next = kvm_pud_addr_end(addr, end); 225 next = kvm_pud_addr_end(addr, end);
174 if (!pud_none(*pud)) { 226 if (!pud_none(*pud)) {
175 if (pud_huge(*pud)) { 227 if (pud_huge(*pud)) {
228 pud_t old_pud = *pud;
229
176 pud_clear(pud); 230 pud_clear(pud);
177 kvm_tlb_flush_vmid_ipa(kvm, addr); 231 kvm_tlb_flush_vmid_ipa(kvm, addr);
232
233 kvm_flush_dcache_pud(old_pud);
234
178 put_page(virt_to_page(pud)); 235 put_page(virt_to_page(pud));
179 } else { 236 } else {
180 unmap_pmds(kvm, pud, addr, next); 237 unmap_pmds(kvm, pud, addr, next);
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
209 266
210 pte = pte_offset_kernel(pmd, addr); 267 pte = pte_offset_kernel(pmd, addr);
211 do { 268 do {
212 if (!pte_none(*pte)) { 269 if (!pte_none(*pte) &&
213 hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); 270 (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
214 kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); 271 kvm_flush_dcache_pte(*pte);
215 }
216 } while (pte++, addr += PAGE_SIZE, addr != end); 272 } while (pte++, addr += PAGE_SIZE, addr != end);
217} 273}
218 274
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
226 do { 282 do {
227 next = kvm_pmd_addr_end(addr, end); 283 next = kvm_pmd_addr_end(addr, end);
228 if (!pmd_none(*pmd)) { 284 if (!pmd_none(*pmd)) {
229 if (kvm_pmd_huge(*pmd)) { 285 if (kvm_pmd_huge(*pmd))
230 hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); 286 kvm_flush_dcache_pmd(*pmd);
231 kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); 287 else
232 } else {
233 stage2_flush_ptes(kvm, pmd, addr, next); 288 stage2_flush_ptes(kvm, pmd, addr, next);
234 }
235 } 289 }
236 } while (pmd++, addr = next, addr != end); 290 } while (pmd++, addr = next, addr != end);
237} 291}
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
246 do { 300 do {
247 next = kvm_pud_addr_end(addr, end); 301 next = kvm_pud_addr_end(addr, end);
248 if (!pud_none(*pud)) { 302 if (!pud_none(*pud)) {
249 if (pud_huge(*pud)) { 303 if (pud_huge(*pud))
250 hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); 304 kvm_flush_dcache_pud(*pud);
251 kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); 305 else
252 } else {
253 stage2_flush_pmds(kvm, pud, addr, next); 306 stage2_flush_pmds(kvm, pud, addr, next);
254 }
255 } 307 }
256 } while (pud++, addr = next, addr != end); 308 } while (pud++, addr = next, addr != end);
257} 309}
@@ -278,7 +330,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
278 * Go through the stage 2 page tables and invalidate any cache lines 330 * Go through the stage 2 page tables and invalidate any cache lines
279 * backing memory already mapped to the VM. 331 * backing memory already mapped to the VM.
280 */ 332 */
281void stage2_flush_vm(struct kvm *kvm) 333static void stage2_flush_vm(struct kvm *kvm)
282{ 334{
283 struct kvm_memslots *slots; 335 struct kvm_memslots *slots;
284 struct kvm_memory_slot *memslot; 336 struct kvm_memory_slot *memslot;
@@ -905,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
905 return !pfn_valid(pfn); 957 return !pfn_valid(pfn);
906} 958}
907 959
960static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
961 unsigned long size, bool uncached)
962{
963 __coherent_cache_guest_page(vcpu, pfn, size, uncached);
964}
965
908static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 966static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
909 struct kvm_memory_slot *memslot, unsigned long hva, 967 struct kvm_memory_slot *memslot, unsigned long hva,
910 unsigned long fault_status) 968 unsigned long fault_status)
@@ -994,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
994 kvm_set_s2pmd_writable(&new_pmd); 1052 kvm_set_s2pmd_writable(&new_pmd);
995 kvm_set_pfn_dirty(pfn); 1053 kvm_set_pfn_dirty(pfn);
996 } 1054 }
997 coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, 1055 coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
998 fault_ipa_uncached);
999 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 1056 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1000 } else { 1057 } else {
1001 pte_t new_pte = pfn_pte(pfn, mem_type); 1058 pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1003,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1003 kvm_set_s2pte_writable(&new_pte); 1060 kvm_set_s2pte_writable(&new_pte);
1004 kvm_set_pfn_dirty(pfn); 1061 kvm_set_pfn_dirty(pfn);
1005 } 1062 }
1006 coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, 1063 coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
1007 fault_ipa_uncached);
1008 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, 1064 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
1009 pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); 1065 pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
1010 } 1066 }
@@ -1411,3 +1467,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
1411 unmap_stage2_range(kvm, gpa, size); 1467 unmap_stage2_range(kvm, gpa, size);
1412 spin_unlock(&kvm->mmu_lock); 1468 spin_unlock(&kvm->mmu_lock);
1413} 1469}
1470
1471/*
1472 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
1473 *
1474 * Main problems:
1475 * - S/W ops are local to a CPU (not broadcast)
1476 * - We have line migration behind our back (speculation)
1477 * - System caches don't support S/W at all (damn!)
1478 *
1479 * In the face of the above, the best we can do is to try and convert
1480 * S/W ops to VA ops. Because the guest is not allowed to infer the
1481 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
1482 * which is a rather good thing for us.
1483 *
1484 * Also, it is only used when turning caches on/off ("The expected
1485 * usage of the cache maintenance instructions that operate by set/way
1486 * is associated with the cache maintenance instructions associated
1487 * with the powerdown and powerup of caches, if this is required by
1488 * the implementation.").
1489 *
1490 * We use the following policy:
1491 *
1492 * - If we trap a S/W operation, we enable VM trapping to detect
1493 * caches being turned on/off, and do a full clean.
1494 *
1495 * - We flush the caches on both caches being turned on and off.
1496 *
1497 * - Once the caches are enabled, we stop trapping VM ops.
1498 */
1499void kvm_set_way_flush(struct kvm_vcpu *vcpu)
1500{
1501 unsigned long hcr = vcpu_get_hcr(vcpu);
1502
1503 /*
1504 * If this is the first time we do a S/W operation
1505 * (i.e. HCR_TVM not set) flush the whole memory, and set the
1506 * VM trapping.
1507 *
1508 * Otherwise, rely on the VM trapping to wait for the MMU +
1509 * Caches to be turned off. At that point, we'll be able to
1510 * clean the caches again.
1511 */
1512 if (!(hcr & HCR_TVM)) {
1513 trace_kvm_set_way_flush(*vcpu_pc(vcpu),
1514 vcpu_has_cache_enabled(vcpu));
1515 stage2_flush_vm(vcpu->kvm);
1516 vcpu_set_hcr(vcpu, hcr | HCR_TVM);
1517 }
1518}
1519
1520void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
1521{
1522 bool now_enabled = vcpu_has_cache_enabled(vcpu);
1523
1524 /*
1525 * If switching the MMU+caches on, need to invalidate the caches.
1526 * If switching it off, need to clean the caches.
1527 * Clean + invalidate does the trick always.
1528 */
1529 if (now_enabled != was_enabled)
1530 stage2_flush_vm(vcpu->kvm);
1531
1532 /* Caches are now on, stop trapping VM ops (until a S/W op) */
1533 if (now_enabled)
1534 vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
1535
1536 trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
1537}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b1d640f78623..b6a6e7102201 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -223,6 +223,45 @@ TRACE_EVENT(kvm_hvc,
223 __entry->vcpu_pc, __entry->r0, __entry->imm) 223 __entry->vcpu_pc, __entry->r0, __entry->imm)
224); 224);
225 225
226TRACE_EVENT(kvm_set_way_flush,
227 TP_PROTO(unsigned long vcpu_pc, bool cache),
228 TP_ARGS(vcpu_pc, cache),
229
230 TP_STRUCT__entry(
231 __field( unsigned long, vcpu_pc )
232 __field( bool, cache )
233 ),
234
235 TP_fast_assign(
236 __entry->vcpu_pc = vcpu_pc;
237 __entry->cache = cache;
238 ),
239
240 TP_printk("S/W flush at 0x%016lx (cache %s)",
241 __entry->vcpu_pc, __entry->cache ? "on" : "off")
242);
243
244TRACE_EVENT(kvm_toggle_cache,
245 TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
246 TP_ARGS(vcpu_pc, was, now),
247
248 TP_STRUCT__entry(
249 __field( unsigned long, vcpu_pc )
250 __field( bool, was )
251 __field( bool, now )
252 ),
253
254 TP_fast_assign(
255 __entry->vcpu_pc = vcpu_pc;
256 __entry->was = was;
257 __entry->now = now;
258 ),
259
260 TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
261 __entry->vcpu_pc, __entry->was ? "on" : "off",
262 __entry->now ? "on" : "off")
263);
264
226#endif /* _TRACE_KVM_H */ 265#endif /* _TRACE_KVM_H */
227 266
228#undef TRACE_INCLUDE_PATH 267#undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 865a7e28ea2d..3cb4c856b10d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -45,6 +45,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
45 vcpu->arch.hcr_el2 &= ~HCR_RW; 45 vcpu->arch.hcr_el2 &= ~HCR_RW;
46} 46}
47 47
48static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
49{
50 return vcpu->arch.hcr_el2;
51}
52
53static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
54{
55 vcpu->arch.hcr_el2 = hcr;
56}
57
48static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) 58static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
49{ 59{
50 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; 60 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0b7dfdb931df..acd101a9014d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -116,9 +116,6 @@ struct kvm_vcpu_arch {
116 * Anything that is not used directly from assembly code goes 116 * Anything that is not used directly from assembly code goes
117 * here. 117 * here.
118 */ 118 */
119 /* dcache set/way operation pending */
120 int last_pcpu;
121 cpumask_t require_dcache_flush;
122 119
123 /* Don't run the guest */ 120 /* Don't run the guest */
124 bool pause; 121 bool pause;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 14a74f136272..adcf49547301 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -243,24 +243,46 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
243 return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; 243 return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
244} 244}
245 245
246static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, 246static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
247 unsigned long size, 247 unsigned long size,
248 bool ipa_uncached) 248 bool ipa_uncached)
249{ 249{
250 void *va = page_address(pfn_to_page(pfn));
251
250 if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) 252 if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
251 kvm_flush_dcache_to_poc((void *)hva, size); 253 kvm_flush_dcache_to_poc(va, size);
252 254
253 if (!icache_is_aliasing()) { /* PIPT */ 255 if (!icache_is_aliasing()) { /* PIPT */
254 flush_icache_range(hva, hva + size); 256 flush_icache_range((unsigned long)va,
257 (unsigned long)va + size);
255 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ 258 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
256 /* any kind of VIPT cache */ 259 /* any kind of VIPT cache */
257 __flush_icache_all(); 260 __flush_icache_all();
258 } 261 }
259} 262}
260 263
264static inline void __kvm_flush_dcache_pte(pte_t pte)
265{
266 struct page *page = pte_page(pte);
267 kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
268}
269
270static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
271{
272 struct page *page = pmd_page(pmd);
273 kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
274}
275
276static inline void __kvm_flush_dcache_pud(pud_t pud)
277{
278 struct page *page = pud_page(pud);
279 kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
280}
281
261#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) 282#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
262 283
263void stage2_flush_vm(struct kvm *kvm); 284void kvm_set_way_flush(struct kvm_vcpu *vcpu);
285void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
264 286
265#endif /* __ASSEMBLY__ */ 287#endif /* __ASSEMBLY__ */
266#endif /* __ARM64_KVM_MMU_H__ */ 288#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3d7c2df89946..f31e8bb2bc5b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -69,68 +69,31 @@ static u32 get_ccsidr(u32 csselr)
69 return ccsidr; 69 return ccsidr;
70} 70}
71 71
72static void do_dc_cisw(u32 val) 72/*
73{ 73 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
74 asm volatile("dc cisw, %x0" : : "r" (val)); 74 */
75 dsb(ish);
76}
77
78static void do_dc_csw(u32 val)
79{
80 asm volatile("dc csw, %x0" : : "r" (val));
81 dsb(ish);
82}
83
84/* See note at ARM ARM B1.14.4 */
85static bool access_dcsw(struct kvm_vcpu *vcpu, 75static bool access_dcsw(struct kvm_vcpu *vcpu,
86 const struct sys_reg_params *p, 76 const struct sys_reg_params *p,
87 const struct sys_reg_desc *r) 77 const struct sys_reg_desc *r)
88{ 78{
89 unsigned long val;
90 int cpu;
91
92 if (!p->is_write) 79 if (!p->is_write)
93 return read_from_write_only(vcpu, p); 80 return read_from_write_only(vcpu, p);
94 81
95 cpu = get_cpu(); 82 kvm_set_way_flush(vcpu);
96
97 cpumask_setall(&vcpu->arch.require_dcache_flush);
98 cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
99
100 /* If we were already preempted, take the long way around */
101 if (cpu != vcpu->arch.last_pcpu) {
102 flush_cache_all();
103 goto done;
104 }
105
106 val = *vcpu_reg(vcpu, p->Rt);
107
108 switch (p->CRm) {
109 case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
110 case 14: /* DCCISW */
111 do_dc_cisw(val);
112 break;
113
114 case 10: /* DCCSW */
115 do_dc_csw(val);
116 break;
117 }
118
119done:
120 put_cpu();
121
122 return true; 83 return true;
123} 84}
124 85
125/* 86/*
126 * Generic accessor for VM registers. Only called as long as HCR_TVM 87 * Generic accessor for VM registers. Only called as long as HCR_TVM
127 * is set. 88 * is set. If the guest enables the MMU, we stop trapping the VM
89 * sys_regs and leave it in complete control of the caches.
128 */ 90 */
129static bool access_vm_reg(struct kvm_vcpu *vcpu, 91static bool access_vm_reg(struct kvm_vcpu *vcpu,
130 const struct sys_reg_params *p, 92 const struct sys_reg_params *p,
131 const struct sys_reg_desc *r) 93 const struct sys_reg_desc *r)
132{ 94{
133 unsigned long val; 95 unsigned long val;
96 bool was_enabled = vcpu_has_cache_enabled(vcpu);
134 97
135 BUG_ON(!p->is_write); 98 BUG_ON(!p->is_write);
136 99
@@ -143,25 +106,7 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
143 vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; 106 vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
144 } 107 }
145 108
146 return true; 109 kvm_toggle_cache(vcpu, was_enabled);
147}
148
149/*
150 * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the
151 * guest enables the MMU, we stop trapping the VM sys_regs and leave
152 * it in complete control of the caches.
153 */
154static bool access_sctlr(struct kvm_vcpu *vcpu,
155 const struct sys_reg_params *p,
156 const struct sys_reg_desc *r)
157{
158 access_vm_reg(vcpu, p, r);
159
160 if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
161 vcpu->arch.hcr_el2 &= ~HCR_TVM;
162 stage2_flush_vm(vcpu->kvm);
163 }
164
165 return true; 110 return true;
166} 111}
167 112
@@ -377,7 +322,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
377 NULL, reset_mpidr, MPIDR_EL1 }, 322 NULL, reset_mpidr, MPIDR_EL1 },
378 /* SCTLR_EL1 */ 323 /* SCTLR_EL1 */
379 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), 324 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
380 access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, 325 access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
381 /* CPACR_EL1 */ 326 /* CPACR_EL1 */
382 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), 327 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
383 NULL, reset_val, CPACR_EL1, 0 }, 328 NULL, reset_val, CPACR_EL1, 0 },
@@ -657,7 +602,7 @@ static const struct sys_reg_desc cp14_64_regs[] = {
657 * register). 602 * register).
658 */ 603 */
659static const struct sys_reg_desc cp15_regs[] = { 604static const struct sys_reg_desc cp15_regs[] = {
660 { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, 605 { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
661 { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, 606 { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
662 { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, 607 { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
663 { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, 608 { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4f0c0b954686..d52dcf0776ea 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -192,6 +192,9 @@ static void recalculate_apic_map(struct kvm *kvm)
192 u16 cid, lid; 192 u16 cid, lid;
193 u32 ldr, aid; 193 u32 ldr, aid;
194 194
195 if (!kvm_apic_present(vcpu))
196 continue;
197
195 aid = kvm_apic_id(apic); 198 aid = kvm_apic_id(apic);
196 ldr = kvm_apic_get_reg(apic, APIC_LDR); 199 ldr = kvm_apic_get_reg(apic, APIC_LDR);
197 cid = apic_cluster_id(new, ldr); 200 cid = apic_cluster_id(new, ldr);