Diffstat (limited to 'arch/arm/kvm/mmu.c')
-rw-r--r--  arch/arm/kvm/mmu.c  164
1 file changed, 144 insertions(+), 20 deletions(-)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1dc9778a00af..136662547ca6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+	__kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	__kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+	__kvm_flush_dcache_pud(pud);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 {
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 	put_page(virt_to_page(pmd));
 }
 
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM. However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 	start_pte = pte = pte_offset_kernel(pmd, addr);
 	do {
 		if (!pte_none(*pte)) {
+			pte_t old_pte = *pte;
+
 			kvm_set_pte(pte, __pte(0));
-			put_page(virt_to_page(pte));
 			kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+			/* No need to invalidate the cache for device mappings */
+			if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+				kvm_flush_dcache_pte(old_pte);
+
+			put_page(virt_to_page(pte));
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
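Reassembled for readability, the per-PTE teardown now follows the order described in the comment block above: snapshot the entry, clear it, invalidate the TLB, flush the D-cache (skipped for device mappings), and drop the page-table page reference last. This is the same code as the hunk, condensed into one view:

if (!pte_none(*pte)) {
	pte_t old_pte = *pte;			/* 1. snapshot the entry */

	kvm_set_pte(pte, __pte(0));		/* 2. unhook the mapping */
	kvm_tlb_flush_vmid_ipa(kvm, addr);	/* 3. invalidate the stage-2 TLB entry */

	/* 4. flush the D-cache, unless this was a device mapping */
	if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
		kvm_flush_dcache_pte(old_pte);

	put_page(virt_to_page(pte));		/* 5. drop the page-table page ref last */
}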
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
 			if (kvm_pmd_huge(*pmd)) {
+				pmd_t old_pmd = *pmd;
+
 				pmd_clear(pmd);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pmd(old_pmd);
+
 				put_page(virt_to_page(pmd));
 			} else {
 				unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
 			if (pud_huge(*pud)) {
+				pud_t old_pud = *pud;
+
 				pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pud(old_pud);
+
 				put_page(virt_to_page(pud));
 			} else {
 				unmap_pmds(kvm, pud, addr, next);
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		if (!pte_none(*pte)) {
-			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
-		}
+		if (!pte_none(*pte) &&
+		    (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+			kvm_flush_dcache_pte(*pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 	do {
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
-			} else {
+			if (kvm_pmd_huge(*pmd))
+				kvm_flush_dcache_pmd(*pmd);
+			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
-			}
 		}
 	} while (pmd++, addr = next, addr != end);
 }
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 	do {
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
-			} else {
+			if (pud_huge(*pud))
+				kvm_flush_dcache_pud(*pud);
+			else
 				stage2_flush_pmds(kvm, pud, addr, next);
-			}
 		}
 	} while (pud++, addr = next, addr != end);
 }
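All three stage2_flush_* levels now flush from the page table entry itself instead of resolving the gfn to a userspace hva and flushing through that alias. For a huge (section) mapping the arch helper has to walk the whole block a page at a time; a hedged sketch of what that could look like on 32-bit ARM (assumed helper body, not part of this diff):

/*
 * Illustrative sketch: an assumed PMD-sized (section) flush that walks
 * the block one page at a time through a temporary kernel mapping.
 */
static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
{
	unsigned long size = PMD_SIZE;
	pfn_t pfn = pmd_pfn(pmd);

	while (size) {
		void *va = kmap_atomic_pfn(pfn);

		kvm_flush_dcache_to_poc(va, PAGE_SIZE);

		kunmap_atomic(va);
		pfn++;
		size -= PAGE_SIZE;
	}
}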
@@ -278,7 +330,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
  * Go through the stage 2 page tables and invalidate any cache lines
  * backing memory already mapped to the VM.
  */
-void stage2_flush_vm(struct kvm *kvm)
+static void stage2_flush_vm(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
@@ -905,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 }
 
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+				      unsigned long size, bool uncached)
+{
+	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -994,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1003,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 				     pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
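Both call sites in user_mem_abort() now hand the pfn, rather than the hva, to coherent_cache_guest_page(), and the same pfn then feeds the stage-2 mapping. Condensed from the two hunks above; the surrounding condition is abbreviated and shown here only as an assumed hugetlb flag:

if (hugetlb) {
	coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
	ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
} else {
	coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
	ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
			     pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
}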
@@ -1411,3 +1467,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		unmap_stage2_range(kvm, gpa, size);
 	spin_unlock(&kvm->mmu_lock);
 }
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the cache maintenance instructions associated
+ * with the powerdown and powerup of caches, if this is required by
+ * the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ *   caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+	unsigned long hcr = vcpu_get_hcr(vcpu);
+
+	/*
+	 * If this is the first time we do a S/W operation
+	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
+	 * VM trapping.
+	 *
+	 * Otherwise, rely on the VM trapping to wait for the MMU +
+	 * Caches to be turned off. At that point, we'll be able to
+	 * clean the caches again.
+	 */
+	if (!(hcr & HCR_TVM)) {
+		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+					vcpu_has_cache_enabled(vcpu));
+		stage2_flush_vm(vcpu->kvm);
+		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+	}
+}
+
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+	bool now_enabled = vcpu_has_cache_enabled(vcpu);
+
+	/*
+	 * If switching the MMU+caches on, need to invalidate the caches.
+	 * If switching it off, need to clean the caches.
+	 * Clean + invalidate does the trick always.
+	 */
+	if (now_enabled != was_enabled)
+		stage2_flush_vm(vcpu->kvm);
+
+	/* Caches are now on, stop trapping VM ops (until a S/W op) */
+	if (now_enabled)
+		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+
+	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}
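kvm_set_way_flush() and kvm_toggle_cache() are not called from this file; they are entry points for the coprocessor/system-register trap handlers elsewhere in KVM/ARM. A hedged sketch of how such handlers might drive them; the handler names and the example_store_guest_sctlr() helper are illustrative assumptions, not taken from this diff:

/* Illustrative sketch only: hypothetical trap-handler glue. */

/* Any trapped set/way op (DC ISW/CSW/CISW) funnels into the same policy. */
static bool example_handle_dcsw(struct kvm_vcpu *vcpu)
{
	kvm_set_way_flush(vcpu);	/* full clean + enable HCR_TVM trapping */
	return true;
}

/* A trapped write to a VM control register such as SCTLR (HCR_TVM set). */
static bool example_handle_sctlr_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	bool was_enabled = vcpu_has_cache_enabled(vcpu);

	example_store_guest_sctlr(vcpu, val);	/* hypothetical: update the shadowed register */
	kvm_toggle_cache(vcpu, was_enabled);	/* flush if the cache state changed, maybe clear TVM */
	return true;
}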