diff options
Diffstat (limited to 'arch/arm/kvm/mmu.c')
-rw-r--r-- | arch/arm/kvm/mmu.c | 164 |
1 files changed, 144 insertions, 20 deletions
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1dc9778a00af..136662547ca6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c | |||
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) | |||
58 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); | 58 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); |
59 | } | 59 | } |
60 | 60 | ||
61 | /* | ||
62 | * D-Cache management functions. They take the page table entries by | ||
63 | * value, as they are flushing the cache using the kernel mapping (or | ||
64 | * kmap on 32bit). | ||
65 | */ | ||
66 | static void kvm_flush_dcache_pte(pte_t pte) | ||
67 | { | ||
68 | __kvm_flush_dcache_pte(pte); | ||
69 | } | ||
70 | |||
71 | static void kvm_flush_dcache_pmd(pmd_t pmd) | ||
72 | { | ||
73 | __kvm_flush_dcache_pmd(pmd); | ||
74 | } | ||
75 | |||
76 | static void kvm_flush_dcache_pud(pud_t pud) | ||
77 | { | ||
78 | __kvm_flush_dcache_pud(pud); | ||
79 | } | ||
80 | |||
61 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 81 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
62 | int min, int max) | 82 | int min, int max) |
63 | { | 83 | { |
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) | |||
119 | put_page(virt_to_page(pmd)); | 139 | put_page(virt_to_page(pmd)); |
120 | } | 140 | } |
121 | 141 | ||
142 | /* | ||
143 | * Unmapping vs dcache management: | ||
144 | * | ||
145 | * If a guest maps certain memory pages as uncached, all writes will | ||
146 | * bypass the data cache and go directly to RAM. However, the CPUs | ||
147 | * can still speculate reads (not writes) and fill cache lines with | ||
148 | * data. | ||
149 | * | ||
150 | * Those cache lines will be *clean* cache lines though, so a | ||
151 | * clean+invalidate operation is equivalent to an invalidate | ||
152 | * operation, because no cache lines are marked dirty. | ||
153 | * | ||
154 | * Those clean cache lines could be filled prior to an uncached write | ||
155 | * by the guest, and the cache coherent IO subsystem would therefore | ||
156 | * end up writing old data to disk. | ||
157 | * | ||
158 | * This is why right after unmapping a page/section and invalidating | ||
159 | * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure | ||
160 | * the IO subsystem will never hit in the cache. | ||
161 | */ | ||
122 | static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, | 162 | static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, |
123 | phys_addr_t addr, phys_addr_t end) | 163 | phys_addr_t addr, phys_addr_t end) |
124 | { | 164 | { |
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, | |||
128 | start_pte = pte = pte_offset_kernel(pmd, addr); | 168 | start_pte = pte = pte_offset_kernel(pmd, addr); |
129 | do { | 169 | do { |
130 | if (!pte_none(*pte)) { | 170 | if (!pte_none(*pte)) { |
171 | pte_t old_pte = *pte; | ||
172 | |||
131 | kvm_set_pte(pte, __pte(0)); | 173 | kvm_set_pte(pte, __pte(0)); |
132 | put_page(virt_to_page(pte)); | ||
133 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 174 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
175 | |||
176 | /* No need to invalidate the cache for device mappings */ | ||
177 | if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) | ||
178 | kvm_flush_dcache_pte(old_pte); | ||
179 | |||
180 | put_page(virt_to_page(pte)); | ||
134 | } | 181 | } |
135 | } while (pte++, addr += PAGE_SIZE, addr != end); | 182 | } while (pte++, addr += PAGE_SIZE, addr != end); |
136 | 183 | ||
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, | |||
149 | next = kvm_pmd_addr_end(addr, end); | 196 | next = kvm_pmd_addr_end(addr, end); |
150 | if (!pmd_none(*pmd)) { | 197 | if (!pmd_none(*pmd)) { |
151 | if (kvm_pmd_huge(*pmd)) { | 198 | if (kvm_pmd_huge(*pmd)) { |
199 | pmd_t old_pmd = *pmd; | ||
200 | |||
152 | pmd_clear(pmd); | 201 | pmd_clear(pmd); |
153 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 202 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
203 | |||
204 | kvm_flush_dcache_pmd(old_pmd); | ||
205 | |||
154 | put_page(virt_to_page(pmd)); | 206 | put_page(virt_to_page(pmd)); |
155 | } else { | 207 | } else { |
156 | unmap_ptes(kvm, pmd, addr, next); | 208 | unmap_ptes(kvm, pmd, addr, next); |
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd, | |||
173 | next = kvm_pud_addr_end(addr, end); | 225 | next = kvm_pud_addr_end(addr, end); |
174 | if (!pud_none(*pud)) { | 226 | if (!pud_none(*pud)) { |
175 | if (pud_huge(*pud)) { | 227 | if (pud_huge(*pud)) { |
228 | pud_t old_pud = *pud; | ||
229 | |||
176 | pud_clear(pud); | 230 | pud_clear(pud); |
177 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 231 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
232 | |||
233 | kvm_flush_dcache_pud(old_pud); | ||
234 | |||
178 | put_page(virt_to_page(pud)); | 235 | put_page(virt_to_page(pud)); |
179 | } else { | 236 | } else { |
180 | unmap_pmds(kvm, pud, addr, next); | 237 | unmap_pmds(kvm, pud, addr, next); |
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, | |||
209 | 266 | ||
210 | pte = pte_offset_kernel(pmd, addr); | 267 | pte = pte_offset_kernel(pmd, addr); |
211 | do { | 268 | do { |
212 | if (!pte_none(*pte)) { | 269 | if (!pte_none(*pte) && |
213 | hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); | 270 | (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) |
214 | kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); | 271 | kvm_flush_dcache_pte(*pte); |
215 | } | ||
216 | } while (pte++, addr += PAGE_SIZE, addr != end); | 272 | } while (pte++, addr += PAGE_SIZE, addr != end); |
217 | } | 273 | } |
218 | 274 | ||
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, | |||
226 | do { | 282 | do { |
227 | next = kvm_pmd_addr_end(addr, end); | 283 | next = kvm_pmd_addr_end(addr, end); |
228 | if (!pmd_none(*pmd)) { | 284 | if (!pmd_none(*pmd)) { |
229 | if (kvm_pmd_huge(*pmd)) { | 285 | if (kvm_pmd_huge(*pmd)) |
230 | hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); | 286 | kvm_flush_dcache_pmd(*pmd); |
231 | kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); | 287 | else |
232 | } else { | ||
233 | stage2_flush_ptes(kvm, pmd, addr, next); | 288 | stage2_flush_ptes(kvm, pmd, addr, next); |
234 | } | ||
235 | } | 289 | } |
236 | } while (pmd++, addr = next, addr != end); | 290 | } while (pmd++, addr = next, addr != end); |
237 | } | 291 | } |
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, | |||
246 | do { | 300 | do { |
247 | next = kvm_pud_addr_end(addr, end); | 301 | next = kvm_pud_addr_end(addr, end); |
248 | if (!pud_none(*pud)) { | 302 | if (!pud_none(*pud)) { |
249 | if (pud_huge(*pud)) { | 303 | if (pud_huge(*pud)) |
250 | hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); | 304 | kvm_flush_dcache_pud(*pud); |
251 | kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); | 305 | else |
252 | } else { | ||
253 | stage2_flush_pmds(kvm, pud, addr, next); | 306 | stage2_flush_pmds(kvm, pud, addr, next); |
254 | } | ||
255 | } | 307 | } |
256 | } while (pud++, addr = next, addr != end); | 308 | } while (pud++, addr = next, addr != end); |
257 | } | 309 | } |
@@ -278,7 +330,7 @@ static void stage2_flush_memslot(struct kvm *kvm, | |||
278 | * Go through the stage 2 page tables and invalidate any cache lines | 330 | * Go through the stage 2 page tables and invalidate any cache lines |
279 | * backing memory already mapped to the VM. | 331 | * backing memory already mapped to the VM. |
280 | */ | 332 | */ |
281 | void stage2_flush_vm(struct kvm *kvm) | 333 | static void stage2_flush_vm(struct kvm *kvm) |
282 | { | 334 | { |
283 | struct kvm_memslots *slots; | 335 | struct kvm_memslots *slots; |
284 | struct kvm_memory_slot *memslot; | 336 | struct kvm_memory_slot *memslot; |
@@ -905,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn) | |||
905 | return !pfn_valid(pfn); | 957 | return !pfn_valid(pfn); |
906 | } | 958 | } |
907 | 959 | ||
960 | static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, | ||
961 | unsigned long size, bool uncached) | ||
962 | { | ||
963 | __coherent_cache_guest_page(vcpu, pfn, size, uncached); | ||
964 | } | ||
965 | |||
908 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | 966 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, |
909 | struct kvm_memory_slot *memslot, unsigned long hva, | 967 | struct kvm_memory_slot *memslot, unsigned long hva, |
910 | unsigned long fault_status) | 968 | unsigned long fault_status) |
@@ -994,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
994 | kvm_set_s2pmd_writable(&new_pmd); | 1052 | kvm_set_s2pmd_writable(&new_pmd); |
995 | kvm_set_pfn_dirty(pfn); | 1053 | kvm_set_pfn_dirty(pfn); |
996 | } | 1054 | } |
997 | coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, | 1055 | coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); |
998 | fault_ipa_uncached); | ||
999 | ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); | 1056 | ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); |
1000 | } else { | 1057 | } else { |
1001 | pte_t new_pte = pfn_pte(pfn, mem_type); | 1058 | pte_t new_pte = pfn_pte(pfn, mem_type); |
@@ -1003,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1003 | kvm_set_s2pte_writable(&new_pte); | 1060 | kvm_set_s2pte_writable(&new_pte); |
1004 | kvm_set_pfn_dirty(pfn); | 1061 | kvm_set_pfn_dirty(pfn); |
1005 | } | 1062 | } |
1006 | coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, | 1063 | coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); |
1007 | fault_ipa_uncached); | ||
1008 | ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, | 1064 | ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, |
1009 | pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); | 1065 | pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); |
1010 | } | 1066 | } |
@@ -1411,3 +1467,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | |||
1411 | unmap_stage2_range(kvm, gpa, size); | 1467 | unmap_stage2_range(kvm, gpa, size); |
1412 | spin_unlock(&kvm->mmu_lock); | 1468 | spin_unlock(&kvm->mmu_lock); |
1413 | } | 1469 | } |
1470 | |||
1471 | /* | ||
1472 | * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). | ||
1473 | * | ||
1474 | * Main problems: | ||
1475 | * - S/W ops are local to a CPU (not broadcast) | ||
1476 | * - We have line migration behind our back (speculation) | ||
1477 | * - System caches don't support S/W at all (damn!) | ||
1478 | * | ||
1479 | * In the face of the above, the best we can do is to try and convert | ||
1480 | * S/W ops to VA ops. Because the guest is not allowed to infer the | ||
1481 | * S/W to PA mapping, it can only use S/W to nuke the whole cache, | ||
1482 | * which is a rather good thing for us. | ||
1483 | * | ||
1484 | * Also, it is only used when turning caches on/off ("The expected | ||
1485 | * usage of the cache maintenance instructions that operate by set/way | ||
1486 | * is associated with the cache maintenance instructions associated | ||
1487 | * with the powerdown and powerup of caches, if this is required by | ||
1488 | * the implementation."). | ||
1489 | * | ||
1490 | * We use the following policy: | ||
1491 | * | ||
1492 | * - If we trap a S/W operation, we enable VM trapping to detect | ||
1493 | * caches being turned on/off, and do a full clean. | ||
1494 | * | ||
1495 | * - We flush the caches on both caches being turned on and off. | ||
1496 | * | ||
1497 | * - Once the caches are enabled, we stop trapping VM ops. | ||
1498 | */ | ||
1499 | void kvm_set_way_flush(struct kvm_vcpu *vcpu) | ||
1500 | { | ||
1501 | unsigned long hcr = vcpu_get_hcr(vcpu); | ||
1502 | |||
1503 | /* | ||
1504 | * If this is the first time we do a S/W operation | ||
1505 | * (i.e. HCR_TVM not set) flush the whole memory, and set the | ||
1506 | * VM trapping. | ||
1507 | * | ||
1508 | * Otherwise, rely on the VM trapping to wait for the MMU + | ||
1509 | * Caches to be turned off. At that point, we'll be able to | ||
1510 | * clean the caches again. | ||
1511 | */ | ||
1512 | if (!(hcr & HCR_TVM)) { | ||
1513 | trace_kvm_set_way_flush(*vcpu_pc(vcpu), | ||
1514 | vcpu_has_cache_enabled(vcpu)); | ||
1515 | stage2_flush_vm(vcpu->kvm); | ||
1516 | vcpu_set_hcr(vcpu, hcr | HCR_TVM); | ||
1517 | } | ||
1518 | } | ||
1519 | |||
1520 | void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) | ||
1521 | { | ||
1522 | bool now_enabled = vcpu_has_cache_enabled(vcpu); | ||
1523 | |||
1524 | /* | ||
1525 | * If switching the MMU+caches on, need to invalidate the caches. | ||
1526 | * If switching it off, need to clean the caches. | ||
1527 | * Clean + invalidate does the trick always. | ||
1528 | */ | ||
1529 | if (now_enabled != was_enabled) | ||
1530 | stage2_flush_vm(vcpu->kvm); | ||
1531 | |||
1532 | /* Caches are now on, stop trapping VM ops (until a S/W op) */ | ||
1533 | if (now_enabled) | ||
1534 | vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); | ||
1535 | |||
1536 | trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); | ||
1537 | } | ||