Diffstat (limited to 'arch/arm/kvm/mmu.c')
-rw-r--r--  arch/arm/kvm/mmu.c | 164
1 file changed, 144 insertions, 20 deletions
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1dc9778a00af..136662547ca6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+	__kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	__kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+	__kvm_flush_dcache_pud(pud);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 {
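The __kvm_flush_dcache_pte/pmd/pud() backends called by these wrappers live in the per-architecture kvm_mmu.h headers and are not part of this diff. As a minimal sketch of what the 32-bit PTE flavour is assumed to look like (kmap the page, then write it back to the Point of Coherency with kvm_flush_dcache_to_poc(); the sketch_* name is illustrative only):

/* Hypothetical sketch, not the in-tree helper: flush one page through a
 * temporary kernel mapping, as the comment above describes. */
static inline void sketch_flush_dcache_pte(pte_t pte)
{
	void *va = kmap_atomic(pte_page(pte));	/* map the page backing the PTE */

	kvm_flush_dcache_to_poc(va, PAGE_SIZE);	/* write back to the Point of Coherency */

	kunmap_atomic(va);
}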
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 	put_page(virt_to_page(pmd));
 }
 
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM. However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 	start_pte = pte = pte_offset_kernel(pmd, addr);
 	do {
 		if (!pte_none(*pte)) {
+			pte_t old_pte = *pte;
+
 			kvm_set_pte(pte, __pte(0));
-			put_page(virt_to_page(pte));
 			kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+			/* No need to invalidate the cache for device mappings */
+			if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+				kvm_flush_dcache_pte(old_pte);
+
+			put_page(virt_to_page(pte));
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
 			if (kvm_pmd_huge(*pmd)) {
+				pmd_t old_pmd = *pmd;
+
 				pmd_clear(pmd);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pmd(old_pmd);
+
 				put_page(virt_to_page(pmd));
 			} else {
 				unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
 			if (pud_huge(*pud)) {
+				pud_t old_pud = *pud;
+
 				pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pud(old_pud);
+
 				put_page(virt_to_page(pud));
 			} else {
 				unmap_pmds(kvm, pud, addr, next);
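For the huge-mapping cases above, a PMD-sized (section) block cannot be kmapped in one go on 32-bit, so the backing helper presumably walks it page by page from the pfn encoded in the entry. A hedged sketch of that idea (again, sketch_* is illustrative, not the in-tree function):

/* Hypothetical sketch: flush a whole section by mapping and cleaning one
 * page at a time; kmap_atomic_pfn() maps a single page of the block. */
static inline void sketch_flush_dcache_pmd(pmd_t pmd)
{
	unsigned long size = PMD_SIZE;
	pfn_t pfn = pmd_pfn(pmd);

	while (size) {
		void *va = kmap_atomic_pfn(pfn);	/* one page of the section */

		kvm_flush_dcache_to_poc(va, PAGE_SIZE);

		kunmap_atomic(va);
		pfn++;
		size -= PAGE_SIZE;
	}
}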
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		if (!pte_none(*pte)) {
-			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
-		}
+		if (!pte_none(*pte) &&
+		    (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+			kvm_flush_dcache_pte(*pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 	do {
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
-			} else {
+			if (kvm_pmd_huge(*pmd))
+				kvm_flush_dcache_pmd(*pmd);
+			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
-			}
 		}
 	} while (pmd++, addr = next, addr != end);
 }
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 	do {
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
-			} else {
+			if (pud_huge(*pud))
+				kvm_flush_dcache_pud(*pud);
+			else
 				stage2_flush_pmds(kvm, pud, addr, next);
-			}
 		}
 	} while (pud++, addr = next, addr != end);
 }
@@ -278,7 +330,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
  * Go through the stage 2 page tables and invalidate any cache lines
  * backing memory already mapped to the VM.
  */
-void stage2_flush_vm(struct kvm *kvm)
+static void stage2_flush_vm(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
@@ -905,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 }
 
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+				      unsigned long size, bool uncached)
+{
+	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -994,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1003,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
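The two hunks above switch coherent_cache_guest_page() from the userspace hva to the pfn: the backend can now reach the page through the kernel's own mapping and no longer depends on the user mapping at all. A simplified sketch of the idea, assuming the page sits in the kernel linear map (the real __coherent_cache_guest_page() also handles I-cache maintenance and highmem, neither of which is shown here):

/* Hypothetical sketch, not the in-tree helper. */
static void sketch_coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
					     unsigned long size, bool uncached)
{
	void *va = page_address(pfn_to_page(pfn));	/* kernel mapping of the page */

	/*
	 * Only clean to the PoC while the guest runs with its caches off
	 * or has marked this IPA range as uncached.
	 */
	if (!vcpu_has_cache_enabled(vcpu) || uncached)
		kvm_flush_dcache_to_poc(va, size);
}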
@@ -1411,3 +1467,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 	unmap_stage2_range(kvm, gpa, size);
 	spin_unlock(&kvm->mmu_lock);
 }
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the cache maintenance instructions associated
+ * with the powerdown and powerup of caches, if this is required by
+ * the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ *   caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+	unsigned long hcr = vcpu_get_hcr(vcpu);
+
+	/*
+	 * If this is the first time we do a S/W operation
+	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
+	 * VM trapping.
+	 *
+	 * Otherwise, rely on the VM trapping to wait for the MMU +
+	 * Caches to be turned off. At that point, we'll be able to
+	 * clean the caches again.
+	 */
+	if (!(hcr & HCR_TVM)) {
+		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+					vcpu_has_cache_enabled(vcpu));
+		stage2_flush_vm(vcpu->kvm);
+		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+	}
+}
+
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+	bool now_enabled = vcpu_has_cache_enabled(vcpu);
+
+	/*
+	 * If switching the MMU+caches on, need to invalidate the caches.
+	 * If switching it off, need to clean the caches.
+	 * Clean + invalidate does the trick always.
+	 */
+	if (now_enabled != was_enabled)
+		stage2_flush_vm(vcpu->kvm);
+
+	/* Caches are now on, stop trapping VM ops (until a S/W op) */
+	if (now_enabled)
+		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+
+	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}
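kvm_set_way_flush() and kvm_toggle_cache() are called from the coprocessor/system-register emulation code, which is outside this diff. A simplified, hypothetical sketch of how the trap side might tie into them (the sketch_* names and signatures are illustrative only, not the real handlers):

/* Guest executed a set/way cache op (DC ISW/CSW/CISW): convert it to a
 * full VA-based flush and start trapping the VM control registers. */
static bool sketch_access_dcsw(struct kvm_vcpu *vcpu)
{
	kvm_set_way_flush(vcpu);
	return true;
}

/* Guest wrote a VM control register such as SCTLR while HCR.TVM is set. */
static void sketch_emulate_sctlr_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	bool was_enabled = vcpu_has_cache_enabled(vcpu);

	/* ... emulate the register write itself here (omitted) ... */

	/* Flush and/or stop trapping if the cache enable bit just toggled. */
	kvm_toggle_cache(vcpu, was_enabled);
}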
