Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "The ARM changes are largish, but not too scary. And a simple fix for
  x86 (bug introduced in 3.19)"

(Paolo says these are the "Final" fixes. We'll see).

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: check LAPIC presence when building apic_map
  arm/arm64: KVM: Use kernel mapping to perform invalidation on page fault
  arm/arm64: KVM: Invalidate data cache on unmap
  arm/arm64: KVM: Use set/way op trapping to track the state of the caches
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-01-30 13:45:24 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-01-30 13:45:24 -0500
commit: 1f59fe76678b49fc406c6d7b63dec4abe60a0547 (patch)
tree: a96429e2a8471a49d885947bd74d931e659c3304
parent: f3a3404162774770ded2ba1494a4c8ba27cd553e (diff)
parent: df04d1d191a5fea628981067e7cb7da33b246e89 (diff)
15 files changed, 330 insertions, 178 deletions
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 66ce17655bb9..7b0152321b20 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -38,6 +38,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
        vcpu->arch.hcr = HCR_GUEST_MASK;
 }
+static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
+{
+        return vcpu->arch.hcr;
+}
+static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+{
+        vcpu->arch.hcr = hcr;
+}
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
        return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 254e0650e48b..04b4ea0b550a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -125,9 +125,6 @@ struct kvm_vcpu_arch {
         * Anything that is not used directly from assembly code goes
         * here.
         */
-        /* dcache set/way operation pending */
-        int last_pcpu;
-        cpumask_t require_dcache_flush;
        /* Don't run the guest on this vcpu */
        bool pause;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 63e0ecc04901..1bca8f8af442 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -44,6 +44,7 @@
 #ifndef __ASSEMBLY__
+#include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
@@ -161,13 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
        return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
 }
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
-                                             unsigned long size,
+                                               unsigned long size,
-                                             bool ipa_uncached)
+                                               bool ipa_uncached)
 {
-        if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
-                kvm_flush_dcache_to_poc((void *)hva, size);
-        
        /*
         * If we are going to insert an instruction page and the icache is
         * either VIPT or PIPT, there is a potential problem where the host
@@ -179,18 +177,77 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
         *
         * VIVT caches are tagged using both the ASID and the VMID and doesn't
         * need any kind of flushing (DDI 0406C.b - Page B3-1392).
+         *
+         * We need to do this through a kernel mapping (using the
+         * user-space mapping has proved to be the wrong
+         * solution). For that, we need to kmap one page at a time,
+         * and iterate over the range.
         */
-        if (icache_is_pipt()) {
-                __cpuc_coherent_user_range(hva, hva + size);
+        bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
-        } else if (!icache_is_vivt_asid_tagged()) {
+        VM_BUG_ON(size & PAGE_MASK);
+        if (!need_flush && !icache_is_pipt())
+                goto vipt_cache;
+        while (size) {
+                void *va = kmap_atomic_pfn(pfn);
+                if (need_flush)
+                        kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+                if (icache_is_pipt())
+                        __cpuc_coherent_user_range((unsigned long)va,
+                                                   (unsigned long)va + PAGE_SIZE);
+                size -= PAGE_SIZE;
+                pfn++;
+                kunmap_atomic(va);
+        }
+vipt_cache:
+        if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
                /* any kind of VIPT cache */
                __flush_icache_all();
        }
 }
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+        void *va = kmap_atomic(pte_page(pte));
+        kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+        kunmap_atomic(va);
+}
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+        unsigned long size = PMD_SIZE;
+        pfn_t pfn = pmd_pfn(pmd);
+        while (size) {
+                void *va = kmap_atomic_pfn(pfn);
+                kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+                pfn++;
+                size -= PAGE_SIZE;
+                kunmap_atomic(va);
+        }
+}
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+}
 #define kvm_virt_to_phys(x)             virt_to_idmap((unsigned long)(x))
-void stage2_flush_vm(struct kvm *kvm);
+void kvm_set_way_flush(struct kvm_vcpu *vcpu);
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 #endif  /* !__ASSEMBLY__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2d6d91001062..0b0d58a905c4 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -281,15 +281,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        vcpu->cpu = cpu;
        vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
-        /*
-         * Check whether this vcpu requires the cache to be flushed on
-         * this physical CPU. This is a consequence of doing dcache
-         * operations by set/way on this vcpu. We do it here to be in
-         * a non-preemptible section.
-         */
-        if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
-                flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
        kvm_arm_set_running_vcpu(vcpu);
 }
@@ -541,7 +532,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
                vcpu->mode = OUTSIDE_GUEST_MODE;
-                vcpu->arch.last_pcpu = smp_processor_id();
                kvm_guest_exit();
                trace_kvm_exit(*vcpu_pc(vcpu));
                /*
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 7928dbdf2102..f3d88dc388bc 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -189,82 +189,40 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
        return true;
 }
-/* See note at ARM ARM B1.14.4 */
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
                        const struct coproc_params *p,
                        const struct coproc_reg *r)
 {
-        unsigned long val;
-        int cpu;
        if (!p->is_write)
                return read_from_write_only(vcpu, p);
-        cpu = get_cpu();
+        kvm_set_way_flush(vcpu);
-        cpumask_setall(&vcpu->arch.require_dcache_flush);
-        cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
-        /* If we were already preempted, take the long way around */
-        if (cpu != vcpu->arch.last_pcpu) {
-                flush_cache_all();
-                goto done;
-        }
-        val = *vcpu_reg(vcpu, p->Rt1);
-        switch (p->CRm) {
-        case 6:                 /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
-        case 14:                /* DCCISW */
-                asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
-                break;
-        case 10:                /* DCCSW */
-                asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
-                break;
-        }
-done:
-        put_cpu();
        return true;
 }
 /*
 * Generic accessor for VM registers. Only called as long as HCR_TVM
- * is set.
+ * is set.  If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
 */
-static bool access_vm_reg(struct kvm_vcpu *vcpu,
+bool access_vm_reg(struct kvm_vcpu *vcpu,
-                          const struct coproc_params *p,
+                   const struct coproc_params *p,
-                          const struct coproc_reg *r)
+                   const struct coproc_reg *r)
 {
+        bool was_enabled = vcpu_has_cache_enabled(vcpu);
        BUG_ON(!p->is_write);
        vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
        if (p->is_64bit)
                vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
-        return true;
+        kvm_toggle_cache(vcpu, was_enabled);
-}
-/*
- * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
- * guest enables the MMU, we stop trapping the VM sys_regs and leave
- * it in complete control of the caches.
- *
- * Used by the cpu-specific code.
- */
-bool access_sctlr(struct kvm_vcpu *vcpu,
-                  const struct coproc_params *p,
-                  const struct coproc_reg *r)
-{
-        access_vm_reg(vcpu, p, r);
-        if (vcpu_has_cache_enabled(vcpu)) {     /* MMU+Caches enabled? */
-                vcpu->arch.hcr &= ~HCR_TVM;
-                stage2_flush_vm(vcpu->kvm);
-        }
        return true;
 }
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 1a44bbe39643..88d24a3a9778 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -153,8 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 #define is64            .is_64 = true
 #define is32            .is_64 = false
-bool access_sctlr(struct kvm_vcpu *vcpu,
+bool access_vm_reg(struct kvm_vcpu *vcpu,
-                  const struct coproc_params *p,
+                   const struct coproc_params *p,
-                  const struct coproc_reg *r);
+                   const struct coproc_reg *r);
 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index e6f4ae48bda9..a7136757d373 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
 static const struct coproc_reg a15_regs[] = {
        /* SCTLR: swapped by interrupt.S. */
        { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-                        access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
+                        access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 },
 };
 static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 17fc7cd479d3..b19e46d1b2c0 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
 static const struct coproc_reg a7_regs[] = {
        /* SCTLR: swapped by interrupt.S. */
        { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-                        access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
+                        access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 },
 };
 static struct kvm_coproc_target_table a7_target_table = {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1dc9778a00af..136662547ca6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
                kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+        __kvm_flush_dcache_pte(pte);
+}
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+        __kvm_flush_dcache_pmd(pmd);
+}
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+        __kvm_flush_dcache_pud(pud);
+}
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
                                  int min, int max)
 {
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
        put_page(virt_to_page(pmd));
 }
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM.  However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
                       phys_addr_t addr, phys_addr_t end)
 {
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
        start_pte = pte = pte_offset_kernel(pmd, addr);
        do {
                if (!pte_none(*pte)) {
+                        pte_t old_pte = *pte;
                        kvm_set_pte(pte, __pte(0));
-                        put_page(virt_to_page(pte));
                        kvm_tlb_flush_vmid_ipa(kvm, addr);
+                        /* No need to invalidate the cache for device mappings */
+                        if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+                                kvm_flush_dcache_pte(old_pte);
+                        put_page(virt_to_page(pte));
                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
                next = kvm_pmd_addr_end(addr, end);
                if (!pmd_none(*pmd)) {
                        if (kvm_pmd_huge(*pmd)) {
+                                pmd_t old_pmd = *pmd;
                                pmd_clear(pmd);
                                kvm_tlb_flush_vmid_ipa(kvm, addr);
+                                kvm_flush_dcache_pmd(old_pmd);
                                put_page(virt_to_page(pmd));
                        } else {
                                unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
                next = kvm_pud_addr_end(addr, end);
                if (!pud_none(*pud)) {
                        if (pud_huge(*pud)) {
+                                pud_t old_pud = *pud;
                                pud_clear(pud);
                                kvm_tlb_flush_vmid_ipa(kvm, addr);
+                                kvm_flush_dcache_pud(old_pud);
                                put_page(virt_to_page(pud));
                        } else {
                                unmap_pmds(kvm, pud, addr, next);
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
        pte = pte_offset_kernel(pmd, addr);
        do {
-                if (!pte_none(*pte)) {
+                if (!pte_none(*pte) &&
-                        hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                    (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
-                        kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
+                        kvm_flush_dcache_pte(*pte);
-                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
 }
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
        do {
                next = kvm_pmd_addr_end(addr, end);
                if (!pmd_none(*pmd)) {
-                        if (kvm_pmd_huge(*pmd)) {
+                        if (kvm_pmd_huge(*pmd))
-                                hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                                kvm_flush_dcache_pmd(*pmd);
-                                kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+                        else
-                        } else {
                                stage2_flush_ptes(kvm, pmd, addr, next);
-                        }
                }
        } while (pmd++, addr = next, addr != end);
 }
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
        do {
                next = kvm_pud_addr_end(addr, end);
                if (!pud_none(*pud)) {
-                        if (pud_huge(*pud)) {
+                        if (pud_huge(*pud))
-                                hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                                kvm_flush_dcache_pud(*pud);
-                                kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+                        else
-                        } else {
                                stage2_flush_pmds(kvm, pud, addr, next);
-                        }
                }
        } while (pud++, addr = next, addr != end);
 }
@@ -278,7 +330,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
-void stage2_flush_vm(struct kvm *kvm)
+static void stage2_flush_vm(struct kvm *kvm)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
@@ -905,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
        return !pfn_valid(pfn);
 }
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+                                      unsigned long size, bool uncached)
+{
+        __coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
@@ -994,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        kvm_set_s2pmd_writable(&new_pmd);
                        kvm_set_pfn_dirty(pfn);
                }
-                coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+                coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
-                                          fault_ipa_uncached);
                ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
        } else {
                pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1003,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        kvm_set_s2pte_writable(&new_pte);
                        kvm_set_pfn_dirty(pfn);
                }
-                coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+                coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
-                                          fault_ipa_uncached);
                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
                        pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
        }
@@ -1411,3 +1467,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
        unmap_stage2_range(kvm, gpa, size);
        spin_unlock(&kvm->mmu_lock);
 }
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the cache maintenance instructions associated
+ * with the powerdown and powerup of caches, if this is required by
+ * the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ *   caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+        unsigned long hcr = vcpu_get_hcr(vcpu);
+        /*
+         * If this is the first time we do a S/W operation
+         * (i.e. HCR_TVM not set) flush the whole memory, and set the
+         * VM trapping.
+         *
+         * Otherwise, rely on the VM trapping to wait for the MMU +
+         * Caches to be turned off. At that point, we'll be able to
+         * clean the caches again.
+         */
+        if (!(hcr & HCR_TVM)) {
+                trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+                                        vcpu_has_cache_enabled(vcpu));
+                stage2_flush_vm(vcpu->kvm);
+                vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+        }
+}
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+        bool now_enabled = vcpu_has_cache_enabled(vcpu);
+        /*
+         * If switching the MMU+caches on, need to invalidate the caches.
+         * If switching it off, need to clean the caches.
+         * Clean + invalidate does the trick always.
+         */
+        if (now_enabled != was_enabled)
+                stage2_flush_vm(vcpu->kvm);
+        /* Caches are now on, stop trapping VM ops (until a S/W op) */
+        if (now_enabled)
+                vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+        trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b1d640f78623..b6a6e7102201 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -223,6 +223,45 @@ TRACE_EVENT(kvm_hvc,
                  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
+TRACE_EVENT(kvm_set_way_flush,
+            TP_PROTO(unsigned long vcpu_pc, bool cache),
+            TP_ARGS(vcpu_pc, cache),
+            TP_STRUCT__entry(
+                    __field(    unsigned long,  vcpu_pc         )
+                    __field(    bool,           cache           )
+            ),
+            TP_fast_assign(
+                    __entry->vcpu_pc            = vcpu_pc;
+                    __entry->cache              = cache;
+            ),
+            TP_printk("S/W flush at 0x%016lx (cache %s)",
+                      __entry->vcpu_pc, __entry->cache ? "on" : "off")
+);
+TRACE_EVENT(kvm_toggle_cache,
+            TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
+            TP_ARGS(vcpu_pc, was, now),
+            TP_STRUCT__entry(
+                    __field(    unsigned long,  vcpu_pc         )
+                    __field(    bool,           was             )
+                    __field(    bool,           now             )
+            ),
+            TP_fast_assign(
+                    __entry->vcpu_pc            = vcpu_pc;
+                    __entry->was                = was;
+                    __entry->now                = now;
+            ),
+            TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
+                      __entry->vcpu_pc, __entry->was ? "on" : "off",
+                      __entry->now ? "on" : "off")
+);
 #endif /* _TRACE_KVM_H */
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 865a7e28ea2d..3cb4c856b10d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -45,6 +45,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
                vcpu->arch.hcr_el2 &= ~HCR_RW;
 }
+static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
+{
+        return vcpu->arch.hcr_el2;
+}
+static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+{
+        vcpu->arch.hcr_el2 = hcr;
+}
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
        return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0b7dfdb931df..acd101a9014d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -116,9 +116,6 @@ struct kvm_vcpu_arch {
         * Anything that is not used directly from assembly code goes
         * here.
         */
-        /* dcache set/way operation pending */
-        int last_pcpu;
-        cpumask_t require_dcache_flush;
        /* Don't run the guest */
        bool pause;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 14a74f136272..adcf49547301 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -243,24 +243,46 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
        return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
-                                             unsigned long size,
+                                               unsigned long size,
-                                             bool ipa_uncached)
+                                               bool ipa_uncached)
 {
+        void *va = page_address(pfn_to_page(pfn));
        if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
-                kvm_flush_dcache_to_poc((void *)hva, size);
+                kvm_flush_dcache_to_poc(va, size);
        if (!icache_is_aliasing()) {            /* PIPT */
-                flush_icache_range(hva, hva + size);
+                flush_icache_range((unsigned long)va,
+                                   (unsigned long)va + size);
        } else if (!icache_is_aivivt()) {       /* non ASID-tagged VIVT */
                /* any kind of VIPT cache */
                __flush_icache_all();
        }
 }
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+        struct page *page = pte_page(pte);
+        kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+}
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+        struct page *page = pmd_page(pmd);
+        kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+}
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+        struct page *page = pud_page(pud);
+        kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+}
 #define kvm_virt_to_phys(x)             __virt_to_phys((unsigned long)(x))
-void stage2_flush_vm(struct kvm *kvm);
+void kvm_set_way_flush(struct kvm_vcpu *vcpu);
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3d7c2df89946..f31e8bb2bc5b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -69,68 +69,31 @@ static u32 get_ccsidr(u32 csselr)
        return ccsidr;
 }
-static void do_dc_cisw(u32 val)
+/*
-{
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
-        asm volatile("dc cisw, %x0" : : "r" (val));
+ */
-        dsb(ish);
-}
-static void do_dc_csw(u32 val)
-{
-        asm volatile("dc csw, %x0" : : "r" (val));
-        dsb(ish);
-}
-/* See note at ARM ARM B1.14.4 */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
                        const struct sys_reg_params *p,
                        const struct sys_reg_desc *r)
 {
-        unsigned long val;
-        int cpu;
        if (!p->is_write)
                return read_from_write_only(vcpu, p);
-        cpu = get_cpu();
+        kvm_set_way_flush(vcpu);
-        cpumask_setall(&vcpu->arch.require_dcache_flush);
-        cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
-        /* If we were already preempted, take the long way around */
-        if (cpu != vcpu->arch.last_pcpu) {
-                flush_cache_all();
-                goto done;
-        }
-        val = *vcpu_reg(vcpu, p->Rt);
-        switch (p->CRm) {
-        case 6:                 /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
-        case 14:                /* DCCISW */
-                do_dc_cisw(val);
-                break;
-        case 10:                /* DCCSW */
-                do_dc_csw(val);
-                break;
-        }
-done:
-        put_cpu();
        return true;
 }
 /*
 * Generic accessor for VM registers. Only called as long as HCR_TVM
- * is set.
+ * is set. If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
 */
 static bool access_vm_reg(struct kvm_vcpu *vcpu,
                          const struct sys_reg_params *p,
                          const struct sys_reg_desc *r)
 {
        unsigned long val;
+        bool was_enabled = vcpu_has_cache_enabled(vcpu);
        BUG_ON(!p->is_write);
@@ -143,25 +106,7 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
                vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
        }
-        return true;
+        kvm_toggle_cache(vcpu, was_enabled);
-}
-/*
- * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set.  If the
- * guest enables the MMU, we stop trapping the VM sys_regs and leave
- * it in complete control of the caches.
- */
-static bool access_sctlr(struct kvm_vcpu *vcpu,
-                         const struct sys_reg_params *p,
-                         const struct sys_reg_desc *r)
-{
-        access_vm_reg(vcpu, p, r);
-        if (vcpu_has_cache_enabled(vcpu)) {     /* MMU+Caches enabled? */
-                vcpu->arch.hcr_el2 &= ~HCR_TVM;
-                stage2_flush_vm(vcpu->kvm);
-        }
        return true;
 }
@@ -377,7 +322,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
          NULL, reset_mpidr, MPIDR_EL1 },
        /* SCTLR_EL1 */
        { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
-          access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
+          access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
        /* CPACR_EL1 */
        { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
          NULL, reset_val, CPACR_EL1, 0 },
@@ -657,7 +602,7 @@ static const struct sys_reg_desc cp14_64_regs[] = {
 * register).
 */
 static const struct sys_reg_desc cp15_regs[] = {
-        { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+        { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
        { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
        { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
        { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4f0c0b954686..d52dcf0776ea 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -192,6 +192,9 @@ static void recalculate_apic_map(struct kvm *kvm)
                u16 cid, lid;
                u32 ldr, aid;
+                if (!kvm_apic_present(vcpu))
+                        continue;
                aid = kvm_apic_id(apic);
                ldr = kvm_apic_get_reg(apic, APIC_LDR);
                cid = apic_cluster_id(new, ldr);
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-01-30 13:45:24 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-01-30 13:45:24 -0500
commit	1f59fe76678b49fc406c6d7b63dec4abe60a0547 (patch)
tree	a96429e2a8471a49d885947bd74d931e659c3304
parent	f3a3404162774770ded2ba1494a4c8ba27cd553e (diff)
parent	df04d1d191a5fea628981067e7cb7da33b246e89 (diff)