arm/arm64: KVM: Use kernel mapping to perform invalidation on page fault

When handling a fault in stage-2, we need to resync I$ and D$, just to be sure we don't leave any old cache line behind.

That's very good, except that we do so using the *user* address. Under heavy load (swapping like crazy), we may end up in a situation where the page gets mapped in stage-2 while being unmapped from userspace by another CPU.

At that point, the DC/IC instructions can generate a fault, which we handle with kvm->mmu_lock held. The box quickly deadlocks, user is unhappy.

Instead, perform this invalidation through the kernel mapping, which is guaranteed to be present. The box is much happier, and so am I.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
author: Marc Zyngier <marc.zyngier@arm.com> 2015-01-05 16:13:24 -0500
committer: Christoffer Dall <christoffer.dall@linaro.org> 2015-01-29 17:24:57 -0500
commit: 0d3e4d4fade6b04e933b11e69e80044f35e9cd60 (patch)
tree: 57b219954fcb587cd0bbf8b3a4abbe40c269a2da /arch/arm
parent: 363ef89f8e9bcedc28b976d0fe2d858fe139c122 (diff)
2 files changed, 42 insertions, 13 deletions
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 552c31f5a3f7..1bca8f8af442 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -162,13 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
        return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
 }
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
-                                             unsigned long size,
+                                               unsigned long size,
-                                             bool ipa_uncached)
+                                               bool ipa_uncached)
 {
-        if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
-                kvm_flush_dcache_to_poc((void *)hva, size);
-        
        /*
         * If we are going to insert an instruction page and the icache is
         * either VIPT or PIPT, there is a potential problem where the host
@@ -180,10 +177,38 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
         *
         * VIVT caches are tagged using both the ASID and the VMID and doesn't
         * need any kind of flushing (DDI 0406C.b - Page B3-1392).
+         *
+         * We need to do this through a kernel mapping (using the
+         * user-space mapping has proved to be the wrong
+         * solution). For that, we need to kmap one page at a time,
+         * and iterate over the range.
         */
-        if (icache_is_pipt()) {
-                __cpuc_coherent_user_range(hva, hva + size);
+        bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
-        } else if (!icache_is_vivt_asid_tagged()) {
+        VM_BUG_ON(size & PAGE_MASK);
+        if (!need_flush && !icache_is_pipt())
+                goto vipt_cache;
+        while (size) {
+                void *va = kmap_atomic_pfn(pfn);
+                if (need_flush)
+                        kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+                if (icache_is_pipt())
+                        __cpuc_coherent_user_range((unsigned long)va,
+                                                   (unsigned long)va + PAGE_SIZE);
+                size -= PAGE_SIZE;
+                pfn++;
+                kunmap_atomic(va);
+        }
+vipt_cache:
+        if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
                /* any kind of VIPT cache */
                __flush_icache_all();
        }
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 78e68abcb01f..136662547ca6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -957,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
        return !pfn_valid(pfn);
 }
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+                                      unsigned long size, bool uncached)
+{
+        __coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
@@ -1046,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        kvm_set_s2pmd_writable(&new_pmd);
                        kvm_set_pfn_dirty(pfn);
                }
-                coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+                coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
-                                          fault_ipa_uncached);
                ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
        } else {
                pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1055,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        kvm_set_s2pte_writable(&new_pte);
                        kvm_set_pfn_dirty(pfn);
                }
-                coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+                coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
-                                          fault_ipa_uncached);
                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
                        pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
        }
author	Marc Zyngier <marc.zyngier@arm.com>	2015-01-05 16:13:24 -0500
committer	Christoffer Dall <christoffer.dall@linaro.org>	2015-01-29 17:24:57 -0500
commit	0d3e4d4fade6b04e933b11e69e80044f35e9cd60 (patch)
tree	57b219954fcb587cd0bbf8b3a4abbe40c269a2da /arch/arm
parent	363ef89f8e9bcedc28b976d0fe2d858fe139c122 (diff)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 552c31f5a3f7..1bca8f8af442 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -162,13 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
162	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;	162	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
163	}	163	}
164		164
165	static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,	165	static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
166	unsigned long size,	166	unsigned long size,
167	bool ipa_uncached)	167	bool ipa_uncached)
168	{	168	{
169	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
170	kvm_flush_dcache_to_poc((void *)hva, size);
171
172	/*	169	/*
173	* If we are going to insert an instruction page and the icache is	170	* If we are going to insert an instruction page and the icache is
174	* either VIPT or PIPT, there is a potential problem where the host	171	* either VIPT or PIPT, there is a potential problem where the host
@@ -180,10 +177,38 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
180	*	177	*
181	* VIVT caches are tagged using both the ASID and the VMID and doesn't	178	* VIVT caches are tagged using both the ASID and the VMID and doesn't
182	* need any kind of flushing (DDI 0406C.b - Page B3-1392).	179	* need any kind of flushing (DDI 0406C.b - Page B3-1392).
		180	*
		181	* We need to do this through a kernel mapping (using the
		182	* user-space mapping has proved to be the wrong
		183	* solution). For that, we need to kmap one page at a time,
		184	* and iterate over the range.
183	*/	185	*/
184	if (icache_is_pipt()) {	186
185	__cpuc_coherent_user_range(hva, hva + size);	187	bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
186	} else if (!icache_is_vivt_asid_tagged()) {	188
		189	VM_BUG_ON(size & PAGE_MASK);
		190
		191	if (!need_flush && !icache_is_pipt())
		192	goto vipt_cache;
		193
		194	while (size) {
		195	void *va = kmap_atomic_pfn(pfn);
		196
		197	if (need_flush)
		198	kvm_flush_dcache_to_poc(va, PAGE_SIZE);
		199
		200	if (icache_is_pipt())
		201	__cpuc_coherent_user_range((unsigned long)va,
		202	(unsigned long)va + PAGE_SIZE);
		203
		204	size -= PAGE_SIZE;
		205	pfn++;
		206
		207	kunmap_atomic(va);
		208	}
		209
		210	vipt_cache:
		211	if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
187	/* any kind of VIPT cache */	212	/* any kind of VIPT cache */
188	__flush_icache_all();	213	__flush_icache_all();
189	}	214	}


diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 78e68abcb01f..136662547ca6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -957,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
957	return !pfn_valid(pfn);	957	return !pfn_valid(pfn);
958	}	958	}
959		959
		960	static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
		961	unsigned long size, bool uncached)
		962	{
		963	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
		964	}
		965
960	static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,	966	static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
961	struct kvm_memory_slot *memslot, unsigned long hva,	967	struct kvm_memory_slot *memslot, unsigned long hva,
962	unsigned long fault_status)	968	unsigned long fault_status)
@@ -1046,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1046	kvm_set_s2pmd_writable(&new_pmd);	1052	kvm_set_s2pmd_writable(&new_pmd);
1047	kvm_set_pfn_dirty(pfn);	1053	kvm_set_pfn_dirty(pfn);
1048	}	1054	}
1049	coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,	1055	coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
1050	fault_ipa_uncached);
1051	ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);	1056	ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1052	} else {	1057	} else {
1053	pte_t new_pte = pfn_pte(pfn, mem_type);	1058	pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1055,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1055	kvm_set_s2pte_writable(&new_pte);	1060	kvm_set_s2pte_writable(&new_pte);
1056	kvm_set_pfn_dirty(pfn);	1061	kvm_set_pfn_dirty(pfn);
1057	}	1062	}
1058	coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,	1063	coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
1059	fault_ipa_uncached);
1060	ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,	1064	ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
1061	pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));	1065	pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
1062	}	1066	}