kvm: arm/arm64: Fix use after free of stage2 page table

We yield the kvm->mmu_lock occasionally while performing an operation (e.g., unmap or permission changes) on a large area of stage2 mappings. However, this could possibly cause another thread to clear and free up the stage2 page tables while we were waiting to regain the lock, and thus the original thread could end up accessing memory that was freed. This patch fixes the problem by making sure that the stage2 pagetable is still valid after we regain the lock. The fact that mmu_notifier->release() could be called twice (via __mmu_notifier_release and mmu_notifier_unregister) enhances the possibility of hitting this race where there are two threads trying to unmap the entire guest shadow pages. While at it, clean up the redundant checks around cond_resched_lock in stage2_wp_range(), as cond_resched_lock already does the same checks. Cc: Mark Rutland <mark.rutland@arm.com> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: andreyknvl@google.com Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: stable@vger.kernel.org Acked-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com> Reviewed-by: Christoffer Dall <cdall@linaro.org> Signed-off-by: Christoffer Dall <cdall@linaro.org>
author: Suzuki K Poulose <suzuki.poulose@arm.com> 2017-05-16 05:34:55 -0400
committer: Christoffer Dall <cdall@linaro.org> 2017-05-16 05:54:25 -0400
commit: 0c428a6a9256fcd66817e12db32a50b405ed2e5c (patch)
tree: ec43e322b3094edbc6808ef31cd404d9b127db51
parent: 2952a6070e07ebdd5896f1f5b861acad677caded (diff)
1 files changed, 13 insertions, 4 deletions
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 704e35f312a4..a2d63247d1bb 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -295,6 +295,13 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
        assert_spin_locked(&kvm->mmu_lock);
        pgd = kvm->arch.pgd + stage2_pgd_index(addr);
        do {
+                /*
+                 * Make sure the page table is still active, as another thread
+                 * could have possibly freed the page table, while we released
+                 * the lock.
+                 */
+                if (!READ_ONCE(kvm->arch.pgd))
+                        break;
                next = stage2_pgd_addr_end(addr, end);
                if (!stage2_pgd_none(*pgd))
                        unmap_stage2_puds(kvm, pgd, addr, next);
@@ -1170,11 +1177,13 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
                 * large. Otherwise, we may see kernel panics with
                 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
                 * CONFIG_LOCKDEP. Additionally, holding the lock too long
-                 * will also starve other vCPUs.
+                 * will also starve other vCPUs. We have to also make sure
+                 * that the page tables are not freed while we released
+                 * the lock.
                 */
-                if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+                cond_resched_lock(&kvm->mmu_lock);
-                        cond_resched_lock(&kvm->mmu_lock);
+                if (!READ_ONCE(kvm->arch.pgd))
+                        break;
                next = stage2_pgd_addr_end(addr, end);
                if (stage2_pgd_present(*pgd))
                        stage2_wp_puds(pgd, addr, next);
author	Suzuki K Poulose <suzuki.poulose@arm.com>	2017-05-16 05:34:55 -0400
committer	Christoffer Dall <cdall@linaro.org>	2017-05-16 05:54:25 -0400
commit	0c428a6a9256fcd66817e12db32a50b405ed2e5c (patch)
tree	ec43e322b3094edbc6808ef31cd404d9b127db51
parent	2952a6070e07ebdd5896f1f5b861acad677caded (diff)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 704e35f312a4..a2d63247d1bb 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c
@@ -295,6 +295,13 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
295	assert_spin_locked(&kvm->mmu_lock);	295	assert_spin_locked(&kvm->mmu_lock);
296	pgd = kvm->arch.pgd + stage2_pgd_index(addr);	296	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
297	do {	297	do {
		298	/*
		299	* Make sure the page table is still active, as another thread
		300	* could have possibly freed the page table, while we released
		301	* the lock.
		302	*/
		303	if (!READ_ONCE(kvm->arch.pgd))
		304	break;
298	next = stage2_pgd_addr_end(addr, end);	305	next = stage2_pgd_addr_end(addr, end);
299	if (!stage2_pgd_none(*pgd))	306	if (!stage2_pgd_none(*pgd))
300	unmap_stage2_puds(kvm, pgd, addr, next);	307	unmap_stage2_puds(kvm, pgd, addr, next);
@@ -1170,11 +1177,13 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
1170	* large. Otherwise, we may see kernel panics with	1177	* large. Otherwise, we may see kernel panics with
1171	* CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,	1178	* CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
1172	* CONFIG_LOCKDEP. Additionally, holding the lock too long	1179	* CONFIG_LOCKDEP. Additionally, holding the lock too long
1173	* will also starve other vCPUs.	1180	* will also starve other vCPUs. We have to also make sure
		1181	* that the page tables are not freed while we released
		1182	* the lock.
1174	*/	1183	*/
1175	if (need_resched() || spin_needbreak(&kvm->mmu_lock))	1184	cond_resched_lock(&kvm->mmu_lock);
1176	cond_resched_lock(&kvm->mmu_lock);	1185	if (!READ_ONCE(kvm->arch.pgd))
1177		1186	break;
1178	next = stage2_pgd_addr_end(addr, end);	1187	next = stage2_pgd_addr_end(addr, end);
1179	if (stage2_pgd_present(*pgd))	1188	if (stage2_pgd_present(*pgd))
1180	stage2_wp_puds(pgd, addr, next);	1189	stage2_wp_puds(pgd, addr, next);