path: root/arch/powerpc/kvm
author	Paul Mackerras <paulus@samba.org>	2012-06-01 06:20:24 -0400
committer	Avi Kivity <avi@redhat.com>	2012-06-19 08:04:13 -0400
commit	081f323bd3cc3a4d5ee6276e53cc52eddfc20a63 (patch)
tree	057cb2a7b22d55cd50275a5a5dff6b8ce56abbb7 /arch/powerpc/kvm
parent	f961f72836eb6c0fd76201f6f6b2fafff93c4cea (diff)
KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page

At the moment we call kvmppc_pin_guest_page() in kvmppc_update_vpa()
with two spinlocks held: the vcore lock and the vcpu->vpa_update_lock.
This is not good, since kvmppc_pin_guest_page() calls down_read() and
get_user_pages_fast(), both of which can sleep.  This bug was introduced
in 2e25aa5f ("KVM: PPC: Book3S HV: Make virtual processor area
registration more robust").

This arranges to drop those spinlocks before calling
kvmppc_pin_guest_page() and re-take them afterwards.  Dropping the vcore
lock in kvmppc_run_core() means we have to set the vcore_state field to
VCORE_RUNNING before we drop the lock, so that other vcpus won't try to
run this vcore.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
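
[Editor's note] The drop/re-pin/re-check pattern the patch introduces in
kvmppc_update_vpa() can be sketched in plain userspace C.  This is a
minimal analogue, not the kernel code: a pthread mutex stands in for
vpa_update_lock, and pin_page(), unpin_page() and struct vpa_state are
invented stand-ins for kvmppc_pin_guest_page(), kvmppc_unpin_guest_page()
and struct kvmppc_vpa.  The point is the shape of the loop: snapshot the
target under the lock, drop the lock around the call that may sleep,
re-take the lock, and only accept the result if the target is still the
one that was snapshotted; otherwise undo the work and retry.

    #include <pthread.h>
    #include <stdlib.h>

    /* Stand-in for kvmppc_pin_guest_page(): may sleep, so it must not
     * be called with the lock held.  Here it just allocates a buffer. */
    static void *pin_page(unsigned long gpa)
    {
            return gpa ? malloc(4096) : NULL;
    }

    static void unpin_page(void *va)
    {
            free(va);
    }

    struct vpa_state {
            pthread_mutex_t lock;   /* plays the role of vpa_update_lock */
            unsigned long next_gpa; /* may be changed by another thread  */
            void *pinned;
    };

    static void update_vpa(struct vpa_state *v)
    {
            void *va;
            unsigned long gpa;

            pthread_mutex_lock(&v->lock);
            for (;;) {
                    gpa = v->next_gpa;              /* snapshot under the lock    */
                    pthread_mutex_unlock(&v->lock); /* drop it around the sleeper */
                    va = pin_page(gpa);
                    pthread_mutex_lock(&v->lock);   /* re-take and re-validate    */
                    if (gpa == v->next_gpa)
                            break;                  /* target unchanged: done     */
                    if (va)                         /* raced: undo and retry      */
                            unpin_page(va);
            }
            if (v->pinned)
                    unpin_page(v->pinned);
            v->pinned = va;
            pthread_mutex_unlock(&v->lock);
    }

One difference from the first hunk below: in the kernel, kvmppc_update_vpa()
is entered with the lock already held by its caller and returns with it still
held, so the unlock comes first; the sketch takes and releases the lock itself
only to stay self-contained.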
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c	96
1 file changed, 66 insertions(+), 30 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c6af1d62383..3abe1b86e58 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -268,24 +268,45 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 	return err;
 }
 
-static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 {
+	struct kvm *kvm = vcpu->kvm;
 	void *va;
 	unsigned long nb;
+	unsigned long gpa;
 
-	vpap->update_pending = 0;
-	va = NULL;
-	if (vpap->next_gpa) {
-		va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
-		if (nb < vpap->len) {
-			/*
-			 * If it's now too short, it must be that userspace
-			 * has changed the mappings underlying guest memory,
-			 * so unregister the region.
-			 */
+	/*
+	 * We need to pin the page pointed to by vpap->next_gpa,
+	 * but we can't call kvmppc_pin_guest_page under the lock
+	 * as it does get_user_pages() and down_read(). So we
+	 * have to drop the lock, pin the page, then get the lock
+	 * again and check that a new area didn't get registered
+	 * in the meantime.
+	 */
+	for (;;) {
+		gpa = vpap->next_gpa;
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		va = NULL;
+		nb = 0;
+		if (gpa)
+			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		if (gpa == vpap->next_gpa)
+			break;
+		/* sigh... unpin that one and try again */
+		if (va)
 			kvmppc_unpin_guest_page(kvm, va);
-			va = NULL;
-		}
+	}
+
+	vpap->update_pending = 0;
+	if (va && nb < vpap->len) {
+		/*
+		 * If it's now too short, it must be that userspace
+		 * has changed the mappings underlying guest memory,
+		 * so unregister the region.
+		 */
+		kvmppc_unpin_guest_page(kvm, va);
+		va = NULL;
 	}
 	if (vpap->pinned_addr)
 		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
@@ -296,20 +317,18 @@ static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
 
 static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
-
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	if (vcpu->arch.vpa.update_pending) {
-		kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
 		init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
 	}
 	if (vcpu->arch.dtl.update_pending) {
-		kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
 		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
 		vcpu->arch.dtl_index = 0;
 	}
 	if (vcpu->arch.slb_shadow.update_pending)
-		kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 }
 
@@ -800,12 +819,39 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
 	long ret;
 	u64 now;
-	int ptid, i;
+	int ptid, i, need_vpa_update;
 
 	/* don't start if any threads have a signal pending */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+	need_vpa_update = 0;
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		if (signal_pending(vcpu->arch.run_task))
 			return 0;
+		need_vpa_update |= vcpu->arch.vpa.update_pending |
+			vcpu->arch.slb_shadow.update_pending |
+			vcpu->arch.dtl.update_pending;
+	}
+
+	/*
+	 * Initialize *vc, in particular vc->vcore_state, so we can
+	 * drop the vcore lock if necessary.
+	 */
+	vc->n_woken = 0;
+	vc->nap_count = 0;
+	vc->entry_exit_count = 0;
+	vc->vcore_state = VCORE_RUNNING;
+	vc->in_guest = 0;
+	vc->napping_threads = 0;
+
+	/*
+	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
+	 * which can't be called with any spinlocks held.
+	 */
+	if (need_vpa_update) {
+		spin_unlock(&vc->lock);
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+			kvmppc_update_vpas(vcpu);
+		spin_lock(&vc->lock);
+	}
 
 	/*
 	 * Make sure we are running on thread 0, and that
@@ -838,20 +884,10 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		if (vcpu->arch.ceded)
 			vcpu->arch.ptid = ptid++;
 
-	vc->n_woken = 0;
-	vc->nap_count = 0;
-	vc->entry_exit_count = 0;
-	vc->vcore_state = VCORE_RUNNING;
 	vc->stolen_tb += mftb() - vc->preempt_tb;
-	vc->in_guest = 0;
 	vc->pcpu = smp_processor_id();
-	vc->napping_threads = 0;
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
-		if (vcpu->arch.vpa.update_pending ||
-		    vcpu->arch.slb_shadow.update_pending ||
-		    vcpu->arch.dtl.update_pending)
-			kvmppc_update_vpas(vcpu);
 		kvmppc_create_dtl_entry(vcpu, vc);
 	}
 	/* Grab any remaining hw threads so they can't go into the kernel */
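
[Editor's note] The second point in the commit message is why
vc->vcore_state must be set to VCORE_RUNNING before the vcore lock is
dropped in kvmppc_run_core(): any other vcpu task that takes the lock
while the vpas are being updated must see the core as already claimed.
Below is a rough userspace sketch of that publish-before-unlock idea,
again with invented names (run_core(), struct vcore,
update_vpas_unlocked()) rather than the real KVM structures; it shows
only the claim/release shape, not the real guest entry path.

    #include <pthread.h>

    enum vcore_state { VCORE_INACTIVE, VCORE_RUNNING };

    struct vcore {
            pthread_mutex_t lock;   /* plays the role of vc->lock */
            enum vcore_state state;
    };

    /* Stand-in for the vpa updates, which may sleep. */
    static void update_vpas_unlocked(struct vcore *vc) { (void)vc; }

    /* Returns 1 if this caller ran the core, 0 if another task owned it. */
    static int run_core(struct vcore *vc)
    {
            pthread_mutex_lock(&vc->lock);
            if (vc->state == VCORE_RUNNING) {
                    /* someone else already claimed this core */
                    pthread_mutex_unlock(&vc->lock);
                    return 0;
            }
            vc->state = VCORE_RUNNING;      /* claim before dropping the lock */
            pthread_mutex_unlock(&vc->lock);

            update_vpas_unlocked(vc);       /* safe to sleep here */

            pthread_mutex_lock(&vc->lock);
            /* ... run the core, then release the claim ... */
            vc->state = VCORE_INACTIVE;
            pthread_mutex_unlock(&vc->lock);
            return 1;
    }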