author     Paul Mackerras <paulus@samba.org>    2011-06-28 20:23:08 -0400
committer  Avi Kivity <avi@redhat.com>          2011-07-12 06:16:57 -0400
commit     371fefd6f2dc46668e00871930dde613b88d4bde (patch)
tree       35fe799343861405914d27873eb175eb04d6dce5    /arch/powerpc/kvm/book3s_hv.c
parent     54738c097163c3f01e67ccc85462b78d4d4f495f (diff)
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded.

This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4.

To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code.

When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core.

We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus.

When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy-in-host state to blocked).

It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start executing the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest.

Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
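For reference, here is a minimal userspace sketch (not part of this patch) of how a QEMU-like VMM could consume the new capability: it queries KVM_CAP_PPC_SMT via KVM_CHECK_EXTENSION and then spaces vcpu ids by the returned thread count, so a single-threaded guest gets one vcpu per vcore. The guest_cores value and the overall flow are illustrative assumptions, and error handling is omitted.

/* Hypothetical sketch; assumes <linux/kvm.h> from a kernel carrying this series. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	/* Number of vcpus per vcore (currently 4 on POWER7); <= 0 means unsupported. */
	int smt = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
	int threads_per_vcore = (smt > 0) ? smt : 1;
	int guest_cores = 2;	/* illustrative guest topology */

	for (int core = 0; core < guest_cores; core++) {
		/*
		 * For a single-threaded guest, make each vcpu id a multiple
		 * of threads_per_vcore so every vcpu lands in its own vcore.
		 */
		int id = core * threads_per_vcore;
		int vcpu_fd = ioctl(vm, KVM_CREATE_VCPU, id);
		printf("vcpu id %d -> fd %d\n", id, vcpu_fd);
	}
	return 0;
}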
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--    arch/powerpc/kvm/book3s_hv.c    316
1 file changed, 289 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6fe469eabce8..36b6d98f1197 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -39,6 +39,7 @@
 #include <asm/mmu_context.h>
 #include <asm/lppaca.h>
 #include <asm/processor.h>
+#include <asm/cputhreads.h>
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
@@ -51,12 +52,16 @@
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	local_paca->kvm_hstate.kvm_vcpu = vcpu;
+	local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 }
 
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
+
 void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	u64 now;
@@ -74,11 +79,15 @@ void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
 			      HRTIMER_MODE_REL);
 	}
 
+	kvmppc_vcpu_blocked(vcpu);
+
 	kvm_vcpu_block(vcpu);
 	vcpu->stat.halt_wakeup++;
 
 	if (vcpu->arch.dec_expires != ~(u64)0)
 		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
+	kvmppc_vcpu_unblocked(vcpu);
 }
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -429,9 +438,16 @@ int kvmppc_core_check_processor_compat(void)
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
-	int err = -ENOMEM;
+	int err = -EINVAL;
+	int core;
+	struct kvmppc_vcore *vcore;
 	unsigned long lpcr;
 
+	core = id / threads_per_core;
+	if (core >= KVM_MAX_VCORES)
+		goto out;
+
+	err = -ENOMEM;
 	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
 	if (!vcpu)
 		goto out;
@@ -454,6 +470,38 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
+	/*
+	 * Some vcpus may start out in stopped state.  If we initialize
+	 * them to busy-in-host state they will stop other vcpus in the
+	 * vcore from running.  Instead we initialize them to blocked
+	 * state, effectively considering them to be stopped until we
+	 * see the first run ioctl for them.
+	 */
+	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+
+	init_waitqueue_head(&vcpu->arch.cpu_run);
+
+	mutex_lock(&kvm->lock);
+	vcore = kvm->arch.vcores[core];
+	if (!vcore) {
+		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+		if (vcore) {
+			INIT_LIST_HEAD(&vcore->runnable_threads);
+			spin_lock_init(&vcore->lock);
+		}
+		kvm->arch.vcores[core] = vcore;
+	}
+	mutex_unlock(&kvm->lock);
+
+	if (!vcore)
+		goto free_vcpu;
+
+	spin_lock(&vcore->lock);
+	++vcore->num_threads;
+	++vcore->n_blocked;
+	spin_unlock(&vcore->lock);
+	vcpu->arch.vcore = vcore;
+
 	return vcpu;
 
 free_vcpu:
@@ -468,21 +516,121 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kfree(vcpu);
 }
 
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	spin_lock(&vc->lock);
+	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+	++vc->n_blocked;
+	if (vc->n_runnable > 0 &&
+	    vc->n_runnable + vc->n_blocked == vc->num_threads) {
+		vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+					arch.run_list);
+		wake_up(&vcpu->arch.cpu_run);
+	}
+	spin_unlock(&vc->lock);
+}
+
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	spin_lock(&vc->lock);
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	--vc->n_blocked;
+	spin_unlock(&vc->lock);
+}
+
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void xics_wake_cpu(int cpu);
 
-static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
+				   struct kvm_vcpu *vcpu)
 {
-	u64 now;
+	struct kvm_vcpu *v;
 
-	if (signal_pending(current)) {
-		run->exit_reason = KVM_EXIT_INTR;
-		return -EINTR;
+	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+		return;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	--vc->n_runnable;
+	/* decrement the physical thread id of each following vcpu */
+	v = vcpu;
+	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
+		--v->arch.ptid;
+	list_del(&vcpu->arch.run_list);
+}
+
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+{
+	int cpu;
+	struct paca_struct *tpaca;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	cpu = vc->pcpu + vcpu->arch.ptid;
+	tpaca = &paca[cpu];
+	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.kvm_vcore = vc;
+	smp_wmb();
+#ifdef CONFIG_PPC_ICP_NATIVE
+	if (vcpu->arch.ptid) {
+		tpaca->cpu_start = 0x80;
+		tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
+		wmb();
+		xics_wake_cpu(cpu);
+		++vc->n_woken;
 	}
+#endif
+}
 
-	flush_fp_to_thread(current);
-	flush_altivec_to_thread(current);
-	flush_vsx_to_thread(current);
-	preempt_disable();
+static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+{
+	int i;
+
+	HMT_low();
+	i = 0;
+	while (vc->nap_count < vc->n_woken) {
+		if (++i >= 1000000) {
+			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
+			       vc->nap_count, vc->n_woken);
+			break;
+		}
+		cpu_relax();
+	}
+	HMT_medium();
+}
+
+/*
+ * Check that we are on thread 0 and that any other threads in
+ * this core are off-line.
+ */
+static int on_primary_thread(void)
+{
+	int cpu = smp_processor_id();
+	int thr = cpu_thread_in_core(cpu);
+
+	if (thr)
+		return 0;
+	while (++thr < threads_per_core)
+		if (cpu_online(cpu + thr))
+			return 0;
+	return 1;
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held.
+ */
+static int kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+	struct kvm_vcpu *vcpu, *vnext;
+	long ret;
+	u64 now;
+
+	/* don't start if any threads have a signal pending */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		if (signal_pending(vcpu->arch.run_task))
+			return 0;
 
 	/*
 	 * Make sure we are running on thread 0, and that
@@ -490,36 +638,150 @@ static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	 * XXX we should also block attempts to bring any
 	 * secondary threads online.
 	 */
-	if (threads_per_core > 1) {
-		int cpu = smp_processor_id();
-		int thr = cpu_thread_in_core(cpu);
-
-		if (thr)
-			goto out;
-		while (++thr < threads_per_core)
-			if (cpu_online(cpu + thr))
-				goto out;
+	if (threads_per_core > 1 && !on_primary_thread()) {
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+			vcpu->arch.ret = -EBUSY;
+		goto out;
 	}
 
-	kvm_guest_enter();
+	vc->n_woken = 0;
+	vc->nap_count = 0;
+	vc->entry_exit_count = 0;
+	vc->vcore_running = 1;
+	vc->in_guest = 0;
+	vc->pcpu = smp_processor_id();
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		kvmppc_start_thread(vcpu);
+	vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+				arch.run_list);
+
+	spin_unlock(&vc->lock);
 
+	preempt_disable();
+	kvm_guest_enter();
 	__kvmppc_vcore_entry(NULL, vcpu);
 
+	/* wait for secondary threads to finish writing their state to memory */
+	spin_lock(&vc->lock);
+	if (vc->nap_count < vc->n_woken)
+		kvmppc_wait_for_nap(vc);
+	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
+	vc->vcore_running = 2;
+	spin_unlock(&vc->lock);
+
+	/* make sure updates to secondary vcpu structs are visible now */
+	smp_mb();
 	kvm_guest_exit();
 
 	preempt_enable();
 	kvm_resched(vcpu);
 
 	now = get_tb();
-	/* cancel pending dec exception if dec is positive */
-	if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
-		kvmppc_core_dequeue_dec(vcpu);
-
-	return kvmppc_handle_exit(run, vcpu, current);
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		/* cancel pending dec exception if dec is positive */
+		if (now < vcpu->arch.dec_expires &&
+		    kvmppc_core_pending_dec(vcpu))
+			kvmppc_core_dequeue_dec(vcpu);
+		if (!vcpu->arch.trap) {
+			if (signal_pending(vcpu->arch.run_task)) {
+				vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+			}
+			continue;		/* didn't get to run */
+		}
+		ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+					 vcpu->arch.run_task);
+		vcpu->arch.ret = ret;
+		vcpu->arch.trap = 0;
+	}
 
+	spin_lock(&vc->lock);
  out:
-	preempt_enable();
-	return -EBUSY;
+	vc->vcore_running = 0;
+	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+				 arch.run_list) {
+		if (vcpu->arch.ret != RESUME_GUEST) {
+			kvmppc_remove_runnable(vc, vcpu);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+
+	return 1;
+}
+
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	int ptid;
+	int wait_state;
+	struct kvmppc_vcore *vc;
+	DEFINE_WAIT(wait);
+
+	/* No need to go into the guest when all we do is going out */
+	if (signal_pending(current)) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+	kvm_run->exit_reason = 0;
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+
+	flush_fp_to_thread(current);
+	flush_altivec_to_thread(current);
+	flush_vsx_to_thread(current);
+
+	/*
+	 * Synchronize with other threads in this virtual core
+	 */
+	vc = vcpu->arch.vcore;
+	spin_lock(&vc->lock);
+	/* This happens the first time this is called for a vcpu */
+	if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
+		--vc->n_blocked;
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+	ptid = vc->n_runnable;
+	vcpu->arch.run_task = current;
+	vcpu->arch.kvm_run = kvm_run;
+	vcpu->arch.ptid = ptid;
+	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+	++vc->n_runnable;
+
+	wait_state = TASK_INTERRUPTIBLE;
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		if (signal_pending(current)) {
+			if (!vc->vcore_running) {
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+				break;
+			}
+			/* have to wait for vcore to stop executing guest */
+			wait_state = TASK_UNINTERRUPTIBLE;
+			smp_send_reschedule(vc->pcpu);
+		}
+
+		if (!vc->vcore_running &&
+		    vc->n_runnable + vc->n_blocked == vc->num_threads) {
+			/* we can run now */
+			if (kvmppc_run_core(vc))
+				continue;
+		}
+
+		if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
+			kvmppc_start_thread(vcpu);
+
+		/* wait for other threads to come in, or wait for vcore */
+		prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+		spin_unlock(&vc->lock);
+		schedule();
+		finish_wait(&vcpu->arch.cpu_run, &wait);
+		spin_lock(&vc->lock);
+	}
+
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+		kvmppc_remove_runnable(vc, vcpu);
+	spin_unlock(&vc->lock);
+
+	return vcpu->arch.ret;
 }
 
 int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)