author	Paul Mackerras <paulus@samba.org>	2015-06-24 07:18:03 -0400
committer	Alexander Graf <agraf@suse.de>	2015-08-22 05:16:17 -0400
commit	ec257165082616841a354dd915801ed43e3553be (patch)
tree	9d88c91517f5cf7879f7fb1121b88c4e797a390c
parent	845ac985cf8e3d52939dbe2446d91e47e91a07b6 (diff)
KVM: PPC: Book3S HV: Make use of unused threads when running guests
When running a virtual core of a guest that is configured with fewer
threads per core than the physical cores have, the extra physical threads
are currently unused.  This makes it possible to use them to run one or
more other virtual cores from the same guest when certain conditions are
met.  This applies on POWER7, and on POWER8 to guests with one thread per
virtual core.  (It doesn't apply to POWER8 guests with multiple threads
per vcore because they require a 1-1 virtual to physical thread mapping
in order to be able to use msgsndp and the TIR.)

The idea is that we maintain a list of preempted vcores for each physical
cpu (i.e. each core, since the host runs single-threaded).  Then, when a
vcore is about to run, it checks to see if there are any vcores on the
list for its physical cpu that could be piggybacked onto this vcore's
execution.  If so, those additional vcores are put into state
VCORE_PIGGYBACK and their runnable VCPU threads are started as well as
the original vcore, which is called the master vcore.

After the vcores have exited the guest, the extra ones are put back onto
the preempted list if any of their VCPUs are still runnable and not idle.

This means that vcpu->arch.ptid is no longer necessarily the same as the
physical thread that the vcpu runs on.  In order to make it easier for
code that wants to send an IPI to know which CPU to target, we now store
that in a new field in struct kvm_vcpu_arch, called thread_cpu.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
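To illustrate the new thread_cpu field: IPI targeting no longer needs to
add the vcpu's ptid to the core's first CPU, because a piggybacked vcore's
threads can sit at an arbitrary offset within the master vcore's physical
core.  A minimal before/after sketch, with both calls taken from the
kvmppc_fast_vcpu_kick_hv() hunk below:

	/* Before: target thread derived from the first CPU of the core. */
	kvmppc_ipi_thread(vcpu->cpu + vcpu->arch.ptid);

	/* After: thread_cpu records the exact physical thread the vcpu
	 * runs on, so the kick can use it directly. */
	kvmppc_ipi_thread(vcpu->arch.thread_cpu);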
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h      |  19
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c        |   2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c             | 333
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c     |   7
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c     |   4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S  |   5
6 files changed, 298 insertions(+), 72 deletions(-)
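The new target_smt_mode module parameter (0 = use all threads) caps how
many threads kvmppc_run_core() will try to fill on a physical core when
collecting piggyback vcores; the budget is computed as in this excerpt
from the book3s_hv.c hunk below:

	target_threads = threads_per_subcore;
	if (target_smt_mode && target_smt_mode < target_threads)
		target_threads = target_smt_mode;
	if (vc->num_threads < target_threads)
		collect_piggybacks(&core_info, target_threads);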
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d91f65b28e32..2b7449017ae8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -278,7 +278,9 @@ struct kvmppc_vcore {
 	u16 last_cpu;
 	u8 vcore_state;
 	u8 in_guest;
+	struct kvmppc_vcore *master_vcore;
 	struct list_head runnable_threads;
+	struct list_head preempt_list;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
@@ -300,12 +302,18 @@ struct kvmppc_vcore {
 #define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
 #define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
 
-/* Values for vcore_state */
+/*
+ * Values for vcore_state.
+ * Note that these are arranged such that lower values
+ * (< VCORE_SLEEPING) don't require stolen time accounting
+ * on load/unload, and higher values do.
+ */
 #define VCORE_INACTIVE	0
-#define VCORE_SLEEPING	1
-#define VCORE_PREEMPT	2
-#define VCORE_RUNNING	3
-#define VCORE_EXITING	4
+#define VCORE_PREEMPT	1
+#define VCORE_PIGGYBACK	2
+#define VCORE_SLEEPING	3
+#define VCORE_RUNNING	4
+#define VCORE_EXITING	5
 
 /*
  * Struct used to manage memory for a virtual processor area
@@ -619,6 +627,7 @@ struct kvm_vcpu_arch {
 	int trap;
 	int state;
 	int ptid;
+	int thread_cpu;
 	bool timer_running;
 	wait_queue_head_t cpu_run;
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 98230579d99c..a78cdbf9b622 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -512,6 +512,8 @@ int main(void)
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 	DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
+	DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu));
+	DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6e588acaac2c..0173ce221111 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,9 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #define MPP_BUFFER_ORDER	3
 #endif
 
+static int target_smt_mode;
+module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,7 +117,7 @@ static bool kvmppc_ipi_thread(int cpu)
 
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
-	int cpu = vcpu->cpu;
+	int cpu;
 	wait_queue_head_t *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
@@ -123,10 +126,11 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 		++vcpu->stat.halt_wakeup;
 	}
 
-	if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+	if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
 		return;
 
 	/* CPU points to the first thread of the core */
+	cpu = vcpu->cpu;
 	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
 		smp_send_reschedule(cpu);
 }
@@ -164,6 +168,27 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * they should never fail.)
  */
 
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	vc->preempt_tb = mftb();
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	if (vc->preempt_tb != TB_NIL) {
+		vc->stolen_tb += mftb() - vc->preempt_tb;
+		vc->preempt_tb = TB_NIL;
+	}
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -175,14 +200,9 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	 * vcpu, and once it is set to this vcpu, only this task
 	 * ever sets it to NULL.
 	 */
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		if (vc->preempt_tb != TB_NIL) {
-			vc->stolen_tb += mftb() - vc->preempt_tb;
-			vc->preempt_tb = TB_NIL;
-		}
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_end_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
@@ -197,11 +217,9 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		vc->preempt_tb = mftb();
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_start_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
@@ -641,7 +659,8 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
 
 	spin_lock(&vcore->lock);
 	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
-	    vcore->vcore_state != VCORE_INACTIVE)
+	    vcore->vcore_state != VCORE_INACTIVE &&
+	    vcore->runner)
 		target = vcore->runner;
 	spin_unlock(&vcore->lock);
 
@@ -1431,6 +1450,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = core * threads_per_subcore;
 	vcore->kvm = kvm;
+	INIT_LIST_HEAD(&vcore->preempt_list);
 
 	vcore->mpp_buffer_is_valid = false;
 
@@ -1655,6 +1675,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
 	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+	vcpu->arch.thread_cpu = -1;
 
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
@@ -1787,6 +1808,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	int cpu;
 	struct paca_struct *tpaca;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *mvc = vc->master_vcore;
 
 	if (vcpu->arch.timer_running) {
 		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
@@ -1794,10 +1816,11 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	}
 	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.kvm_vcore = vc;
-	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
-	vcpu->cpu = vc->pcpu;
-	/* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
+	tpaca->kvm_hstate.kvm_vcore = mvc;
+	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+	vcpu->cpu = mvc->pcpu;
+	vcpu->arch.thread_cpu = cpu;
+	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
 	smp_wmb();
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	if (cpu != smp_processor_id())
@@ -1890,6 +1913,114 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
 	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
 }
 
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+	struct list_head	list;
+	spinlock_t		lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+		spin_lock_init(&lp->lock);
+		INIT_LIST_HEAD(&lp->list);
+	}
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+	vc->vcore_state = VCORE_PREEMPT;
+	vc->pcpu = smp_processor_id();
+	if (vc->num_threads < threads_per_subcore) {
+		spin_lock(&lp->lock);
+		list_add_tail(&vc->preempt_list, &lp->list);
+		spin_unlock(&lp->lock);
+	}
+
+	/* Start accumulating stolen time */
+	kvmppc_core_start_stolen(vc);
+}
+
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+	kvmppc_core_end_stolen(vc);
+	if (!list_empty(&vc->preempt_list)) {
+		spin_lock(&lp->lock);
+		list_del_init(&vc->preempt_list);
+		spin_unlock(&lp->lock);
+	}
+	vc->vcore_state = VCORE_INACTIVE;
+}
+
+struct core_info {
+	int		total_threads;
+	struct list_head	vcs;
+};
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+{
+	memset(cip, 0, sizeof(*cip));
+	cip->total_threads = vc->num_threads;
+	INIT_LIST_HEAD(&cip->vcs);
+	list_add_tail(&vc->preempt_list, &cip->vcs);
+}
+
+static void init_master_vcore(struct kvmppc_vcore *vc)
+{
+	vc->master_vcore = vc;
+	vc->entry_exit_map = 0;
+	vc->in_guest = 0;
+	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
+}
+
+/*
+ * Work out whether it is possible to piggyback the execute of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+			  int target_threads)
+{
+	struct kvmppc_vcore *vc;
+
+	vc = list_first_entry(&cip->vcs, struct kvmppc_vcore, preempt_list);
+
+	/* require same VM and same per-core reg values */
+	if (pvc->kvm != vc->kvm ||
+	    pvc->tb_offset != vc->tb_offset ||
+	    pvc->pcr != vc->pcr ||
+	    pvc->lpcr != vc->lpcr)
+		return false;
+
+	/* P8 guest with > 1 thread per core would see wrong TIR value */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+	    (vc->num_threads > 1 || pvc->num_threads > 1))
+		return false;
+
+	if (cip->total_threads + pvc->num_threads > target_threads)
+		return false;
+
+	cip->total_threads += pvc->num_threads;
+	pvc->master_vcore = vc;
+	list_del(&pvc->preempt_list);
+	list_add_tail(&pvc->preempt_list, &cip->vcs);
+
+	return true;
+}
+
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
 	struct kvm_vcpu *vcpu, *vnext;
@@ -1909,12 +2040,45 @@ static void prepare_threads(struct kvmppc_vcore *vc)
 	}
 }
 
-static void post_guest_process(struct kvmppc_vcore *vc)
+static void collect_piggybacks(struct core_info *cip, int target_threads)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+	struct kvmppc_vcore *pvc, *vcnext;
+
+	spin_lock(&lp->lock);
+	list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+		if (!spin_trylock(&pvc->lock))
+			continue;
+		prepare_threads(pvc);
+		if (!pvc->n_runnable) {
+			list_del_init(&pvc->preempt_list);
+			if (pvc->runner == NULL) {
+				pvc->vcore_state = VCORE_INACTIVE;
+				kvmppc_core_end_stolen(pvc);
+			}
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		if (!can_piggyback(pvc, cip, target_threads)) {
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		kvmppc_core_end_stolen(pvc);
+		pvc->vcore_state = VCORE_PIGGYBACK;
+		if (cip->total_threads >= target_threads)
+			break;
+	}
+	spin_unlock(&lp->lock);
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
+	int still_running = 0;
 	u64 now;
 	long ret;
 	struct kvm_vcpu *vcpu, *vnext;
 
+	spin_lock(&vc->lock);
 	now = get_tb();
 	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
 				 arch.run_list) {
@@ -1933,17 +2097,31 @@ static void post_guest_process(struct kvmppc_vcore *vc)
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
 
-		if (vcpu->arch.ceded) {
-			if (!is_kvmppc_resume_guest(ret))
-				kvmppc_end_cede(vcpu);
-			else
+		if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			if (vcpu->arch.pending_exceptions)
+				kvmppc_core_prepare_to_enter(vcpu);
+			if (vcpu->arch.ceded)
 				kvmppc_set_timer(vcpu);
-		}
-		if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			else
+				++still_running;
+		} else {
 			kvmppc_remove_runnable(vc, vcpu);
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
+	list_del_init(&vc->preempt_list);
+	if (!is_master) {
+		vc->vcore_state = vc->runner ? VCORE_PREEMPT : VCORE_INACTIVE;
+		if (still_running > 0)
+			kvmppc_vcore_preempt(vc);
+		if (vc->n_runnable > 0 && vc->runner == NULL) {
+			/* make sure there's a candidate runner awake */
+			vcpu = list_first_entry(&vc->runnable_threads,
+						struct kvm_vcpu, arch.run_list);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+	spin_unlock(&vc->lock);
 }
 
 /*
@@ -1955,6 +2133,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	struct kvm_vcpu *vcpu, *vnext;
 	int i;
 	int srcu_idx;
+	struct core_info core_info;
+	struct kvmppc_vcore *pvc, *vcnext;
+	int pcpu, thr;
+	int target_threads;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -1969,11 +2151,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/*
 	 * Initialize *vc.
 	 */
-	vc->entry_exit_map = 0;
+	init_master_vcore(vc);
 	vc->preempt_tb = TB_NIL;
-	vc->in_guest = 0;
-	vc->napping_threads = 0;
-	vc->conferring_threads = 0;
 
 	/*
 	 * Make sure we are running on primary threads, and that secondary
@@ -1991,12 +2170,28 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		goto out;
 	}
 
+	/*
+	 * See if we could run any other vcores on the physical core
+	 * along with this one.
+	 */
+	init_core_info(&core_info, vc);
+	pcpu = smp_processor_id();
+	target_threads = threads_per_subcore;
+	if (target_smt_mode && target_smt_mode < target_threads)
+		target_threads = target_smt_mode;
+	if (vc->num_threads < target_threads)
+		collect_piggybacks(&core_info, target_threads);
 
-	vc->pcpu = smp_processor_id();
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		kvmppc_start_thread(vcpu);
-		kvmppc_create_dtl_entry(vcpu, vc);
-		trace_kvm_guest_enter(vcpu);
+	thr = 0;
+	list_for_each_entry(pvc, &core_info.vcs, preempt_list) {
+		pvc->pcpu = pcpu + thr;
+		list_for_each_entry(vcpu, &pvc->runnable_threads,
+				    arch.run_list) {
+			kvmppc_start_thread(vcpu);
+			kvmppc_create_dtl_entry(vcpu, pvc);
+			trace_kvm_guest_enter(vcpu);
+		}
+		thr += pvc->num_threads;
 	}
 
 	/* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -2008,7 +2203,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	trace_kvmppc_run_core(vc, 0);
 
-	spin_unlock(&vc->lock);
+	list_for_each_entry(pvc, &core_info.vcs, preempt_list)
+		spin_unlock(&pvc->lock);
 
 	kvm_guest_enter();
 
@@ -2019,32 +2215,30 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	__kvmppc_vcore_entry();
 
-	spin_lock(&vc->lock);
-
 	if (vc->mpp_buffer)
 		kvmppc_start_saving_l2_cache(vc);
 
-	/* disable sending of IPIs on virtual external irqs */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-		vcpu->cpu = -1;
+	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+	spin_lock(&vc->lock);
+	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
+	vc->vcore_state = VCORE_EXITING;
+
 	/* wait for secondary threads to finish writing their state to memory */
 	kvmppc_wait_for_nap();
 	for (i = 0; i < threads_per_subcore; ++i)
 		kvmppc_release_hwthread(vc->pcpu + i);
-	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
-	vc->vcore_state = VCORE_EXITING;
 	spin_unlock(&vc->lock);
 
-	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
-
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 	kvm_guest_exit();
 
-	preempt_enable();
+	list_for_each_entry_safe(pvc, vcnext, &core_info.vcs, preempt_list)
+		post_guest_process(pvc, pvc == vc);
 
 	spin_lock(&vc->lock);
-	post_guest_process(vc);
+	preempt_enable();
 
  out:
 	vc->vcore_state = VCORE_INACTIVE;
@@ -2055,13 +2249,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
  * Wait for some other vcpu thread to execute us, and
  * wake us up when we need to handle something in the host.
  */
-static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
+				 struct kvm_vcpu *vcpu, int wait_state)
 {
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
-	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		spin_unlock(&vc->lock);
 		schedule();
+		spin_lock(&vc->lock);
+	}
 	finish_wait(&vcpu->arch.cpu_run, &wait);
 }
 
@@ -2137,7 +2335,19 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * this thread straight away and have it join in.
 	 */
 	if (!signal_pending(current)) {
-		if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
+		if (vc->vcore_state == VCORE_PIGGYBACK) {
+			struct kvmppc_vcore *mvc = vc->master_vcore;
+			if (spin_trylock(&mvc->lock)) {
+				if (mvc->vcore_state == VCORE_RUNNING &&
+				    !VCORE_IS_EXITING(mvc)) {
+					kvmppc_create_dtl_entry(vcpu, vc);
+					kvmppc_start_thread(vcpu);
+					trace_kvm_guest_enter(vcpu);
+				}
+				spin_unlock(&mvc->lock);
+			}
+		} else if (vc->vcore_state == VCORE_RUNNING &&
+			   !VCORE_IS_EXITING(vc)) {
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
 			trace_kvm_guest_enter(vcpu);
@@ -2149,10 +2359,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       !signal_pending(current)) {
+		if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+			kvmppc_vcore_end_preempt(vc);
+
 		if (vc->vcore_state != VCORE_INACTIVE) {
-			spin_unlock(&vc->lock);
-			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
-			spin_lock(&vc->lock);
+			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
 			continue;
 		}
 		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
@@ -2179,10 +2390,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		if (n_ceded == vc->n_runnable) {
 			kvmppc_vcore_blocked(vc);
 		} else if (should_resched()) {
-			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_vcore_preempt(vc);
 			/* Let something else run */
 			cond_resched_lock(&vc->lock);
-			vc->vcore_state = VCORE_INACTIVE;
+			if (vc->vcore_state == VCORE_PREEMPT)
+				kvmppc_vcore_end_preempt(vc);
 		} else {
 			kvmppc_run_core(vc);
 		}
@@ -2191,11 +2403,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       (vc->vcore_state == VCORE_RUNNING ||
-		vc->vcore_state == VCORE_EXITING)) {
-		spin_unlock(&vc->lock);
-		kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
-		spin_lock(&vc->lock);
-	}
+		vc->vcore_state == VCORE_EXITING))
+		kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
 
 	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
 		kvmppc_remove_runnable(vc, vcpu);
@@ -2755,6 +2964,8 @@ static int kvmppc_book3s_init_hv(void)
 
 	init_default_hcalls();
 
+	init_vcore_lists();
+
 	r = kvmppc_mmu_hv_init();
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ed2589d4593f..1fd0e3057396 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -110,14 +110,15 @@ void __init kvm_cma_reserve(void)
 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			    unsigned int yield_count)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+	int ptid = local_paca->kvm_hstate.ptid;
 	int threads_running;
 	int threads_ceded;
 	int threads_conferring;
 	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
 	int rv = H_SUCCESS; /* => don't yield */
 
-	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	set_bit(ptid, &vc->conferring_threads);
 	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
 		threads_running = VCORE_ENTRY_MAP(vc);
 		threads_ceded = vc->napping_threads;
@@ -127,7 +128,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			break;
 		}
 	}
-	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	clear_bit(ptid, &vc->conferring_threads);
 	return rv;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 00e45b6d4f24..24f58076d49e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -67,14 +67,12 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 	}
 
 	/* Check if the core is loaded, if not, too hard */
-	cpu = vcpu->cpu;
+	cpu = vcpu->arch.thread_cpu;
 	if (cpu < 0 || cpu >= nr_cpu_ids) {
 		this_icp->rm_action |= XICS_RM_KICK_VCPU;
 		this_icp->rm_kick_target = vcpu;
 		return;
 	}
-	/* In SMT cpu will always point to thread 0, we adjust it */
-	cpu += vcpu->arch.ptid;
 
 	smp_mb();
 	kvmhv_rm_send_ipi(cpu);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index faa86e9c0551..ac113b527bf9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1176,6 +1176,11 @@ mc_cont:
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	lwz	r12, VCPU_TRAP(r9)
 
+	/* Stop others sending VCPU interrupts to this physical CPU */
+	li	r0, -1
+	stw	r0, VCPU_CPU(r9)
+	stw	r0, VCPU_THREAD_CPU(r9)
+
 	/* Save guest CTRL register, set runlatch to 1 */
 	mfspr	r6,SPRN_CTRLF
 	stw	r6,VCPU_CTRL(r9)