author		Paul Mackerras <paulus@samba.org>	2015-07-02 06:38:16 -0400
committer	Alexander Graf <agraf@suse.de>		2015-08-22 05:16:17 -0400
commit		b4deba5c41e9f6d3239606c9e060853d9decfee1 (patch)
tree		f728e4a3c252446ccd3071c5dbd816550d3dce69
parent		ec257165082616841a354dd915801ed43e3553be (diff)
KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
This builds on the ability to run more than one vcore on a physical core by using the micro-threading (split-core) modes of the POWER8 chip. Previously, only vcores from the same VM could be run together, and (on POWER8) only if they had just one thread per core. With the ability to split the core on guest entry and unsplit it on guest exit, we can run up to 8 vcpu threads from up to 4 different VMs, and we can run multiple vcores with 2 or 4 vcpus per vcore.

Dynamic micro-threading is only available if the static configuration of the cores is whole-core mode (unsplit), and only on POWER8.

To manage this, we introduce a new kvm_split_mode struct which is shared across all of the subcores in the core, with a pointer in the paca on each thread. In addition we extend the core_info struct to have information on each subcore. When deciding whether to add a vcore to the set already on the core, we now have two possibilities: (a) piggyback the vcore onto an existing subcore, or (b) start a new subcore.

Currently, when any vcpu needs to exit the guest and switch to host virtual mode, we interrupt all the threads in all subcores and switch the core back to whole-core mode. It may be possible in future to allow some of the subcores to keep executing in the guest while subcore 0 switches to the host, but that is not implemented in this patch.

This adds a module parameter called dynamic_mt_modes which controls which micro-threading (split-core) modes the code will consider, as a bitmap. In other words, if it is 0, no micro-threading mode is considered; if it is 2, only 2-way micro-threading is considered; if it is 4, only 4-way; and if it is 6, both 2-way and 4-way micro-threading modes will be considered. The default is 6.

With this, we now have secondary threads which are the primary thread for their subcore and therefore need to do the MMU switch. These threads will need to be started even if they have no vcpu to run, so we use the vcore pointer in the PACA rather than the vcpu pointer to trigger them.

It is now possible for thread 0 to find that an exit has been requested before it gets to switch the subcore state to the guest. In that case we haven't added the guest's timebase offset to the timebase, so we need to be careful not to subtract the offset in the guest exit path. In fact we just skip the whole path that switches back to host context, since we haven't switched to the guest context.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
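To make the bitmap semantics concrete, here is a standalone restatement of the subcore_config_ok() check added by this patch, with the kernel globals threads_per_subcore and dynamic_mt_modes made explicit parameters. This is a sketch for illustration only: config_ok() and roundup_pow2() are invented names, not kernel interfaces.

/*
 * Sketch only, not part of the patch: subcore_config_ok() restated as
 * standalone C, with worked examples of the dynamic_mt_modes bitmap.
 */
#include <assert.h>
#include <stdbool.h>

#define MAX_SMT_THREADS	8
#define MAX_SUBCORES	4

static int roundup_pow2(int n)		/* stands in for roundup_pow_of_two() */
{
	int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static bool config_ok(int n_subcores, int n_threads,
		      int threads_per_subcore, int dynamic_mt_modes)
{
	/* Can only dynamically split if unsplit to begin with */
	if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
		return false;
	if (n_subcores > MAX_SUBCORES)
		return false;
	if (n_subcores > 1) {
		if (!(dynamic_mt_modes & 2))
			n_subcores = 4;	/* 2-way not allowed: check as 4-way */
		if (n_subcores > 2 && !(dynamic_mt_modes & 4))
			return false;
	}
	return n_subcores * roundup_pow2(n_threads) <= MAX_SMT_THREADS;
}

int main(void)
{
	assert(config_ok(2, 4, 8, 6));	/* 2-way split, 4 threads/subcore */
	assert(config_ok(2, 3, 8, 6));	/* 3 threads round up to 4 */
	assert(config_ok(4, 2, 8, 6));	/* 4-way split, 2 threads/subcore */
	assert(!config_ok(4, 3, 8, 6));	/* 4 * 4 > 8 threads */
	assert(!config_ok(2, 4, 4, 6));	/* host already statically split */
	assert(!config_ok(2, 4, 8, 0));	/* dynamic micro-threading disabled */
	return 0;
}

Since the parameter is registered with S_IRUGO | S_IWUSR, it should also be adjustable at runtime, presumably via /sys/module/kvm_hv/parameters/dynamic_mt_modes (sysfs path assumed, not verified against this tree).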
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s_asm.h	 20
-rw-r--r--	arch/powerpc/include/asm/kvm_host.h		  3
-rw-r--r--	arch/powerpc/kernel/asm-offsets.c		  7
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c			367
-rw-r--r--	arch/powerpc/kvm/book3s_hv_builtin.c		 25
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S		113
6 files changed, 473 insertions(+), 62 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5dd3400..57d5dfef48bd 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,12 @@
 #define XICS_MFRR		0xc
 #define XICS_IPI		2	/* interrupt source # for IPIs */
 
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS		8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES		4
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
 
 #else /*__ASSEMBLY__ */
 
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+	unsigned long	rpr;
+	unsigned long	pmmar;
+	unsigned long	ldbar;
+	u8		subcore_size;
+	u8		do_nap;
+	u8		napped[MAX_SMT_THREADS];
+	struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+};
+
 /*
  * This struct goes in the PACA on 64-bit processors. It is used
  * to store host state that needs to be saved when we enter a guest
@@ -100,6 +119,7 @@ struct kvmppc_host_state {
 	u64 host_spurr;
 	u64 host_dscr;
 	u64 dec_expires;
+	struct kvm_split_mode *kvm_split_mode;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	u64 cfar;
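A side note on the napped[] array above: it is indexed by hardware thread id, which the real-mode code later in this patch (kvm_unsplit_nap) computes by masking the PACA CPU index down to its low 3 bits, since micro-threading implies POWER8 and therefore 8 threads per core. A minimal sketch of that indexing; hw_thread_id() is a name invented for this example:

/* Sketch only: mirrors the clrldi r4,r4,61 in kvm_unsplit_nap,
 * which keeps the low 3 bits of the PACA index (cpu % 8 on POWER8).
 */
static inline int hw_thread_id(int cpu)
{
	return cpu & (MAX_SMT_THREADS - 1);	/* MAX_SMT_THREADS == 8 */
}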
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2b7449017ae8..80eb29ab262a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -302,6 +302,9 @@ struct kvmppc_vcore {
 #define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
 #define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
 
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ		0x10000
+
 /*
  * Values for vcore_state.
  * Note that these are arranged such that lower values
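Why 0x10000 works as an out-of-band request: entry_exit_map keeps the entry map in its low 8 bits and the exit map at bits 8 and up, so VCORE_EXIT_REQ sets an exit-map bit that no real thread owns, which makes VCORE_IS_EXITING() true without attributing the exit to any vcpu. A sketch of the arithmetic, restating the macros above on a raw value rather than a struct kvmppc_vcore:

/* Sketch only, not part of the patch. */
#define EXIT_MAP(ee)	((ee) >> 8)
#define IS_EXITING(ee)	(EXIT_MAP(ee) != 0)

static int exit_req_example(void)
{
	unsigned long ee = 0x03;	/* threads 0 and 1 have entered */

	ee |= 0x10000;			/* VCORE_EXIT_REQ: bit 8 of the exit map */
	return IS_EXITING(ee);		/* now true, with no thread blamed */
}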
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a78cdbf9b622..de62392f093c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -676,7 +676,14 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
 	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+	DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
+	DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
+	DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
+	DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
+	DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
+	DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0173ce221111..6e3ef308b4c5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,9 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #define MPP_BUFFER_ORDER	3
 #endif
 
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
 static int target_smt_mode;
 module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
@@ -1770,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu)
 
 	/* Ensure the thread won't go into the kernel if it wakes */
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
 	tpaca->kvm_hstate.napping = 0;
 	smp_wmb();
 	tpaca->kvm_hstate.hwthread_req = 1;
@@ -1801,28 +1805,32 @@ static void kvmppc_release_hwthread(int cpu)
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
+	tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
 	struct paca_struct *tpaca;
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	struct kvmppc_vcore *mvc = vc->master_vcore;
 
-	if (vcpu->arch.timer_running) {
-		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-		vcpu->arch.timer_running = 0;
+	cpu = vc->pcpu;
+	if (vcpu) {
+		if (vcpu->arch.timer_running) {
+			hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+			vcpu->arch.timer_running = 0;
+		}
+		cpu += vcpu->arch.ptid;
+		vcpu->cpu = mvc->pcpu;
+		vcpu->arch.thread_cpu = cpu;
 	}
-	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.kvm_vcore = mvc;
+	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
-	vcpu->cpu = mvc->pcpu;
-	vcpu->arch.thread_cpu = cpu;
 	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
 	smp_wmb();
-	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.kvm_vcore = mvc;
 	if (cpu != smp_processor_id())
 		kvmppc_ipi_thread(cpu);
 }
@@ -1835,12 +1843,12 @@ static void kvmppc_wait_for_nap(void)
 	for (loops = 0; loops < 1000000; ++loops) {
 		/*
 		 * Check if all threads are finished.
-		 * We set the vcpu pointer when starting a thread
+		 * We set the vcore pointer when starting a thread
 		 * and the thread clears it when finished, so we look
-		 * for any threads that still have a non-NULL vcpu ptr.
+		 * for any threads that still have a non-NULL vcore ptr.
 		 */
 		for (i = 1; i < threads_per_subcore; ++i)
-			if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+			if (paca[cpu + i].kvm_hstate.kvm_vcore)
 				break;
 		if (i == threads_per_subcore) {
 			HMT_medium();
@@ -1850,7 +1858,7 @@ static void kvmppc_wait_for_nap(void)
1850 } 1858 }
1851 HMT_medium(); 1859 HMT_medium();
1852 for (i = 1; i < threads_per_subcore; ++i) 1860 for (i = 1; i < threads_per_subcore; ++i)
1853 if (paca[cpu + i].kvm_hstate.kvm_vcpu) 1861 if (paca[cpu + i].kvm_hstate.kvm_vcore)
1854 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); 1862 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
1855} 1863}
1856 1864
@@ -1965,17 +1973,55 @@ static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_INACTIVE;
 }
 
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
 struct core_info {
+	int		n_subcores;
+	int		max_subcore_threads;
 	int		total_threads;
-	struct list_head vcs;
+	int		subcore_threads[MAX_SUBCORES];
+	struct kvm	*subcore_vm[MAX_SUBCORES];
+	struct list_head vcs[MAX_SUBCORES];
 };
 
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
 static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 {
+	int sub;
+
 	memset(cip, 0, sizeof(*cip));
+	cip->n_subcores = 1;
+	cip->max_subcore_threads = vc->num_threads;
 	cip->total_threads = vc->num_threads;
-	INIT_LIST_HEAD(&cip->vcs);
-	list_add_tail(&vc->preempt_list, &cip->vcs);
+	cip->subcore_threads[0] = vc->num_threads;
+	cip->subcore_vm[0] = vc->kvm;
+	for (sub = 0; sub < MAX_SUBCORES; ++sub)
+		INIT_LIST_HEAD(&cip->vcs[sub]);
+	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+	/* Can only dynamically split if unsplit to begin with */
+	if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+		return false;
+	if (n_subcores > MAX_SUBCORES)
+		return false;
+	if (n_subcores > 1) {
+		if (!(dynamic_mt_modes & 2))
+			n_subcores = 4;
+		if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+			return false;
+	}
+
+	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
 }
 
 static void init_master_vcore(struct kvmppc_vcore *vc)
@@ -1988,15 +2034,113 @@ static void init_master_vcore(struct kvmppc_vcore *vc)
 }
 
 /*
- * Work out whether it is possible to piggyback the execute of
- * vcore *pvc onto the execution of the other vcores described in *cip.
+ * See if the existing subcores can be split into 3 (or fewer) subcores
+ * of at most two threads each, so we can fit in another vcore.  This
+ * assumes there are at most two subcores and at most 6 threads in total.
  */
-static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
-			  int target_threads)
+static bool can_split_piggybacked_subcores(struct core_info *cip)
+{
+	int sub, new_sub;
+	int large_sub = -1;
+	int thr;
+	int n_subcores = cip->n_subcores;
+	struct kvmppc_vcore *vc, *vcnext;
+	struct kvmppc_vcore *master_vc = NULL;
+
+	for (sub = 0; sub < cip->n_subcores; ++sub) {
+		if (cip->subcore_threads[sub] <= 2)
+			continue;
+		if (large_sub >= 0)
+			return false;
+		large_sub = sub;
+		vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+				      preempt_list);
+		if (vc->num_threads > 2)
+			return false;
+		n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
+	}
+	if (n_subcores > 3 || large_sub < 0)
+		return false;
+
+	/*
+	 * Seems feasible, so go through and move vcores to new subcores.
+	 * Note that when we have two or more vcores in one subcore,
+	 * all those vcores must have only one thread each.
+	 */
+	new_sub = cip->n_subcores;
+	thr = 0;
+	sub = large_sub;
+	list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
+		if (thr >= 2) {
+			list_del(&vc->preempt_list);
+			list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
+			/* vc->num_threads must be 1 */
+			if (++cip->subcore_threads[new_sub] == 1) {
+				cip->subcore_vm[new_sub] = vc->kvm;
+				init_master_vcore(vc);
+				master_vc = vc;
+				++cip->n_subcores;
+			} else {
+				vc->master_vcore = master_vc;
+				++new_sub;
+			}
+		}
+		thr += vc->num_threads;
+	}
+	cip->subcore_threads[large_sub] = 2;
+	cip->max_subcore_threads = 2;
+
+	return true;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+	int n_threads = vc->num_threads;
+	int sub;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return false;
+
+	if (n_threads < cip->max_subcore_threads)
+		n_threads = cip->max_subcore_threads;
+	if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
+		cip->max_subcore_threads = n_threads;
+	} else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
+		   vc->num_threads <= 2) {
+		/*
+		 * We may be able to fit another subcore in by
+		 * splitting an existing subcore with 3 or 4
+		 * threads into two 2-thread subcores, or one
+		 * with 5 or 6 threads into three subcores.
+		 * We can only do this if those subcores have
+		 * piggybacked virtual cores.
+		 */
+		if (!can_split_piggybacked_subcores(cip))
+			return false;
+	} else {
+		return false;
+	}
+
+	sub = cip->n_subcores;
+	++cip->n_subcores;
+	cip->total_threads += vc->num_threads;
+	cip->subcore_threads[sub] = vc->num_threads;
+	cip->subcore_vm[sub] = vc->kvm;
+	init_master_vcore(vc);
+	list_del(&vc->preempt_list);
+	list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+
+	return true;
+}
+
+static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
+				  struct core_info *cip, int sub)
 {
 	struct kvmppc_vcore *vc;
+	int n_thr;
 
-	vc = list_first_entry(&cip->vcs, struct kvmppc_vcore, preempt_list);
+	vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+			      preempt_list);
 
 	/* require same VM and same per-core reg values */
 	if (pvc->kvm != vc->kvm ||
@@ -2010,17 +2154,44 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
 	    (vc->num_threads > 1 || pvc->num_threads > 1))
 		return false;
 
-	if (cip->total_threads + pvc->num_threads > target_threads)
-		return false;
+	n_thr = cip->subcore_threads[sub] + pvc->num_threads;
+	if (n_thr > cip->max_subcore_threads) {
+		if (!subcore_config_ok(cip->n_subcores, n_thr))
+			return false;
+		cip->max_subcore_threads = n_thr;
+	}
 
 	cip->total_threads += pvc->num_threads;
+	cip->subcore_threads[sub] = n_thr;
 	pvc->master_vcore = vc;
 	list_del(&pvc->preempt_list);
-	list_add_tail(&pvc->preempt_list, &cip->vcs);
+	list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
 
 	return true;
 }
 
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+			  int target_threads)
+{
+	int sub;
+
+	if (cip->total_threads + pvc->num_threads > target_threads)
+		return false;
+	for (sub = 0; sub < cip->n_subcores; ++sub)
+		if (cip->subcore_threads[sub] &&
+		    can_piggyback_subcore(pvc, cip, sub))
+			return true;
+
+	if (can_dynamic_split(pvc, cip))
+		return true;
+
+	return false;
+}
+
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
 	struct kvm_vcpu *vcpu, *vnext;
@@ -2135,6 +2306,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int srcu_idx;
 	struct core_info core_info;
 	struct kvmppc_vcore *pvc, *vcnext;
+	struct kvm_split_mode split_info, *sip;
+	int split, subcore_size, active;
+	int sub;
+	bool thr0_done;
+	unsigned long cmd_bit, stat_bit;
 	int pcpu, thr;
 	int target_threads;
 
@@ -2182,29 +2358,100 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	if (vc->num_threads < target_threads)
 		collect_piggybacks(&core_info, target_threads);
 
-	thr = 0;
-	list_for_each_entry(pvc, &core_info.vcs, preempt_list) {
-		pvc->pcpu = pcpu + thr;
-		list_for_each_entry(vcpu, &pvc->runnable_threads,
-				    arch.run_list) {
-			kvmppc_start_thread(vcpu);
-			kvmppc_create_dtl_entry(vcpu, pvc);
-			trace_kvm_guest_enter(vcpu);
+	/* Decide on micro-threading (split-core) mode */
+	subcore_size = threads_per_subcore;
+	cmd_bit = stat_bit = 0;
+	split = core_info.n_subcores;
+	sip = NULL;
+	if (split > 1) {
+		/* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+		if (split == 2 && (dynamic_mt_modes & 2)) {
+			cmd_bit = HID0_POWER8_1TO2LPAR;
+			stat_bit = HID0_POWER8_2LPARMODE;
+		} else {
+			split = 4;
+			cmd_bit = HID0_POWER8_1TO4LPAR;
+			stat_bit = HID0_POWER8_4LPARMODE;
 		}
-		thr += pvc->num_threads;
+		subcore_size = MAX_SMT_THREADS / split;
+		sip = &split_info;
+		memset(&split_info, 0, sizeof(split_info));
+		split_info.rpr = mfspr(SPRN_RPR);
+		split_info.pmmar = mfspr(SPRN_PMMAR);
+		split_info.ldbar = mfspr(SPRN_LDBAR);
+		split_info.subcore_size = subcore_size;
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			split_info.master_vcs[sub] =
+				list_first_entry(&core_info.vcs[sub],
+					struct kvmppc_vcore, preempt_list);
+		/* order writes to split_info before kvm_split_mode pointer */
+		smp_wmb();
 	}
-
-	/* Set this explicitly in case thread 0 doesn't have a vcpu */
-	get_paca()->kvm_hstate.kvm_vcore = vc;
-	get_paca()->kvm_hstate.ptid = 0;
+	pcpu = smp_processor_id();
+	for (thr = 0; thr < threads_per_subcore; ++thr)
+		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+
+	/* Initiate micro-threading (split-core) if required */
+	if (cmd_bit) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+
+		hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (hid0 & stat_bit)
+				break;
+			cpu_relax();
+		}
+		split_info.do_nap = 1;	/* ask secondaries to nap when done */
+	}
+
+	/* Start all the threads */
+	active = 0;
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		thr = subcore_thread_map[sub];
+		thr0_done = false;
+		active |= 1 << thr;
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
+			pvc->pcpu = pcpu + thr;
+			list_for_each_entry(vcpu, &pvc->runnable_threads,
+					    arch.run_list) {
+				kvmppc_start_thread(vcpu, pvc);
+				kvmppc_create_dtl_entry(vcpu, pvc);
+				trace_kvm_guest_enter(vcpu);
+				if (!vcpu->arch.ptid)
+					thr0_done = true;
+				active |= 1 << (thr + vcpu->arch.ptid);
+			}
+			/*
+			 * We need to start the first thread of each subcore
+			 * even if it doesn't have a vcpu.
+			 */
+			if (pvc->master_vcore == pvc && !thr0_done)
+				kvmppc_start_thread(NULL, pvc);
+			thr += pvc->num_threads;
+		}
+	}
+	/*
+	 * When doing micro-threading, poke the inactive threads as well.
+	 * This gets them to the nap instruction after kvm_do_nap,
+	 * which reduces the time taken to unsplit later.
+	 */
+	if (split > 1)
+		for (thr = 1; thr < threads_per_subcore; ++thr)
+			if (!(active & (1 << thr)))
+				kvmppc_ipi_thread(pcpu + thr);
 
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
 
 	trace_kvmppc_run_core(vc, 0);
 
-	list_for_each_entry(pvc, &core_info.vcs, preempt_list)
-		spin_unlock(&pvc->lock);
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
+			spin_unlock(&pvc->lock);
 
 	kvm_guest_enter();
 
@@ -2226,16 +2473,44 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	/* wait for secondary threads to finish writing their state to memory */
 	kvmppc_wait_for_nap();
-	for (i = 0; i < threads_per_subcore; ++i)
-		kvmppc_release_hwthread(vc->pcpu + i);
+
+	/* Return to whole-core mode if we split the core earlier */
+	if (split > 1) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+		unsigned long loops = 0;
+
+		hid0 &= ~HID0_POWER8_DYNLPARDIS;
+		stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (!(hid0 & stat_bit))
+				break;
+			cpu_relax();
+			++loops;
+		}
+		split_info.do_nap = 0;
+	}
+
+	/* Let secondaries go back to the offline loop */
+	for (i = 0; i < threads_per_subcore; ++i) {
+		kvmppc_release_hwthread(pcpu + i);
+		if (sip && sip->napped[i])
+			kvmppc_ipi_thread(pcpu + i);
+	}
+
 	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 	kvm_guest_exit();
 
-	list_for_each_entry_safe(pvc, vcnext, &core_info.vcs, preempt_list)
-		post_guest_process(pvc, pvc == vc);
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
+					 preempt_list)
+			post_guest_process(pvc, pvc == vc);
 
 	spin_lock(&vc->lock);
 	preempt_enable();
@@ -2341,7 +2616,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		if (mvc->vcore_state == VCORE_RUNNING &&
 		    !VCORE_IS_EXITING(mvc)) {
 			kvmppc_create_dtl_entry(vcpu, vc);
-			kvmppc_start_thread(vcpu);
+			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		}
 		spin_unlock(&mvc->lock);
@@ -2349,7 +2624,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	} else if (vc->vcore_state == VCORE_RUNNING &&
 		   !VCORE_IS_EXITING(vc)) {
 		kvmppc_create_dtl_entry(vcpu, vc);
-		kvmppc_start_thread(vcpu);
+		kvmppc_start_thread(vcpu, vc);
 		trace_kvm_guest_enter(vcpu);
 	} else if (vc->vcore_state == VCORE_SLEEPING) {
 		wake_up(&vc->wq);
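For reference, the thread layout implied by subcore_thread_map = { 0, 4, 2, 6 } in kvmppc_run_core() above: the starting offsets 0 and 4 are subcore boundaries in both split modes, so subcores 0 and 1 keep threads 0-3 and 4-7 whether the core ends up split 2 ways or 4 ways. A standalone sketch (not kernel code) that prints the ranges:

#include <stdio.h>

/* Sketch only: thread ranges available to each core_info subcore
 * for the two dynamic split modes on an 8-thread POWER8 core.
 */
static const int subcore_thread_map[4] = { 0, 4, 2, 6 };

int main(void)
{
	int splits[2] = { 2, 4 };
	int i, sub;

	for (i = 0; i < 2; ++i) {
		int size = 8 / splits[i];	/* subcore_size */

		printf("%d-way split:\n", splits[i]);
		for (sub = 0; sub < splits[i]; ++sub)
			printf("  subcore %d: threads %d-%d\n", sub,
			       subcore_thread_map[sub],
			       subcore_thread_map[sub] + size - 1);
	}
	return 0;
}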
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 1fd0e3057396..fd7006bf6b1a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -239,7 +239,8 @@ void kvmhv_commence_exit(int trap)
 {
 	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 	int ptid = local_paca->kvm_hstate.ptid;
-	int me, ee;
+	struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+	int me, ee, i;
 
 	/* Set our bit in the threads-exiting-guest map in the 0xff00
 	   bits of vcore->entry_exit_map */
@@ -259,4 +260,26 @@ void kvmhv_commence_exit(int trap)
 	 */
 	if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
 		kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+
+	/*
+	 * If we are doing dynamic micro-threading, interrupt the other
+	 * subcores to pull them out of their guests too.
+	 */
+	if (!sip)
+		return;
+
+	for (i = 0; i < MAX_SUBCORES; ++i) {
+		vc = sip->master_vcs[i];
+		if (!vc)
+			break;
+		do {
+			ee = vc->entry_exit_map;
+			/* Already asked to exit? */
+			if ((ee >> 8) != 0)
+				break;
+		} while (cmpxchg(&vc->entry_exit_map, ee,
+				 ee | VCORE_EXIT_REQ) != ee);
+		if ((ee >> 8) == 0)
+			kvmhv_interrupt_vcore(vc, ee);
+	}
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index ac113b527bf9..db2427db4471 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	subf	r4, r4, r3
 	mtspr	SPRN_DEC, r4
 
+	/* hwthread_req may have got set by cede or no vcpu, so clear it */
+	li	r0, 0
+	stb	r0, HSTATE_HWTHREAD_REQ(r13)
+
 	/*
 	 * For external and machine check interrupts, we need
 	 * to call the Linux handler to process the interrupt.
@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	li	r0, 0
 	stb	r0, HSTATE_NAPPING(r13)
-	stb	r0, HSTATE_HWTHREAD_REQ(r13)
 
 	/* check the wake reason */
 	bl	kvmppc_check_wake_reason
@@ -315,10 +318,10 @@ kvm_start_guest:
 	cmpdi	r3, 0
 	bge	kvm_no_guest
 
-	/* get vcpu pointer, NULL if we have no vcpu to run */
-	ld	r4,HSTATE_KVM_VCPU(r13)
-	cmpdi	r4,0
-	/* if we have no vcpu to run, go back to sleep */
+	/* get vcore pointer, NULL if we have nothing to run */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	cmpdi	r5,0
+	/* if we have no vcore to run, go back to sleep */
 	beq	kvm_no_guest
 
 kvm_secondary_got_guest:
@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
 	ld	r6, PACA_DSCR_DEFAULT(r13)
 	std	r6, HSTATE_DSCR(r13)
 
-	/* Order load of vcore, ptid etc. after load of vcpu */
+	/* On thread 0 of a subcore, set HDEC to max */
+	lbz	r4, HSTATE_PTID(r13)
+	cmpwi	r4, 0
+	bne	63f
+	lis	r6, 0x7fff
+	ori	r6, r6, 0xffff
+	mtspr	SPRN_HDEC, r6
+	/* and set per-LPAR registers, if doing dynamic micro-threading */
+	ld	r6, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r6, 0
+	beq	63f
+	ld	r0, KVM_SPLIT_RPR(r6)
+	mtspr	SPRN_RPR, r0
+	ld	r0, KVM_SPLIT_PMMAR(r6)
+	mtspr	SPRN_PMMAR, r0
+	ld	r0, KVM_SPLIT_LDBAR(r6)
+	mtspr	SPRN_LDBAR, r0
+	isync
+63:
+	/* Order load of vcpu after load of vcore */
 	lwsync
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_hv_entry
 
 	/* Back from the guest, go back to nap */
-	/* Clear our vcpu pointer so we don't come back in early */
+	/* Clear our vcpu and vcore pointers so we don't come back in early */
 	li	r0, 0
+	std	r0, HSTATE_KVM_VCPU(r13)
 	/*
-	 * Once we clear HSTATE_KVM_VCPU(r13), the code in
+	 * Once we clear HSTATE_KVM_VCORE(r13), the code in
 	 * kvmppc_run_core() is going to assume that all our vcpu
 	 * state is visible in memory.  This lwsync makes sure
 	 * that that is true.
 	 */
 	lwsync
-	std	r0, HSTATE_KVM_VCPU(r13)
+	std	r0, HSTATE_KVM_VCORE(r13)
 
 /*
 * At this point we have finished executing in the guest.
@@ -374,16 +398,63 @@ kvm_no_guest:
 	b	power7_wakeup_loss
 
 53:	HMT_LOW
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	cmpdi	r4, 0
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	cmpdi	r5, 0
+	bne	60f
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r3, 0
+	beq	kvm_no_guest
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
 	beq	kvm_no_guest
 	HMT_MEDIUM
+	b	kvm_unsplit_nap
+60:	HMT_MEDIUM
 	b	kvm_secondary_got_guest
 
 54:	li	r0, KVM_HWTHREAD_IN_KVM
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
 	b	kvm_no_guest
 
+/*
+ * Here the primary thread is trying to return the core to
+ * whole-core mode, so we need to nap.
+ */
+kvm_unsplit_nap:
+	/* clear any pending message */
+BEGIN_FTR_SECTION
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	/* Set kvm_split_mode.napped[tid] = 1 */
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	li	r0, 1
+	lhz	r4, PACAPACAINDEX(r13)
+	clrldi	r4, r4, 61	/* micro-threading => P8 => 8 threads/core */
+	addi	r4, r4, KVM_SPLIT_NAPPED
+	stbx	r0, r3, r4
+	/* Check the do_nap flag again after setting napped[] */
+	sync
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
+	beq	57f
+	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
+	mfspr	r4, SPRN_LPCR
+	rlwimi	r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+	mtspr	SPRN_LPCR, r4
+	isync
+	std	r0, HSTATE_SCRATCH0(r13)
+	ptesync
+	ld	r0, HSTATE_SCRATCH0(r13)
+1:	cmpd	r0, r0
+	bne	1b
+	nap
+	b	.
+
+57:	li	r0, 0
+	stbx	r0, r3, r4
+	b	kvm_no_guest
+
 /******************************************************************************
  *                                                                            *
  *                               Entry code                                   *
@@ -854,7 +925,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	cmpwi	r0, 0
 	bne	21f
 	HMT_LOW
-20:	lbz	r0, VCORE_IN_GUEST(r5)
+20:	lwz	r3, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r3, 0x100
+	bge	no_switch_exit
+	lbz	r0, VCORE_IN_GUEST(r5)
 	cmpwi	r0, 0
 	beq	20b
 	HMT_MEDIUM
@@ -985,9 +1059,13 @@ secondary_too_late:
 #endif
 11:	b	kvmhv_switch_to_host
 
+no_switch_exit:
+	HMT_MEDIUM
+	li	r12, 0
+	b	12f
 hdec_soon:
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	stw	r12, VCPU_TRAP(r4)
+12:	stw	r12, VCPU_TRAP(r4)
 	mr	r9, r4
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r4, VCPU_TB_RMEXIT
@@ -1545,12 +1623,17 @@ kvmhv_switch_to_host:
 
 	/* Primary thread waits for all the secondaries to exit guest */
 15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
-	srwi	r0,r3,8
+	rlwinm	r0,r3,32-8,0xff
 	clrldi	r3,r3,56
 	cmpw	r3,r0
 	bne	15b
 	isync
 
+	/* Did we actually switch to the guest at all? */
+	lbz	r6, VCORE_IN_GUEST(r5)
+	cmpwi	r6, 0
+	beq	19f
+
 	/* Primary thread switches back to host partition */
 	ld	r6,KVM_HOST_SDR1(r4)
 	lwz	r7,KVM_HOST_LPID(r4)
@@ -1594,7 +1677,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 18:
 	/* Signal secondary CPUs to continue */
 	stb	r0,VCORE_IN_GUEST(r5)
-	lis	r8,0x7fff	/* MAX_INT@h */
+19:	lis	r8,0x7fff	/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
 
 16:	ld	r8,KVM_HOST_LPCR(r4)