author    Linus Torvalds <torvalds@linux-foundation.org>	2015-09-10 19:42:49 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2015-09-10 19:42:49 -0400
commit    519f526d391b0ef775aeb04c4b6f632ea6b3ee50 (patch)
tree      36985d7882734c136fc3c9a48e9d9abf9e97c1f1 /arch/powerpc
parent    06ab838c2024db468855118087db16d8fa905ddc (diff)
parent    ba60c41ae392b473a1897faa0b8739fcb8759d69 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:

 "ARM:
   - Full debug support for arm64
   - Active state switching for timer interrupts
   - Lazy FP/SIMD save/restore for arm64
   - Generic ARMv8 target

  PPC:
   - Book3S: A few bug fixes
   - Book3S: Allow micro-threading on POWER8

  x86:
   - Compiler warnings

  Generic:
   - Adaptive polling for guest halt"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
  kvm: irqchip: fix memory leak
  kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF
  KVM: trace kvm_halt_poll_ns grow/shrink
  KVM: dynamic halt-polling
  KVM: make halt_poll_ns per-vCPU
  Silence compiler warning in arch/x86/kvm/emulate.c
  kvm: compile process_smi_save_seg_64() only for x86_64
  KVM: x86: avoid uninitialized variable warning
  KVM: PPC: Book3S: Fix typo in top comment about locking
  KVM: PPC: Book3S: Fix size of the PSPB register
  KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set
  KVM: PPC: Book3S HV: Fix race in starting secondary threads
  KVM: PPC: Book3S: correct width in XER handling
  KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation
  KVM: PPC: Book3S HV: Fix preempted vcore list locking
  KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
  KVM: PPC: Book3S HV: Fix bug in dirty page tracking
  KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE
  KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
  KVM: PPC: Book3S HV: Make use of unused threads when running guests
  ...
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h          5
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h     22
-rw-r--r--  arch/powerpc/include/asm/kvm_booke.h           4
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h           26
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h          2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c              9
-rw-r--r--  arch/powerpc/kvm/Kconfig                       8
-rw-r--r--  arch/powerpc/kvm/book3s.c                      3
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c          1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c          1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c            8
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c              1
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c                 664
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c          32
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c          161
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c           4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S      137
-rw-r--r--  arch/powerpc/kvm/book3s_paired_singles.c       2
-rw-r--r--  arch/powerpc/kvm/book3s_segment.S              4
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c                 2
-rw-r--r--  arch/powerpc/kvm/booke.c                       1
-rw-r--r--  arch/powerpc/kvm/e500_mmu.c                    2
-rw-r--r--  arch/powerpc/kvm/powerpc.c                     2
23 files changed, 955 insertions, 146 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index b91e74a817d8..9fac01cb89c1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -158,6 +158,7 @@ extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
 			bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 			unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
 extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index);
 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
@@ -225,12 +226,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
 	vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5dd3400..72b6225aca73 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,12 @@
 #define XICS_MFRR		0xc
 #define XICS_IPI		2	/* interrupt source # for IPIs */
 
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS		8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES		4
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
 
 #else  /*__ASSEMBLY__ */
 
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+	unsigned long	rpr;
+	unsigned long	pmmar;
+	unsigned long	ldbar;
+	u8		subcore_size;
+	u8		do_nap;
+	u8		napped[MAX_SMT_THREADS];
+	struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+};
+
 /*
  * This struct goes in the PACA on 64-bit processors.  It is used
  * to store host state that needs to be saved when we enter a guest
@@ -100,6 +119,7 @@ struct kvmppc_host_state {
 	u64 host_spurr;
 	u64 host_dscr;
 	u64 dec_expires;
+	struct kvm_split_mode *kvm_split_mode;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	u64 cfar;
@@ -112,7 +132,7 @@ struct kvmppc_book3s_shadow_vcpu {
 	bool in_use;
 	ulong gpr[14];
 	u32 cr;
-	u32 xer;
+	ulong xer;
 	ulong ctr;
 	ulong lr;
 	ulong pc;
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index 3286f0d6a86c..bc6e29e4dfd4 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
 	vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d91f65b28e32..98eebbf66340 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -205,8 +205,10 @@ struct revmap_entry {
  */
 #define KVMPPC_RMAP_LOCK_BIT	63
 #define KVMPPC_RMAP_RC_SHIFT	32
+#define KVMPPC_RMAP_CHG_SHIFT	48
 #define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
 #define KVMPPC_RMAP_CHANGED	(HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_CHG_ORDER	(0x3ful << KVMPPC_RMAP_CHG_SHIFT)
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
@@ -278,7 +280,9 @@ struct kvmppc_vcore {
 	u16 last_cpu;
 	u8 vcore_state;
 	u8 in_guest;
+	struct kvmppc_vcore *master_vcore;
 	struct list_head runnable_threads;
+	struct list_head preempt_list;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
@@ -300,12 +304,21 @@ struct kvmppc_vcore {
 #define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
 #define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
 
-/* Values for vcore_state */
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ		0x10000
+
+/*
+ * Values for vcore_state.
+ * Note that these are arranged such that lower values
+ * (< VCORE_SLEEPING) don't require stolen time accounting
+ * on load/unload, and higher values do.
+ */
 #define VCORE_INACTIVE	0
-#define VCORE_SLEEPING	1
-#define VCORE_PREEMPT	2
-#define VCORE_RUNNING	3
-#define VCORE_EXITING	4
+#define VCORE_PREEMPT	1
+#define VCORE_PIGGYBACK	2
+#define VCORE_SLEEPING	3
+#define VCORE_RUNNING	4
+#define VCORE_EXITING	5
 
 /*
  * Struct used to manage memory for a virtual processor area
@@ -473,7 +486,7 @@ struct kvm_vcpu_arch {
 	ulong ciabr;
 	ulong cfar;
 	ulong ppr;
-	ulong pspb;
+	u32 pspb;
 	ulong fscr;
 	ulong shadow_fscr;
 	ulong ebbhr;
@@ -619,6 +632,7 @@ struct kvm_vcpu_arch {
 	int trap;
 	int state;
 	int ptid;
+	int thread_cpu;
 	bool timer_running;
 	wait_queue_head_t cpu_run;
 
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 8452335661a5..790f5d1d9a46 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -287,7 +287,7 @@
 
 /* POWER8 Micro Partition Prefetch (MPP) parameters */
 /* Address mask is common for LOGMPP instruction and MPPR SPR */
-#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000
+#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL
 
 /* Bits 60 and 61 of MPP SPR should be set to one of the following */
 /* Aborting the fetch is indeed setting 00 in the table size bits */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 810f433731dc..221d584d089f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -511,6 +511,8 @@ int main(void)
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 	DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
+	DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu));
+	DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -673,7 +675,14 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
 	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+	DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
+	DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
+	DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
+	DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
+	DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
+	DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 3caec2c42105..c2024ac9d4e8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -74,14 +74,14 @@ config KVM_BOOK3S_64
 	  If unsure, say N.
 
 config KVM_BOOK3S_64_HV
-	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+	tristate "KVM for POWER7 and later using hypervisor mode in host"
 	depends on KVM_BOOK3S_64 && PPC_POWERNV
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
-	  virtual machines on POWER7 and PPC970 processors that have
+	  virtual machines on POWER7 and newer processors that have
 	  hypervisor mode available to the host.
 
 	  If you say Y here, KVM will use the hardware virtualization
@@ -89,8 +89,8 @@ config KVM_BOOK3S_64_HV
 	  guest operating systems will run at full hardware speed
 	  using supervisor and user modes.  However, this also means
 	  that KVM is not usable under PowerVM (pHyp), is only usable
-	  on POWER7 (or later) processors and PPC970-family processors,
-	  and cannot emulate a different processor from the host processor.
+	  on POWER7 or later processors, and cannot emulate a
+	  different processor from the host processor.
 
 	  If unsure, say N.
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6d6398f4d632..d75bf325f54a 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -240,7 +240,8 @@ void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
 }
 
-int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
+					 unsigned int priority)
 {
 	int deliver = 1;
 	int vec = 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 2035d16a9262..d5c9bfeb0c9c 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -26,6 +26,7 @@
 #include <asm/machdep.h>
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
+#include "book3s.h"
 
 /* #define DEBUG_MMU */
 /* #define DEBUG_SR */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index b982d925c710..79ad35abd196 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -28,6 +28,7 @@
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
 #include "trace_pr.h"
+#include "book3s.h"
 
 #define PTE_SIZE 12
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index dab68b7af3f2..1f9c0a17f445 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -761,6 +761,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		/* Harvest R and C */
 		rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
 		*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+		if (rcbits & HPTE_R_C)
+			kvmppc_update_rmap_change(rmapp, psize);
 		if (rcbits & ~rev[i].guest_rpte) {
 			rev[i].guest_rpte = ptel | rcbits;
 			note_hpte_modification(kvm, &rev[i]);
@@ -927,8 +929,12 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 retry:
 	lock_rmap(rmapp);
 	if (*rmapp & KVMPPC_RMAP_CHANGED) {
-		*rmapp &= ~KVMPPC_RMAP_CHANGED;
+		long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
+			>> KVMPPC_RMAP_CHG_SHIFT;
+		*rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
 		npages_dirty = 1;
+		if (change_order > PAGE_SHIFT)
+			npages_dirty = 1ul << (change_order - PAGE_SHIFT);
 	}
 	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
 		unlock_rmap(rmapp);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 5a2bc4b0dfe5..2afdb9c0937d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -23,6 +23,7 @@
 #include <asm/reg.h>
 #include <asm/switch_to.h>
 #include <asm/time.h>
+#include "book3s.h"
 
 #define OP_19_XOP_RFID		18
 #define OP_19_XOP_RFI		50
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a9f753fb73a8..9754e6815e52 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,12 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #define MPP_BUFFER_ORDER	3
 #endif
 
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
+static int target_smt_mode;
+module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,7 +120,7 @@ static bool kvmppc_ipi_thread(int cpu)
 
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
-	int cpu = vcpu->cpu;
+	int cpu;
 	wait_queue_head_t *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
@@ -123,10 +129,11 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 		++vcpu->stat.halt_wakeup;
 	}
 
-	if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+	if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
 		return;
 
 	/* CPU points to the first thread of the core */
+	cpu = vcpu->cpu;
 	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
 		smp_send_reschedule(cpu);
 }
@@ -164,6 +171,27 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * they should never fail.)
  */
 
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	vc->preempt_tb = mftb();
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	if (vc->preempt_tb != TB_NIL) {
+		vc->stolen_tb += mftb() - vc->preempt_tb;
+		vc->preempt_tb = TB_NIL;
+	}
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -175,14 +203,9 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	 * vcpu, and once it is set to this vcpu, only this task
 	 * ever sets it to NULL.
 	 */
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		if (vc->preempt_tb != TB_NIL) {
-			vc->stolen_tb += mftb() - vc->preempt_tb;
-			vc->preempt_tb = TB_NIL;
-		}
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_end_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
@@ -197,11 +220,9 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		vc->preempt_tb = mftb();
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_start_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
@@ -214,12 +235,12 @@ static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 	kvmppc_end_cede(vcpu);
 }
 
-void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
 {
 	vcpu->arch.pvr = pvr;
 }
 
-int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 {
 	unsigned long pcr = 0;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -259,7 +280,7 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 	return 0;
 }
 
-void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -292,7 +313,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 	       vcpu->arch.last_inst);
 }
 
-struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
 {
 	int r;
 	struct kvm_vcpu *v, *ret = NULL;
@@ -641,7 +662,8 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
 
 	spin_lock(&vcore->lock);
 	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
-	    vcore->vcore_state != VCORE_INACTIVE)
+	    vcore->vcore_state != VCORE_INACTIVE &&
+	    vcore->runner)
 		target = vcore->runner;
 	spin_unlock(&vcore->lock);
 
@@ -1431,6 +1453,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = core * threads_per_subcore;
 	vcore->kvm = kvm;
+	INIT_LIST_HEAD(&vcore->preempt_list);
 
 	vcore->mpp_buffer_is_valid = false;
 
@@ -1655,6 +1678,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
 	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+	vcpu->arch.thread_cpu = -1;
 
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
@@ -1749,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu)
 
 	/* Ensure the thread won't go into the kernel if it wakes */
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
 	tpaca->kvm_hstate.napping = 0;
 	smp_wmb();
 	tpaca->kvm_hstate.hwthread_req = 1;
@@ -1780,26 +1805,32 @@ static void kvmppc_release_hwthread(int cpu)
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
+	tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
 	struct paca_struct *tpaca;
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *mvc = vc->master_vcore;
 
-	if (vcpu->arch.timer_running) {
-		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-		vcpu->arch.timer_running = 0;
+	cpu = vc->pcpu;
+	if (vcpu) {
+		if (vcpu->arch.timer_running) {
+			hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+			vcpu->arch.timer_running = 0;
+		}
+		cpu += vcpu->arch.ptid;
+		vcpu->cpu = mvc->pcpu;
+		vcpu->arch.thread_cpu = cpu;
 	}
-	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.kvm_vcore = vc;
-	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
-	vcpu->cpu = vc->pcpu;
-	/* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
-	smp_wmb();
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
+	smp_wmb();
+	tpaca->kvm_hstate.kvm_vcore = mvc;
 	if (cpu != smp_processor_id())
 		kvmppc_ipi_thread(cpu);
 }
@@ -1812,12 +1843,12 @@ static void kvmppc_wait_for_nap(void)
 	for (loops = 0; loops < 1000000; ++loops) {
 		/*
 		 * Check if all threads are finished.
-		 * We set the vcpu pointer when starting a thread
+		 * We set the vcore pointer when starting a thread
 		 * and the thread clears it when finished, so we look
-		 * for any threads that still have a non-NULL vcpu ptr.
+		 * for any threads that still have a non-NULL vcore ptr.
 		 */
 		for (i = 1; i < threads_per_subcore; ++i)
-			if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+			if (paca[cpu + i].kvm_hstate.kvm_vcore)
 				break;
 		if (i == threads_per_subcore) {
 			HMT_medium();
@@ -1827,7 +1858,7 @@ static void kvmppc_wait_for_nap(void)
 	}
 	HMT_medium();
 	for (i = 1; i < threads_per_subcore; ++i)
-		if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+		if (paca[cpu + i].kvm_hstate.kvm_vcore)
 			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
@@ -1890,6 +1921,278 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
 	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
 }
 
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+	struct list_head	list;
+	spinlock_t		lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+		spin_lock_init(&lp->lock);
+		INIT_LIST_HEAD(&lp->list);
+	}
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+	vc->vcore_state = VCORE_PREEMPT;
+	vc->pcpu = smp_processor_id();
+	if (vc->num_threads < threads_per_subcore) {
+		spin_lock(&lp->lock);
+		list_add_tail(&vc->preempt_list, &lp->list);
+		spin_unlock(&lp->lock);
+	}
+
+	/* Start accumulating stolen time */
+	kvmppc_core_start_stolen(vc);
+}
+
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp;
+
+	kvmppc_core_end_stolen(vc);
+	if (!list_empty(&vc->preempt_list)) {
+		lp = &per_cpu(preempted_vcores, vc->pcpu);
+		spin_lock(&lp->lock);
+		list_del_init(&vc->preempt_list);
+		spin_unlock(&lp->lock);
+	}
+	vc->vcore_state = VCORE_INACTIVE;
+}
+
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
+struct core_info {
+	int		n_subcores;
+	int		max_subcore_threads;
+	int		total_threads;
+	int		subcore_threads[MAX_SUBCORES];
+	struct kvm	*subcore_vm[MAX_SUBCORES];
+	struct list_head vcs[MAX_SUBCORES];
+};
+
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+{
+	int sub;
+
+	memset(cip, 0, sizeof(*cip));
+	cip->n_subcores = 1;
+	cip->max_subcore_threads = vc->num_threads;
+	cip->total_threads = vc->num_threads;
+	cip->subcore_threads[0] = vc->num_threads;
+	cip->subcore_vm[0] = vc->kvm;
+	for (sub = 0; sub < MAX_SUBCORES; ++sub)
+		INIT_LIST_HEAD(&cip->vcs[sub]);
+	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+	/* Can only dynamically split if unsplit to begin with */
+	if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+		return false;
+	if (n_subcores > MAX_SUBCORES)
+		return false;
+	if (n_subcores > 1) {
+		if (!(dynamic_mt_modes & 2))
+			n_subcores = 4;
+		if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+			return false;
+	}
+
+	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
+}
+
+static void init_master_vcore(struct kvmppc_vcore *vc)
+{
+	vc->master_vcore = vc;
+	vc->entry_exit_map = 0;
+	vc->in_guest = 0;
+	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
+}
+
+/*
+ * See if the existing subcores can be split into 3 (or fewer) subcores
+ * of at most two threads each, so we can fit in another vcore.  This
+ * assumes there are at most two subcores and at most 6 threads in total.
+ */
+static bool can_split_piggybacked_subcores(struct core_info *cip)
+{
+	int sub, new_sub;
+	int large_sub = -1;
+	int thr;
+	int n_subcores = cip->n_subcores;
+	struct kvmppc_vcore *vc, *vcnext;
+	struct kvmppc_vcore *master_vc = NULL;
+
+	for (sub = 0; sub < cip->n_subcores; ++sub) {
+		if (cip->subcore_threads[sub] <= 2)
+			continue;
+		if (large_sub >= 0)
+			return false;
+		large_sub = sub;
+		vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+				      preempt_list);
+		if (vc->num_threads > 2)
+			return false;
+		n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
+	}
+	if (n_subcores > 3 || large_sub < 0)
+		return false;
+
+	/*
+	 * Seems feasible, so go through and move vcores to new subcores.
+	 * Note that when we have two or more vcores in one subcore,
+	 * all those vcores must have only one thread each.
+	 */
+	new_sub = cip->n_subcores;
+	thr = 0;
+	sub = large_sub;
+	list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
+		if (thr >= 2) {
+			list_del(&vc->preempt_list);
+			list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
+			/* vc->num_threads must be 1 */
+			if (++cip->subcore_threads[new_sub] == 1) {
+				cip->subcore_vm[new_sub] = vc->kvm;
+				init_master_vcore(vc);
+				master_vc = vc;
+				++cip->n_subcores;
+			} else {
+				vc->master_vcore = master_vc;
+				++new_sub;
+			}
+		}
+		thr += vc->num_threads;
+	}
+	cip->subcore_threads[large_sub] = 2;
+	cip->max_subcore_threads = 2;
+
+	return true;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+	int n_threads = vc->num_threads;
+	int sub;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return false;
+
+	if (n_threads < cip->max_subcore_threads)
+		n_threads = cip->max_subcore_threads;
+	if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
+		cip->max_subcore_threads = n_threads;
+	} else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
+		   vc->num_threads <= 2) {
+		/*
+		 * We may be able to fit another subcore in by
+		 * splitting an existing subcore with 3 or 4
+		 * threads into two 2-thread subcores, or one
+		 * with 5 or 6 threads into three subcores.
+		 * We can only do this if those subcores have
+		 * piggybacked virtual cores.
+		 */
+		if (!can_split_piggybacked_subcores(cip))
+			return false;
+	} else {
+		return false;
+	}
+
+	sub = cip->n_subcores;
+	++cip->n_subcores;
+	cip->total_threads += vc->num_threads;
+	cip->subcore_threads[sub] = vc->num_threads;
+	cip->subcore_vm[sub] = vc->kvm;
+	init_master_vcore(vc);
+	list_del(&vc->preempt_list);
+	list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+
+	return true;
+}
+
+static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
+				  struct core_info *cip, int sub)
+{
+	struct kvmppc_vcore *vc;
+	int n_thr;
+
+	vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+			      preempt_list);
+
+	/* require same VM and same per-core reg values */
+	if (pvc->kvm != vc->kvm ||
+	    pvc->tb_offset != vc->tb_offset ||
+	    pvc->pcr != vc->pcr ||
+	    pvc->lpcr != vc->lpcr)
+		return false;
+
+	/* P8 guest with > 1 thread per core would see wrong TIR value */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+	    (vc->num_threads > 1 || pvc->num_threads > 1))
+		return false;
+
+	n_thr = cip->subcore_threads[sub] + pvc->num_threads;
+	if (n_thr > cip->max_subcore_threads) {
+		if (!subcore_config_ok(cip->n_subcores, n_thr))
+			return false;
+		cip->max_subcore_threads = n_thr;
+	}
+
+	cip->total_threads += pvc->num_threads;
+	cip->subcore_threads[sub] = n_thr;
+	pvc->master_vcore = vc;
+	list_del(&pvc->preempt_list);
+	list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
+
+	return true;
+}
+
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+			  int target_threads)
+{
+	int sub;
+
+	if (cip->total_threads + pvc->num_threads > target_threads)
+		return false;
+	for (sub = 0; sub < cip->n_subcores; ++sub)
+		if (cip->subcore_threads[sub] &&
+		    can_piggyback_subcore(pvc, cip, sub))
+			return true;
+
+	if (can_dynamic_split(pvc, cip))
+		return true;
+
+	return false;
+}
+
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
 	struct kvm_vcpu *vcpu, *vnext;
@@ -1909,12 +2212,45 @@ static void prepare_threads(struct kvmppc_vcore *vc)
 	}
 }
 
-static void post_guest_process(struct kvmppc_vcore *vc)
+static void collect_piggybacks(struct core_info *cip, int target_threads)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+	struct kvmppc_vcore *pvc, *vcnext;
+
+	spin_lock(&lp->lock);
+	list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+		if (!spin_trylock(&pvc->lock))
+			continue;
+		prepare_threads(pvc);
+		if (!pvc->n_runnable) {
+			list_del_init(&pvc->preempt_list);
+			if (pvc->runner == NULL) {
+				pvc->vcore_state = VCORE_INACTIVE;
+				kvmppc_core_end_stolen(pvc);
+			}
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		if (!can_piggyback(pvc, cip, target_threads)) {
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		kvmppc_core_end_stolen(pvc);
+		pvc->vcore_state = VCORE_PIGGYBACK;
+		if (cip->total_threads >= target_threads)
+			break;
+	}
+	spin_unlock(&lp->lock);
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
+	int still_running = 0;
 	u64 now;
 	long ret;
 	struct kvm_vcpu *vcpu, *vnext;
 
+	spin_lock(&vc->lock);
 	now = get_tb();
 	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
 				 arch.run_list) {
@@ -1933,17 +2269,36 @@ static void post_guest_process(struct kvmppc_vcore *vc)
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
 
-		if (vcpu->arch.ceded) {
-			if (!is_kvmppc_resume_guest(ret))
-				kvmppc_end_cede(vcpu);
-			else
-				kvmppc_set_timer(vcpu);
-		}
-		if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+		if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			if (vcpu->arch.pending_exceptions)
+				kvmppc_core_prepare_to_enter(vcpu);
+			if (vcpu->arch.ceded)
+				kvmppc_set_timer(vcpu);
+			else
+				++still_running;
+		} else {
 			kvmppc_remove_runnable(vc, vcpu);
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
+	list_del_init(&vc->preempt_list);
+	if (!is_master) {
+		if (still_running > 0) {
+			kvmppc_vcore_preempt(vc);
+		} else if (vc->runner) {
+			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_core_start_stolen(vc);
+		} else {
+			vc->vcore_state = VCORE_INACTIVE;
+		}
+		if (vc->n_runnable > 0 && vc->runner == NULL) {
+			/* make sure there's a candidate runner awake */
+			vcpu = list_first_entry(&vc->runnable_threads,
+						struct kvm_vcpu, arch.run_list);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+	spin_unlock(&vc->lock);
 }
 
 /*
@@ -1955,6 +2310,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	struct kvm_vcpu *vcpu, *vnext;
 	int i;
 	int srcu_idx;
+	struct core_info core_info;
+	struct kvmppc_vcore *pvc, *vcnext;
+	struct kvm_split_mode split_info, *sip;
+	int split, subcore_size, active;
+	int sub;
+	bool thr0_done;
+	unsigned long cmd_bit, stat_bit;
+	int pcpu, thr;
+	int target_threads;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -1969,11 +2333,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/*
 	 * Initialize *vc.
 	 */
-	vc->entry_exit_map = 0;
+	init_master_vcore(vc);
 	vc->preempt_tb = TB_NIL;
-	vc->in_guest = 0;
-	vc->napping_threads = 0;
-	vc->conferring_threads = 0;
 
 	/*
 	 * Make sure we are running on primary threads, and that secondary
@@ -1991,24 +2352,120 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		goto out;
 	}
 
+	/*
+	 * See if we could run any other vcores on the physical core
+	 * along with this one.
+	 */
+	init_core_info(&core_info, vc);
+	pcpu = smp_processor_id();
+	target_threads = threads_per_subcore;
+	if (target_smt_mode && target_smt_mode < target_threads)
+		target_threads = target_smt_mode;
+	if (vc->num_threads < target_threads)
+		collect_piggybacks(&core_info, target_threads);
+
+	/* Decide on micro-threading (split-core) mode */
+	subcore_size = threads_per_subcore;
+	cmd_bit = stat_bit = 0;
+	split = core_info.n_subcores;
+	sip = NULL;
+	if (split > 1) {
+		/* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+		if (split == 2 && (dynamic_mt_modes & 2)) {
+			cmd_bit = HID0_POWER8_1TO2LPAR;
+			stat_bit = HID0_POWER8_2LPARMODE;
+		} else {
+			split = 4;
+			cmd_bit = HID0_POWER8_1TO4LPAR;
+			stat_bit = HID0_POWER8_4LPARMODE;
+		}
+		subcore_size = MAX_SMT_THREADS / split;
+		sip = &split_info;
+		memset(&split_info, 0, sizeof(split_info));
+		split_info.rpr = mfspr(SPRN_RPR);
+		split_info.pmmar = mfspr(SPRN_PMMAR);
+		split_info.ldbar = mfspr(SPRN_LDBAR);
+		split_info.subcore_size = subcore_size;
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			split_info.master_vcs[sub] =
+				list_first_entry(&core_info.vcs[sub],
+					struct kvmppc_vcore, preempt_list);
+		/* order writes to split_info before kvm_split_mode pointer */
+		smp_wmb();
+	}
+	pcpu = smp_processor_id();
+	for (thr = 0; thr < threads_per_subcore; ++thr)
+		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+
+	/* Initiate micro-threading (split-core) if required */
+	if (cmd_bit) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+
+		hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (hid0 & stat_bit)
+				break;
+			cpu_relax();
+		}
+	}
 
-	vc->pcpu = smp_processor_id();
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		kvmppc_start_thread(vcpu);
-		kvmppc_create_dtl_entry(vcpu, vc);
-		trace_kvm_guest_enter(vcpu);
+	/* Start all the threads */
+	active = 0;
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		thr = subcore_thread_map[sub];
+		thr0_done = false;
+		active |= 1 << thr;
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
+			pvc->pcpu = pcpu + thr;
+			list_for_each_entry(vcpu, &pvc->runnable_threads,
+					    arch.run_list) {
+				kvmppc_start_thread(vcpu, pvc);
+				kvmppc_create_dtl_entry(vcpu, pvc);
+				trace_kvm_guest_enter(vcpu);
+				if (!vcpu->arch.ptid)
+					thr0_done = true;
+				active |= 1 << (thr + vcpu->arch.ptid);
+			}
+			/*
+			 * We need to start the first thread of each subcore
+			 * even if it doesn't have a vcpu.
+			 */
+			if (pvc->master_vcore == pvc && !thr0_done)
+				kvmppc_start_thread(NULL, pvc);
+			thr += pvc->num_threads;
+		}
 	}
 
-	/* Set this explicitly in case thread 0 doesn't have a vcpu */
-	get_paca()->kvm_hstate.kvm_vcore = vc;
-	get_paca()->kvm_hstate.ptid = 0;
+	/*
+	 * Ensure that split_info.do_nap is set after setting
+	 * the vcore pointer in the PACA of the secondaries.
+	 */
+	smp_mb();
+	if (cmd_bit)
+		split_info.do_nap = 1;	/* ask secondaries to nap when done */
+
+	/*
+	 * When doing micro-threading, poke the inactive threads as well.
+	 * This gets them to the nap instruction after kvm_do_nap,
+	 * which reduces the time taken to unsplit later.
+	 */
+	if (split > 1)
+		for (thr = 1; thr < threads_per_subcore; ++thr)
+			if (!(active & (1 << thr)))
+				kvmppc_ipi_thread(pcpu + thr);
 
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
 
 	trace_kvmppc_run_core(vc, 0);
 
-	spin_unlock(&vc->lock);
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
+			spin_unlock(&pvc->lock);
 
 	kvm_guest_enter();
 
@@ -2019,32 +2476,58 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	__kvmppc_vcore_entry();
 
-	spin_lock(&vc->lock);
-
 	if (vc->mpp_buffer)
 		kvmppc_start_saving_l2_cache(vc);
 
-	/* disable sending of IPIs on virtual external irqs */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-		vcpu->cpu = -1;
-	/* wait for secondary threads to finish writing their state to memory */
-	kvmppc_wait_for_nap();
-	for (i = 0; i < threads_per_subcore; ++i)
-		kvmppc_release_hwthread(vc->pcpu + i);
+	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+	spin_lock(&vc->lock);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
-	spin_unlock(&vc->lock);
 
-	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+	/* wait for secondary threads to finish writing their state to memory */
+	kvmppc_wait_for_nap();
+
+	/* Return to whole-core mode if we split the core earlier */
+	if (split > 1) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+		unsigned long loops = 0;
+
+		hid0 &= ~HID0_POWER8_DYNLPARDIS;
+		stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (!(hid0 & stat_bit))
+				break;
+			cpu_relax();
+			++loops;
+		}
+		split_info.do_nap = 0;
+	}
+
+	/* Let secondaries go back to the offline loop */
+	for (i = 0; i < threads_per_subcore; ++i) {
+		kvmppc_release_hwthread(pcpu + i);
+		if (sip && sip->napped[i])
+			kvmppc_ipi_thread(pcpu + i);
+	}
+
+	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 	kvm_guest_exit();
 
-	preempt_enable();
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
+					 preempt_list)
+			post_guest_process(pvc, pvc == vc);
 
 	spin_lock(&vc->lock);
-	post_guest_process(vc);
+	preempt_enable();
 
  out:
 	vc->vcore_state = VCORE_INACTIVE;
@@ -2055,13 +2538,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
  * Wait for some other vcpu thread to execute us, and
  * wake us up when we need to handle something in the host.
  */
-static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
+				 struct kvm_vcpu *vcpu, int wait_state)
 {
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
-	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		spin_unlock(&vc->lock);
 		schedule();
+		spin_lock(&vc->lock);
+	}
 	finish_wait(&vcpu->arch.cpu_run, &wait);
 }
 
@@ -2137,9 +2624,21 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * this thread straight away and have it join in.
 	 */
 	if (!signal_pending(current)) {
-		if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
+		if (vc->vcore_state == VCORE_PIGGYBACK) {
+			struct kvmppc_vcore *mvc = vc->master_vcore;
+			if (spin_trylock(&mvc->lock)) {
+				if (mvc->vcore_state == VCORE_RUNNING &&
+				    !VCORE_IS_EXITING(mvc)) {
+					kvmppc_create_dtl_entry(vcpu, vc);
+					kvmppc_start_thread(vcpu, vc);
+					trace_kvm_guest_enter(vcpu);
+				}
+				spin_unlock(&mvc->lock);
+			}
+		} else if (vc->vcore_state == VCORE_RUNNING &&
+			   !VCORE_IS_EXITING(vc)) {
 			kvmppc_create_dtl_entry(vcpu, vc);
-			kvmppc_start_thread(vcpu);
+			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
 			wake_up(&vc->wq);
@@ -2149,10 +2648,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       !signal_pending(current)) {
+		if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+			kvmppc_vcore_end_preempt(vc);
+
 		if (vc->vcore_state != VCORE_INACTIVE) {
-			spin_unlock(&vc->lock);
-			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
-			spin_lock(&vc->lock);
+			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
 			continue;
 		}
 		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
@@ -2179,10 +2679,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		if (n_ceded == vc->n_runnable) {
 			kvmppc_vcore_blocked(vc);
 		} else if (need_resched()) {
-			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_vcore_preempt(vc);
 			/* Let something else run */
 			cond_resched_lock(&vc->lock);
-			vc->vcore_state = VCORE_INACTIVE;
+			if (vc->vcore_state == VCORE_PREEMPT)
+				kvmppc_vcore_end_preempt(vc);
 		} else {
 			kvmppc_run_core(vc);
 		}
@@ -2191,11 +2692,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       (vc->vcore_state == VCORE_RUNNING ||
-	       vc->vcore_state == VCORE_EXITING)) {
-		spin_unlock(&vc->lock);
-		kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
-		spin_lock(&vc->lock);
-	}
+	       vc->vcore_state == VCORE_EXITING))
+		kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
 
 	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
 		kvmppc_remove_runnable(vc, vcpu);
@@ -2755,6 +3253,8 @@ static int kvmppc_book3s_init_hv(void)
 
 	init_default_hcalls();
 
+	init_vcore_lists();
+
 	r = kvmppc_mmu_hv_init();
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ed2589d4593f..fd7006bf6b1a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -110,14 +110,15 @@ void __init kvm_cma_reserve(void)
 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 		unsigned int yield_count)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+	int ptid = local_paca->kvm_hstate.ptid;
 	int threads_running;
 	int threads_ceded;
 	int threads_conferring;
 	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
 	int rv = H_SUCCESS; /* => don't yield */
 
-	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	set_bit(ptid, &vc->conferring_threads);
 	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
 		threads_running = VCORE_ENTRY_MAP(vc);
 		threads_ceded = vc->napping_threads;
@@ -127,7 +128,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			break;
 		}
 	}
-	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	clear_bit(ptid, &vc->conferring_threads);
 	return rv;
 }
 
@@ -238,7 +239,8 @@ void kvmhv_commence_exit(int trap)
 {
 	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 	int ptid = local_paca->kvm_hstate.ptid;
-	int me, ee;
+	struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+	int me, ee, i;
 
 	/* Set our bit in the threads-exiting-guest map in the 0xff00
 	   bits of vcore->entry_exit_map */
@@ -258,4 +260,26 @@ void kvmhv_commence_exit(int trap)
258 */ 260 */
259 if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER) 261 if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
260 kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid)); 262 kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
263
264 /*
265 * If we are doing dynamic micro-threading, interrupt the other
266 * subcores to pull them out of their guests too.
267 */
268 if (!sip)
269 return;
270
271 for (i = 0; i < MAX_SUBCORES; ++i) {
272 vc = sip->master_vcs[i];
273 if (!vc)
274 break;
275 do {
276 ee = vc->entry_exit_map;
277 /* Already asked to exit? */
278 if ((ee >> 8) != 0)
279 break;
280 } while (cmpxchg(&vc->entry_exit_map, ee,
281 ee | VCORE_EXIT_REQ) != ee);
282 if ((ee >> 8) == 0)
283 kvmhv_interrupt_vcore(vc, ee);
284 }
261} 285}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index b027a89737b6..c1df9bb1e413 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -12,6 +12,7 @@
12#include <linux/kvm_host.h> 12#include <linux/kvm_host.h>
13#include <linux/hugetlb.h> 13#include <linux/hugetlb.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/log2.h>
15 16
16#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
17#include <asm/kvm_ppc.h> 18#include <asm/kvm_ppc.h>
@@ -97,25 +98,52 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
97} 98}
98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 99EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
99 100
101/* Update the changed page order field of an rmap entry */
102void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
103{
104 unsigned long order;
105
106 if (!psize)
107 return;
108 order = ilog2(psize);
109 order <<= KVMPPC_RMAP_CHG_SHIFT;
110 if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
111 *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
112}
113EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
114
115/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
116static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
117 unsigned long hpte_gr)
118{
119 struct kvm_memory_slot *memslot;
120 unsigned long *rmap;
121 unsigned long gfn;
122
123 gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
124 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
125 if (!memslot)
126 return NULL;
127
128 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
129 return rmap;
130}
131
100/* Remove this HPTE from the chain for a real page */ 132/* Remove this HPTE from the chain for a real page */
101static void remove_revmap_chain(struct kvm *kvm, long pte_index, 133static void remove_revmap_chain(struct kvm *kvm, long pte_index,
102 struct revmap_entry *rev, 134 struct revmap_entry *rev,
103 unsigned long hpte_v, unsigned long hpte_r) 135 unsigned long hpte_v, unsigned long hpte_r)
104{ 136{
105 struct revmap_entry *next, *prev; 137 struct revmap_entry *next, *prev;
106 unsigned long gfn, ptel, head; 138 unsigned long ptel, head;
107 struct kvm_memory_slot *memslot;
108 unsigned long *rmap; 139 unsigned long *rmap;
109 unsigned long rcbits; 140 unsigned long rcbits;
110 141
111 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 142 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
112 ptel = rev->guest_rpte |= rcbits; 143 ptel = rev->guest_rpte |= rcbits;
113 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); 144 rmap = revmap_for_hpte(kvm, hpte_v, ptel);
114 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 145 if (!rmap)
115 if (!memslot)
116 return; 146 return;
117
118 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
119 lock_rmap(rmap); 147 lock_rmap(rmap);
120 148
121 head = *rmap & KVMPPC_RMAP_INDEX; 149 head = *rmap & KVMPPC_RMAP_INDEX;
@@ -131,6 +159,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
131 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 159 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
132 } 160 }
133 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 161 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
162 if (rcbits & HPTE_R_C)
163 kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
134 unlock_rmap(rmap); 164 unlock_rmap(rmap);
135} 165}
136 166
@@ -421,14 +451,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
421 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 451 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
422 v = pte & ~HPTE_V_HVLOCK; 452 v = pte & ~HPTE_V_HVLOCK;
423 if (v & HPTE_V_VALID) { 453 if (v & HPTE_V_VALID) {
424 u64 pte1;
425
426 pte1 = be64_to_cpu(hpte[1]);
427 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 454 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
428 rb = compute_tlbie_rb(v, pte1, pte_index); 455 rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
429 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); 456 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
430 /* Read PTE low word after tlbie to get final R/C values */ 457 /*
431 remove_revmap_chain(kvm, pte_index, rev, v, pte1); 458 * The reference (R) and change (C) bits in a HPT
459 * entry can be set by hardware at any time up until
460 * the HPTE is invalidated and the TLB invalidation
461 * sequence has completed. This means that when
462 * removing a HPTE, we need to re-read the HPTE after
463 * the invalidation sequence has completed in order to
464 * obtain reliable values of R and C.
465 */
466 remove_revmap_chain(kvm, pte_index, rev, v,
467 be64_to_cpu(hpte[1]));
432 } 468 }
433 r = rev->guest_rpte & ~HPTE_GR_RESERVED; 469 r = rev->guest_rpte & ~HPTE_GR_RESERVED;
434 note_hpte_modification(kvm, rev); 470 note_hpte_modification(kvm, rev);
@@ -655,6 +691,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
655 return H_SUCCESS; 691 return H_SUCCESS;
656} 692}
657 693
694long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
695 unsigned long pte_index)
696{
697 struct kvm *kvm = vcpu->kvm;
698 __be64 *hpte;
699 unsigned long v, r, gr;
700 struct revmap_entry *rev;
701 unsigned long *rmap;
702 long ret = H_NOT_FOUND;
703
704 if (pte_index >= kvm->arch.hpt_npte)
705 return H_PARAMETER;
706
707 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
708 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
709 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
710 cpu_relax();
711 v = be64_to_cpu(hpte[0]);
712 r = be64_to_cpu(hpte[1]);
713 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
714 goto out;
715
716 gr = rev->guest_rpte;
717 if (rev->guest_rpte & HPTE_R_R) {
718 rev->guest_rpte &= ~HPTE_R_R;
719 note_hpte_modification(kvm, rev);
720 }
721 if (v & HPTE_V_VALID) {
722 gr |= r & (HPTE_R_R | HPTE_R_C);
723 if (r & HPTE_R_R) {
724 kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
725 rmap = revmap_for_hpte(kvm, v, gr);
726 if (rmap) {
727 lock_rmap(rmap);
728 *rmap |= KVMPPC_RMAP_REFERENCED;
729 unlock_rmap(rmap);
730 }
731 }
732 }
733 vcpu->arch.gpr[4] = gr;
734 ret = H_SUCCESS;
735 out:
736 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
737 return ret;
738}
739
740long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
741 unsigned long pte_index)
742{
743 struct kvm *kvm = vcpu->kvm;
744 __be64 *hpte;
745 unsigned long v, r, gr;
746 struct revmap_entry *rev;
747 unsigned long *rmap;
748 long ret = H_NOT_FOUND;
749
750 if (pte_index >= kvm->arch.hpt_npte)
751 return H_PARAMETER;
752
753 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
754 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
755 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
756 cpu_relax();
757 v = be64_to_cpu(hpte[0]);
758 r = be64_to_cpu(hpte[1]);
759 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
760 goto out;
761
762 gr = rev->guest_rpte;
763 if (gr & HPTE_R_C) {
764 rev->guest_rpte &= ~HPTE_R_C;
765 note_hpte_modification(kvm, rev);
766 }
767 if (v & HPTE_V_VALID) {
768 /* need to make it temporarily absent so C is stable */
769 hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
770 kvmppc_invalidate_hpte(kvm, hpte, pte_index);
771 r = be64_to_cpu(hpte[1]);
772 gr |= r & (HPTE_R_R | HPTE_R_C);
773 if (r & HPTE_R_C) {
774 unsigned long psize = hpte_page_size(v, r);
775 hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
776 eieio();
777 rmap = revmap_for_hpte(kvm, v, gr);
778 if (rmap) {
779 lock_rmap(rmap);
780 *rmap |= KVMPPC_RMAP_CHANGED;
781 kvmppc_update_rmap_change(rmap, psize);
782 unlock_rmap(rmap);
783 }
784 }
785 }
786 vcpu->arch.gpr[4] = gr;
787 ret = H_SUCCESS;
788 out:
789 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
790 return ret;
791}
792
658void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 793void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
659 unsigned long pte_index) 794 unsigned long pte_index)
660{ 795{
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 00e45b6d4f24..24f58076d49e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -67,14 +67,12 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
67 } 67 }
68 68
69 /* Check if the core is loaded, if not, too hard */ 69 /* Check if the core is loaded, if not, too hard */
70 cpu = vcpu->cpu; 70 cpu = vcpu->arch.thread_cpu;
71 if (cpu < 0 || cpu >= nr_cpu_ids) { 71 if (cpu < 0 || cpu >= nr_cpu_ids) {
72 this_icp->rm_action |= XICS_RM_KICK_VCPU; 72 this_icp->rm_action |= XICS_RM_KICK_VCPU;
73 this_icp->rm_kick_target = vcpu; 73 this_icp->rm_kick_target = vcpu;
74 return; 74 return;
75 } 75 }
76 /* In SMT cpu will always point to thread 0, we adjust it */
77 cpu += vcpu->arch.ptid;
78 76
79 smp_mb(); 77 smp_mb();
80 kvmhv_rm_send_ipi(cpu); 78 kvmhv_rm_send_ipi(cpu);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index faa86e9c0551..2273dcacef39 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
128 subf r4, r4, r3 128 subf r4, r4, r3
129 mtspr SPRN_DEC, r4 129 mtspr SPRN_DEC, r4
130 130
131 /* hwthread_req may have got set by cede or no vcpu, so clear it */
132 li r0, 0
133 stb r0, HSTATE_HWTHREAD_REQ(r13)
134
131 /* 135 /*
132 * For external and machine check interrupts, we need 136 * For external and machine check interrupts, we need
133 * to call the Linux handler to process the interrupt. 137 * to call the Linux handler to process the interrupt.
@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
215 ld r5, HSTATE_KVM_VCORE(r13) 219 ld r5, HSTATE_KVM_VCORE(r13)
216 li r0, 0 220 li r0, 0
217 stb r0, HSTATE_NAPPING(r13) 221 stb r0, HSTATE_NAPPING(r13)
218 stb r0, HSTATE_HWTHREAD_REQ(r13)
219 222
220 /* check the wake reason */ 223 /* check the wake reason */
221 bl kvmppc_check_wake_reason 224 bl kvmppc_check_wake_reason
@@ -315,10 +318,10 @@ kvm_start_guest:
315 cmpdi r3, 0 318 cmpdi r3, 0
316 bge kvm_no_guest 319 bge kvm_no_guest
317 320
318 /* get vcpu pointer, NULL if we have no vcpu to run */ 321 /* get vcore pointer, NULL if we have nothing to run */
319 ld r4,HSTATE_KVM_VCPU(r13) 322 ld r5,HSTATE_KVM_VCORE(r13)
320 cmpdi r4,0 323 cmpdi r5,0
321 /* if we have no vcpu to run, go back to sleep */ 324 /* if we have no vcore to run, go back to sleep */
322 beq kvm_no_guest 325 beq kvm_no_guest
323 326
324kvm_secondary_got_guest: 327kvm_secondary_got_guest:
@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
327 ld r6, PACA_DSCR_DEFAULT(r13) 330 ld r6, PACA_DSCR_DEFAULT(r13)
328 std r6, HSTATE_DSCR(r13) 331 std r6, HSTATE_DSCR(r13)
329 332
330 /* Order load of vcore, ptid etc. after load of vcpu */ 333 /* On thread 0 of a subcore, set HDEC to max */
334 lbz r4, HSTATE_PTID(r13)
335 cmpwi r4, 0
336 bne 63f
337 lis r6, 0x7fff
338 ori r6, r6, 0xffff
339 mtspr SPRN_HDEC, r6
340 /* and set per-LPAR registers, if doing dynamic micro-threading */
341 ld r6, HSTATE_SPLIT_MODE(r13)
342 cmpdi r6, 0
343 beq 63f
344 ld r0, KVM_SPLIT_RPR(r6)
345 mtspr SPRN_RPR, r0
346 ld r0, KVM_SPLIT_PMMAR(r6)
347 mtspr SPRN_PMMAR, r0
348 ld r0, KVM_SPLIT_LDBAR(r6)
349 mtspr SPRN_LDBAR, r0
350 isync
35163:
352 /* Order load of vcpu after load of vcore */
331 lwsync 353 lwsync
354 ld r4, HSTATE_KVM_VCPU(r13)
332 bl kvmppc_hv_entry 355 bl kvmppc_hv_entry
333 356
334 /* Back from the guest, go back to nap */ 357 /* Back from the guest, go back to nap */
335 /* Clear our vcpu pointer so we don't come back in early */ 358 /* Clear our vcpu and vcore pointers so we don't come back in early */
336 li r0, 0 359 li r0, 0
360 std r0, HSTATE_KVM_VCPU(r13)
337 /* 361 /*
338 * Once we clear HSTATE_KVM_VCPU(r13), the code in 362 * Once we clear HSTATE_KVM_VCORE(r13), the code in
339 * kvmppc_run_core() is going to assume that all our vcpu 363 * kvmppc_run_core() is going to assume that all our vcpu
340 * state is visible in memory. This lwsync makes sure 364 * state is visible in memory. This lwsync makes sure
341 * that that is true. 365 * that that is true.
342 */ 366 */
343 lwsync 367 lwsync
344 std r0, HSTATE_KVM_VCPU(r13) 368 std r0, HSTATE_KVM_VCORE(r13)
345 369
346/* 370/*
347 * At this point we have finished executing in the guest. 371 * At this point we have finished executing in the guest.
@@ -374,16 +398,71 @@ kvm_no_guest:
374 b power7_wakeup_loss 398 b power7_wakeup_loss
375 399
37653: HMT_LOW 40053: HMT_LOW
377 ld r4, HSTATE_KVM_VCPU(r13) 401 ld r5, HSTATE_KVM_VCORE(r13)
378 cmpdi r4, 0 402 cmpdi r5, 0
403 bne 60f
404 ld r3, HSTATE_SPLIT_MODE(r13)
405 cmpdi r3, 0
406 beq kvm_no_guest
407 lbz r0, KVM_SPLIT_DO_NAP(r3)
408 cmpwi r0, 0
379 beq kvm_no_guest 409 beq kvm_no_guest
380 HMT_MEDIUM 410 HMT_MEDIUM
411 b kvm_unsplit_nap
41260: HMT_MEDIUM
381 b kvm_secondary_got_guest 413 b kvm_secondary_got_guest
382 414
38354: li r0, KVM_HWTHREAD_IN_KVM 41554: li r0, KVM_HWTHREAD_IN_KVM
384 stb r0, HSTATE_HWTHREAD_STATE(r13) 416 stb r0, HSTATE_HWTHREAD_STATE(r13)
385 b kvm_no_guest 417 b kvm_no_guest
386 418
419/*
420 * Here the primary thread is trying to return the core to
421 * whole-core mode, so we need to nap.
422 */
423kvm_unsplit_nap:
424 /*
425 * Ensure that secondary doesn't nap when it has
426 * its vcore pointer set.
427 */
428 sync /* matches smp_mb() before setting split_info.do_nap */
429 ld r0, HSTATE_KVM_VCORE(r13)
430 cmpdi r0, 0
431 bne kvm_no_guest
432 /* clear any pending message */
433BEGIN_FTR_SECTION
434 lis r6, (PPC_DBELL_SERVER << (63-36))@h
435 PPC_MSGCLR(6)
436END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
437 /* Set kvm_split_mode.napped[tid] = 1 */
438 ld r3, HSTATE_SPLIT_MODE(r13)
439 li r0, 1
440 lhz r4, PACAPACAINDEX(r13)
441 clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
442 addi r4, r4, KVM_SPLIT_NAPPED
443 stbx r0, r3, r4
444 /* Check the do_nap flag again after setting napped[] */
445 sync
446 lbz r0, KVM_SPLIT_DO_NAP(r3)
447 cmpwi r0, 0
448 beq 57f
449 li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
450 mfspr r4, SPRN_LPCR
451 rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
452 mtspr SPRN_LPCR, r4
453 isync
454 std r0, HSTATE_SCRATCH0(r13)
455 ptesync
456 ld r0, HSTATE_SCRATCH0(r13)
4571: cmpd r0, r0
458 bne 1b
459 nap
460 b .
461
46257: li r0, 0
463 stbx r0, r3, r4
464 b kvm_no_guest
465
387/****************************************************************************** 466/******************************************************************************
388 * * 467 * *
389 * Entry code * 468 * Entry code *
@@ -854,7 +933,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
854 cmpwi r0, 0 933 cmpwi r0, 0
855 bne 21f 934 bne 21f
856 HMT_LOW 935 HMT_LOW
85720: lbz r0, VCORE_IN_GUEST(r5) 93620: lwz r3, VCORE_ENTRY_EXIT(r5)
937 cmpwi r3, 0x100
938 bge no_switch_exit
939 lbz r0, VCORE_IN_GUEST(r5)
858 cmpwi r0, 0 940 cmpwi r0, 0
859 beq 20b 941 beq 20b
860 HMT_MEDIUM 942 HMT_MEDIUM
@@ -870,7 +952,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
870 blt hdec_soon 952 blt hdec_soon
871 953
872 ld r6, VCPU_CTR(r4) 954 ld r6, VCPU_CTR(r4)
873 lwz r7, VCPU_XER(r4) 955 ld r7, VCPU_XER(r4)
874 956
875 mtctr r6 957 mtctr r6
876 mtxer r7 958 mtxer r7
@@ -985,9 +1067,13 @@ secondary_too_late:
985#endif 1067#endif
98611: b kvmhv_switch_to_host 106811: b kvmhv_switch_to_host
987 1069
1070no_switch_exit:
1071 HMT_MEDIUM
1072 li r12, 0
1073 b 12f
988hdec_soon: 1074hdec_soon:
989 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 1075 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
990 stw r12, VCPU_TRAP(r4) 107612: stw r12, VCPU_TRAP(r4)
991 mr r9, r4 1077 mr r9, r4
992#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1078#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
993 addi r3, r4, VCPU_TB_RMEXIT 1079 addi r3, r4, VCPU_TB_RMEXIT
@@ -1103,7 +1189,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1103 mfctr r3 1189 mfctr r3
1104 mfxer r4 1190 mfxer r4
1105 std r3, VCPU_CTR(r9) 1191 std r3, VCPU_CTR(r9)
1106 stw r4, VCPU_XER(r9) 1192 std r4, VCPU_XER(r9)
1107 1193
1108 /* If this is a page table miss then see if it's theirs or ours */ 1194 /* If this is a page table miss then see if it's theirs or ours */
1109 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE 1195 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1127,6 +1213,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1127 cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL 1213 cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
1128 bne 3f 1214 bne 3f
1129 lbz r0, HSTATE_HOST_IPI(r13) 1215 lbz r0, HSTATE_HOST_IPI(r13)
1216 cmpwi r0, 0
1130 beq 4f 1217 beq 4f
1131 b guest_exit_cont 1218 b guest_exit_cont
11323: 12193:
@@ -1176,6 +1263,11 @@ mc_cont:
1176 ld r9, HSTATE_KVM_VCPU(r13) 1263 ld r9, HSTATE_KVM_VCPU(r13)
1177 lwz r12, VCPU_TRAP(r9) 1264 lwz r12, VCPU_TRAP(r9)
1178 1265
1266 /* Stop others sending VCPU interrupts to this physical CPU */
1267 li r0, -1
1268 stw r0, VCPU_CPU(r9)
1269 stw r0, VCPU_THREAD_CPU(r9)
1270
1179 /* Save guest CTRL register, set runlatch to 1 */ 1271 /* Save guest CTRL register, set runlatch to 1 */
1180 mfspr r6,SPRN_CTRLF 1272 mfspr r6,SPRN_CTRLF
1181 stw r6,VCPU_CTRL(r9) 1273 stw r6,VCPU_CTRL(r9)
@@ -1540,12 +1632,17 @@ kvmhv_switch_to_host:
1540 1632
1541 /* Primary thread waits for all the secondaries to exit guest */ 1633 /* Primary thread waits for all the secondaries to exit guest */
154215: lwz r3,VCORE_ENTRY_EXIT(r5) 163415: lwz r3,VCORE_ENTRY_EXIT(r5)
1543 srwi r0,r3,8 1635 rlwinm r0,r3,32-8,0xff
1544 clrldi r3,r3,56 1636 clrldi r3,r3,56
1545 cmpw r3,r0 1637 cmpw r3,r0
1546 bne 15b 1638 bne 15b
1547 isync 1639 isync
1548 1640
1641 /* Did we actually switch to the guest at all? */
1642 lbz r6, VCORE_IN_GUEST(r5)
1643 cmpwi r6, 0
1644 beq 19f
1645
1549 /* Primary thread switches back to host partition */ 1646 /* Primary thread switches back to host partition */
1550 ld r6,KVM_HOST_SDR1(r4) 1647 ld r6,KVM_HOST_SDR1(r4)
1551 lwz r7,KVM_HOST_LPID(r4) 1648 lwz r7,KVM_HOST_LPID(r4)
@@ -1589,7 +1686,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
158918: 168618:
1590 /* Signal secondary CPUs to continue */ 1687 /* Signal secondary CPUs to continue */
1591 stb r0,VCORE_IN_GUEST(r5) 1688 stb r0,VCORE_IN_GUEST(r5)
1592 lis r8,0x7fff /* MAX_INT@h */ 168919: lis r8,0x7fff /* MAX_INT@h */
1593 mtspr SPRN_HDEC,r8 1690 mtspr SPRN_HDEC,r8
1594 1691
159516: ld r8,KVM_HOST_LPCR(r4) 169216: ld r8,KVM_HOST_LPCR(r4)
@@ -1675,7 +1772,7 @@ kvmppc_hdsi:
1675 bl kvmppc_msr_interrupt 1772 bl kvmppc_msr_interrupt
1676fast_interrupt_c_return: 1773fast_interrupt_c_return:
16776: ld r7, VCPU_CTR(r9) 17746: ld r7, VCPU_CTR(r9)
1678 lwz r8, VCPU_XER(r9) 1775 ld r8, VCPU_XER(r9)
1679 mtctr r7 1776 mtctr r7
1680 mtxer r8 1777 mtxer r8
1681 mr r4, r9 1778 mr r4, r9
@@ -1816,8 +1913,8 @@ hcall_real_table:
1816 .long DOTSYM(kvmppc_h_remove) - hcall_real_table 1913 .long DOTSYM(kvmppc_h_remove) - hcall_real_table
1817 .long DOTSYM(kvmppc_h_enter) - hcall_real_table 1914 .long DOTSYM(kvmppc_h_enter) - hcall_real_table
1818 .long DOTSYM(kvmppc_h_read) - hcall_real_table 1915 .long DOTSYM(kvmppc_h_read) - hcall_real_table
1819 .long 0 /* 0x10 - H_CLEAR_MOD */ 1916 .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
1820 .long 0 /* 0x14 - H_CLEAR_REF */ 1917 .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
1821 .long DOTSYM(kvmppc_h_protect) - hcall_real_table 1918 .long DOTSYM(kvmppc_h_protect) - hcall_real_table
1822 .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table 1919 .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
1823 .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table 1920 .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bd6ab1672ae6..a759d9adb0b6 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,7 +352,7 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
352 return kvmppc_get_field(inst, msb + 32, lsb + 32); 352 return kvmppc_get_field(inst, msb + 32, lsb + 32);
353} 353}
354 354
355bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) 355static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
356{ 356{
357 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 357 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
358 return false; 358 return false;
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index acee37cde840..ca8f174289bb 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -123,7 +123,7 @@ no_dcbz32_on:
123 PPC_LL r8, SVCPU_CTR(r3) 123 PPC_LL r8, SVCPU_CTR(r3)
124 PPC_LL r9, SVCPU_LR(r3) 124 PPC_LL r9, SVCPU_LR(r3)
125 lwz r10, SVCPU_CR(r3) 125 lwz r10, SVCPU_CR(r3)
126 lwz r11, SVCPU_XER(r3) 126 PPC_LL r11, SVCPU_XER(r3)
127 127
128 mtctr r8 128 mtctr r8
129 mtlr r9 129 mtlr r9
@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
237 mfctr r8 237 mfctr r8
238 mflr r9 238 mflr r9
239 239
240 stw r5, SVCPU_XER(r13) 240 PPC_STL r5, SVCPU_XER(r13)
241 PPC_STL r6, SVCPU_FAULT_DAR(r13) 241 PPC_STL r6, SVCPU_FAULT_DAR(r13)
242 stw r7, SVCPU_FAULT_DSISR(r13) 242 stw r7, SVCPU_FAULT_DSISR(r13)
243 PPC_STL r8, SVCPU_CTR(r13) 243 PPC_STL r8, SVCPU_CTR(r13)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index c6ca7db64673..905e94a1370f 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -41,7 +41,7 @@
41 * ======= 41 * =======
42 * 42 *
43 * Each ICS has a spin lock protecting the information about the IRQ 43 * Each ICS has a spin lock protecting the information about the IRQ
44 * sources and avoiding simultaneous deliveries if the same interrupt. 44 * sources and avoiding simultaneous deliveries of the same interrupt.
45 * 45 *
46 * ICP operations are done via a single compare & swap transaction 46 * ICP operations are done via a single compare & swap transaction
47 * (most ICP state fits in the union kvmppc_icp_state) 47 * (most ICP state fits in the union kvmppc_icp_state)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index cc5842657161..ae458f0fd061 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -933,6 +933,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
933#endif 933#endif
934 break; 934 break;
935 case BOOKE_INTERRUPT_CRITICAL: 935 case BOOKE_INTERRUPT_CRITICAL:
936 kvmppc_fill_pt_regs(&regs);
936 unknown_exception(&regs); 937 unknown_exception(&regs);
937 break; 938 break;
938 case BOOKE_INTERRUPT_DEBUG: 939 case BOOKE_INTERRUPT_DEBUG:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 50860e919cb8..29911a07bcdb 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -377,7 +377,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
377 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 377 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
378 vcpu->arch.shared->mas1 = 378 vcpu->arch.shared->mas1 =
379 (vcpu->arch.shared->mas6 & MAS6_SPID0) 379 (vcpu->arch.shared->mas6 & MAS6_SPID0)
380 | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) 380 | ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
381 | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); 381 | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
382 vcpu->arch.shared->mas2 &= MAS2_EPN; 382 vcpu->arch.shared->mas2 &= MAS2_EPN;
383 vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & 383 vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e5dde32fe71f..2e51289610e4 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -660,7 +660,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
660 return kvmppc_core_pending_dec(vcpu); 660 return kvmppc_core_pending_dec(vcpu);
661} 661}
662 662
663enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) 663static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
664{ 664{
665 struct kvm_vcpu *vcpu; 665 struct kvm_vcpu *vcpu;
666 666