aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paolo Bonzini <pbonzini@redhat.com> 2017-11-02 13:21:26 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2017-11-02 13:21:26 -0400
commit: 6d6ab940dc8b1c84fc86195c0f15a82ef282c8a3 (patch)
tree: 90127d4682f3e728a3516994e7696488f1db6385
parent: 9ffd986c6e4e59c11857cbc78e4217e9569f3725 (diff)
parent: c01015091a77035de1939ef106bfbcaf9a21395f (diff)
Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD
Apart from various bugfixes and code cleanups, the major new feature is the ability to run guests using the hashed page table (HPT) MMU mode on a host that is using the radix MMU mode.

Because of limitations in the current POWER9 chip (all SMT threads in each core must use the same MMU mode, HPT or radix), this requires the host to be configured to run similarly to POWER8: the host runs in single-threaded mode (only thread 0 of each core online), and KVM wakes up the other threads when a KVM guest is to be run and uses them for running guest VCPUs.

A new module parameter, called "indep_threads_mode", is normally Y on POWER9 but must be set to N before any HPT guests can be run on a radix host:

    # echo N >/sys/module/kvm_hv/parameters/indep_threads_mode
    # ppc64_cpu --smt=off

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s.h | 3
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s_64.h | 140
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s_asm.h | 17
-rw-r--r-- arch/powerpc/include/asm/kvm_host.h | 6
-rw-r--r-- arch/powerpc/include/asm/kvm_ppc.h | 3
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c | 3
-rw-r--r-- arch/powerpc/kernel/idle_book3s.S | 35
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 124
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu_radix.c | 51
-rw-r--r-- arch/powerpc/kvm/book3s_64_slb.S | 2
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c | 337
-rw-r--r-- arch/powerpc/kvm/book3s_hv_builtin.c | 117
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 65
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 205
-rw-r--r-- arch/powerpc/kvm/book3s_pr.c | 16
-rw-r--r-- arch/powerpc/kvm/book3s_pr_papr.c | 2
-rw-r--r-- arch/powerpc/kvm/e500_mmu_host.c | 2
-rw-r--r-- arch/powerpc/kvm/powerpc.c | 7
18 files changed, 797 insertions, 338 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index b8d5b8e35244..9a667007bff8 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -216,7 +216,8 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
216 bool writing, bool *writable); 216 bool writing, bool *writable);
217extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 217extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
218 unsigned long *rmap, long pte_index, int realmode); 218 unsigned long *rmap, long pte_index, int realmode);
219extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize); 219extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
220 unsigned long gfn, unsigned long psize);
220extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 221extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
221 unsigned long pte_index); 222 unsigned long pte_index);
222void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, 223void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index d55c7f881ce7..735cfa35298a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -20,6 +20,8 @@
20#ifndef __ASM_KVM_BOOK3S_64_H__ 20#ifndef __ASM_KVM_BOOK3S_64_H__
21#define __ASM_KVM_BOOK3S_64_H__ 21#define __ASM_KVM_BOOK3S_64_H__
22 22
23#include <linux/string.h>
24#include <asm/bitops.h>
23#include <asm/book3s/64/mmu-hash.h> 25#include <asm/book3s/64/mmu-hash.h>
24 26
25/* Power architecture requires HPT is at least 256kiB, at most 64TiB */ 27/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
@@ -107,18 +109,96 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
107 hpte[0] = cpu_to_be64(hpte_v); 109 hpte[0] = cpu_to_be64(hpte_v);
108} 110}
109 111
112/*
113 * These functions encode knowledge of the POWER7/8/9 hardware
114 * interpretations of the HPTE LP (large page size) field.
115 */
116static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
117{
118 unsigned int lphi;
119
120 if (!(h & HPTE_V_LARGE))
121 return 12; /* 4kB */
122 lphi = (l >> 16) & 0xf;
123 switch ((l >> 12) & 0xf) {
124 case 0:
125 return !lphi ? 24 : -1; /* 16MB */
126 break;
127 case 1:
128 return 16; /* 64kB */
129 break;
130 case 3:
131 return !lphi ? 34 : -1; /* 16GB */
132 break;
133 case 7:
134 return (16 << 8) + 12; /* 64kB in 4kB */
135 break;
136 case 8:
137 if (!lphi)
138 return (24 << 8) + 16; /* 16MB in 64kkB */
139 if (lphi == 3)
140 return (24 << 8) + 12; /* 16MB in 4kB */
141 break;
142 }
143 return -1;
144}
145
/*
 * Return the base page shift of an HPTE: the low 8 bits of the value
 * produced by kvmppc_hpte_page_shifts().
 *
 * NOTE(review): when kvmppc_hpte_page_shifts() returns -1 the mask
 * turns that into 0xff rather than a negative value - confirm that
 * callers never pass an unrecognised encoding.
 */
static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
{
	int shifts = kvmppc_hpte_page_shifts(h, l);

	return shifts & 0xff;
}
150
/*
 * Return the actual page shift of an HPTE.
 *
 * kvmppc_hpte_page_shifts() returns a value >= 0x100 when it packs the
 * actual shift above bit 8; in that case the upper part is extracted.
 * Any other value (including -1 for an unrecognised encoding) is
 * returned unchanged.
 */
static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l)
{
	int shifts = kvmppc_hpte_page_shifts(h, l);

	return (shifts >= 0x100) ? (shifts >> 8) : shifts;
}
159
/*
 * Return the actual page size, in bytes, described by an HPTE.
 *
 * @v: first HPTE doubleword
 * @r: second HPTE doubleword
 *
 * Returns 0 when the page-size encoding is not recognised.
 * kvmppc_hpte_actual_page_shift() yields -1 in that case, and shifting
 * by a negative count is undefined behaviour, so the invalid case is
 * filtered out before the shift.
 */
static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
	int shift = kvmppc_hpte_actual_page_shift(v, r);

	if (shift <= 0)
		return 0;
	return 1ul << shift;
}
164
/*
 * Map a (base page shift, actual page shift) pair to an LP encoding
 * value.
 *
 * @base_shift:   base page shift (12, 16 or 24)
 * @actual_shift: actual page shift; ignored when base_shift is 24
 *
 * Returns the encoding, or -1 if the combination is not supported.
 */
static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
{
	/* A base shift of 24 always encodes as 0 */
	if (base_shift == 24)
		return 0;

	if (base_shift == 12) {
		if (actual_shift == 12)
			return 0;
		if (actual_shift == 16)
			return 7;
		if (actual_shift == 24)
			return 0x38;
	} else if (base_shift == 16) {
		if (actual_shift == 16)
			return 1;
		if (actual_shift == 24)
			return 8;
	}

	return -1;
}
191
110static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, 192static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
111 unsigned long pte_index) 193 unsigned long pte_index)
112{ 194{
113 int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; 195 int a_pgshift, b_pgshift;
114 unsigned int penc;
115 unsigned long rb = 0, va_low, sllp; 196 unsigned long rb = 0, va_low, sllp;
116 unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
117 197
118 if (v & HPTE_V_LARGE) { 198 b_pgshift = a_pgshift = kvmppc_hpte_page_shifts(v, r);
119 i = hpte_page_sizes[lp]; 199 if (a_pgshift >= 0x100) {
120 b_psize = i & 0xf; 200 b_pgshift &= 0xff;
121 a_psize = i >> 4; 201 a_pgshift >>= 8;
122 } 202 }
123 203
124 /* 204 /*
@@ -152,37 +232,33 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
152 va_low ^= v >> (SID_SHIFT_1T - 16); 232 va_low ^= v >> (SID_SHIFT_1T - 16);
153 va_low &= 0x7ff; 233 va_low &= 0x7ff;
154 234
155 switch (b_psize) { 235 if (b_pgshift == 12) {
156 case MMU_PAGE_4K: 236 if (a_pgshift > 12) {
157 sllp = get_sllp_encoding(a_psize); 237 sllp = (a_pgshift == 16) ? 5 : 4;
158 rb |= sllp << 5; /* AP field */ 238 rb |= sllp << 5; /* AP field */
239 }
159 rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */ 240 rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
160 break; 241 } else {
161 default:
162 {
163 int aval_shift; 242 int aval_shift;
164 /* 243 /*
165 * remaining bits of AVA/LP fields 244 * remaining bits of AVA/LP fields
166 * Also contain the rr bits of LP 245 * Also contain the rr bits of LP
167 */ 246 */
168 rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000; 247 rb |= (va_low << b_pgshift) & 0x7ff000;
169 /* 248 /*
170 * Now clear not needed LP bits based on actual psize 249 * Now clear not needed LP bits based on actual psize
171 */ 250 */
172 rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1); 251 rb &= ~((1ul << a_pgshift) - 1);
173 /* 252 /*
174 * AVAL field 58..77 - base_page_shift bits of va 253 * AVAL field 58..77 - base_page_shift bits of va
175 * we have space for 58..64 bits, Missing bits should 254 * we have space for 58..64 bits, Missing bits should
176 * be zero filled. +1 is to take care of L bit shift 255 * be zero filled. +1 is to take care of L bit shift
177 */ 256 */
178 aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1; 257 aval_shift = 64 - (77 - b_pgshift) + 1;
179 rb |= ((va_low << aval_shift) & 0xfe); 258 rb |= ((va_low << aval_shift) & 0xfe);
180 259
181 rb |= 1; /* L field */ 260 rb |= 1; /* L field */
182 penc = mmu_psize_defs[b_psize].penc[a_psize]; 261 rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
183 rb |= penc << 12; /* LP field */
184 break;
185 }
186 } 262 }
187 rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ 263 rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
188 return rb; 264 return rb;
@@ -370,6 +446,28 @@ static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
370 return (1UL << (hpt->order - 7)) - 1; 446 return (1UL << (hpt->order - 7)) - 1;
371} 447}
372 448
/*
 * Set bits in a dirty bitmap, which is in LE format.
 *
 * @map:    bitmap to update
 * @i:      index of the first bit to set
 * @npages: number of consecutive bits to set, starting at @i
 *
 * Whole bytes are filled with memset(); bits before the first byte
 * boundary and after the last full byte are set one at a time, so the
 * range [i, i + npages) is covered exactly even when @i or @npages is
 * not a multiple of 8.
 */
static inline void set_dirty_bits(unsigned long *map, unsigned long i,
				  unsigned long npages)
{
	/* Leading bits up to the next byte boundary */
	for (; npages && (i & 7); ++i, --npages)
		__set_bit_le(i, map);

	/* Whole bytes in the middle of the range */
	if (npages >= 8) {
		unsigned long nbytes = npages / 8;

		memset((char *)map + i / 8, 0xff, nbytes);
		i += nbytes * 8;
		npages -= nbytes * 8;
	}

	/* Trailing bits that do not fill a byte */
	for (; npages; ++i, --npages)
		__set_bit_le(i, map);
}
460
/*
 * Set bits in a dirty bitmap (LE format), using set_bit_le() for
 * individual bits.
 *
 * @map:    bitmap to update
 * @i:      index of the first bit to set
 * @npages: number of consecutive bits to set, starting at @i
 *
 * Whole bytes are filled with memset(); bits before the first byte
 * boundary and after the last full byte are set with set_bit_le(), so
 * the range [i, i + npages) is covered exactly even when @i or @npages
 * is not a multiple of 8.
 */
static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
					 unsigned long npages)
{
	/* Leading bits up to the next byte boundary */
	for (; npages && (i & 7); ++i, --npages)
		set_bit_le(i, map);

	/* Whole bytes in the middle of the range */
	if (npages >= 8) {
		unsigned long nbytes = npages / 8;

		memset((char *)map + i / 8, 0xff, nbytes);
		i += nbytes * 8;
		npages -= nbytes * 8;
	}

	/* Trailing bits that do not fill a byte */
	for (; npages; ++i, --npages)
		set_bit_le(i, map);
}
470
373#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 471#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
374 472
375#endif /* __ASM_KVM_BOOK3S_64_H__ */ 473#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 83596f32f50b..ab386af2904f 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -82,6 +82,16 @@ struct kvm_split_mode {
82 u8 do_nap; 82 u8 do_nap;
83 u8 napped[MAX_SMT_THREADS]; 83 u8 napped[MAX_SMT_THREADS];
84 struct kvmppc_vcore *vc[MAX_SUBCORES]; 84 struct kvmppc_vcore *vc[MAX_SUBCORES];
85 /* Bits for changing lpcr on P9 */
86 unsigned long lpcr_req;
87 unsigned long lpidr_req;
88 unsigned long host_lpcr;
89 u32 do_set;
90 u32 do_restore;
91 union {
92 u32 allphases;
93 u8 phase[4];
94 } lpcr_sync;
85}; 95};
86 96
87/* 97/*
@@ -104,14 +114,11 @@ struct kvmppc_host_state {
104 u8 napping; 114 u8 napping;
105 115
106#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 116#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
107 /*
108 * hwthread_req/hwthread_state pair is used to pull sibling threads
109 * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
110 */
111 u8 hwthread_req; 117 u8 hwthread_req;
112 u8 hwthread_state; 118 u8 hwthread_state;
113 u8 host_ipi; 119 u8 host_ipi;
114 u8 ptid; 120 u8 ptid; /* thread number within subcore when split */
121 u8 tid; /* thread number within whole core */
115 struct kvm_vcpu *kvm_vcpu; 122 struct kvm_vcpu *kvm_vcpu;
116 struct kvmppc_vcore *kvm_vcore; 123 struct kvmppc_vcore *kvm_vcore;
117 void __iomem *xics_phys; 124 void __iomem *xics_phys;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e372ed871c51..3aa5b577cd60 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -235,10 +235,7 @@ struct revmap_entry {
235 */ 235 */
236#define KVMPPC_RMAP_LOCK_BIT 63 236#define KVMPPC_RMAP_LOCK_BIT 63
237#define KVMPPC_RMAP_RC_SHIFT 32 237#define KVMPPC_RMAP_RC_SHIFT 32
238#define KVMPPC_RMAP_CHG_SHIFT 48
239#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT) 238#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
240#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
241#define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT)
242#define KVMPPC_RMAP_PRESENT 0x100000000ul 239#define KVMPPC_RMAP_PRESENT 0x100000000ul
243#define KVMPPC_RMAP_INDEX 0xfffffffful 240#define KVMPPC_RMAP_INDEX 0xfffffffful
244 241
@@ -276,7 +273,7 @@ struct kvm_arch {
276 int tlbie_lock; 273 int tlbie_lock;
277 unsigned long lpcr; 274 unsigned long lpcr;
278 unsigned long vrma_slb_v; 275 unsigned long vrma_slb_v;
279 int hpte_setup_done; 276 int mmu_ready;
280 atomic_t vcpus_running; 277 atomic_t vcpus_running;
281 u32 online_vcores; 278 u32 online_vcores;
282 atomic_t hpte_mod_interest; 279 atomic_t hpte_mod_interest;
@@ -284,6 +281,7 @@ struct kvm_arch {
284 cpumask_t cpu_in_guest; 281 cpumask_t cpu_in_guest;
285 u8 radix; 282 u8 radix;
286 u8 fwnmi_enabled; 283 u8 fwnmi_enabled;
284 bool threads_indep;
287 pgd_t *pgtable; 285 pgd_t *pgtable;
288 u64 process_table; 286 u64 process_table;
289 struct dentry *debugfs_dir; 287 struct dentry *debugfs_dir;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ba5fadd6f3c9..96753f3aac6d 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -168,6 +168,7 @@ extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
168extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info); 168extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
169extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order); 169extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
170extern void kvmppc_free_hpt(struct kvm_hpt_info *info); 170extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
171extern void kvmppc_rmap_reset(struct kvm *kvm);
171extern long kvmppc_prepare_vrma(struct kvm *kvm, 172extern long kvmppc_prepare_vrma(struct kvm *kvm,
172 struct kvm_userspace_memory_region *mem); 173 struct kvm_userspace_memory_region *mem);
173extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, 174extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
@@ -177,6 +178,8 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
177 struct iommu_group *grp); 178 struct iommu_group *grp);
178extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, 179extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
179 struct iommu_group *grp); 180 struct iommu_group *grp);
181extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
182extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
180 183
181extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 184extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
182 struct kvm_create_spapr_tce_64 *args); 185 struct kvm_create_spapr_tce_64 *args);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8cfb20e38cfe..519fad556113 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -642,6 +642,7 @@ int main(void)
642 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); 642 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
643 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); 643 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
644 HSTATE_FIELD(HSTATE_PTID, ptid); 644 HSTATE_FIELD(HSTATE_PTID, ptid);
645 HSTATE_FIELD(HSTATE_TID, tid);
645 HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); 646 HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
646 HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); 647 HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
647 HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]); 648 HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -667,6 +668,8 @@ int main(void)
667 OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); 668 OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
668 OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap); 669 OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
669 OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped); 670 OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
671 OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
672 OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
670#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 673#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
671 674
672#ifdef CONFIG_PPC_BOOK3S_64 675#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1125c9be9e06..175d49f468af 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -319,20 +319,13 @@ enter_winkle:
319/* 319/*
320 * r3 - PSSCR value corresponding to the requested stop state. 320 * r3 - PSSCR value corresponding to the requested stop state.
321 */ 321 */
322power_enter_stop:
322#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 323#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
323power_enter_stop_kvm_rm: 324 /* Tell KVM we're entering idle */
324 /*
325 * This is currently unused because POWER9 KVM does not have to
326 * gather secondary threads into sibling mode, but the code is
327 * here in case that function is required.
328 *
329 * Tell KVM we're entering idle.
330 */
331 li r4,KVM_HWTHREAD_IN_IDLE 325 li r4,KVM_HWTHREAD_IN_IDLE
332 /* DO THIS IN REAL MODE! See comment above. */ 326 /* DO THIS IN REAL MODE! See comment above. */
333 stb r4,HSTATE_HWTHREAD_STATE(r13) 327 stb r4,HSTATE_HWTHREAD_STATE(r13)
334#endif 328#endif
335power_enter_stop:
336/* 329/*
337 * Check if we are executing the lite variant with ESL=EC=0 330 * Check if we are executing the lite variant with ESL=EC=0
338 */ 331 */
@@ -496,18 +489,6 @@ pnv_powersave_wakeup_mce:
496 489
497 b pnv_powersave_wakeup 490 b pnv_powersave_wakeup
498 491
499#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
500kvm_start_guest_check:
501 li r0,KVM_HWTHREAD_IN_KERNEL
502 stb r0,HSTATE_HWTHREAD_STATE(r13)
503 /* Order setting hwthread_state vs. testing hwthread_req */
504 sync
505 lbz r0,HSTATE_HWTHREAD_REQ(r13)
506 cmpwi r0,0
507 beqlr
508 b kvm_start_guest
509#endif
510
511/* 492/*
512 * Called from reset vector for powersave wakeups. 493 * Called from reset vector for powersave wakeups.
513 * cr3 - set to gt if waking up with partial/complete hypervisor state loss 494 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -532,9 +513,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
532 mr r3,r12 513 mr r3,r12
533 514
534#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 515#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
535BEGIN_FTR_SECTION 516 li r0,KVM_HWTHREAD_IN_KERNEL
536 bl kvm_start_guest_check 517 stb r0,HSTATE_HWTHREAD_STATE(r13)
537END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 518 /* Order setting hwthread_state vs. testing hwthread_req */
519 sync
520 lbz r0,HSTATE_HWTHREAD_REQ(r13)
521 cmpwi r0,0
522 beq 1f
523 b kvm_start_guest
5241:
538#endif 525#endif
539 526
540 /* Return SRR1 from power7_nap() */ 527 /* Return SRR1 from power7_nap() */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7c62967d672c..6aec8a22aeff 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -73,8 +73,6 @@ struct kvm_resize_hpt {
73 struct kvm_hpt_info hpt; 73 struct kvm_hpt_info hpt;
74}; 74};
75 75
76static void kvmppc_rmap_reset(struct kvm *kvm);
77
78int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) 76int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
79{ 77{
80 unsigned long hpt = 0; 78 unsigned long hpt = 0;
@@ -106,7 +104,6 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
106 /* Allocate reverse map array */ 104 /* Allocate reverse map array */
107 rev = vmalloc(sizeof(struct revmap_entry) * npte); 105 rev = vmalloc(sizeof(struct revmap_entry) * npte);
108 if (!rev) { 106 if (!rev) {
109 pr_err("kvmppc_allocate_hpt: Couldn't alloc reverse map array\n");
110 if (cma) 107 if (cma)
111 kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT)); 108 kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
112 else 109 else
@@ -137,19 +134,22 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
137 long err = -EBUSY; 134 long err = -EBUSY;
138 struct kvm_hpt_info info; 135 struct kvm_hpt_info info;
139 136
140 if (kvm_is_radix(kvm))
141 return -EINVAL;
142
143 mutex_lock(&kvm->lock); 137 mutex_lock(&kvm->lock);
144 if (kvm->arch.hpte_setup_done) { 138 if (kvm->arch.mmu_ready) {
145 kvm->arch.hpte_setup_done = 0; 139 kvm->arch.mmu_ready = 0;
146 /* order hpte_setup_done vs. vcpus_running */ 140 /* order mmu_ready vs. vcpus_running */
147 smp_mb(); 141 smp_mb();
148 if (atomic_read(&kvm->arch.vcpus_running)) { 142 if (atomic_read(&kvm->arch.vcpus_running)) {
149 kvm->arch.hpte_setup_done = 1; 143 kvm->arch.mmu_ready = 1;
150 goto out; 144 goto out;
151 } 145 }
152 } 146 }
147 if (kvm_is_radix(kvm)) {
148 err = kvmppc_switch_mmu_to_hpt(kvm);
149 if (err)
150 goto out;
151 }
152
153 if (kvm->arch.hpt.order == order) { 153 if (kvm->arch.hpt.order == order) {
154 /* We already have a suitable HPT */ 154 /* We already have a suitable HPT */
155 155
@@ -183,6 +183,7 @@ out:
183void kvmppc_free_hpt(struct kvm_hpt_info *info) 183void kvmppc_free_hpt(struct kvm_hpt_info *info)
184{ 184{
185 vfree(info->rev); 185 vfree(info->rev);
186 info->rev = NULL;
186 if (info->cma) 187 if (info->cma)
187 kvm_free_hpt_cma(virt_to_page(info->virt), 188 kvm_free_hpt_cma(virt_to_page(info->virt),
188 1 << (info->order - PAGE_SHIFT)); 189 1 << (info->order - PAGE_SHIFT));
@@ -334,7 +335,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
334{ 335{
335 unsigned long ra_mask; 336 unsigned long ra_mask;
336 337
337 ra_mask = hpte_page_size(v, r) - 1; 338 ra_mask = kvmppc_actual_pgsz(v, r) - 1;
338 return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask); 339 return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
339} 340}
340 341
@@ -350,6 +351,9 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
350 int index; 351 int index;
351 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); 352 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
352 353
354 if (kvm_is_radix(vcpu->kvm))
355 return kvmppc_mmu_radix_xlate(vcpu, eaddr, gpte, data, iswrite);
356
353 /* Get SLB entry */ 357 /* Get SLB entry */
354 if (virtmode) { 358 if (virtmode) {
355 slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr); 359 slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
@@ -505,7 +509,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
505 mmio_update = atomic64_read(&kvm->arch.mmio_update); 509 mmio_update = atomic64_read(&kvm->arch.mmio_update);
506 if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) { 510 if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
507 r = vcpu->arch.pgfault_cache->rpte; 511 r = vcpu->arch.pgfault_cache->rpte;
508 psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r); 512 psize = kvmppc_actual_pgsz(vcpu->arch.pgfault_hpte[0],
513 r);
509 gpa_base = r & HPTE_R_RPN & ~(psize - 1); 514 gpa_base = r & HPTE_R_RPN & ~(psize - 1);
510 gfn_base = gpa_base >> PAGE_SHIFT; 515 gfn_base = gpa_base >> PAGE_SHIFT;
511 gpa = gpa_base | (ea & (psize - 1)); 516 gpa = gpa_base | (ea & (psize - 1));
@@ -534,7 +539,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
534 return RESUME_GUEST; 539 return RESUME_GUEST;
535 540
536 /* Translate the logical address and get the page */ 541 /* Translate the logical address and get the page */
537 psize = hpte_page_size(hpte[0], r); 542 psize = kvmppc_actual_pgsz(hpte[0], r);
538 gpa_base = r & HPTE_R_RPN & ~(psize - 1); 543 gpa_base = r & HPTE_R_RPN & ~(psize - 1);
539 gfn_base = gpa_base >> PAGE_SHIFT; 544 gfn_base = gpa_base >> PAGE_SHIFT;
540 gpa = gpa_base | (ea & (psize - 1)); 545 gpa = gpa_base | (ea & (psize - 1));
@@ -710,7 +715,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
710 goto out_put; 715 goto out_put;
711} 716}
712 717
713static void kvmppc_rmap_reset(struct kvm *kvm) 718void kvmppc_rmap_reset(struct kvm *kvm)
714{ 719{
715 struct kvm_memslots *slots; 720 struct kvm_memslots *slots;
716 struct kvm_memory_slot *memslot; 721 struct kvm_memory_slot *memslot;
@@ -776,6 +781,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
776 781
777/* Must be called with both HPTE and rmap locked */ 782/* Must be called with both HPTE and rmap locked */
778static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, 783static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
784 struct kvm_memory_slot *memslot,
779 unsigned long *rmapp, unsigned long gfn) 785 unsigned long *rmapp, unsigned long gfn)
780{ 786{
781 __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); 787 __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
@@ -798,7 +804,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
798 804
799 /* Now check and modify the HPTE */ 805 /* Now check and modify the HPTE */
800 ptel = rev[i].guest_rpte; 806 ptel = rev[i].guest_rpte;
801 psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); 807 psize = kvmppc_actual_pgsz(be64_to_cpu(hptep[0]), ptel);
802 if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && 808 if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
803 hpte_rpn(ptel, psize) == gfn) { 809 hpte_rpn(ptel, psize) == gfn) {
804 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); 810 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -807,8 +813,8 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
807 /* Harvest R and C */ 813 /* Harvest R and C */
808 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); 814 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
809 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 815 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
810 if (rcbits & HPTE_R_C) 816 if ((rcbits & HPTE_R_C) && memslot->dirty_bitmap)
811 kvmppc_update_rmap_change(rmapp, psize); 817 kvmppc_update_dirty_map(memslot, gfn, psize);
812 if (rcbits & ~rev[i].guest_rpte) { 818 if (rcbits & ~rev[i].guest_rpte) {
813 rev[i].guest_rpte = ptel | rcbits; 819 rev[i].guest_rpte = ptel | rcbits;
814 note_hpte_modification(kvm, &rev[i]); 820 note_hpte_modification(kvm, &rev[i]);
@@ -846,7 +852,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
846 continue; 852 continue;
847 } 853 }
848 854
849 kvmppc_unmap_hpte(kvm, i, rmapp, gfn); 855 kvmppc_unmap_hpte(kvm, i, memslot, rmapp, gfn);
850 unlock_rmap(rmapp); 856 unlock_rmap(rmapp);
851 __unlock_hpte(hptep, be64_to_cpu(hptep[0])); 857 __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
852 } 858 }
@@ -1029,14 +1035,6 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
1029 1035
1030 retry: 1036 retry:
1031 lock_rmap(rmapp); 1037 lock_rmap(rmapp);
1032 if (*rmapp & KVMPPC_RMAP_CHANGED) {
1033 long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
1034 >> KVMPPC_RMAP_CHG_SHIFT;
1035 *rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
1036 npages_dirty = 1;
1037 if (change_order > PAGE_SHIFT)
1038 npages_dirty = 1ul << (change_order - PAGE_SHIFT);
1039 }
1040 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 1038 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
1041 unlock_rmap(rmapp); 1039 unlock_rmap(rmapp);
1042 return npages_dirty; 1040 return npages_dirty;
@@ -1092,7 +1090,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
1092 rev[i].guest_rpte |= HPTE_R_C; 1090 rev[i].guest_rpte |= HPTE_R_C;
1093 note_hpte_modification(kvm, &rev[i]); 1091 note_hpte_modification(kvm, &rev[i]);
1094 } 1092 }
1095 n = hpte_page_size(v, r); 1093 n = kvmppc_actual_pgsz(v, r);
1096 n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT; 1094 n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
1097 if (n > npages_dirty) 1095 if (n > npages_dirty)
1098 npages_dirty = n; 1096 npages_dirty = n;
@@ -1128,7 +1126,7 @@ void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
1128long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm, 1126long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
1129 struct kvm_memory_slot *memslot, unsigned long *map) 1127 struct kvm_memory_slot *memslot, unsigned long *map)
1130{ 1128{
1131 unsigned long i, j; 1129 unsigned long i;
1132 unsigned long *rmapp; 1130 unsigned long *rmapp;
1133 1131
1134 preempt_disable(); 1132 preempt_disable();
@@ -1140,9 +1138,8 @@ long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
1140 * since we always put huge-page HPTEs in the rmap chain 1138 * since we always put huge-page HPTEs in the rmap chain
1141 * corresponding to their page base address. 1139 * corresponding to their page base address.
1142 */ 1140 */
1143 if (npages && map) 1141 if (npages)
1144 for (j = i; npages; ++j, --npages) 1142 set_dirty_bits(map, i, npages);
1145 __set_bit_le(j, map);
1146 ++rmapp; 1143 ++rmapp;
1147 } 1144 }
1148 preempt_enable(); 1145 preempt_enable();
@@ -1186,7 +1183,6 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
1186 struct page *page = virt_to_page(va); 1183 struct page *page = virt_to_page(va);
1187 struct kvm_memory_slot *memslot; 1184 struct kvm_memory_slot *memslot;
1188 unsigned long gfn; 1185 unsigned long gfn;
1189 unsigned long *rmap;
1190 int srcu_idx; 1186 int srcu_idx;
1191 1187
1192 put_page(page); 1188 put_page(page);
@@ -1194,20 +1190,12 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
1194 if (!dirty) 1190 if (!dirty)
1195 return; 1191 return;
1196 1192
1197 /* We need to mark this page dirty in the rmap chain */ 1193 /* We need to mark this page dirty in the memslot dirty_bitmap, if any */
1198 gfn = gpa >> PAGE_SHIFT; 1194 gfn = gpa >> PAGE_SHIFT;
1199 srcu_idx = srcu_read_lock(&kvm->srcu); 1195 srcu_idx = srcu_read_lock(&kvm->srcu);
1200 memslot = gfn_to_memslot(kvm, gfn); 1196 memslot = gfn_to_memslot(kvm, gfn);
1201 if (memslot) { 1197 if (memslot && memslot->dirty_bitmap)
1202 if (!kvm_is_radix(kvm)) { 1198 set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
1203 rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
1204 lock_rmap(rmap);
1205 *rmap |= KVMPPC_RMAP_CHANGED;
1206 unlock_rmap(rmap);
1207 } else if (memslot->dirty_bitmap) {
1208 mark_page_dirty(kvm, gfn);
1209 }
1210 }
1211 srcu_read_unlock(&kvm->srcu, srcu_idx); 1199 srcu_read_unlock(&kvm->srcu, srcu_idx);
1212} 1200}
1213 1201
@@ -1267,7 +1255,7 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
1267 guest_rpte = rev->guest_rpte; 1255 guest_rpte = rev->guest_rpte;
1268 1256
1269 ret = -EIO; 1257 ret = -EIO;
1270 apsize = hpte_page_size(vpte, guest_rpte); 1258 apsize = kvmppc_actual_pgsz(vpte, guest_rpte);
1271 if (!apsize) 1259 if (!apsize)
1272 goto out; 1260 goto out;
1273 1261
@@ -1282,7 +1270,7 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
1282 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; 1270 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
1283 1271
1284 lock_rmap(rmapp); 1272 lock_rmap(rmapp);
1285 kvmppc_unmap_hpte(kvm, idx, rmapp, gfn); 1273 kvmppc_unmap_hpte(kvm, idx, memslot, rmapp, gfn);
1286 unlock_rmap(rmapp); 1274 unlock_rmap(rmapp);
1287 } 1275 }
1288 1276
@@ -1455,7 +1443,7 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
1455 struct kvm_resize_hpt *resize; 1443 struct kvm_resize_hpt *resize;
1456 int ret; 1444 int ret;
1457 1445
1458 if (flags != 0) 1446 if (flags != 0 || kvm_is_radix(kvm))
1459 return -EINVAL; 1447 return -EINVAL;
1460 1448
1461 if (shift && ((shift < 18) || (shift > 46))) 1449 if (shift && ((shift < 18) || (shift > 46)))
@@ -1521,7 +1509,7 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
1521 struct kvm_resize_hpt *resize; 1509 struct kvm_resize_hpt *resize;
1522 long ret; 1510 long ret;
1523 1511
1524 if (flags != 0) 1512 if (flags != 0 || kvm_is_radix(kvm))
1525 return -EINVAL; 1513 return -EINVAL;
1526 1514
1527 if (shift && ((shift < 18) || (shift > 46))) 1515 if (shift && ((shift < 18) || (shift > 46)))
@@ -1533,15 +1521,15 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
1533 1521
1534 /* This shouldn't be possible */ 1522 /* This shouldn't be possible */
1535 ret = -EIO; 1523 ret = -EIO;
1536 if (WARN_ON(!kvm->arch.hpte_setup_done)) 1524 if (WARN_ON(!kvm->arch.mmu_ready))
1537 goto out_no_hpt; 1525 goto out_no_hpt;
1538 1526
1539 /* Stop VCPUs from running while we mess with the HPT */ 1527 /* Stop VCPUs from running while we mess with the HPT */
1540 kvm->arch.hpte_setup_done = 0; 1528 kvm->arch.mmu_ready = 0;
1541 smp_mb(); 1529 smp_mb();
1542 1530
1543 /* Boot all CPUs out of the guest so they re-read 1531 /* Boot all CPUs out of the guest so they re-read
1544 * hpte_setup_done */ 1532 * mmu_ready */
1545 on_each_cpu(resize_hpt_boot_vcpu, NULL, 1); 1533 on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);
1546 1534
1547 ret = -ENXIO; 1535 ret = -ENXIO;
@@ -1564,7 +1552,7 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
1564 1552
1565out: 1553out:
1566 /* Let VCPUs run again */ 1554 /* Let VCPUs run again */
1567 kvm->arch.hpte_setup_done = 1; 1555 kvm->arch.mmu_ready = 1;
1568 smp_mb(); 1556 smp_mb();
1569out_no_hpt: 1557out_no_hpt:
1570 resize_hpt_release(kvm, resize); 1558 resize_hpt_release(kvm, resize);
@@ -1707,6 +1695,8 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
1707 1695
1708 if (!access_ok(VERIFY_WRITE, buf, count)) 1696 if (!access_ok(VERIFY_WRITE, buf, count))
1709 return -EFAULT; 1697 return -EFAULT;
1698 if (kvm_is_radix(kvm))
1699 return 0;
1710 1700
1711 first_pass = ctx->first_pass; 1701 first_pass = ctx->first_pass;
1712 flags = ctx->flags; 1702 flags = ctx->flags;
@@ -1800,20 +1790,22 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1800 unsigned long tmp[2]; 1790 unsigned long tmp[2];
1801 ssize_t nb; 1791 ssize_t nb;
1802 long int err, ret; 1792 long int err, ret;
1803 int hpte_setup; 1793 int mmu_ready;
1804 1794
1805 if (!access_ok(VERIFY_READ, buf, count)) 1795 if (!access_ok(VERIFY_READ, buf, count))
1806 return -EFAULT; 1796 return -EFAULT;
1797 if (kvm_is_radix(kvm))
1798 return -EINVAL;
1807 1799
1808 /* lock out vcpus from running while we're doing this */ 1800 /* lock out vcpus from running while we're doing this */
1809 mutex_lock(&kvm->lock); 1801 mutex_lock(&kvm->lock);
1810 hpte_setup = kvm->arch.hpte_setup_done; 1802 mmu_ready = kvm->arch.mmu_ready;
1811 if (hpte_setup) { 1803 if (mmu_ready) {
1812 kvm->arch.hpte_setup_done = 0; /* temporarily */ 1804 kvm->arch.mmu_ready = 0; /* temporarily */
1813 /* order hpte_setup_done vs. vcpus_running */ 1805 /* order mmu_ready vs. vcpus_running */
1814 smp_mb(); 1806 smp_mb();
1815 if (atomic_read(&kvm->arch.vcpus_running)) { 1807 if (atomic_read(&kvm->arch.vcpus_running)) {
1816 kvm->arch.hpte_setup_done = 1; 1808 kvm->arch.mmu_ready = 1;
1817 mutex_unlock(&kvm->lock); 1809 mutex_unlock(&kvm->lock);
1818 return -EBUSY; 1810 return -EBUSY;
1819 } 1811 }
@@ -1866,7 +1858,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1866 "r=%lx\n", ret, i, v, r); 1858 "r=%lx\n", ret, i, v, r);
1867 goto out; 1859 goto out;
1868 } 1860 }
1869 if (!hpte_setup && is_vrma_hpte(v)) { 1861 if (!mmu_ready && is_vrma_hpte(v)) {
1870 unsigned long psize = hpte_base_page_size(v, r); 1862 unsigned long psize = hpte_base_page_size(v, r);
1871 unsigned long senc = slb_pgsize_encoding(psize); 1863 unsigned long senc = slb_pgsize_encoding(psize);
1872 unsigned long lpcr; 1864 unsigned long lpcr;
@@ -1875,7 +1867,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1875 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1867 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1876 lpcr = senc << (LPCR_VRMASD_SH - 4); 1868 lpcr = senc << (LPCR_VRMASD_SH - 4);
1877 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); 1869 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
1878 hpte_setup = 1; 1870 mmu_ready = 1;
1879 } 1871 }
1880 ++i; 1872 ++i;
1881 hptp += 2; 1873 hptp += 2;
@@ -1891,9 +1883,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1891 } 1883 }
1892 1884
1893 out: 1885 out:
1894 /* Order HPTE updates vs. hpte_setup_done */ 1886 /* Order HPTE updates vs. mmu_ready */
1895 smp_wmb(); 1887 smp_wmb();
1896 kvm->arch.hpte_setup_done = hpte_setup; 1888 kvm->arch.mmu_ready = mmu_ready;
1897 mutex_unlock(&kvm->lock); 1889 mutex_unlock(&kvm->lock);
1898 1890
1899 if (err) 1891 if (err)
@@ -2002,6 +1994,10 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
2002 struct kvm *kvm; 1994 struct kvm *kvm;
2003 __be64 *hptp; 1995 __be64 *hptp;
2004 1996
1997 kvm = p->kvm;
1998 if (kvm_is_radix(kvm))
1999 return 0;
2000
2005 ret = mutex_lock_interruptible(&p->mutex); 2001 ret = mutex_lock_interruptible(&p->mutex);
2006 if (ret) 2002 if (ret)
2007 return ret; 2003 return ret;
@@ -2024,7 +2020,6 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
2024 } 2020 }
2025 } 2021 }
2026 2022
2027 kvm = p->kvm;
2028 i = p->hpt_index; 2023 i = p->hpt_index;
2029 hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE)); 2024 hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
2030 for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt); 2025 for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
@@ -2099,10 +2094,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
2099 2094
2100 vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ 2095 vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
2101 2096
2102 if (kvm_is_radix(vcpu->kvm)) 2097 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
2103 mmu->xlate = kvmppc_mmu_radix_xlate;
2104 else
2105 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
2106 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 2098 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
2107 2099
2108 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; 2100 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index c5d7435455f1..58618f644c56 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -474,26 +474,6 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
474 return ret; 474 return ret;
475} 475}
476 476
477static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
478 unsigned long gfn, unsigned int order)
479{
480 unsigned long i, limit;
481 unsigned long *dp;
482
483 if (!memslot->dirty_bitmap)
484 return;
485 limit = 1ul << order;
486 if (limit < BITS_PER_LONG) {
487 for (i = 0; i < limit; ++i)
488 mark_page_dirty(kvm, gfn + i);
489 return;
490 }
491 dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn);
492 limit /= BITS_PER_LONG;
493 for (i = 0; i < limit; ++i)
494 *dp++ = ~0ul;
495}
496
497/* Called with kvm->lock held */ 477/* Called with kvm->lock held */
498int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 478int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
499 unsigned long gfn) 479 unsigned long gfn)
@@ -508,12 +488,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
508 old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, 488 old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
509 gpa, shift); 489 gpa, shift);
510 kvmppc_radix_tlbie_page(kvm, gpa, shift); 490 kvmppc_radix_tlbie_page(kvm, gpa, shift);
511 if (old & _PAGE_DIRTY) { 491 if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
512 if (!shift) 492 unsigned long npages = 1;
513 mark_page_dirty(kvm, gfn); 493 if (shift)
514 else 494 npages = 1ul << (shift - PAGE_SHIFT);
515 mark_pages_dirty(kvm, memslot, 495 kvmppc_update_dirty_map(memslot, gfn, npages);
516 gfn, shift - PAGE_SHIFT);
517 } 496 }
518 } 497 }
519 return 0; 498 return 0;
@@ -579,20 +558,8 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
579 struct kvm_memory_slot *memslot, unsigned long *map) 558 struct kvm_memory_slot *memslot, unsigned long *map)
580{ 559{
581 unsigned long i, j; 560 unsigned long i, j;
582 unsigned long n, *p;
583 int npages; 561 int npages;
584 562
585 /*
586 * Radix accumulates dirty bits in the first half of the
587 * memslot's dirty_bitmap area, for when pages are paged
588 * out or modified by the host directly. Pick up these
589 * bits and add them to the map.
590 */
591 n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long);
592 p = memslot->dirty_bitmap;
593 for (i = 0; i < n; ++i)
594 map[i] |= xchg(&p[i], 0);
595
596 for (i = 0; i < memslot->npages; i = j) { 563 for (i = 0; i < memslot->npages; i = j) {
597 npages = kvm_radix_test_clear_dirty(kvm, memslot, i); 564 npages = kvm_radix_test_clear_dirty(kvm, memslot, i);
598 565
@@ -604,9 +571,10 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
604 * real address, if npages > 1 we can skip to i + npages. 571 * real address, if npages > 1 we can skip to i + npages.
605 */ 572 */
606 j = i + 1; 573 j = i + 1;
607 if (npages) 574 if (npages) {
608 for (j = i; npages; ++j, --npages) 575 set_dirty_bits(map, i, npages);
609 __set_bit_le(j, map); 576 i = j + npages;
577 }
610 } 578 }
611 return 0; 579 return 0;
612} 580}
@@ -694,6 +662,7 @@ void kvmppc_free_radix(struct kvm *kvm)
694 pgd_clear(pgd); 662 pgd_clear(pgd);
695 } 663 }
696 pgd_free(kvm->mm, kvm->arch.pgtable); 664 pgd_free(kvm->mm, kvm->arch.pgtable);
665 kvm->arch.pgtable = NULL;
697} 666}
698 667
699static void pte_ctor(void *addr) 668static void pte_ctor(void *addr)
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 3589c4e3d49b..688722acd692 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -113,7 +113,7 @@ slb_do_enter:
113 113
114 /* Remove all SLB entries that are in use. */ 114 /* Remove all SLB entries that are in use. */
115 115
116 li r0, r0 116 li r0, 0
117 slbmte r0, r0 117 slbmte r0, r0
118 slbia 118 slbia
119 119
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 73bf1ebfa78f..fff62fdf1464 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
22#include <linux/kernel.h>
22#include <linux/err.h> 23#include <linux/err.h>
23#include <linux/slab.h> 24#include <linux/slab.h>
24#include <linux/preempt.h> 25#include <linux/preempt.h>
@@ -97,6 +98,10 @@ static int target_smt_mode;
97module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); 98module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
98MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); 99MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
99 100
101static bool indep_threads_mode = true;
102module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
103MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
104
100#ifdef CONFIG_KVM_XICS 105#ifdef CONFIG_KVM_XICS
101static struct kernel_param_ops module_param_ops = { 106static struct kernel_param_ops module_param_ops = {
102 .set = param_set_int, 107 .set = param_set_int,
@@ -114,6 +119,7 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
114 119
115static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 120static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
116static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 121static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
122static void kvmppc_setup_partition_table(struct kvm *kvm);
117 123
118static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, 124static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
119 int *ip) 125 int *ip)
@@ -1732,9 +1738,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1732 * MMU mode (radix or HPT), unfortunately, but since we only support 1738 * MMU mode (radix or HPT), unfortunately, but since we only support
1733 * HPT guests on a HPT host so far, that isn't an impediment yet. 1739 * HPT guests on a HPT host so far, that isn't an impediment yet.
1734 */ 1740 */
1735static int threads_per_vcore(void) 1741static int threads_per_vcore(struct kvm *kvm)
1736{ 1742{
1737 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1743 if (kvm->arch.threads_indep)
1738 return 1; 1744 return 1;
1739 return threads_per_subcore; 1745 return threads_per_subcore;
1740} 1746}
@@ -1772,7 +1778,7 @@ static struct debugfs_timings_element {
1772 {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, 1778 {"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
1773}; 1779};
1774 1780
1775#define N_TIMINGS (sizeof(timings) / sizeof(timings[0])) 1781#define N_TIMINGS (ARRAY_SIZE(timings))
1776 1782
1777struct debugfs_timings_state { 1783struct debugfs_timings_state {
1778 struct kvm_vcpu *vcpu; 1784 struct kvm_vcpu *vcpu;
@@ -2117,15 +2123,6 @@ static int kvmppc_grab_hwthread(int cpu)
2117 struct paca_struct *tpaca; 2123 struct paca_struct *tpaca;
2118 long timeout = 10000; 2124 long timeout = 10000;
2119 2125
2120 /*
2121 * ISA v3.0 idle routines do not set hwthread_state or test
2122 * hwthread_req, so they can not grab idle threads.
2123 */
2124 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
2125 WARN(1, "KVM: can not control sibling threads\n");
2126 return -EBUSY;
2127 }
2128
2129 tpaca = &paca[cpu]; 2126 tpaca = &paca[cpu];
2130 2127
2131 /* Ensure the thread won't go into the kernel if it wakes */ 2128 /* Ensure the thread won't go into the kernel if it wakes */
@@ -2160,12 +2157,10 @@ static void kvmppc_release_hwthread(int cpu)
2160 struct paca_struct *tpaca; 2157 struct paca_struct *tpaca;
2161 2158
2162 tpaca = &paca[cpu]; 2159 tpaca = &paca[cpu];
2160 tpaca->kvm_hstate.hwthread_req = 0;
2163 tpaca->kvm_hstate.kvm_vcpu = NULL; 2161 tpaca->kvm_hstate.kvm_vcpu = NULL;
2164 tpaca->kvm_hstate.kvm_vcore = NULL; 2162 tpaca->kvm_hstate.kvm_vcore = NULL;
2165 tpaca->kvm_hstate.kvm_split_mode = NULL; 2163 tpaca->kvm_hstate.kvm_split_mode = NULL;
2166 if (!cpu_has_feature(CPU_FTR_ARCH_300))
2167 tpaca->kvm_hstate.hwthread_req = 0;
2168
2169} 2164}
2170 2165
2171static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) 2166static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
@@ -2237,11 +2232,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
2237 kvmppc_ipi_thread(cpu); 2232 kvmppc_ipi_thread(cpu);
2238} 2233}
2239 2234
2240static void kvmppc_wait_for_nap(void) 2235static void kvmppc_wait_for_nap(int n_threads)
2241{ 2236{
2242 int cpu = smp_processor_id(); 2237 int cpu = smp_processor_id();
2243 int i, loops; 2238 int i, loops;
2244 int n_threads = threads_per_vcore();
2245 2239
2246 if (n_threads <= 1) 2240 if (n_threads <= 1)
2247 return; 2241 return;
@@ -2328,7 +2322,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
2328 2322
2329 vc->vcore_state = VCORE_PREEMPT; 2323 vc->vcore_state = VCORE_PREEMPT;
2330 vc->pcpu = smp_processor_id(); 2324 vc->pcpu = smp_processor_id();
2331 if (vc->num_threads < threads_per_vcore()) { 2325 if (vc->num_threads < threads_per_vcore(vc->kvm)) {
2332 spin_lock(&lp->lock); 2326 spin_lock(&lp->lock);
2333 list_add_tail(&vc->preempt_list, &lp->list); 2327 list_add_tail(&vc->preempt_list, &lp->list);
2334 spin_unlock(&lp->lock); 2328 spin_unlock(&lp->lock);
@@ -2366,7 +2360,7 @@ struct core_info {
2366 2360
2367/* 2361/*
2368 * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7 2362 * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
2369 * respectively in 2-way micro-threading (split-core) mode. 2363 * respectively in 2-way micro-threading (split-core) mode on POWER8.
2370 */ 2364 */
2371static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 }; 2365static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
2372 2366
@@ -2382,7 +2376,14 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
2382 2376
2383static bool subcore_config_ok(int n_subcores, int n_threads) 2377static bool subcore_config_ok(int n_subcores, int n_threads)
2384{ 2378{
2385 /* Can only dynamically split if unsplit to begin with */ 2379 /*
2380 * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
2381 * mode, with one thread per subcore.
2382 */
2383 if (cpu_has_feature(CPU_FTR_ARCH_300))
2384 return n_subcores <= 4 && n_threads == 1;
2385
2386 /* On POWER8, can only dynamically split if unsplit to begin with */
2386 if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS) 2387 if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
2387 return false; 2388 return false;
2388 if (n_subcores > MAX_SUBCORES) 2389 if (n_subcores > MAX_SUBCORES)
@@ -2413,6 +2414,11 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2413 if (!cpu_has_feature(CPU_FTR_ARCH_207S)) 2414 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2414 return false; 2415 return false;
2415 2416
2417 /* POWER9 currently requires all threads to be in the same MMU mode */
2418 if (cpu_has_feature(CPU_FTR_ARCH_300) &&
2419 kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
2420 return false;
2421
2416 if (n_threads < cip->max_subcore_threads) 2422 if (n_threads < cip->max_subcore_threads)
2417 n_threads = cip->max_subcore_threads; 2423 n_threads = cip->max_subcore_threads;
2418 if (!subcore_config_ok(cip->n_subcores + 1, n_threads)) 2424 if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
@@ -2638,6 +2644,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2638 int target_threads; 2644 int target_threads;
2639 int controlled_threads; 2645 int controlled_threads;
2640 int trap; 2646 int trap;
2647 bool is_power8;
2648 bool hpt_on_radix;
2641 2649
2642 /* 2650 /*
2643 * Remove from the list any threads that have a signal pending 2651 * Remove from the list any threads that have a signal pending
@@ -2660,15 +2668,19 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2660 * the number of threads per subcore, except on POWER9, 2668 * the number of threads per subcore, except on POWER9,
2661 * where it's 1 because the threads are (mostly) independent. 2669 * where it's 1 because the threads are (mostly) independent.
2662 */ 2670 */
2663 controlled_threads = threads_per_vcore(); 2671 controlled_threads = threads_per_vcore(vc->kvm);
2664 2672
2665 /* 2673 /*
2666 * Make sure we are running on primary threads, and that secondary 2674 * Make sure we are running on primary threads, and that secondary
2667 * threads are offline. Also check if the number of threads in this 2675 * threads are offline. Also check if the number of threads in this
2668 * guest are greater than the current system threads per guest. 2676 * guest are greater than the current system threads per guest.
2677 * On POWER9, we need to be not in independent-threads mode if
2678 * this is a HPT guest on a radix host.
2669 */ 2679 */
2670 if ((controlled_threads > 1) && 2680 hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
2671 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { 2681 if (((controlled_threads > 1) &&
2682 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
2683 (hpt_on_radix && vc->kvm->arch.threads_indep)) {
2672 for_each_runnable_thread(i, vcpu, vc) { 2684 for_each_runnable_thread(i, vcpu, vc) {
2673 vcpu->arch.ret = -EBUSY; 2685 vcpu->arch.ret = -EBUSY;
2674 kvmppc_remove_runnable(vc, vcpu); 2686 kvmppc_remove_runnable(vc, vcpu);
@@ -2731,32 +2743,51 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2731 cmd_bit = stat_bit = 0; 2743 cmd_bit = stat_bit = 0;
2732 split = core_info.n_subcores; 2744 split = core_info.n_subcores;
2733 sip = NULL; 2745 sip = NULL;
2734 if (split > 1) { 2746 is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
2735 /* threads_per_subcore must be MAX_SMT_THREADS (8) here */ 2747 && !cpu_has_feature(CPU_FTR_ARCH_300);
2736 if (split == 2 && (dynamic_mt_modes & 2)) { 2748
2737 cmd_bit = HID0_POWER8_1TO2LPAR; 2749 if (split > 1 || hpt_on_radix) {
2738 stat_bit = HID0_POWER8_2LPARMODE;
2739 } else {
2740 split = 4;
2741 cmd_bit = HID0_POWER8_1TO4LPAR;
2742 stat_bit = HID0_POWER8_4LPARMODE;
2743 }
2744 subcore_size = MAX_SMT_THREADS / split;
2745 sip = &split_info; 2750 sip = &split_info;
2746 memset(&split_info, 0, sizeof(split_info)); 2751 memset(&split_info, 0, sizeof(split_info));
2747 split_info.rpr = mfspr(SPRN_RPR);
2748 split_info.pmmar = mfspr(SPRN_PMMAR);
2749 split_info.ldbar = mfspr(SPRN_LDBAR);
2750 split_info.subcore_size = subcore_size;
2751 for (sub = 0; sub < core_info.n_subcores; ++sub) 2752 for (sub = 0; sub < core_info.n_subcores; ++sub)
2752 split_info.vc[sub] = core_info.vc[sub]; 2753 split_info.vc[sub] = core_info.vc[sub];
2754
2755 if (is_power8) {
2756 if (split == 2 && (dynamic_mt_modes & 2)) {
2757 cmd_bit = HID0_POWER8_1TO2LPAR;
2758 stat_bit = HID0_POWER8_2LPARMODE;
2759 } else {
2760 split = 4;
2761 cmd_bit = HID0_POWER8_1TO4LPAR;
2762 stat_bit = HID0_POWER8_4LPARMODE;
2763 }
2764 subcore_size = MAX_SMT_THREADS / split;
2765 split_info.rpr = mfspr(SPRN_RPR);
2766 split_info.pmmar = mfspr(SPRN_PMMAR);
2767 split_info.ldbar = mfspr(SPRN_LDBAR);
2768 split_info.subcore_size = subcore_size;
2769 } else {
2770 split_info.subcore_size = 1;
2771 if (hpt_on_radix) {
2772 /* Use the split_info for LPCR/LPIDR changes */
2773 split_info.lpcr_req = vc->lpcr;
2774 split_info.lpidr_req = vc->kvm->arch.lpid;
2775 split_info.host_lpcr = vc->kvm->arch.host_lpcr;
2776 split_info.do_set = 1;
2777 }
2778 }
2779
2753 /* order writes to split_info before kvm_split_mode pointer */ 2780 /* order writes to split_info before kvm_split_mode pointer */
2754 smp_wmb(); 2781 smp_wmb();
2755 } 2782 }
2756 for (thr = 0; thr < controlled_threads; ++thr) 2783
2784 for (thr = 0; thr < controlled_threads; ++thr) {
2785 paca[pcpu + thr].kvm_hstate.tid = thr;
2786 paca[pcpu + thr].kvm_hstate.napping = 0;
2757 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; 2787 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
2788 }
2758 2789
2759 /* Initiate micro-threading (split-core) if required */ 2790 /* Initiate micro-threading (split-core) on POWER8 if required */
2760 if (cmd_bit) { 2791 if (cmd_bit) {
2761 unsigned long hid0 = mfspr(SPRN_HID0); 2792 unsigned long hid0 = mfspr(SPRN_HID0);
2762 2793
@@ -2775,7 +2806,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2775 /* Start all the threads */ 2806 /* Start all the threads */
2776 active = 0; 2807 active = 0;
2777 for (sub = 0; sub < core_info.n_subcores; ++sub) { 2808 for (sub = 0; sub < core_info.n_subcores; ++sub) {
2778 thr = subcore_thread_map[sub]; 2809 thr = is_power8 ? subcore_thread_map[sub] : sub;
2779 thr0_done = false; 2810 thr0_done = false;
2780 active |= 1 << thr; 2811 active |= 1 << thr;
2781 pvc = core_info.vc[sub]; 2812 pvc = core_info.vc[sub];
@@ -2802,18 +2833,20 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2802 * the vcore pointer in the PACA of the secondaries. 2833 * the vcore pointer in the PACA of the secondaries.
2803 */ 2834 */
2804 smp_mb(); 2835 smp_mb();
2805 if (cmd_bit)
2806 split_info.do_nap = 1; /* ask secondaries to nap when done */
2807 2836
2808 /* 2837 /*
2809 * When doing micro-threading, poke the inactive threads as well. 2838 * When doing micro-threading, poke the inactive threads as well.
2810 * This gets them to the nap instruction after kvm_do_nap, 2839 * This gets them to the nap instruction after kvm_do_nap,
2811 * which reduces the time taken to unsplit later. 2840 * which reduces the time taken to unsplit later.
2841 * For POWER9 HPT guest on radix host, we need all the secondary
2842 * threads woken up so they can do the LPCR/LPIDR change.
2812 */ 2843 */
2813 if (split > 1) 2844 if (cmd_bit || hpt_on_radix) {
2845 split_info.do_nap = 1; /* ask secondaries to nap when done */
2814 for (thr = 1; thr < threads_per_subcore; ++thr) 2846 for (thr = 1; thr < threads_per_subcore; ++thr)
2815 if (!(active & (1 << thr))) 2847 if (!(active & (1 << thr)))
2816 kvmppc_ipi_thread(pcpu + thr); 2848 kvmppc_ipi_thread(pcpu + thr);
2849 }
2817 2850
2818 vc->vcore_state = VCORE_RUNNING; 2851 vc->vcore_state = VCORE_RUNNING;
2819 preempt_disable(); 2852 preempt_disable();
@@ -2847,10 +2880,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2847 vc->vcore_state = VCORE_EXITING; 2880 vc->vcore_state = VCORE_EXITING;
2848 2881
2849 /* wait for secondary threads to finish writing their state to memory */ 2882 /* wait for secondary threads to finish writing their state to memory */
2850 kvmppc_wait_for_nap(); 2883 kvmppc_wait_for_nap(controlled_threads);
2851 2884
2852 /* Return to whole-core mode if we split the core earlier */ 2885 /* Return to whole-core mode if we split the core earlier */
2853 if (split > 1) { 2886 if (cmd_bit) {
2854 unsigned long hid0 = mfspr(SPRN_HID0); 2887 unsigned long hid0 = mfspr(SPRN_HID0);
2855 unsigned long loops = 0; 2888 unsigned long loops = 0;
2856 2889
@@ -2866,8 +2899,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2866 cpu_relax(); 2899 cpu_relax();
2867 ++loops; 2900 ++loops;
2868 } 2901 }
2869 split_info.do_nap = 0; 2902 } else if (hpt_on_radix) {
2903 /* Wait for all threads to have seen final sync */
2904 for (thr = 1; thr < controlled_threads; ++thr) {
2905 while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
2906 HMT_low();
2907 barrier();
2908 }
2909 HMT_medium();
2910 }
2870 } 2911 }
2912 split_info.do_nap = 0;
2871 2913
2872 kvmppc_set_host_core(pcpu); 2914 kvmppc_set_host_core(pcpu);
2873 2915
@@ -3208,6 +3250,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
3208 unsigned long ebb_regs[3] = {}; /* shut up GCC */ 3250 unsigned long ebb_regs[3] = {}; /* shut up GCC */
3209 unsigned long user_tar = 0; 3251 unsigned long user_tar = 0;
3210 unsigned int user_vrsave; 3252 unsigned int user_vrsave;
3253 struct kvm *kvm;
3211 3254
3212 if (!vcpu->arch.sane) { 3255 if (!vcpu->arch.sane) {
3213 run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3256 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -3245,13 +3288,25 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
3245 return -EINTR; 3288 return -EINTR;
3246 } 3289 }
3247 3290
3248 atomic_inc(&vcpu->kvm->arch.vcpus_running); 3291 kvm = vcpu->kvm;
3249 /* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */ 3292 atomic_inc(&kvm->arch.vcpus_running);
3293 /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
3250 smp_mb(); 3294 smp_mb();
3251 3295
3252 /* On the first time here, set up HTAB and VRMA */ 3296 /* On the first time here, set up MMU if necessary */
3253 if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) { 3297 if (!vcpu->kvm->arch.mmu_ready) {
3254 r = kvmppc_hv_setup_htab_rma(vcpu); 3298 mutex_lock(&kvm->lock);
3299 r = 0;
3300 if (!kvm->arch.mmu_ready) {
3301 if (!kvm_is_radix(vcpu->kvm))
3302 r = kvmppc_hv_setup_htab_rma(vcpu);
3303 if (!r) {
3304 if (cpu_has_feature(CPU_FTR_ARCH_300))
3305 kvmppc_setup_partition_table(kvm);
3306 kvm->arch.mmu_ready = 1;
3307 }
3308 }
3309 mutex_unlock(&kvm->lock);
3255 if (r) 3310 if (r)
3256 goto out; 3311 goto out;
3257 } 3312 }
@@ -3310,22 +3365,21 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
3310} 3365}
3311 3366
3312static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, 3367static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
3313 int linux_psize) 3368 int shift, int sllp)
3314{ 3369{
3315 struct mmu_psize_def *def = &mmu_psize_defs[linux_psize]; 3370 (*sps)->page_shift = shift;
3316 3371 (*sps)->slb_enc = sllp;
3317 if (!def->shift) 3372 (*sps)->enc[0].page_shift = shift;
3318 return; 3373 (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
3319 (*sps)->page_shift = def->shift;
3320 (*sps)->slb_enc = def->sllp;
3321 (*sps)->enc[0].page_shift = def->shift;
3322 (*sps)->enc[0].pte_enc = def->penc[linux_psize];
3323 /* 3374 /*
3324 * Add 16MB MPSS support if host supports it 3375 * Add 16MB MPSS support (may get filtered out by userspace)
3325 */ 3376 */
3326 if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) { 3377 if (shift != 24) {
3327 (*sps)->enc[1].page_shift = 24; 3378 int penc = kvmppc_pgsize_lp_encoding(shift, 24);
3328 (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M]; 3379 if (penc != -1) {
3380 (*sps)->enc[1].page_shift = 24;
3381 (*sps)->enc[1].pte_enc = penc;
3382 }
3329 } 3383 }
3330 (*sps)++; 3384 (*sps)++;
3331} 3385}
@@ -3336,13 +3390,6 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
3336 struct kvm_ppc_one_seg_page_size *sps; 3390 struct kvm_ppc_one_seg_page_size *sps;
3337 3391
3338 /* 3392 /*
3339 * Since we don't yet support HPT guests on a radix host,
3340 * return an error if the host uses radix.
3341 */
3342 if (radix_enabled())
3343 return -EINVAL;
3344
3345 /*
3346 * POWER7, POWER8 and POWER9 all support 32 storage keys for data. 3393 * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
3347 * POWER7 doesn't support keys for instruction accesses, 3394 * POWER7 doesn't support keys for instruction accesses,
3348 * POWER8 and POWER9 do. 3395 * POWER8 and POWER9 do.
@@ -3350,16 +3397,15 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
3350 info->data_keys = 32; 3397 info->data_keys = 32;
3351 info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0; 3398 info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
3352 3399
3353 info->flags = KVM_PPC_PAGE_SIZES_REAL; 3400 /* POWER7, 8 and 9 all have 1T segments and 32-entry SLB */
3354 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 3401 info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
3355 info->flags |= KVM_PPC_1T_SEGMENTS; 3402 info->slb_size = 32;
3356 info->slb_size = mmu_slb_size;
3357 3403
3358 /* We only support these sizes for now, and no muti-size segments */ 3404 /* We only support these sizes for now, and no muti-size segments */
3359 sps = &info->sps[0]; 3405 sps = &info->sps[0];
3360 kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K); 3406 kvmppc_add_seg_page_size(&sps, 12, 0);
3361 kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K); 3407 kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
3362 kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M); 3408 kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
3363 3409
3364 return 0; 3410 return 0;
3365} 3411}
@@ -3374,7 +3420,7 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
3374 struct kvm_memory_slot *memslot; 3420 struct kvm_memory_slot *memslot;
3375 int i, r; 3421 int i, r;
3376 unsigned long n; 3422 unsigned long n;
3377 unsigned long *buf; 3423 unsigned long *buf, *p;
3378 struct kvm_vcpu *vcpu; 3424 struct kvm_vcpu *vcpu;
3379 3425
3380 mutex_lock(&kvm->slots_lock); 3426 mutex_lock(&kvm->slots_lock);
@@ -3390,8 +3436,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
3390 goto out; 3436 goto out;
3391 3437
3392 /* 3438 /*
3393 * Use second half of bitmap area because radix accumulates 3439 * Use second half of bitmap area because both HPT and radix
3394 * bits in the first half. 3440 * accumulate bits in the first half.
3395 */ 3441 */
3396 n = kvm_dirty_bitmap_bytes(memslot); 3442 n = kvm_dirty_bitmap_bytes(memslot);
3397 buf = memslot->dirty_bitmap + n / sizeof(long); 3443 buf = memslot->dirty_bitmap + n / sizeof(long);
@@ -3404,6 +3450,16 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
3404 if (r) 3450 if (r)
3405 goto out; 3451 goto out;
3406 3452
3453 /*
3454 * We accumulate dirty bits in the first half of the
3455 * memslot's dirty_bitmap area, for when pages are paged
3456 * out or modified by the host directly. Pick up these
3457 * bits and add them to the map.
3458 */
3459 p = memslot->dirty_bitmap;
3460 for (i = 0; i < n / sizeof(long); ++i)
3461 buf[i] |= xchg(&p[i], 0);
3462
3407 /* Harvest dirty bits from VPA and DTL updates */ 3463 /* Harvest dirty bits from VPA and DTL updates */
3408 /* Note: we never modify the SLB shadow buffer areas */ 3464 /* Note: we never modify the SLB shadow buffer areas */
3409 kvm_for_each_vcpu(i, vcpu, kvm) { 3465 kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3435,15 +3491,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
3435static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, 3491static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
3436 unsigned long npages) 3492 unsigned long npages)
3437{ 3493{
3438 /*
3439 * For now, if radix_enabled() then we only support radix guests,
3440 * and in that case we don't need the rmap array.
3441 */
3442 if (radix_enabled()) {
3443 slot->arch.rmap = NULL;
3444 return 0;
3445 }
3446
3447 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 3494 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
3448 if (!slot->arch.rmap) 3495 if (!slot->arch.rmap)
3449 return -ENOMEM; 3496 return -ENOMEM;
@@ -3464,8 +3511,6 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
3464 const struct kvm_memory_slot *new) 3511 const struct kvm_memory_slot *new)
3465{ 3512{
3466 unsigned long npages = mem->memory_size >> PAGE_SHIFT; 3513 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
3467 struct kvm_memslots *slots;
3468 struct kvm_memory_slot *memslot;
3469 3514
3470 /* 3515 /*
3471 * If we are making a new memslot, it might make 3516 * If we are making a new memslot, it might make
@@ -3475,18 +3520,6 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
3475 */ 3520 */
3476 if (npages) 3521 if (npages)
3477 atomic64_inc(&kvm->arch.mmio_update); 3522 atomic64_inc(&kvm->arch.mmio_update);
3478
3479 if (npages && old->npages && !kvm_is_radix(kvm)) {
3480 /*
3481 * If modifying a memslot, reset all the rmap dirty bits.
3482 * If this is a new memslot, we don't need to do anything
3483 * since the rmap array starts out as all zeroes,
3484 * i.e. no pages are dirty.
3485 */
3486 slots = kvm_memslots(kvm);
3487 memslot = id_to_memslot(slots, mem->slot);
3488 kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL);
3489 }
3490} 3523}
3491 3524
3492/* 3525/*
@@ -3542,6 +3575,10 @@ static void kvmppc_setup_partition_table(struct kvm *kvm)
3542 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); 3575 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
3543} 3576}
3544 3577
3578/*
3579 * Set up HPT (hashed page table) and RMA (real-mode area).
3580 * Must be called with kvm->lock held.
3581 */
3545static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) 3582static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3546{ 3583{
3547 int err = 0; 3584 int err = 0;
@@ -3553,10 +3590,6 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3553 unsigned long psize, porder; 3590 unsigned long psize, porder;
3554 int srcu_idx; 3591 int srcu_idx;
3555 3592
3556 mutex_lock(&kvm->lock);
3557 if (kvm->arch.hpte_setup_done)
3558 goto out; /* another vcpu beat us to it */
3559
3560 /* Allocate hashed page table (if not done already) and reset it */ 3593 /* Allocate hashed page table (if not done already) and reset it */
3561 if (!kvm->arch.hpt.virt) { 3594 if (!kvm->arch.hpt.virt) {
3562 int order = KVM_DEFAULT_HPT_ORDER; 3595 int order = KVM_DEFAULT_HPT_ORDER;
@@ -3615,18 +3648,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3615 /* the -4 is to account for senc values starting at 0x10 */ 3648 /* the -4 is to account for senc values starting at 0x10 */
3616 lpcr = senc << (LPCR_VRMASD_SH - 4); 3649 lpcr = senc << (LPCR_VRMASD_SH - 4);
3617 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); 3650 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
3618 } else {
3619 kvmppc_setup_partition_table(kvm);
3620 } 3651 }
3621 3652
3622 /* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */ 3653 /* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
3623 smp_wmb(); 3654 smp_wmb();
3624 kvm->arch.hpte_setup_done = 1;
3625 err = 0; 3655 err = 0;
3626 out_srcu: 3656 out_srcu:
3627 srcu_read_unlock(&kvm->srcu, srcu_idx); 3657 srcu_read_unlock(&kvm->srcu, srcu_idx);
3628 out: 3658 out:
3629 mutex_unlock(&kvm->lock);
3630 return err; 3659 return err;
3631 3660
3632 up_out: 3661 up_out:
@@ -3634,6 +3663,34 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3634 goto out_srcu; 3663 goto out_srcu;
3635} 3664}
3636 3665
3666/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
3667int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
3668{
3669 kvmppc_free_radix(kvm);
3670 kvmppc_update_lpcr(kvm, LPCR_VPM1,
3671 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
3672 kvmppc_rmap_reset(kvm);
3673 kvm->arch.radix = 0;
3674 kvm->arch.process_table = 0;
3675 return 0;
3676}
3677
3678/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
3679int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
3680{
3681 int err;
3682
3683 err = kvmppc_init_vm_radix(kvm);
3684 if (err)
3685 return err;
3686
3687 kvmppc_free_hpt(&kvm->arch.hpt);
3688 kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
3689 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
3690 kvm->arch.radix = 1;
3691 return 0;
3692}
3693
3637#ifdef CONFIG_KVM_XICS 3694#ifdef CONFIG_KVM_XICS
3638/* 3695/*
3639 * Allocate a per-core structure for managing state about which cores are 3696 * Allocate a per-core structure for managing state about which cores are
@@ -3777,10 +3834,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3777 } 3834 }
3778 3835
3779 /* 3836 /*
3780 * For now, if the host uses radix, the guest must be radix. 3837 * If the host uses radix, the guest starts out as radix.
3781 */ 3838 */
3782 if (radix_enabled()) { 3839 if (radix_enabled()) {
3783 kvm->arch.radix = 1; 3840 kvm->arch.radix = 1;
3841 kvm->arch.mmu_ready = 1;
3784 lpcr &= ~LPCR_VPM1; 3842 lpcr &= ~LPCR_VPM1;
3785 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; 3843 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
3786 ret = kvmppc_init_vm_radix(kvm); 3844 ret = kvmppc_init_vm_radix(kvm);
@@ -3800,7 +3858,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3800 * Work out how many sets the TLB has, for the use of 3858 * Work out how many sets the TLB has, for the use of
3801 * the TLB invalidation loop in book3s_hv_rmhandlers.S. 3859 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
3802 */ 3860 */
3803 if (kvm_is_radix(kvm)) 3861 if (radix_enabled())
3804 kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */ 3862 kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */
3805 else if (cpu_has_feature(CPU_FTR_ARCH_300)) 3863 else if (cpu_has_feature(CPU_FTR_ARCH_300))
3806 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ 3864 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
@@ -3812,10 +3870,12 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3812 /* 3870 /*
3813 * Track that we now have a HV mode VM active. This blocks secondary 3871 * Track that we now have a HV mode VM active. This blocks secondary
3814 * CPU threads from coming online. 3872 * CPU threads from coming online.
3815 * On POWER9, we only need to do this for HPT guests on a radix 3873 * On POWER9, we only need to do this if the "indep_threads_mode"
3816 * host, which is not yet supported. 3874 * module parameter has been set to N.
3817 */ 3875 */
3818 if (!cpu_has_feature(CPU_FTR_ARCH_300)) 3876 if (cpu_has_feature(CPU_FTR_ARCH_300))
3877 kvm->arch.threads_indep = indep_threads_mode;
3878 if (!kvm->arch.threads_indep)
3819 kvm_hv_vm_activated(); 3879 kvm_hv_vm_activated();
3820 3880
3821 /* 3881 /*
@@ -3855,7 +3915,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
3855{ 3915{
3856 debugfs_remove_recursive(kvm->arch.debugfs_dir); 3916 debugfs_remove_recursive(kvm->arch.debugfs_dir);
3857 3917
3858 if (!cpu_has_feature(CPU_FTR_ARCH_300)) 3918 if (!kvm->arch.threads_indep)
3859 kvm_hv_vm_deactivated(); 3919 kvm_hv_vm_deactivated();
3860 3920
3861 kvmppc_free_vcores(kvm); 3921 kvmppc_free_vcores(kvm);
@@ -4190,6 +4250,7 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
4190{ 4250{
4191 unsigned long lpcr; 4251 unsigned long lpcr;
4192 int radix; 4252 int radix;
4253 int err;
4193 4254
4194 /* If not on a POWER9, reject it */ 4255 /* If not on a POWER9, reject it */
4195 if (!cpu_has_feature(CPU_FTR_ARCH_300)) 4256 if (!cpu_has_feature(CPU_FTR_ARCH_300))
@@ -4199,12 +4260,8 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
4199 if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE)) 4260 if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
4200 return -EINVAL; 4261 return -EINVAL;
4201 4262
4202 /* We can't change a guest to/from radix yet */
4203 radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
4204 if (radix != kvm_is_radix(kvm))
4205 return -EINVAL;
4206
4207 /* GR (guest radix) bit in process_table field must match */ 4263 /* GR (guest radix) bit in process_table field must match */
4264 radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
4208 if (!!(cfg->process_table & PATB_GR) != radix) 4265 if (!!(cfg->process_table & PATB_GR) != radix)
4209 return -EINVAL; 4266 return -EINVAL;
4210 4267
@@ -4212,15 +4269,40 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
4212 if ((cfg->process_table & PRTS_MASK) > 24) 4269 if ((cfg->process_table & PRTS_MASK) > 24)
4213 return -EINVAL; 4270 return -EINVAL;
4214 4271
4272 /* We can change a guest to/from radix now, if the host is radix */
4273 if (radix && !radix_enabled())
4274 return -EINVAL;
4275
4215 mutex_lock(&kvm->lock); 4276 mutex_lock(&kvm->lock);
4277 if (radix != kvm_is_radix(kvm)) {
4278 if (kvm->arch.mmu_ready) {
4279 kvm->arch.mmu_ready = 0;
4280 /* order mmu_ready vs. vcpus_running */
4281 smp_mb();
4282 if (atomic_read(&kvm->arch.vcpus_running)) {
4283 kvm->arch.mmu_ready = 1;
4284 err = -EBUSY;
4285 goto out_unlock;
4286 }
4287 }
4288 if (radix)
4289 err = kvmppc_switch_mmu_to_radix(kvm);
4290 else
4291 err = kvmppc_switch_mmu_to_hpt(kvm);
4292 if (err)
4293 goto out_unlock;
4294 }
4295
4216 kvm->arch.process_table = cfg->process_table; 4296 kvm->arch.process_table = cfg->process_table;
4217 kvmppc_setup_partition_table(kvm); 4297 kvmppc_setup_partition_table(kvm);
4218 4298
4219 lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0; 4299 lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
4220 kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE); 4300 kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
4221 mutex_unlock(&kvm->lock); 4301 err = 0;
4222 4302
4223 return 0; 4303 out_unlock:
4304 mutex_unlock(&kvm->lock);
4305 return err;
4224} 4306}
4225 4307
4226static struct kvmppc_ops kvm_ops_hv = { 4308static struct kvmppc_ops kvm_ops_hv = {
@@ -4362,4 +4444,3 @@ module_exit(kvmppc_book3s_exit_hv);
4362MODULE_LICENSE("GPL"); 4444MODULE_LICENSE("GPL");
4363MODULE_ALIAS_MISCDEV(KVM_MINOR); 4445MODULE_ALIAS_MISCDEV(KVM_MINOR);
4364MODULE_ALIAS("devname:kvm"); 4446MODULE_ALIAS("devname:kvm");
4365
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 90644db9d38e..49a2c7825e04 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -278,7 +278,8 @@ void kvmhv_commence_exit(int trap)
278 struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; 278 struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
279 int ptid = local_paca->kvm_hstate.ptid; 279 int ptid = local_paca->kvm_hstate.ptid;
280 struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode; 280 struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
281 int me, ee, i; 281 int me, ee, i, t;
282 int cpu0;
282 283
283 /* Set our bit in the threads-exiting-guest map in the 0xff00 284 /* Set our bit in the threads-exiting-guest map in the 0xff00
284 bits of vcore->entry_exit_map */ 285 bits of vcore->entry_exit_map */
@@ -320,6 +321,22 @@ void kvmhv_commence_exit(int trap)
320 if ((ee >> 8) == 0) 321 if ((ee >> 8) == 0)
321 kvmhv_interrupt_vcore(vc, ee); 322 kvmhv_interrupt_vcore(vc, ee);
322 } 323 }
324
325 /*
326 * On POWER9 when running a HPT guest on a radix host (sip != NULL),
327 * we have to interrupt inactive CPU threads to get them to
328 * restore the host LPCR value.
329 */
330 if (sip->lpcr_req) {
331 if (cmpxchg(&sip->do_restore, 0, 1) == 0) {
332 vc = local_paca->kvm_hstate.kvm_vcore;
333 cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid;
334 for (t = 1; t < threads_per_core; ++t) {
335 if (sip->napped[t])
336 kvmhv_rm_send_ipi(cpu0 + t);
337 }
338 }
339 }
323} 340}
324 341
325struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; 342struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
@@ -529,6 +546,8 @@ static inline bool is_rm(void)
529 546
530unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) 547unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
531{ 548{
549 if (!kvmppc_xics_enabled(vcpu))
550 return H_TOO_HARD;
532 if (xive_enabled()) { 551 if (xive_enabled()) {
533 if (is_rm()) 552 if (is_rm())
534 return xive_rm_h_xirr(vcpu); 553 return xive_rm_h_xirr(vcpu);
@@ -541,6 +560,8 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
541 560
542unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) 561unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
543{ 562{
563 if (!kvmppc_xics_enabled(vcpu))
564 return H_TOO_HARD;
544 vcpu->arch.gpr[5] = get_tb(); 565 vcpu->arch.gpr[5] = get_tb();
545 if (xive_enabled()) { 566 if (xive_enabled()) {
546 if (is_rm()) 567 if (is_rm())
@@ -554,6 +575,8 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
554 575
555unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) 576unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
556{ 577{
578 if (!kvmppc_xics_enabled(vcpu))
579 return H_TOO_HARD;
557 if (xive_enabled()) { 580 if (xive_enabled()) {
558 if (is_rm()) 581 if (is_rm())
559 return xive_rm_h_ipoll(vcpu, server); 582 return xive_rm_h_ipoll(vcpu, server);
@@ -567,6 +590,8 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
567int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, 590int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
568 unsigned long mfrr) 591 unsigned long mfrr)
569{ 592{
593 if (!kvmppc_xics_enabled(vcpu))
594 return H_TOO_HARD;
570 if (xive_enabled()) { 595 if (xive_enabled()) {
571 if (is_rm()) 596 if (is_rm())
572 return xive_rm_h_ipi(vcpu, server, mfrr); 597 return xive_rm_h_ipi(vcpu, server, mfrr);
@@ -579,6 +604,8 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
579 604
580int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) 605int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
581{ 606{
607 if (!kvmppc_xics_enabled(vcpu))
608 return H_TOO_HARD;
582 if (xive_enabled()) { 609 if (xive_enabled()) {
583 if (is_rm()) 610 if (is_rm())
584 return xive_rm_h_cppr(vcpu, cppr); 611 return xive_rm_h_cppr(vcpu, cppr);
@@ -591,6 +618,8 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
591 618
592int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) 619int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
593{ 620{
621 if (!kvmppc_xics_enabled(vcpu))
622 return H_TOO_HARD;
594 if (xive_enabled()) { 623 if (xive_enabled()) {
595 if (is_rm()) 624 if (is_rm())
596 return xive_rm_h_eoi(vcpu, xirr); 625 return xive_rm_h_eoi(vcpu, xirr);
@@ -601,3 +630,89 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
601 return xics_rm_h_eoi(vcpu, xirr); 630 return xics_rm_h_eoi(vcpu, xirr);
602} 631}
603#endif /* CONFIG_KVM_XICS */ 632#endif /* CONFIG_KVM_XICS */
633
634void kvmppc_bad_interrupt(struct pt_regs *regs)
635{
636 die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
637 panic("Bad KVM trap");
638}
639
640/*
641 * Functions used to switch LPCR HR and UPRT bits on all threads
642 * when entering and exiting HPT guests on a radix host.
643 */
644
645#define PHASE_REALMODE 1 /* in real mode */
646#define PHASE_SET_LPCR 2 /* have set LPCR */
647#define PHASE_OUT_OF_GUEST 4 /* have finished executing in guest */
648#define PHASE_RESET_LPCR 8 /* have reset LPCR to host value */
649
650#define ALL(p) (((p) << 24) | ((p) << 16) | ((p) << 8) | (p))
651
652static void wait_for_sync(struct kvm_split_mode *sip, int phase)
653{
654 int thr = local_paca->kvm_hstate.tid;
655
656 sip->lpcr_sync.phase[thr] |= phase;
657 phase = ALL(phase);
658 while ((sip->lpcr_sync.allphases & phase) != phase) {
659 HMT_low();
660 barrier();
661 }
662 HMT_medium();
663}
664
665void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip)
666{
667 unsigned long rb, set;
668
669 /* wait for every other thread to get to real mode */
670 wait_for_sync(sip, PHASE_REALMODE);
671
672 /* Set LPCR and LPIDR */
673 mtspr(SPRN_LPCR, sip->lpcr_req);
674 mtspr(SPRN_LPID, sip->lpidr_req);
675 isync();
676
677 /* Invalidate the TLB on thread 0 */
678 if (local_paca->kvm_hstate.tid == 0) {
679 sip->do_set = 0;
680 asm volatile("ptesync" : : : "memory");
681 for (set = 0; set < POWER9_TLB_SETS_RADIX; ++set) {
682 rb = TLBIEL_INVAL_SET_LPID +
683 (set << TLBIEL_INVAL_SET_SHIFT);
684 asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : :
685 "r" (rb), "r" (0));
686 }
687 asm volatile("ptesync" : : : "memory");
688 }
689
690 /* indicate that we have done so and wait for others */
691 wait_for_sync(sip, PHASE_SET_LPCR);
692 /* order read of sip->lpcr_sync.allphases vs. sip->do_set */
693 smp_rmb();
694}
695
696/*
697 * Called when a thread that has been in the guest needs
698 * to reload the host LPCR value - but only on POWER9 when
699 * running a HPT guest on a radix host.
700 */
701void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
702{
703 /* we're out of the guest... */
704 wait_for_sync(sip, PHASE_OUT_OF_GUEST);
705
706 mtspr(SPRN_LPID, 0);
707 mtspr(SPRN_LPCR, sip->host_lpcr);
708 isync();
709
710 if (local_paca->kvm_hstate.tid == 0) {
711 sip->do_restore = 0;
712 smp_wmb(); /* order store of do_restore vs. phase */
713 }
714
715 wait_for_sync(sip, PHASE_RESET_LPCR);
716 smp_mb();
717 local_paca->kvm_hstate.kvm_split_mode = NULL;
718}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 4efe364f1188..26c11f678fbf 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -107,30 +107,50 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
107} 107}
108EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 108EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
109 109
110/* Update the changed page order field of an rmap entry */ 110/* Update the dirty bitmap of a memslot */
111void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize) 111void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
112 unsigned long gfn, unsigned long psize)
112{ 113{
113 unsigned long order; 114 unsigned long npages;
114 115
115 if (!psize) 116 if (!psize || !memslot->dirty_bitmap)
116 return; 117 return;
117 order = ilog2(psize); 118 npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
118 order <<= KVMPPC_RMAP_CHG_SHIFT; 119 gfn -= memslot->base_gfn;
119 if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER)) 120 set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
120 *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order; 121}
122EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
123
124static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
125 unsigned long hpte_v, unsigned long hpte_gr)
126{
127 struct kvm_memory_slot *memslot;
128 unsigned long gfn;
129 unsigned long psize;
130
131 psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
132 gfn = hpte_rpn(hpte_gr, psize);
133 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
134 if (memslot && memslot->dirty_bitmap)
135 kvmppc_update_dirty_map(memslot, gfn, psize);
121} 136}
122EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
123 137
124/* Returns a pointer to the revmap entry for the page mapped by a HPTE */ 138/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
125static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, 139static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
126 unsigned long hpte_gr) 140 unsigned long hpte_gr,
141 struct kvm_memory_slot **memslotp,
142 unsigned long *gfnp)
127{ 143{
128 struct kvm_memory_slot *memslot; 144 struct kvm_memory_slot *memslot;
129 unsigned long *rmap; 145 unsigned long *rmap;
130 unsigned long gfn; 146 unsigned long gfn;
131 147
132 gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr)); 148 gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
133 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 149 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
150 if (memslotp)
151 *memslotp = memslot;
152 if (gfnp)
153 *gfnp = gfn;
134 if (!memslot) 154 if (!memslot)
135 return NULL; 155 return NULL;
136 156
@@ -147,10 +167,12 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
147 unsigned long ptel, head; 167 unsigned long ptel, head;
148 unsigned long *rmap; 168 unsigned long *rmap;
149 unsigned long rcbits; 169 unsigned long rcbits;
170 struct kvm_memory_slot *memslot;
171 unsigned long gfn;
150 172
151 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 173 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
152 ptel = rev->guest_rpte |= rcbits; 174 ptel = rev->guest_rpte |= rcbits;
153 rmap = revmap_for_hpte(kvm, hpte_v, ptel); 175 rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
154 if (!rmap) 176 if (!rmap)
155 return; 177 return;
156 lock_rmap(rmap); 178 lock_rmap(rmap);
@@ -169,7 +191,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
169 } 191 }
170 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 192 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
171 if (rcbits & HPTE_R_C) 193 if (rcbits & HPTE_R_C)
172 kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r)); 194 kvmppc_update_dirty_map(memslot, gfn,
195 kvmppc_actual_pgsz(hpte_v, hpte_r));
173 unlock_rmap(rmap); 196 unlock_rmap(rmap);
174} 197}
175 198
@@ -193,7 +216,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
193 216
194 if (kvm_is_radix(kvm)) 217 if (kvm_is_radix(kvm))
195 return H_FUNCTION; 218 return H_FUNCTION;
196 psize = hpte_page_size(pteh, ptel); 219 psize = kvmppc_actual_pgsz(pteh, ptel);
197 if (!psize) 220 if (!psize)
198 return H_PARAMETER; 221 return H_PARAMETER;
199 writing = hpte_is_writable(ptel); 222 writing = hpte_is_writable(ptel);
@@ -797,7 +820,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
797 gr |= r & (HPTE_R_R | HPTE_R_C); 820 gr |= r & (HPTE_R_R | HPTE_R_C);
798 if (r & HPTE_R_R) { 821 if (r & HPTE_R_R) {
799 kvmppc_clear_ref_hpte(kvm, hpte, pte_index); 822 kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
800 rmap = revmap_for_hpte(kvm, v, gr); 823 rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
801 if (rmap) { 824 if (rmap) {
802 lock_rmap(rmap); 825 lock_rmap(rmap);
803 *rmap |= KVMPPC_RMAP_REFERENCED; 826 *rmap |= KVMPPC_RMAP_REFERENCED;
@@ -819,7 +842,6 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
819 __be64 *hpte; 842 __be64 *hpte;
820 unsigned long v, r, gr; 843 unsigned long v, r, gr;
821 struct revmap_entry *rev; 844 struct revmap_entry *rev;
822 unsigned long *rmap;
823 long ret = H_NOT_FOUND; 845 long ret = H_NOT_FOUND;
824 846
825 if (kvm_is_radix(kvm)) 847 if (kvm_is_radix(kvm))
@@ -848,16 +870,9 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
848 r = be64_to_cpu(hpte[1]); 870 r = be64_to_cpu(hpte[1]);
849 gr |= r & (HPTE_R_R | HPTE_R_C); 871 gr |= r & (HPTE_R_R | HPTE_R_C);
850 if (r & HPTE_R_C) { 872 if (r & HPTE_R_C) {
851 unsigned long psize = hpte_page_size(v, r);
852 hpte[1] = cpu_to_be64(r & ~HPTE_R_C); 873 hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
853 eieio(); 874 eieio();
854 rmap = revmap_for_hpte(kvm, v, gr); 875 kvmppc_set_dirty_from_hpte(kvm, v, gr);
855 if (rmap) {
856 lock_rmap(rmap);
857 *rmap |= KVMPPC_RMAP_CHANGED;
858 kvmppc_update_rmap_change(rmap, psize);
859 unlock_rmap(rmap);
860 }
861 } 876 }
862 } 877 }
863 vcpu->arch.gpr[4] = gr; 878 vcpu->arch.gpr[4] = gr;
@@ -1014,7 +1029,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
1014 * Check the HPTE again, including base page size 1029 * Check the HPTE again, including base page size
1015 */ 1030 */
1016 if ((v & valid) && (v & mask) == val && 1031 if ((v & valid) && (v & mask) == val &&
1017 hpte_base_page_size(v, r) == (1ul << pshift)) 1032 kvmppc_hpte_base_page_shift(v, r) == pshift)
1018 /* Return with the HPTE still locked */ 1033 /* Return with the HPTE still locked */
1019 return (hash << 3) + (i >> 1); 1034 return (hash << 3) + (i >> 1);
1020 1035
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index ec69fa45d5a2..7add18930e6d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -31,6 +31,7 @@
31#include <asm/tm.h> 31#include <asm/tm.h>
32#include <asm/opal.h> 32#include <asm/opal.h>
33#include <asm/xive-regs.h> 33#include <asm/xive-regs.h>
34#include <asm/thread_info.h>
34 35
35/* Sign-extend HDEC if not on POWER9 */ 36/* Sign-extend HDEC if not on POWER9 */
36#define EXTEND_HDEC(reg) \ 37#define EXTEND_HDEC(reg) \
@@ -81,6 +82,19 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
81 RFI 82 RFI
82 83
83kvmppc_call_hv_entry: 84kvmppc_call_hv_entry:
85BEGIN_FTR_SECTION
86 /* On P9, do LPCR setting, if necessary */
87 ld r3, HSTATE_SPLIT_MODE(r13)
88 cmpdi r3, 0
89 beq 46f
90 lwz r4, KVM_SPLIT_DO_SET(r3)
91 cmpwi r4, 0
92 beq 46f
93 bl kvmhv_p9_set_lpcr
94 nop
9546:
96END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
97
84 ld r4, HSTATE_KVM_VCPU(r13) 98 ld r4, HSTATE_KVM_VCPU(r13)
85 bl kvmppc_hv_entry 99 bl kvmppc_hv_entry
86 100
@@ -149,11 +163,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
149 subf r4, r4, r3 163 subf r4, r4, r3
150 mtspr SPRN_DEC, r4 164 mtspr SPRN_DEC, r4
151 165
152BEGIN_FTR_SECTION
153 /* hwthread_req may have got set by cede or no vcpu, so clear it */ 166 /* hwthread_req may have got set by cede or no vcpu, so clear it */
154 li r0, 0 167 li r0, 0
155 stb r0, HSTATE_HWTHREAD_REQ(r13) 168 stb r0, HSTATE_HWTHREAD_REQ(r13)
156END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
157 169
158 /* 170 /*
159 * For external interrupts we need to call the Linux 171 * For external interrupts we need to call the Linux
@@ -316,7 +328,6 @@ kvm_novcpu_exit:
316 * Relocation is off and most register values are lost. 328 * Relocation is off and most register values are lost.
317 * r13 points to the PACA. 329 * r13 points to the PACA.
318 * r3 contains the SRR1 wakeup value, SRR1 is trashed. 330 * r3 contains the SRR1 wakeup value, SRR1 is trashed.
319 * This is not used by ISAv3.0B processors.
320 */ 331 */
321 .globl kvm_start_guest 332 .globl kvm_start_guest
322kvm_start_guest: 333kvm_start_guest:
@@ -390,6 +401,7 @@ kvm_secondary_got_guest:
390 ld r6, HSTATE_SPLIT_MODE(r13) 401 ld r6, HSTATE_SPLIT_MODE(r13)
391 cmpdi r6, 0 402 cmpdi r6, 0
392 beq 63f 403 beq 63f
404BEGIN_FTR_SECTION
393 ld r0, KVM_SPLIT_RPR(r6) 405 ld r0, KVM_SPLIT_RPR(r6)
394 mtspr SPRN_RPR, r0 406 mtspr SPRN_RPR, r0
395 ld r0, KVM_SPLIT_PMMAR(r6) 407 ld r0, KVM_SPLIT_PMMAR(r6)
@@ -397,6 +409,15 @@ kvm_secondary_got_guest:
397 ld r0, KVM_SPLIT_LDBAR(r6) 409 ld r0, KVM_SPLIT_LDBAR(r6)
398 mtspr SPRN_LDBAR, r0 410 mtspr SPRN_LDBAR, r0
399 isync 411 isync
412FTR_SECTION_ELSE
413 /* On P9 we use the split_info for coordinating LPCR changes */
414 lwz r4, KVM_SPLIT_DO_SET(r6)
415 cmpwi r4, 0
416 beq 63f
417 mr r3, r6
418 bl kvmhv_p9_set_lpcr
419 nop
420ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
40063: 42163:
401 /* Order load of vcpu after load of vcore */ 422 /* Order load of vcpu after load of vcore */
402 lwsync 423 lwsync
@@ -435,9 +456,6 @@ kvm_secondary_got_guest:
435 * While waiting we also need to check if we get given a vcpu to run. 456 * While waiting we also need to check if we get given a vcpu to run.
436 */ 457 */
437kvm_no_guest: 458kvm_no_guest:
438BEGIN_FTR_SECTION
439 twi 31,0,0
440END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
441 lbz r3, HSTATE_HWTHREAD_REQ(r13) 459 lbz r3, HSTATE_HWTHREAD_REQ(r13)
442 cmpwi r3, 0 460 cmpwi r3, 0
443 bne 53f 461 bne 53f
@@ -470,6 +488,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
470 ld r3, HSTATE_SPLIT_MODE(r13) 488 ld r3, HSTATE_SPLIT_MODE(r13)
471 cmpdi r3, 0 489 cmpdi r3, 0
472 beq kvm_no_guest 490 beq kvm_no_guest
491 lwz r0, KVM_SPLIT_DO_SET(r3)
492 cmpwi r0, 0
493 bne kvmhv_do_set
494 lwz r0, KVM_SPLIT_DO_RESTORE(r3)
495 cmpwi r0, 0
496 bne kvmhv_do_restore
473 lbz r0, KVM_SPLIT_DO_NAP(r3) 497 lbz r0, KVM_SPLIT_DO_NAP(r3)
474 cmpwi r0, 0 498 cmpwi r0, 0
475 beq kvm_no_guest 499 beq kvm_no_guest
@@ -482,6 +506,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
482 stb r0, HSTATE_HWTHREAD_STATE(r13) 506 stb r0, HSTATE_HWTHREAD_STATE(r13)
483 b kvm_no_guest 507 b kvm_no_guest
484 508
509kvmhv_do_set:
510 /* Set LPCR, LPIDR etc. on P9 */
511 HMT_MEDIUM
512 bl kvmhv_p9_set_lpcr
513 nop
514 b kvm_no_guest
515
516kvmhv_do_restore:
517 HMT_MEDIUM
518 bl kvmhv_p9_restore_lpcr
519 nop
520 b kvm_no_guest
521
485/* 522/*
486 * Here the primary thread is trying to return the core to 523 * Here the primary thread is trying to return the core to
487 * whole-core mode, so we need to nap. 524 * whole-core mode, so we need to nap.
@@ -519,8 +556,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
519 /* Set kvm_split_mode.napped[tid] = 1 */ 556 /* Set kvm_split_mode.napped[tid] = 1 */
520 ld r3, HSTATE_SPLIT_MODE(r13) 557 ld r3, HSTATE_SPLIT_MODE(r13)
521 li r0, 1 558 li r0, 1
522 lhz r4, PACAPACAINDEX(r13) 559 lbz r4, HSTATE_TID(r13)
523 clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
524 addi r4, r4, KVM_SPLIT_NAPPED 560 addi r4, r4, KVM_SPLIT_NAPPED
525 stbx r0, r3, r4 561 stbx r0, r3, r4
526 /* Check the do_nap flag again after setting napped[] */ 562 /* Check the do_nap flag again after setting napped[] */
@@ -1914,10 +1950,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
191419: lis r8,0x7fff /* MAX_INT@h */ 195019: lis r8,0x7fff /* MAX_INT@h */
1915 mtspr SPRN_HDEC,r8 1951 mtspr SPRN_HDEC,r8
1916 1952
191716: ld r8,KVM_HOST_LPCR(r4) 195316:
1954BEGIN_FTR_SECTION
1955 /* On POWER9 with HPT-on-radix we need to wait for all other threads */
1956 ld r3, HSTATE_SPLIT_MODE(r13)
1957 cmpdi r3, 0
1958 beq 47f
1959 lwz r8, KVM_SPLIT_DO_RESTORE(r3)
1960 cmpwi r8, 0
1961 beq 47f
1962 stw r12, STACK_SLOT_TRAP(r1)
1963 bl kvmhv_p9_restore_lpcr
1964 nop
1965 lwz r12, STACK_SLOT_TRAP(r1)
1966 b 48f
196747:
1968END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1969 ld r8,KVM_HOST_LPCR(r4)
1918 mtspr SPRN_LPCR,r8 1970 mtspr SPRN_LPCR,r8
1919 isync 1971 isync
1920 197248:
1921 /* load host SLB entries */ 1973 /* load host SLB entries */
1922BEGIN_MMU_FTR_SECTION 1974BEGIN_MMU_FTR_SECTION
1923 b 0f 1975 b 0f
@@ -2543,10 +2595,8 @@ kvm_do_nap:
2543 clrrdi r0, r0, 1 2595 clrrdi r0, r0, 1
2544 mtspr SPRN_CTRLT, r0 2596 mtspr SPRN_CTRLT, r0
2545 2597
2546BEGIN_FTR_SECTION
2547 li r0,1 2598 li r0,1
2548 stb r0,HSTATE_HWTHREAD_REQ(r13) 2599 stb r0,HSTATE_HWTHREAD_REQ(r13)
2549END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
2550 mfspr r5,SPRN_LPCR 2600 mfspr r5,SPRN_LPCR
2551 ori r5,r5,LPCR_PECE0 | LPCR_PECE1 2601 ori r5,r5,LPCR_PECE0 | LPCR_PECE1
2552BEGIN_FTR_SECTION 2602BEGIN_FTR_SECTION
@@ -3134,10 +3184,139 @@ kvmppc_restore_tm:
3134/* 3184/*
3135 * We come here if we get any exception or interrupt while we are 3185 * We come here if we get any exception or interrupt while we are
3136 * executing host real mode code while in guest MMU context. 3186 * executing host real mode code while in guest MMU context.
3137 * For now just spin, but we should do something better. 3187 * r12 is (CR << 32) | vector
3188 * r13 points to our PACA
3189 * r12 is saved in HSTATE_SCRATCH0(r13)
3190 * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE
3191 * r9 is saved in HSTATE_SCRATCH2(r13)
3192 * r13 is saved in HSPRG1
3193 * cfar is saved in HSTATE_CFAR(r13)
3194 * ppr is saved in HSTATE_PPR(r13)
3138 */ 3195 */
3139kvmppc_bad_host_intr: 3196kvmppc_bad_host_intr:
3197 /*
3198 * Switch to the emergency stack, but start half-way down in
3199 * case we were already on it.
3200 */
3201 mr r9, r1
3202 std r1, PACAR1(r13)
3203 ld r1, PACAEMERGSP(r13)
3204 subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
3205 std r9, 0(r1)
3206 std r0, GPR0(r1)
3207 std r9, GPR1(r1)
3208 std r2, GPR2(r1)
3209 SAVE_4GPRS(3, r1)
3210 SAVE_2GPRS(7, r1)
3211 srdi r0, r12, 32
3212 clrldi r12, r12, 32
3213 std r0, _CCR(r1)
3214 std r12, _TRAP(r1)
3215 andi. r0, r12, 2
3216 beq 1f
3217 mfspr r3, SPRN_HSRR0
3218 mfspr r4, SPRN_HSRR1
3219 mfspr r5, SPRN_HDAR
3220 mfspr r6, SPRN_HDSISR
3221 b 2f
32221: mfspr r3, SPRN_SRR0
3223 mfspr r4, SPRN_SRR1
3224 mfspr r5, SPRN_DAR
3225 mfspr r6, SPRN_DSISR
32262: std r3, _NIP(r1)
3227 std r4, _MSR(r1)
3228 std r5, _DAR(r1)
3229 std r6, _DSISR(r1)
3230 ld r9, HSTATE_SCRATCH2(r13)
3231 ld r12, HSTATE_SCRATCH0(r13)
3232 GET_SCRATCH0(r0)
3233 SAVE_4GPRS(9, r1)
3234 std r0, GPR13(r1)
3235 SAVE_NVGPRS(r1)
3236 ld r5, HSTATE_CFAR(r13)
3237 std r5, ORIG_GPR3(r1)
3238 mflr r3
3239#ifdef CONFIG_RELOCATABLE
3240 ld r4, HSTATE_SCRATCH1(r13)
3241#else
3242 mfctr r4
3243#endif
3244 mfxer r5
3245 lbz r6, PACASOFTIRQEN(r13)
3246 std r3, _LINK(r1)
3247 std r4, _CTR(r1)
3248 std r5, _XER(r1)
3249 std r6, SOFTE(r1)
3250 ld r2, PACATOC(r13)
3251 LOAD_REG_IMMEDIATE(3, 0x7265677368657265)
3252 std r3, STACK_FRAME_OVERHEAD-16(r1)
3253
3254 /*
3255 * On POWER9 do a minimal restore of the MMU and call C code,
3256 * which will print a message and panic.
3257 * XXX On POWER7 and POWER8, we just spin here since we don't
3258 * know what the other threads are doing (and we don't want to
3259 * coordinate with them) - but at least we now have register state
3260 * in memory that we might be able to look at from another CPU.
3261 */
3262BEGIN_FTR_SECTION
3140 b . 3263 b .
3264END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
3265 ld r9, HSTATE_KVM_VCPU(r13)
3266 ld r10, VCPU_KVM(r9)
3267
3268 li r0, 0
3269 mtspr SPRN_AMR, r0
3270 mtspr SPRN_IAMR, r0
3271 mtspr SPRN_CIABR, r0
3272 mtspr SPRN_DAWRX, r0
3273
3274 /* Flush the ERAT on radix P9 DD1 guest exit */
3275BEGIN_FTR_SECTION
3276 PPC_INVALIDATE_ERAT
3277END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
3278
3279BEGIN_MMU_FTR_SECTION
3280 b 4f
3281END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3282
3283 slbmte r0, r0
3284 slbia
3285 ptesync
3286 ld r8, PACA_SLBSHADOWPTR(r13)
3287 .rept SLB_NUM_BOLTED
3288 li r3, SLBSHADOW_SAVEAREA
3289 LDX_BE r5, r8, r3
3290 addi r3, r3, 8
3291 LDX_BE r6, r8, r3
3292 andis. r7, r5, SLB_ESID_V@h
3293 beq 3f
3294 slbmte r6, r5
32953: addi r8, r8, 16
3296 .endr
3297
32984: lwz r7, KVM_HOST_LPID(r10)
3299 mtspr SPRN_LPID, r7
3300 mtspr SPRN_PID, r0
3301 ld r8, KVM_HOST_LPCR(r10)
3302 mtspr SPRN_LPCR, r8
3303 isync
3304 li r0, KVM_GUEST_MODE_NONE
3305 stb r0, HSTATE_IN_GUEST(r13)
3306
3307 /*
3308 * Turn on the MMU and jump to C code
3309 */
3310 bcl 20, 31, .+4
33115: mflr r3
3312 addi r3, r3, 9f - 5b
3313 ld r4, PACAKMSR(r13)
3314 mtspr SPRN_SRR0, r3
3315 mtspr SPRN_SRR1, r4
3316 rfid
33179: addi r3, r1, STACK_FRAME_OVERHEAD
3318 bl kvmppc_bad_interrupt
3319 b 9b
3141 3320
3142/* 3321/*
3143 * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken 3322 * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 69a09444d46e..d0dc8624198f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1326,12 +1326,22 @@ static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
1326 kvmppc_set_pvr_pr(vcpu, sregs->pvr); 1326 kvmppc_set_pvr_pr(vcpu, sregs->pvr);
1327 1327
1328 vcpu3s->sdr1 = sregs->u.s.sdr1; 1328 vcpu3s->sdr1 = sregs->u.s.sdr1;
1329#ifdef CONFIG_PPC_BOOK3S_64
1329 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 1330 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
1331 /* Flush all SLB entries */
1332 vcpu->arch.mmu.slbmte(vcpu, 0, 0);
1333 vcpu->arch.mmu.slbia(vcpu);
1334
1330 for (i = 0; i < 64; i++) { 1335 for (i = 0; i < 64; i++) {
1331 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv, 1336 u64 rb = sregs->u.s.ppc64.slb[i].slbe;
1332 sregs->u.s.ppc64.slb[i].slbe); 1337 u64 rs = sregs->u.s.ppc64.slb[i].slbv;
1338
1339 if (rb & SLB_ESID_V)
1340 vcpu->arch.mmu.slbmte(vcpu, rs, rb);
1333 } 1341 }
1334 } else { 1342 } else
1343#endif
1344 {
1335 for (i = 0; i < 16; i++) { 1345 for (i = 0; i < 16; i++) {
1336 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]); 1346 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
1337 } 1347 }
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 8a4205fa774f..dae3be5ff42b 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -419,6 +419,8 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
419 case H_PROTECT: 419 case H_PROTECT:
420 case H_BULK_REMOVE: 420 case H_BULK_REMOVE:
421 case H_PUT_TCE: 421 case H_PUT_TCE:
422 case H_PUT_TCE_INDIRECT:
423 case H_STUFF_TCE:
422 case H_CEDE: 424 case H_CEDE:
423 case H_LOGICAL_CI_LOAD: 425 case H_LOGICAL_CI_LOAD:
424 case H_LOGICAL_CI_STORE: 426 case H_LOGICAL_CI_STORE:
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index c6c734424c70..423b21393bc9 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -377,7 +377,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
377 377
378 start = vma->vm_pgoff; 378 start = vma->vm_pgoff;
379 end = start + 379 end = start +
380 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); 380 vma_pages(vma);
381 381
382 pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); 382 pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
383 383
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3480faaf1ef8..a0b7f094de78 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -590,8 +590,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
590 r = !!(hv_enabled && radix_enabled()); 590 r = !!(hv_enabled && radix_enabled());
591 break; 591 break;
592 case KVM_CAP_PPC_MMU_HASH_V3: 592 case KVM_CAP_PPC_MMU_HASH_V3:
593 r = !!(hv_enabled && !radix_enabled() && 593 r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300));
594 cpu_has_feature(CPU_FTR_ARCH_300));
595 break; 594 break;
596#endif 595#endif
597 case KVM_CAP_SYNC_MMU: 596 case KVM_CAP_SYNC_MMU:
@@ -644,8 +643,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
644 break; 643 break;
645#endif 644#endif
646 case KVM_CAP_PPC_HTM: 645 case KVM_CAP_PPC_HTM:
647 r = cpu_has_feature(CPU_FTR_TM_COMP) && 646 r = is_kvmppc_hv_enabled(kvm) &&
648 is_kvmppc_hv_enabled(kvm); 647 (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
649 break; 648 break;
650 default: 649 default:
651 r = 0; 650 r = 0;