author		Paul Mackerras <paulus@samba.org>	2013-09-20 00:52:44 -0400
committer	Alexander Graf <agraf@suse.de>		2013-10-17 08:45:03 -0400
commit		a4a0f2524acc2c602cadd8e743be19d86f3a746b (patch)
tree		582fc20dbf59fbbc1eddc2356b1704e5df2085f2 /arch/powerpc/kvm
parent		a2d56020d1d91934e7bb3e7c8a5a3b5921ce121b (diff)
KVM: PPC: Book3S PR: Allow guest to use 64k pages
This adds the code to interpret 64k HPTEs in the guest hashed page
table (HPT), 64k SLB entries, and to tell the guest about 64k pages in
kvm_vm_ioctl_get_smmu_info().  Guest 64k pages are still shadowed by
4k pages.

This also adds another hash table to the four we have already in
book3s_mmu_hpte.c to allow us to find all the PTEs that we have
instantiated that match a given 64k guest page.

The tlbie instruction changed starting with POWER6 to use a bit in the
RB operand to indicate large page invalidations, and to use other RB
bits to indicate the base and actual page sizes and the segment size.
64k pages came in slightly earlier, with POWER5++.  We use one bit in
vcpu->arch.hflags to indicate that the emulated cpu supports 64k pages,
and another to indicate that it has the new tlbie definition.

The KVM_PPC_GET_SMMU_INFO ioctl presents a bit of a problem, because
the MMU capabilities depend on which CPU model we're emulating, but it
is a VM ioctl not a VCPU ioctl and therefore doesn't get passed a VCPU
fd.  In addition, commonly-used userspace (QEMU) calls it before
setting the PVR for any VCPU.  Therefore, as a best effort we look at
the first vcpu in the VM and return 64k pages or not depending on its
capabilities.  We also make the PVR default to the host PVR on recent
CPUs that support 1TB segments (and therefore multiple page sizes as
well) so that KVM_PPC_GET_SMMU_INFO will include 64k page and 1TB
segment support on those CPUs.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
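For context only (not part of the commit): the sketch below is a rough
userspace illustration of how a client such as QEMU consumes the
KVM_PPC_GET_SMMU_INFO ioctl discussed above.  It creates a bare VM and
prints the advertised SLB size and segment page sizes; all symbols used
(struct kvm_ppc_smmu_info, KVM_PPC_PAGE_SIZES_MAX_SZ, KVM_PPC_1T_SEGMENTS)
come from the uapi <linux/kvm.h>.  Because it creates no vcpus, a kernel
with this patch applied would report the minimum capability, i.e. no 64k
pages, exactly as described in the message above.

/*
 * Illustrative sketch, not part of this patch: query
 * KVM_PPC_GET_SMMU_INFO from userspace and print the advertised
 * segment page sizes.  PPC64 hosts only; minimal error handling.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_ppc_smmu_info info;
	int kvm, vm, i, j;

	kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}
	vm = ioctl(kvm, KVM_CREATE_VM, 0);
	if (vm < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}

	memset(&info, 0, sizeof(info));
	if (ioctl(vm, KVM_PPC_GET_SMMU_INFO, &info) < 0) {
		perror("KVM_PPC_GET_SMMU_INFO");
		return 1;
	}

	printf("slb_size = %u, 1T segments: %s\n", info.slb_size,
	       (info.flags & KVM_PPC_1T_SEGMENTS) ? "yes" : "no");

	/* A zero page_shift marks an unused sps[] slot. */
	for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
		if (!info.sps[i].page_shift)
			continue;
		printf("base page shift %u (slb_enc 0x%x):",
		       info.sps[i].page_shift, info.sps[i].slb_enc);
		for (j = 0; j < KVM_PPC_PAGE_SIZES_MAX_SZ; j++) {
			if (!info.sps[i].enc[j].page_shift)
				continue;
			printf(" [shift %u, pte_enc 0x%x]",
			       info.sps[i].enc[j].page_shift,
			       info.sps[i].enc[j].pte_enc);
		}
		printf("\n");
	}
	return 0;
}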
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu.c	92
-rw-r--r--	arch/powerpc/kvm/book3s_mmu_hpte.c	50
-rw-r--r--	arch/powerpc/kvm/book3s_pr.c	58
3 files changed, 185 insertions, 15 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 7e345e00661a..8277264a0bc5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 	return kvmppc_slb_calc_vpn(slb, eaddr);
 }
 
+static int mmu_pagesize(int mmu_pg)
+{
+	switch (mmu_pg) {
+	case MMU_PAGE_64K:
+		return 16;
+	case MMU_PAGE_16M:
+		return 24;
+	}
+	return 12;
+}
+
 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
 {
-	return slbe->large ? 24 : 12;
+	return mmu_pagesize(slbe->base_page_size);
 }
 
 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
@@ -166,14 +177,34 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
 	avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
 	avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
 
-	if (p < 24)
-		avpn >>= ((80 - p) - 56) - 8;
+	if (p < 16)
+		avpn >>= ((80 - p) - 56) - 8;	/* 16 - p */
 	else
-		avpn <<= 8;
+		avpn <<= p - 16;
 
 	return avpn;
 }
 
+/*
+ * Return page size encoded in the second word of a HPTE, or
+ * -1 for an invalid encoding for the base page size indicated by
+ * the SLB entry.  This doesn't handle mixed pagesize segments yet.
+ */
+static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
+{
+	switch (slbe->base_page_size) {
+	case MMU_PAGE_64K:
+		if ((r & 0xf000) == 0x1000)
+			return MMU_PAGE_64K;
+		break;
+	case MMU_PAGE_16M:
+		if ((r & 0xff000) == 0)
+			return MMU_PAGE_16M;
+		break;
+	}
+	return -1;
+}
+
 static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 				      struct kvmppc_pte *gpte, bool data)
 {
@@ -189,6 +220,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	u8 pp, key = 0;
 	bool found = false;
 	bool second = false;
+	int pgsize;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
 	/* Magic page override */
@@ -202,6 +234,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		gpte->may_execute = true;
 		gpte->may_read = true;
 		gpte->may_write = true;
+		gpte->page_size = MMU_PAGE_4K;
 
 		return 0;
 	}
@@ -222,6 +255,8 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
 		HPTE_V_SECONDARY;
 
+	pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
+
 do_second:
 	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
 	if (kvm_is_error_hva(ptegp))
@@ -240,6 +275,13 @@ do_second:
 	for (i=0; i<16; i+=2) {
 		/* Check all relevant fields of 1st dword */
 		if ((pteg[i] & v_mask) == v_val) {
+			/* If large page bit is set, check pgsize encoding */
+			if (slbe->large &&
+			    (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+				pgsize = decode_pagesize(slbe, pteg[i+1]);
+				if (pgsize < 0)
+					continue;
+			}
 			found = true;
 			break;
 		}
@@ -256,13 +298,13 @@ do_second:
 	v = pteg[i];
 	r = pteg[i+1];
 	pp = (r & HPTE_R_PP) | key;
-	eaddr_mask = 0xFFF;
 
 	gpte->eaddr = eaddr;
 	gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
-	if (slbe->large)
-		eaddr_mask = 0xFFFFFF;
+
+	eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
 	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+	gpte->page_size = pgsize;
 	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
 	gpte->may_read = false;
 	gpte->may_write = false;
@@ -345,6 +387,21 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 	slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;
 	slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
 
+	slbe->base_page_size = MMU_PAGE_4K;
+	if (slbe->large) {
+		if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
+			switch (rs & SLB_VSID_LP) {
+			case SLB_VSID_LP_00:
+				slbe->base_page_size = MMU_PAGE_16M;
+				break;
+			case SLB_VSID_LP_01:
+				slbe->base_page_size = MMU_PAGE_64K;
+				break;
+			}
+		} else
+			slbe->base_page_size = MMU_PAGE_16M;
+	}
+
 	slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
 	slbe->origv = rs;
 
@@ -463,8 +520,25 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
 
 	dprintk("KVM MMU: tlbie(0x%lx)\n", va);
 
-	if (large)
-		mask = 0xFFFFFF000ULL;
+	/*
+	 * The tlbie instruction changed behaviour starting with
+	 * POWER6.  POWER6 and later don't have the large page flag
+	 * in the instruction but in the RB value, along with bits
+	 * indicating page and segment sizes.
+	 */
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
+		/* POWER6 or later */
+		if (va & 1) {		/* L bit */
+			if ((va & 0xf000) == 0x1000)
+				mask = 0xFFFFFFFF0ULL;	/* 64k page */
+			else
+				mask = 0xFFFFFF000ULL;	/* 16M page */
+		}
+	} else {
+		/* older processors, e.g. PPC970 */
+		if (large)
+			mask = 0xFFFFFF000ULL;
+	}
 	kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
 }
 
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index da8b13c4b776..d2d280b16778 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -56,6 +56,14 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
 			       HPTEG_HASH_BITS_VPTE_LONG);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
+{
+	return hash_64((vpage & 0xffffffff0ULL) >> 4,
+		       HPTEG_HASH_BITS_VPTE_64K);
+}
+#endif
+
 void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
 	u64 index;
@@ -83,6 +91,13 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	hlist_add_head_rcu(&pte->list_vpte_long,
 			   &vcpu3s->hpte_hash_vpte_long[index]);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Add to vPTE_64k list */
+	index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage);
+	hlist_add_head_rcu(&pte->list_vpte_64k,
+			   &vcpu3s->hpte_hash_vpte_64k[index]);
+#endif
+
 	spin_unlock(&vcpu3s->mmu_lock);
 }
 
@@ -113,6 +128,9 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	hlist_del_init_rcu(&pte->list_pte_long);
 	hlist_del_init_rcu(&pte->list_vpte);
 	hlist_del_init_rcu(&pte->list_vpte_long);
+#ifdef CONFIG_PPC_BOOK3S_64
+	hlist_del_init_rcu(&pte->list_vpte_64k);
+#endif
 
 	spin_unlock(&vcpu3s->mmu_lock);
 
@@ -219,6 +237,29 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
 	rcu_read_unlock();
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Flush with mask 0xffffffff0 */
+static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *list;
+	struct hpte_cache *pte;
+	u64 vp_mask = 0xffffffff0ULL;
+
+	list = &vcpu3s->hpte_hash_vpte_64k[
+		kvmppc_mmu_hash_vpte_64k(guest_vp)];
+
+	rcu_read_lock();
+
+	/* Check the list for matching entries and invalidate */
+	hlist_for_each_entry_rcu(pte, list, list_vpte_64k)
+		if ((pte->pte.vpage & vp_mask) == guest_vp)
+			invalidate_pte(vcpu, pte);
+
+	rcu_read_unlock();
+}
+#endif
+
 /* Flush with mask 0xffffff000 */
 static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
@@ -249,6 +290,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
 	case 0xfffffffffULL:
 		kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
 		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case 0xffffffff0ULL:
+		kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
+		break;
+#endif
 	case 0xffffff000ULL:
 		kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
 		break;
@@ -320,6 +366,10 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
 				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
 	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
 				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+#ifdef CONFIG_PPC_BOOK3S_64
+	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
+				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
+#endif
 
 	spin_lock_init(&vcpu3s->mmu_lock);
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 228a9baffd9e..6cc99583ed39 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -306,6 +306,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
 	if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
 		to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
 
+	/*
+	 * If they're asking for POWER6 or later, set the flag
+	 * indicating that we can do multiple large page sizes
+	 * and 1TB segments.
+	 * Also set the flag that indicates that tlbie has the large
+	 * page bit in the RB operand instead of the instruction.
+	 */
+	switch (PVR_VER(pvr)) {
+	case PVR_POWER6:
+	case PVR_POWER7:
+	case PVR_POWER7p:
+	case PVR_POWER8:
+		vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+			BOOK3S_HFLAG_NEW_TLBIE;
+		break;
+	}
+
 #ifdef CONFIG_PPC_BOOK3S_32
 	/* 32 bit Book3S always has 32 byte dcbz */
 	vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
@@ -1130,8 +1147,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
-	/* default to book3s_64 (970fx) */
+	/*
+	 * Default to the same as the host if we're on sufficiently
+	 * recent machine that we have 1TB segments;
+	 * otherwise default to PPC970FX.
+	 */
 	vcpu->arch.pvr = 0x3C0301;
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		vcpu->arch.pvr = mfspr(SPRN_PVR);
 #else
 	/* default to book3s_32 (750) */
 	vcpu->arch.pvr = 0x84202;
@@ -1317,7 +1340,10 @@ out:
 #ifdef CONFIG_PPC64
 int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 {
-	info->flags = KVM_PPC_1T_SEGMENTS;
+	long int i;
+	struct kvm_vcpu *vcpu;
+
+	info->flags = 0;
 
 	/* SLB is always 64 entries */
 	info->slb_size = 64;
@@ -1328,11 +1354,31 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 	info->sps[0].enc[0].page_shift = 12;
 	info->sps[0].enc[0].pte_enc = 0;
 
+	/*
+	 * 64k large page size.
+	 * We only want to put this in if the CPUs we're emulating
+	 * support it, but unfortunately we don't have a vcpu easily
+	 * to hand here to test.  Just pick the first vcpu, and if
+	 * that doesn't exist yet, report the minimum capability,
+	 * i.e., no 64k pages.
+	 * 1T segment support goes along with 64k pages.
+	 */
+	i = 1;
+	vcpu = kvm_get_vcpu(kvm, 0);
+	if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+		info->flags = KVM_PPC_1T_SEGMENTS;
+		info->sps[i].page_shift = 16;
+		info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+		info->sps[i].enc[0].page_shift = 16;
+		info->sps[i].enc[0].pte_enc = 1;
+		++i;
+	}
+
 	/* Standard 16M large page size segment */
-	info->sps[1].page_shift = 24;
-	info->sps[1].slb_enc = SLB_VSID_L;
-	info->sps[1].enc[0].page_shift = 24;
-	info->sps[1].enc[0].pte_enc = 0;
+	info->sps[i].page_shift = 24;
+	info->sps[i].slb_enc = SLB_VSID_L;
+	info->sps[i].enc[0].page_shift = 24;
+	info->sps[i].enc[0].pte_enc = 0;
 
 	return 0;
 }