path: root/arch/powerpc/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>  2012-03-28 17:35:31 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-03-28 17:35:31 -0400
commit     2e7580b0e75d771d93e24e681031a165b1d31071 (patch)
tree       d9449702609eeaab28913a43b5a4434667e09d43 /arch/powerpc/kvm
parent     d25413efa9536e2f425ea45c7720598035c597bc (diff)
parent     cf9eeac46350b8b43730b7dc5e999757bed089a4 (diff)
Merge branch 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Avi Kivity:
 "Changes include timekeeping improvements, support for assigning host
  PCI devices that share interrupt lines, s390 user-controlled guests, a
  large ppc update, and random fixes."

This is with the sign-offs fixed, hopefully next merge window we won't
have rebased commits.

* 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits)
  KVM: Convert intx_mask_lock to spin lock
  KVM: x86: fix kvm_write_tsc() TSC matching thinko
  x86: kvmclock: abstract save/restore sched_clock_state
  KVM: nVMX: Fix erroneous exception bitmap check
  KVM: Ignore the writes to MSR_K7_HWCR(3)
  KVM: MMU: make use of ->root_level in reset_rsvds_bits_mask
  KVM: PMU: add proper support for fixed counter 2
  KVM: PMU: Fix raw event check
  KVM: PMU: warn when pin control is set in eventsel msr
  KVM: VMX: Fix delayed load of shared MSRs
  KVM: use correct tlbs dirty type in cmpxchg
  KVM: Allow host IRQ sharing for assigned PCI 2.3 devices
  KVM: Ensure all vcpus are consistent with in-kernel irqchip settings
  KVM: x86 emulator: Allow PM/VM86 switch during task switch
  KVM: SVM: Fix CPL updates
  KVM: x86 emulator: VM86 segments must have DPL 3
  KVM: x86 emulator: Fix task switch privilege checks
  arch/powerpc/kvm/book3s_hv.c: included linux/sched.h twice
  KVM: x86 emulator: correctly mask pmc index bits in RDPMC instruction emulation
  KVM: mmu_notifier: Flush TLBs before releasing mmu_lock
  ...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--  arch/powerpc/kvm/Kconfig                  |   1
-rw-r--r--  arch/powerpc/kvm/book3s.c                 |  57
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c     |  21
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c     |  66
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c       | 919
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c         |   8
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c              | 465
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c      | 209
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 835
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 176
-rw-r--r--  arch/powerpc/kvm/book3s_paired_singles.c  |   9
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c              | 178
-rw-r--r--  arch/powerpc/kvm/booke.c                  | 150
-rw-r--r--  arch/powerpc/kvm/booke.h                  |   4
-rw-r--r--  arch/powerpc/kvm/booke_emulate.c          |  23
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S       |  18
-rw-r--r--  arch/powerpc/kvm/e500.c                   |  32
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c           |  38
-rw-r--r--  arch/powerpc/kvm/e500_tlb.c               | 775
-rw-r--r--  arch/powerpc/kvm/e500_tlb.h               |  80
-rw-r--r--  arch/powerpc/kvm/emulate.c                |  61
-rw-r--r--  arch/powerpc/kvm/powerpc.c                | 148
-rw-r--r--  arch/powerpc/kvm/trace.h                  |  62
23 files changed, 3315 insertions, 1020 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 78133deb4b64..8f64709ae331 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -69,6 +69,7 @@ config KVM_BOOK3S_64
69config KVM_BOOK3S_64_HV 69config KVM_BOOK3S_64_HV
70 bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" 70 bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
71 depends on KVM_BOOK3S_64 71 depends on KVM_BOOK3S_64
72 select MMU_NOTIFIER
72 ---help--- 73 ---help---
73 Support running unmodified book3s_64 guest kernels in 74 Support running unmodified book3s_64 guest kernels in
74 virtual machines on POWER7 and PPC970 processors that have 75 virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index e41ac6f7dcf1..7d54f4ed6d96 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
258 return true; 258 return true;
259} 259}
260 260
261void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) 261void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
262{ 262{
263 unsigned long *pending = &vcpu->arch.pending_exceptions; 263 unsigned long *pending = &vcpu->arch.pending_exceptions;
264 unsigned long old_pending = vcpu->arch.pending_exceptions; 264 unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -423,10 +423,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
423 regs->sprg1 = vcpu->arch.shared->sprg1; 423 regs->sprg1 = vcpu->arch.shared->sprg1;
424 regs->sprg2 = vcpu->arch.shared->sprg2; 424 regs->sprg2 = vcpu->arch.shared->sprg2;
425 regs->sprg3 = vcpu->arch.shared->sprg3; 425 regs->sprg3 = vcpu->arch.shared->sprg3;
426 regs->sprg4 = vcpu->arch.sprg4; 426 regs->sprg4 = vcpu->arch.shared->sprg4;
427 regs->sprg5 = vcpu->arch.sprg5; 427 regs->sprg5 = vcpu->arch.shared->sprg5;
428 regs->sprg6 = vcpu->arch.sprg6; 428 regs->sprg6 = vcpu->arch.shared->sprg6;
429 regs->sprg7 = vcpu->arch.sprg7; 429 regs->sprg7 = vcpu->arch.shared->sprg7;
430 430
431 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 431 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
432 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 432 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -450,10 +450,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
450 vcpu->arch.shared->sprg1 = regs->sprg1; 450 vcpu->arch.shared->sprg1 = regs->sprg1;
451 vcpu->arch.shared->sprg2 = regs->sprg2; 451 vcpu->arch.shared->sprg2 = regs->sprg2;
452 vcpu->arch.shared->sprg3 = regs->sprg3; 452 vcpu->arch.shared->sprg3 = regs->sprg3;
453 vcpu->arch.sprg4 = regs->sprg4; 453 vcpu->arch.shared->sprg4 = regs->sprg4;
454 vcpu->arch.sprg5 = regs->sprg5; 454 vcpu->arch.shared->sprg5 = regs->sprg5;
455 vcpu->arch.sprg6 = regs->sprg6; 455 vcpu->arch.shared->sprg6 = regs->sprg6;
456 vcpu->arch.sprg7 = regs->sprg7; 456 vcpu->arch.shared->sprg7 = regs->sprg7;
457 457
458 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 458 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
459 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 459 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -477,41 +477,10 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
477 return 0; 477 return 0;
478} 478}
479 479
480/* 480void kvmppc_decrementer_func(unsigned long data)
481 * Get (and clear) the dirty memory log for a memory slot.
482 */
483int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
484 struct kvm_dirty_log *log)
485{ 481{
486 struct kvm_memory_slot *memslot; 482 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
487 struct kvm_vcpu *vcpu;
488 ulong ga, ga_end;
489 int is_dirty = 0;
490 int r;
491 unsigned long n;
492
493 mutex_lock(&kvm->slots_lock);
494
495 r = kvm_get_dirty_log(kvm, log, &is_dirty);
496 if (r)
497 goto out;
498
499 /* If nothing is dirty, don't bother messing with page tables. */
500 if (is_dirty) {
501 memslot = id_to_memslot(kvm->memslots, log->slot);
502 483
503 ga = memslot->base_gfn << PAGE_SHIFT; 484 kvmppc_core_queue_dec(vcpu);
504 ga_end = ga + (memslot->npages << PAGE_SHIFT); 485 kvm_vcpu_kick(vcpu);
505
506 kvm_for_each_vcpu(n, vcpu, kvm)
507 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
508
509 n = kvm_dirty_bitmap_bytes(memslot);
510 memset(memslot->dirty_bitmap, 0, n);
511 }
512
513 r = 0;
514out:
515 mutex_unlock(&kvm->slots_lock);
516 return r;
517} 486}
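
The book3s.c hunks above move SPRG4-7 from vcpu->arch into the shared (magic) page, but the userspace ABI is unchanged: the registers are still read and written through the ordinary KVM_GET_REGS/KVM_SET_REGS ioctls. A minimal userspace sketch follows, assuming vcpu_fd is an already-created vcpu file descriptor; none of this code is part of the patch itself.

/*
 * Sketch only: dump the SPRG4-7 values that kvm_arch_vcpu_ioctl_get_regs()
 * above now services from the shared page.  Assumes a powerpc host with
 * the KVM headers installed.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int dump_sprgs(int vcpu_fd)
{
	struct kvm_regs regs;

	if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
		return -1;

	printf("sprg4=%llx sprg5=%llx sprg6=%llx sprg7=%llx\n",
	       (unsigned long long)regs.sprg4, (unsigned long long)regs.sprg5,
	       (unsigned long long)regs.sprg6, (unsigned long long)regs.sprg7);
	return 0;
}
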
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 9fecbfbce773..f922c29bb234 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -151,13 +151,15 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
151 bool primary = false; 151 bool primary = false;
152 bool evict = false; 152 bool evict = false;
153 struct hpte_cache *pte; 153 struct hpte_cache *pte;
154 int r = 0;
154 155
155 /* Get host physical address for gpa */ 156 /* Get host physical address for gpa */
156 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 157 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
157 if (is_error_pfn(hpaddr)) { 158 if (is_error_pfn(hpaddr)) {
158 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", 159 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
159 orig_pte->eaddr); 160 orig_pte->eaddr);
160 return -EINVAL; 161 r = -EINVAL;
162 goto out;
161 } 163 }
162 hpaddr <<= PAGE_SHIFT; 164 hpaddr <<= PAGE_SHIFT;
163 165
@@ -249,7 +251,8 @@ next_pteg:
249 251
250 kvmppc_mmu_hpte_cache_map(vcpu, pte); 252 kvmppc_mmu_hpte_cache_map(vcpu, pte);
251 253
252 return 0; 254out:
255 return r;
253} 256}
254 257
255static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) 258static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -297,12 +300,14 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
297 u64 gvsid; 300 u64 gvsid;
298 u32 sr; 301 u32 sr;
299 struct kvmppc_sid_map *map; 302 struct kvmppc_sid_map *map;
300 struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); 303 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
304 int r = 0;
301 305
302 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { 306 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
303 /* Invalidate an entry */ 307 /* Invalidate an entry */
304 svcpu->sr[esid] = SR_INVALID; 308 svcpu->sr[esid] = SR_INVALID;
305 return -ENOENT; 309 r = -ENOENT;
310 goto out;
306 } 311 }
307 312
308 map = find_sid_vsid(vcpu, gvsid); 313 map = find_sid_vsid(vcpu, gvsid);
@@ -315,17 +320,21 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
315 320
316 dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); 321 dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
317 322
318 return 0; 323out:
324 svcpu_put(svcpu);
325 return r;
319} 326}
320 327
321void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 328void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
322{ 329{
323 int i; 330 int i;
324 struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); 331 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
325 332
326 dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); 333 dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
327 for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) 334 for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
328 svcpu->sr[i] = SR_INVALID; 335 svcpu->sr[i] = SR_INVALID;
336
337 svcpu_put(svcpu);
329} 338}
330 339
331void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 340void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
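
The recurring change in the hunks above is mechanical: every to_svcpu(vcpu) access becomes an svcpu_get(vcpu)/svcpu_put(svcpu) pair, and early returns are funneled through a single exit label so the reference is always dropped. Reduced to a sketch with hypothetical names (resource_get, resource_put and do_work stand in for the real helpers), the shape is roughly:

/*
 * Sketch of the single-exit get/put pattern, not taken from the patch.
 * resource_get()/resource_put() play the role of svcpu_get()/svcpu_put().
 */
#include <errno.h>

struct resource;
struct resource *resource_get(void);       /* hypothetical */
void resource_put(struct resource *res);   /* hypothetical */
int do_work(struct resource *res);         /* hypothetical */

int mapped_operation(void)
{
	struct resource *res = resource_get();  /* like svcpu_get(vcpu) */
	int r = 0;

	if (do_work(res) < 0) {
		r = -EINVAL;
		goto out;        /* every failure path ends up here */
	}
	/* ... further work that may also "goto out" on failure ... */
out:
	resource_put(res);       /* like svcpu_put(svcpu): never skipped */
	return r;
}
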
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index fa2f08434ba5..6f87f39a1ac2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -88,12 +88,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
88 int vflags = 0; 88 int vflags = 0;
89 int attempt = 0; 89 int attempt = 0;
90 struct kvmppc_sid_map *map; 90 struct kvmppc_sid_map *map;
91 int r = 0;
91 92
92 /* Get host physical address for gpa */ 93 /* Get host physical address for gpa */
93 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 94 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
94 if (is_error_pfn(hpaddr)) { 95 if (is_error_pfn(hpaddr)) {
95 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); 96 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
96 return -EINVAL; 97 r = -EINVAL;
98 goto out;
97 } 99 }
98 hpaddr <<= PAGE_SHIFT; 100 hpaddr <<= PAGE_SHIFT;
99 hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); 101 hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
@@ -110,7 +112,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
110 printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", 112 printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
111 vsid, orig_pte->eaddr); 113 vsid, orig_pte->eaddr);
112 WARN_ON(true); 114 WARN_ON(true);
113 return -EINVAL; 115 r = -EINVAL;
116 goto out;
114 } 117 }
115 118
116 vsid = map->host_vsid; 119 vsid = map->host_vsid;
@@ -131,8 +134,10 @@ map_again:
131 134
132 /* In case we tried normal mapping already, let's nuke old entries */ 135 /* In case we tried normal mapping already, let's nuke old entries */
133 if (attempt > 1) 136 if (attempt > 1)
134 if (ppc_md.hpte_remove(hpteg) < 0) 137 if (ppc_md.hpte_remove(hpteg) < 0) {
135 return -1; 138 r = -1;
139 goto out;
140 }
136 141
137 ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M); 142 ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
138 143
@@ -162,7 +167,8 @@ map_again:
162 kvmppc_mmu_hpte_cache_map(vcpu, pte); 167 kvmppc_mmu_hpte_cache_map(vcpu, pte);
163 } 168 }
164 169
165 return 0; 170out:
171 return r;
166} 172}
167 173
168static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) 174static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -207,25 +213,30 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
207 213
208static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) 214static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
209{ 215{
216 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
210 int i; 217 int i;
211 int max_slb_size = 64; 218 int max_slb_size = 64;
212 int found_inval = -1; 219 int found_inval = -1;
213 int r; 220 int r;
214 221
215 if (!to_svcpu(vcpu)->slb_max) 222 if (!svcpu->slb_max)
216 to_svcpu(vcpu)->slb_max = 1; 223 svcpu->slb_max = 1;
217 224
218 /* Are we overwriting? */ 225 /* Are we overwriting? */
219 for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) { 226 for (i = 1; i < svcpu->slb_max; i++) {
220 if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V)) 227 if (!(svcpu->slb[i].esid & SLB_ESID_V))
221 found_inval = i; 228 found_inval = i;
222 else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid) 229 else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
223 return i; 230 r = i;
231 goto out;
232 }
224 } 233 }
225 234
226 /* Found a spare entry that was invalidated before */ 235 /* Found a spare entry that was invalidated before */
227 if (found_inval > 0) 236 if (found_inval > 0) {
228 return found_inval; 237 r = found_inval;
238 goto out;
239 }
229 240
230 /* No spare invalid entry, so create one */ 241 /* No spare invalid entry, so create one */
231 242
@@ -233,30 +244,35 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
233 max_slb_size = mmu_slb_size; 244 max_slb_size = mmu_slb_size;
234 245
235 /* Overflowing -> purge */ 246 /* Overflowing -> purge */
236 if ((to_svcpu(vcpu)->slb_max) == max_slb_size) 247 if ((svcpu->slb_max) == max_slb_size)
237 kvmppc_mmu_flush_segments(vcpu); 248 kvmppc_mmu_flush_segments(vcpu);
238 249
239 r = to_svcpu(vcpu)->slb_max; 250 r = svcpu->slb_max;
240 to_svcpu(vcpu)->slb_max++; 251 svcpu->slb_max++;
241 252
253out:
254 svcpu_put(svcpu);
242 return r; 255 return r;
243} 256}
244 257
245int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) 258int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
246{ 259{
260 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
247 u64 esid = eaddr >> SID_SHIFT; 261 u64 esid = eaddr >> SID_SHIFT;
248 u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V; 262 u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;
249 u64 slb_vsid = SLB_VSID_USER; 263 u64 slb_vsid = SLB_VSID_USER;
250 u64 gvsid; 264 u64 gvsid;
251 int slb_index; 265 int slb_index;
252 struct kvmppc_sid_map *map; 266 struct kvmppc_sid_map *map;
267 int r = 0;
253 268
254 slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK); 269 slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
255 270
256 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { 271 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
257 /* Invalidate an entry */ 272 /* Invalidate an entry */
258 to_svcpu(vcpu)->slb[slb_index].esid = 0; 273 svcpu->slb[slb_index].esid = 0;
259 return -ENOENT; 274 r = -ENOENT;
275 goto out;
260 } 276 }
261 277
262 map = find_sid_vsid(vcpu, gvsid); 278 map = find_sid_vsid(vcpu, gvsid);
@@ -269,18 +285,22 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
269 slb_vsid &= ~SLB_VSID_KP; 285 slb_vsid &= ~SLB_VSID_KP;
270 slb_esid |= slb_index; 286 slb_esid |= slb_index;
271 287
272 to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; 288 svcpu->slb[slb_index].esid = slb_esid;
273 to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; 289 svcpu->slb[slb_index].vsid = slb_vsid;
274 290
275 trace_kvm_book3s_slbmte(slb_vsid, slb_esid); 291 trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
276 292
277 return 0; 293out:
294 svcpu_put(svcpu);
295 return r;
278} 296}
279 297
280void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 298void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
281{ 299{
282 to_svcpu(vcpu)->slb_max = 1; 300 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
283 to_svcpu(vcpu)->slb[0].esid = 0; 301 svcpu->slb_max = 1;
302 svcpu->slb[0].esid = 0;
303 svcpu_put(svcpu);
284} 304}
285 305
286void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 306void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
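
kvmppc_mmu_map_segment() above derives the segment index and the shadow SLB entry from the effective address with plain shifts and masks. A small worked example follows for the 256MB-segment case; the constants mirror the kernel's SID_SHIFT/ESID_MASK/SLB_ESID_V definitions and are reproduced here only so the sketch stands alone, so treat them as illustrative.

/* Worked example, not from the patch. */
#include <stdio.h>

#define SID_SHIFT   28                     /* 256MB segments (assumed) */
#define ESID_MASK   0xfffffffff0000000UL   /* assumed from mmu-hash64.h */
#define SLB_ESID_V  0x0000000008000000UL   /* SLB entry valid bit (assumed) */

int main(void)
{
	unsigned long eaddr    = 0x12345678UL;
	unsigned long esid     = eaddr >> SID_SHIFT;              /* 0x1 */
	unsigned long slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;

	printf("eaddr=%#lx esid=%#lx slb_esid=%#lx\n", eaddr, esid, slb_esid);
	return 0;
}
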
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bc3a2ea94217..ddc485a529f2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -23,6 +23,7 @@
23#include <linux/gfp.h> 23#include <linux/gfp.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
26#include <linux/vmalloc.h>
26 27
27#include <asm/tlbflush.h> 28#include <asm/tlbflush.h>
28#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
@@ -33,15 +34,6 @@
33#include <asm/ppc-opcode.h> 34#include <asm/ppc-opcode.h>
34#include <asm/cputable.h> 35#include <asm/cputable.h>
35 36
36/* For now use fixed-size 16MB page table */
37#define HPT_ORDER 24
38#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
39#define HPT_HASH_MASK (HPT_NPTEG - 1)
40
41/* Pages in the VRMA are 16MB pages */
42#define VRMA_PAGE_ORDER 24
43#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
44
45/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 37/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
46#define MAX_LPID_970 63 38#define MAX_LPID_970 63
47#define NR_LPIDS (LPID_RSVD + 1) 39#define NR_LPIDS (LPID_RSVD + 1)
@@ -51,21 +43,41 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
51{ 43{
52 unsigned long hpt; 44 unsigned long hpt;
53 unsigned long lpid; 45 unsigned long lpid;
46 struct revmap_entry *rev;
47 struct kvmppc_linear_info *li;
48
49 /* Allocate guest's hashed page table */
50 li = kvm_alloc_hpt();
51 if (li) {
52 /* using preallocated memory */
53 hpt = (ulong)li->base_virt;
54 kvm->arch.hpt_li = li;
55 } else {
56 /* using dynamic memory */
57 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
58 __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
59 }
54 60
55 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
56 HPT_ORDER - PAGE_SHIFT);
57 if (!hpt) { 61 if (!hpt) {
58 pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); 62 pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
59 return -ENOMEM; 63 return -ENOMEM;
60 } 64 }
61 kvm->arch.hpt_virt = hpt; 65 kvm->arch.hpt_virt = hpt;
62 66
67 /* Allocate reverse map array */
68 rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
69 if (!rev) {
70 pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
71 goto out_freehpt;
72 }
73 kvm->arch.revmap = rev;
74
75 /* Allocate the guest's logical partition ID */
63 do { 76 do {
64 lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); 77 lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
65 if (lpid >= NR_LPIDS) { 78 if (lpid >= NR_LPIDS) {
66 pr_err("kvm_alloc_hpt: No LPIDs free\n"); 79 pr_err("kvm_alloc_hpt: No LPIDs free\n");
67 free_pages(hpt, HPT_ORDER - PAGE_SHIFT); 80 goto out_freeboth;
68 return -ENOMEM;
69 } 81 }
70 } while (test_and_set_bit(lpid, lpid_inuse)); 82 } while (test_and_set_bit(lpid, lpid_inuse));
71 83
@@ -74,37 +86,64 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
74 86
75 pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); 87 pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
76 return 0; 88 return 0;
89
90 out_freeboth:
91 vfree(rev);
92 out_freehpt:
93 free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
94 return -ENOMEM;
77} 95}
78 96
79void kvmppc_free_hpt(struct kvm *kvm) 97void kvmppc_free_hpt(struct kvm *kvm)
80{ 98{
81 clear_bit(kvm->arch.lpid, lpid_inuse); 99 clear_bit(kvm->arch.lpid, lpid_inuse);
82 free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); 100 vfree(kvm->arch.revmap);
101 if (kvm->arch.hpt_li)
102 kvm_release_hpt(kvm->arch.hpt_li);
103 else
104 free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
105}
106
107/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
108static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
109{
110 return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
111}
112
113/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
114static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
115{
116 return (pgsize == 0x10000) ? 0x1000 : 0;
83} 117}
84 118
85void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) 119void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
120 unsigned long porder)
86{ 121{
87 unsigned long i; 122 unsigned long i;
88 unsigned long npages = kvm->arch.ram_npages; 123 unsigned long npages;
89 unsigned long pfn; 124 unsigned long hp_v, hp_r;
90 unsigned long *hpte; 125 unsigned long addr, hash;
91 unsigned long hash; 126 unsigned long psize;
92 struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo; 127 unsigned long hp0, hp1;
128 long ret;
93 129
94 if (!pginfo) 130 psize = 1ul << porder;
95 return; 131 npages = memslot->npages >> (porder - PAGE_SHIFT);
96 132
97 /* VRMA can't be > 1TB */ 133 /* VRMA can't be > 1TB */
98 if (npages > 1ul << (40 - kvm->arch.ram_porder)) 134 if (npages > 1ul << (40 - porder))
99 npages = 1ul << (40 - kvm->arch.ram_porder); 135 npages = 1ul << (40 - porder);
100 /* Can't use more than 1 HPTE per HPTEG */ 136 /* Can't use more than 1 HPTE per HPTEG */
101 if (npages > HPT_NPTEG) 137 if (npages > HPT_NPTEG)
102 npages = HPT_NPTEG; 138 npages = HPT_NPTEG;
103 139
140 hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
141 HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
142 hp1 = hpte1_pgsize_encoding(psize) |
143 HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
144
104 for (i = 0; i < npages; ++i) { 145 for (i = 0; i < npages; ++i) {
105 pfn = pginfo[i].pfn; 146 addr = i << porder;
106 if (!pfn)
107 break;
108 /* can't use hpt_hash since va > 64 bits */ 147 /* can't use hpt_hash since va > 64 bits */
109 hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; 148 hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
110 /* 149 /*
@@ -113,15 +152,15 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
113 * at most one HPTE per HPTEG, we just assume entry 7 152 * at most one HPTE per HPTEG, we just assume entry 7
114 * is available and use it. 153 * is available and use it.
115 */ 154 */
116 hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7)); 155 hash = (hash << 3) + 7;
117 hpte += 7 * 2; 156 hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
118 /* HPTE low word - RPN, protection, etc. */ 157 hp_r = hp1 | addr;
119 hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C | 158 ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
120 HPTE_R_M | PP_RWXX; 159 if (ret != H_SUCCESS) {
121 wmb(); 160 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
122 hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | 161 addr, ret);
123 (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | 162 break;
124 HPTE_V_LARGE | HPTE_V_VALID; 163 }
125 } 164 }
126} 165}
127 166
@@ -158,10 +197,814 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
158 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); 197 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
159} 198}
160 199
200/*
201 * This is called to get a reference to a guest page if there isn't
202 * one already in the kvm->arch.slot_phys[][] arrays.
203 */
204static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
205 struct kvm_memory_slot *memslot,
206 unsigned long psize)
207{
208 unsigned long start;
209 long np, err;
210 struct page *page, *hpage, *pages[1];
211 unsigned long s, pgsize;
212 unsigned long *physp;
213 unsigned int is_io, got, pgorder;
214 struct vm_area_struct *vma;
215 unsigned long pfn, i, npages;
216
217 physp = kvm->arch.slot_phys[memslot->id];
218 if (!physp)
219 return -EINVAL;
220 if (physp[gfn - memslot->base_gfn])
221 return 0;
222
223 is_io = 0;
224 got = 0;
225 page = NULL;
226 pgsize = psize;
227 err = -EINVAL;
228 start = gfn_to_hva_memslot(memslot, gfn);
229
230 /* Instantiate and get the page we want access to */
231 np = get_user_pages_fast(start, 1, 1, pages);
232 if (np != 1) {
233 /* Look up the vma for the page */
234 down_read(&current->mm->mmap_sem);
235 vma = find_vma(current->mm, start);
236 if (!vma || vma->vm_start > start ||
237 start + psize > vma->vm_end ||
238 !(vma->vm_flags & VM_PFNMAP))
239 goto up_err;
240 is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
241 pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
242 /* check alignment of pfn vs. requested page size */
243 if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
244 goto up_err;
245 up_read(&current->mm->mmap_sem);
246
247 } else {
248 page = pages[0];
249 got = KVMPPC_GOT_PAGE;
250
251 /* See if this is a large page */
252 s = PAGE_SIZE;
253 if (PageHuge(page)) {
254 hpage = compound_head(page);
255 s <<= compound_order(hpage);
256 /* Get the whole large page if slot alignment is ok */
257 if (s > psize && slot_is_aligned(memslot, s) &&
258 !(memslot->userspace_addr & (s - 1))) {
259 start &= ~(s - 1);
260 pgsize = s;
261 page = hpage;
262 }
263 }
264 if (s < psize)
265 goto out;
266 pfn = page_to_pfn(page);
267 }
268
269 npages = pgsize >> PAGE_SHIFT;
270 pgorder = __ilog2(npages);
271 physp += (gfn - memslot->base_gfn) & ~(npages - 1);
272 spin_lock(&kvm->arch.slot_phys_lock);
273 for (i = 0; i < npages; ++i) {
274 if (!physp[i]) {
275 physp[i] = ((pfn + i) << PAGE_SHIFT) +
276 got + is_io + pgorder;
277 got = 0;
278 }
279 }
280 spin_unlock(&kvm->arch.slot_phys_lock);
281 err = 0;
282
283 out:
284 if (got) {
285 if (PageHuge(page))
286 page = compound_head(page);
287 put_page(page);
288 }
289 return err;
290
291 up_err:
292 up_read(&current->mm->mmap_sem);
293 return err;
294}
295
296/*
297 * We come here on a H_ENTER call from the guest when we are not
298 * using mmu notifiers and we don't have the requested page pinned
299 * already.
300 */
301long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
302 long pte_index, unsigned long pteh, unsigned long ptel)
303{
304 struct kvm *kvm = vcpu->kvm;
305 unsigned long psize, gpa, gfn;
306 struct kvm_memory_slot *memslot;
307 long ret;
308
309 if (kvm->arch.using_mmu_notifiers)
310 goto do_insert;
311
312 psize = hpte_page_size(pteh, ptel);
313 if (!psize)
314 return H_PARAMETER;
315
316 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
317
318 /* Find the memslot (if any) for this address */
319 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
320 gfn = gpa >> PAGE_SHIFT;
321 memslot = gfn_to_memslot(kvm, gfn);
322 if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
323 if (!slot_is_aligned(memslot, psize))
324 return H_PARAMETER;
325 if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
326 return H_PARAMETER;
327 }
328
329 do_insert:
330 /* Protect linux PTE lookup from page table destruction */
331 rcu_read_lock_sched(); /* this disables preemption too */
332 vcpu->arch.pgdir = current->mm->pgd;
333 ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
334 rcu_read_unlock_sched();
335 if (ret == H_TOO_HARD) {
336 /* this can't happen */
337 pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
338 ret = H_RESOURCE; /* or something */
339 }
340 return ret;
341
342}
343
344static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
345 gva_t eaddr)
346{
347 u64 mask;
348 int i;
349
350 for (i = 0; i < vcpu->arch.slb_nr; i++) {
351 if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
352 continue;
353
354 if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
355 mask = ESID_MASK_1T;
356 else
357 mask = ESID_MASK;
358
359 if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
360 return &vcpu->arch.slb[i];
361 }
362 return NULL;
363}
364
365static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
366 unsigned long ea)
367{
368 unsigned long ra_mask;
369
370 ra_mask = hpte_page_size(v, r) - 1;
371 return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
372}
373
161static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 374static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
162 struct kvmppc_pte *gpte, bool data) 375 struct kvmppc_pte *gpte, bool data)
376{
377 struct kvm *kvm = vcpu->kvm;
378 struct kvmppc_slb *slbe;
379 unsigned long slb_v;
380 unsigned long pp, key;
381 unsigned long v, gr;
382 unsigned long *hptep;
383 int index;
384 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
385
386 /* Get SLB entry */
387 if (virtmode) {
388 slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
389 if (!slbe)
390 return -EINVAL;
391 slb_v = slbe->origv;
392 } else {
393 /* real mode access */
394 slb_v = vcpu->kvm->arch.vrma_slb_v;
395 }
396
397 /* Find the HPTE in the hash table */
398 index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
399 HPTE_V_VALID | HPTE_V_ABSENT);
400 if (index < 0)
401 return -ENOENT;
402 hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
403 v = hptep[0] & ~HPTE_V_HVLOCK;
404 gr = kvm->arch.revmap[index].guest_rpte;
405
406 /* Unlock the HPTE */
407 asm volatile("lwsync" : : : "memory");
408 hptep[0] = v;
409
410 gpte->eaddr = eaddr;
411 gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
412
413 /* Get PP bits and key for permission check */
414 pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
415 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
416 key &= slb_v;
417
418 /* Calculate permissions */
419 gpte->may_read = hpte_read_permission(pp, key);
420 gpte->may_write = hpte_write_permission(pp, key);
421 gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
422
423 /* Storage key permission check for POWER7 */
424 if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
425 int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
426 if (amrfield & 1)
427 gpte->may_read = 0;
428 if (amrfield & 2)
429 gpte->may_write = 0;
430 }
431
432 /* Get the guest physical address */
433 gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
434 return 0;
435}
436
437/*
438 * Quick test for whether an instruction is a load or a store.
439 * If the instruction is a load or a store, then this will indicate
440 * which it is, at least on server processors. (Embedded processors
441 * have some external PID instructions that don't follow the rule
442 * embodied here.) If the instruction isn't a load or store, then
443 * this doesn't return anything useful.
444 */
445static int instruction_is_store(unsigned int instr)
446{
447 unsigned int mask;
448
449 mask = 0x10000000;
450 if ((instr & 0xfc000000) == 0x7c000000)
451 mask = 0x100; /* major opcode 31 */
452 return (instr & mask) != 0;
453}
454
455static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
456 unsigned long gpa, int is_store)
457{
458 int ret;
459 u32 last_inst;
460 unsigned long srr0 = kvmppc_get_pc(vcpu);
461
462 /* We try to load the last instruction. We don't let
463 * emulate_instruction do it as it doesn't check what
464 * kvmppc_ld returns.
465 * If we fail, we just return to the guest and try executing it again.
466 */
467 if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
468 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
469 if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
470 return RESUME_GUEST;
471 vcpu->arch.last_inst = last_inst;
472 }
473
474 /*
475 * WARNING: We do not know for sure whether the instruction we just
476 * read from memory is the same that caused the fault in the first
 477 * place. If the instruction we read is neither a load nor a store,
478 * then it can't access memory, so we don't need to worry about
479 * enforcing access permissions. So, assuming it is a load or
480 * store, we just check that its direction (load or store) is
481 * consistent with the original fault, since that's what we
482 * checked the access permissions against. If there is a mismatch
483 * we just return and retry the instruction.
484 */
485
486 if (instruction_is_store(vcpu->arch.last_inst) != !!is_store)
487 return RESUME_GUEST;
488
489 /*
490 * Emulated accesses are emulated by looking at the hash for
491 * translation once, then performing the access later. The
492 * translation could be invalidated in the meantime in which
493 * point performing the subsequent memory access on the old
494 * physical address could possibly be a security hole for the
495 * guest (but not the host).
496 *
497 * This is less of an issue for MMIO stores since they aren't
498 * globally visible. It could be an issue for MMIO loads to
499 * a certain extent but we'll ignore it for now.
500 */
501
502 vcpu->arch.paddr_accessed = gpa;
503 return kvmppc_emulate_mmio(run, vcpu);
504}
505
506int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
507 unsigned long ea, unsigned long dsisr)
508{
509 struct kvm *kvm = vcpu->kvm;
510 unsigned long *hptep, hpte[3], r;
511 unsigned long mmu_seq, psize, pte_size;
512 unsigned long gfn, hva, pfn;
513 struct kvm_memory_slot *memslot;
514 unsigned long *rmap;
515 struct revmap_entry *rev;
516 struct page *page, *pages[1];
517 long index, ret, npages;
518 unsigned long is_io;
519 unsigned int writing, write_ok;
520 struct vm_area_struct *vma;
521 unsigned long rcbits;
522
523 /*
524 * Real-mode code has already searched the HPT and found the
525 * entry we're interested in. Lock the entry and check that
526 * it hasn't changed. If it has, just return and re-execute the
527 * instruction.
528 */
529 if (ea != vcpu->arch.pgfault_addr)
530 return RESUME_GUEST;
531 index = vcpu->arch.pgfault_index;
532 hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
533 rev = &kvm->arch.revmap[index];
534 preempt_disable();
535 while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
536 cpu_relax();
537 hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
538 hpte[1] = hptep[1];
539 hpte[2] = r = rev->guest_rpte;
540 asm volatile("lwsync" : : : "memory");
541 hptep[0] = hpte[0];
542 preempt_enable();
543
544 if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
545 hpte[1] != vcpu->arch.pgfault_hpte[1])
546 return RESUME_GUEST;
547
548 /* Translate the logical address and get the page */
549 psize = hpte_page_size(hpte[0], r);
550 gfn = hpte_rpn(r, psize);
551 memslot = gfn_to_memslot(kvm, gfn);
552
553 /* No memslot means it's an emulated MMIO region */
554 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
555 unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
556 return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
557 dsisr & DSISR_ISSTORE);
558 }
559
560 if (!kvm->arch.using_mmu_notifiers)
561 return -EFAULT; /* should never get here */
562
563 /* used to check for invalidations in progress */
564 mmu_seq = kvm->mmu_notifier_seq;
565 smp_rmb();
566
567 is_io = 0;
568 pfn = 0;
569 page = NULL;
570 pte_size = PAGE_SIZE;
571 writing = (dsisr & DSISR_ISSTORE) != 0;
572 /* If writing != 0, then the HPTE must allow writing, if we get here */
573 write_ok = writing;
574 hva = gfn_to_hva_memslot(memslot, gfn);
575 npages = get_user_pages_fast(hva, 1, writing, pages);
576 if (npages < 1) {
577 /* Check if it's an I/O mapping */
578 down_read(&current->mm->mmap_sem);
579 vma = find_vma(current->mm, hva);
580 if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
581 (vma->vm_flags & VM_PFNMAP)) {
582 pfn = vma->vm_pgoff +
583 ((hva - vma->vm_start) >> PAGE_SHIFT);
584 pte_size = psize;
585 is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
586 write_ok = vma->vm_flags & VM_WRITE;
587 }
588 up_read(&current->mm->mmap_sem);
589 if (!pfn)
590 return -EFAULT;
591 } else {
592 page = pages[0];
593 if (PageHuge(page)) {
594 page = compound_head(page);
595 pte_size <<= compound_order(page);
596 }
597 /* if the guest wants write access, see if that is OK */
598 if (!writing && hpte_is_writable(r)) {
599 pte_t *ptep, pte;
600
601 /*
602 * We need to protect against page table destruction
603 * while looking up and updating the pte.
604 */
605 rcu_read_lock_sched();
606 ptep = find_linux_pte_or_hugepte(current->mm->pgd,
607 hva, NULL);
608 if (ptep && pte_present(*ptep)) {
609 pte = kvmppc_read_update_linux_pte(ptep, 1);
610 if (pte_write(pte))
611 write_ok = 1;
612 }
613 rcu_read_unlock_sched();
614 }
615 pfn = page_to_pfn(page);
616 }
617
618 ret = -EFAULT;
619 if (psize > pte_size)
620 goto out_put;
621
622 /* Check WIMG vs. the actual page we're accessing */
623 if (!hpte_cache_flags_ok(r, is_io)) {
624 if (is_io)
625 return -EFAULT;
626 /*
627 * Allow guest to map emulated device memory as
628 * uncacheable, but actually make it cacheable.
629 */
630 r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
631 }
632
633 /* Set the HPTE to point to pfn */
634 r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
635 if (hpte_is_writable(r) && !write_ok)
636 r = hpte_make_readonly(r);
637 ret = RESUME_GUEST;
638 preempt_disable();
639 while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
640 cpu_relax();
641 if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
642 rev->guest_rpte != hpte[2])
643 /* HPTE has been changed under us; let the guest retry */
644 goto out_unlock;
645 hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
646
647 rmap = &memslot->rmap[gfn - memslot->base_gfn];
648 lock_rmap(rmap);
649
650 /* Check if we might have been invalidated; let the guest retry if so */
651 ret = RESUME_GUEST;
652 if (mmu_notifier_retry(vcpu, mmu_seq)) {
653 unlock_rmap(rmap);
654 goto out_unlock;
655 }
656
657 /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
658 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
659 r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
660
661 if (hptep[0] & HPTE_V_VALID) {
662 /* HPTE was previously valid, so we need to invalidate it */
663 unlock_rmap(rmap);
664 hptep[0] |= HPTE_V_ABSENT;
665 kvmppc_invalidate_hpte(kvm, hptep, index);
666 /* don't lose previous R and C bits */
667 r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
668 } else {
669 kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
670 }
671
672 hptep[1] = r;
673 eieio();
674 hptep[0] = hpte[0];
675 asm volatile("ptesync" : : : "memory");
676 preempt_enable();
677 if (page && hpte_is_writable(r))
678 SetPageDirty(page);
679
680 out_put:
681 if (page)
682 put_page(page);
683 return ret;
684
685 out_unlock:
686 hptep[0] &= ~HPTE_V_HVLOCK;
687 preempt_enable();
688 goto out_put;
689}
690
691static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
692 int (*handler)(struct kvm *kvm, unsigned long *rmapp,
693 unsigned long gfn))
694{
695 int ret;
696 int retval = 0;
697 struct kvm_memslots *slots;
698 struct kvm_memory_slot *memslot;
699
700 slots = kvm_memslots(kvm);
701 kvm_for_each_memslot(memslot, slots) {
702 unsigned long start = memslot->userspace_addr;
703 unsigned long end;
704
705 end = start + (memslot->npages << PAGE_SHIFT);
706 if (hva >= start && hva < end) {
707 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
708
709 ret = handler(kvm, &memslot->rmap[gfn_offset],
710 memslot->base_gfn + gfn_offset);
711 retval |= ret;
712 }
713 }
714
715 return retval;
716}
717
718static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
719 unsigned long gfn)
720{
721 struct revmap_entry *rev = kvm->arch.revmap;
722 unsigned long h, i, j;
723 unsigned long *hptep;
724 unsigned long ptel, psize, rcbits;
725
726 for (;;) {
727 lock_rmap(rmapp);
728 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
729 unlock_rmap(rmapp);
730 break;
731 }
732
733 /*
734 * To avoid an ABBA deadlock with the HPTE lock bit,
735 * we can't spin on the HPTE lock while holding the
736 * rmap chain lock.
737 */
738 i = *rmapp & KVMPPC_RMAP_INDEX;
739 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
740 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
741 /* unlock rmap before spinning on the HPTE lock */
742 unlock_rmap(rmapp);
743 while (hptep[0] & HPTE_V_HVLOCK)
744 cpu_relax();
745 continue;
746 }
747 j = rev[i].forw;
748 if (j == i) {
749 /* chain is now empty */
750 *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
751 } else {
752 /* remove i from chain */
753 h = rev[i].back;
754 rev[h].forw = j;
755 rev[j].back = h;
756 rev[i].forw = rev[i].back = i;
757 *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
758 }
759
760 /* Now check and modify the HPTE */
761 ptel = rev[i].guest_rpte;
762 psize = hpte_page_size(hptep[0], ptel);
763 if ((hptep[0] & HPTE_V_VALID) &&
764 hpte_rpn(ptel, psize) == gfn) {
765 hptep[0] |= HPTE_V_ABSENT;
766 kvmppc_invalidate_hpte(kvm, hptep, i);
767 /* Harvest R and C */
768 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
769 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
770 rev[i].guest_rpte = ptel | rcbits;
771 }
772 unlock_rmap(rmapp);
773 hptep[0] &= ~HPTE_V_HVLOCK;
774 }
775 return 0;
776}
777
778int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
779{
780 if (kvm->arch.using_mmu_notifiers)
781 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
782 return 0;
783}
784
785static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
786 unsigned long gfn)
787{
788 struct revmap_entry *rev = kvm->arch.revmap;
789 unsigned long head, i, j;
790 unsigned long *hptep;
791 int ret = 0;
792
793 retry:
794 lock_rmap(rmapp);
795 if (*rmapp & KVMPPC_RMAP_REFERENCED) {
796 *rmapp &= ~KVMPPC_RMAP_REFERENCED;
797 ret = 1;
798 }
799 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
800 unlock_rmap(rmapp);
801 return ret;
802 }
803
804 i = head = *rmapp & KVMPPC_RMAP_INDEX;
805 do {
806 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
807 j = rev[i].forw;
808
809 /* If this HPTE isn't referenced, ignore it */
810 if (!(hptep[1] & HPTE_R_R))
811 continue;
812
813 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
814 /* unlock rmap before spinning on the HPTE lock */
815 unlock_rmap(rmapp);
816 while (hptep[0] & HPTE_V_HVLOCK)
817 cpu_relax();
818 goto retry;
819 }
820
821 /* Now check and modify the HPTE */
822 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
823 kvmppc_clear_ref_hpte(kvm, hptep, i);
824 rev[i].guest_rpte |= HPTE_R_R;
825 ret = 1;
826 }
827 hptep[0] &= ~HPTE_V_HVLOCK;
828 } while ((i = j) != head);
829
830 unlock_rmap(rmapp);
831 return ret;
832}
833
834int kvm_age_hva(struct kvm *kvm, unsigned long hva)
835{
836 if (!kvm->arch.using_mmu_notifiers)
837 return 0;
838 return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
839}
840
841static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
842 unsigned long gfn)
843{
844 struct revmap_entry *rev = kvm->arch.revmap;
845 unsigned long head, i, j;
846 unsigned long *hp;
847 int ret = 1;
848
849 if (*rmapp & KVMPPC_RMAP_REFERENCED)
850 return 1;
851
852 lock_rmap(rmapp);
853 if (*rmapp & KVMPPC_RMAP_REFERENCED)
854 goto out;
855
856 if (*rmapp & KVMPPC_RMAP_PRESENT) {
857 i = head = *rmapp & KVMPPC_RMAP_INDEX;
858 do {
859 hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
860 j = rev[i].forw;
861 if (hp[1] & HPTE_R_R)
862 goto out;
863 } while ((i = j) != head);
864 }
865 ret = 0;
866
867 out:
868 unlock_rmap(rmapp);
869 return ret;
870}
871
872int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
873{
874 if (!kvm->arch.using_mmu_notifiers)
875 return 0;
876 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
877}
878
879void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
163{ 880{
164 return -ENOENT; 881 if (!kvm->arch.using_mmu_notifiers)
882 return;
883 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
884}
885
886static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
887{
888 struct revmap_entry *rev = kvm->arch.revmap;
889 unsigned long head, i, j;
890 unsigned long *hptep;
891 int ret = 0;
892
893 retry:
894 lock_rmap(rmapp);
895 if (*rmapp & KVMPPC_RMAP_CHANGED) {
896 *rmapp &= ~KVMPPC_RMAP_CHANGED;
897 ret = 1;
898 }
899 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
900 unlock_rmap(rmapp);
901 return ret;
902 }
903
904 i = head = *rmapp & KVMPPC_RMAP_INDEX;
905 do {
906 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
907 j = rev[i].forw;
908
909 if (!(hptep[1] & HPTE_R_C))
910 continue;
911
912 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
913 /* unlock rmap before spinning on the HPTE lock */
914 unlock_rmap(rmapp);
915 while (hptep[0] & HPTE_V_HVLOCK)
916 cpu_relax();
917 goto retry;
918 }
919
920 /* Now check and modify the HPTE */
921 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
922 /* need to make it temporarily absent to clear C */
923 hptep[0] |= HPTE_V_ABSENT;
924 kvmppc_invalidate_hpte(kvm, hptep, i);
925 hptep[1] &= ~HPTE_R_C;
926 eieio();
927 hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
928 rev[i].guest_rpte |= HPTE_R_C;
929 ret = 1;
930 }
931 hptep[0] &= ~HPTE_V_HVLOCK;
932 } while ((i = j) != head);
933
934 unlock_rmap(rmapp);
935 return ret;
936}
937
938long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
939{
940 unsigned long i;
941 unsigned long *rmapp, *map;
942
943 preempt_disable();
944 rmapp = memslot->rmap;
945 map = memslot->dirty_bitmap;
946 for (i = 0; i < memslot->npages; ++i) {
947 if (kvm_test_clear_dirty(kvm, rmapp))
948 __set_bit_le(i, map);
949 ++rmapp;
950 }
951 preempt_enable();
952 return 0;
953}
954
955void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
956 unsigned long *nb_ret)
957{
958 struct kvm_memory_slot *memslot;
959 unsigned long gfn = gpa >> PAGE_SHIFT;
960 struct page *page, *pages[1];
961 int npages;
962 unsigned long hva, psize, offset;
963 unsigned long pa;
964 unsigned long *physp;
965
966 memslot = gfn_to_memslot(kvm, gfn);
967 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
968 return NULL;
969 if (!kvm->arch.using_mmu_notifiers) {
970 physp = kvm->arch.slot_phys[memslot->id];
971 if (!physp)
972 return NULL;
973 physp += gfn - memslot->base_gfn;
974 pa = *physp;
975 if (!pa) {
976 if (kvmppc_get_guest_page(kvm, gfn, memslot,
977 PAGE_SIZE) < 0)
978 return NULL;
979 pa = *physp;
980 }
981 page = pfn_to_page(pa >> PAGE_SHIFT);
982 } else {
983 hva = gfn_to_hva_memslot(memslot, gfn);
984 npages = get_user_pages_fast(hva, 1, 1, pages);
985 if (npages < 1)
986 return NULL;
987 page = pages[0];
988 }
989 psize = PAGE_SIZE;
990 if (PageHuge(page)) {
991 page = compound_head(page);
992 psize <<= compound_order(page);
993 }
994 if (!kvm->arch.using_mmu_notifiers)
995 get_page(page);
996 offset = gpa & (psize - 1);
997 if (nb_ret)
998 *nb_ret = psize - offset;
999 return page_address(page) + offset;
1000}
1001
1002void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
1003{
1004 struct page *page = virt_to_page(va);
1005
1006 page = compound_head(page);
1007 put_page(page);
165} 1008}
166 1009
167void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 1010void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
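
Among the additions above, hpte0_pgsize_encoding() and hpte1_pgsize_encoding() pick the HPTE size bits for 4k, 64k and 16MB pages: anything larger than 4k sets HPTE_V_LARGE in the first doubleword, and only 64k pages set the 0x1000 bit in the second. The following self-contained illustration copies the two helpers from the diff; the HPTE_V_LARGE value is assumed from the mmu-hash64 header purely so the sketch compiles on its own.

/* Illustration only; the helpers are verbatim from the hunk above. */
#include <stdio.h>

#define HPTE_V_LARGE 0x0000000000000004UL   /* assumed header value */

static unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
}

static unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize == 0x10000) ? 0x1000 : 0;
}

int main(void)
{
	unsigned long sizes[] = { 0x1000, 0x10000, 0x1000000 }; /* 4k, 64k, 16M */
	int i;

	for (i = 0; i < 3; i++)
		printf("pgsize %#lx -> hpte0 %#lx, hpte1 %#lx\n", sizes[i],
		       hpte0_pgsize_encoding(sizes[i]),
		       hpte1_pgsize_encoding(sizes[i]));
	return 0;
}
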
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 0c9dc62532d0..f1950d131827 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -230,9 +230,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
230 230
231 r = kvmppc_st(vcpu, &addr, 32, zeros, true); 231 r = kvmppc_st(vcpu, &addr, 32, zeros, true);
232 if ((r == -ENOENT) || (r == -EPERM)) { 232 if ((r == -ENOENT) || (r == -EPERM)) {
233 struct kvmppc_book3s_shadow_vcpu *svcpu;
234
235 svcpu = svcpu_get(vcpu);
233 *advance = 0; 236 *advance = 0;
234 vcpu->arch.shared->dar = vaddr; 237 vcpu->arch.shared->dar = vaddr;
235 to_svcpu(vcpu)->fault_dar = vaddr; 238 svcpu->fault_dar = vaddr;
236 239
237 dsisr = DSISR_ISSTORE; 240 dsisr = DSISR_ISSTORE;
238 if (r == -ENOENT) 241 if (r == -ENOENT)
@@ -241,7 +244,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
241 dsisr |= DSISR_PROTFAULT; 244 dsisr |= DSISR_PROTFAULT;
242 245
243 vcpu->arch.shared->dsisr = dsisr; 246 vcpu->arch.shared->dsisr = dsisr;
244 to_svcpu(vcpu)->fault_dsisr = dsisr; 247 svcpu->fault_dsisr = dsisr;
248 svcpu_put(svcpu);
245 249
246 kvmppc_book3s_queue_irqprio(vcpu, 250 kvmppc_book3s_queue_irqprio(vcpu,
247 BOOK3S_INTERRUPT_DATA_STORAGE); 251 BOOK3S_INTERRUPT_DATA_STORAGE);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a7267167a550..d386b6198bc7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -48,22 +48,14 @@
48#include <linux/gfp.h> 48#include <linux/gfp.h>
49#include <linux/vmalloc.h> 49#include <linux/vmalloc.h>
50#include <linux/highmem.h> 50#include <linux/highmem.h>
51 51#include <linux/hugetlb.h>
52/*
53 * For now, limit memory to 64GB and require it to be large pages.
54 * This value is chosen because it makes the ram_pginfo array be
55 * 64kB in size, which is about as large as we want to be trying
56 * to allocate with kmalloc.
57 */
58#define MAX_MEM_ORDER 36
59
60#define LARGE_PAGE_ORDER 24 /* 16MB pages */
61 52
62/* #define EXIT_DEBUG */ 53/* #define EXIT_DEBUG */
63/* #define EXIT_DEBUG_SIMPLE */ 54/* #define EXIT_DEBUG_SIMPLE */
64/* #define EXIT_DEBUG_INT */ 55/* #define EXIT_DEBUG_INT */
65 56
66static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 57static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
58static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
67 59
68void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 60void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
69{ 61{
@@ -146,10 +138,10 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
146 unsigned long vcpuid, unsigned long vpa) 138 unsigned long vcpuid, unsigned long vpa)
147{ 139{
148 struct kvm *kvm = vcpu->kvm; 140 struct kvm *kvm = vcpu->kvm;
149 unsigned long pg_index, ra, len; 141 unsigned long len, nb;
150 unsigned long pg_offset;
151 void *va; 142 void *va;
152 struct kvm_vcpu *tvcpu; 143 struct kvm_vcpu *tvcpu;
144 int err = H_PARAMETER;
153 145
154 tvcpu = kvmppc_find_vcpu(kvm, vcpuid); 146 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
155 if (!tvcpu) 147 if (!tvcpu)
@@ -162,45 +154,41 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
162 if (flags < 4) { 154 if (flags < 4) {
163 if (vpa & 0x7f) 155 if (vpa & 0x7f)
164 return H_PARAMETER; 156 return H_PARAMETER;
157 if (flags >= 2 && !tvcpu->arch.vpa)
158 return H_RESOURCE;
165 /* registering new area; convert logical addr to real */ 159 /* registering new area; convert logical addr to real */
166 pg_index = vpa >> kvm->arch.ram_porder; 160 va = kvmppc_pin_guest_page(kvm, vpa, &nb);
167 pg_offset = vpa & (kvm->arch.ram_psize - 1); 161 if (va == NULL)
168 if (pg_index >= kvm->arch.ram_npages)
169 return H_PARAMETER; 162 return H_PARAMETER;
170 if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
171 return H_PARAMETER;
172 ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
173 ra |= pg_offset;
174 va = __va(ra);
175 if (flags <= 1) 163 if (flags <= 1)
176 len = *(unsigned short *)(va + 4); 164 len = *(unsigned short *)(va + 4);
177 else 165 else
178 len = *(unsigned int *)(va + 4); 166 len = *(unsigned int *)(va + 4);
179 if (pg_offset + len > kvm->arch.ram_psize) 167 if (len > nb)
180 return H_PARAMETER; 168 goto out_unpin;
181 switch (flags) { 169 switch (flags) {
182 case 1: /* register VPA */ 170 case 1: /* register VPA */
183 if (len < 640) 171 if (len < 640)
184 return H_PARAMETER; 172 goto out_unpin;
173 if (tvcpu->arch.vpa)
174 kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
185 tvcpu->arch.vpa = va; 175 tvcpu->arch.vpa = va;
186 init_vpa(vcpu, va); 176 init_vpa(vcpu, va);
187 break; 177 break;
188 case 2: /* register DTL */ 178 case 2: /* register DTL */
189 if (len < 48) 179 if (len < 48)
190 return H_PARAMETER; 180 goto out_unpin;
191 if (!tvcpu->arch.vpa)
192 return H_RESOURCE;
193 len -= len % 48; 181 len -= len % 48;
182 if (tvcpu->arch.dtl)
183 kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
194 tvcpu->arch.dtl = va; 184 tvcpu->arch.dtl = va;
195 tvcpu->arch.dtl_end = va + len; 185 tvcpu->arch.dtl_end = va + len;
196 break; 186 break;
197 case 3: /* register SLB shadow buffer */ 187 case 3: /* register SLB shadow buffer */
198 if (len < 8) 188 if (len < 16)
199 return H_PARAMETER; 189 goto out_unpin;
200 if (!tvcpu->arch.vpa) 190 if (tvcpu->arch.slb_shadow)
201 return H_RESOURCE; 191 kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
202 tvcpu->arch.slb_shadow = va;
203 len = (len - 16) / 16;
204 tvcpu->arch.slb_shadow = va; 192 tvcpu->arch.slb_shadow = va;
205 break; 193 break;
206 } 194 }
@@ -209,17 +197,30 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
209 case 5: /* unregister VPA */ 197 case 5: /* unregister VPA */
210 if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) 198 if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
211 return H_RESOURCE; 199 return H_RESOURCE;
200 if (!tvcpu->arch.vpa)
201 break;
202 kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
212 tvcpu->arch.vpa = NULL; 203 tvcpu->arch.vpa = NULL;
213 break; 204 break;
214 case 6: /* unregister DTL */ 205 case 6: /* unregister DTL */
206 if (!tvcpu->arch.dtl)
207 break;
208 kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
215 tvcpu->arch.dtl = NULL; 209 tvcpu->arch.dtl = NULL;
216 break; 210 break;
217 case 7: /* unregister SLB shadow buffer */ 211 case 7: /* unregister SLB shadow buffer */
212 if (!tvcpu->arch.slb_shadow)
213 break;
214 kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
218 tvcpu->arch.slb_shadow = NULL; 215 tvcpu->arch.slb_shadow = NULL;
219 break; 216 break;
220 } 217 }
221 } 218 }
222 return H_SUCCESS; 219 return H_SUCCESS;
220
221 out_unpin:
222 kvmppc_unpin_guest_page(kvm, va);
223 return err;
223} 224}
224 225
225int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) 226int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -229,6 +230,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
229 struct kvm_vcpu *tvcpu; 230 struct kvm_vcpu *tvcpu;
230 231
231 switch (req) { 232 switch (req) {
233 case H_ENTER:
234 ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
235 kvmppc_get_gpr(vcpu, 5),
236 kvmppc_get_gpr(vcpu, 6),
237 kvmppc_get_gpr(vcpu, 7));
238 break;
232 case H_CEDE: 239 case H_CEDE:
233 break; 240 break;
234 case H_PROD: 241 case H_PROD:
@@ -318,20 +325,19 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
318 break; 325 break;
319 } 326 }
320 /* 327 /*
321 * We get these next two if the guest does a bad real-mode access, 328 * We get these next two if the guest accesses a page which it thinks
322 * as we have enabled VRMA (virtualized real mode area) mode in the 329 * it has mapped but which is not actually present, either because
 323 * LPCR. We just generate an appropriate DSI/ISI to the guest. 330 * it is for an emulated I/O device or because the corresponding
331 * host page has been paged out. Any other HDSI/HISI interrupts
332 * have been handled already.
324 */ 333 */
325 case BOOK3S_INTERRUPT_H_DATA_STORAGE: 334 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
326 vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr; 335 r = kvmppc_book3s_hv_page_fault(run, vcpu,
327 vcpu->arch.shregs.dar = vcpu->arch.fault_dar; 336 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
328 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
329 r = RESUME_GUEST;
330 break; 337 break;
331 case BOOK3S_INTERRUPT_H_INST_STORAGE: 338 case BOOK3S_INTERRUPT_H_INST_STORAGE:
332 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, 339 r = kvmppc_book3s_hv_page_fault(run, vcpu,
333 0x08000000); 340 kvmppc_get_pc(vcpu), 0);
334 r = RESUME_GUEST;
335 break; 341 break;
336 /* 342 /*
337 * This occurs if the guest executes an illegal instruction. 343 * This occurs if the guest executes an illegal instruction.
@@ -391,6 +397,42 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
391 return 0; 397 return 0;
392} 398}
393 399
400int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
401{
402 int r = -EINVAL;
403
404 switch (reg->id) {
405 case KVM_REG_PPC_HIOR:
406 r = put_user(0, (u64 __user *)reg->addr);
407 break;
408 default:
409 break;
410 }
411
412 return r;
413}
414
415int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
416{
417 int r = -EINVAL;
418
419 switch (reg->id) {
420 case KVM_REG_PPC_HIOR:
421 {
422 u64 hior;
423 /* Only allow this to be set to zero */
424 r = get_user(hior, (u64 __user *)reg->addr);
425 if (!r && (hior != 0))
426 r = -EINVAL;
427 break;
428 }
429 default:
430 break;
431 }
432
433 return r;
434}
435
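
The two handlers above are the HV side of the new one_reg interface; KVM_REG_PPC_HIOR is the only register handled so far, and it must read back as (and may only be set to) zero. A minimal userspace sketch, assuming the generic KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls and struct kvm_one_reg from <linux/kvm.h> (vcpu_fd is a hypothetical vcpu file descriptor):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_hior_to_zero(int vcpu_fd)
{
        uint64_t hior = 0;      /* any non-zero value is rejected with -EINVAL */
        struct kvm_one_reg reg = {
                .id   = KVM_REG_PPC_HIOR,
                .addr = (uintptr_t)&hior,
        };

        return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
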
394int kvmppc_core_check_processor_compat(void) 436int kvmppc_core_check_processor_compat(void)
395{ 437{
396 if (cpu_has_feature(CPU_FTR_HVMODE)) 438 if (cpu_has_feature(CPU_FTR_HVMODE))
@@ -410,7 +452,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
410 goto out; 452 goto out;
411 453
412 err = -ENOMEM; 454 err = -ENOMEM;
413 vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); 455 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
414 if (!vcpu) 456 if (!vcpu)
415 goto out; 457 goto out;
416 458
@@ -462,15 +504,21 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
462 return vcpu; 504 return vcpu;
463 505
464free_vcpu: 506free_vcpu:
465 kfree(vcpu); 507 kmem_cache_free(kvm_vcpu_cache, vcpu);
466out: 508out:
467 return ERR_PTR(err); 509 return ERR_PTR(err);
468} 510}
469 511
470void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 512void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
471{ 513{
514 if (vcpu->arch.dtl)
515 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
516 if (vcpu->arch.slb_shadow)
517 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
518 if (vcpu->arch.vpa)
519 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
472 kvm_vcpu_uninit(vcpu); 520 kvm_vcpu_uninit(vcpu);
473 kfree(vcpu); 521 kmem_cache_free(kvm_vcpu_cache, vcpu);
474} 522}
475 523
476static void kvmppc_set_timer(struct kvm_vcpu *vcpu) 524static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
@@ -481,7 +529,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
481 if (now > vcpu->arch.dec_expires) { 529 if (now > vcpu->arch.dec_expires) {
482 /* decrementer has already gone negative */ 530 /* decrementer has already gone negative */
483 kvmppc_core_queue_dec(vcpu); 531 kvmppc_core_queue_dec(vcpu);
484 kvmppc_core_deliver_interrupts(vcpu); 532 kvmppc_core_prepare_to_enter(vcpu);
485 return; 533 return;
486 } 534 }
487 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC 535 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
@@ -796,7 +844,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
796 844
797 list_for_each_entry_safe(v, vn, &vc->runnable_threads, 845 list_for_each_entry_safe(v, vn, &vc->runnable_threads,
798 arch.run_list) { 846 arch.run_list) {
799 kvmppc_core_deliver_interrupts(v); 847 kvmppc_core_prepare_to_enter(v);
800 if (signal_pending(v->arch.run_task)) { 848 if (signal_pending(v->arch.run_task)) {
801 kvmppc_remove_runnable(vc, v); 849 kvmppc_remove_runnable(vc, v);
802 v->stat.signal_exits++; 850 v->stat.signal_exits++;
@@ -835,20 +883,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
835 return -EINVAL; 883 return -EINVAL;
836 } 884 }
837 885
886 kvmppc_core_prepare_to_enter(vcpu);
887
838 /* No need to go into the guest when all we'll do is come back out */ 888 /* No need to go into the guest when all we'll do is come back out */
839 if (signal_pending(current)) { 889 if (signal_pending(current)) {
840 run->exit_reason = KVM_EXIT_INTR; 890 run->exit_reason = KVM_EXIT_INTR;
841 return -EINTR; 891 return -EINTR;
842 } 892 }
843 893
844 /* On PPC970, check that we have an RMA region */ 894 /* On the first time here, set up VRMA or RMA */
845 if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) 895 if (!vcpu->kvm->arch.rma_setup_done) {
846 return -EPERM; 896 r = kvmppc_hv_setup_rma(vcpu);
897 if (r)
898 return r;
899 }
847 900
848 flush_fp_to_thread(current); 901 flush_fp_to_thread(current);
849 flush_altivec_to_thread(current); 902 flush_altivec_to_thread(current);
850 flush_vsx_to_thread(current); 903 flush_vsx_to_thread(current);
851 vcpu->arch.wqp = &vcpu->arch.vcore->wq; 904 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
905 vcpu->arch.pgdir = current->mm->pgd;
852 906
853 do { 907 do {
854 r = kvmppc_run_vcpu(run, vcpu); 908 r = kvmppc_run_vcpu(run, vcpu);
@@ -856,7 +910,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
856 if (run->exit_reason == KVM_EXIT_PAPR_HCALL && 910 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
857 !(vcpu->arch.shregs.msr & MSR_PR)) { 911 !(vcpu->arch.shregs.msr & MSR_PR)) {
858 r = kvmppc_pseries_do_hcall(vcpu); 912 r = kvmppc_pseries_do_hcall(vcpu);
859 kvmppc_core_deliver_interrupts(vcpu); 913 kvmppc_core_prepare_to_enter(vcpu);
860 } 914 }
861 } while (r == RESUME_GUEST); 915 } while (r == RESUME_GUEST);
862 return r; 916 return r;
@@ -1000,7 +1054,7 @@ static inline int lpcr_rmls(unsigned long rma_size)
1000 1054
1001static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1055static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1002{ 1056{
1003 struct kvmppc_rma_info *ri = vma->vm_file->private_data; 1057 struct kvmppc_linear_info *ri = vma->vm_file->private_data;
1004 struct page *page; 1058 struct page *page;
1005 1059
1006 if (vmf->pgoff >= ri->npages) 1060 if (vmf->pgoff >= ri->npages)
@@ -1025,7 +1079,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
1025 1079
1026static int kvm_rma_release(struct inode *inode, struct file *filp) 1080static int kvm_rma_release(struct inode *inode, struct file *filp)
1027{ 1081{
1028 struct kvmppc_rma_info *ri = filp->private_data; 1082 struct kvmppc_linear_info *ri = filp->private_data;
1029 1083
1030 kvm_release_rma(ri); 1084 kvm_release_rma(ri);
1031 return 0; 1085 return 0;
@@ -1038,7 +1092,7 @@ static struct file_operations kvm_rma_fops = {
1038 1092
1039long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) 1093long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
1040{ 1094{
1041 struct kvmppc_rma_info *ri; 1095 struct kvmppc_linear_info *ri;
1042 long fd; 1096 long fd;
1043 1097
1044 ri = kvm_alloc_rma(); 1098 ri = kvm_alloc_rma();
@@ -1053,89 +1107,189 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
1053 return fd; 1107 return fd;
1054} 1108}
1055 1109
1056static struct page *hva_to_page(unsigned long addr) 1110/*
1111 * Get (and clear) the dirty memory log for a memory slot.
1112 */
1113int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1057{ 1114{
1058 struct page *page[1]; 1115 struct kvm_memory_slot *memslot;
1059 int npages; 1116 int r;
1117 unsigned long n;
1060 1118
1061 might_sleep(); 1119 mutex_lock(&kvm->slots_lock);
1062 1120
1063 npages = get_user_pages_fast(addr, 1, 1, page); 1121 r = -EINVAL;
1122 if (log->slot >= KVM_MEMORY_SLOTS)
1123 goto out;
1064 1124
1065 if (unlikely(npages != 1)) 1125 memslot = id_to_memslot(kvm->memslots, log->slot);
1066 return 0; 1126 r = -ENOENT;
1127 if (!memslot->dirty_bitmap)
1128 goto out;
1129
1130 n = kvm_dirty_bitmap_bytes(memslot);
1131 memset(memslot->dirty_bitmap, 0, n);
1132
1133 r = kvmppc_hv_get_dirty_log(kvm, memslot);
1134 if (r)
1135 goto out;
1067 1136
1068 return page[0]; 1137 r = -EFAULT;
1138 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
1139 goto out;
1140
1141 r = 0;
1142out:
1143 mutex_unlock(&kvm->slots_lock);
1144 return r;
1145}
1146
1147static unsigned long slb_pgsize_encoding(unsigned long psize)
1148{
1149 unsigned long senc = 0;
1150
1151 if (psize > 0x1000) {
1152 senc = SLB_VSID_L;
1153 if (psize == 0x10000)
1154 senc |= SLB_VSID_LP_01;
1155 }
1156 return senc;
1069} 1157}
1070 1158
1071int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1159int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1072 struct kvm_userspace_memory_region *mem) 1160 struct kvm_userspace_memory_region *mem)
1073{ 1161{
1074 unsigned long psize, porder; 1162 unsigned long npages;
1075 unsigned long i, npages, totalpages; 1163 unsigned long *phys;
1076 unsigned long pg_ix; 1164
1077 struct kvmppc_pginfo *pginfo; 1165 /* Allocate a slot_phys array */
1078 unsigned long hva; 1166 phys = kvm->arch.slot_phys[mem->slot];
1079 struct kvmppc_rma_info *ri = NULL; 1167 if (!kvm->arch.using_mmu_notifiers && !phys) {
1168 npages = mem->memory_size >> PAGE_SHIFT;
1169 phys = vzalloc(npages * sizeof(unsigned long));
1170 if (!phys)
1171 return -ENOMEM;
1172 kvm->arch.slot_phys[mem->slot] = phys;
1173 kvm->arch.slot_npages[mem->slot] = npages;
1174 }
1175
1176 return 0;
1177}
1178
1179static void unpin_slot(struct kvm *kvm, int slot_id)
1180{
1181 unsigned long *physp;
1182 unsigned long j, npages, pfn;
1080 struct page *page; 1183 struct page *page;
1081 1184
1082 /* For now, only allow 16MB pages */ 1185 physp = kvm->arch.slot_phys[slot_id];
1083 porder = LARGE_PAGE_ORDER; 1186 npages = kvm->arch.slot_npages[slot_id];
1084 psize = 1ul << porder; 1187 if (physp) {
1085 if ((mem->memory_size & (psize - 1)) || 1188 spin_lock(&kvm->arch.slot_phys_lock);
1086 (mem->guest_phys_addr & (psize - 1))) { 1189 for (j = 0; j < npages; j++) {
1087 pr_err("bad memory_size=%llx @ %llx\n", 1190 if (!(physp[j] & KVMPPC_GOT_PAGE))
1088 mem->memory_size, mem->guest_phys_addr); 1191 continue;
1089 return -EINVAL; 1192 pfn = physp[j] >> PAGE_SHIFT;
1193 page = pfn_to_page(pfn);
1194 if (PageHuge(page))
1195 page = compound_head(page);
1196 SetPageDirty(page);
1197 put_page(page);
1198 }
1199 kvm->arch.slot_phys[slot_id] = NULL;
1200 spin_unlock(&kvm->arch.slot_phys_lock);
1201 vfree(physp);
1090 } 1202 }
1203}
1091 1204
1092 npages = mem->memory_size >> porder; 1205void kvmppc_core_commit_memory_region(struct kvm *kvm,
1093 totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder; 1206 struct kvm_userspace_memory_region *mem)
1207{
1208}
1094 1209
1095 /* More memory than we have space to track? */ 1210static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
1096 if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER))) 1211{
1097 return -EINVAL; 1212 int err = 0;
1213 struct kvm *kvm = vcpu->kvm;
1214 struct kvmppc_linear_info *ri = NULL;
1215 unsigned long hva;
1216 struct kvm_memory_slot *memslot;
1217 struct vm_area_struct *vma;
1218 unsigned long lpcr, senc;
1219 unsigned long psize, porder;
1220 unsigned long rma_size;
1221 unsigned long rmls;
1222 unsigned long *physp;
1223 unsigned long i, npages;
1098 1224
1099 /* Do we already have an RMA registered? */ 1225 mutex_lock(&kvm->lock);
1100 if (mem->guest_phys_addr == 0 && kvm->arch.rma) 1226 if (kvm->arch.rma_setup_done)
1101 return -EINVAL; 1227 goto out; /* another vcpu beat us to it */
1102 1228
1103 if (totalpages > kvm->arch.ram_npages) 1229 /* Look up the memslot for guest physical address 0 */
1104 kvm->arch.ram_npages = totalpages; 1230 memslot = gfn_to_memslot(kvm, 0);
1231
1232 /* We must have some memory at 0 by now */
1233 err = -EINVAL;
1234 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
1235 goto out;
1236
1237 /* Look up the VMA for the start of this memory slot */
1238 hva = memslot->userspace_addr;
1239 down_read(&current->mm->mmap_sem);
1240 vma = find_vma(current->mm, hva);
1241 if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
1242 goto up_out;
1243
1244 psize = vma_kernel_pagesize(vma);
1245 porder = __ilog2(psize);
1105 1246
1106 /* Is this one of our preallocated RMAs? */ 1247 /* Is this one of our preallocated RMAs? */
1107 if (mem->guest_phys_addr == 0) { 1248 if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
1108 struct vm_area_struct *vma; 1249 hva == vma->vm_start)
1109 1250 ri = vma->vm_file->private_data;
1110 down_read(&current->mm->mmap_sem); 1251
1111 vma = find_vma(current->mm, mem->userspace_addr); 1252 up_read(&current->mm->mmap_sem);
1112 if (vma && vma->vm_file && 1253
1113 vma->vm_file->f_op == &kvm_rma_fops && 1254 if (!ri) {
1114 mem->userspace_addr == vma->vm_start) 1255 /* On POWER7, use VRMA; on PPC970, give up */
1115 ri = vma->vm_file->private_data; 1256 err = -EPERM;
1116 up_read(&current->mm->mmap_sem); 1257 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1117 if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) { 1258 pr_err("KVM: CPU requires an RMO\n");
1118 pr_err("CPU requires an RMO\n"); 1259 goto out;
1119 return -EINVAL;
1120 } 1260 }
1121 }
1122 1261
1123 if (ri) { 1262 /* We can handle 4k, 64k or 16M pages in the VRMA */
1124 unsigned long rma_size; 1263 err = -EINVAL;
1125 unsigned long lpcr; 1264 if (!(psize == 0x1000 || psize == 0x10000 ||
1126 long rmls; 1265 psize == 0x1000000))
1266 goto out;
1267
1268 /* Update VRMASD field in the LPCR */
1269 senc = slb_pgsize_encoding(psize);
1270 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1271 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1272 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
1273 lpcr |= senc << (LPCR_VRMASD_SH - 4);
1274 kvm->arch.lpcr = lpcr;
1127 1275
1128 rma_size = ri->npages << PAGE_SHIFT; 1276 /* Create HPTEs in the hash page table for the VRMA */
1129 if (rma_size > mem->memory_size) 1277 kvmppc_map_vrma(vcpu, memslot, porder);
1130 rma_size = mem->memory_size; 1278
1279 } else {
1280 /* Set up to use an RMO region */
1281 rma_size = ri->npages;
1282 if (rma_size > memslot->npages)
1283 rma_size = memslot->npages;
1284 rma_size <<= PAGE_SHIFT;
1131 rmls = lpcr_rmls(rma_size); 1285 rmls = lpcr_rmls(rma_size);
1286 err = -EINVAL;
1132 if (rmls < 0) { 1287 if (rmls < 0) {
1133 pr_err("Can't use RMA of 0x%lx bytes\n", rma_size); 1288 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
1134 return -EINVAL; 1289 goto out;
1135 } 1290 }
1136 atomic_inc(&ri->use_count); 1291 atomic_inc(&ri->use_count);
1137 kvm->arch.rma = ri; 1292 kvm->arch.rma = ri;
1138 kvm->arch.n_rma_pages = rma_size >> porder;
1139 1293
1140 /* Update LPCR and RMOR */ 1294 /* Update LPCR and RMOR */
1141 lpcr = kvm->arch.lpcr; 1295 lpcr = kvm->arch.lpcr;
@@ -1155,53 +1309,35 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1155 kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; 1309 kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
1156 } 1310 }
1157 kvm->arch.lpcr = lpcr; 1311 kvm->arch.lpcr = lpcr;
1158 pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n", 1312 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
1159 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); 1313 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
1160 }
1161 1314
1162 pg_ix = mem->guest_phys_addr >> porder; 1315 /* Initialize phys addrs of pages in RMO */
1163 pginfo = kvm->arch.ram_pginfo + pg_ix; 1316 npages = ri->npages;
1164 for (i = 0; i < npages; ++i, ++pg_ix) { 1317 porder = __ilog2(npages);
1165 if (ri && pg_ix < kvm->arch.n_rma_pages) { 1318 physp = kvm->arch.slot_phys[memslot->id];
1166 pginfo[i].pfn = ri->base_pfn + 1319 spin_lock(&kvm->arch.slot_phys_lock);
1167 (pg_ix << (porder - PAGE_SHIFT)); 1320 for (i = 0; i < npages; ++i)
1168 continue; 1321 physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
1169 } 1322 spin_unlock(&kvm->arch.slot_phys_lock);
1170 hva = mem->userspace_addr + (i << porder);
1171 page = hva_to_page(hva);
1172 if (!page) {
1173 pr_err("oops, no pfn for hva %lx\n", hva);
1174 goto err;
1175 }
1176 /* Check it's a 16MB page */
1177 if (!PageHead(page) ||
1178 compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
1179 pr_err("page at %lx isn't 16MB (o=%d)\n",
1180 hva, compound_order(page));
1181 goto err;
1182 }
1183 pginfo[i].pfn = page_to_pfn(page);
1184 } 1323 }
1185 1324
1186 return 0; 1325 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
1187 1326 smp_wmb();
1188 err: 1327 kvm->arch.rma_setup_done = 1;
1189 return -EINVAL; 1328 err = 0;
1190} 1329 out:
1330 mutex_unlock(&kvm->lock);
1331 return err;
1191 1332
1192void kvmppc_core_commit_memory_region(struct kvm *kvm, 1333 up_out:
1193 struct kvm_userspace_memory_region *mem) 1334 up_read(&current->mm->mmap_sem);
1194{ 1335 goto out;
1195 if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
1196 !kvm->arch.rma)
1197 kvmppc_map_vrma(kvm, mem);
1198} 1336}
1199 1337
1200int kvmppc_core_init_vm(struct kvm *kvm) 1338int kvmppc_core_init_vm(struct kvm *kvm)
1201{ 1339{
1202 long r; 1340 long r;
1203 unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
1204 long err = -ENOMEM;
1205 unsigned long lpcr; 1341 unsigned long lpcr;
1206 1342
1207 /* Allocate hashed page table */ 1343 /* Allocate hashed page table */
@@ -1211,19 +1347,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1211 1347
1212 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1348 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1213 1349
1214 kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
1215 GFP_KERNEL);
1216 if (!kvm->arch.ram_pginfo) {
1217 pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
1218 npages * sizeof(struct kvmppc_pginfo));
1219 goto out_free;
1220 }
1221
1222 kvm->arch.ram_npages = 0;
1223 kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
1224 kvm->arch.ram_porder = LARGE_PAGE_ORDER;
1225 kvm->arch.rma = NULL; 1350 kvm->arch.rma = NULL;
1226 kvm->arch.n_rma_pages = 0;
1227 1351
1228 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); 1352 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
1229 1353
@@ -1241,30 +1365,25 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1241 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); 1365 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
1242 lpcr &= LPCR_PECE | LPCR_LPES; 1366 lpcr &= LPCR_PECE | LPCR_LPES;
1243 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | 1367 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
1244 LPCR_VPM0 | LPCR_VRMA_L; 1368 LPCR_VPM0 | LPCR_VPM1;
1369 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
1370 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1245 } 1371 }
1246 kvm->arch.lpcr = lpcr; 1372 kvm->arch.lpcr = lpcr;
1247 1373
1374 kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
1375 spin_lock_init(&kvm->arch.slot_phys_lock);
1248 return 0; 1376 return 0;
1249
1250 out_free:
1251 kvmppc_free_hpt(kvm);
1252 return err;
1253} 1377}
1254 1378
1255void kvmppc_core_destroy_vm(struct kvm *kvm) 1379void kvmppc_core_destroy_vm(struct kvm *kvm)
1256{ 1380{
1257 struct kvmppc_pginfo *pginfo;
1258 unsigned long i; 1381 unsigned long i;
1259 1382
1260 if (kvm->arch.ram_pginfo) { 1383 if (!kvm->arch.using_mmu_notifiers)
1261 pginfo = kvm->arch.ram_pginfo; 1384 for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
1262 kvm->arch.ram_pginfo = NULL; 1385 unpin_slot(kvm, i);
1263 for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i) 1386
1264 if (pginfo[i].pfn)
1265 put_page(pfn_to_page(pginfo[i].pfn));
1266 kfree(pginfo);
1267 }
1268 if (kvm->arch.rma) { 1387 if (kvm->arch.rma) {
1269 kvm_release_rma(kvm->arch.rma); 1388 kvm_release_rma(kvm->arch.rma);
1270 kvm->arch.rma = NULL; 1389 kvm->arch.rma = NULL;
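
On hosts without usable MMU notifiers (PPC970), the slot_phys[] arrays allocated in kvmppc_core_prepare_memory_region() and torn down in unpin_slot() above carry one encoded word per guest page: the host physical address in the page-aligned bits, the page order in the low bits, and ownership/cache flags in between. A rough decoding sketch, assuming the KVMPPC_GOT_PAGE and KVMPPC_PAGE_ORDER_MASK definitions from the KVM book3s headers (the helper name is made up for illustration):

/* Sketch only: unpack a slot_phys[] entry the way unpin_slot() and the
 * real-mode H_ENTER path consume it. */
static unsigned long slot_phys_decode(unsigned long entry,
                                      unsigned long *page_size,
                                      int *host_holds_ref)
{
        *host_holds_ref = !!(entry & KVMPPC_GOT_PAGE);
        *page_size = PAGE_SIZE << (entry & KVMPPC_PAGE_ORDER_MASK);
        return entry & PAGE_MASK;       /* host physical address of the page */
}
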
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index a795a13f4a70..bed1279aa6a8 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -18,6 +18,15 @@
18#include <asm/kvm_ppc.h> 18#include <asm/kvm_ppc.h>
19#include <asm/kvm_book3s.h> 19#include <asm/kvm_book3s.h>
20 20
21#define KVM_LINEAR_RMA 0
22#define KVM_LINEAR_HPT 1
23
24static void __init kvm_linear_init_one(ulong size, int count, int type);
25static struct kvmppc_linear_info *kvm_alloc_linear(int type);
26static void kvm_release_linear(struct kvmppc_linear_info *ri);
27
28/*************** RMA *************/
29
21/* 30/*
22 * This maintains a list of RMAs (real mode areas) for KVM guests to use. 31 * This maintains a list of RMAs (real mode areas) for KVM guests to use.
23 * Each RMA has to be physically contiguous and of a size that the 32 * Each RMA has to be physically contiguous and of a size that the
@@ -29,32 +38,6 @@
29static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ 38static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
30static unsigned long kvm_rma_count; 39static unsigned long kvm_rma_count;
31 40
32static int __init early_parse_rma_size(char *p)
33{
34 if (!p)
35 return 1;
36
37 kvm_rma_size = memparse(p, &p);
38
39 return 0;
40}
41early_param("kvm_rma_size", early_parse_rma_size);
42
43static int __init early_parse_rma_count(char *p)
44{
45 if (!p)
46 return 1;
47
48 kvm_rma_count = simple_strtoul(p, NULL, 0);
49
50 return 0;
51}
52early_param("kvm_rma_count", early_parse_rma_count);
53
54static struct kvmppc_rma_info *rma_info;
55static LIST_HEAD(free_rmas);
56static DEFINE_SPINLOCK(rma_lock);
57
58/* Work out RMLS (real mode limit selector) field value for a given RMA size. 41/* Work out RMLS (real mode limit selector) field value for a given RMA size.
59 Assumes POWER7 or PPC970. */ 42 Assumes POWER7 or PPC970. */
60static inline int lpcr_rmls(unsigned long rma_size) 43static inline int lpcr_rmls(unsigned long rma_size)
@@ -81,45 +64,106 @@ static inline int lpcr_rmls(unsigned long rma_size)
81 } 64 }
82} 65}
83 66
67static int __init early_parse_rma_size(char *p)
68{
69 if (!p)
70 return 1;
71
72 kvm_rma_size = memparse(p, &p);
73
74 return 0;
75}
76early_param("kvm_rma_size", early_parse_rma_size);
77
78static int __init early_parse_rma_count(char *p)
79{
80 if (!p)
81 return 1;
82
83 kvm_rma_count = simple_strtoul(p, NULL, 0);
84
85 return 0;
86}
87early_param("kvm_rma_count", early_parse_rma_count);
88
89struct kvmppc_linear_info *kvm_alloc_rma(void)
90{
91 return kvm_alloc_linear(KVM_LINEAR_RMA);
92}
93EXPORT_SYMBOL_GPL(kvm_alloc_rma);
94
95void kvm_release_rma(struct kvmppc_linear_info *ri)
96{
97 kvm_release_linear(ri);
98}
99EXPORT_SYMBOL_GPL(kvm_release_rma);
100
101/*************** HPT *************/
102
84/* 103/*
85 * Called at boot time while the bootmem allocator is active, 104 * This maintains a list of big linear HPT tables that contain the GVA->HPA
86 * to allocate contiguous physical memory for the real memory 105 * memory mappings. If we don't reserve those early on, we might not be able
87 * areas for guests. 106 * to get a big (usually 16MB) linear memory region from the kernel anymore.
88 */ 107 */
89void __init kvm_rma_init(void) 108
109static unsigned long kvm_hpt_count;
110
111static int __init early_parse_hpt_count(char *p)
112{
113 if (!p)
114 return 1;
115
116 kvm_hpt_count = simple_strtoul(p, NULL, 0);
117
118 return 0;
119}
120early_param("kvm_hpt_count", early_parse_hpt_count);
121
122struct kvmppc_linear_info *kvm_alloc_hpt(void)
123{
124 return kvm_alloc_linear(KVM_LINEAR_HPT);
125}
126EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
127
128void kvm_release_hpt(struct kvmppc_linear_info *li)
129{
130 kvm_release_linear(li);
131}
132EXPORT_SYMBOL_GPL(kvm_release_hpt);
133
134/*************** generic *************/
135
136static LIST_HEAD(free_linears);
137static DEFINE_SPINLOCK(linear_lock);
138
139static void __init kvm_linear_init_one(ulong size, int count, int type)
90{ 140{
91 unsigned long i; 141 unsigned long i;
92 unsigned long j, npages; 142 unsigned long j, npages;
93 void *rma; 143 void *linear;
94 struct page *pg; 144 struct page *pg;
145 const char *typestr;
146 struct kvmppc_linear_info *linear_info;
95 147
96 /* Only do this on PPC970 in HV mode */ 148 if (!count)
97 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
98 !cpu_has_feature(CPU_FTR_ARCH_201))
99 return;
100
101 if (!kvm_rma_size || !kvm_rma_count)
102 return; 149 return;
103 150
104 /* Check that the requested size is one supported in hardware */ 151 typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
105 if (lpcr_rmls(kvm_rma_size) < 0) { 152
106 pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); 153 npages = size >> PAGE_SHIFT;
107 return; 154 linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
108 } 155 for (i = 0; i < count; ++i) {
109 156 linear = alloc_bootmem_align(size, size);
110 npages = kvm_rma_size >> PAGE_SHIFT; 157 pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
111 rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info)); 158 size >> 20);
112 for (i = 0; i < kvm_rma_count; ++i) { 159 linear_info[i].base_virt = linear;
113 rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size); 160 linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
114 pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma, 161 linear_info[i].npages = npages;
115 kvm_rma_size >> 20); 162 linear_info[i].type = type;
116 rma_info[i].base_virt = rma; 163 list_add_tail(&linear_info[i].list, &free_linears);
117 rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT; 164 atomic_set(&linear_info[i].use_count, 0);
118 rma_info[i].npages = npages; 165
119 list_add_tail(&rma_info[i].list, &free_rmas); 166 pg = pfn_to_page(linear_info[i].base_pfn);
120 atomic_set(&rma_info[i].use_count, 0);
121
122 pg = pfn_to_page(rma_info[i].base_pfn);
123 for (j = 0; j < npages; ++j) { 167 for (j = 0; j < npages; ++j) {
124 atomic_inc(&pg->_count); 168 atomic_inc(&pg->_count);
125 ++pg; 169 ++pg;
@@ -127,30 +171,59 @@ void __init kvm_rma_init(void)
127 } 171 }
128} 172}
129 173
130struct kvmppc_rma_info *kvm_alloc_rma(void) 174static struct kvmppc_linear_info *kvm_alloc_linear(int type)
131{ 175{
132 struct kvmppc_rma_info *ri; 176 struct kvmppc_linear_info *ri;
133 177
134 ri = NULL; 178 ri = NULL;
135 spin_lock(&rma_lock); 179 spin_lock(&linear_lock);
136 if (!list_empty(&free_rmas)) { 180 list_for_each_entry(ri, &free_linears, list) {
137 ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list); 181 if (ri->type != type)
182 continue;
183
138 list_del(&ri->list); 184 list_del(&ri->list);
139 atomic_inc(&ri->use_count); 185 atomic_inc(&ri->use_count);
186 break;
140 } 187 }
141 spin_unlock(&rma_lock); 188 spin_unlock(&linear_lock);
189 memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
142 return ri; 190 return ri;
143} 191}
144EXPORT_SYMBOL_GPL(kvm_alloc_rma);
145 192
146void kvm_release_rma(struct kvmppc_rma_info *ri) 193static void kvm_release_linear(struct kvmppc_linear_info *ri)
147{ 194{
148 if (atomic_dec_and_test(&ri->use_count)) { 195 if (atomic_dec_and_test(&ri->use_count)) {
149 spin_lock(&rma_lock); 196 spin_lock(&linear_lock);
150 list_add_tail(&ri->list, &free_rmas); 197 list_add_tail(&ri->list, &free_linears);
151 spin_unlock(&rma_lock); 198 spin_unlock(&linear_lock);
152 199
153 } 200 }
154} 201}
155EXPORT_SYMBOL_GPL(kvm_release_rma);
156 202
203/*
204 * Called at boot time while the bootmem allocator is active,
205 * to allocate contiguous physical memory for the hash page
 206 * tables and real-mode areas for guests.
207 */
208void __init kvm_linear_init(void)
209{
210 /* HPT */
211 kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
212
213 /* RMA */
214 /* Only do this on PPC970 in HV mode */
215 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
216 !cpu_has_feature(CPU_FTR_ARCH_201))
217 return;
218
219 if (!kvm_rma_size || !kvm_rma_count)
220 return;
221
222 /* Check that the requested size is one supported in hardware */
223 if (lpcr_rmls(kvm_rma_size) < 0) {
224 pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
225 return;
226 }
227
228 kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
229}
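
The pools built by kvm_linear_init() are only populated when memory is reserved on the kernel command line (kvm_hpt_count=N, plus kvm_rma_size=/kvm_rma_count= on PPC970). A hypothetical consumer sketch, assuming HPT_ORDER from the headers and the normal page allocator as a fallback; this is not the in-tree HPT allocation path, just an illustration of how kvm_alloc_hpt() is meant to be used:

static long alloc_guest_hpt(unsigned long *hpt_out,
                            struct kvmppc_linear_info **li_out)
{
        struct kvmppc_linear_info *li = kvm_alloc_hpt();

        if (li) {
                /* preallocated, physically contiguous and already zeroed */
                *li_out = li;
                *hpt_out = (unsigned long)li->base_virt;
                return 0;
        }
        /* fall back to the buddy allocator; may fail once memory fragments */
        *li_out = NULL;
        *hpt_out = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                    HPT_ORDER - PAGE_SHIFT);
        return *hpt_out ? 0 : -ENOMEM;
}
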
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index bacb0cfa3602..def880aea63a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -11,6 +11,7 @@
11#include <linux/kvm.h> 11#include <linux/kvm.h>
12#include <linux/kvm_host.h> 12#include <linux/kvm_host.h>
13#include <linux/hugetlb.h> 13#include <linux/hugetlb.h>
14#include <linux/module.h>
14 15
15#include <asm/tlbflush.h> 16#include <asm/tlbflush.h>
16#include <asm/kvm_ppc.h> 17#include <asm/kvm_ppc.h>
@@ -20,95 +21,307 @@
20#include <asm/synch.h> 21#include <asm/synch.h>
21#include <asm/ppc-opcode.h> 22#include <asm/ppc-opcode.h>
22 23
23/* For now use fixed-size 16MB page table */ 24/* Translate address of a vmalloc'd thing to a linear map address */
24#define HPT_ORDER 24 25static void *real_vmalloc_addr(void *x)
25#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ 26{
26#define HPT_HASH_MASK (HPT_NPTEG - 1) 27 unsigned long addr = (unsigned long) x;
28 pte_t *p;
27 29
28#define HPTE_V_HVLOCK 0x40UL 30 p = find_linux_pte(swapper_pg_dir, addr);
31 if (!p || !pte_present(*p))
32 return NULL;
33 /* assume we don't have huge pages in vmalloc space... */
34 addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
35 return __va(addr);
36}
29 37
30static inline long lock_hpte(unsigned long *hpte, unsigned long bits) 38/*
39 * Add this HPTE into the chain for the real page.
40 * Must be called with the chain locked; it unlocks the chain.
41 */
42void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
43 unsigned long *rmap, long pte_index, int realmode)
31{ 44{
32 unsigned long tmp, old; 45 struct revmap_entry *head, *tail;
46 unsigned long i;
33 47
34 asm volatile(" ldarx %0,0,%2\n" 48 if (*rmap & KVMPPC_RMAP_PRESENT) {
35 " and. %1,%0,%3\n" 49 i = *rmap & KVMPPC_RMAP_INDEX;
36 " bne 2f\n" 50 head = &kvm->arch.revmap[i];
37 " ori %0,%0,%4\n" 51 if (realmode)
38 " stdcx. %0,0,%2\n" 52 head = real_vmalloc_addr(head);
39 " beq+ 2f\n" 53 tail = &kvm->arch.revmap[head->back];
40 " li %1,%3\n" 54 if (realmode)
41 "2: isync" 55 tail = real_vmalloc_addr(tail);
42 : "=&r" (tmp), "=&r" (old) 56 rev->forw = i;
43 : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) 57 rev->back = head->back;
44 : "cc", "memory"); 58 tail->forw = pte_index;
45 return old == 0; 59 head->back = pte_index;
60 } else {
61 rev->forw = rev->back = pte_index;
62 i = pte_index;
63 }
64 smp_wmb();
65 *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
66}
67EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
68
69/* Remove this HPTE from the chain for a real page */
70static void remove_revmap_chain(struct kvm *kvm, long pte_index,
71 struct revmap_entry *rev,
72 unsigned long hpte_v, unsigned long hpte_r)
73{
74 struct revmap_entry *next, *prev;
75 unsigned long gfn, ptel, head;
76 struct kvm_memory_slot *memslot;
77 unsigned long *rmap;
78 unsigned long rcbits;
79
80 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
81 ptel = rev->guest_rpte |= rcbits;
82 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
83 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
84 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
85 return;
86
87 rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
88 lock_rmap(rmap);
89
90 head = *rmap & KVMPPC_RMAP_INDEX;
91 next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
92 prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
93 next->back = rev->back;
94 prev->forw = rev->forw;
95 if (head == pte_index) {
96 head = rev->forw;
97 if (head == pte_index)
98 *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
99 else
100 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
101 }
102 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
103 unlock_rmap(rmap);
104}
105
106static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
107 int writing, unsigned long *pte_sizep)
108{
109 pte_t *ptep;
110 unsigned long ps = *pte_sizep;
111 unsigned int shift;
112
113 ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
114 if (!ptep)
115 return __pte(0);
116 if (shift)
117 *pte_sizep = 1ul << shift;
118 else
119 *pte_sizep = PAGE_SIZE;
120 if (ps > *pte_sizep)
121 return __pte(0);
122 if (!pte_present(*ptep))
123 return __pte(0);
124 return kvmppc_read_update_linux_pte(ptep, writing);
125}
126
127static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
128{
129 asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
130 hpte[0] = hpte_v;
46} 131}
47 132
48long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 133long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
49 long pte_index, unsigned long pteh, unsigned long ptel) 134 long pte_index, unsigned long pteh, unsigned long ptel)
50{ 135{
51 unsigned long porder;
52 struct kvm *kvm = vcpu->kvm; 136 struct kvm *kvm = vcpu->kvm;
53 unsigned long i, lpn, pa; 137 unsigned long i, pa, gpa, gfn, psize;
138 unsigned long slot_fn, hva;
54 unsigned long *hpte; 139 unsigned long *hpte;
140 struct revmap_entry *rev;
141 unsigned long g_ptel = ptel;
142 struct kvm_memory_slot *memslot;
143 unsigned long *physp, pte_size;
144 unsigned long is_io;
145 unsigned long *rmap;
146 pte_t pte;
147 unsigned int writing;
148 unsigned long mmu_seq;
149 unsigned long rcbits;
150 bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
55 151
56 /* only handle 4k, 64k and 16M pages for now */ 152 psize = hpte_page_size(pteh, ptel);
57 porder = 12; 153 if (!psize)
58 if (pteh & HPTE_V_LARGE) { 154 return H_PARAMETER;
59 if (cpu_has_feature(CPU_FTR_ARCH_206) && 155 writing = hpte_is_writable(ptel);
60 (ptel & 0xf000) == 0x1000) { 156 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
61 /* 64k page */ 157
62 porder = 16; 158 /* used later to detect if we might have been invalidated */
63 } else if ((ptel & 0xff000) == 0) { 159 mmu_seq = kvm->mmu_notifier_seq;
64 /* 16M page */ 160 smp_rmb();
65 porder = 24; 161
66 /* lowest AVA bit must be 0 for 16M pages */ 162 /* Find the memslot (if any) for this address */
67 if (pteh & 0x80) 163 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
68 return H_PARAMETER; 164 gfn = gpa >> PAGE_SHIFT;
69 } else 165 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
166 pa = 0;
167 is_io = ~0ul;
168 rmap = NULL;
169 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
170 /* PPC970 can't do emulated MMIO */
171 if (!cpu_has_feature(CPU_FTR_ARCH_206))
70 return H_PARAMETER; 172 return H_PARAMETER;
173 /* Emulated MMIO - mark this with key=31 */
174 pteh |= HPTE_V_ABSENT;
175 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
176 goto do_insert;
71 } 177 }
72 lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder; 178
73 if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder) 179 /* Check if the requested page fits entirely in the memslot. */
74 return H_PARAMETER; 180 if (!slot_is_aligned(memslot, psize))
75 pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
76 if (!pa)
77 return H_PARAMETER; 181 return H_PARAMETER;
78 /* Check WIMG */ 182 slot_fn = gfn - memslot->base_gfn;
79 if ((ptel & HPTE_R_WIMG) != HPTE_R_M && 183 rmap = &memslot->rmap[slot_fn];
80 (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M)) 184
185 if (!kvm->arch.using_mmu_notifiers) {
186 physp = kvm->arch.slot_phys[memslot->id];
187 if (!physp)
188 return H_PARAMETER;
189 physp += slot_fn;
190 if (realmode)
191 physp = real_vmalloc_addr(physp);
192 pa = *physp;
193 if (!pa)
194 return H_TOO_HARD;
195 is_io = pa & (HPTE_R_I | HPTE_R_W);
196 pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
197 pa &= PAGE_MASK;
198 } else {
199 /* Translate to host virtual address */
200 hva = gfn_to_hva_memslot(memslot, gfn);
201
202 /* Look up the Linux PTE for the backing page */
203 pte_size = psize;
204 pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
205 if (pte_present(pte)) {
206 if (writing && !pte_write(pte))
207 /* make the actual HPTE be read-only */
208 ptel = hpte_make_readonly(ptel);
209 is_io = hpte_cache_bits(pte_val(pte));
210 pa = pte_pfn(pte) << PAGE_SHIFT;
211 }
212 }
213 if (pte_size < psize)
81 return H_PARAMETER; 214 return H_PARAMETER;
82 pteh &= ~0x60UL; 215 if (pa && pte_size > psize)
83 ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize); 216 pa |= gpa & (pte_size - 1);
217
218 ptel &= ~(HPTE_R_PP0 - psize);
84 ptel |= pa; 219 ptel |= pa;
85 if (pte_index >= (HPT_NPTEG << 3)) 220
221 if (pa)
222 pteh |= HPTE_V_VALID;
223 else
224 pteh |= HPTE_V_ABSENT;
225
226 /* Check WIMG */
227 if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
228 if (is_io)
229 return H_PARAMETER;
230 /*
231 * Allow guest to map emulated device memory as
232 * uncacheable, but actually make it cacheable.
233 */
234 ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
235 ptel |= HPTE_R_M;
236 }
237
238 /* Find and lock the HPTEG slot to use */
239 do_insert:
240 if (pte_index >= HPT_NPTE)
86 return H_PARAMETER; 241 return H_PARAMETER;
87 if (likely((flags & H_EXACT) == 0)) { 242 if (likely((flags & H_EXACT) == 0)) {
88 pte_index &= ~7UL; 243 pte_index &= ~7UL;
89 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 244 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
90 for (i = 0; ; ++i) { 245 for (i = 0; i < 8; ++i) {
91 if (i == 8)
92 return H_PTEG_FULL;
93 if ((*hpte & HPTE_V_VALID) == 0 && 246 if ((*hpte & HPTE_V_VALID) == 0 &&
94 lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) 247 try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
248 HPTE_V_ABSENT))
95 break; 249 break;
96 hpte += 2; 250 hpte += 2;
97 } 251 }
252 if (i == 8) {
253 /*
254 * Since try_lock_hpte doesn't retry (not even stdcx.
255 * failures), it could be that there is a free slot
256 * but we transiently failed to lock it. Try again,
257 * actually locking each slot and checking it.
258 */
259 hpte -= 16;
260 for (i = 0; i < 8; ++i) {
261 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
262 cpu_relax();
263 if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
264 break;
265 *hpte &= ~HPTE_V_HVLOCK;
266 hpte += 2;
267 }
268 if (i == 8)
269 return H_PTEG_FULL;
270 }
271 pte_index += i;
98 } else { 272 } else {
99 i = 0;
100 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 273 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
101 if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) 274 if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
102 return H_PTEG_FULL; 275 HPTE_V_ABSENT)) {
276 /* Lock the slot and check again */
277 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
278 cpu_relax();
279 if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
280 *hpte &= ~HPTE_V_HVLOCK;
281 return H_PTEG_FULL;
282 }
283 }
103 } 284 }
285
286 /* Save away the guest's idea of the second HPTE dword */
287 rev = &kvm->arch.revmap[pte_index];
288 if (realmode)
289 rev = real_vmalloc_addr(rev);
290 if (rev)
291 rev->guest_rpte = g_ptel;
292
293 /* Link HPTE into reverse-map chain */
294 if (pteh & HPTE_V_VALID) {
295 if (realmode)
296 rmap = real_vmalloc_addr(rmap);
297 lock_rmap(rmap);
298 /* Check for pending invalidations under the rmap chain lock */
299 if (kvm->arch.using_mmu_notifiers &&
300 mmu_notifier_retry(vcpu, mmu_seq)) {
301 /* inval in progress, write a non-present HPTE */
302 pteh |= HPTE_V_ABSENT;
303 pteh &= ~HPTE_V_VALID;
304 unlock_rmap(rmap);
305 } else {
306 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
307 realmode);
308 /* Only set R/C in real HPTE if already set in *rmap */
309 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
310 ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
311 }
312 }
313
104 hpte[1] = ptel; 314 hpte[1] = ptel;
315
316 /* Write the first HPTE dword, unlocking the HPTE and making it valid */
105 eieio(); 317 eieio();
106 hpte[0] = pteh; 318 hpte[0] = pteh;
107 asm volatile("ptesync" : : : "memory"); 319 asm volatile("ptesync" : : : "memory");
108 atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt); 320
109 vcpu->arch.gpr[4] = pte_index + i; 321 vcpu->arch.gpr[4] = pte_index;
110 return H_SUCCESS; 322 return H_SUCCESS;
111} 323}
324EXPORT_SYMBOL_GPL(kvmppc_h_enter);
112 325
113#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 326#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
114 327
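
A test that recurs throughout the hunks below is hpte[0] & (HPTE_V_VALID | HPTE_V_ABSENT): with this patch a guest-visible HPTE can either be installed in hardware (VALID) or merely "absent" (known to the guest but not present in hardware, e.g. backing page paged out or emulated MMIO), and only a slot with neither bit set is free. A trivial sketch of that predicate, assuming the flag definitions from the book3s_64 headers:

/* True if the guest believes this HPTE exists, whether or not it is
 * currently installed in the hardware hash table. */
static inline bool hpte_present_to_guest(unsigned long hpte_v)
{
        return (hpte_v & (HPTE_V_VALID | HPTE_V_ABSENT)) != 0;
}
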
@@ -137,37 +350,46 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
137 struct kvm *kvm = vcpu->kvm; 350 struct kvm *kvm = vcpu->kvm;
138 unsigned long *hpte; 351 unsigned long *hpte;
139 unsigned long v, r, rb; 352 unsigned long v, r, rb;
353 struct revmap_entry *rev;
140 354
141 if (pte_index >= (HPT_NPTEG << 3)) 355 if (pte_index >= HPT_NPTE)
142 return H_PARAMETER; 356 return H_PARAMETER;
143 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 357 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
144 while (!lock_hpte(hpte, HPTE_V_HVLOCK)) 358 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
145 cpu_relax(); 359 cpu_relax();
146 if ((hpte[0] & HPTE_V_VALID) == 0 || 360 if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
147 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || 361 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
148 ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { 362 ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
149 hpte[0] &= ~HPTE_V_HVLOCK; 363 hpte[0] &= ~HPTE_V_HVLOCK;
150 return H_NOT_FOUND; 364 return H_NOT_FOUND;
151 } 365 }
152 if (atomic_read(&kvm->online_vcpus) == 1) 366
153 flags |= H_LOCAL; 367 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
154 vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK; 368 v = hpte[0] & ~HPTE_V_HVLOCK;
155 vcpu->arch.gpr[5] = r = hpte[1]; 369 if (v & HPTE_V_VALID) {
156 rb = compute_tlbie_rb(v, r, pte_index); 370 hpte[0] &= ~HPTE_V_VALID;
157 hpte[0] = 0; 371 rb = compute_tlbie_rb(v, hpte[1], pte_index);
158 if (!(flags & H_LOCAL)) { 372 if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
159 while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) 373 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
160 cpu_relax(); 374 cpu_relax();
161 asm volatile("ptesync" : : : "memory"); 375 asm volatile("ptesync" : : : "memory");
162 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" 376 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
163 : : "r" (rb), "r" (kvm->arch.lpid)); 377 : : "r" (rb), "r" (kvm->arch.lpid));
164 asm volatile("ptesync" : : : "memory"); 378 asm volatile("ptesync" : : : "memory");
165 kvm->arch.tlbie_lock = 0; 379 kvm->arch.tlbie_lock = 0;
166 } else { 380 } else {
167 asm volatile("ptesync" : : : "memory"); 381 asm volatile("ptesync" : : : "memory");
168 asm volatile("tlbiel %0" : : "r" (rb)); 382 asm volatile("tlbiel %0" : : "r" (rb));
169 asm volatile("ptesync" : : : "memory"); 383 asm volatile("ptesync" : : : "memory");
384 }
385 /* Read PTE low word after tlbie to get final R/C values */
386 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
170 } 387 }
388 r = rev->guest_rpte;
389 unlock_hpte(hpte, 0);
390
391 vcpu->arch.gpr[4] = v;
392 vcpu->arch.gpr[5] = r;
171 return H_SUCCESS; 393 return H_SUCCESS;
172} 394}
173 395
@@ -175,78 +397,117 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
175{ 397{
176 struct kvm *kvm = vcpu->kvm; 398 struct kvm *kvm = vcpu->kvm;
177 unsigned long *args = &vcpu->arch.gpr[4]; 399 unsigned long *args = &vcpu->arch.gpr[4];
178 unsigned long *hp, tlbrb[4]; 400 unsigned long *hp, *hptes[4], tlbrb[4];
179 long int i, found; 401 long int i, j, k, n, found, indexes[4];
180 long int n_inval = 0; 402 unsigned long flags, req, pte_index, rcbits;
181 unsigned long flags, req, pte_index;
182 long int local = 0; 403 long int local = 0;
183 long int ret = H_SUCCESS; 404 long int ret = H_SUCCESS;
405 struct revmap_entry *rev, *revs[4];
184 406
185 if (atomic_read(&kvm->online_vcpus) == 1) 407 if (atomic_read(&kvm->online_vcpus) == 1)
186 local = 1; 408 local = 1;
187 for (i = 0; i < 4; ++i) { 409 for (i = 0; i < 4 && ret == H_SUCCESS; ) {
188 pte_index = args[i * 2]; 410 n = 0;
189 flags = pte_index >> 56; 411 for (; i < 4; ++i) {
190 pte_index &= ((1ul << 56) - 1); 412 j = i * 2;
191 req = flags >> 6; 413 pte_index = args[j];
192 flags &= 3; 414 flags = pte_index >> 56;
193 if (req == 3) 415 pte_index &= ((1ul << 56) - 1);
194 break; 416 req = flags >> 6;
195 if (req != 1 || flags == 3 || 417 flags &= 3;
196 pte_index >= (HPT_NPTEG << 3)) { 418 if (req == 3) { /* no more requests */
197 /* parameter error */ 419 i = 4;
198 args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
199 ret = H_PARAMETER;
200 break;
201 }
202 hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
203 while (!lock_hpte(hp, HPTE_V_HVLOCK))
204 cpu_relax();
205 found = 0;
206 if (hp[0] & HPTE_V_VALID) {
207 switch (flags & 3) {
208 case 0: /* absolute */
209 found = 1;
210 break; 420 break;
211 case 1: /* andcond */ 421 }
212 if (!(hp[0] & args[i * 2 + 1])) 422 if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
213 found = 1; 423 /* parameter error */
424 args[j] = ((0xa0 | flags) << 56) + pte_index;
425 ret = H_PARAMETER;
214 break; 426 break;
215 case 2: /* AVPN */ 427 }
216 if ((hp[0] & ~0x7fUL) == args[i * 2 + 1]) 428 hp = (unsigned long *)
429 (kvm->arch.hpt_virt + (pte_index << 4));
430 /* to avoid deadlock, don't spin except for first */
431 if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
432 if (n)
433 break;
434 while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
435 cpu_relax();
436 }
437 found = 0;
438 if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
439 switch (flags & 3) {
440 case 0: /* absolute */
217 found = 1; 441 found = 1;
218 break; 442 break;
443 case 1: /* andcond */
444 if (!(hp[0] & args[j + 1]))
445 found = 1;
446 break;
447 case 2: /* AVPN */
448 if ((hp[0] & ~0x7fUL) == args[j + 1])
449 found = 1;
450 break;
451 }
452 }
453 if (!found) {
454 hp[0] &= ~HPTE_V_HVLOCK;
455 args[j] = ((0x90 | flags) << 56) + pte_index;
456 continue;
219 } 457 }
458
459 args[j] = ((0x80 | flags) << 56) + pte_index;
460 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
461
462 if (!(hp[0] & HPTE_V_VALID)) {
463 /* insert R and C bits from PTE */
464 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
465 args[j] |= rcbits << (56 - 5);
466 continue;
467 }
468
469 hp[0] &= ~HPTE_V_VALID; /* leave it locked */
470 tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
471 indexes[n] = j;
472 hptes[n] = hp;
473 revs[n] = rev;
474 ++n;
475 }
476
477 if (!n)
478 break;
479
480 /* Now that we've collected a batch, do the tlbies */
481 if (!local) {
482 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
483 cpu_relax();
484 asm volatile("ptesync" : : : "memory");
485 for (k = 0; k < n; ++k)
486 asm volatile(PPC_TLBIE(%1,%0) : :
487 "r" (tlbrb[k]),
488 "r" (kvm->arch.lpid));
489 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
490 kvm->arch.tlbie_lock = 0;
491 } else {
492 asm volatile("ptesync" : : : "memory");
493 for (k = 0; k < n; ++k)
494 asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
495 asm volatile("ptesync" : : : "memory");
220 } 496 }
221 if (!found) { 497
222 hp[0] &= ~HPTE_V_HVLOCK; 498 /* Read PTE low words after tlbie to get final R/C values */
223 args[i * 2] = ((0x90 | flags) << 56) + pte_index; 499 for (k = 0; k < n; ++k) {
224 continue; 500 j = indexes[k];
501 pte_index = args[j] & ((1ul << 56) - 1);
502 hp = hptes[k];
503 rev = revs[k];
504 remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
505 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
506 args[j] |= rcbits << (56 - 5);
507 hp[0] = 0;
225 } 508 }
226 /* insert R and C bits from PTE */
227 flags |= (hp[1] >> 5) & 0x0c;
228 args[i * 2] = ((0x80 | flags) << 56) + pte_index;
229 tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
230 hp[0] = 0;
231 }
232 if (n_inval == 0)
233 return ret;
234
235 if (!local) {
236 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
237 cpu_relax();
238 asm volatile("ptesync" : : : "memory");
239 for (i = 0; i < n_inval; ++i)
240 asm volatile(PPC_TLBIE(%1,%0)
241 : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
242 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
243 kvm->arch.tlbie_lock = 0;
244 } else {
245 asm volatile("ptesync" : : : "memory");
246 for (i = 0; i < n_inval; ++i)
247 asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
248 asm volatile("ptesync" : : : "memory");
249 } 509 }
510
250 return ret; 511 return ret;
251} 512}
252 513
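
Each of the four H_BULK_REMOVE parameter doublewords unpacked in the loop above packs a request type, control flags and a PTE index into a single value; the same slot is then reused for the completion code (0x80 success, 0x90 not found, 0xa0 parameter error) in the top byte, with the guest's R and C bits folded in below it. A sketch of the layout, using hypothetical macro names:

/* Layout as assumed by the unpacking code above (sketch only). */
#define BULK_REQ(arg)           (((arg) >> 62) & 3)     /* 1 = remove, 3 = end of list */
#define BULK_CTRL(arg)          (((arg) >> 56) & 3)     /* 0 = absolute, 1 = andcond, 2 = AVPN */
#define BULK_PTE_INDEX(arg)     ((arg) & ((1ul << 56) - 1))
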
@@ -256,40 +517,55 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
256{ 517{
257 struct kvm *kvm = vcpu->kvm; 518 struct kvm *kvm = vcpu->kvm;
258 unsigned long *hpte; 519 unsigned long *hpte;
259 unsigned long v, r, rb; 520 struct revmap_entry *rev;
521 unsigned long v, r, rb, mask, bits;
260 522
261 if (pte_index >= (HPT_NPTEG << 3)) 523 if (pte_index >= HPT_NPTE)
262 return H_PARAMETER; 524 return H_PARAMETER;
525
263 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 526 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
264 while (!lock_hpte(hpte, HPTE_V_HVLOCK)) 527 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
265 cpu_relax(); 528 cpu_relax();
266 if ((hpte[0] & HPTE_V_VALID) == 0 || 529 if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
267 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { 530 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
268 hpte[0] &= ~HPTE_V_HVLOCK; 531 hpte[0] &= ~HPTE_V_HVLOCK;
269 return H_NOT_FOUND; 532 return H_NOT_FOUND;
270 } 533 }
534
271 if (atomic_read(&kvm->online_vcpus) == 1) 535 if (atomic_read(&kvm->online_vcpus) == 1)
272 flags |= H_LOCAL; 536 flags |= H_LOCAL;
273 v = hpte[0]; 537 v = hpte[0];
274 r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | 538 bits = (flags << 55) & HPTE_R_PP0;
275 HPTE_R_KEY_HI | HPTE_R_KEY_LO); 539 bits |= (flags << 48) & HPTE_R_KEY_HI;
276 r |= (flags << 55) & HPTE_R_PP0; 540 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
277 r |= (flags << 48) & HPTE_R_KEY_HI; 541
278 r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 542 /* Update guest view of 2nd HPTE dword */
279 rb = compute_tlbie_rb(v, r, pte_index); 543 mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
280 hpte[0] = v & ~HPTE_V_VALID; 544 HPTE_R_KEY_HI | HPTE_R_KEY_LO;
281 if (!(flags & H_LOCAL)) { 545 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
282 while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) 546 if (rev) {
283 cpu_relax(); 547 r = (rev->guest_rpte & ~mask) | bits;
284 asm volatile("ptesync" : : : "memory"); 548 rev->guest_rpte = r;
285 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" 549 }
286 : : "r" (rb), "r" (kvm->arch.lpid)); 550 r = (hpte[1] & ~mask) | bits;
287 asm volatile("ptesync" : : : "memory"); 551
288 kvm->arch.tlbie_lock = 0; 552 /* Update HPTE */
289 } else { 553 if (v & HPTE_V_VALID) {
290 asm volatile("ptesync" : : : "memory"); 554 rb = compute_tlbie_rb(v, r, pte_index);
291 asm volatile("tlbiel %0" : : "r" (rb)); 555 hpte[0] = v & ~HPTE_V_VALID;
292 asm volatile("ptesync" : : : "memory"); 556 if (!(flags & H_LOCAL)) {
557 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
558 cpu_relax();
559 asm volatile("ptesync" : : : "memory");
560 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
561 : : "r" (rb), "r" (kvm->arch.lpid));
562 asm volatile("ptesync" : : : "memory");
563 kvm->arch.tlbie_lock = 0;
564 } else {
565 asm volatile("ptesync" : : : "memory");
566 asm volatile("tlbiel %0" : : "r" (rb));
567 asm volatile("ptesync" : : : "memory");
568 }
293 } 569 }
294 hpte[1] = r; 570 hpte[1] = r;
295 eieio(); 571 eieio();
@@ -298,40 +574,243 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
298 return H_SUCCESS; 574 return H_SUCCESS;
299} 575}
300 576
301static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
302{
303 long int i;
304 unsigned long offset, rpn;
305
306 offset = realaddr & (kvm->arch.ram_psize - 1);
307 rpn = (realaddr - offset) >> PAGE_SHIFT;
308 for (i = 0; i < kvm->arch.ram_npages; ++i)
309 if (rpn == kvm->arch.ram_pginfo[i].pfn)
310 return (i << PAGE_SHIFT) + offset;
311 return HPTE_R_RPN; /* all 1s in the RPN field */
312}
313
314long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, 577long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
315 unsigned long pte_index) 578 unsigned long pte_index)
316{ 579{
317 struct kvm *kvm = vcpu->kvm; 580 struct kvm *kvm = vcpu->kvm;
318 unsigned long *hpte, r; 581 unsigned long *hpte, v, r;
319 int i, n = 1; 582 int i, n = 1;
583 struct revmap_entry *rev = NULL;
320 584
321 if (pte_index >= (HPT_NPTEG << 3)) 585 if (pte_index >= HPT_NPTE)
322 return H_PARAMETER; 586 return H_PARAMETER;
323 if (flags & H_READ_4) { 587 if (flags & H_READ_4) {
324 pte_index &= ~3; 588 pte_index &= ~3;
325 n = 4; 589 n = 4;
326 } 590 }
591 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
327 for (i = 0; i < n; ++i, ++pte_index) { 592 for (i = 0; i < n; ++i, ++pte_index) {
328 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 593 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
594 v = hpte[0] & ~HPTE_V_HVLOCK;
329 r = hpte[1]; 595 r = hpte[1];
330 if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID)) 596 if (v & HPTE_V_ABSENT) {
331 r = reverse_xlate(kvm, r & HPTE_R_RPN) | 597 v &= ~HPTE_V_ABSENT;
332 (r & ~HPTE_R_RPN); 598 v |= HPTE_V_VALID;
333 vcpu->arch.gpr[4 + i * 2] = hpte[0]; 599 }
600 if (v & HPTE_V_VALID)
601 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
602 vcpu->arch.gpr[4 + i * 2] = v;
334 vcpu->arch.gpr[5 + i * 2] = r; 603 vcpu->arch.gpr[5 + i * 2] = r;
335 } 604 }
336 return H_SUCCESS; 605 return H_SUCCESS;
337} 606}
607
608void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
609 unsigned long pte_index)
610{
611 unsigned long rb;
612
613 hptep[0] &= ~HPTE_V_VALID;
614 rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
615 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
616 cpu_relax();
617 asm volatile("ptesync" : : : "memory");
618 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
619 : : "r" (rb), "r" (kvm->arch.lpid));
620 asm volatile("ptesync" : : : "memory");
621 kvm->arch.tlbie_lock = 0;
622}
623EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
624
625void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
626 unsigned long pte_index)
627{
628 unsigned long rb;
629 unsigned char rbyte;
630
631 rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
632 rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
633 /* modify only the second-last byte, which contains the ref bit */
634 *((char *)hptep + 14) = rbyte;
635 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
636 cpu_relax();
637 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
638 : : "r" (rb), "r" (kvm->arch.lpid));
639 asm volatile("ptesync" : : : "memory");
640 kvm->arch.tlbie_lock = 0;
641}
642EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
643
644static int slb_base_page_shift[4] = {
645 24, /* 16M */
646 16, /* 64k */
647 34, /* 16G */
648 20, /* 1M, unsupported */
649};
650
651long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
652 unsigned long valid)
653{
654 unsigned int i;
655 unsigned int pshift;
656 unsigned long somask;
657 unsigned long vsid, hash;
658 unsigned long avpn;
659 unsigned long *hpte;
660 unsigned long mask, val;
661 unsigned long v, r;
662
663 /* Get page shift, work out hash and AVPN etc. */
664 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
665 val = 0;
666 pshift = 12;
667 if (slb_v & SLB_VSID_L) {
668 mask |= HPTE_V_LARGE;
669 val |= HPTE_V_LARGE;
670 pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
671 }
672 if (slb_v & SLB_VSID_B_1T) {
673 somask = (1UL << 40) - 1;
674 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
675 vsid ^= vsid << 25;
676 } else {
677 somask = (1UL << 28) - 1;
678 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
679 }
680 hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
681 avpn = slb_v & ~(somask >> 16); /* also includes B */
682 avpn |= (eaddr & somask) >> 16;
683
684 if (pshift >= 24)
685 avpn &= ~((1UL << (pshift - 16)) - 1);
686 else
687 avpn &= ~0x7fUL;
688 val |= avpn;
689
690 for (;;) {
691 hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));
692
693 for (i = 0; i < 16; i += 2) {
694 /* Read the PTE racily */
695 v = hpte[i] & ~HPTE_V_HVLOCK;
696
697 /* Check valid/absent, hash, segment size and AVPN */
698 if (!(v & valid) || (v & mask) != val)
699 continue;
700
701 /* Lock the PTE and read it under the lock */
702 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
703 cpu_relax();
704 v = hpte[i] & ~HPTE_V_HVLOCK;
705 r = hpte[i+1];
706
707 /*
 708 * Check the HPTE again, including large page size.
709 * Since we don't currently allow any MPSS (mixed
710 * page-size segment) page sizes, it is sufficient
711 * to check against the actual page size.
712 */
713 if ((v & valid) && (v & mask) == val &&
714 hpte_page_size(v, r) == (1ul << pshift))
715 /* Return with the HPTE still locked */
716 return (hash << 3) + (i >> 1);
717
718 /* Unlock and move on */
719 hpte[i] = v;
720 }
721
722 if (val & HPTE_V_SECONDARY)
723 break;
724 val |= HPTE_V_SECONDARY;
725 hash = hash ^ HPT_HASH_MASK;
726 }
727 return -1;
728}
729EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
730
731/*
732 * Called in real mode to check whether an HPTE not found fault
733 * is due to accessing a paged-out page or an emulated MMIO page,
734 * or if a protection fault is due to accessing a page that the
735 * guest wanted read/write access to but which we made read-only.
 736 * Returns a possibly modified status (DSISR) value if it is none of
 737 * these cases (i.e. pass the interrupt to the guest),
738 * -1 to pass the fault up to host kernel mode code, -2 to do that
739 * and also load the instruction word (for MMIO emulation),
740 * or 0 if we should make the guest retry the access.
741 */
742long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
743 unsigned long slb_v, unsigned int status, bool data)
744{
745 struct kvm *kvm = vcpu->kvm;
746 long int index;
747 unsigned long v, r, gr;
748 unsigned long *hpte;
749 unsigned long valid;
750 struct revmap_entry *rev;
751 unsigned long pp, key;
752
753 /* For protection fault, expect to find a valid HPTE */
754 valid = HPTE_V_VALID;
755 if (status & DSISR_NOHPTE)
756 valid |= HPTE_V_ABSENT;
757
758 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
759 if (index < 0) {
760 if (status & DSISR_NOHPTE)
761 return status; /* there really was no HPTE */
762 return 0; /* for prot fault, HPTE disappeared */
763 }
764 hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
765 v = hpte[0] & ~HPTE_V_HVLOCK;
766 r = hpte[1];
767 rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
768 gr = rev->guest_rpte;
769
770 unlock_hpte(hpte, v);
771
772 /* For not found, if the HPTE is valid by now, retry the instruction */
773 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
774 return 0;
775
776 /* Check access permissions to the page */
777 pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
778 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
779 status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
780 if (!data) {
781 if (gr & (HPTE_R_N | HPTE_R_G))
782 return status | SRR1_ISI_N_OR_G;
783 if (!hpte_read_permission(pp, slb_v & key))
784 return status | SRR1_ISI_PROT;
785 } else if (status & DSISR_ISSTORE) {
786 /* check write permission */
787 if (!hpte_write_permission(pp, slb_v & key))
788 return status | DSISR_PROTFAULT;
789 } else {
790 if (!hpte_read_permission(pp, slb_v & key))
791 return status | DSISR_PROTFAULT;
792 }
793
794 /* Check storage key, if applicable */
795 if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
796 unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
797 if (status & DSISR_ISSTORE)
798 perm >>= 1;
799 if (perm & 1)
800 return status | DSISR_KEYFAULT;
801 }
802
803 /* Save HPTE info for virtual-mode handler */
804 vcpu->arch.pgfault_addr = addr;
805 vcpu->arch.pgfault_index = index;
806 vcpu->arch.pgfault_hpte[0] = v;
807 vcpu->arch.pgfault_hpte[1] = r;
808
809 /* Check the storage key to see if it is possibly emulated MMIO */
810 if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
811 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
812 (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
813 return -2; /* MMIO emulation - load instr word */
814
815 return -1; /* send fault up to host kernel mode */
816}
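kvmppc_hpte_hv_fault() therefore has a four-way return convention: 0 to make the guest retry the access, -1 to exit to the host's virtual-mode handler, -2 to exit and also fetch the faulting instruction word for MMIO emulation, and any other value is a DSISR/SRR1 image to reflect straight to the guest. The real consumer is the real-mode assembly below (kvmppc_hdsi/kvmppc_hisi); this stand-alone sketch, with invented enum names, just restates that dispatch in plain C:

#include <stdio.h>

enum hdsi_action { RETRY_GUEST, REFLECT_DSI, EXIT_TO_HOST, EXIT_TO_HOST_MMIO };

static enum hdsi_action classify(long ret)
{
	if (ret == 0)
		return RETRY_GUEST;          /* HPTE reappeared: just re-execute */
	if (ret == -1)
		return EXIT_TO_HOST;         /* paged-out page: virtual-mode handler */
	if (ret == -2)
		return EXIT_TO_HOST_MMIO;    /* emulated MMIO: also fetch the instr word */
	return REFLECT_DSI;                  /* ret is a DSISR value for the guest */
}

int main(void)
{
	long samples[] = { 0, -1, -2, 0x42000000 };
	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%ld -> %d\n", samples[i], classify(samples[i]));
	return 0;
}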
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5c8b26183f50..b70bf22a3ff3 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -601,6 +601,30 @@ kvmppc_interrupt:
601 601
602 stw r12,VCPU_TRAP(r9) 602 stw r12,VCPU_TRAP(r9)
603 603
604 /* Save HEIR (HV emulation assist reg) in last_inst
605 if this is an HEI (HV emulation interrupt, e40) */
606 li r3,KVM_INST_FETCH_FAILED
607BEGIN_FTR_SECTION
608 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
609 bne 11f
610 mfspr r3,SPRN_HEIR
611END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
61211: stw r3,VCPU_LAST_INST(r9)
613
614 /* these are volatile across C function calls */
615 mfctr r3
616 mfxer r4
617 std r3, VCPU_CTR(r9)
618 stw r4, VCPU_XER(r9)
619
620BEGIN_FTR_SECTION
621 /* If this is a page table miss then see if it's theirs or ours */
622 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
623 beq kvmppc_hdsi
624 cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
625 beq kvmppc_hisi
626END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
627
604 /* See if this is a leftover HDEC interrupt */ 628 /* See if this is a leftover HDEC interrupt */
605 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 629 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
606 bne 2f 630 bne 2f
@@ -608,7 +632,7 @@ kvmppc_interrupt:
608 cmpwi r3,0 632 cmpwi r3,0
609 bge ignore_hdec 633 bge ignore_hdec
6102: 6342:
611 /* See if this is something we can handle in real mode */ 635 /* See if this is an hcall we can handle in real mode */
612 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL 636 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
613 beq hcall_try_real_mode 637 beq hcall_try_real_mode
614 638
@@ -624,6 +648,7 @@ BEGIN_FTR_SECTION
6241: 6481:
625END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 649END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
626 650
651nohpte_cont:
627hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 652hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
628 /* Save DEC */ 653 /* Save DEC */
629 mfspr r5,SPRN_DEC 654 mfspr r5,SPRN_DEC
@@ -632,36 +657,21 @@ hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
632 add r5,r5,r6 657 add r5,r5,r6
633 std r5,VCPU_DEC_EXPIRES(r9) 658 std r5,VCPU_DEC_EXPIRES(r9)
634 659
635 /* Save HEIR (HV emulation assist reg) in last_inst
636 if this is an HEI (HV emulation interrupt, e40) */
637 li r3,-1
638BEGIN_FTR_SECTION
639 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
640 bne 11f
641 mfspr r3,SPRN_HEIR
642END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
64311: stw r3,VCPU_LAST_INST(r9)
644
645 /* Save more register state */ 660 /* Save more register state */
646 mfxer r5
647 mfdar r6 661 mfdar r6
648 mfdsisr r7 662 mfdsisr r7
649 mfctr r8
650
651 stw r5, VCPU_XER(r9)
652 std r6, VCPU_DAR(r9) 663 std r6, VCPU_DAR(r9)
653 stw r7, VCPU_DSISR(r9) 664 stw r7, VCPU_DSISR(r9)
654 std r8, VCPU_CTR(r9)
655 /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */
656BEGIN_FTR_SECTION 665BEGIN_FTR_SECTION
666 /* don't overwrite fault_dar/fault_dsisr if HDSI */
657 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE 667 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
658 beq 6f 668 beq 6f
659END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 669END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
6607: std r6, VCPU_FAULT_DAR(r9) 670 std r6, VCPU_FAULT_DAR(r9)
661 stw r7, VCPU_FAULT_DSISR(r9) 671 stw r7, VCPU_FAULT_DSISR(r9)
662 672
663 /* Save guest CTRL register, set runlatch to 1 */ 673 /* Save guest CTRL register, set runlatch to 1 */
664 mfspr r6,SPRN_CTRLF 6746: mfspr r6,SPRN_CTRLF
665 stw r6,VCPU_CTRL(r9) 675 stw r6,VCPU_CTRL(r9)
666 andi. r0,r6,1 676 andi. r0,r6,1
667 bne 4f 677 bne 4f
@@ -1094,9 +1104,131 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1094 mtspr SPRN_HSRR1, r7 1104 mtspr SPRN_HSRR1, r7
1095 ba 0x500 1105 ba 0x500
1096 1106
10976: mfspr r6,SPRN_HDAR 1107/*
1098 mfspr r7,SPRN_HDSISR 1108 * Check whether an HDSI is an HPTE not found fault or something else.
1099 b 7b 1109 * If it is an HPTE not found fault that is due to the guest accessing
1110 * a page that they have mapped but which we have paged out, then
1111 * we continue on with the guest exit path. In all other cases,
1112 * reflect the HDSI to the guest as a DSI.
1113 */
1114kvmppc_hdsi:
1115 mfspr r4, SPRN_HDAR
1116 mfspr r6, SPRN_HDSISR
1117 /* HPTE not found fault or protection fault? */
1118 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
1119 beq 1f /* if not, send it to the guest */
1120 andi. r0, r11, MSR_DR /* data relocation enabled? */
1121 beq 3f
1122 clrrdi r0, r4, 28
1123 PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
1124 bne 1f /* if no SLB entry found */
11254: std r4, VCPU_FAULT_DAR(r9)
1126 stw r6, VCPU_FAULT_DSISR(r9)
1127
1128 /* Search the hash table. */
1129 mr r3, r9 /* vcpu pointer */
1130 li r7, 1 /* data fault */
1131 bl .kvmppc_hpte_hv_fault
1132 ld r9, HSTATE_KVM_VCPU(r13)
1133 ld r10, VCPU_PC(r9)
1134 ld r11, VCPU_MSR(r9)
1135 li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1136 cmpdi r3, 0 /* retry the instruction */
1137 beq 6f
1138 cmpdi r3, -1 /* handle in kernel mode */
1139 beq nohpte_cont
1140 cmpdi r3, -2 /* MMIO emulation; need instr word */
1141 beq 2f
1142
1143 /* Synthesize a DSI for the guest */
1144 ld r4, VCPU_FAULT_DAR(r9)
1145 mr r6, r3
11461: mtspr SPRN_DAR, r4
1147 mtspr SPRN_DSISR, r6
1148 mtspr SPRN_SRR0, r10
1149 mtspr SPRN_SRR1, r11
1150 li r10, BOOK3S_INTERRUPT_DATA_STORAGE
1151 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1152 rotldi r11, r11, 63
11536: ld r7, VCPU_CTR(r9)
1154 lwz r8, VCPU_XER(r9)
1155 mtctr r7
1156 mtxer r8
1157 mr r4, r9
1158 b fast_guest_return
1159
11603: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */
1161 ld r5, KVM_VRMA_SLB_V(r5)
1162 b 4b
1163
1164 /* If this is for emulated MMIO, load the instruction word */
11652: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */
1166
1167 /* Set guest mode to 'jump over instruction' so if lwz faults
1168 * we'll just continue at the next IP. */
1169 li r0, KVM_GUEST_MODE_SKIP
1170 stb r0, HSTATE_IN_GUEST(r13)
1171
1172 /* Do the access with MSR:DR enabled */
1173 mfmsr r3
1174 ori r4, r3, MSR_DR /* Enable paging for data */
1175 mtmsrd r4
1176 lwz r8, 0(r10)
1177 mtmsrd r3
1178
1179 /* Store the result */
1180 stw r8, VCPU_LAST_INST(r9)
1181
1182 /* Unset guest mode. */
1183 li r0, KVM_GUEST_MODE_NONE
1184 stb r0, HSTATE_IN_GUEST(r13)
1185 b nohpte_cont
1186
1187/*
1188 * Similarly for an HISI, reflect it to the guest as an ISI unless
1189 * it is an HPTE not found fault for a page that we have paged out.
1190 */
1191kvmppc_hisi:
1192 andis. r0, r11, SRR1_ISI_NOPT@h
1193 beq 1f
1194 andi. r0, r11, MSR_IR /* instruction relocation enabled? */
1195 beq 3f
1196 clrrdi r0, r10, 28
1197 PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
1198 bne 1f /* if no SLB entry found */
11994:
1200 /* Search the hash table. */
1201 mr r3, r9 /* vcpu pointer */
1202 mr r4, r10
1203 mr r6, r11
1204 li r7, 0 /* instruction fault */
1205 bl .kvmppc_hpte_hv_fault
1206 ld r9, HSTATE_KVM_VCPU(r13)
1207 ld r10, VCPU_PC(r9)
1208 ld r11, VCPU_MSR(r9)
1209 li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1210 cmpdi r3, 0 /* retry the instruction */
1211 beq 6f
1212 cmpdi r3, -1 /* handle in kernel mode */
1213 beq nohpte_cont
1214
1215 /* Synthesize an ISI for the guest */
1216 mr r11, r3
12171: mtspr SPRN_SRR0, r10
1218 mtspr SPRN_SRR1, r11
1219 li r10, BOOK3S_INTERRUPT_INST_STORAGE
1220 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1221 rotldi r11, r11, 63
12226: ld r7, VCPU_CTR(r9)
1223 lwz r8, VCPU_XER(r9)
1224 mtctr r7
1225 mtxer r8
1226 mr r4, r9
1227 b fast_guest_return
1228
12293: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
1230 ld r5, KVM_VRMA_SLB_V(r6)
1231 b 4b
1100 1232
1101/* 1233/*
1102 * Try to handle an hcall in real mode. 1234 * Try to handle an hcall in real mode.
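One detail worth calling out in kvmppc_hdsi and kvmppc_hisi above: the guest MSR for the synthesized DSI/ISI is built with li r11,(MSR_ME << 1)|1 followed by rotldi r11,r11,63, so MSR_SF | MSR_ME can come from a 16-bit immediate instead of a full 64-bit constant load. A quick stand-alone check of that identity, assuming the usual bit positions (MSR_SF = bit 63, MSR_ME = bit 12):

#include <stdio.h>
#include <stdint.h>

static uint64_t rotl64(uint64_t v, unsigned n)
{
	return (v << n) | (v >> (64 - n));   /* n in 1..63 here */
}

int main(void)
{
	const uint64_t MSR_SF = 1ull << 63;
	const uint64_t MSR_ME = 1ull << 12;

	uint64_t v = (MSR_ME << 1) | 1;      /* 0x2001, fits a 16-bit immediate */
	v = rotl64(v, 63);                   /* rotldi r11,r11,63 */

	printf("0x%016llx %s\n", (unsigned long long)v,
	       v == (MSR_SF | MSR_ME) ? "== MSR_SF|MSR_ME" : "mismatch");
	return 0;
}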
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index 7b0ee96c1bed..e70ef2d86431 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -196,7 +196,8 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
196 kvmppc_inject_pf(vcpu, addr, false); 196 kvmppc_inject_pf(vcpu, addr, false);
197 goto done_load; 197 goto done_load;
198 } else if (r == EMULATE_DO_MMIO) { 198 } else if (r == EMULATE_DO_MMIO) {
199 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); 199 emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
200 len, 1);
200 goto done_load; 201 goto done_load;
201 } 202 }
202 203
@@ -286,11 +287,13 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
286 kvmppc_inject_pf(vcpu, addr, false); 287 kvmppc_inject_pf(vcpu, addr, false);
287 goto done_load; 288 goto done_load;
288 } else if ((r == EMULATE_DO_MMIO) && w) { 289 } else if ((r == EMULATE_DO_MMIO) && w) {
289 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); 290 emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
291 4, 1);
290 vcpu->arch.qpr[rs] = tmp[1]; 292 vcpu->arch.qpr[rs] = tmp[1];
291 goto done_load; 293 goto done_load;
292 } else if (r == EMULATE_DO_MMIO) { 294 } else if (r == EMULATE_DO_MMIO) {
293 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); 295 emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs,
296 8, 1);
294 goto done_load; 297 goto done_load;
295 } 298 }
296 299
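The switch from KVM_REG_FPR/FQPR to KVM_MMIO_REG_FPR/FQPR reflects that the pending-MMIO destination is an encoded value: a register number in the low bits plus a register-class tag in the higher bits, decoded when the MMIO load completes. The sketch below only illustrates the idea; the MMIO_REG_* values are invented and do not claim to match the kernel's encoding:

#include <stdio.h>

#define MMIO_REG_MASK   0x001f          /* low bits: register number (assumed) */
#define MMIO_REG_GPR    0x0000          /* class tags (assumed) */
#define MMIO_REG_FPR    0x0020
#define MMIO_REG_QPR    0x0040

static void complete_mmio_load(unsigned int target, unsigned long long val)
{
	unsigned int reg = target & MMIO_REG_MASK;

	switch (target & ~MMIO_REG_MASK) {
	case MMIO_REG_GPR:
		printf("write 0x%llx to GPR%u\n", val, reg);
		break;
	case MMIO_REG_FPR:
		printf("write 0x%llx to FPR%u\n", val, reg);
		break;
	case MMIO_REG_QPR:
		printf("write 0x%llx to QPR%u\n", val, reg);
		break;
	}
}

int main(void)
{
	complete_mmio_load(MMIO_REG_FPR | 3, 0x4008000000000000ull);
	return 0;
}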
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 220fcdf26978..7340e1090b77 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -51,15 +51,19 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
51#define MSR_USER32 MSR_USER 51#define MSR_USER32 MSR_USER
52#define MSR_USER64 MSR_USER 52#define MSR_USER64 MSR_USER
53#define HW_PAGE_SIZE PAGE_SIZE 53#define HW_PAGE_SIZE PAGE_SIZE
54#define __hard_irq_disable local_irq_disable
55#define __hard_irq_enable local_irq_enable
54#endif 56#endif
55 57
56void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 58void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
57{ 59{
58#ifdef CONFIG_PPC_BOOK3S_64 60#ifdef CONFIG_PPC_BOOK3S_64
59 memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); 61 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
62 memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
60 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, 63 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
61 sizeof(get_paca()->shadow_vcpu)); 64 sizeof(get_paca()->shadow_vcpu));
62 to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; 65 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
66 svcpu_put(svcpu);
63#endif 67#endif
64 68
65#ifdef CONFIG_PPC_BOOK3S_32 69#ifdef CONFIG_PPC_BOOK3S_32
@@ -70,10 +74,12 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
70void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 74void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
71{ 75{
72#ifdef CONFIG_PPC_BOOK3S_64 76#ifdef CONFIG_PPC_BOOK3S_64
73 memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); 77 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
78 memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
74 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 79 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
75 sizeof(get_paca()->shadow_vcpu)); 80 sizeof(get_paca()->shadow_vcpu));
76 to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; 81 to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
82 svcpu_put(svcpu);
77#endif 83#endif
78 84
79 kvmppc_giveup_ext(vcpu, MSR_FP); 85 kvmppc_giveup_ext(vcpu, MSR_FP);
@@ -151,14 +157,16 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
151#ifdef CONFIG_PPC_BOOK3S_64 157#ifdef CONFIG_PPC_BOOK3S_64
152 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { 158 if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
153 kvmppc_mmu_book3s_64_init(vcpu); 159 kvmppc_mmu_book3s_64_init(vcpu);
154 to_book3s(vcpu)->hior = 0xfff00000; 160 if (!to_book3s(vcpu)->hior_explicit)
161 to_book3s(vcpu)->hior = 0xfff00000;
155 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; 162 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
156 vcpu->arch.cpu_type = KVM_CPU_3S_64; 163 vcpu->arch.cpu_type = KVM_CPU_3S_64;
157 } else 164 } else
158#endif 165#endif
159 { 166 {
160 kvmppc_mmu_book3s_32_init(vcpu); 167 kvmppc_mmu_book3s_32_init(vcpu);
161 to_book3s(vcpu)->hior = 0; 168 if (!to_book3s(vcpu)->hior_explicit)
169 to_book3s(vcpu)->hior = 0;
162 to_book3s(vcpu)->msr_mask = 0xffffffffULL; 170 to_book3s(vcpu)->msr_mask = 0xffffffffULL;
163 vcpu->arch.cpu_type = KVM_CPU_3S_32; 171 vcpu->arch.cpu_type = KVM_CPU_3S_32;
164 } 172 }
@@ -308,19 +316,22 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
308 316
309 if (page_found == -ENOENT) { 317 if (page_found == -ENOENT) {
310 /* Page not found in guest PTE entries */ 318 /* Page not found in guest PTE entries */
319 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
311 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 320 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
312 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; 321 vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
313 vcpu->arch.shared->msr |= 322 vcpu->arch.shared->msr |=
314 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); 323 (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
324 svcpu_put(svcpu);
315 kvmppc_book3s_queue_irqprio(vcpu, vec); 325 kvmppc_book3s_queue_irqprio(vcpu, vec);
316 } else if (page_found == -EPERM) { 326 } else if (page_found == -EPERM) {
317 /* Storage protection */ 327 /* Storage protection */
328 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
318 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 329 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
319 vcpu->arch.shared->dsisr = 330 vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
320 to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
321 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; 331 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
322 vcpu->arch.shared->msr |= 332 vcpu->arch.shared->msr |=
323 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); 333 svcpu->shadow_srr1 & 0x00000000f8000000ULL;
334 svcpu_put(svcpu);
324 kvmppc_book3s_queue_irqprio(vcpu, vec); 335 kvmppc_book3s_queue_irqprio(vcpu, vec);
325 } else if (page_found == -EINVAL) { 336 } else if (page_found == -EINVAL) {
326 /* Page not found in guest SLB */ 337 /* Page not found in guest SLB */
@@ -517,24 +528,29 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
517 run->ready_for_interrupt_injection = 1; 528 run->ready_for_interrupt_injection = 1;
518 529
519 trace_kvm_book3s_exit(exit_nr, vcpu); 530 trace_kvm_book3s_exit(exit_nr, vcpu);
531 preempt_enable();
520 kvm_resched(vcpu); 532 kvm_resched(vcpu);
521 switch (exit_nr) { 533 switch (exit_nr) {
522 case BOOK3S_INTERRUPT_INST_STORAGE: 534 case BOOK3S_INTERRUPT_INST_STORAGE:
535 {
536 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
537 ulong shadow_srr1 = svcpu->shadow_srr1;
523 vcpu->stat.pf_instruc++; 538 vcpu->stat.pf_instruc++;
524 539
525#ifdef CONFIG_PPC_BOOK3S_32 540#ifdef CONFIG_PPC_BOOK3S_32
526 /* We set segments as unused segments when invalidating them. So 541 /* We set segments as unused segments when invalidating them. So
527 * treat the respective fault as segment fault. */ 542 * treat the respective fault as segment fault. */
528 if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] 543 if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
529 == SR_INVALID) {
530 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 544 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
531 r = RESUME_GUEST; 545 r = RESUME_GUEST;
546 svcpu_put(svcpu);
532 break; 547 break;
533 } 548 }
534#endif 549#endif
550 svcpu_put(svcpu);
535 551
536 /* only care about PTEG not found errors, but leave NX alone */ 552 /* only care about PTEG not found errors, but leave NX alone */
537 if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { 553 if (shadow_srr1 & 0x40000000) {
538 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 554 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
539 vcpu->stat.sp_instruc++; 555 vcpu->stat.sp_instruc++;
540 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 556 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -547,33 +563,37 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
547 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 563 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
548 r = RESUME_GUEST; 564 r = RESUME_GUEST;
549 } else { 565 } else {
550 vcpu->arch.shared->msr |= 566 vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
551 to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
552 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 567 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
553 r = RESUME_GUEST; 568 r = RESUME_GUEST;
554 } 569 }
555 break; 570 break;
571 }
556 case BOOK3S_INTERRUPT_DATA_STORAGE: 572 case BOOK3S_INTERRUPT_DATA_STORAGE:
557 { 573 {
558 ulong dar = kvmppc_get_fault_dar(vcpu); 574 ulong dar = kvmppc_get_fault_dar(vcpu);
575 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
576 u32 fault_dsisr = svcpu->fault_dsisr;
559 vcpu->stat.pf_storage++; 577 vcpu->stat.pf_storage++;
560 578
561#ifdef CONFIG_PPC_BOOK3S_32 579#ifdef CONFIG_PPC_BOOK3S_32
562 /* We set segments as unused segments when invalidating them. So 580 /* We set segments as unused segments when invalidating them. So
563 * treat the respective fault as segment fault. */ 581 * treat the respective fault as segment fault. */
564 if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { 582 if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
565 kvmppc_mmu_map_segment(vcpu, dar); 583 kvmppc_mmu_map_segment(vcpu, dar);
566 r = RESUME_GUEST; 584 r = RESUME_GUEST;
585 svcpu_put(svcpu);
567 break; 586 break;
568 } 587 }
569#endif 588#endif
589 svcpu_put(svcpu);
570 590
571 /* The only case we need to handle is missing shadow PTEs */ 591 /* The only case we need to handle is missing shadow PTEs */
572 if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { 592 if (fault_dsisr & DSISR_NOHPTE) {
573 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 593 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
574 } else { 594 } else {
575 vcpu->arch.shared->dar = dar; 595 vcpu->arch.shared->dar = dar;
576 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; 596 vcpu->arch.shared->dsisr = fault_dsisr;
577 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 597 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
578 r = RESUME_GUEST; 598 r = RESUME_GUEST;
579 } 599 }
@@ -609,10 +629,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
609 case BOOK3S_INTERRUPT_PROGRAM: 629 case BOOK3S_INTERRUPT_PROGRAM:
610 { 630 {
611 enum emulation_result er; 631 enum emulation_result er;
632 struct kvmppc_book3s_shadow_vcpu *svcpu;
612 ulong flags; 633 ulong flags;
613 634
614program_interrupt: 635program_interrupt:
615 flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; 636 svcpu = svcpu_get(vcpu);
637 flags = svcpu->shadow_srr1 & 0x1f0000ull;
638 svcpu_put(svcpu);
616 639
617 if (vcpu->arch.shared->msr & MSR_PR) { 640 if (vcpu->arch.shared->msr & MSR_PR) {
618#ifdef EXIT_DEBUG 641#ifdef EXIT_DEBUG
@@ -740,20 +763,33 @@ program_interrupt:
740 r = RESUME_GUEST; 763 r = RESUME_GUEST;
741 break; 764 break;
742 default: 765 default:
766 {
767 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
768 ulong shadow_srr1 = svcpu->shadow_srr1;
769 svcpu_put(svcpu);
743 /* Ugh - bork here! What did we get? */ 770 /* Ugh - bork here! What did we get? */
744 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 771 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
745 exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); 772 exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
746 r = RESUME_HOST; 773 r = RESUME_HOST;
747 BUG(); 774 BUG();
748 break; 775 break;
749 } 776 }
750 777 }
751 778
752 if (!(r & RESUME_HOST)) { 779 if (!(r & RESUME_HOST)) {
753 /* To avoid clobbering exit_reason, only check for signals if 780 /* To avoid clobbering exit_reason, only check for signals if
754 * we aren't already exiting to userspace for some other 781 * we aren't already exiting to userspace for some other
755 * reason. */ 782 * reason. */
783
784 /*
785 * Interrupts could be timers for the guest which we have to
786 * inject again, so let's postpone them until we're in the guest
787 * and if we really did time things so badly, then we just exit
788 * again due to a host external interrupt.
789 */
790 __hard_irq_disable();
756 if (signal_pending(current)) { 791 if (signal_pending(current)) {
792 __hard_irq_enable();
757#ifdef EXIT_DEBUG 793#ifdef EXIT_DEBUG
758 printk(KERN_EMERG "KVM: Going back to host\n"); 794 printk(KERN_EMERG "KVM: Going back to host\n");
759#endif 795#endif
@@ -761,10 +797,12 @@ program_interrupt:
761 run->exit_reason = KVM_EXIT_INTR; 797 run->exit_reason = KVM_EXIT_INTR;
762 r = -EINTR; 798 r = -EINTR;
763 } else { 799 } else {
800 preempt_disable();
801
764 /* In case an interrupt came in that was triggered 802 /* In case an interrupt came in that was triggered
765 * from userspace (like DEC), we need to check what 803 * from userspace (like DEC), we need to check what
766 * to inject now! */ 804 * to inject now! */
767 kvmppc_core_deliver_interrupts(vcpu); 805 kvmppc_core_prepare_to_enter(vcpu);
768 } 806 }
769 } 807 }
770 808
@@ -836,6 +874,38 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
836 return 0; 874 return 0;
837} 875}
838 876
877int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
878{
879 int r = -EINVAL;
880
881 switch (reg->id) {
882 case KVM_REG_PPC_HIOR:
883 r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
884 break;
885 default:
886 break;
887 }
888
889 return r;
890}
891
892int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
893{
894 int r = -EINVAL;
895
896 switch (reg->id) {
897 case KVM_REG_PPC_HIOR:
898 r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
899 if (!r)
900 to_book3s(vcpu)->hior_explicit = true;
901 break;
902 default:
903 break;
904 }
905
906 return r;
907}
908
839int kvmppc_core_check_processor_compat(void) 909int kvmppc_core_check_processor_compat(void)
840{ 910{
841 return 0; 911 return 0;
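From userspace, the get/set_one_reg handlers added in the hunk above are reached through the generic KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls, passing a struct kvm_one_reg that carries the register id and a pointer to the value. A rough usage sketch for HIOR, assuming powerpc uapi headers and a vcpu fd already obtained the usual way, with error handling trimmed:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Set HIOR explicitly; the hior_explicit flag then keeps kvmppc_set_pvr()
 * from overriding the value when the PVR is (re)set. */
int set_hior(int vcpu_fd, uint64_t hior)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_HIOR,
		.addr = (uintptr_t)&hior,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}

int get_hior(int vcpu_fd, uint64_t *hior)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_HIOR,
		.addr = (uintptr_t)hior,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}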
@@ -923,16 +993,31 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
923#endif 993#endif
924 ulong ext_msr; 994 ulong ext_msr;
925 995
996 preempt_disable();
997
926 /* Check if we can run the vcpu at all */ 998 /* Check if we can run the vcpu at all */
927 if (!vcpu->arch.sane) { 999 if (!vcpu->arch.sane) {
928 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1000 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
929 return -EINVAL; 1001 ret = -EINVAL;
1002 goto out;
930 } 1003 }
931 1004
1005 kvmppc_core_prepare_to_enter(vcpu);
1006
1007 /*
1008 * Interrupts could be timers for the guest which we have to inject
1009 * again, so let's postpone them until we're in the guest and if we
1010 * really did time things so badly, then we just exit again due to
1011 * a host external interrupt.
1012 */
1013 __hard_irq_disable();
1014
932 /* No need to go into the guest when all we do is going out */ 1015 /* No need to go into the guest when all we do is going out */
933 if (signal_pending(current)) { 1016 if (signal_pending(current)) {
1017 __hard_irq_enable();
934 kvm_run->exit_reason = KVM_EXIT_INTR; 1018 kvm_run->exit_reason = KVM_EXIT_INTR;
935 return -EINTR; 1019 ret = -EINTR;
1020 goto out;
936 } 1021 }
937 1022
938 /* Save FPU state in stack */ 1023 /* Save FPU state in stack */
@@ -974,8 +1059,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
974 1059
975 kvm_guest_exit(); 1060 kvm_guest_exit();
976 1061
977 local_irq_disable();
978
979 current->thread.regs->msr = ext_msr; 1062 current->thread.regs->msr = ext_msr;
980 1063
981 /* Make sure we save the guest FPU/Altivec/VSX state */ 1064 /* Make sure we save the guest FPU/Altivec/VSX state */
@@ -1002,9 +1085,50 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1002 current->thread.used_vsr = used_vsr; 1085 current->thread.used_vsr = used_vsr;
1003#endif 1086#endif
1004 1087
1088out:
1089 preempt_enable();
1005 return ret; 1090 return ret;
1006} 1091}
1007 1092
1093/*
1094 * Get (and clear) the dirty memory log for a memory slot.
1095 */
1096int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1097 struct kvm_dirty_log *log)
1098{
1099 struct kvm_memory_slot *memslot;
1100 struct kvm_vcpu *vcpu;
1101 ulong ga, ga_end;
1102 int is_dirty = 0;
1103 int r;
1104 unsigned long n;
1105
1106 mutex_lock(&kvm->slots_lock);
1107
1108 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1109 if (r)
1110 goto out;
1111
1112 /* If nothing is dirty, don't bother messing with page tables. */
1113 if (is_dirty) {
1114 memslot = id_to_memslot(kvm->memslots, log->slot);
1115
1116 ga = memslot->base_gfn << PAGE_SHIFT;
1117 ga_end = ga + (memslot->npages << PAGE_SHIFT);
1118
1119 kvm_for_each_vcpu(n, vcpu, kvm)
1120 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
1121
1122 n = kvm_dirty_bitmap_bytes(memslot);
1123 memset(memslot->dirty_bitmap, 0, n);
1124 }
1125
1126 r = 0;
1127out:
1128 mutex_unlock(&kvm->slots_lock);
1129 return r;
1130}
1131
1008int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1132int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1009 struct kvm_userspace_memory_region *mem) 1133 struct kvm_userspace_memory_region *mem)
1010{ 1134{
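The new kvm_vm_ioctl_get_dirty_log() above flushes the shadow PTEs covering the slot and then clears the slot's dirty bitmap, whose size is one bit per guest page rounded up to whole longs. A stand-alone version of that size arithmetic, assuming 64-bit longs rather than the kernel's BITS_PER_LONG:

#include <stdio.h>

static unsigned long dirty_bitmap_bytes(unsigned long npages)
{
	unsigned long bits_per_long = 64;                 /* assumed LP64 host */
	unsigned long nlongs = (npages + bits_per_long - 1) / bits_per_long;

	return nlongs * sizeof(unsigned long);
}

int main(void)
{
	/* A 256MB slot of 4k pages: 65536 pages => 8192 bytes of bitmap. */
	printf("%lu\n", dirty_bitmap_bytes(65536));
	/* An odd-sized slot still rounds up to whole longs. */
	printf("%lu\n", dirty_bitmap_bytes(100));
	return 0;
}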
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index bb6c988f010a..ee9e1ee9c858 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,12 +124,6 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
124 vcpu->arch.shared->msr = new_msr; 124 vcpu->arch.shared->msr = new_msr;
125 125
126 kvmppc_mmu_msr_notify(vcpu, old_msr); 126 kvmppc_mmu_msr_notify(vcpu, old_msr);
127
128 if (vcpu->arch.shared->msr & MSR_WE) {
129 kvm_vcpu_block(vcpu);
130 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
131 };
132
133 kvmppc_vcpu_sync_spe(vcpu); 127 kvmppc_vcpu_sync_spe(vcpu);
134} 128}
135 129
@@ -258,9 +252,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
258 allowed = vcpu->arch.shared->msr & MSR_ME; 252 allowed = vcpu->arch.shared->msr & MSR_ME;
259 msr_mask = 0; 253 msr_mask = 0;
260 break; 254 break;
261 case BOOKE_IRQPRIO_EXTERNAL:
262 case BOOKE_IRQPRIO_DECREMENTER: 255 case BOOKE_IRQPRIO_DECREMENTER:
263 case BOOKE_IRQPRIO_FIT: 256 case BOOKE_IRQPRIO_FIT:
257 keep_irq = true;
258 /* fall through */
259 case BOOKE_IRQPRIO_EXTERNAL:
264 allowed = vcpu->arch.shared->msr & MSR_EE; 260 allowed = vcpu->arch.shared->msr & MSR_EE;
265 allowed = allowed && !crit; 261 allowed = allowed && !crit;
266 msr_mask = MSR_CE|MSR_ME|MSR_DE; 262 msr_mask = MSR_CE|MSR_ME|MSR_DE;
@@ -276,7 +272,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
276 vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; 272 vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
277 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 273 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
278 if (update_esr == true) 274 if (update_esr == true)
279 vcpu->arch.esr = vcpu->arch.queued_esr; 275 vcpu->arch.shared->esr = vcpu->arch.queued_esr;
280 if (update_dear == true) 276 if (update_dear == true)
281 vcpu->arch.shared->dar = vcpu->arch.queued_dear; 277 vcpu->arch.shared->dar = vcpu->arch.queued_dear;
282 kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); 278 kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
@@ -288,13 +284,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
288 return allowed; 284 return allowed;
289} 285}
290 286
291/* Check pending exceptions and deliver one, if possible. */ 287static void update_timer_ints(struct kvm_vcpu *vcpu)
292void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) 288{
289 if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
290 kvmppc_core_queue_dec(vcpu);
291 else
292 kvmppc_core_dequeue_dec(vcpu);
293}
294
295static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
293{ 296{
294 unsigned long *pending = &vcpu->arch.pending_exceptions; 297 unsigned long *pending = &vcpu->arch.pending_exceptions;
295 unsigned long old_pending = vcpu->arch.pending_exceptions;
296 unsigned int priority; 298 unsigned int priority;
297 299
300 if (vcpu->requests) {
301 if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
302 smp_mb();
303 update_timer_ints(vcpu);
304 }
305 }
306
298 priority = __ffs(*pending); 307 priority = __ffs(*pending);
299 while (priority <= BOOKE_IRQPRIO_MAX) { 308 while (priority <= BOOKE_IRQPRIO_MAX) {
300 if (kvmppc_booke_irqprio_deliver(vcpu, priority)) 309 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -306,10 +315,24 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
306 } 315 }
307 316
308 /* Tell the guest about our interrupt status */ 317 /* Tell the guest about our interrupt status */
309 if (*pending) 318 vcpu->arch.shared->int_pending = !!*pending;
310 vcpu->arch.shared->int_pending = 1; 319}
311 else if (old_pending) 320
312 vcpu->arch.shared->int_pending = 0; 321/* Check pending exceptions and deliver one, if possible. */
322void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
323{
324 WARN_ON_ONCE(!irqs_disabled());
325
326 kvmppc_core_check_exceptions(vcpu);
327
328 if (vcpu->arch.shared->msr & MSR_WE) {
329 local_irq_enable();
330 kvm_vcpu_block(vcpu);
331 local_irq_disable();
332
333 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
334 kvmppc_core_check_exceptions(vcpu);
335 };
313} 336}
314 337
315int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 338int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -322,11 +345,21 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
322 } 345 }
323 346
324 local_irq_disable(); 347 local_irq_disable();
348
349 kvmppc_core_prepare_to_enter(vcpu);
350
351 if (signal_pending(current)) {
352 kvm_run->exit_reason = KVM_EXIT_INTR;
353 ret = -EINTR;
354 goto out;
355 }
356
325 kvm_guest_enter(); 357 kvm_guest_enter();
326 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 358 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
327 kvm_guest_exit(); 359 kvm_guest_exit();
328 local_irq_enable();
329 360
361out:
362 local_irq_enable();
330 return ret; 363 return ret;
331} 364}
332 365
@@ -603,7 +636,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
603 636
604 local_irq_disable(); 637 local_irq_disable();
605 638
606 kvmppc_core_deliver_interrupts(vcpu); 639 kvmppc_core_prepare_to_enter(vcpu);
607 640
608 if (!(r & RESUME_HOST)) { 641 if (!(r & RESUME_HOST)) {
609 /* To avoid clobbering exit_reason, only check for signals if 642 /* To avoid clobbering exit_reason, only check for signals if
@@ -628,6 +661,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
628 vcpu->arch.pc = 0; 661 vcpu->arch.pc = 0;
629 vcpu->arch.shared->msr = 0; 662 vcpu->arch.shared->msr = 0;
630 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; 663 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
664 vcpu->arch.shared->pir = vcpu->vcpu_id;
631 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ 665 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
632 666
633 vcpu->arch.shadow_pid = 1; 667 vcpu->arch.shadow_pid = 1;
@@ -662,10 +696,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
662 regs->sprg1 = vcpu->arch.shared->sprg1; 696 regs->sprg1 = vcpu->arch.shared->sprg1;
663 regs->sprg2 = vcpu->arch.shared->sprg2; 697 regs->sprg2 = vcpu->arch.shared->sprg2;
664 regs->sprg3 = vcpu->arch.shared->sprg3; 698 regs->sprg3 = vcpu->arch.shared->sprg3;
665 regs->sprg4 = vcpu->arch.sprg4; 699 regs->sprg4 = vcpu->arch.shared->sprg4;
666 regs->sprg5 = vcpu->arch.sprg5; 700 regs->sprg5 = vcpu->arch.shared->sprg5;
667 regs->sprg6 = vcpu->arch.sprg6; 701 regs->sprg6 = vcpu->arch.shared->sprg6;
668 regs->sprg7 = vcpu->arch.sprg7; 702 regs->sprg7 = vcpu->arch.shared->sprg7;
669 703
670 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 704 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
671 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 705 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -690,10 +724,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
690 vcpu->arch.shared->sprg1 = regs->sprg1; 724 vcpu->arch.shared->sprg1 = regs->sprg1;
691 vcpu->arch.shared->sprg2 = regs->sprg2; 725 vcpu->arch.shared->sprg2 = regs->sprg2;
692 vcpu->arch.shared->sprg3 = regs->sprg3; 726 vcpu->arch.shared->sprg3 = regs->sprg3;
693 vcpu->arch.sprg4 = regs->sprg4; 727 vcpu->arch.shared->sprg4 = regs->sprg4;
694 vcpu->arch.sprg5 = regs->sprg5; 728 vcpu->arch.shared->sprg5 = regs->sprg5;
695 vcpu->arch.sprg6 = regs->sprg6; 729 vcpu->arch.shared->sprg6 = regs->sprg6;
696 vcpu->arch.sprg7 = regs->sprg7; 730 vcpu->arch.shared->sprg7 = regs->sprg7;
697 731
698 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 732 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
699 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 733 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -711,7 +745,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
711 sregs->u.e.csrr0 = vcpu->arch.csrr0; 745 sregs->u.e.csrr0 = vcpu->arch.csrr0;
712 sregs->u.e.csrr1 = vcpu->arch.csrr1; 746 sregs->u.e.csrr1 = vcpu->arch.csrr1;
713 sregs->u.e.mcsr = vcpu->arch.mcsr; 747 sregs->u.e.mcsr = vcpu->arch.mcsr;
714 sregs->u.e.esr = vcpu->arch.esr; 748 sregs->u.e.esr = vcpu->arch.shared->esr;
715 sregs->u.e.dear = vcpu->arch.shared->dar; 749 sregs->u.e.dear = vcpu->arch.shared->dar;
716 sregs->u.e.tsr = vcpu->arch.tsr; 750 sregs->u.e.tsr = vcpu->arch.tsr;
717 sregs->u.e.tcr = vcpu->arch.tcr; 751 sregs->u.e.tcr = vcpu->arch.tcr;
@@ -729,28 +763,19 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
729 vcpu->arch.csrr0 = sregs->u.e.csrr0; 763 vcpu->arch.csrr0 = sregs->u.e.csrr0;
730 vcpu->arch.csrr1 = sregs->u.e.csrr1; 764 vcpu->arch.csrr1 = sregs->u.e.csrr1;
731 vcpu->arch.mcsr = sregs->u.e.mcsr; 765 vcpu->arch.mcsr = sregs->u.e.mcsr;
732 vcpu->arch.esr = sregs->u.e.esr; 766 vcpu->arch.shared->esr = sregs->u.e.esr;
733 vcpu->arch.shared->dar = sregs->u.e.dear; 767 vcpu->arch.shared->dar = sregs->u.e.dear;
734 vcpu->arch.vrsave = sregs->u.e.vrsave; 768 vcpu->arch.vrsave = sregs->u.e.vrsave;
735 vcpu->arch.tcr = sregs->u.e.tcr; 769 kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
736 770
737 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) 771 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
738 vcpu->arch.dec = sregs->u.e.dec; 772 vcpu->arch.dec = sregs->u.e.dec;
739 773 kvmppc_emulate_dec(vcpu);
740 kvmppc_emulate_dec(vcpu); 774 }
741 775
742 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { 776 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
743 /* 777 vcpu->arch.tsr = sregs->u.e.tsr;
744 * FIXME: existing KVM timer handling is incomplete. 778 update_timer_ints(vcpu);
745 * TSR cannot be read by the guest, and its value in
746 * vcpu->arch is always zero. For now, just handle
747 * the case where the caller is trying to inject a
748 * decrementer interrupt.
749 */
750
751 if ((sregs->u.e.tsr & TSR_DIS) &&
752 (vcpu->arch.tcr & TCR_DIE))
753 kvmppc_core_queue_dec(vcpu);
754 } 779 }
755 780
756 return 0; 781 return 0;
@@ -761,7 +786,7 @@ static void get_sregs_arch206(struct kvm_vcpu *vcpu,
761{ 786{
762 sregs->u.e.features |= KVM_SREGS_E_ARCH206; 787 sregs->u.e.features |= KVM_SREGS_E_ARCH206;
763 788
764 sregs->u.e.pir = 0; 789 sregs->u.e.pir = vcpu->vcpu_id;
765 sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; 790 sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
766 sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; 791 sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
767 sregs->u.e.decar = vcpu->arch.decar; 792 sregs->u.e.decar = vcpu->arch.decar;
@@ -774,7 +799,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
774 if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) 799 if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
775 return 0; 800 return 0;
776 801
777 if (sregs->u.e.pir != 0) 802 if (sregs->u.e.pir != vcpu->vcpu_id)
778 return -EINVAL; 803 return -EINVAL;
779 804
780 vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; 805 vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
@@ -862,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
862 return kvmppc_core_set_sregs(vcpu, sregs); 887 return kvmppc_core_set_sregs(vcpu, sregs);
863} 888}
864 889
890int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
891{
892 return -EINVAL;
893}
894
895int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
896{
897 return -EINVAL;
898}
899
865int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 900int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
866{ 901{
867 return -ENOTSUPP; 902 return -ENOTSUPP;
@@ -906,6 +941,33 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
906{ 941{
907} 942}
908 943
944void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
945{
946 vcpu->arch.tcr = new_tcr;
947 update_timer_ints(vcpu);
948}
949
950void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
951{
952 set_bits(tsr_bits, &vcpu->arch.tsr);
953 smp_wmb();
954 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
955 kvm_vcpu_kick(vcpu);
956}
957
958void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
959{
960 clear_bits(tsr_bits, &vcpu->arch.tsr);
961 update_timer_ints(vcpu);
962}
963
964void kvmppc_decrementer_func(unsigned long data)
965{
966 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
967
968 kvmppc_set_tsr_bits(vcpu, TSR_DIS);
969}
970
909int __init kvmppc_booke_init(void) 971int __init kvmppc_booke_init(void)
910{ 972{
911 unsigned long ivor[16]; 973 unsigned long ivor[16];
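The booke timer rework above makes TSR/TCR the single source of truth: the decrementer callback only sets TSR[DIS], and a guest decrementer interrupt stays queued exactly while both TSR[DIS] and TCR[DIE] are set, re-evaluated on every TSR/TCR change. The model below collapses the kvm_make_request/prepare_to_enter indirection into direct calls and uses placeholder bit positions, so it is an illustration of the state machine, not of the kernel code:

#include <stdio.h>
#include <stdint.h>

#define TSR_DIS (1u << 3)   /* decrementer event pending (placeholder bit) */
#define TCR_DIE (1u << 6)   /* decrementer interrupt enable (placeholder bit) */

struct vcpu_model {
	uint32_t tsr, tcr;
	int dec_irq_pending;           /* stands in for queue/dequeue_dec() */
};

static void update_timer_ints(struct vcpu_model *v)
{
	v->dec_irq_pending = (v->tcr & TCR_DIE) && (v->tsr & TSR_DIS);
}

static void set_tsr_bits(struct vcpu_model *v, uint32_t bits)
{
	v->tsr |= bits;                /* e.g. from the decrementer timer */
	update_timer_ints(v);
}

static void clr_tsr_bits(struct vcpu_model *v, uint32_t bits)
{
	v->tsr &= ~bits;               /* guest mtspr TSR acts as write-one-to-clear */
	update_timer_ints(v);
}

int main(void)
{
	struct vcpu_model v = { .tcr = TCR_DIE };

	set_tsr_bits(&v, TSR_DIS);     /* timer fires */
	printf("pending=%d\n", v.dec_irq_pending);   /* 1 */
	clr_tsr_bits(&v, TSR_DIS);     /* guest acknowledges */
	printf("pending=%d\n", v.dec_irq_pending);   /* 0 */
	return 0;
}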
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 8e1fe33d64e5..2fe202705a3f 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -55,6 +55,10 @@ extern unsigned long kvmppc_booke_handlers;
55void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); 55void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
56void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); 56void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
57 57
58void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
59void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
60void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
61
58int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 62int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
59 unsigned int inst, int *advance); 63 unsigned int inst, int *advance);
60int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); 64int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 1260f5f24c0c..3e652da36534 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -13,6 +13,7 @@
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2008 15 * Copyright IBM Corp. 2008
16 * Copyright 2011 Freescale Semiconductor, Inc.
16 * 17 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */ 19 */
@@ -107,7 +108,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
107 case SPRN_DEAR: 108 case SPRN_DEAR:
108 vcpu->arch.shared->dar = spr_val; break; 109 vcpu->arch.shared->dar = spr_val; break;
109 case SPRN_ESR: 110 case SPRN_ESR:
110 vcpu->arch.esr = spr_val; break; 111 vcpu->arch.shared->esr = spr_val; break;
111 case SPRN_DBCR0: 112 case SPRN_DBCR0:
112 vcpu->arch.dbcr0 = spr_val; break; 113 vcpu->arch.dbcr0 = spr_val; break;
113 case SPRN_DBCR1: 114 case SPRN_DBCR1:
@@ -115,23 +116,23 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
115 case SPRN_DBSR: 116 case SPRN_DBSR:
116 vcpu->arch.dbsr &= ~spr_val; break; 117 vcpu->arch.dbsr &= ~spr_val; break;
117 case SPRN_TSR: 118 case SPRN_TSR:
118 vcpu->arch.tsr &= ~spr_val; break; 119 kvmppc_clr_tsr_bits(vcpu, spr_val);
120 break;
119 case SPRN_TCR: 121 case SPRN_TCR:
120 vcpu->arch.tcr = spr_val; 122 kvmppc_set_tcr(vcpu, spr_val);
121 kvmppc_emulate_dec(vcpu);
122 break; 123 break;
123 124
124 /* Note: SPRG4-7 are user-readable. These values are 125 /* Note: SPRG4-7 are user-readable. These values are
125 * loaded into the real SPRGs when resuming the 126 * loaded into the real SPRGs when resuming the
126 * guest. */ 127 * guest. */
127 case SPRN_SPRG4: 128 case SPRN_SPRG4:
128 vcpu->arch.sprg4 = spr_val; break; 129 vcpu->arch.shared->sprg4 = spr_val; break;
129 case SPRN_SPRG5: 130 case SPRN_SPRG5:
130 vcpu->arch.sprg5 = spr_val; break; 131 vcpu->arch.shared->sprg5 = spr_val; break;
131 case SPRN_SPRG6: 132 case SPRN_SPRG6:
132 vcpu->arch.sprg6 = spr_val; break; 133 vcpu->arch.shared->sprg6 = spr_val; break;
133 case SPRN_SPRG7: 134 case SPRN_SPRG7:
134 vcpu->arch.sprg7 = spr_val; break; 135 vcpu->arch.shared->sprg7 = spr_val; break;
135 136
136 case SPRN_IVPR: 137 case SPRN_IVPR:
137 vcpu->arch.ivpr = spr_val; 138 vcpu->arch.ivpr = spr_val;
@@ -202,13 +203,17 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
202 case SPRN_DEAR: 203 case SPRN_DEAR:
203 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; 204 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
204 case SPRN_ESR: 205 case SPRN_ESR:
205 kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; 206 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break;
206 case SPRN_DBCR0: 207 case SPRN_DBCR0:
207 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; 208 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
208 case SPRN_DBCR1: 209 case SPRN_DBCR1:
209 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; 210 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
210 case SPRN_DBSR: 211 case SPRN_DBSR:
211 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; 212 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
213 case SPRN_TSR:
214 kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break;
215 case SPRN_TCR:
216 kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break;
212 217
213 case SPRN_IVOR0: 218 case SPRN_IVOR0:
214 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); 219 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 42f2fb1f66e9..10d8ef602e5c 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -402,19 +402,25 @@ lightweight_exit:
402 /* Save vcpu pointer for the exception handlers. */ 402 /* Save vcpu pointer for the exception handlers. */
403 mtspr SPRN_SPRG_WVCPU, r4 403 mtspr SPRN_SPRG_WVCPU, r4
404 404
405 lwz r5, VCPU_SHARED(r4)
406
405 /* Can't switch the stack pointer until after IVPR is switched, 407 /* Can't switch the stack pointer until after IVPR is switched,
406 * because host interrupt handlers would get confused. */ 408 * because host interrupt handlers would get confused. */
407 lwz r1, VCPU_GPR(r1)(r4) 409 lwz r1, VCPU_GPR(r1)(r4)
408 410
409 /* Host interrupt handlers may have clobbered these guest-readable 411 /*
410 * SPRGs, so we need to reload them here with the guest's values. */ 412 * Host interrupt handlers may have clobbered these
411 lwz r3, VCPU_SPRG4(r4) 413 * guest-readable SPRGs, or the guest kernel may have
414 * written directly to the shared area, so we
415 * need to reload them here with the guest's values.
416 */
417 lwz r3, VCPU_SHARED_SPRG4(r5)
412 mtspr SPRN_SPRG4W, r3 418 mtspr SPRN_SPRG4W, r3
413 lwz r3, VCPU_SPRG5(r4) 419 lwz r3, VCPU_SHARED_SPRG5(r5)
414 mtspr SPRN_SPRG5W, r3 420 mtspr SPRN_SPRG5W, r3
415 lwz r3, VCPU_SPRG6(r4) 421 lwz r3, VCPU_SHARED_SPRG6(r5)
416 mtspr SPRN_SPRG6W, r3 422 mtspr SPRN_SPRG6W, r3
417 lwz r3, VCPU_SPRG7(r4) 423 lwz r3, VCPU_SHARED_SPRG7(r5)
418 mtspr SPRN_SPRG7W, r3 424 mtspr SPRN_SPRG7W, r3
419 425
420#ifdef CONFIG_KVM_EXIT_TIMING 426#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 8c0d45a6faf7..ddcd896fa2ff 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -71,9 +71,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
71 vcpu->arch.pvr = mfspr(SPRN_PVR); 71 vcpu->arch.pvr = mfspr(SPRN_PVR);
72 vcpu_e500->svr = mfspr(SPRN_SVR); 72 vcpu_e500->svr = mfspr(SPRN_SVR);
73 73
74 /* Since booke kvm only support one core, update all vcpus' PIR to 0 */
75 vcpu->vcpu_id = 0;
76
77 vcpu->arch.cpu_type = KVM_CPU_E500V2; 74 vcpu->arch.cpu_type = KVM_CPU_E500V2;
78 75
79 return 0; 76 return 0;
@@ -118,12 +115,12 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
118 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; 115 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
119 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; 116 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
120 117
121 sregs->u.e.mas0 = vcpu_e500->mas0; 118 sregs->u.e.mas0 = vcpu->arch.shared->mas0;
122 sregs->u.e.mas1 = vcpu_e500->mas1; 119 sregs->u.e.mas1 = vcpu->arch.shared->mas1;
123 sregs->u.e.mas2 = vcpu_e500->mas2; 120 sregs->u.e.mas2 = vcpu->arch.shared->mas2;
124 sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; 121 sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
125 sregs->u.e.mas4 = vcpu_e500->mas4; 122 sregs->u.e.mas4 = vcpu->arch.shared->mas4;
126 sregs->u.e.mas6 = vcpu_e500->mas6; 123 sregs->u.e.mas6 = vcpu->arch.shared->mas6;
127 124
128 sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); 125 sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
129 sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; 126 sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
@@ -151,13 +148,12 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
151 } 148 }
152 149
153 if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { 150 if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
154 vcpu_e500->mas0 = sregs->u.e.mas0; 151 vcpu->arch.shared->mas0 = sregs->u.e.mas0;
155 vcpu_e500->mas1 = sregs->u.e.mas1; 152 vcpu->arch.shared->mas1 = sregs->u.e.mas1;
156 vcpu_e500->mas2 = sregs->u.e.mas2; 153 vcpu->arch.shared->mas2 = sregs->u.e.mas2;
157 vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; 154 vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
158 vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; 155 vcpu->arch.shared->mas4 = sregs->u.e.mas4;
159 vcpu_e500->mas4 = sregs->u.e.mas4; 156 vcpu->arch.shared->mas6 = sregs->u.e.mas6;
160 vcpu_e500->mas6 = sregs->u.e.mas6;
161 } 157 }
162 158
163 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) 159 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
@@ -233,6 +229,10 @@ static int __init kvmppc_e500_init(void)
233 unsigned long ivor[3]; 229 unsigned long ivor[3];
234 unsigned long max_ivor = 0; 230 unsigned long max_ivor = 0;
235 231
232 r = kvmppc_core_check_processor_compat();
233 if (r)
234 return r;
235
236 r = kvmppc_booke_init(); 236 r = kvmppc_booke_init();
237 if (r) 237 if (r)
238 return r; 238 return r;
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index d48ae396f41e..6d0b2bd54fb0 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -89,19 +89,23 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
89 return EMULATE_FAIL; 89 return EMULATE_FAIL;
90 vcpu_e500->pid[2] = spr_val; break; 90 vcpu_e500->pid[2] = spr_val; break;
91 case SPRN_MAS0: 91 case SPRN_MAS0:
92 vcpu_e500->mas0 = spr_val; break; 92 vcpu->arch.shared->mas0 = spr_val; break;
93 case SPRN_MAS1: 93 case SPRN_MAS1:
94 vcpu_e500->mas1 = spr_val; break; 94 vcpu->arch.shared->mas1 = spr_val; break;
95 case SPRN_MAS2: 95 case SPRN_MAS2:
96 vcpu_e500->mas2 = spr_val; break; 96 vcpu->arch.shared->mas2 = spr_val; break;
97 case SPRN_MAS3: 97 case SPRN_MAS3:
98 vcpu_e500->mas3 = spr_val; break; 98 vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
99 vcpu->arch.shared->mas7_3 |= spr_val;
100 break;
99 case SPRN_MAS4: 101 case SPRN_MAS4:
100 vcpu_e500->mas4 = spr_val; break; 102 vcpu->arch.shared->mas4 = spr_val; break;
101 case SPRN_MAS6: 103 case SPRN_MAS6:
102 vcpu_e500->mas6 = spr_val; break; 104 vcpu->arch.shared->mas6 = spr_val; break;
103 case SPRN_MAS7: 105 case SPRN_MAS7:
104 vcpu_e500->mas7 = spr_val; break; 106 vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
107 vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
108 break;
105 case SPRN_L1CSR0: 109 case SPRN_L1CSR0:
106 vcpu_e500->l1csr0 = spr_val; 110 vcpu_e500->l1csr0 = spr_val;
107 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); 111 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
@@ -143,6 +147,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
143{ 147{
144 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 148 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
145 int emulated = EMULATE_DONE; 149 int emulated = EMULATE_DONE;
150 unsigned long val;
146 151
147 switch (sprn) { 152 switch (sprn) {
148 case SPRN_PID: 153 case SPRN_PID:
@@ -152,20 +157,23 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
152 case SPRN_PID2: 157 case SPRN_PID2:
153 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; 158 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
154 case SPRN_MAS0: 159 case SPRN_MAS0:
155 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break; 160 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break;
156 case SPRN_MAS1: 161 case SPRN_MAS1:
157 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break; 162 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break;
158 case SPRN_MAS2: 163 case SPRN_MAS2:
159 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break; 164 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break;
160 case SPRN_MAS3: 165 case SPRN_MAS3:
161 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break; 166 val = (u32)vcpu->arch.shared->mas7_3;
167 kvmppc_set_gpr(vcpu, rt, val);
168 break;
162 case SPRN_MAS4: 169 case SPRN_MAS4:
163 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break; 170 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break;
164 case SPRN_MAS6: 171 case SPRN_MAS6:
165 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break; 172 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break;
166 case SPRN_MAS7: 173 case SPRN_MAS7:
167 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break; 174 val = vcpu->arch.shared->mas7_3 >> 32;
168 175 kvmppc_set_gpr(vcpu, rt, val);
176 break;
169 case SPRN_TLB0CFG: 177 case SPRN_TLB0CFG:
170 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; 178 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
171 case SPRN_TLB1CFG: 179 case SPRN_TLB1CFG:
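With MAS3 and MAS7 now kept as a single 64-bit mas7_3 in the shared area (MAS7 in the upper word, MAS3 in the lower), the mtspr/mfspr emulation above reduces to masking and shifting. A stand-alone version of those accessors, with made-up register values:

#include <stdio.h>
#include <stdint.h>

static void set_mas3(uint64_t *mas7_3, uint32_t val)
{
	*mas7_3 &= ~(uint64_t)0xffffffff;      /* replace the low (MAS3) word */
	*mas7_3 |= val;
}

static void set_mas7(uint64_t *mas7_3, uint32_t val)
{
	*mas7_3 &= (uint64_t)0xffffffff;       /* replace the high (MAS7) word */
	*mas7_3 |= (uint64_t)val << 32;
}

static uint32_t get_mas3(uint64_t mas7_3) { return (uint32_t)mas7_3; }
static uint32_t get_mas7(uint64_t mas7_3) { return (uint32_t)(mas7_3 >> 32); }

int main(void)
{
	uint64_t mas7_3 = 0;

	set_mas3(&mas7_3, 0x12340015);         /* made-up RPN/permission bits */
	set_mas7(&mas7_3, 0x0000000f);         /* made-up upper physical bits */
	printf("mas7_3=0x%016llx mas3=0x%08x mas7=0x%08x\n",
	       (unsigned long long)mas7_3,
	       (unsigned)get_mas3(mas7_3), (unsigned)get_mas7(mas7_3));
	return 0;
}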
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 13c432ea2fa8..6e53e4164de1 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -12,12 +12,19 @@
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 */ 13 */
14 14
15#include <linux/kernel.h>
15#include <linux/types.h> 16#include <linux/types.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/kvm.h> 19#include <linux/kvm.h>
19#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
20#include <linux/highmem.h> 21#include <linux/highmem.h>
22#include <linux/log2.h>
23#include <linux/uaccess.h>
24#include <linux/sched.h>
25#include <linux/rwsem.h>
26#include <linux/vmalloc.h>
27#include <linux/hugetlb.h>
21#include <asm/kvm_ppc.h> 28#include <asm/kvm_ppc.h>
22#include <asm/kvm_e500.h> 29#include <asm/kvm_e500.h>
23 30
@@ -26,7 +33,7 @@
26#include "trace.h" 33#include "trace.h"
27#include "timing.h" 34#include "timing.h"
28 35
29#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) 36#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
30 37
31struct id { 38struct id {
32 unsigned long val; 39 unsigned long val;
@@ -63,7 +70,14 @@ static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
63 * The valid range of shadow ID is [1..255] */ 70 * The valid range of shadow ID is [1..255] */
64static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); 71static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
65 72
66static unsigned int tlb1_entry_num; 73static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
74
75static struct kvm_book3e_206_tlb_entry *get_entry(
76 struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
77{
78 int offset = vcpu_e500->gtlb_offset[tlbsel];
79 return &vcpu_e500->gtlb_arch[offset + entry];
80}
67 81
68/* 82/*
69 * Allocate a free shadow id and setup a valid sid mapping in given entry. 83 * Allocate a free shadow id and setup a valid sid mapping in given entry.
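The get_entry() helper added above reflects the new guest-TLB layout: one flat array of kvm_book3e_206_tlb_entry, with gtlb_offset[] recording where each TLB (TLB0, TLB1) begins, so a lookup is offset-plus-index addressing. A small model with invented sizes rather than the e500's real geometry:

#include <stdio.h>

#define TLB0_ENTRIES 8          /* placeholder sizes for the sketch */
#define TLB1_ENTRIES 4

struct tlb_entry { unsigned int mas1; };

static struct tlb_entry gtlb_arch[TLB0_ENTRIES + TLB1_ENTRIES];
static const int gtlb_offset[2] = { 0, TLB0_ENTRIES };

static struct tlb_entry *get_entry(int tlbsel, int entry)
{
	return &gtlb_arch[gtlb_offset[tlbsel] + entry];
}

int main(void)
{
	get_entry(1, 2)->mas1 = 0x80000000;   /* a MAS1_VALID-style flag, for show */
	printf("TLB1[2] lives at flat index %ld\n",
	       (long)(get_entry(1, 2) - gtlb_arch));
	return 0;
}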
@@ -116,13 +130,11 @@ static inline int local_sid_lookup(struct id *entry)
116 return -1; 130 return -1;
117} 131}
118 132
119/* Invalidate all id mappings on local core */ 133/* Invalidate all id mappings on local core -- call with preempt disabled */
120static inline void local_sid_destroy_all(void) 134static inline void local_sid_destroy_all(void)
121{ 135{
122 preempt_disable();
123 __get_cpu_var(pcpu_last_used_sid) = 0; 136 __get_cpu_var(pcpu_last_used_sid) = 0;
124 memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); 137 memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
125 preempt_enable();
126} 138}
127 139
128static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) 140static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -218,34 +230,13 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
218 preempt_enable(); 230 preempt_enable();
219} 231}
220 232
221void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) 233static inline unsigned int gtlb0_get_next_victim(
222{
223 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
224 struct tlbe *tlbe;
225 int i, tlbsel;
226
227 printk("| %8s | %8s | %8s | %8s | %8s |\n",
228 "nr", "mas1", "mas2", "mas3", "mas7");
229
230 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
231 printk("Guest TLB%d:\n", tlbsel);
232 for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
233 tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
234 if (tlbe->mas1 & MAS1_VALID)
235 printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
236 tlbsel, i, tlbe->mas1, tlbe->mas2,
237 tlbe->mas3, tlbe->mas7);
238 }
239 }
240}
241
242static inline unsigned int tlb0_get_next_victim(
243 struct kvmppc_vcpu_e500 *vcpu_e500) 234 struct kvmppc_vcpu_e500 *vcpu_e500)
244{ 235{
245 unsigned int victim; 236 unsigned int victim;
246 237
247 victim = vcpu_e500->gtlb_nv[0]++; 238 victim = vcpu_e500->gtlb_nv[0]++;
248 if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) 239 if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways))
249 vcpu_e500->gtlb_nv[0] = 0; 240 vcpu_e500->gtlb_nv[0] = 0;
250 241
251 return victim; 242 return victim;
@@ -254,12 +245,12 @@ static inline unsigned int tlb0_get_next_victim(
254static inline unsigned int tlb1_max_shadow_size(void) 245static inline unsigned int tlb1_max_shadow_size(void)
255{ 246{
256 /* reserve one entry for magic page */ 247 /* reserve one entry for magic page */
257 return tlb1_entry_num - tlbcam_index - 1; 248 return host_tlb_params[1].entries - tlbcam_index - 1;
258} 249}
259 250
260static inline int tlbe_is_writable(struct tlbe *tlbe) 251static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
261{ 252{
262 return tlbe->mas3 & (MAS3_SW|MAS3_UW); 253 return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
263} 254}
264 255
265static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) 256static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
@@ -290,40 +281,66 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
290/* 281/*
291 * writing shadow tlb entry to host TLB 282 * writing shadow tlb entry to host TLB
292 */ 283 */
293static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0) 284static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
285 uint32_t mas0)
294{ 286{
295 unsigned long flags; 287 unsigned long flags;
296 288
297 local_irq_save(flags); 289 local_irq_save(flags);
298 mtspr(SPRN_MAS0, mas0); 290 mtspr(SPRN_MAS0, mas0);
299 mtspr(SPRN_MAS1, stlbe->mas1); 291 mtspr(SPRN_MAS1, stlbe->mas1);
300 mtspr(SPRN_MAS2, stlbe->mas2); 292 mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
301 mtspr(SPRN_MAS3, stlbe->mas3); 293 mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
302 mtspr(SPRN_MAS7, stlbe->mas7); 294 mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
303 asm volatile("isync; tlbwe" : : : "memory"); 295 asm volatile("isync; tlbwe" : : : "memory");
304 local_irq_restore(flags); 296 local_irq_restore(flags);
297
298 trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
299 stlbe->mas2, stlbe->mas7_3);
300}
301
302/*
303 * Acquire a mas0 with victim hint, as if we just took a TLB miss.
304 *
305 * We don't care about the address we're searching for, other than that it's
306 * in the right set and is not present in the TLB. Using a zero PID and a
307 * userspace address means we don't have to set and then restore MAS5, or
308 * calculate a proper MAS6 value.
309 */
310static u32 get_host_mas0(unsigned long eaddr)
311{
312 unsigned long flags;
313 u32 mas0;
314
315 local_irq_save(flags);
316 mtspr(SPRN_MAS6, 0);
317 asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
318 mas0 = mfspr(SPRN_MAS0);
319 local_irq_restore(flags);
320
321 return mas0;
305} 322}
306 323
324/* sesel is for tlb1 only */
307static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, 325static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
308 int tlbsel, int esel, struct tlbe *stlbe) 326 int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe)
309{ 327{
328 u32 mas0;
329
310 if (tlbsel == 0) { 330 if (tlbsel == 0) {
311 __write_host_tlbe(stlbe, 331 mas0 = get_host_mas0(stlbe->mas2);
312 MAS0_TLBSEL(0) | 332 __write_host_tlbe(stlbe, mas0);
313 MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
314 } else { 333 } else {
315 __write_host_tlbe(stlbe, 334 __write_host_tlbe(stlbe,
316 MAS0_TLBSEL(1) | 335 MAS0_TLBSEL(1) |
317 MAS0_ESEL(to_htlb1_esel(esel))); 336 MAS0_ESEL(to_htlb1_esel(sesel)));
318 } 337 }
319 trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
320 stlbe->mas3, stlbe->mas7);
321} 338}
322 339
323void kvmppc_map_magic(struct kvm_vcpu *vcpu) 340void kvmppc_map_magic(struct kvm_vcpu *vcpu)
324{ 341{
325 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 342 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
326 struct tlbe magic; 343 struct kvm_book3e_206_tlb_entry magic;
327 ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; 344 ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
328 unsigned int stid; 345 unsigned int stid;
329 pfn_t pfn; 346 pfn_t pfn;
@@ -337,9 +354,9 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
337 magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | 354 magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
338 MAS1_TSIZE(BOOK3E_PAGESZ_4K); 355 MAS1_TSIZE(BOOK3E_PAGESZ_4K);
339 magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; 356 magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
340 magic.mas3 = (pfn << PAGE_SHIFT) | 357 magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
341 MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; 358 MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
342 magic.mas7 = pfn >> (32 - PAGE_SHIFT); 359 magic.mas8 = 0;
343 360
344 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); 361 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
345 preempt_enable(); 362 preempt_enable();
@@ -357,10 +374,11 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
357{ 374{
358} 375}
359 376
360static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, 377static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
361 int tlbsel, int esel) 378 int tlbsel, int esel)
362{ 379{
363 struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; 380 struct kvm_book3e_206_tlb_entry *gtlbe =
381 get_entry(vcpu_e500, tlbsel, esel);
364 struct vcpu_id_table *idt = vcpu_e500->idt; 382 struct vcpu_id_table *idt = vcpu_e500->idt;
365 unsigned int pr, tid, ts, pid; 383 unsigned int pr, tid, ts, pid;
366 u32 val, eaddr; 384 u32 val, eaddr;
@@ -414,25 +432,57 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
414 preempt_enable(); 432 preempt_enable();
415} 433}
416 434
435static int tlb0_set_base(gva_t addr, int sets, int ways)
436{
437 int set_base;
438
439 set_base = (addr >> PAGE_SHIFT) & (sets - 1);
440 set_base *= ways;
441
442 return set_base;
443}
444
445static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
446{
447 return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets,
448 vcpu_e500->gtlb_params[0].ways);
449}
450
451static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel)
452{
453 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
454 int esel = get_tlb_esel_bit(vcpu);
455
456 if (tlbsel == 0) {
457 esel &= vcpu_e500->gtlb_params[0].ways - 1;
458 esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2);
459 } else {
460 esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1;
461 }
462
463 return esel;
464}
465
417/* Search the guest TLB for a matching entry. */ 466/* Search the guest TLB for a matching entry. */
418static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, 467static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
419 gva_t eaddr, int tlbsel, unsigned int pid, int as) 468 gva_t eaddr, int tlbsel, unsigned int pid, int as)
420{ 469{
421 int size = vcpu_e500->gtlb_size[tlbsel]; 470 int size = vcpu_e500->gtlb_params[tlbsel].entries;
422 int set_base; 471 unsigned int set_base, offset;
423 int i; 472 int i;
424 473
425 if (tlbsel == 0) { 474 if (tlbsel == 0) {
426 int mask = size / KVM_E500_TLB0_WAY_NUM - 1; 475 set_base = gtlb0_set_base(vcpu_e500, eaddr);
427 set_base = (eaddr >> PAGE_SHIFT) & mask; 476 size = vcpu_e500->gtlb_params[0].ways;
428 set_base *= KVM_E500_TLB0_WAY_NUM;
429 size = KVM_E500_TLB0_WAY_NUM;
430 } else { 477 } else {
431 set_base = 0; 478 set_base = 0;
432 } 479 }
433 480
481 offset = vcpu_e500->gtlb_offset[tlbsel];
482
434 for (i = 0; i < size; i++) { 483 for (i = 0; i < size; i++) {
435 struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i]; 484 struct kvm_book3e_206_tlb_entry *tlbe =
485 &vcpu_e500->gtlb_arch[offset + set_base + i];
436 unsigned int tid; 486 unsigned int tid;
437 487
438 if (eaddr < get_tlb_eaddr(tlbe)) 488 if (eaddr < get_tlb_eaddr(tlbe))
@@ -457,27 +507,55 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
457 return -1; 507 return -1;
458} 508}
459 509
460static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv, 510static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
461 struct tlbe *gtlbe, 511 struct kvm_book3e_206_tlb_entry *gtlbe,
462 pfn_t pfn) 512 pfn_t pfn)
463{ 513{
464 priv->pfn = pfn; 514 ref->pfn = pfn;
465 priv->flags = E500_TLB_VALID; 515 ref->flags = E500_TLB_VALID;
466 516
467 if (tlbe_is_writable(gtlbe)) 517 if (tlbe_is_writable(gtlbe))
468 priv->flags |= E500_TLB_DIRTY; 518 ref->flags |= E500_TLB_DIRTY;
469} 519}
470 520
471static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv) 521static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
472{ 522{
473 if (priv->flags & E500_TLB_VALID) { 523 if (ref->flags & E500_TLB_VALID) {
474 if (priv->flags & E500_TLB_DIRTY) 524 if (ref->flags & E500_TLB_DIRTY)
475 kvm_release_pfn_dirty(priv->pfn); 525 kvm_release_pfn_dirty(ref->pfn);
476 else 526 else
477 kvm_release_pfn_clean(priv->pfn); 527 kvm_release_pfn_clean(ref->pfn);
528
529 ref->flags = 0;
530 }
531}
532
533static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
534{
535 int tlbsel = 0;
536 int i;
537
538 for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) {
539 struct tlbe_ref *ref =
540 &vcpu_e500->gtlb_priv[tlbsel][i].ref;
541 kvmppc_e500_ref_release(ref);
542 }
543}
544
545static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
546{
547 int stlbsel = 1;
548 int i;
549
550 kvmppc_e500_id_table_reset_all(vcpu_e500);
478 551
479 priv->flags = 0;
552 for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
553 struct tlbe_ref *ref =
554 &vcpu_e500->tlb_refs[stlbsel][i];
555 kvmppc_e500_ref_release(ref);
480 } 556 }
557
558 clear_tlb_privs(vcpu_e500);
481} 559}
482 560
483static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, 561static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
@@ -488,59 +566,54 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
488 int tlbsel; 566 int tlbsel;
489 567
490 /* since we only have two TLBs, only lower bit is used. */ 568 /* since we only have two TLBs, only lower bit is used. */
491 tlbsel = (vcpu_e500->mas4 >> 28) & 0x1; 569 tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
492 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; 570 victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
493 pidsel = (vcpu_e500->mas4 >> 16) & 0xf; 571 pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf;
494 tsized = (vcpu_e500->mas4 >> 7) & 0x1f; 572 tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;
495 573
496 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) 574 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
497 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 575 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
498 vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) 576 vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
499 | MAS1_TID(vcpu_e500->pid[pidsel]) 577 | MAS1_TID(vcpu_e500->pid[pidsel])
500 | MAS1_TSIZE(tsized); 578 | MAS1_TSIZE(tsized);
501 vcpu_e500->mas2 = (eaddr & MAS2_EPN) 579 vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
502 | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK); 580 | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);
503 vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; 581 vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
504 vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1) 582 vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1)
505 | (get_cur_pid(vcpu) << 16) 583 | (get_cur_pid(vcpu) << 16)
506 | (as ? MAS6_SAS : 0); 584 | (as ? MAS6_SAS : 0);
507 vcpu_e500->mas7 = 0;
508} 585}
509 586
510static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, 587/* TID must be supplied by the caller */
511 struct tlbe *gtlbe, int tsize, 588static inline void kvmppc_e500_setup_stlbe(
512 struct tlbe_priv *priv, 589 struct kvmppc_vcpu_e500 *vcpu_e500,
513 u64 gvaddr, struct tlbe *stlbe) 590 struct kvm_book3e_206_tlb_entry *gtlbe,
591 int tsize, struct tlbe_ref *ref, u64 gvaddr,
592 struct kvm_book3e_206_tlb_entry *stlbe)
514{ 593{
515 pfn_t pfn = priv->pfn; 594 pfn_t pfn = ref->pfn;
516 unsigned int stid;
517 595
518 stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), 596 BUG_ON(!(ref->flags & E500_TLB_VALID));
519 get_tlb_tid(gtlbe),
520 get_cur_pr(&vcpu_e500->vcpu), 0);
521 597
522 /* Force TS=1 IPROT=0 for all guest mappings. */ 598 /* Force TS=1 IPROT=0 for all guest mappings. */
523 stlbe->mas1 = MAS1_TSIZE(tsize) 599 stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID;
524 | MAS1_TID(stid) | MAS1_TS | MAS1_VALID;
525 stlbe->mas2 = (gvaddr & MAS2_EPN) 600 stlbe->mas2 = (gvaddr & MAS2_EPN)
526 | e500_shadow_mas2_attrib(gtlbe->mas2, 601 | e500_shadow_mas2_attrib(gtlbe->mas2,
527 vcpu_e500->vcpu.arch.shared->msr & MSR_PR); 602 vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
528 stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN) 603 stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
529 | e500_shadow_mas3_attrib(gtlbe->mas3, 604 | e500_shadow_mas3_attrib(gtlbe->mas7_3,
530 vcpu_e500->vcpu.arch.shared->msr & MSR_PR); 605 vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
531 stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
532} 606}
533 607
534
535static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, 608static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
536 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel, 609 u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
537 struct tlbe *stlbe) 610 int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
611 struct tlbe_ref *ref)
538{ 612{
539 struct kvm_memory_slot *slot; 613 struct kvm_memory_slot *slot;
540 unsigned long pfn, hva; 614 unsigned long pfn, hva;
541 int pfnmap = 0; 615 int pfnmap = 0;
542 int tsize = BOOK3E_PAGESZ_4K; 616 int tsize = BOOK3E_PAGESZ_4K;
543 struct tlbe_priv *priv;
544 617
545 /* 618 /*
546 * Translate guest physical to true physical, acquiring 619 * Translate guest physical to true physical, acquiring
@@ -621,12 +694,31 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
621 pfn &= ~(tsize_pages - 1); 694 pfn &= ~(tsize_pages - 1);
622 break; 695 break;
623 } 696 }
697 } else if (vma && hva >= vma->vm_start &&
698 (vma->vm_flags & VM_HUGETLB)) {
699 unsigned long psize = vma_kernel_pagesize(vma);
700
701 tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
702 MAS1_TSIZE_SHIFT;
703
704 /*
705 * Take the largest page size that satisfies both host
706 * and guest mapping
707 */
708 tsize = min(__ilog2(psize) - 10, tsize);
709
710 /*
711 * e500 doesn't implement the lowest tsize bit,
712 * or 1K pages.
713 */
714 tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
624 } 715 }
625 716
626 up_read(&current->mm->mmap_sem); 717 up_read(&current->mm->mmap_sem);
627 } 718 }
628 719
629 if (likely(!pfnmap)) { 720 if (likely(!pfnmap)) {
721 unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
630 pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); 722 pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
631 if (is_error_pfn(pfn)) { 723 if (is_error_pfn(pfn)) {
632 printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", 724 printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
@@ -634,45 +726,52 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
634 kvm_release_pfn_clean(pfn); 726 kvm_release_pfn_clean(pfn);
635 return; 727 return;
636 } 728 }
729
730 /* Align guest and physical address to page map boundaries */
731 pfn &= ~(tsize_pages - 1);
732 gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
637 } 733 }
638 734
639 /* Drop old priv and setup new one. */ 735 /* Drop old ref and setup new one. */
640 priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; 736 kvmppc_e500_ref_release(ref);
641 kvmppc_e500_priv_release(priv); 737 kvmppc_e500_ref_setup(ref, gtlbe, pfn);
642 kvmppc_e500_priv_setup(priv, gtlbe, pfn);
643 738
644 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, priv, gvaddr, stlbe); 739 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe);
645} 740}
646 741
647/* XXX only map the one-one case, for now use TLB0 */ 742/* XXX only map the one-one case, for now use TLB0 */
648static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, 743static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
649 int esel, struct tlbe *stlbe) 744 int esel,
745 struct kvm_book3e_206_tlb_entry *stlbe)
650{ 746{
651 struct tlbe *gtlbe; 747 struct kvm_book3e_206_tlb_entry *gtlbe;
748 struct tlbe_ref *ref;
652 749
653 gtlbe = &vcpu_e500->gtlb_arch[0][esel]; 750 gtlbe = get_entry(vcpu_e500, 0, esel);
751 ref = &vcpu_e500->gtlb_priv[0][esel].ref;
654 752
655 kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), 753 kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
656 get_tlb_raddr(gtlbe) >> PAGE_SHIFT, 754 get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
657 gtlbe, 0, esel, stlbe); 755 gtlbe, 0, stlbe, ref);
658
659 return esel;
660} 756}
661 757
662/* Caller must ensure that the specified guest TLB entry is safe to insert into 758/* Caller must ensure that the specified guest TLB entry is safe to insert into
663 * the shadow TLB. */ 759 * the shadow TLB. */
664/* XXX for both one-one and one-to-many , for now use TLB1 */ 760/* XXX for both one-one and one-to-many , for now use TLB1 */
665static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, 761static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
666 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe) 762 u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
763 struct kvm_book3e_206_tlb_entry *stlbe)
667{ 764{
765 struct tlbe_ref *ref;
668 unsigned int victim; 766 unsigned int victim;
669 767
670 victim = vcpu_e500->gtlb_nv[1]++; 768 victim = vcpu_e500->host_tlb1_nv++;
671 769
672 if (unlikely(vcpu_e500->gtlb_nv[1] >= tlb1_max_shadow_size())) 770 if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
673 vcpu_e500->gtlb_nv[1] = 0; 771 vcpu_e500->host_tlb1_nv = 0;
674 772
675 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim, stlbe); 773 ref = &vcpu_e500->tlb_refs[1][victim];
774 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);
676 775
677 return victim; 776 return victim;
678} 777}
@@ -689,7 +788,8 @@ static inline int kvmppc_e500_gtlbe_invalidate(
689 struct kvmppc_vcpu_e500 *vcpu_e500, 788 struct kvmppc_vcpu_e500 *vcpu_e500,
690 int tlbsel, int esel) 789 int tlbsel, int esel)
691{ 790{
692 struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; 791 struct kvm_book3e_206_tlb_entry *gtlbe =
792 get_entry(vcpu_e500, tlbsel, esel);
693 793
694 if (unlikely(get_tlb_iprot(gtlbe))) 794 if (unlikely(get_tlb_iprot(gtlbe)))
695 return -1; 795 return -1;
@@ -704,10 +804,10 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
704 int esel; 804 int esel;
705 805
706 if (value & MMUCSR0_TLB0FI) 806 if (value & MMUCSR0_TLB0FI)
707 for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++) 807 for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++)
708 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); 808 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
709 if (value & MMUCSR0_TLB1FI) 809 if (value & MMUCSR0_TLB1FI)
710 for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++) 810 for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++)
711 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); 811 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
712 812
713 /* Invalidate all vcpu id mappings */ 813 /* Invalidate all vcpu id mappings */
@@ -732,7 +832,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
732 832
733 if (ia) { 833 if (ia) {
734 /* invalidate all entries */ 834 /* invalidate all entries */
735 for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++) 835 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries;
836 esel++)
736 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); 837 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
737 } else { 838 } else {
738 ea &= 0xfffff000; 839 ea &= 0xfffff000;
@@ -752,18 +853,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
752{ 853{
753 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 854 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
754 int tlbsel, esel; 855 int tlbsel, esel;
755 struct tlbe *gtlbe; 856 struct kvm_book3e_206_tlb_entry *gtlbe;
756 857
757 tlbsel = get_tlb_tlbsel(vcpu_e500); 858 tlbsel = get_tlb_tlbsel(vcpu);
758 esel = get_tlb_esel(vcpu_e500, tlbsel); 859 esel = get_tlb_esel(vcpu, tlbsel);
759 860
760 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; 861 gtlbe = get_entry(vcpu_e500, tlbsel, esel);
761 vcpu_e500->mas0 &= ~MAS0_NV(~0); 862 vcpu->arch.shared->mas0 &= ~MAS0_NV(~0);
762 vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 863 vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
763 vcpu_e500->mas1 = gtlbe->mas1; 864 vcpu->arch.shared->mas1 = gtlbe->mas1;
764 vcpu_e500->mas2 = gtlbe->mas2; 865 vcpu->arch.shared->mas2 = gtlbe->mas2;
765 vcpu_e500->mas3 = gtlbe->mas3; 866 vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
766 vcpu_e500->mas7 = gtlbe->mas7;
767 867
768 return EMULATE_DONE; 868 return EMULATE_DONE;
769} 869}
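
For reference, the new get_tlb_esel()/gtlb0_set_base() pair used by the tlbre emulation above turns MAS0[ESEL] plus the EPN from MAS2 into an index into the now-flat gtlb_arch[] array. A standalone sketch of that indexing, assuming the legacy default geometry of 128 sets x 2 ways (the function below is an illustration, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT	12

/* mirrors tlb0_set_base() + get_tlb_esel() for TLB0 */
static int gtlb0_index(unsigned long mas2_epn, unsigned int esel_bit,
		       unsigned int sets, unsigned int ways)
{
	unsigned int set = (mas2_epn >> PAGE_SHIFT) & (sets - 1);
	unsigned int way = esel_bit & (ways - 1);

	return set * ways + way;	/* offset into the TLB0 slice of gtlb_arch[] */
}

int main(void)
{
	/* EPN 0x10003000 selects set 3; ESEL 1 selects way 1 -> entry 7 */
	printf("%d\n", gtlb0_index(0x10003000UL, 1, 128, 2));
	return 0;
}

TLB1 has no set structure, so in get_tlb_esel() the ESEL bits are simply masked against the TLB1 entry count.
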
@@ -771,10 +871,10 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
771int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) 871int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
772{ 872{
773 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 873 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
774 int as = !!get_cur_sas(vcpu_e500); 874 int as = !!get_cur_sas(vcpu);
775 unsigned int pid = get_cur_spid(vcpu_e500); 875 unsigned int pid = get_cur_spid(vcpu);
776 int esel, tlbsel; 876 int esel, tlbsel;
777 struct tlbe *gtlbe = NULL; 877 struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
778 gva_t ea; 878 gva_t ea;
779 879
780 ea = kvmppc_get_gpr(vcpu, rb); 880 ea = kvmppc_get_gpr(vcpu, rb);
@@ -782,70 +882,90 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
782 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 882 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
783 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); 883 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
784 if (esel >= 0) { 884 if (esel >= 0) {
785 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; 885 gtlbe = get_entry(vcpu_e500, tlbsel, esel);
786 break; 886 break;
787 } 887 }
788 } 888 }
789 889
790 if (gtlbe) { 890 if (gtlbe) {
791 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) 891 esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1;
892
893 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
792 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 894 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
793 vcpu_e500->mas1 = gtlbe->mas1; 895 vcpu->arch.shared->mas1 = gtlbe->mas1;
794 vcpu_e500->mas2 = gtlbe->mas2; 896 vcpu->arch.shared->mas2 = gtlbe->mas2;
795 vcpu_e500->mas3 = gtlbe->mas3; 897 vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
796 vcpu_e500->mas7 = gtlbe->mas7;
797 } else { 898 } else {
798 int victim; 899 int victim;
799 900
800 /* since we only have two TLBs, only lower bit is used. */ 901 /* since we only have two TLBs, only lower bit is used. */
801 tlbsel = vcpu_e500->mas4 >> 28 & 0x1; 902 tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1;
802 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; 903 victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
803 904
804 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) 905 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel)
906 | MAS0_ESEL(victim)
805 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 907 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
806 vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0) 908 vcpu->arch.shared->mas1 =
807 | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0)) 909 (vcpu->arch.shared->mas6 & MAS6_SPID0)
808 | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); 910 | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
809 vcpu_e500->mas2 &= MAS2_EPN; 911 | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
810 vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK; 912 vcpu->arch.shared->mas2 &= MAS2_EPN;
811 vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; 913 vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
812 vcpu_e500->mas7 = 0; 914 MAS2_ATTRIB_MASK;
915 vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 |
916 MAS3_U2 | MAS3_U3;
813 } 917 }
814 918
815 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); 919 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
816 return EMULATE_DONE; 920 return EMULATE_DONE;
817} 921}
818 922
923/* sesel is for tlb1 only */
924static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
925 struct kvm_book3e_206_tlb_entry *gtlbe,
926 struct kvm_book3e_206_tlb_entry *stlbe,
927 int stlbsel, int sesel)
928{
929 int stid;
930
931 preempt_disable();
932 stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
933 get_tlb_tid(gtlbe),
934 get_cur_pr(&vcpu_e500->vcpu), 0);
935
936 stlbe->mas1 |= MAS1_TID(stid);
937 write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
938 preempt_enable();
939}
940
819int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) 941int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
820{ 942{
821 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 943 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
822 struct tlbe *gtlbe; 944 struct kvm_book3e_206_tlb_entry *gtlbe;
823 int tlbsel, esel; 945 int tlbsel, esel;
824 946
825 tlbsel = get_tlb_tlbsel(vcpu_e500); 947 tlbsel = get_tlb_tlbsel(vcpu);
826 esel = get_tlb_esel(vcpu_e500, tlbsel); 948 esel = get_tlb_esel(vcpu, tlbsel);
827 949
828 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; 950 gtlbe = get_entry(vcpu_e500, tlbsel, esel);
829 951
830 if (get_tlb_v(gtlbe)) 952 if (get_tlb_v(gtlbe))
831 kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel); 953 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
832 954
833 gtlbe->mas1 = vcpu_e500->mas1; 955 gtlbe->mas1 = vcpu->arch.shared->mas1;
834 gtlbe->mas2 = vcpu_e500->mas2; 956 gtlbe->mas2 = vcpu->arch.shared->mas2;
835 gtlbe->mas3 = vcpu_e500->mas3; 957 gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
836 gtlbe->mas7 = vcpu_e500->mas7;
837 958
838 trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2, 959 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
839 gtlbe->mas3, gtlbe->mas7); 960 gtlbe->mas2, gtlbe->mas7_3);
840 961
841 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ 962 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
842 if (tlbe_is_host_safe(vcpu, gtlbe)) { 963 if (tlbe_is_host_safe(vcpu, gtlbe)) {
843 struct tlbe stlbe; 964 struct kvm_book3e_206_tlb_entry stlbe;
844 int stlbsel, sesel; 965 int stlbsel, sesel;
845 u64 eaddr; 966 u64 eaddr;
846 u64 raddr; 967 u64 raddr;
847 968
848 preempt_disable();
849 switch (tlbsel) { 969 switch (tlbsel) {
850 case 0: 970 case 0:
851 /* TLB0 */ 971 /* TLB0 */
@@ -853,7 +973,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
853 gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); 973 gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
854 974
855 stlbsel = 0; 975 stlbsel = 0;
856 sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); 976 kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
977 sesel = 0; /* unused */
857 978
858 break; 979 break;
859 980
@@ -874,8 +995,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
874 default: 995 default:
875 BUG(); 996 BUG();
876 } 997 }
877 write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); 998
878 preempt_enable(); 999 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
879 } 1000 }
880 1001
881 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); 1002 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
@@ -914,9 +1035,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
914 gva_t eaddr) 1035 gva_t eaddr)
915{ 1036{
916 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 1037 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
917 struct tlbe *gtlbe = 1038 struct kvm_book3e_206_tlb_entry *gtlbe;
918 &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)]; 1039 u64 pgmask;
919 u64 pgmask = get_tlb_bytes(gtlbe) - 1; 1040
1041 gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
1042 pgmask = get_tlb_bytes(gtlbe) - 1;
920 1043
921 return get_tlb_raddr(gtlbe) | (eaddr & pgmask); 1044 return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
922} 1045}
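
kvmppc_mmu_xlate() above reduces to mask arithmetic once the TSIZE encoding from the helpers (bytes = 1KB << TSIZE) is plugged in. A userspace-only illustration of the same computation (the kernel code ORs get_tlb_raddr() directly and relies on the RPN being naturally aligned; the function name here is invented):

#include <stdio.h>
#include <stdint.h>

static uint64_t xlate(uint64_t raddr, unsigned int tsize, uint64_t eaddr)
{
	uint64_t bytes  = 1ULL << 10 << tsize;	/* get_tlb_bytes() */
	uint64_t pgmask = bytes - 1;

	/* keep the page frame from the TLB entry, the offset from the EA */
	return (raddr & ~pgmask) | (eaddr & pgmask);
}

int main(void)
{
	/* 256M mapping (TSIZE 18) at real 0x20000000; EA 0xc1234567 -> 0x21234567 */
	printf("0x%llx\n",
	       (unsigned long long)xlate(0x20000000ULL, 18, 0xc1234567ULL));
	return 0;
}
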
@@ -930,22 +1053,21 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
930{ 1053{
931 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 1054 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
932 struct tlbe_priv *priv; 1055 struct tlbe_priv *priv;
933 struct tlbe *gtlbe, stlbe; 1056 struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
934 int tlbsel = tlbsel_of(index); 1057 int tlbsel = tlbsel_of(index);
935 int esel = esel_of(index); 1058 int esel = esel_of(index);
936 int stlbsel, sesel; 1059 int stlbsel, sesel;
937 1060
938 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; 1061 gtlbe = get_entry(vcpu_e500, tlbsel, esel);
939 1062
940 preempt_disable();
941 switch (tlbsel) { 1063 switch (tlbsel) {
942 case 0: 1064 case 0:
943 stlbsel = 0; 1065 stlbsel = 0;
944 sesel = esel; 1066 sesel = 0; /* unused */
945 priv = &vcpu_e500->gtlb_priv[stlbsel][sesel]; 1067 priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
946 1068
947 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, 1069 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
948 priv, eaddr, &stlbe); 1070 &priv->ref, eaddr, &stlbe);
949 break; 1071 break;
950 1072
951 case 1: { 1073 case 1: {
@@ -962,8 +1084,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
962 break; 1084 break;
963 } 1085 }
964 1086
965 write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); 1087 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
966 preempt_enable();
967} 1088}
968 1089
969int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, 1090int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
@@ -993,85 +1114,279 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
993 1114
994void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) 1115void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
995{ 1116{
996 struct tlbe *tlbe; 1117 struct kvm_book3e_206_tlb_entry *tlbe;
997 1118
998 /* Insert large initial mapping for guest. */ 1119 /* Insert large initial mapping for guest. */
999 tlbe = &vcpu_e500->gtlb_arch[1][0]; 1120 tlbe = get_entry(vcpu_e500, 1, 0);
1000 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); 1121 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
1001 tlbe->mas2 = 0; 1122 tlbe->mas2 = 0;
1002 tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; 1123 tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
1003 tlbe->mas7 = 0;
1004 1124
1005 /* 4K map for serial output. Used by kernel wrapper. */ 1125 /* 4K map for serial output. Used by kernel wrapper. */
1006 tlbe = &vcpu_e500->gtlb_arch[1][1]; 1126 tlbe = get_entry(vcpu_e500, 1, 1);
1007 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); 1127 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
1008 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; 1128 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
1009 tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; 1129 tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
1010 tlbe->mas7 = 0; 1130}
1131
1132static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
1133{
1134 int i;
1135
1136 clear_tlb_refs(vcpu_e500);
1137 kfree(vcpu_e500->gtlb_priv[0]);
1138 kfree(vcpu_e500->gtlb_priv[1]);
1139
1140 if (vcpu_e500->shared_tlb_pages) {
1141 vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
1142 PAGE_SIZE)));
1143
1144 for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) {
1145 set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
1146 put_page(vcpu_e500->shared_tlb_pages[i]);
1147 }
1148
1149 vcpu_e500->num_shared_tlb_pages = 0;
1150 vcpu_e500->shared_tlb_pages = NULL;
1151 } else {
1152 kfree(vcpu_e500->gtlb_arch);
1153 }
1154
1155 vcpu_e500->gtlb_arch = NULL;
1156}
1157
1158int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1159 struct kvm_config_tlb *cfg)
1160{
1161 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1162 struct kvm_book3e_206_tlb_params params;
1163 char *virt;
1164 struct page **pages;
1165 struct tlbe_priv *privs[2] = {};
1166 size_t array_len;
1167 u32 sets;
1168 int num_pages, ret, i;
1169
1170 if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
1171 return -EINVAL;
1172
1173 if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
1174 sizeof(params)))
1175 return -EFAULT;
1176
1177 if (params.tlb_sizes[1] > 64)
1178 return -EINVAL;
1179 if (params.tlb_ways[1] != params.tlb_sizes[1])
1180 return -EINVAL;
1181 if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
1182 return -EINVAL;
1183 if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0)
1184 return -EINVAL;
1185
1186 if (!is_power_of_2(params.tlb_ways[0]))
1187 return -EINVAL;
1188
1189 sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
1190 if (!is_power_of_2(sets))
1191 return -EINVAL;
1192
1193 array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
1194 array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
1195
1196 if (cfg->array_len < array_len)
1197 return -EINVAL;
1198
1199 num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
1200 cfg->array / PAGE_SIZE;
1201 pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
1202 if (!pages)
1203 return -ENOMEM;
1204
1205 ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
1206 if (ret < 0)
1207 goto err_pages;
1208
1209 if (ret != num_pages) {
1210 num_pages = ret;
1211 ret = -EFAULT;
1212 goto err_put_page;
1213 }
1214
1215 virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
1216 if (!virt)
1217 goto err_put_page;
1218
1219 privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
1220 GFP_KERNEL);
1221 privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
1222 GFP_KERNEL);
1223
1224 if (!privs[0] || !privs[1])
1225 goto err_put_page;
1226
1227 free_gtlb(vcpu_e500);
1228
1229 vcpu_e500->gtlb_priv[0] = privs[0];
1230 vcpu_e500->gtlb_priv[1] = privs[1];
1231
1232 vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
1233 (virt + (cfg->array & (PAGE_SIZE - 1)));
1234
1235 vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0];
1236 vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1];
1237
1238 vcpu_e500->gtlb_offset[0] = 0;
1239 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
1240
1241 vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1242 if (params.tlb_sizes[0] <= 2048)
1243 vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
1244 vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
1245
1246 vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1247 vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
1248 vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
1249
1250 vcpu_e500->shared_tlb_pages = pages;
1251 vcpu_e500->num_shared_tlb_pages = num_pages;
1252
1253 vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0];
1254 vcpu_e500->gtlb_params[0].sets = sets;
1255
1256 vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
1257 vcpu_e500->gtlb_params[1].sets = 1;
1258
1259 return 0;
1260
1261err_put_page:
1262 kfree(privs[0]);
1263 kfree(privs[1]);
1264
1265 for (i = 0; i < num_pages; i++)
1266 put_page(pages[i]);
1267
1268err_pages:
1269 kfree(pages);
1270 return ret;
1271}
1272
1273int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
1274 struct kvm_dirty_tlb *dirty)
1275{
1276 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1277
1278 clear_tlb_refs(vcpu_e500);
1279 return 0;
1011} 1280}
1012 1281
1013int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) 1282int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
1014{ 1283{
1015 tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF; 1284 int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
1016 1285 int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
1017 vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE; 1286
1018 vcpu_e500->gtlb_arch[0] = 1287 host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
1019 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); 1288 host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
1020 if (vcpu_e500->gtlb_arch[0] == NULL) 1289
1021 goto err_out; 1290 /*
1022 1291 * This should never happen on real e500 hardware, but is
1023 vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE; 1292 * architecturally possible -- e.g. in some weird nested
1024 vcpu_e500->gtlb_arch[1] = 1293 * virtualization case.
1025 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); 1294 */
1026 if (vcpu_e500->gtlb_arch[1] == NULL) 1295 if (host_tlb_params[0].entries == 0 ||
1027 goto err_out_guest0; 1296 host_tlb_params[1].entries == 0) {
1028 1297 pr_err("%s: need to know host tlb size\n", __func__);
1029 vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *) 1298 return -ENODEV;
1030 kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL); 1299 }
1031 if (vcpu_e500->gtlb_priv[0] == NULL) 1300
1032 goto err_out_guest1; 1301 host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >>
1033 vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *) 1302 TLBnCFG_ASSOC_SHIFT;
1034 kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL); 1303 host_tlb_params[1].ways = host_tlb_params[1].entries;
1035 1304
1036 if (vcpu_e500->gtlb_priv[1] == NULL) 1305 if (!is_power_of_2(host_tlb_params[0].entries) ||
1037 goto err_out_priv0; 1306 !is_power_of_2(host_tlb_params[0].ways) ||
1307 host_tlb_params[0].entries < host_tlb_params[0].ways ||
1308 host_tlb_params[0].ways == 0) {
1309 pr_err("%s: bad tlb0 host config: %u entries %u ways\n",
1310 __func__, host_tlb_params[0].entries,
1311 host_tlb_params[0].ways);
1312 return -ENODEV;
1313 }
1314
1315 host_tlb_params[0].sets =
1316 host_tlb_params[0].entries / host_tlb_params[0].ways;
1317 host_tlb_params[1].sets = 1;
1318
1319 vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
1320 vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
1321
1322 vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM;
1323 vcpu_e500->gtlb_params[0].sets =
1324 KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
1325
1326 vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
1327 vcpu_e500->gtlb_params[1].sets = 1;
1328
1329 vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
1330 if (!vcpu_e500->gtlb_arch)
1331 return -ENOMEM;
1332
1333 vcpu_e500->gtlb_offset[0] = 0;
1334 vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
1335
1336 vcpu_e500->tlb_refs[0] =
1337 kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries,
1338 GFP_KERNEL);
1339 if (!vcpu_e500->tlb_refs[0])
1340 goto err;
1341
1342 vcpu_e500->tlb_refs[1] =
1343 kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries,
1344 GFP_KERNEL);
1345 if (!vcpu_e500->tlb_refs[1])
1346 goto err;
1347
1348 vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
1349 vcpu_e500->gtlb_params[0].entries,
1350 GFP_KERNEL);
1351 if (!vcpu_e500->gtlb_priv[0])
1352 goto err;
1353
1354 vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
1355 vcpu_e500->gtlb_params[1].entries,
1356 GFP_KERNEL);
1357 if (!vcpu_e500->gtlb_priv[1])
1358 goto err;
1038 1359
1039 if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) 1360 if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
1040 goto err_out_priv1; 1361 goto err;
1041 1362
1042 /* Init TLB configuration register */ 1363 /* Init TLB configuration register */
1043 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; 1364 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) &
1044 vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0]; 1365 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1045 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; 1366 vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries;
1046 vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1]; 1367 vcpu_e500->tlb0cfg |=
1368 vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
1369
1370 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) &
1371 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1372 vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries;
1373 vcpu_e500->tlb0cfg |=
1374 vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
1047 1375
1048 return 0; 1376 return 0;
1049 1377
1050err_out_priv1: 1378err:
1051 kfree(vcpu_e500->gtlb_priv[1]); 1379 free_gtlb(vcpu_e500);
1052err_out_priv0: 1380 kfree(vcpu_e500->tlb_refs[0]);
1053 kfree(vcpu_e500->gtlb_priv[0]); 1381 kfree(vcpu_e500->tlb_refs[1]);
1054err_out_guest1:
1055 kfree(vcpu_e500->gtlb_arch[1]);
1056err_out_guest0:
1057 kfree(vcpu_e500->gtlb_arch[0]);
1058err_out:
1059 return -1; 1382 return -1;
1060} 1383}
1061 1384
1062void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) 1385void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
1063{ 1386{
1064 int stlbsel, i; 1387 free_gtlb(vcpu_e500);
1065
1066 /* release all privs */
1067 for (stlbsel = 0; stlbsel < 2; stlbsel++)
1068 for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
1069 struct tlbe_priv *priv =
1070 &vcpu_e500->gtlb_priv[stlbsel][i];
1071 kvmppc_e500_priv_release(priv);
1072 }
1073
1074 kvmppc_e500_id_table_free(vcpu_e500); 1388 kvmppc_e500_id_table_free(vcpu_e500);
1075 kfree(vcpu_e500->gtlb_arch[1]); 1389
1076 kfree(vcpu_e500->gtlb_arch[0]); 1390 kfree(vcpu_e500->tlb_refs[0]);
1391 kfree(vcpu_e500->tlb_refs[1]);
1077} 1392}
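
The new kvm_vcpu_ioctl_config_tlb() path is driven from userspace through KVM_ENABLE_CAP(KVM_CAP_SW_TLB); see the powerpc.c hunk further down. A rough sketch of what a caller would do, assuming the uapi structures introduced by this series (field names follow the kernel side shown above; error handling and the allocation of the shared entry array are left out):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Configure a 512-entry, 4-way TLB0 and a 16-entry TLB1, sharing "tlb"
 * (at least (512 + 16) * sizeof(*tlb) bytes) with the kernel. */
static int config_sw_tlb(int vcpu_fd, struct kvm_book3e_206_tlb_entry *tlb)
{
	struct kvm_book3e_206_tlb_params params;
	struct kvm_config_tlb cfg;
	struct kvm_enable_cap cap;

	memset(&params, 0, sizeof(params));
	params.tlb_sizes[0] = 512;	/* sizes[2]/[3] must stay zero */
	params.tlb_ways[0]  = 4;	/* power of two -> 128 sets */
	params.tlb_sizes[1] = 16;	/* at most 64 entries */
	params.tlb_ways[1]  = 16;	/* TLB1 must be fully associative */

	memset(&cfg, 0, sizeof(cfg));
	cfg.mmu_type  = KVM_MMU_FSL_BOOKE_NOHV;
	cfg.params    = (uintptr_t)&params;
	cfg.array     = (uintptr_t)tlb;
	cfg.array_len = (512 + 16) * sizeof(*tlb);

	memset(&cap, 0, sizeof(cap));
	cap.cap     = KVM_CAP_SW_TLB;
	cap.args[0] = (uintptr_t)&cfg;

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}

Once userspace has rewritten entries in the shared array, the KVM_DIRTY_TLB ioctl (handled by kvm_vcpu_ioctl_dirty_tlb() above) drops the host-side shadow state so the new guest entries take effect.
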
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 59b88e99a235..5c6d2d7bf058 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -20,13 +20,9 @@
20#include <asm/tlb.h> 20#include <asm/tlb.h>
21#include <asm/kvm_e500.h> 21#include <asm/kvm_e500.h>
22 22
23#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */ 23/* This geometry is the legacy default -- can be overridden by userspace */
24#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT) 24#define KVM_E500_TLB0_WAY_SIZE 128
25#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1) 25#define KVM_E500_TLB0_WAY_NUM 2
26
27#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */
28#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
29#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1)
30 26
31#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) 27#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
32#define KVM_E500_TLB1_SIZE 16 28#define KVM_E500_TLB1_SIZE 16
@@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
58extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); 54extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
59 55
60/* TLB helper functions */ 56/* TLB helper functions */
61static inline unsigned int get_tlb_size(const struct tlbe *tlbe) 57static inline unsigned int
58get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
62{ 59{
63 return (tlbe->mas1 >> 7) & 0x1f; 60 return (tlbe->mas1 >> 7) & 0x1f;
64} 61}
65 62
66static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) 63static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
67{ 64{
68 return tlbe->mas2 & 0xfffff000; 65 return tlbe->mas2 & 0xfffff000;
69} 66}
70 67
71static inline u64 get_tlb_bytes(const struct tlbe *tlbe) 68static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
72{ 69{
73 unsigned int pgsize = get_tlb_size(tlbe); 70 unsigned int pgsize = get_tlb_size(tlbe);
74 return 1ULL << 10 << pgsize; 71 return 1ULL << 10 << pgsize;
75} 72}
76 73
77static inline gva_t get_tlb_end(const struct tlbe *tlbe) 74static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
78{ 75{
79 u64 bytes = get_tlb_bytes(tlbe); 76 u64 bytes = get_tlb_bytes(tlbe);
80 return get_tlb_eaddr(tlbe) + bytes - 1; 77 return get_tlb_eaddr(tlbe) + bytes - 1;
81} 78}
82 79
83static inline u64 get_tlb_raddr(const struct tlbe *tlbe) 80static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
84{ 81{
85 u64 rpn = tlbe->mas7; 82 return tlbe->mas7_3 & ~0xfffULL;
86 return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
87} 83}
88 84
89static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) 85static inline unsigned int
86get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
90{ 87{
91 return (tlbe->mas1 >> 16) & 0xff; 88 return (tlbe->mas1 >> 16) & 0xff;
92} 89}
93 90
94static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) 91static inline unsigned int
92get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
95{ 93{
96 return (tlbe->mas1 >> 12) & 0x1; 94 return (tlbe->mas1 >> 12) & 0x1;
97} 95}
98 96
99static inline unsigned int get_tlb_v(const struct tlbe *tlbe) 97static inline unsigned int
98get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
100{ 99{
101 return (tlbe->mas1 >> 31) & 0x1; 100 return (tlbe->mas1 >> 31) & 0x1;
102} 101}
103 102
104static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe) 103static inline unsigned int
104get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
105{ 105{
106 return (tlbe->mas1 >> 30) & 0x1; 106 return (tlbe->mas1 >> 30) & 0x1;
107} 107}
@@ -121,59 +121,37 @@ static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
121 return !!(vcpu->arch.shared->msr & MSR_PR); 121 return !!(vcpu->arch.shared->msr & MSR_PR);
122} 122}
123 123
124static inline unsigned int get_cur_spid( 124static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
125 const struct kvmppc_vcpu_e500 *vcpu_e500)
126{ 125{
127 return (vcpu_e500->mas6 >> 16) & 0xff; 126 return (vcpu->arch.shared->mas6 >> 16) & 0xff;
128} 127}
129 128
130static inline unsigned int get_cur_sas( 129static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
131 const struct kvmppc_vcpu_e500 *vcpu_e500)
132{ 130{
133 return vcpu_e500->mas6 & 0x1; 131 return vcpu->arch.shared->mas6 & 0x1;
134} 132}
135 133
136static inline unsigned int get_tlb_tlbsel( 134static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
137 const struct kvmppc_vcpu_e500 *vcpu_e500)
138{ 135{
139 /* 136 /*
140 * Manual says that tlbsel has 2 bits wide. 137 * Manual says that tlbsel has 2 bits wide.
141 * Since we only have two TLBs, only lower bit is used. 138 * Since we only have two TLBs, only lower bit is used.
142 */ 139 */
143 return (vcpu_e500->mas0 >> 28) & 0x1; 140 return (vcpu->arch.shared->mas0 >> 28) & 0x1;
144}
145
146static inline unsigned int get_tlb_nv_bit(
147 const struct kvmppc_vcpu_e500 *vcpu_e500)
148{
149 return vcpu_e500->mas0 & 0xfff;
150} 141}
151 142
152static inline unsigned int get_tlb_esel_bit( 143static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
153 const struct kvmppc_vcpu_e500 *vcpu_e500)
154{ 144{
155 return (vcpu_e500->mas0 >> 16) & 0xfff; 145 return vcpu->arch.shared->mas0 & 0xfff;
156} 146}
157 147
158static inline unsigned int get_tlb_esel( 148static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
159 const struct kvmppc_vcpu_e500 *vcpu_e500,
160 int tlbsel)
161{ 149{
162 unsigned int esel = get_tlb_esel_bit(vcpu_e500); 150 return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
163
164 if (tlbsel == 0) {
165 esel &= KVM_E500_TLB0_WAY_NUM_MASK;
166 esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
167 << KVM_E500_TLB0_WAY_NUM_BIT;
168 } else {
169 esel &= KVM_E500_TLB1_SIZE - 1;
170 }
171
172 return esel;
173} 151}
174 152
175static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, 153static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
176 const struct tlbe *tlbe) 154 const struct kvm_book3e_206_tlb_entry *tlbe)
177{ 155{
178 gpa_t gpa; 156 gpa_t gpa;
179 157
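
Most of the churn in this header comes from the switch from separate mas3/mas7 fields to a single 64-bit mas7_3 in kvm_book3e_206_tlb_entry. A small standalone illustration of the packing, matching the new get_tlb_raddr() and the MAS3/MAS7 split done in __write_host_tlbe() (sample register values are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t mas3 = 0xabcde015;	/* low RPN bits | permission bits */
	uint32_t mas7 = 0x0000000f;	/* RPN bits above 4G */
	uint64_t mas7_3 = ((uint64_t)mas7 << 32) | mas3;

	/* get_tlb_raddr(): mask the permission bits, keep the full RPN */
	printf("raddr = 0x%llx\n", (unsigned long long)(mas7_3 & ~0xfffULL));

	/* __write_host_tlbe(): split back out for SPRN_MAS3 / SPRN_MAS7 */
	printf("MAS3 = 0x%08x, MAS7 = 0x%08x\n",
	       (uint32_t)mas7_3, (uint32_t)(mas7_3 >> 32));
	return 0;
}
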
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 141dce3c6810..968f40101883 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -13,6 +13,7 @@
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2007 15 * Copyright IBM Corp. 2007
16 * Copyright 2011 Freescale Semiconductor, Inc.
16 * 17 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */ 19 */
@@ -69,54 +70,55 @@
69#define OP_STH 44 70#define OP_STH 44
70#define OP_STHU 45 71#define OP_STHU 45
71 72
72#ifdef CONFIG_PPC_BOOK3S
73static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
74{
75 return 1;
76}
77#else
78static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
79{
80 return vcpu->arch.tcr & TCR_DIE;
81}
82#endif
83
84void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) 73void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
85{ 74{
86 unsigned long dec_nsec; 75 unsigned long dec_nsec;
76 unsigned long long dec_time;
87 77
88 pr_debug("mtDEC: %x\n", vcpu->arch.dec); 78 pr_debug("mtDEC: %x\n", vcpu->arch.dec);
79 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
80
89#ifdef CONFIG_PPC_BOOK3S 81#ifdef CONFIG_PPC_BOOK3S
90 /* mtdec lowers the interrupt line when positive. */ 82 /* mtdec lowers the interrupt line when positive. */
91 kvmppc_core_dequeue_dec(vcpu); 83 kvmppc_core_dequeue_dec(vcpu);
92 84
93 /* POWER4+ triggers a dec interrupt if the value is < 0 */ 85 /* POWER4+ triggers a dec interrupt if the value is < 0 */
94 if (vcpu->arch.dec & 0x80000000) { 86 if (vcpu->arch.dec & 0x80000000) {
95 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
96 kvmppc_core_queue_dec(vcpu); 87 kvmppc_core_queue_dec(vcpu);
97 return; 88 return;
98 } 89 }
99#endif 90#endif
100 if (kvmppc_dec_enabled(vcpu)) { 91
101 /* The decrementer ticks at the same rate as the timebase, so 92#ifdef CONFIG_BOOKE
102 * that's how we convert the guest DEC value to the number of 93 /* On BOOKE, DEC = 0 is as good as decrementer not enabled */
103 * host ticks. */ 94 if (vcpu->arch.dec == 0)
104 95 return;
105 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 96#endif
106 dec_nsec = vcpu->arch.dec; 97
107 dec_nsec *= 1000; 98 /*
108 dec_nsec /= tb_ticks_per_usec; 99 * The decrementer ticks at the same rate as the timebase, so
109 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), 100 * that's how we convert the guest DEC value to the number of
110 HRTIMER_MODE_REL); 101 * host ticks.
111 vcpu->arch.dec_jiffies = get_tb(); 102 */
112 } else { 103
113 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 104 dec_time = vcpu->arch.dec;
114 } 105 dec_time *= 1000;
106 do_div(dec_time, tb_ticks_per_usec);
107 dec_nsec = do_div(dec_time, NSEC_PER_SEC);
108 hrtimer_start(&vcpu->arch.dec_timer,
109 ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
110 vcpu->arch.dec_jiffies = get_tb();
115} 111}
116 112
117u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) 113u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
118{ 114{
119 u64 jd = tb - vcpu->arch.dec_jiffies; 115 u64 jd = tb - vcpu->arch.dec_jiffies;
116
117#ifdef CONFIG_BOOKE
118 if (vcpu->arch.dec < jd)
119 return 0;
120#endif
121
120 return vcpu->arch.dec - jd; 122 return vcpu->arch.dec - jd;
121} 123}
122 124
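
The rewritten kvmppc_emulate_dec() above converts the guest DEC value to an hrtimer expiry in two steps: timebase ticks to nanoseconds via tb_ticks_per_usec, then a seconds/nanoseconds split for ktime_set(). The same arithmetic in standalone form (illustrative only; the 500 ticks-per-microsecond timebase is an assumption, not something the patch fixes):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL

int main(void)
{
	uint64_t tb_ticks_per_usec = 500;	/* assumed 500 MHz timebase */
	uint64_t dec = 0x80000000ULL;		/* guest DEC value, in timebase ticks */

	uint64_t ns  = dec * 1000 / tb_ticks_per_usec;	/* total ns, i.e. dec_time after the first do_div() */
	uint64_t sec = ns / NSEC_PER_SEC;		/* seconds passed to ktime_set() */
	uint64_t rem = ns % NSEC_PER_SEC;		/* dec_nsec passed to ktime_set() */

	printf("%llu ticks -> %llu s + %llu ns\n",
	       (unsigned long long)dec, (unsigned long long)sec,
	       (unsigned long long)rem);
	return 0;
}
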
@@ -159,7 +161,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
159 case OP_TRAP_64: 161 case OP_TRAP_64:
160 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); 162 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
161#else 163#else
162 kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR); 164 kvmppc_core_queue_program(vcpu,
165 vcpu->arch.shared->esr | ESR_PTR);
163#endif 166#endif
164 advance = 0; 167 advance = 0;
165 break; 168 break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 607fbdf24b84..00d7e345b3fe 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,7 +39,8 @@
39int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 39int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
40{ 40{
41 return !(v->arch.shared->msr & MSR_WE) || 41 return !(v->arch.shared->msr & MSR_WE) ||
42 !!(v->arch.pending_exceptions); 42 !!(v->arch.pending_exceptions) ||
43 v->requests;
43} 44}
44 45
45int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 46int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -66,7 +67,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
66 vcpu->arch.magic_page_pa = param1; 67 vcpu->arch.magic_page_pa = param1;
67 vcpu->arch.magic_page_ea = param2; 68 vcpu->arch.magic_page_ea = param2;
68 69
69 r2 = KVM_MAGIC_FEAT_SR; 70 r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
70 71
71 r = HC_EV_SUCCESS; 72 r = HC_EV_SUCCESS;
72 break; 73 break;
@@ -171,8 +172,11 @@ void kvm_arch_check_processor_compat(void *rtn)
171 *(int *)rtn = kvmppc_core_check_processor_compat(); 172 *(int *)rtn = kvmppc_core_check_processor_compat();
172} 173}
173 174
174int kvm_arch_init_vm(struct kvm *kvm) 175int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
175{ 176{
177 if (type)
178 return -EINVAL;
179
176 return kvmppc_core_init_vm(kvm); 180 return kvmppc_core_init_vm(kvm);
177} 181}
178 182
@@ -208,17 +212,22 @@ int kvm_dev_ioctl_check_extension(long ext)
208 case KVM_CAP_PPC_BOOKE_SREGS: 212 case KVM_CAP_PPC_BOOKE_SREGS:
209#else 213#else
210 case KVM_CAP_PPC_SEGSTATE: 214 case KVM_CAP_PPC_SEGSTATE:
215 case KVM_CAP_PPC_HIOR:
211 case KVM_CAP_PPC_PAPR: 216 case KVM_CAP_PPC_PAPR:
212#endif 217#endif
213 case KVM_CAP_PPC_UNSET_IRQ: 218 case KVM_CAP_PPC_UNSET_IRQ:
214 case KVM_CAP_PPC_IRQ_LEVEL: 219 case KVM_CAP_PPC_IRQ_LEVEL:
215 case KVM_CAP_ENABLE_CAP: 220 case KVM_CAP_ENABLE_CAP:
221 case KVM_CAP_ONE_REG:
216 r = 1; 222 r = 1;
217 break; 223 break;
218#ifndef CONFIG_KVM_BOOK3S_64_HV 224#ifndef CONFIG_KVM_BOOK3S_64_HV
219 case KVM_CAP_PPC_PAIRED_SINGLES: 225 case KVM_CAP_PPC_PAIRED_SINGLES:
220 case KVM_CAP_PPC_OSI: 226 case KVM_CAP_PPC_OSI:
221 case KVM_CAP_PPC_GET_PVINFO: 227 case KVM_CAP_PPC_GET_PVINFO:
228#ifdef CONFIG_KVM_E500
229 case KVM_CAP_SW_TLB:
230#endif
222 r = 1; 231 r = 1;
223 break; 232 break;
224 case KVM_CAP_COALESCED_MMIO: 233 case KVM_CAP_COALESCED_MMIO:
@@ -238,7 +247,26 @@ int kvm_dev_ioctl_check_extension(long ext)
238 if (cpu_has_feature(CPU_FTR_ARCH_201)) 247 if (cpu_has_feature(CPU_FTR_ARCH_201))
239 r = 2; 248 r = 2;
240 break; 249 break;
250 case KVM_CAP_SYNC_MMU:
251 r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
252 break;
241#endif 253#endif
254 case KVM_CAP_NR_VCPUS:
255 /*
256 * Recommending a number of CPUs is somewhat arbitrary; we
257 * return the number of present CPUs for -HV (since a host
258 * will have secondary threads "offline"), and for other KVM
259 * implementations just count online CPUs.
260 */
261#ifdef CONFIG_KVM_BOOK3S_64_HV
262 r = num_present_cpus();
263#else
264 r = num_online_cpus();
265#endif
266 break;
267 case KVM_CAP_MAX_VCPUS:
268 r = KVM_MAX_VCPUS;
269 break;
242 default: 270 default:
243 r = 0; 271 r = 0;
244 break; 272 break;
@@ -253,6 +281,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
253 return -EINVAL; 281 return -EINVAL;
254} 282}
255 283
284void kvm_arch_free_memslot(struct kvm_memory_slot *free,
285 struct kvm_memory_slot *dont)
286{
287}
288
289int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
290{
291 return 0;
292}
293
256int kvm_arch_prepare_memory_region(struct kvm *kvm, 294int kvm_arch_prepare_memory_region(struct kvm *kvm,
257 struct kvm_memory_slot *memslot, 295 struct kvm_memory_slot *memslot,
258 struct kvm_memory_slot old, 296 struct kvm_memory_slot old,
@@ -279,9 +317,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
279{ 317{
280 struct kvm_vcpu *vcpu; 318 struct kvm_vcpu *vcpu;
281 vcpu = kvmppc_core_vcpu_create(kvm, id); 319 vcpu = kvmppc_core_vcpu_create(kvm, id);
282 vcpu->arch.wqp = &vcpu->wq; 320 if (!IS_ERR(vcpu)) {
283 if (!IS_ERR(vcpu)) 321 vcpu->arch.wqp = &vcpu->wq;
284 kvmppc_create_vcpu_debugfs(vcpu, id); 322 kvmppc_create_vcpu_debugfs(vcpu, id);
323 }
285 return vcpu; 324 return vcpu;
286} 325}
287 326
@@ -305,18 +344,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
305 return kvmppc_core_pending_dec(vcpu); 344 return kvmppc_core_pending_dec(vcpu);
306} 345}
307 346
308static void kvmppc_decrementer_func(unsigned long data)
309{
310 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
311
312 kvmppc_core_queue_dec(vcpu);
313
314 if (waitqueue_active(vcpu->arch.wqp)) {
315 wake_up_interruptible(vcpu->arch.wqp);
316 vcpu->stat.halt_wakeup++;
317 }
318}
319
320/* 347/*
321 * low level hrtimer wake routine. Because this runs in hardirq context 348 * low level hrtimer wake routine. Because this runs in hardirq context
322 * we schedule a tasklet to do the real work. 349 * we schedule a tasklet to do the real work.
@@ -431,20 +458,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 
 	kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
 
-	switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
-	case KVM_REG_GPR:
+	switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
+	case KVM_MMIO_REG_GPR:
 		kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
 		break;
-	case KVM_REG_FPR:
-		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+	case KVM_MMIO_REG_FPR:
+		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
 		break;
 #ifdef CONFIG_PPC_BOOK3S
-	case KVM_REG_QPR:
-		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+	case KVM_MMIO_REG_QPR:
+		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
 		break;
-	case KVM_REG_FQPR:
-		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
-		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+	case KVM_MMIO_REG_FQPR:
+		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
 		break;
 #endif
 	default:
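
Editorial aside: the io_gpr field above packs both a register bank and a register index into one value, which is why a single mask rename touches every case. The sketch below makes that encoding concrete; the EX_* constants are placeholders invented for illustration, not the kernel's actual KVM_MMIO_REG_* values, which live in the arch headers.

/* Illustrative only: assumed split of io_gpr into bank + index. */
#define EX_MMIO_REG_MASK	0x001f	/* low bits: register number within a bank */
#define EX_MMIO_REG_EXT_MASK	0xffe0	/* high bits: which register bank (GPR/FPR/...) */
#define EX_MMIO_REG_GPR		0x0000
#define EX_MMIO_REG_FPR		0x0020

static inline unsigned int ex_mmio_reg_bank(unsigned int io_gpr)
{
	return io_gpr & EX_MMIO_REG_EXT_MASK;	/* selects the switch case above */
}

static inline unsigned int ex_mmio_reg_num(unsigned int io_gpr)
{
	return io_gpr & EX_MMIO_REG_MASK;	/* indexes fpr[]/qpr[]/GPRs */
}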
@@ -553,8 +580,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		vcpu->arch.hcall_needed = 0;
 	}
 
-	kvmppc_core_deliver_interrupts(vcpu);
-
 	r = kvmppc_vcpu_run(run, vcpu);
 
 	if (vcpu->sigset_active)
@@ -563,6 +588,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return r;
 }
 
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+
+	me = get_cpu();
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
+		vcpu->stat.halt_wakeup++;
+	} else if (cpu != me && cpu != -1) {
+		smp_send_reschedule(vcpu->cpu);
+	}
+	put_cpu();
+}
+
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
 	if (irq->irq == KVM_INTERRUPT_UNSET) {
@@ -571,13 +611,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 	}
 
 	kvmppc_core_queue_external(vcpu, irq);
-
-	if (waitqueue_active(vcpu->arch.wqp)) {
-		wake_up_interruptible(vcpu->arch.wqp);
-		vcpu->stat.halt_wakeup++;
-	} else if (vcpu->cpu != -1) {
-		smp_send_reschedule(vcpu->cpu);
-	}
+	kvm_vcpu_kick(vcpu);
 
 	return 0;
 }
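
The open-coded wake-up is now centralized in kvm_vcpu_kick() from the previous hunk, and this ioctl handler is what fires when userspace injects an external interrupt. A hedged userspace-side sketch of that path follows; vcpu_fd is assumed to come from an earlier KVM_CREATE_VCPU, and KVM_INTERRUPT_UNSET is the PPC-specific "clear" value already referenced above.

/* Illustrative sketch (not from the patch): drive kvm_vcpu_ioctl_interrupt(). */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int inject_external_irq(int vcpu_fd, unsigned int irq)
{
	struct kvm_interrupt intr = { .irq = irq };

	/* Queues the interrupt, then kvm_vcpu_kick() wakes a halted or remote vcpu. */
	return ioctl(vcpu_fd, KVM_INTERRUPT, &intr);
}

static int clear_external_irq(int vcpu_fd)
{
	/* Takes the KVM_CAP_PPC_UNSET_IRQ branch at the top of the handler. */
	struct kvm_interrupt intr = { .irq = KVM_INTERRUPT_UNSET };

	return ioctl(vcpu_fd, KVM_INTERRUPT, &intr);
}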
@@ -599,6 +633,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.papr_enabled = true;
 		break;
+#ifdef CONFIG_KVM_E500
+	case KVM_CAP_SW_TLB: {
+		struct kvm_config_tlb cfg;
+		void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
+
+		r = -EFAULT;
+		if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
+			break;
+
+		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+		break;
+	}
+#endif
 	default:
 		r = -EINVAL;
 		break;
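
As the new case shows, KVM_CAP_SW_TLB is enabled per-vcpu via the existing KVM_ENABLE_CAP ioctl, with args[0] carrying a pointer to a struct kvm_config_tlb that describes the shared TLB layout. The sketch below is a hedged userspace counterpart under stated assumptions: the field values passed in (geometry, array size, MMU type) are placeholders, not a validated e500 configuration.

/* Illustrative sketch: hand a guest TLB array to the kernel via ENABLE_CAP. */
#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_sw_tlb(int vcpu_fd, void *tlb_array, size_t array_bytes,
			 uint64_t params, uint32_t mmu_type)
{
	struct kvm_config_tlb cfg = {
		.params    = params,			/* MMU geometry descriptor (placeholder) */
		.array     = (uintptr_t)tlb_array,	/* userspace TLB entry array */
		.array_len = array_bytes,
		.mmu_type  = mmu_type,
	};
	struct kvm_enable_cap cap = {
		.cap     = KVM_CAP_SW_TLB,
		.args[0] = (uintptr_t)&cfg,		/* copied in by the case above */
	};

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}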
@@ -648,6 +695,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+
+	case KVM_SET_ONE_REG:
+	case KVM_GET_ONE_REG:
+	{
+		struct kvm_one_reg reg;
+		r = -EFAULT;
+		if (copy_from_user(&reg, argp, sizeof(reg)))
+			goto out;
+		if (ioctl == KVM_SET_ONE_REG)
+			r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
+		else
+			r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
+		break;
+	}
+
+#ifdef CONFIG_KVM_E500
+	case KVM_DIRTY_TLB: {
+		struct kvm_dirty_tlb dirty;
+		r = -EFAULT;
+		if (copy_from_user(&dirty, argp, sizeof(dirty)))
+			goto out;
+		r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+		break;
+	}
+#endif
+
 	default:
 		r = -EINVAL;
 	}
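
The ONE_REG handlers added here follow the generic convention: struct kvm_one_reg pairs a 64-bit register ID with a userspace address that the value is copied to or from. A hedged round-trip sketch is below; it deliberately takes the register ID as a parameter rather than naming a specific PPC register, since the available IDs depend on the kernel headers in use.

/* Illustrative sketch: read a register, optionally modify it, write it back. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int roundtrip_one_reg(int vcpu_fd, uint64_t reg_id)
{
	uint64_t val = 0;
	struct kvm_one_reg reg = {
		.id   = reg_id,
		.addr = (uintptr_t)&val,	/* kernel reads/writes the value here */
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		return -1;

	/* ... inspect or adjust val ... */

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}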
@@ -656,6 +729,11 @@ out:
 	return r;
 }
 
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
 static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 {
 	u32 inst_lis = 0x3c000000;
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index b135d3d397db..877186b7b1c3 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -118,11 +118,14 @@ TRACE_EVENT(kvm_book3s_exit,
 	),
 
 	TP_fast_assign(
+		struct kvmppc_book3s_shadow_vcpu *svcpu;
 		__entry->exit_nr = exit_nr;
 		__entry->pc = kvmppc_get_pc(vcpu);
 		__entry->dar = kvmppc_get_fault_dar(vcpu);
 		__entry->msr = vcpu->arch.shared->msr;
-		__entry->srr1 = to_svcpu(vcpu)->shadow_srr1;
+		svcpu = svcpu_get(vcpu);
+		__entry->srr1 = svcpu->shadow_srr1;
+		svcpu_put(svcpu);
 	),
 
 	TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
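
The open-coded to_svcpu() access is replaced by a get/put pair so the shadow-vcpu reference is explicitly bracketed. A minimal kernel-side sketch of the same pattern, assuming the usual book3s headers are in scope; example_read_shadow_srr1 is a hypothetical helper, not something from this patch:

/* Illustrative pattern only: keep svcpu_get()/svcpu_put() balanced around
 * every access to shadow state. */
static unsigned long example_read_shadow_srr1(struct kvm_vcpu *vcpu)
{
	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
	unsigned long srr1 = svcpu->shadow_srr1;	/* read while the mapping is held */

	svcpu_put(svcpu);
	return srr1;
}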
@@ -337,6 +340,63 @@ TRACE_EVENT(kvm_book3s_slbmte,
 
 #endif /* CONFIG_PPC_BOOK3S */
 
+
+/*************************************************************************
+ *                         Book3E trace points                           *
+ *************************************************************************/
+
+#ifdef CONFIG_BOOKE
+
+TRACE_EVENT(kvm_booke206_stlb_write,
+	TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
+	TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	mas0	)
+		__field(	__u32,	mas8	)
+		__field(	__u32,	mas1	)
+		__field(	__u64,	mas2	)
+		__field(	__u64,	mas7_3	)
+	),
+
+	TP_fast_assign(
+		__entry->mas0 = mas0;
+		__entry->mas8 = mas8;
+		__entry->mas1 = mas1;
+		__entry->mas2 = mas2;
+		__entry->mas7_3 = mas7_3;
+	),
+
+	TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
+		__entry->mas0, __entry->mas8, __entry->mas1,
+		__entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_gtlb_write,
+	TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
+	TP_ARGS(mas0, mas1, mas2, mas7_3),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	mas0	)
+		__field(	__u32,	mas1	)
+		__field(	__u64,	mas2	)
+		__field(	__u64,	mas7_3	)
+	),
+
+	TP_fast_assign(
+		__entry->mas0 = mas0;
+		__entry->mas1 = mas1;
+		__entry->mas2 = mas2;
+		__entry->mas7_3 = mas7_3;
+	),
+
+	TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
+		__entry->mas0, __entry->mas1,
+		__entry->mas2, __entry->mas7_3)
+);
+
+#endif
+
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
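
For orientation: each TRACE_EVENT() definition above generates a trace_<name>() function that the e500 TLB code can call. A hedged sketch of a call site follows; the wrapper and its placement are assumptions for illustration, only the tracepoint names come from the patch.

/* Illustrative kernel-side call site (assumed context): emit the new events
 * after updating guest or shadow copies of the MAS registers. */
static void example_trace_tlb_writes(u32 mas0, u32 mas8, u32 mas1,
				     u64 mas2, u64 mas7_3)
{
	trace_kvm_booke206_gtlb_write(mas0, mas1, mas2, mas7_3);	/* guest TLB */
	trace_kvm_booke206_stlb_write(mas0, mas8, mas1, mas2, mas7_3);	/* shadow TLB */
}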