about | summary | refs | log | tree | commit | diff | stats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-13 18:31:08 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-13 18:31:08 -0500
commit: 66cdd0ceaf65a18996f561b770eedde1d123b019 (patch)
tree: 4892eaa422d366fce5d1e866ff1fe0988af95569 /arch/powerpc/kvm
parent: 896ea17d3da5f44b2625c9cda9874d7dfe447393 (diff)
parent: 58b7825bc324da55415034a9f6ca5d716b8fd898 (diff)
Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
 "Considerable KVM/PPC work, x86 kvmclock vsyscall support,
  IA32_TSC_ADJUST MSR emulation, amongst others."

Fix up trivial conflict in kernel/sched/core.c due to cross-cpu
migration notifier added next to rq migration call-back.

* tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits)
  KVM: emulator: fix real mode segment checks in address linearization
  VMX: remove unneeded enable_unrestricted_guest check
  KVM: VMX: fix DPL during entry to protected mode
  x86/kexec: crash_vmclear_local_vmcss needs __rcu
  kvm: Fix irqfd resampler list walk
  KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump
  x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
  KVM: MMU: optimize for set_spte
  KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
  KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
  KVM: PPC: bookehv: Add guest computation mode for irq delivery
  KVM: PPC: Make EPCR a valid field for booke64 and bookehv
  KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
  KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
  KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
  KVM: PPC: e500: Add emulation helper for getting instruction ea
  KVM: PPC: bookehv64: Add support for interrupt handling
  KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
  KVM: PPC: booke: Fix get_tb() compile error on 64-bit
  KVM: PPC: e500: Silence bogus GCC warning in tlb code
  ...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- arch/powerpc/kvm/44x.c | 1
-rw-r--r-- arch/powerpc/kvm/44x_emulate.c | 112
-rw-r--r-- arch/powerpc/kvm/Kconfig | 4
-rw-r--r-- arch/powerpc/kvm/Makefile | 5
-rw-r--r-- arch/powerpc/kvm/book3s.c | 125
-rw-r--r-- arch/powerpc/kvm/book3s_32_mmu_host.c | 3
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu_host.c | 3
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 474
-rw-r--r-- arch/powerpc/kvm/book3s_emulate.c | 16
-rw-r--r-- arch/powerpc/kvm/book3s_exports.c | 3
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c | 655
-rw-r--r-- arch/powerpc/kvm/book3s_hv_builtin.c | 4
-rw-r--r-- arch/powerpc/kvm/book3s_hv_ras.c | 144
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 143
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 142
-rw-r--r-- arch/powerpc/kvm/book3s_mmu_hpte.c | 5
-rw-r--r-- arch/powerpc/kvm/book3s_pr.c | 294
-rw-r--r-- arch/powerpc/kvm/book3s_rmhandlers.S | 18
-rw-r--r-- arch/powerpc/kvm/booke.c | 346
-rw-r--r-- arch/powerpc/kvm/booke.h | 1
-rw-r--r-- arch/powerpc/kvm/booke_emulate.c | 36
-rw-r--r-- arch/powerpc/kvm/bookehv_interrupts.S | 145
-rw-r--r-- arch/powerpc/kvm/e500.h | 11
-rw-r--r-- arch/powerpc/kvm/e500_emulate.c | 14
-rw-r--r-- arch/powerpc/kvm/e500_tlb.c | 132
-rw-r--r-- arch/powerpc/kvm/emulate.c | 221
-rw-r--r-- arch/powerpc/kvm/powerpc.c | 187
-rw-r--r-- arch/powerpc/kvm/trace.h | 200
28 files changed, 2722 insertions, 722 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 50e7dbc7356c..3d7fd21c65f9 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
83 vcpu_44x->shadow_refs[i].gtlb_index = -1; 83 vcpu_44x->shadow_refs[i].gtlb_index = -1;
84 84
85 vcpu->arch.cpu_type = KVM_CPU_440; 85 vcpu->arch.cpu_type = KVM_CPU_440;
86 vcpu->arch.pvr = mfspr(SPRN_PVR);
86 87
87 return 0; 88 return 0;
88} 89}
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index c8c61578fdfc..35ec0a8547da 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,12 +27,70 @@
27#include "booke.h" 27#include "booke.h"
28#include "44x_tlb.h" 28#include "44x_tlb.h"
29 29
30#define XOP_MFDCRX 259
30#define XOP_MFDCR 323 31#define XOP_MFDCR 323
32#define XOP_MTDCRX 387
31#define XOP_MTDCR 451 33#define XOP_MTDCR 451
32#define XOP_TLBSX 914 34#define XOP_TLBSX 914
33#define XOP_ICCCI 966 35#define XOP_ICCCI 966
34#define XOP_TLBWE 978 36#define XOP_TLBWE 978
35 37
38static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
39{
40 /* emulate some access in kernel */
41 switch (dcrn) {
42 case DCRN_CPR0_CONFIG_ADDR:
43 vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
44 return EMULATE_DONE;
45 default:
46 vcpu->run->dcr.dcrn = dcrn;
47 vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
48 vcpu->run->dcr.is_write = 1;
49 vcpu->arch.dcr_is_write = 1;
50 vcpu->arch.dcr_needed = 1;
51 kvmppc_account_exit(vcpu, DCR_EXITS);
52 return EMULATE_DO_DCR;
53 }
54}
55
56static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
57{
58 /* The guest may access CPR0 registers to determine the timebase
59 * frequency, and it must know the real host frequency because it
60 * can directly access the timebase registers.
61 *
62 * It would be possible to emulate those accesses in userspace,
63 * but userspace can really only figure out the end frequency.
64 * We could decompose that into the factors that compute it, but
65 * that's tricky math, and it's easier to just report the real
66 * CPR0 values.
67 */
68 switch (dcrn) {
69 case DCRN_CPR0_CONFIG_ADDR:
70 kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
71 break;
72 case DCRN_CPR0_CONFIG_DATA:
73 local_irq_disable();
74 mtdcr(DCRN_CPR0_CONFIG_ADDR,
75 vcpu->arch.cpr0_cfgaddr);
76 kvmppc_set_gpr(vcpu, rt,
77 mfdcr(DCRN_CPR0_CONFIG_DATA));
78 local_irq_enable();
79 break;
80 default:
81 vcpu->run->dcr.dcrn = dcrn;
82 vcpu->run->dcr.data = 0;
83 vcpu->run->dcr.is_write = 0;
84 vcpu->arch.dcr_is_write = 0;
85 vcpu->arch.io_gpr = rt;
86 vcpu->arch.dcr_needed = 1;
87 kvmppc_account_exit(vcpu, DCR_EXITS);
88 return EMULATE_DO_DCR;
89 }
90
91 return EMULATE_DONE;
92}
93
36int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 94int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
37 unsigned int inst, int *advance) 95 unsigned int inst, int *advance)
38{ 96{
@@ -50,55 +108,21 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
50 switch (get_xop(inst)) { 108 switch (get_xop(inst)) {
51 109
52 case XOP_MFDCR: 110 case XOP_MFDCR:
53 /* The guest may access CPR0 registers to determine the timebase 111 emulated = emulate_mfdcr(vcpu, rt, dcrn);
54 * frequency, and it must know the real host frequency because it 112 break;
55 * can directly access the timebase registers.
56 *
57 * It would be possible to emulate those accesses in userspace,
58 * but userspace can really only figure out the end frequency.
59 * We could decompose that into the factors that compute it, but
60 * that's tricky math, and it's easier to just report the real
61 * CPR0 values.
62 */
63 switch (dcrn) {
64 case DCRN_CPR0_CONFIG_ADDR:
65 kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
66 break;
67 case DCRN_CPR0_CONFIG_DATA:
68 local_irq_disable();
69 mtdcr(DCRN_CPR0_CONFIG_ADDR,
70 vcpu->arch.cpr0_cfgaddr);
71 kvmppc_set_gpr(vcpu, rt,
72 mfdcr(DCRN_CPR0_CONFIG_DATA));
73 local_irq_enable();
74 break;
75 default:
76 run->dcr.dcrn = dcrn;
77 run->dcr.data = 0;
78 run->dcr.is_write = 0;
79 vcpu->arch.io_gpr = rt;
80 vcpu->arch.dcr_needed = 1;
81 kvmppc_account_exit(vcpu, DCR_EXITS);
82 emulated = EMULATE_DO_DCR;
83 }
84 113
114 case XOP_MFDCRX:
115 emulated = emulate_mfdcr(vcpu, rt,
116 kvmppc_get_gpr(vcpu, ra));
85 break; 117 break;
86 118
87 case XOP_MTDCR: 119 case XOP_MTDCR:
88 /* emulate some access in kernel */ 120 emulated = emulate_mtdcr(vcpu, rs, dcrn);
89 switch (dcrn) { 121 break;
90 case DCRN_CPR0_CONFIG_ADDR:
91 vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
92 break;
93 default:
94 run->dcr.dcrn = dcrn;
95 run->dcr.data = kvmppc_get_gpr(vcpu, rs);
96 run->dcr.is_write = 1;
97 vcpu->arch.dcr_needed = 1;
98 kvmppc_account_exit(vcpu, DCR_EXITS);
99 emulated = EMULATE_DO_DCR;
100 }
101 122
123 case XOP_MTDCRX:
124 emulated = emulate_mtdcr(vcpu, rs,
125 kvmppc_get_gpr(vcpu, ra));
102 break; 126 break;
103 127
104 case XOP_TLBWE: 128 case XOP_TLBWE:
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index f4dacb9c57fa..4730c953f435 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
20 bool 20 bool
21 select PREEMPT_NOTIFIERS 21 select PREEMPT_NOTIFIERS
22 select ANON_INODES 22 select ANON_INODES
23 select HAVE_KVM_EVENTFD
23 24
24config KVM_BOOK3S_HANDLER 25config KVM_BOOK3S_HANDLER
25 bool 26 bool
@@ -36,6 +37,7 @@ config KVM_BOOK3S_64_HANDLER
36config KVM_BOOK3S_PR 37config KVM_BOOK3S_PR
37 bool 38 bool
38 select KVM_MMIO 39 select KVM_MMIO
40 select MMU_NOTIFIER
39 41
40config KVM_BOOK3S_32 42config KVM_BOOK3S_32
41 tristate "KVM support for PowerPC book3s_32 processors" 43 tristate "KVM support for PowerPC book3s_32 processors"
@@ -123,6 +125,7 @@ config KVM_E500V2
123 depends on EXPERIMENTAL && E500 && !PPC_E500MC 125 depends on EXPERIMENTAL && E500 && !PPC_E500MC
124 select KVM 126 select KVM
125 select KVM_MMIO 127 select KVM_MMIO
128 select MMU_NOTIFIER
126 ---help--- 129 ---help---
127 Support running unmodified E500 guest kernels in virtual machines on 130 Support running unmodified E500 guest kernels in virtual machines on
128 E500v2 host processors. 131 E500v2 host processors.
@@ -138,6 +141,7 @@ config KVM_E500MC
138 select KVM 141 select KVM
139 select KVM_MMIO 142 select KVM_MMIO
140 select KVM_BOOKE_HV 143 select KVM_BOOKE_HV
144 select MMU_NOTIFIER
141 ---help--- 145 ---help---
142 Support running unmodified E500MC/E5500 (32-bit) guest kernels in 146 Support running unmodified E500MC/E5500 (32-bit) guest kernels in
143 virtual machines on E500MC/E5500 host processors. 147 virtual machines on E500MC/E5500 host processors.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index c2a08636e6d4..1e473d46322c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
6 6
7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm 7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
8 8
9common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) 9common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
10 eventfd.o)
10 11
11CFLAGS_44x_tlb.o := -I. 12CFLAGS_44x_tlb.o := -I.
12CFLAGS_e500_tlb.o := -I. 13CFLAGS_e500_tlb.o := -I.
@@ -72,10 +73,12 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
72 book3s_hv_rmhandlers.o \ 73 book3s_hv_rmhandlers.o \
73 book3s_hv_rm_mmu.o \ 74 book3s_hv_rm_mmu.o \
74 book3s_64_vio_hv.o \ 75 book3s_64_vio_hv.o \
76 book3s_hv_ras.o \
75 book3s_hv_builtin.o 77 book3s_hv_builtin.o
76 78
77kvm-book3s_64-module-objs := \ 79kvm-book3s_64-module-objs := \
78 ../../../virt/kvm/kvm_main.o \ 80 ../../../virt/kvm/kvm_main.o \
81 ../../../virt/kvm/eventfd.o \
79 powerpc.o \ 82 powerpc.o \
80 emulate.o \ 83 emulate.o \
81 book3s.o \ 84 book3s.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a8360c857..a4b645285240 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
411 return 0; 411 return 0;
412} 412}
413 413
414int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
415{
416 return 0;
417}
418
419void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
420{
421}
422
414int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 423int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
415{ 424{
416 int i; 425 int i;
@@ -476,6 +485,122 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
476 return -ENOTSUPP; 485 return -ENOTSUPP;
477} 486}
478 487
488int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
489{
490 int r;
491 union kvmppc_one_reg val;
492 int size;
493 long int i;
494
495 size = one_reg_size(reg->id);
496 if (size > sizeof(val))
497 return -EINVAL;
498
499 r = kvmppc_get_one_reg(vcpu, reg->id, &val);
500
501 if (r == -EINVAL) {
502 r = 0;
503 switch (reg->id) {
504 case KVM_REG_PPC_DAR:
505 val = get_reg_val(reg->id, vcpu->arch.shared->dar);
506 break;
507 case KVM_REG_PPC_DSISR:
508 val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
509 break;
510 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
511 i = reg->id - KVM_REG_PPC_FPR0;
512 val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
513 break;
514 case KVM_REG_PPC_FPSCR:
515 val = get_reg_val(reg->id, vcpu->arch.fpscr);
516 break;
517#ifdef CONFIG_ALTIVEC
518 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
519 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
520 r = -ENXIO;
521 break;
522 }
523 val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
524 break;
525 case KVM_REG_PPC_VSCR:
526 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
527 r = -ENXIO;
528 break;
529 }
530 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
531 break;
532#endif /* CONFIG_ALTIVEC */
533 default:
534 r = -EINVAL;
535 break;
536 }
537 }
538 if (r)
539 return r;
540
541 if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
542 r = -EFAULT;
543
544 return r;
545}
546
547int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
548{
549 int r;
550 union kvmppc_one_reg val;
551 int size;
552 long int i;
553
554 size = one_reg_size(reg->id);
555 if (size > sizeof(val))
556 return -EINVAL;
557
558 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
559 return -EFAULT;
560
561 r = kvmppc_set_one_reg(vcpu, reg->id, &val);
562
563 if (r == -EINVAL) {
564 r = 0;
565 switch (reg->id) {
566 case KVM_REG_PPC_DAR:
567 vcpu->arch.shared->dar = set_reg_val(reg->id, val);
568 break;
569 case KVM_REG_PPC_DSISR:
570 vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
571 break;
572 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
573 i = reg->id - KVM_REG_PPC_FPR0;
574 vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
575 break;
576 case KVM_REG_PPC_FPSCR:
577 vcpu->arch.fpscr = set_reg_val(reg->id, val);
578 break;
579#ifdef CONFIG_ALTIVEC
580 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
581 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
582 r = -ENXIO;
583 break;
584 }
585 vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
586 break;
587 case KVM_REG_PPC_VSCR:
588 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
589 r = -ENXIO;
590 break;
591 }
592 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
593 break;
594#endif /* CONFIG_ALTIVEC */
595 default:
596 r = -EINVAL;
597 break;
598 }
599 }
600
601 return r;
602}
603
479int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 604int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
480 struct kvm_translation *tr) 605 struct kvm_translation *tr)
481{ 606{
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index b0f625a33345..00e619bf608e 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -155,7 +155,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
155 155
156 /* Get host physical address for gpa */ 156 /* Get host physical address for gpa */
157 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 157 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
158 if (is_error_pfn(hpaddr)) { 158 if (is_error_noslot_pfn(hpaddr)) {
159 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", 159 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
160 orig_pte->eaddr); 160 orig_pte->eaddr);
161 r = -EINVAL; 161 r = -EINVAL;
@@ -254,6 +254,7 @@ next_pteg:
254 254
255 kvmppc_mmu_hpte_cache_map(vcpu, pte); 255 kvmppc_mmu_hpte_cache_map(vcpu, pte);
256 256
257 kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
257out: 258out:
258 return r; 259 return r;
259} 260}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 4d72f9ebc554..ead58e317294 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -93,7 +93,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
93 93
94 /* Get host physical address for gpa */ 94 /* Get host physical address for gpa */
95 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 95 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
96 if (is_error_pfn(hpaddr)) { 96 if (is_error_noslot_pfn(hpaddr)) {
97 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); 97 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
98 r = -EINVAL; 98 r = -EINVAL;
99 goto out; 99 goto out;
@@ -171,6 +171,7 @@ map_again:
171 171
172 kvmppc_mmu_hpte_cache_map(vcpu, pte); 172 kvmppc_mmu_hpte_cache_map(vcpu, pte);
173 } 173 }
174 kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
174 175
175out: 176out:
176 return r; 177 return r;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d95d11322a15..8cc18abd6dde 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -24,6 +24,9 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
26#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
27#include <linux/srcu.h>
28#include <linux/anon_inodes.h>
29#include <linux/file.h>
27 30
28#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
29#include <asm/kvm_ppc.h> 32#include <asm/kvm_ppc.h>
@@ -40,6 +43,11 @@
40/* Power architecture requires HPT is at least 256kB */ 43/* Power architecture requires HPT is at least 256kB */
41#define PPC_MIN_HPT_ORDER 18 44#define PPC_MIN_HPT_ORDER 18
42 45
46static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
47 long pte_index, unsigned long pteh,
48 unsigned long ptel, unsigned long *pte_idx_ret);
49static void kvmppc_rmap_reset(struct kvm *kvm);
50
43long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) 51long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
44{ 52{
45 unsigned long hpt; 53 unsigned long hpt;
@@ -137,10 +145,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
137 /* Set the entire HPT to 0, i.e. invalid HPTEs */ 145 /* Set the entire HPT to 0, i.e. invalid HPTEs */
138 memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); 146 memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
139 /* 147 /*
140 * Set the whole last_vcpu array to an invalid vcpu number. 148 * Reset all the reverse-mapping chains for all memslots
141 * This ensures that each vcpu will flush its TLB on next entry.
142 */ 149 */
143 memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu)); 150 kvmppc_rmap_reset(kvm);
151 /* Ensure that each vcpu will flush its TLB on next entry. */
152 cpumask_setall(&kvm->arch.need_tlb_flush);
144 *htab_orderp = order; 153 *htab_orderp = order;
145 err = 0; 154 err = 0;
146 } else { 155 } else {
@@ -184,6 +193,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
184 unsigned long addr, hash; 193 unsigned long addr, hash;
185 unsigned long psize; 194 unsigned long psize;
186 unsigned long hp0, hp1; 195 unsigned long hp0, hp1;
196 unsigned long idx_ret;
187 long ret; 197 long ret;
188 struct kvm *kvm = vcpu->kvm; 198 struct kvm *kvm = vcpu->kvm;
189 199
@@ -215,7 +225,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
215 hash = (hash << 3) + 7; 225 hash = (hash << 3) + 7;
216 hp_v = hp0 | ((addr >> 16) & ~0x7fUL); 226 hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
217 hp_r = hp1 | addr; 227 hp_r = hp1 | addr;
218 ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); 228 ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
229 &idx_ret);
219 if (ret != H_SUCCESS) { 230 if (ret != H_SUCCESS) {
220 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", 231 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
221 addr, ret); 232 addr, ret);
@@ -260,7 +271,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
260 271
261/* 272/*
262 * This is called to get a reference to a guest page if there isn't 273 * This is called to get a reference to a guest page if there isn't
263 * one already in the kvm->arch.slot_phys[][] arrays. 274 * one already in the memslot->arch.slot_phys[] array.
264 */ 275 */
265static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, 276static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
266 struct kvm_memory_slot *memslot, 277 struct kvm_memory_slot *memslot,
@@ -275,7 +286,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
275 struct vm_area_struct *vma; 286 struct vm_area_struct *vma;
276 unsigned long pfn, i, npages; 287 unsigned long pfn, i, npages;
277 288
278 physp = kvm->arch.slot_phys[memslot->id]; 289 physp = memslot->arch.slot_phys;
279 if (!physp) 290 if (!physp)
280 return -EINVAL; 291 return -EINVAL;
281 if (physp[gfn - memslot->base_gfn]) 292 if (physp[gfn - memslot->base_gfn])
@@ -353,15 +364,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
353 return err; 364 return err;
354} 365}
355 366
356/* 367long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
357 * We come here on a H_ENTER call from the guest when we are not 368 long pte_index, unsigned long pteh,
358 * using mmu notifiers and we don't have the requested page pinned 369 unsigned long ptel, unsigned long *pte_idx_ret)
359 * already.
360 */
361long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
362 long pte_index, unsigned long pteh, unsigned long ptel)
363{ 370{
364 struct kvm *kvm = vcpu->kvm;
365 unsigned long psize, gpa, gfn; 371 unsigned long psize, gpa, gfn;
366 struct kvm_memory_slot *memslot; 372 struct kvm_memory_slot *memslot;
367 long ret; 373 long ret;
@@ -389,8 +395,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
389 do_insert: 395 do_insert:
390 /* Protect linux PTE lookup from page table destruction */ 396 /* Protect linux PTE lookup from page table destruction */
391 rcu_read_lock_sched(); /* this disables preemption too */ 397 rcu_read_lock_sched(); /* this disables preemption too */
392 vcpu->arch.pgdir = current->mm->pgd; 398 ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
393 ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); 399 current->mm->pgd, false, pte_idx_ret);
394 rcu_read_unlock_sched(); 400 rcu_read_unlock_sched();
395 if (ret == H_TOO_HARD) { 401 if (ret == H_TOO_HARD) {
396 /* this can't happen */ 402 /* this can't happen */
@@ -401,6 +407,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
401 407
402} 408}
403 409
410/*
411 * We come here on a H_ENTER call from the guest when we are not
412 * using mmu notifiers and we don't have the requested page pinned
413 * already.
414 */
415long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
416 long pte_index, unsigned long pteh,
417 unsigned long ptel)
418{
419 return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
420 pteh, ptel, &vcpu->arch.gpr[4]);
421}
422
404static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, 423static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
405 gva_t eaddr) 424 gva_t eaddr)
406{ 425{
@@ -570,7 +589,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
570 struct kvm *kvm = vcpu->kvm; 589 struct kvm *kvm = vcpu->kvm;
571 unsigned long *hptep, hpte[3], r; 590 unsigned long *hptep, hpte[3], r;
572 unsigned long mmu_seq, psize, pte_size; 591 unsigned long mmu_seq, psize, pte_size;
573 unsigned long gfn, hva, pfn; 592 unsigned long gpa, gfn, hva, pfn;
574 struct kvm_memory_slot *memslot; 593 struct kvm_memory_slot *memslot;
575 unsigned long *rmap; 594 unsigned long *rmap;
576 struct revmap_entry *rev; 595 struct revmap_entry *rev;
@@ -608,15 +627,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
608 627
609 /* Translate the logical address and get the page */ 628 /* Translate the logical address and get the page */
610 psize = hpte_page_size(hpte[0], r); 629 psize = hpte_page_size(hpte[0], r);
611 gfn = hpte_rpn(r, psize); 630 gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
631 gfn = gpa >> PAGE_SHIFT;
612 memslot = gfn_to_memslot(kvm, gfn); 632 memslot = gfn_to_memslot(kvm, gfn);
613 633
614 /* No memslot means it's an emulated MMIO region */ 634 /* No memslot means it's an emulated MMIO region */
615 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { 635 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
616 unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
617 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, 636 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
618 dsisr & DSISR_ISSTORE); 637 dsisr & DSISR_ISSTORE);
619 }
620 638
621 if (!kvm->arch.using_mmu_notifiers) 639 if (!kvm->arch.using_mmu_notifiers)
622 return -EFAULT; /* should never get here */ 640 return -EFAULT; /* should never get here */
@@ -710,7 +728,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
710 728
711 /* Check if we might have been invalidated; let the guest retry if so */ 729 /* Check if we might have been invalidated; let the guest retry if so */
712 ret = RESUME_GUEST; 730 ret = RESUME_GUEST;
713 if (mmu_notifier_retry(vcpu, mmu_seq)) { 731 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
714 unlock_rmap(rmap); 732 unlock_rmap(rmap);
715 goto out_unlock; 733 goto out_unlock;
716 } 734 }
@@ -756,6 +774,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
756 goto out_put; 774 goto out_put;
757} 775}
758 776
777static void kvmppc_rmap_reset(struct kvm *kvm)
778{
779 struct kvm_memslots *slots;
780 struct kvm_memory_slot *memslot;
781 int srcu_idx;
782
783 srcu_idx = srcu_read_lock(&kvm->srcu);
784 slots = kvm->memslots;
785 kvm_for_each_memslot(memslot, slots) {
786 /*
787 * This assumes it is acceptable to lose reference and
788 * change bits across a reset.
789 */
790 memset(memslot->arch.rmap, 0,
791 memslot->npages * sizeof(*memslot->arch.rmap));
792 }
793 srcu_read_unlock(&kvm->srcu, srcu_idx);
794}
795
759static int kvm_handle_hva_range(struct kvm *kvm, 796static int kvm_handle_hva_range(struct kvm *kvm,
760 unsigned long start, 797 unsigned long start,
761 unsigned long end, 798 unsigned long end,
@@ -850,7 +887,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
850 psize = hpte_page_size(hptep[0], ptel); 887 psize = hpte_page_size(hptep[0], ptel);
851 if ((hptep[0] & HPTE_V_VALID) && 888 if ((hptep[0] & HPTE_V_VALID) &&
852 hpte_rpn(ptel, psize) == gfn) { 889 hpte_rpn(ptel, psize) == gfn) {
853 hptep[0] |= HPTE_V_ABSENT; 890 if (kvm->arch.using_mmu_notifiers)
891 hptep[0] |= HPTE_V_ABSENT;
854 kvmppc_invalidate_hpte(kvm, hptep, i); 892 kvmppc_invalidate_hpte(kvm, hptep, i);
855 /* Harvest R and C */ 893 /* Harvest R and C */
856 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); 894 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
@@ -877,6 +915,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
877 return 0; 915 return 0;
878} 916}
879 917
918void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
919{
920 unsigned long *rmapp;
921 unsigned long gfn;
922 unsigned long n;
923
924 rmapp = memslot->arch.rmap;
925 gfn = memslot->base_gfn;
926 for (n = memslot->npages; n; --n) {
927 /*
928 * Testing the present bit without locking is OK because
929 * the memslot has been marked invalid already, and hence
930 * no new HPTEs referencing this page can be created,
931 * thus the present bit can't go from 0 to 1.
932 */
933 if (*rmapp & KVMPPC_RMAP_PRESENT)
934 kvm_unmap_rmapp(kvm, rmapp, gfn);
935 ++rmapp;
936 ++gfn;
937 }
938}
939
880static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 940static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
881 unsigned long gfn) 941 unsigned long gfn)
882{ 942{
@@ -1030,16 +1090,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
1030 return ret; 1090 return ret;
1031} 1091}
1032 1092
1033long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) 1093long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1094 unsigned long *map)
1034{ 1095{
1035 unsigned long i; 1096 unsigned long i;
1036 unsigned long *rmapp, *map; 1097 unsigned long *rmapp;
1037 1098
1038 preempt_disable(); 1099 preempt_disable();
1039 rmapp = memslot->arch.rmap; 1100 rmapp = memslot->arch.rmap;
1040 map = memslot->dirty_bitmap;
1041 for (i = 0; i < memslot->npages; ++i) { 1101 for (i = 0; i < memslot->npages; ++i) {
1042 if (kvm_test_clear_dirty(kvm, rmapp)) 1102 if (kvm_test_clear_dirty(kvm, rmapp) && map)
1043 __set_bit_le(i, map); 1103 __set_bit_le(i, map);
1044 ++rmapp; 1104 ++rmapp;
1045 } 1105 }
@@ -1057,20 +1117,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1057 unsigned long hva, psize, offset; 1117 unsigned long hva, psize, offset;
1058 unsigned long pa; 1118 unsigned long pa;
1059 unsigned long *physp; 1119 unsigned long *physp;
1120 int srcu_idx;
1060 1121
1122 srcu_idx = srcu_read_lock(&kvm->srcu);
1061 memslot = gfn_to_memslot(kvm, gfn); 1123 memslot = gfn_to_memslot(kvm, gfn);
1062 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 1124 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
1063 return NULL; 1125 goto err;
1064 if (!kvm->arch.using_mmu_notifiers) { 1126 if (!kvm->arch.using_mmu_notifiers) {
1065 physp = kvm->arch.slot_phys[memslot->id]; 1127 physp = memslot->arch.slot_phys;
1066 if (!physp) 1128 if (!physp)
1067 return NULL; 1129 goto err;
1068 physp += gfn - memslot->base_gfn; 1130 physp += gfn - memslot->base_gfn;
1069 pa = *physp; 1131 pa = *physp;
1070 if (!pa) { 1132 if (!pa) {
1071 if (kvmppc_get_guest_page(kvm, gfn, memslot, 1133 if (kvmppc_get_guest_page(kvm, gfn, memslot,
1072 PAGE_SIZE) < 0) 1134 PAGE_SIZE) < 0)
1073 return NULL; 1135 goto err;
1074 pa = *physp; 1136 pa = *physp;
1075 } 1137 }
1076 page = pfn_to_page(pa >> PAGE_SHIFT); 1138 page = pfn_to_page(pa >> PAGE_SHIFT);
@@ -1079,9 +1141,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1079 hva = gfn_to_hva_memslot(memslot, gfn); 1141 hva = gfn_to_hva_memslot(memslot, gfn);
1080 npages = get_user_pages_fast(hva, 1, 1, pages); 1142 npages = get_user_pages_fast(hva, 1, 1, pages);
1081 if (npages < 1) 1143 if (npages < 1)
1082 return NULL; 1144 goto err;
1083 page = pages[0]; 1145 page = pages[0];
1084 } 1146 }
1147 srcu_read_unlock(&kvm->srcu, srcu_idx);
1148
1085 psize = PAGE_SIZE; 1149 psize = PAGE_SIZE;
1086 if (PageHuge(page)) { 1150 if (PageHuge(page)) {
1087 page = compound_head(page); 1151 page = compound_head(page);
@@ -1091,6 +1155,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1091 if (nb_ret) 1155 if (nb_ret)
1092 *nb_ret = psize - offset; 1156 *nb_ret = psize - offset;
1093 return page_address(page) + offset; 1157 return page_address(page) + offset;
1158
1159 err:
1160 srcu_read_unlock(&kvm->srcu, srcu_idx);
1161 return NULL;
1094} 1162}
1095 1163
1096void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) 1164void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
@@ -1100,6 +1168,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
1100 put_page(page); 1168 put_page(page);
1101} 1169}
1102 1170
1171/*
1172 * Functions for reading and writing the hash table via reads and
1173 * writes on a file descriptor.
1174 *
1175 * Reads return the guest view of the hash table, which has to be
1176 * pieced together from the real hash table and the guest_rpte
1177 * values in the revmap array.
1178 *
1179 * On writes, each HPTE written is considered in turn, and if it
1180 * is valid, it is written to the HPT as if an H_ENTER with the
1181 * exact flag set was done. When the invalid count is non-zero
1182 * in the header written to the stream, the kernel will make
1183 * sure that that many HPTEs are invalid, and invalidate them
1184 * if not.
1185 */
1186
/*
 * Per-open-file state for a "kvm-htab" file descriptor; allocated in
 * kvm_vm_ioctl_get_htab_fd() and freed in kvm_htab_release().
 */
1187struct kvm_htab_ctx {
/* HPTE index at which the next read resumes (seeded from start_index) */
1188 unsigned long index;
/* KVM_GET_HTAB_* flags copied from the ioctl argument */
1189 unsigned long flags;
/* VM this fd refers to; a reference is held for the fd's lifetime */
1190 struct kvm *kvm;
/* 1 until a read has walked past the end of the table, then 0 */
1191 int first_pass;
1192};
1193
/* Size in bytes of one HPTE as stored in the table and in the stream: two unsigned longs */
1194#define HPTE_SIZE (2 * sizeof(unsigned long))
1195
1196static long record_hpte(unsigned long flags, unsigned long *hptp,
1197 unsigned long *hpte, struct revmap_entry *revp,
1198 int want_valid, int first_pass)
1199{
1200 unsigned long v, r;
1201 int ok = 1;
1202 int valid, dirty;
1203
1204 /* Unmodified entries are uninteresting except on the first pass */
1205 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
1206 if (!first_pass && !dirty)
1207 return 0;
1208
1209 valid = 0;
1210 if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
1211 valid = 1;
1212 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
1213 !(hptp[0] & HPTE_V_BOLTED))
1214 valid = 0;
1215 }
1216 if (valid != want_valid)
1217 return 0;
1218
1219 v = r = 0;
1220 if (valid || dirty) {
1221 /* lock the HPTE so it's stable and read it */
1222 preempt_disable();
1223 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
1224 cpu_relax();
1225 v = hptp[0];
1226 if (v & HPTE_V_ABSENT) {
1227 v &= ~HPTE_V_ABSENT;
1228 v |= HPTE_V_VALID;
1229 }
1230 /* re-evaluate valid and dirty from synchronized HPTE value */
1231 valid = !!(v & HPTE_V_VALID);
1232 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
1233 valid = 0;
1234 r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
1235 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
1236 /* only clear modified if this is the right sort of entry */
1237 if (valid == want_valid && dirty) {
1238 r &= ~HPTE_GR_MODIFIED;
1239 revp->guest_rpte = r;
1240 }
1241 asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
1242 hptp[0] &= ~HPTE_V_HVLOCK;
1243 preempt_enable();
1244 if (!(valid == want_valid && (first_pass || dirty)))
1245 ok = 0;
1246 }
1247 hpte[0] = v;
1248 hpte[1] = r;
1249 return ok;
1250}
1251
/*
 * Read handler for the "kvm-htab" file descriptor.
 *
 * Starting at ctx->index, fills the user buffer with groups, each made of
 * a struct kvm_get_htab_header followed by hdr.n_valid HPTEs (two words
 * each).  Invalid entries are only counted in hdr.n_invalid; no data is
 * emitted for them.  Returns the number of bytes produced, or -EFAULT if
 * the user buffer cannot be written.
 */
1252static ssize_t kvm_htab_read(struct file *file, char __user *buf,
1253 size_t count, loff_t *ppos)
1254{
1255 struct kvm_htab_ctx *ctx = file->private_data;
1256 struct kvm *kvm = ctx->kvm;
1257 struct kvm_get_htab_header hdr;
1258 unsigned long *hptp;
1259 struct revmap_entry *revp;
1260 unsigned long i, nb, nw;
1261 unsigned long __user *lbuf;
1262 struct kvm_get_htab_header __user *hptr;
1263 unsigned long flags;
1264 int first_pass;
1265 unsigned long hpte[2];
1266
/* validated once here; the stores below use the unchecked __put_user/__copy_to_user */
1267 if (!access_ok(VERIFY_WRITE, buf, count))
1268 return -EFAULT;
1269
1270 first_pass = ctx->first_pass;
1271 flags = ctx->flags;
1272
/* i, hptp and revp advance in lockstep: one HPTE is two words and one revmap entry */
1273 i = ctx->index;
1274 hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
1275 revp = kvm->arch.revmap + i;
1276 lbuf = (unsigned long __user *)buf;
1277
/* nb = bytes produced so far; only start a group if a header plus one HPTE still fits */
1278 nb = 0;
1279 while (nb + sizeof(hdr) + HPTE_SIZE < count) {
1280 /* Initialize header */
1281 hptr = (struct kvm_get_htab_header __user *)buf;
1282 hdr.n_valid = 0;
1283 hdr.n_invalid = 0;
/* nw remembers nb at the start of this group so an empty group can be undone */
1284 nw = nb;
1285 nb += sizeof(hdr);
1286 lbuf = (unsigned long __user *)(buf + sizeof(hdr));
1287
1288 /* Skip uninteresting entries, i.e. clean on not-first pass */
1289 if (!first_pass) {
1290 while (i < kvm->arch.hpt_npte &&
1291 !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
1292 ++i;
1293 hptp += 2;
1294 ++revp;
1295 }
1296 }
1297 hdr.index = i;
1298
1299 /* Grab a series of valid entries */
1300 while (i < kvm->arch.hpt_npte &&
1301 hdr.n_valid < 0xffff &&
1302 nb + HPTE_SIZE < count &&
1303 record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
1304 /* valid entry, write it out */
1305 ++hdr.n_valid;
1306 if (__put_user(hpte[0], lbuf) ||
1307 __put_user(hpte[1], lbuf + 1))
1308 return -EFAULT;
1309 nb += HPTE_SIZE;
1310 lbuf += 2;
1311 ++i;
1312 hptp += 2;
1313 ++revp;
1314 }
1315 /* Now skip invalid entries while we can */
1316 while (i < kvm->arch.hpt_npte &&
1317 hdr.n_invalid < 0xffff &&
1318 record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
1319 /* found an invalid entry */
1320 ++hdr.n_invalid;
1321 ++i;
1322 hptp += 2;
1323 ++revp;
1324 }
1325
1326 if (hdr.n_valid || hdr.n_invalid) {
1327 /* write back the header */
1328 if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
1329 return -EFAULT;
1330 nw = nb;
/* the next group's header goes right after the last HPTE written */
1331 buf = (char __user *)lbuf;
1332 } else {
/* nothing recorded: drop the header space reserved for this group */
1333 nb = nw;
1334 }
1335
1336 /* Check if we've wrapped around the hash table */
1337 if (i >= kvm->arch.hpt_npte) {
/* restart from entry 0; later reads report only modified entries */
1338 i = 0;
1339 ctx->first_pass = 0;
1340 break;
1341 }
1342 }
1343
/* remember where the next read resumes */
1344 ctx->index = i;
1345
1346 return nb;
1347}
1348
1349static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1350 size_t count, loff_t *ppos)
1351{
1352 struct kvm_htab_ctx *ctx = file->private_data;
1353 struct kvm *kvm = ctx->kvm;
1354 struct kvm_get_htab_header hdr;
1355 unsigned long i, j;
1356 unsigned long v, r;
1357 unsigned long __user *lbuf;
1358 unsigned long *hptp;
1359 unsigned long tmp[2];
1360 ssize_t nb;
1361 long int err, ret;
1362 int rma_setup;
1363
1364 if (!access_ok(VERIFY_READ, buf, count))
1365 return -EFAULT;
1366
1367 /* lock out vcpus from running while we're doing this */
1368 mutex_lock(&kvm->lock);
1369 rma_setup = kvm->arch.rma_setup_done;
1370 if (rma_setup) {
1371 kvm->arch.rma_setup_done = 0; /* temporarily */
1372 /* order rma_setup_done vs. vcpus_running */
1373 smp_mb();
1374 if (atomic_read(&kvm->arch.vcpus_running)) {
1375 kvm->arch.rma_setup_done = 1;
1376 mutex_unlock(&kvm->lock);
1377 return -EBUSY;
1378 }
1379 }
1380
1381 err = 0;
1382 for (nb = 0; nb + sizeof(hdr) <= count; ) {
1383 err = -EFAULT;
1384 if (__copy_from_user(&hdr, buf, sizeof(hdr)))
1385 break;
1386
1387 err = 0;
1388 if (nb + hdr.n_valid * HPTE_SIZE > count)
1389 break;
1390
1391 nb += sizeof(hdr);
1392 buf += sizeof(hdr);
1393
1394 err = -EINVAL;
1395 i = hdr.index;
1396 if (i >= kvm->arch.hpt_npte ||
1397 i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
1398 break;
1399
1400 hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
1401 lbuf = (unsigned long __user *)buf;
1402 for (j = 0; j < hdr.n_valid; ++j) {
1403 err = -EFAULT;
1404 if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
1405 goto out;
1406 err = -EINVAL;
1407 if (!(v & HPTE_V_VALID))
1408 goto out;
1409 lbuf += 2;
1410 nb += HPTE_SIZE;
1411
1412 if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
1413 kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
1414 err = -EIO;
1415 ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
1416 tmp);
1417 if (ret != H_SUCCESS) {
1418 pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
1419 "r=%lx\n", ret, i, v, r);
1420 goto out;
1421 }
1422 if (!rma_setup && is_vrma_hpte(v)) {
1423 unsigned long psize = hpte_page_size(v, r);
1424 unsigned long senc = slb_pgsize_encoding(psize);
1425 unsigned long lpcr;
1426
1427 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1428 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1429 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
1430 lpcr |= senc << (LPCR_VRMASD_SH - 4);
1431 kvm->arch.lpcr = lpcr;
1432 rma_setup = 1;
1433 }
1434 ++i;
1435 hptp += 2;
1436 }
1437
1438 for (j = 0; j < hdr.n_invalid; ++j) {
1439 if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
1440 kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
1441 ++i;
1442 hptp += 2;
1443 }
1444 err = 0;
1445 }
1446
1447 out:
1448 /* Order HPTE updates vs. rma_setup_done */
1449 smp_wmb();
1450 kvm->arch.rma_setup_done = rma_setup;
1451 mutex_unlock(&kvm->lock);
1452
1453 if (err)
1454 return err;
1455 return nb;
1456}
1457
1458static int kvm_htab_release(struct inode *inode, struct file *filp)
1459{
1460 struct kvm_htab_ctx *ctx = filp->private_data;
1461
1462 filp->private_data = NULL;
1463 if (!(ctx->flags & KVM_GET_HTAB_WRITE))
1464 atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
1465 kvm_put_kvm(ctx->kvm);
1466 kfree(ctx);
1467 return 0;
1468}
1469
1470static struct file_operations kvm_htab_fops = {
1471 .read = kvm_htab_read,
1472 .write = kvm_htab_write,
1473 .llseek = default_llseek,
1474 .release = kvm_htab_release,
1475};
1476
1477int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
1478{
1479 int ret;
1480 struct kvm_htab_ctx *ctx;
1481 int rwflag;
1482
1483 /* reject flags we don't recognize */
1484 if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
1485 return -EINVAL;
1486 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1487 if (!ctx)
1488 return -ENOMEM;
1489 kvm_get_kvm(kvm);
1490 ctx->kvm = kvm;
1491 ctx->index = ghf->start_index;
1492 ctx->flags = ghf->flags;
1493 ctx->first_pass = 1;
1494
1495 rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
1496 ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
1497 if (ret < 0) {
1498 kvm_put_kvm(kvm);
1499 return ret;
1500 }
1501
1502 if (rwflag == O_RDONLY) {
1503 mutex_lock(&kvm->slots_lock);
1504 atomic_inc(&kvm->arch.hpte_mod_interest);
1505 /* make sure kvmppc_do_h_enter etc. see the increment */
1506 synchronize_srcu_expedited(&kvm->srcu);
1507 mutex_unlock(&kvm->slots_lock);
1508 }
1509
1510 return ret;
1511}
1512
1103void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 1513void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
1104{ 1514{
1105 struct kvmppc_mmu *mmu = &vcpu->arch.mmu; 1515 struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index b9a989dc76cc..d31a716f7f2b 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -22,6 +22,7 @@
22#include <asm/kvm_book3s.h> 22#include <asm/kvm_book3s.h>
23#include <asm/reg.h> 23#include <asm/reg.h>
24#include <asm/switch_to.h> 24#include <asm/switch_to.h>
25#include <asm/time.h>
25 26
26#define OP_19_XOP_RFID 18 27#define OP_19_XOP_RFID 18
27#define OP_19_XOP_RFI 50 28#define OP_19_XOP_RFI 50
@@ -395,6 +396,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
395 (mfmsr() & MSR_HV)) 396 (mfmsr() & MSR_HV))
396 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 397 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
397 break; 398 break;
399 case SPRN_PURR:
400 to_book3s(vcpu)->purr_offset = spr_val - get_tb();
401 break;
402 case SPRN_SPURR:
403 to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
404 break;
398 case SPRN_GQR0: 405 case SPRN_GQR0:
399 case SPRN_GQR1: 406 case SPRN_GQR1:
400 case SPRN_GQR2: 407 case SPRN_GQR2:
@@ -412,6 +419,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
412 case SPRN_CTRLF: 419 case SPRN_CTRLF:
413 case SPRN_CTRLT: 420 case SPRN_CTRLT:
414 case SPRN_L2CR: 421 case SPRN_L2CR:
422 case SPRN_DSCR:
415 case SPRN_MMCR0_GEKKO: 423 case SPRN_MMCR0_GEKKO:
416 case SPRN_MMCR1_GEKKO: 424 case SPRN_MMCR1_GEKKO:
417 case SPRN_PMC1_GEKKO: 425 case SPRN_PMC1_GEKKO:
@@ -483,9 +491,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
483 *spr_val = to_book3s(vcpu)->hid[5]; 491 *spr_val = to_book3s(vcpu)->hid[5];
484 break; 492 break;
485 case SPRN_CFAR: 493 case SPRN_CFAR:
486 case SPRN_PURR: 494 case SPRN_DSCR:
487 *spr_val = 0; 495 *spr_val = 0;
488 break; 496 break;
497 case SPRN_PURR:
498 *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
499 break;
500 case SPRN_SPURR:
501 *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
502 break;
489 case SPRN_GQR0: 503 case SPRN_GQR0:
490 case SPRN_GQR1: 504 case SPRN_GQR1:
491 case SPRN_GQR2: 505 case SPRN_GQR2:
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index a150817d6d4c..7057a02f0906 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -28,8 +28,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
28#ifdef CONFIG_ALTIVEC 28#ifdef CONFIG_ALTIVEC
29EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); 29EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
30#endif 30#endif
31#ifdef CONFIG_VSX
32EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
33#endif
34#endif 31#endif
35 32
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 721d4603a235..71d0c90b62bf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -30,6 +30,7 @@
30#include <linux/cpumask.h> 30#include <linux/cpumask.h>
31#include <linux/spinlock.h> 31#include <linux/spinlock.h>
32#include <linux/page-flags.h> 32#include <linux/page-flags.h>
33#include <linux/srcu.h>
33 34
34#include <asm/reg.h> 35#include <asm/reg.h>
35#include <asm/cputable.h> 36#include <asm/cputable.h>
@@ -46,6 +47,7 @@
46#include <asm/page.h> 47#include <asm/page.h>
47#include <asm/hvcall.h> 48#include <asm/hvcall.h>
48#include <asm/switch_to.h> 49#include <asm/switch_to.h>
50#include <asm/smp.h>
49#include <linux/gfp.h> 51#include <linux/gfp.h>
50#include <linux/vmalloc.h> 52#include <linux/vmalloc.h>
51#include <linux/highmem.h> 53#include <linux/highmem.h>
@@ -55,25 +57,77 @@
55/* #define EXIT_DEBUG_SIMPLE */ 57/* #define EXIT_DEBUG_SIMPLE */
56/* #define EXIT_DEBUG_INT */ 58/* #define EXIT_DEBUG_INT */
57 59
60/* Used to indicate that a guest page fault needs to be handled */
61#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
62
63/* Used as a "null" value for timebase values */
64#define TB_NIL (~(u64)0)
65
58static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 66static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
59static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 67static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
60 68
69/*
70 * We use the vcpu_load/put functions to measure stolen time.
71 * Stolen time is counted as time when either the vcpu is able to
72 * run as part of a virtual core, but the task running the vcore
73 * is preempted or sleeping, or when the vcpu needs something done
74 * in the kernel by the task running the vcpu, but that task is
75 * preempted or sleeping. Those two things have to be counted
76 * separately, since one of the vcpu tasks will take on the job
77 * of running the core, and the other vcpu tasks in the vcore will
78 * sleep waiting for it to do that, but that sleep shouldn't count
79 * as stolen time.
80 *
81 * Hence we accumulate stolen time when the vcpu can run as part of
82 * a vcore using vc->stolen_tb, and the stolen time when the vcpu
83 * needs its task to do other things in the kernel (for example,
84 * service a page fault) in busy_stolen. We don't accumulate
85 * stolen time for a vcore when it is inactive, or for a vcpu
86 * when it is in state RUNNING or NOTREADY. NOTREADY is a bit of
87 * a misnomer; it means that the vcpu task is not executing in
88 * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
89 * the kernel. We don't have any way of dividing up that time
90 * between time that the vcpu is genuinely stopped, time that
91 * the task is actively working on behalf of the vcpu, and time
92 * that the task is preempted, so we don't count any of it as
93 * stolen.
94 *
95 * Updates to busy_stolen are protected by arch.tbacct_lock;
96 * updates to vc->stolen_tb are protected by the arch.tbacct_lock
97 * of the vcpu that has taken responsibility for running the vcore
98 * (i.e. vc->runner). The stolen times are measured in units of
99 * timebase ticks. (Note that the != TB_NIL checks below are
100 * purely defensive; they should never fail.)
101 */
102
61void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 103void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
62{ 104{
63 struct kvmppc_vcore *vc = vcpu->arch.vcore; 105 struct kvmppc_vcore *vc = vcpu->arch.vcore;
64 106
65 local_paca->kvm_hstate.kvm_vcpu = vcpu; 107 spin_lock(&vcpu->arch.tbacct_lock);
66 local_paca->kvm_hstate.kvm_vcore = vc; 108 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
67 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) 109 vc->preempt_tb != TB_NIL) {
68 vc->stolen_tb += mftb() - vc->preempt_tb; 110 vc->stolen_tb += mftb() - vc->preempt_tb;
111 vc->preempt_tb = TB_NIL;
112 }
113 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
114 vcpu->arch.busy_preempt != TB_NIL) {
115 vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
116 vcpu->arch.busy_preempt = TB_NIL;
117 }
118 spin_unlock(&vcpu->arch.tbacct_lock);
69} 119}
70 120
71void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 121void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
72{ 122{
73 struct kvmppc_vcore *vc = vcpu->arch.vcore; 123 struct kvmppc_vcore *vc = vcpu->arch.vcore;
74 124
125 spin_lock(&vcpu->arch.tbacct_lock);
75 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) 126 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
76 vc->preempt_tb = mftb(); 127 vc->preempt_tb = mftb();
128 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
129 vcpu->arch.busy_preempt = mftb();
130 spin_unlock(&vcpu->arch.tbacct_lock);
77} 131}
78 132
79void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 133void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -142,6 +196,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
142 vpa->yield_count = 1; 196 vpa->yield_count = 1;
143} 197}
144 198
199static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
200 unsigned long addr, unsigned long len)
201{
202 /* check address is cacheline aligned */
203 if (addr & (L1_CACHE_BYTES - 1))
204 return -EINVAL;
205 spin_lock(&vcpu->arch.vpa_update_lock);
206 if (v->next_gpa != addr || v->len != len) {
207 v->next_gpa = addr;
208 v->len = addr ? len : 0;
209 v->update_pending = 1;
210 }
211 spin_unlock(&vcpu->arch.vpa_update_lock);
212 return 0;
213}
214
145/* Length for a per-processor buffer is passed in at offset 4 in the buffer */ 215/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
146struct reg_vpa { 216struct reg_vpa {
147 u32 dummy; 217 u32 dummy;
@@ -317,10 +387,16 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
317 387
318static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) 388static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
319{ 389{
390 if (!(vcpu->arch.vpa.update_pending ||
391 vcpu->arch.slb_shadow.update_pending ||
392 vcpu->arch.dtl.update_pending))
393 return;
394
320 spin_lock(&vcpu->arch.vpa_update_lock); 395 spin_lock(&vcpu->arch.vpa_update_lock);
321 if (vcpu->arch.vpa.update_pending) { 396 if (vcpu->arch.vpa.update_pending) {
322 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); 397 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
323 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); 398 if (vcpu->arch.vpa.pinned_addr)
399 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
324 } 400 }
325 if (vcpu->arch.dtl.update_pending) { 401 if (vcpu->arch.dtl.update_pending) {
326 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl); 402 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
@@ -332,24 +408,61 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
332 spin_unlock(&vcpu->arch.vpa_update_lock); 408 spin_unlock(&vcpu->arch.vpa_update_lock);
333} 409}
334 410
411/*
412 * Return the accumulated stolen time for the vcore up until `now'.
413 * The caller should hold the vcore lock.
414 */
415static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
416{
417 u64 p;
418
419 /*
420 * If we are the task running the vcore, then since we hold
421 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
422 * can't be updated, so we don't need the tbacct_lock.
423 * If the vcore is inactive, it can't become active (since we
424 * hold the vcore lock), so the vcpu load/put functions won't
425 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
426 */
427 if (vc->vcore_state != VCORE_INACTIVE &&
428 vc->runner->arch.run_task != current) {
429 spin_lock(&vc->runner->arch.tbacct_lock);
430 p = vc->stolen_tb;
431 if (vc->preempt_tb != TB_NIL)
432 p += now - vc->preempt_tb;
433 spin_unlock(&vc->runner->arch.tbacct_lock);
434 } else {
435 p = vc->stolen_tb;
436 }
437 return p;
438}
439
335static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, 440static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
336 struct kvmppc_vcore *vc) 441 struct kvmppc_vcore *vc)
337{ 442{
338 struct dtl_entry *dt; 443 struct dtl_entry *dt;
339 struct lppaca *vpa; 444 struct lppaca *vpa;
340 unsigned long old_stolen; 445 unsigned long stolen;
446 unsigned long core_stolen;
447 u64 now;
341 448
342 dt = vcpu->arch.dtl_ptr; 449 dt = vcpu->arch.dtl_ptr;
343 vpa = vcpu->arch.vpa.pinned_addr; 450 vpa = vcpu->arch.vpa.pinned_addr;
344 old_stolen = vcpu->arch.stolen_logged; 451 now = mftb();
345 vcpu->arch.stolen_logged = vc->stolen_tb; 452 core_stolen = vcore_stolen_time(vc, now);
453 stolen = core_stolen - vcpu->arch.stolen_logged;
454 vcpu->arch.stolen_logged = core_stolen;
455 spin_lock(&vcpu->arch.tbacct_lock);
456 stolen += vcpu->arch.busy_stolen;
457 vcpu->arch.busy_stolen = 0;
458 spin_unlock(&vcpu->arch.tbacct_lock);
346 if (!dt || !vpa) 459 if (!dt || !vpa)
347 return; 460 return;
348 memset(dt, 0, sizeof(struct dtl_entry)); 461 memset(dt, 0, sizeof(struct dtl_entry));
349 dt->dispatch_reason = 7; 462 dt->dispatch_reason = 7;
350 dt->processor_id = vc->pcpu + vcpu->arch.ptid; 463 dt->processor_id = vc->pcpu + vcpu->arch.ptid;
351 dt->timebase = mftb(); 464 dt->timebase = now;
352 dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen; 465 dt->enqueue_to_dispatch_time = stolen;
353 dt->srr0 = kvmppc_get_pc(vcpu); 466 dt->srr0 = kvmppc_get_pc(vcpu);
354 dt->srr1 = vcpu->arch.shregs.msr; 467 dt->srr1 = vcpu->arch.shregs.msr;
355 ++dt; 468 ++dt;
@@ -366,13 +479,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
366 unsigned long req = kvmppc_get_gpr(vcpu, 3); 479 unsigned long req = kvmppc_get_gpr(vcpu, 3);
367 unsigned long target, ret = H_SUCCESS; 480 unsigned long target, ret = H_SUCCESS;
368 struct kvm_vcpu *tvcpu; 481 struct kvm_vcpu *tvcpu;
482 int idx;
369 483
370 switch (req) { 484 switch (req) {
371 case H_ENTER: 485 case H_ENTER:
486 idx = srcu_read_lock(&vcpu->kvm->srcu);
372 ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), 487 ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
373 kvmppc_get_gpr(vcpu, 5), 488 kvmppc_get_gpr(vcpu, 5),
374 kvmppc_get_gpr(vcpu, 6), 489 kvmppc_get_gpr(vcpu, 6),
375 kvmppc_get_gpr(vcpu, 7)); 490 kvmppc_get_gpr(vcpu, 7));
491 srcu_read_unlock(&vcpu->kvm->srcu, idx);
376 break; 492 break;
377 case H_CEDE: 493 case H_CEDE:
378 break; 494 break;
@@ -429,6 +545,17 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
429 case BOOK3S_INTERRUPT_PERFMON: 545 case BOOK3S_INTERRUPT_PERFMON:
430 r = RESUME_GUEST; 546 r = RESUME_GUEST;
431 break; 547 break;
548 case BOOK3S_INTERRUPT_MACHINE_CHECK:
549 /*
550 * Deliver a machine check interrupt to the guest.
551 * We have to do this, even if the host has handled the
552 * machine check, because machine checks use SRR0/1 and
553 * the interrupt might have trashed guest state in them.
554 */
555 kvmppc_book3s_queue_irqprio(vcpu,
556 BOOK3S_INTERRUPT_MACHINE_CHECK);
557 r = RESUME_GUEST;
558 break;
432 case BOOK3S_INTERRUPT_PROGRAM: 559 case BOOK3S_INTERRUPT_PROGRAM:
433 { 560 {
434 ulong flags; 561 ulong flags;
@@ -470,12 +597,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
470 * have been handled already. 597 * have been handled already.
471 */ 598 */
472 case BOOK3S_INTERRUPT_H_DATA_STORAGE: 599 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
473 r = kvmppc_book3s_hv_page_fault(run, vcpu, 600 r = RESUME_PAGE_FAULT;
474 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
475 break; 601 break;
476 case BOOK3S_INTERRUPT_H_INST_STORAGE: 602 case BOOK3S_INTERRUPT_H_INST_STORAGE:
477 r = kvmppc_book3s_hv_page_fault(run, vcpu, 603 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
478 kvmppc_get_pc(vcpu), 0); 604 vcpu->arch.fault_dsisr = 0;
605 r = RESUME_PAGE_FAULT;
479 break; 606 break;
480 /* 607 /*
481 * This occurs if the guest executes an illegal instruction. 608 * This occurs if the guest executes an illegal instruction.
@@ -535,36 +662,174 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
535 return 0; 662 return 0;
536} 663}
537 664
538int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 665int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
539{ 666{
540 int r = -EINVAL; 667 int r = 0;
668 long int i;
541 669
542 switch (reg->id) { 670 switch (id) {
543 case KVM_REG_PPC_HIOR: 671 case KVM_REG_PPC_HIOR:
544 r = put_user(0, (u64 __user *)reg->addr); 672 *val = get_reg_val(id, 0);
673 break;
674 case KVM_REG_PPC_DABR:
675 *val = get_reg_val(id, vcpu->arch.dabr);
676 break;
677 case KVM_REG_PPC_DSCR:
678 *val = get_reg_val(id, vcpu->arch.dscr);
679 break;
680 case KVM_REG_PPC_PURR:
681 *val = get_reg_val(id, vcpu->arch.purr);
682 break;
683 case KVM_REG_PPC_SPURR:
684 *val = get_reg_val(id, vcpu->arch.spurr);
685 break;
686 case KVM_REG_PPC_AMR:
687 *val = get_reg_val(id, vcpu->arch.amr);
688 break;
689 case KVM_REG_PPC_UAMOR:
690 *val = get_reg_val(id, vcpu->arch.uamor);
691 break;
692 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
693 i = id - KVM_REG_PPC_MMCR0;
694 *val = get_reg_val(id, vcpu->arch.mmcr[i]);
695 break;
696 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
697 i = id - KVM_REG_PPC_PMC1;
698 *val = get_reg_val(id, vcpu->arch.pmc[i]);
699 break;
700#ifdef CONFIG_VSX
701 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
702 if (cpu_has_feature(CPU_FTR_VSX)) {
703 /* VSX => FP reg i is stored in arch.vsr[2*i] */
704 long int i = id - KVM_REG_PPC_FPR0;
705 *val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
706 } else {
707 /* let generic code handle it */
708 r = -EINVAL;
709 }
710 break;
711 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
712 if (cpu_has_feature(CPU_FTR_VSX)) {
713 long int i = id - KVM_REG_PPC_VSR0;
714 val->vsxval[0] = vcpu->arch.vsr[2 * i];
715 val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
716 } else {
717 r = -ENXIO;
718 }
719 break;
720#endif /* CONFIG_VSX */
721 case KVM_REG_PPC_VPA_ADDR:
722 spin_lock(&vcpu->arch.vpa_update_lock);
723 *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
724 spin_unlock(&vcpu->arch.vpa_update_lock);
725 break;
726 case KVM_REG_PPC_VPA_SLB:
727 spin_lock(&vcpu->arch.vpa_update_lock);
728 val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
729 val->vpaval.length = vcpu->arch.slb_shadow.len;
730 spin_unlock(&vcpu->arch.vpa_update_lock);
731 break;
732 case KVM_REG_PPC_VPA_DTL:
733 spin_lock(&vcpu->arch.vpa_update_lock);
734 val->vpaval.addr = vcpu->arch.dtl.next_gpa;
735 val->vpaval.length = vcpu->arch.dtl.len;
736 spin_unlock(&vcpu->arch.vpa_update_lock);
545 break; 737 break;
546 default: 738 default:
739 r = -EINVAL;
547 break; 740 break;
548 } 741 }
549 742
550 return r; 743 return r;
551} 744}
552 745
553int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 746int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
554{ 747{
555 int r = -EINVAL; 748 int r = 0;
749 long int i;
750 unsigned long addr, len;
556 751
557 switch (reg->id) { 752 switch (id) {
558 case KVM_REG_PPC_HIOR: 753 case KVM_REG_PPC_HIOR:
559 {
560 u64 hior;
561 /* Only allow this to be set to zero */ 754 /* Only allow this to be set to zero */
562 r = get_user(hior, (u64 __user *)reg->addr); 755 if (set_reg_val(id, *val))
563 if (!r && (hior != 0))
564 r = -EINVAL; 756 r = -EINVAL;
565 break; 757 break;
566 } 758 case KVM_REG_PPC_DABR:
759 vcpu->arch.dabr = set_reg_val(id, *val);
760 break;
761 case KVM_REG_PPC_DSCR:
762 vcpu->arch.dscr = set_reg_val(id, *val);
763 break;
764 case KVM_REG_PPC_PURR:
765 vcpu->arch.purr = set_reg_val(id, *val);
766 break;
767 case KVM_REG_PPC_SPURR:
768 vcpu->arch.spurr = set_reg_val(id, *val);
769 break;
770 case KVM_REG_PPC_AMR:
771 vcpu->arch.amr = set_reg_val(id, *val);
772 break;
773 case KVM_REG_PPC_UAMOR:
774 vcpu->arch.uamor = set_reg_val(id, *val);
775 break;
776 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
777 i = id - KVM_REG_PPC_MMCR0;
778 vcpu->arch.mmcr[i] = set_reg_val(id, *val);
779 break;
780 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
781 i = id - KVM_REG_PPC_PMC1;
782 vcpu->arch.pmc[i] = set_reg_val(id, *val);
783 break;
784#ifdef CONFIG_VSX
785 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
786 if (cpu_has_feature(CPU_FTR_VSX)) {
787 /* VSX => FP reg i is stored in arch.vsr[2*i] */
788 long int i = id - KVM_REG_PPC_FPR0;
789 vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
790 } else {
791 /* let generic code handle it */
792 r = -EINVAL;
793 }
794 break;
795 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
796 if (cpu_has_feature(CPU_FTR_VSX)) {
797 long int i = id - KVM_REG_PPC_VSR0;
798 vcpu->arch.vsr[2 * i] = val->vsxval[0];
799 vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
800 } else {
801 r = -ENXIO;
802 }
803 break;
804#endif /* CONFIG_VSX */
805 case KVM_REG_PPC_VPA_ADDR:
806 addr = set_reg_val(id, *val);
807 r = -EINVAL;
808 if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
809 vcpu->arch.dtl.next_gpa))
810 break;
811 r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
812 break;
813 case KVM_REG_PPC_VPA_SLB:
814 addr = val->vpaval.addr;
815 len = val->vpaval.length;
816 r = -EINVAL;
817 if (addr && !vcpu->arch.vpa.next_gpa)
818 break;
819 r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
820 break;
821 case KVM_REG_PPC_VPA_DTL:
822 addr = val->vpaval.addr;
823 len = val->vpaval.length;
824 r = -EINVAL;
825 if (addr && (len < sizeof(struct dtl_entry) ||
826 !vcpu->arch.vpa.next_gpa))
827 break;
828 len -= len % sizeof(struct dtl_entry);
829 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
830 break;
567 default: 831 default:
832 r = -EINVAL;
568 break; 833 break;
569 } 834 }
570 835
@@ -599,20 +864,18 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
599 goto free_vcpu; 864 goto free_vcpu;
600 865
601 vcpu->arch.shared = &vcpu->arch.shregs; 866 vcpu->arch.shared = &vcpu->arch.shregs;
602 vcpu->arch.last_cpu = -1;
603 vcpu->arch.mmcr[0] = MMCR0_FC; 867 vcpu->arch.mmcr[0] = MMCR0_FC;
604 vcpu->arch.ctrl = CTRL_RUNLATCH; 868 vcpu->arch.ctrl = CTRL_RUNLATCH;
605 /* default to host PVR, since we can't spoof it */ 869 /* default to host PVR, since we can't spoof it */
606 vcpu->arch.pvr = mfspr(SPRN_PVR); 870 vcpu->arch.pvr = mfspr(SPRN_PVR);
607 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 871 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
608 spin_lock_init(&vcpu->arch.vpa_update_lock); 872 spin_lock_init(&vcpu->arch.vpa_update_lock);
873 spin_lock_init(&vcpu->arch.tbacct_lock);
874 vcpu->arch.busy_preempt = TB_NIL;
609 875
610 kvmppc_mmu_book3s_hv_init(vcpu); 876 kvmppc_mmu_book3s_hv_init(vcpu);
611 877
612 /* 878 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
613 * We consider the vcpu stopped until we see the first run ioctl for it.
614 */
615 vcpu->arch.state = KVMPPC_VCPU_STOPPED;
616 879
617 init_waitqueue_head(&vcpu->arch.cpu_run); 880 init_waitqueue_head(&vcpu->arch.cpu_run);
618 881
@@ -624,9 +887,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
624 INIT_LIST_HEAD(&vcore->runnable_threads); 887 INIT_LIST_HEAD(&vcore->runnable_threads);
625 spin_lock_init(&vcore->lock); 888 spin_lock_init(&vcore->lock);
626 init_waitqueue_head(&vcore->wq); 889 init_waitqueue_head(&vcore->wq);
627 vcore->preempt_tb = mftb(); 890 vcore->preempt_tb = TB_NIL;
628 } 891 }
629 kvm->arch.vcores[core] = vcore; 892 kvm->arch.vcores[core] = vcore;
893 kvm->arch.online_vcores++;
630 } 894 }
631 mutex_unlock(&kvm->lock); 895 mutex_unlock(&kvm->lock);
632 896
@@ -637,7 +901,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
637 ++vcore->num_threads; 901 ++vcore->num_threads;
638 spin_unlock(&vcore->lock); 902 spin_unlock(&vcore->lock);
639 vcpu->arch.vcore = vcore; 903 vcpu->arch.vcore = vcore;
640 vcpu->arch.stolen_logged = vcore->stolen_tb;
641 904
642 vcpu->arch.cpu_type = KVM_CPU_3S_64; 905 vcpu->arch.cpu_type = KVM_CPU_3S_64;
643 kvmppc_sanity_check(vcpu); 906 kvmppc_sanity_check(vcpu);
@@ -697,17 +960,18 @@ extern void xics_wake_cpu(int cpu);
697static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, 960static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
698 struct kvm_vcpu *vcpu) 961 struct kvm_vcpu *vcpu)
699{ 962{
700 struct kvm_vcpu *v; 963 u64 now;
701 964
702 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) 965 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
703 return; 966 return;
967 spin_lock(&vcpu->arch.tbacct_lock);
968 now = mftb();
969 vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
970 vcpu->arch.stolen_logged;
971 vcpu->arch.busy_preempt = now;
704 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 972 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
973 spin_unlock(&vcpu->arch.tbacct_lock);
705 --vc->n_runnable; 974 --vc->n_runnable;
706 ++vc->n_busy;
707 /* decrement the physical thread id of each following vcpu */
708 v = vcpu;
709 list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
710 --v->arch.ptid;
711 list_del(&vcpu->arch.run_list); 975 list_del(&vcpu->arch.run_list);
712} 976}
713 977
@@ -720,6 +984,7 @@ static int kvmppc_grab_hwthread(int cpu)
720 984
721 /* Ensure the thread won't go into the kernel if it wakes */ 985 /* Ensure the thread won't go into the kernel if it wakes */
722 tpaca->kvm_hstate.hwthread_req = 1; 986 tpaca->kvm_hstate.hwthread_req = 1;
987 tpaca->kvm_hstate.kvm_vcpu = NULL;
723 988
724 /* 989 /*
725 * If the thread is already executing in the kernel (e.g. handling 990 * If the thread is already executing in the kernel (e.g. handling
@@ -769,7 +1034,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
769 smp_wmb(); 1034 smp_wmb();
770#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 1035#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
771 if (vcpu->arch.ptid) { 1036 if (vcpu->arch.ptid) {
772 kvmppc_grab_hwthread(cpu);
773 xics_wake_cpu(cpu); 1037 xics_wake_cpu(cpu);
774 ++vc->n_woken; 1038 ++vc->n_woken;
775 } 1039 }
@@ -795,7 +1059,8 @@ static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
795 1059
796/* 1060/*
797 * Check that we are on thread 0 and that any other threads in 1061 * Check that we are on thread 0 and that any other threads in
798 * this core are off-line. 1062 * this core are off-line. Then grab the threads so they can't
1063 * enter the kernel.
799 */ 1064 */
800static int on_primary_thread(void) 1065static int on_primary_thread(void)
801{ 1066{
@@ -807,6 +1072,17 @@ static int on_primary_thread(void)
807 while (++thr < threads_per_core) 1072 while (++thr < threads_per_core)
808 if (cpu_online(cpu + thr)) 1073 if (cpu_online(cpu + thr))
809 return 0; 1074 return 0;
1075
1076 /* Grab all hw threads so they can't go into the kernel */
1077 for (thr = 1; thr < threads_per_core; ++thr) {
1078 if (kvmppc_grab_hwthread(cpu + thr)) {
1079 /* Couldn't grab one; let the others go */
1080 do {
1081 kvmppc_release_hwthread(cpu + thr);
1082 } while (--thr > 0);
1083 return 0;
1084 }
1085 }
810 return 1; 1086 return 1;
811} 1087}
812 1088
@@ -814,21 +1090,24 @@ static int on_primary_thread(void)
814 * Run a set of guest threads on a physical core. 1090 * Run a set of guest threads on a physical core.
815 * Called with vc->lock held. 1091 * Called with vc->lock held.
816 */ 1092 */
817static int kvmppc_run_core(struct kvmppc_vcore *vc) 1093static void kvmppc_run_core(struct kvmppc_vcore *vc)
818{ 1094{
819 struct kvm_vcpu *vcpu, *vcpu0, *vnext; 1095 struct kvm_vcpu *vcpu, *vcpu0, *vnext;
820 long ret; 1096 long ret;
821 u64 now; 1097 u64 now;
822 int ptid, i, need_vpa_update; 1098 int ptid, i, need_vpa_update;
1099 int srcu_idx;
1100 struct kvm_vcpu *vcpus_to_update[threads_per_core];
823 1101
824 /* don't start if any threads have a signal pending */ 1102 /* don't start if any threads have a signal pending */
825 need_vpa_update = 0; 1103 need_vpa_update = 0;
826 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1104 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
827 if (signal_pending(vcpu->arch.run_task)) 1105 if (signal_pending(vcpu->arch.run_task))
828 return 0; 1106 return;
829 need_vpa_update |= vcpu->arch.vpa.update_pending | 1107 if (vcpu->arch.vpa.update_pending ||
830 vcpu->arch.slb_shadow.update_pending | 1108 vcpu->arch.slb_shadow.update_pending ||
831 vcpu->arch.dtl.update_pending; 1109 vcpu->arch.dtl.update_pending)
1110 vcpus_to_update[need_vpa_update++] = vcpu;
832 } 1111 }
833 1112
834 /* 1113 /*
@@ -838,7 +1117,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
838 vc->n_woken = 0; 1117 vc->n_woken = 0;
839 vc->nap_count = 0; 1118 vc->nap_count = 0;
840 vc->entry_exit_count = 0; 1119 vc->entry_exit_count = 0;
841 vc->vcore_state = VCORE_RUNNING; 1120 vc->vcore_state = VCORE_STARTING;
842 vc->in_guest = 0; 1121 vc->in_guest = 0;
843 vc->napping_threads = 0; 1122 vc->napping_threads = 0;
844 1123
@@ -848,24 +1127,12 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
848 */ 1127 */
849 if (need_vpa_update) { 1128 if (need_vpa_update) {
850 spin_unlock(&vc->lock); 1129 spin_unlock(&vc->lock);
851 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 1130 for (i = 0; i < need_vpa_update; ++i)
852 kvmppc_update_vpas(vcpu); 1131 kvmppc_update_vpas(vcpus_to_update[i]);
853 spin_lock(&vc->lock); 1132 spin_lock(&vc->lock);
854 } 1133 }
855 1134
856 /* 1135 /*
857 * Make sure we are running on thread 0, and that
858 * secondary threads are offline.
859 * XXX we should also block attempts to bring any
860 * secondary threads online.
861 */
862 if (threads_per_core > 1 && !on_primary_thread()) {
863 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
864 vcpu->arch.ret = -EBUSY;
865 goto out;
866 }
867
868 /*
869 * Assign physical thread IDs, first to non-ceded vcpus 1136 * Assign physical thread IDs, first to non-ceded vcpus
870 * and then to ceded ones. 1137 * and then to ceded ones.
871 */ 1138 */
@@ -879,28 +1146,36 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
879 } 1146 }
880 } 1147 }
881 if (!vcpu0) 1148 if (!vcpu0)
882 return 0; /* nothing to run */ 1149 goto out; /* nothing to run; should never happen */
883 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 1150 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
884 if (vcpu->arch.ceded) 1151 if (vcpu->arch.ceded)
885 vcpu->arch.ptid = ptid++; 1152 vcpu->arch.ptid = ptid++;
886 1153
887 vc->stolen_tb += mftb() - vc->preempt_tb; 1154 /*
1155 * Make sure we are running on thread 0, and that
1156 * secondary threads are offline.
1157 */
1158 if (threads_per_core > 1 && !on_primary_thread()) {
1159 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
1160 vcpu->arch.ret = -EBUSY;
1161 goto out;
1162 }
1163
888 vc->pcpu = smp_processor_id(); 1164 vc->pcpu = smp_processor_id();
889 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1165 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
890 kvmppc_start_thread(vcpu); 1166 kvmppc_start_thread(vcpu);
891 kvmppc_create_dtl_entry(vcpu, vc); 1167 kvmppc_create_dtl_entry(vcpu, vc);
892 } 1168 }
893 /* Grab any remaining hw threads so they can't go into the kernel */
894 for (i = ptid; i < threads_per_core; ++i)
895 kvmppc_grab_hwthread(vc->pcpu + i);
896 1169
1170 vc->vcore_state = VCORE_RUNNING;
897 preempt_disable(); 1171 preempt_disable();
898 spin_unlock(&vc->lock); 1172 spin_unlock(&vc->lock);
899 1173
900 kvm_guest_enter(); 1174 kvm_guest_enter();
1175
1176 srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
1177
901 __kvmppc_vcore_entry(NULL, vcpu0); 1178 __kvmppc_vcore_entry(NULL, vcpu0);
902 for (i = 0; i < threads_per_core; ++i)
903 kvmppc_release_hwthread(vc->pcpu + i);
904 1179
905 spin_lock(&vc->lock); 1180 spin_lock(&vc->lock);
906 /* disable sending of IPIs on virtual external irqs */ 1181 /* disable sending of IPIs on virtual external irqs */
@@ -909,10 +1184,14 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
909 /* wait for secondary threads to finish writing their state to memory */ 1184 /* wait for secondary threads to finish writing their state to memory */
910 if (vc->nap_count < vc->n_woken) 1185 if (vc->nap_count < vc->n_woken)
911 kvmppc_wait_for_nap(vc); 1186 kvmppc_wait_for_nap(vc);
1187 for (i = 0; i < threads_per_core; ++i)
1188 kvmppc_release_hwthread(vc->pcpu + i);
912 /* prevent other vcpu threads from doing kvmppc_start_thread() now */ 1189 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
913 vc->vcore_state = VCORE_EXITING; 1190 vc->vcore_state = VCORE_EXITING;
914 spin_unlock(&vc->lock); 1191 spin_unlock(&vc->lock);
915 1192
1193 srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
1194
916 /* make sure updates to secondary vcpu structs are visible now */ 1195 /* make sure updates to secondary vcpu structs are visible now */
917 smp_mb(); 1196 smp_mb();
918 kvm_guest_exit(); 1197 kvm_guest_exit();
@@ -920,6 +1199,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
920 preempt_enable(); 1199 preempt_enable();
921 kvm_resched(vcpu); 1200 kvm_resched(vcpu);
922 1201
1202 spin_lock(&vc->lock);
923 now = get_tb(); 1203 now = get_tb();
924 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1204 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
925 /* cancel pending dec exception if dec is positive */ 1205 /* cancel pending dec exception if dec is positive */
@@ -943,10 +1223,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
943 } 1223 }
944 } 1224 }
945 1225
946 spin_lock(&vc->lock);
947 out: 1226 out:
948 vc->vcore_state = VCORE_INACTIVE; 1227 vc->vcore_state = VCORE_INACTIVE;
949 vc->preempt_tb = mftb();
950 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 1228 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
951 arch.run_list) { 1229 arch.run_list) {
952 if (vcpu->arch.ret != RESUME_GUEST) { 1230 if (vcpu->arch.ret != RESUME_GUEST) {
@@ -954,8 +1232,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
954 wake_up(&vcpu->arch.cpu_run); 1232 wake_up(&vcpu->arch.cpu_run);
955 } 1233 }
956 } 1234 }
957
958 return 1;
959} 1235}
960 1236
961/* 1237/*
@@ -979,20 +1255,11 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
979static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) 1255static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
980{ 1256{
981 DEFINE_WAIT(wait); 1257 DEFINE_WAIT(wait);
982 struct kvm_vcpu *v;
983 int all_idle = 1;
984 1258
985 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); 1259 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
986 vc->vcore_state = VCORE_SLEEPING; 1260 vc->vcore_state = VCORE_SLEEPING;
987 spin_unlock(&vc->lock); 1261 spin_unlock(&vc->lock);
988 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { 1262 schedule();
989 if (!v->arch.ceded || v->arch.pending_exceptions) {
990 all_idle = 0;
991 break;
992 }
993 }
994 if (all_idle)
995 schedule();
996 finish_wait(&vc->wq, &wait); 1263 finish_wait(&vc->wq, &wait);
997 spin_lock(&vc->lock); 1264 spin_lock(&vc->lock);
998 vc->vcore_state = VCORE_INACTIVE; 1265 vc->vcore_state = VCORE_INACTIVE;
@@ -1001,13 +1268,13 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
1001static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1268static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1002{ 1269{
1003 int n_ceded; 1270 int n_ceded;
1004 int prev_state;
1005 struct kvmppc_vcore *vc; 1271 struct kvmppc_vcore *vc;
1006 struct kvm_vcpu *v, *vn; 1272 struct kvm_vcpu *v, *vn;
1007 1273
1008 kvm_run->exit_reason = 0; 1274 kvm_run->exit_reason = 0;
1009 vcpu->arch.ret = RESUME_GUEST; 1275 vcpu->arch.ret = RESUME_GUEST;
1010 vcpu->arch.trap = 0; 1276 vcpu->arch.trap = 0;
1277 kvmppc_update_vpas(vcpu);
1011 1278
1012 /* 1279 /*
1013 * Synchronize with other threads in this virtual core 1280 * Synchronize with other threads in this virtual core
@@ -1017,8 +1284,9 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1017 vcpu->arch.ceded = 0; 1284 vcpu->arch.ceded = 0;
1018 vcpu->arch.run_task = current; 1285 vcpu->arch.run_task = current;
1019 vcpu->arch.kvm_run = kvm_run; 1286 vcpu->arch.kvm_run = kvm_run;
1020 prev_state = vcpu->arch.state; 1287 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
1021 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; 1288 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
1289 vcpu->arch.busy_preempt = TB_NIL;
1022 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); 1290 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
1023 ++vc->n_runnable; 1291 ++vc->n_runnable;
1024 1292
@@ -1027,33 +1295,26 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1027 * If the vcore is already running, we may be able to start 1295 * If the vcore is already running, we may be able to start
1028 * this thread straight away and have it join in. 1296 * this thread straight away and have it join in.
1029 */ 1297 */
1030 if (prev_state == KVMPPC_VCPU_STOPPED) { 1298 if (!signal_pending(current)) {
1031 if (vc->vcore_state == VCORE_RUNNING && 1299 if (vc->vcore_state == VCORE_RUNNING &&
1032 VCORE_EXIT_COUNT(vc) == 0) { 1300 VCORE_EXIT_COUNT(vc) == 0) {
1033 vcpu->arch.ptid = vc->n_runnable - 1; 1301 vcpu->arch.ptid = vc->n_runnable - 1;
1302 kvmppc_create_dtl_entry(vcpu, vc);
1034 kvmppc_start_thread(vcpu); 1303 kvmppc_start_thread(vcpu);
1304 } else if (vc->vcore_state == VCORE_SLEEPING) {
1305 wake_up(&vc->wq);
1035 } 1306 }
1036 1307
1037 } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST) 1308 }
1038 --vc->n_busy;
1039 1309
1040 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 1310 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
1041 !signal_pending(current)) { 1311 !signal_pending(current)) {
1042 if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) { 1312 if (vc->vcore_state != VCORE_INACTIVE) {
1043 spin_unlock(&vc->lock); 1313 spin_unlock(&vc->lock);
1044 kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); 1314 kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
1045 spin_lock(&vc->lock); 1315 spin_lock(&vc->lock);
1046 continue; 1316 continue;
1047 } 1317 }
1048 vc->runner = vcpu;
1049 n_ceded = 0;
1050 list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
1051 n_ceded += v->arch.ceded;
1052 if (n_ceded == vc->n_runnable)
1053 kvmppc_vcore_blocked(vc);
1054 else
1055 kvmppc_run_core(vc);
1056
1057 list_for_each_entry_safe(v, vn, &vc->runnable_threads, 1318 list_for_each_entry_safe(v, vn, &vc->runnable_threads,
1058 arch.run_list) { 1319 arch.run_list) {
1059 kvmppc_core_prepare_to_enter(v); 1320 kvmppc_core_prepare_to_enter(v);
@@ -1065,22 +1326,40 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1065 wake_up(&v->arch.cpu_run); 1326 wake_up(&v->arch.cpu_run);
1066 } 1327 }
1067 } 1328 }
1329 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
1330 break;
1331 vc->runner = vcpu;
1332 n_ceded = 0;
1333 list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
1334 if (!v->arch.pending_exceptions)
1335 n_ceded += v->arch.ceded;
1336 if (n_ceded == vc->n_runnable)
1337 kvmppc_vcore_blocked(vc);
1338 else
1339 kvmppc_run_core(vc);
1068 vc->runner = NULL; 1340 vc->runner = NULL;
1069 } 1341 }
1070 1342
1071 if (signal_pending(current)) { 1343 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
1072 if (vc->vcore_state == VCORE_RUNNING || 1344 (vc->vcore_state == VCORE_RUNNING ||
1073 vc->vcore_state == VCORE_EXITING) { 1345 vc->vcore_state == VCORE_EXITING)) {
1074 spin_unlock(&vc->lock); 1346 spin_unlock(&vc->lock);
1075 kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); 1347 kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
1076 spin_lock(&vc->lock); 1348 spin_lock(&vc->lock);
1077 } 1349 }
1078 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 1350
1079 kvmppc_remove_runnable(vc, vcpu); 1351 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
1080 vcpu->stat.signal_exits++; 1352 kvmppc_remove_runnable(vc, vcpu);
1081 kvm_run->exit_reason = KVM_EXIT_INTR; 1353 vcpu->stat.signal_exits++;
1082 vcpu->arch.ret = -EINTR; 1354 kvm_run->exit_reason = KVM_EXIT_INTR;
1083 } 1355 vcpu->arch.ret = -EINTR;
1356 }
1357
1358 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
1359 /* Wake up some vcpu to run the core */
1360 v = list_first_entry(&vc->runnable_threads,
1361 struct kvm_vcpu, arch.run_list);
1362 wake_up(&v->arch.cpu_run);
1084 } 1363 }
1085 1364
1086 spin_unlock(&vc->lock); 1365 spin_unlock(&vc->lock);
@@ -1090,6 +1369,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1090int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) 1369int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
1091{ 1370{
1092 int r; 1371 int r;
1372 int srcu_idx;
1093 1373
1094 if (!vcpu->arch.sane) { 1374 if (!vcpu->arch.sane) {
1095 run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1375 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -1120,6 +1400,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
1120 flush_vsx_to_thread(current); 1400 flush_vsx_to_thread(current);
1121 vcpu->arch.wqp = &vcpu->arch.vcore->wq; 1401 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
1122 vcpu->arch.pgdir = current->mm->pgd; 1402 vcpu->arch.pgdir = current->mm->pgd;
1403 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
1123 1404
1124 do { 1405 do {
1125 r = kvmppc_run_vcpu(run, vcpu); 1406 r = kvmppc_run_vcpu(run, vcpu);
@@ -1128,10 +1409,16 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
1128 !(vcpu->arch.shregs.msr & MSR_PR)) { 1409 !(vcpu->arch.shregs.msr & MSR_PR)) {
1129 r = kvmppc_pseries_do_hcall(vcpu); 1410 r = kvmppc_pseries_do_hcall(vcpu);
1130 kvmppc_core_prepare_to_enter(vcpu); 1411 kvmppc_core_prepare_to_enter(vcpu);
1412 } else if (r == RESUME_PAGE_FAULT) {
1413 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1414 r = kvmppc_book3s_hv_page_fault(run, vcpu,
1415 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
1416 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1131 } 1417 }
1132 } while (r == RESUME_GUEST); 1418 } while (r == RESUME_GUEST);
1133 1419
1134 out: 1420 out:
1421 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
1135 atomic_dec(&vcpu->kvm->arch.vcpus_running); 1422 atomic_dec(&vcpu->kvm->arch.vcpus_running);
1136 return r; 1423 return r;
1137} 1424}
@@ -1273,7 +1560,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1273 n = kvm_dirty_bitmap_bytes(memslot); 1560 n = kvm_dirty_bitmap_bytes(memslot);
1274 memset(memslot->dirty_bitmap, 0, n); 1561 memset(memslot->dirty_bitmap, 0, n);
1275 1562
1276 r = kvmppc_hv_get_dirty_log(kvm, memslot); 1563 r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
1277 if (r) 1564 if (r)
1278 goto out; 1565 goto out;
1279 1566
@@ -1287,67 +1574,88 @@ out:
1287 return r; 1574 return r;
1288} 1575}
1289 1576
1290static unsigned long slb_pgsize_encoding(unsigned long psize) 1577static void unpin_slot(struct kvm_memory_slot *memslot)
1291{ 1578{
1292 unsigned long senc = 0; 1579 unsigned long *physp;
1580 unsigned long j, npages, pfn;
1581 struct page *page;
1293 1582
1294 if (psize > 0x1000) { 1583 physp = memslot->arch.slot_phys;
1295 senc = SLB_VSID_L; 1584 npages = memslot->npages;
1296 if (psize == 0x10000) 1585 if (!physp)
1297 senc |= SLB_VSID_LP_01; 1586 return;
1587 for (j = 0; j < npages; j++) {
1588 if (!(physp[j] & KVMPPC_GOT_PAGE))
1589 continue;
1590 pfn = physp[j] >> PAGE_SHIFT;
1591 page = pfn_to_page(pfn);
1592 SetPageDirty(page);
1593 put_page(page);
1594 }
1595}
1596
1597void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1598 struct kvm_memory_slot *dont)
1599{
1600 if (!dont || free->arch.rmap != dont->arch.rmap) {
1601 vfree(free->arch.rmap);
1602 free->arch.rmap = NULL;
1603 }
1604 if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
1605 unpin_slot(free);
1606 vfree(free->arch.slot_phys);
1607 free->arch.slot_phys = NULL;
1298 } 1608 }
1299 return senc; 1609}
1610
1611int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1612 unsigned long npages)
1613{
1614 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
1615 if (!slot->arch.rmap)
1616 return -ENOMEM;
1617 slot->arch.slot_phys = NULL;
1618
1619 return 0;
1300} 1620}
1301 1621
1302int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1622int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1303 struct kvm_userspace_memory_region *mem) 1623 struct kvm_memory_slot *memslot,
1624 struct kvm_userspace_memory_region *mem)
1304{ 1625{
1305 unsigned long npages;
1306 unsigned long *phys; 1626 unsigned long *phys;
1307 1627
1308 /* Allocate a slot_phys array */ 1628 /* Allocate a slot_phys array if needed */
1309 phys = kvm->arch.slot_phys[mem->slot]; 1629 phys = memslot->arch.slot_phys;
1310 if (!kvm->arch.using_mmu_notifiers && !phys) { 1630 if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
1311 npages = mem->memory_size >> PAGE_SHIFT; 1631 phys = vzalloc(memslot->npages * sizeof(unsigned long));
1312 phys = vzalloc(npages * sizeof(unsigned long));
1313 if (!phys) 1632 if (!phys)
1314 return -ENOMEM; 1633 return -ENOMEM;
1315 kvm->arch.slot_phys[mem->slot] = phys; 1634 memslot->arch.slot_phys = phys;
1316 kvm->arch.slot_npages[mem->slot] = npages;
1317 } 1635 }
1318 1636
1319 return 0; 1637 return 0;
1320} 1638}
1321 1639
1322static void unpin_slot(struct kvm *kvm, int slot_id) 1640void kvmppc_core_commit_memory_region(struct kvm *kvm,
1641 struct kvm_userspace_memory_region *mem,
1642 struct kvm_memory_slot old)
1323{ 1643{
1324 unsigned long *physp; 1644 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
1325 unsigned long j, npages, pfn; 1645 struct kvm_memory_slot *memslot;
1326 struct page *page;
1327 1646
1328 physp = kvm->arch.slot_phys[slot_id]; 1647 if (npages && old.npages) {
1329 npages = kvm->arch.slot_npages[slot_id]; 1648 /*
1330 if (physp) { 1649 * If modifying a memslot, reset all the rmap dirty bits.
1331 spin_lock(&kvm->arch.slot_phys_lock); 1650 * If this is a new memslot, we don't need to do anything
1332 for (j = 0; j < npages; j++) { 1651 * since the rmap array starts out as all zeroes,
1333 if (!(physp[j] & KVMPPC_GOT_PAGE)) 1652 * i.e. no pages are dirty.
1334 continue; 1653 */
1335 pfn = physp[j] >> PAGE_SHIFT; 1654 memslot = id_to_memslot(kvm->memslots, mem->slot);
1336 page = pfn_to_page(pfn); 1655 kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
1337 SetPageDirty(page);
1338 put_page(page);
1339 }
1340 kvm->arch.slot_phys[slot_id] = NULL;
1341 spin_unlock(&kvm->arch.slot_phys_lock);
1342 vfree(physp);
1343 } 1656 }
1344} 1657}
1345 1658
1346void kvmppc_core_commit_memory_region(struct kvm *kvm,
1347 struct kvm_userspace_memory_region *mem)
1348{
1349}
1350
1351static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) 1659static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1352{ 1660{
1353 int err = 0; 1661 int err = 0;
@@ -1362,6 +1670,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1362 unsigned long rmls; 1670 unsigned long rmls;
1363 unsigned long *physp; 1671 unsigned long *physp;
1364 unsigned long i, npages; 1672 unsigned long i, npages;
1673 int srcu_idx;
1365 1674
1366 mutex_lock(&kvm->lock); 1675 mutex_lock(&kvm->lock);
1367 if (kvm->arch.rma_setup_done) 1676 if (kvm->arch.rma_setup_done)
@@ -1377,12 +1686,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1377 } 1686 }
1378 1687
1379 /* Look up the memslot for guest physical address 0 */ 1688 /* Look up the memslot for guest physical address 0 */
1689 srcu_idx = srcu_read_lock(&kvm->srcu);
1380 memslot = gfn_to_memslot(kvm, 0); 1690 memslot = gfn_to_memslot(kvm, 0);
1381 1691
1382 /* We must have some memory at 0 by now */ 1692 /* We must have some memory at 0 by now */
1383 err = -EINVAL; 1693 err = -EINVAL;
1384 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 1694 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
1385 goto out; 1695 goto out_srcu;
1386 1696
1387 /* Look up the VMA for the start of this memory slot */ 1697 /* Look up the VMA for the start of this memory slot */
1388 hva = memslot->userspace_addr; 1698 hva = memslot->userspace_addr;
@@ -1406,14 +1716,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1406 err = -EPERM; 1716 err = -EPERM;
1407 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 1717 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1408 pr_err("KVM: CPU requires an RMO\n"); 1718 pr_err("KVM: CPU requires an RMO\n");
1409 goto out; 1719 goto out_srcu;
1410 } 1720 }
1411 1721
1412 /* We can handle 4k, 64k or 16M pages in the VRMA */ 1722 /* We can handle 4k, 64k or 16M pages in the VRMA */
1413 err = -EINVAL; 1723 err = -EINVAL;
1414 if (!(psize == 0x1000 || psize == 0x10000 || 1724 if (!(psize == 0x1000 || psize == 0x10000 ||
1415 psize == 0x1000000)) 1725 psize == 0x1000000))
1416 goto out; 1726 goto out_srcu;
1417 1727
1418 /* Update VRMASD field in the LPCR */ 1728 /* Update VRMASD field in the LPCR */
1419 senc = slb_pgsize_encoding(psize); 1729 senc = slb_pgsize_encoding(psize);
@@ -1436,7 +1746,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1436 err = -EINVAL; 1746 err = -EINVAL;
1437 if (rmls < 0) { 1747 if (rmls < 0) {
1438 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); 1748 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
1439 goto out; 1749 goto out_srcu;
1440 } 1750 }
1441 atomic_inc(&ri->use_count); 1751 atomic_inc(&ri->use_count);
1442 kvm->arch.rma = ri; 1752 kvm->arch.rma = ri;
@@ -1465,17 +1775,24 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1465 /* Initialize phys addrs of pages in RMO */ 1775 /* Initialize phys addrs of pages in RMO */
1466 npages = ri->npages; 1776 npages = ri->npages;
1467 porder = __ilog2(npages); 1777 porder = __ilog2(npages);
1468 physp = kvm->arch.slot_phys[memslot->id]; 1778 physp = memslot->arch.slot_phys;
1469 spin_lock(&kvm->arch.slot_phys_lock); 1779 if (physp) {
1470 for (i = 0; i < npages; ++i) 1780 if (npages > memslot->npages)
1471 physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; 1781 npages = memslot->npages;
1472 spin_unlock(&kvm->arch.slot_phys_lock); 1782 spin_lock(&kvm->arch.slot_phys_lock);
1783 for (i = 0; i < npages; ++i)
1784 physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
1785 porder;
1786 spin_unlock(&kvm->arch.slot_phys_lock);
1787 }
1473 } 1788 }
1474 1789
1475 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ 1790 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
1476 smp_wmb(); 1791 smp_wmb();
1477 kvm->arch.rma_setup_done = 1; 1792 kvm->arch.rma_setup_done = 1;
1478 err = 0; 1793 err = 0;
1794 out_srcu:
1795 srcu_read_unlock(&kvm->srcu, srcu_idx);
1479 out: 1796 out:
1480 mutex_unlock(&kvm->lock); 1797 mutex_unlock(&kvm->lock);
1481 return err; 1798 return err;
@@ -1496,6 +1813,13 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1496 return -ENOMEM; 1813 return -ENOMEM;
1497 kvm->arch.lpid = lpid; 1814 kvm->arch.lpid = lpid;
1498 1815
1816 /*
1817 * Since we don't flush the TLB when tearing down a VM,
1818 * and this lpid might have previously been used,
1819 * make sure we flush on each core before running the new VM.
1820 */
1821 cpumask_setall(&kvm->arch.need_tlb_flush);
1822
1499 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1823 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1500 1824
1501 kvm->arch.rma = NULL; 1825 kvm->arch.rma = NULL;
@@ -1523,16 +1847,19 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1523 1847
1524 kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); 1848 kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
1525 spin_lock_init(&kvm->arch.slot_phys_lock); 1849 spin_lock_init(&kvm->arch.slot_phys_lock);
1850
1851 /*
1852 * Don't allow secondary CPU threads to come online
1853 * while any KVM VMs exist.
1854 */
1855 inhibit_secondary_onlining();
1856
1526 return 0; 1857 return 0;
1527} 1858}
1528 1859
1529void kvmppc_core_destroy_vm(struct kvm *kvm) 1860void kvmppc_core_destroy_vm(struct kvm *kvm)
1530{ 1861{
1531 unsigned long i; 1862 uninhibit_secondary_onlining();
1532
1533 if (!kvm->arch.using_mmu_notifiers)
1534 for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
1535 unpin_slot(kvm, i);
1536 1863
1537 if (kvm->arch.rma) { 1864 if (kvm->arch.rma) {
1538 kvm_release_rma(kvm->arch.rma); 1865 kvm_release_rma(kvm->arch.rma);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fb4eac290fef..ec0a9e5de100 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type)
157 linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); 157 linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
158 for (i = 0; i < count; ++i) { 158 for (i = 0; i < count; ++i) {
159 linear = alloc_bootmem_align(size, size); 159 linear = alloc_bootmem_align(size, size);
160 pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, 160 pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
161 size >> 20); 161 size >> 20);
162 linear_info[i].base_virt = linear; 162 linear_info[i].base_virt = linear;
163 linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; 163 linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
164 linear_info[i].npages = npages; 164 linear_info[i].npages = npages;
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
new file mode 100644
index 000000000000..35f3cf0269b3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -0,0 +1,144 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
7 */
8
9#include <linux/types.h>
10#include <linux/string.h>
11#include <linux/kvm.h>
12#include <linux/kvm_host.h>
13#include <linux/kernel.h>
14#include <asm/opal.h>
15
16/* SRR1 bits for machine check on POWER7 */
17#define SRR1_MC_LDSTERR (1ul << (63-42))
18#define SRR1_MC_IFETCH_SH (63-45)
19#define SRR1_MC_IFETCH_MASK 0x7
20#define SRR1_MC_IFETCH_SLBPAR 2 /* SLB parity error */
21#define SRR1_MC_IFETCH_SLBMULTI 3 /* SLB multi-hit */
22#define SRR1_MC_IFETCH_SLBPARMULTI 4 /* SLB parity + multi-hit */
23#define SRR1_MC_IFETCH_TLBMULTI 5 /* I-TLB multi-hit */
24
25/* DSISR bits for machine check on POWER7 */
26#define DSISR_MC_DERAT_MULTI 0x800 /* D-ERAT multi-hit */
27#define DSISR_MC_TLB_MULTI 0x400 /* D-TLB multi-hit */
28#define DSISR_MC_SLB_PARITY 0x100 /* SLB parity error */
29#define DSISR_MC_SLB_MULTI 0x080 /* SLB multi-hit */
30#define DSISR_MC_SLB_PARMULTI 0x040 /* SLB parity + multi-hit */
31
32/* POWER7 SLB flush and reload */
33static void reload_slb(struct kvm_vcpu *vcpu)
34{
35 struct slb_shadow *slb;
36 unsigned long i, n;
37
38 /* First clear out SLB */
39 asm volatile("slbmte %0,%0; slbia" : : "r" (0));
40
41 /* Do they have an SLB shadow buffer registered? */
42 slb = vcpu->arch.slb_shadow.pinned_addr;
43 if (!slb)
44 return;
45
46 /* Sanity check */
47 n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
48 if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
49 return;
50
51 /* Load up the SLB from that */
52 for (i = 0; i < n; ++i) {
53 unsigned long rb = slb->save_area[i].esid;
54 unsigned long rs = slb->save_area[i].vsid;
55
56 rb = (rb & ~0xFFFul) | i; /* insert entry number */
57 asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
58 }
59}
60
61/* POWER7 TLB flush */
62static void flush_tlb_power7(struct kvm_vcpu *vcpu)
63{
64 unsigned long i, rb;
65
66 rb = TLBIEL_INVAL_SET_LPID;
67 for (i = 0; i < POWER7_TLB_SETS; ++i) {
68 asm volatile("tlbiel %0" : : "r" (rb));
69 rb += 1 << TLBIEL_INVAL_SET_SHIFT;
70 }
71}
72
73/*
74 * On POWER7, see if we can handle a machine check that occurred inside
75 * the guest in real mode, without switching to the host partition.
76 *
77 * Returns: 0 => exit guest, 1 => deliver machine check to guest
78 */
79static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
80{
81 unsigned long srr1 = vcpu->arch.shregs.msr;
82 struct opal_machine_check_event *opal_evt;
83 long handled = 1;
84
85 if (srr1 & SRR1_MC_LDSTERR) {
86 /* error on load/store */
87 unsigned long dsisr = vcpu->arch.shregs.dsisr;
88
89 if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
90 DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
91 /* flush and reload SLB; flushes D-ERAT too */
92 reload_slb(vcpu);
93 dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
94 DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
95 }
96 if (dsisr & DSISR_MC_TLB_MULTI) {
97 flush_tlb_power7(vcpu);
98 dsisr &= ~DSISR_MC_TLB_MULTI;
99 }
100 /* Any other errors we don't understand? */
101 if (dsisr & 0xffffffffUL)
102 handled = 0;
103 }
104
105 switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
106 case 0:
107 break;
108 case SRR1_MC_IFETCH_SLBPAR:
109 case SRR1_MC_IFETCH_SLBMULTI:
110 case SRR1_MC_IFETCH_SLBPARMULTI:
111 reload_slb(vcpu);
112 break;
113 case SRR1_MC_IFETCH_TLBMULTI:
114 flush_tlb_power7(vcpu);
115 break;
116 default:
117 handled = 0;
118 }
119
120 /*
121 * See if OPAL has already handled the condition.
122 * We assume that if the condition is recovered then OPAL
123 * will have generated an error log event that we will pick
124 * up and log later.
125 */
126 opal_evt = local_paca->opal_mc_evt;
127 if (opal_evt->version == OpalMCE_V1 &&
128 (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
129 opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
130 handled = 1;
131
132 if (handled)
133 opal_evt->in_use = 0;
134
135 return handled;
136}
137
138long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
139{
140 if (cpu_has_feature(CPU_FTR_ARCH_206))
141 return kvmppc_realmode_mc_power7(vcpu);
142
143 return 0;
144}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fb0e821622d4..19c93bae1aea 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x)
35 return __va(addr); 35 return __va(addr);
36} 36}
37 37
38/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
39static int global_invalidates(struct kvm *kvm, unsigned long flags)
40{
41 int global;
42
43 /*
44 * If there is only one vcore, and it's currently running,
45 * we can use tlbiel as long as we mark all other physical
46 * cores as potentially having stale TLB entries for this lpid.
47 * If we're not using MMU notifiers, we never take pages away
48 * from the guest, so we can use tlbiel if requested.
49 * Otherwise, don't use tlbiel.
50 */
51 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
52 global = 0;
53 else if (kvm->arch.using_mmu_notifiers)
54 global = 1;
55 else
56 global = !(flags & H_LOCAL);
57
58 if (!global) {
59 /* any other core might now have stale TLB entries... */
60 smp_wmb();
61 cpumask_setall(&kvm->arch.need_tlb_flush);
62 cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
63 &kvm->arch.need_tlb_flush);
64 }
65
66 return global;
67}
68
38/* 69/*
39 * Add this HPTE into the chain for the real page. 70 * Add this HPTE into the chain for the real page.
40 * Must be called with the chain locked; it unlocks the chain. 71 * Must be called with the chain locked; it unlocks the chain.
@@ -59,13 +90,24 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
59 head->back = pte_index; 90 head->back = pte_index;
60 } else { 91 } else {
61 rev->forw = rev->back = pte_index; 92 rev->forw = rev->back = pte_index;
62 i = pte_index; 93 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
94 pte_index | KVMPPC_RMAP_PRESENT;
63 } 95 }
64 smp_wmb(); 96 unlock_rmap(rmap);
65 *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
66} 97}
67EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
68 99
100/*
101 * Note modification of an HPTE; set the HPTE modified bit
102 * if anyone is interested.
103 */
104static inline void note_hpte_modification(struct kvm *kvm,
105 struct revmap_entry *rev)
106{
107 if (atomic_read(&kvm->arch.hpte_mod_interest))
108 rev->guest_rpte |= HPTE_GR_MODIFIED;
109}
110
69/* Remove this HPTE from the chain for a real page */ 111/* Remove this HPTE from the chain for a real page */
70static void remove_revmap_chain(struct kvm *kvm, long pte_index, 112static void remove_revmap_chain(struct kvm *kvm, long pte_index,
71 struct revmap_entry *rev, 113 struct revmap_entry *rev,
@@ -81,7 +123,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
81 ptel = rev->guest_rpte |= rcbits; 123 ptel = rev->guest_rpte |= rcbits;
82 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); 124 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
83 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); 125 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
84 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 126 if (!memslot)
85 return; 127 return;
86 128
87 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 129 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
@@ -103,14 +145,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
103 unlock_rmap(rmap); 145 unlock_rmap(rmap);
104} 146}
105 147
106static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, 148static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
107 int writing, unsigned long *pte_sizep) 149 int writing, unsigned long *pte_sizep)
108{ 150{
109 pte_t *ptep; 151 pte_t *ptep;
110 unsigned long ps = *pte_sizep; 152 unsigned long ps = *pte_sizep;
111 unsigned int shift; 153 unsigned int shift;
112 154
113 ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift); 155 ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
114 if (!ptep) 156 if (!ptep)
115 return __pte(0); 157 return __pte(0);
116 if (shift) 158 if (shift)
@@ -130,15 +172,15 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
130 hpte[0] = hpte_v; 172 hpte[0] = hpte_v;
131} 173}
132 174
133long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 175long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
134 long pte_index, unsigned long pteh, unsigned long ptel) 176 long pte_index, unsigned long pteh, unsigned long ptel,
177 pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
135{ 178{
136 struct kvm *kvm = vcpu->kvm;
137 unsigned long i, pa, gpa, gfn, psize; 179 unsigned long i, pa, gpa, gfn, psize;
138 unsigned long slot_fn, hva; 180 unsigned long slot_fn, hva;
139 unsigned long *hpte; 181 unsigned long *hpte;
140 struct revmap_entry *rev; 182 struct revmap_entry *rev;
141 unsigned long g_ptel = ptel; 183 unsigned long g_ptel;
142 struct kvm_memory_slot *memslot; 184 struct kvm_memory_slot *memslot;
143 unsigned long *physp, pte_size; 185 unsigned long *physp, pte_size;
144 unsigned long is_io; 186 unsigned long is_io;
@@ -147,13 +189,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
147 unsigned int writing; 189 unsigned int writing;
148 unsigned long mmu_seq; 190 unsigned long mmu_seq;
149 unsigned long rcbits; 191 unsigned long rcbits;
150 bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
151 192
152 psize = hpte_page_size(pteh, ptel); 193 psize = hpte_page_size(pteh, ptel);
153 if (!psize) 194 if (!psize)
154 return H_PARAMETER; 195 return H_PARAMETER;
155 writing = hpte_is_writable(ptel); 196 writing = hpte_is_writable(ptel);
156 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 197 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
198 ptel &= ~HPTE_GR_RESERVED;
199 g_ptel = ptel;
157 200
158 /* used later to detect if we might have been invalidated */ 201 /* used later to detect if we might have been invalidated */
159 mmu_seq = kvm->mmu_notifier_seq; 202 mmu_seq = kvm->mmu_notifier_seq;
@@ -183,7 +226,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
183 rmap = &memslot->arch.rmap[slot_fn]; 226 rmap = &memslot->arch.rmap[slot_fn];
184 227
185 if (!kvm->arch.using_mmu_notifiers) { 228 if (!kvm->arch.using_mmu_notifiers) {
186 physp = kvm->arch.slot_phys[memslot->id]; 229 physp = memslot->arch.slot_phys;
187 if (!physp) 230 if (!physp)
188 return H_PARAMETER; 231 return H_PARAMETER;
189 physp += slot_fn; 232 physp += slot_fn;
@@ -201,7 +244,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
201 244
202 /* Look up the Linux PTE for the backing page */ 245 /* Look up the Linux PTE for the backing page */
203 pte_size = psize; 246 pte_size = psize;
204 pte = lookup_linux_pte(vcpu, hva, writing, &pte_size); 247 pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
205 if (pte_present(pte)) { 248 if (pte_present(pte)) {
206 if (writing && !pte_write(pte)) 249 if (writing && !pte_write(pte))
207 /* make the actual HPTE be read-only */ 250 /* make the actual HPTE be read-only */
@@ -210,6 +253,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
210 pa = pte_pfn(pte) << PAGE_SHIFT; 253 pa = pte_pfn(pte) << PAGE_SHIFT;
211 } 254 }
212 } 255 }
256
213 if (pte_size < psize) 257 if (pte_size < psize)
214 return H_PARAMETER; 258 return H_PARAMETER;
215 if (pa && pte_size > psize) 259 if (pa && pte_size > psize)
@@ -287,8 +331,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
287 rev = &kvm->arch.revmap[pte_index]; 331 rev = &kvm->arch.revmap[pte_index];
288 if (realmode) 332 if (realmode)
289 rev = real_vmalloc_addr(rev); 333 rev = real_vmalloc_addr(rev);
290 if (rev) 334 if (rev) {
291 rev->guest_rpte = g_ptel; 335 rev->guest_rpte = g_ptel;
336 note_hpte_modification(kvm, rev);
337 }
292 338
293 /* Link HPTE into reverse-map chain */ 339 /* Link HPTE into reverse-map chain */
294 if (pteh & HPTE_V_VALID) { 340 if (pteh & HPTE_V_VALID) {
@@ -297,7 +343,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
297 lock_rmap(rmap); 343 lock_rmap(rmap);
298 /* Check for pending invalidations under the rmap chain lock */ 344 /* Check for pending invalidations under the rmap chain lock */
299 if (kvm->arch.using_mmu_notifiers && 345 if (kvm->arch.using_mmu_notifiers &&
300 mmu_notifier_retry(vcpu, mmu_seq)) { 346 mmu_notifier_retry(kvm, mmu_seq)) {
301 /* inval in progress, write a non-present HPTE */ 347 /* inval in progress, write a non-present HPTE */
302 pteh |= HPTE_V_ABSENT; 348 pteh |= HPTE_V_ABSENT;
303 pteh &= ~HPTE_V_VALID; 349 pteh &= ~HPTE_V_VALID;
@@ -318,10 +364,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
318 hpte[0] = pteh; 364 hpte[0] = pteh;
319 asm volatile("ptesync" : : : "memory"); 365 asm volatile("ptesync" : : : "memory");
320 366
321 vcpu->arch.gpr[4] = pte_index; 367 *pte_idx_ret = pte_index;
322 return H_SUCCESS; 368 return H_SUCCESS;
323} 369}
324EXPORT_SYMBOL_GPL(kvmppc_h_enter); 370EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
371
372long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
373 long pte_index, unsigned long pteh, unsigned long ptel)
374{
375 return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
376 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
377}
325 378
326#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 379#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
327 380
@@ -343,11 +396,10 @@ static inline int try_lock_tlbie(unsigned int *lock)
343 return old == 0; 396 return old == 0;
344} 397}
345 398
346long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, 399long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
347 unsigned long pte_index, unsigned long avpn, 400 unsigned long pte_index, unsigned long avpn,
348 unsigned long va) 401 unsigned long *hpret)
349{ 402{
350 struct kvm *kvm = vcpu->kvm;
351 unsigned long *hpte; 403 unsigned long *hpte;
352 unsigned long v, r, rb; 404 unsigned long v, r, rb;
353 struct revmap_entry *rev; 405 struct revmap_entry *rev;
@@ -369,7 +421,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
369 if (v & HPTE_V_VALID) { 421 if (v & HPTE_V_VALID) {
370 hpte[0] &= ~HPTE_V_VALID; 422 hpte[0] &= ~HPTE_V_VALID;
371 rb = compute_tlbie_rb(v, hpte[1], pte_index); 423 rb = compute_tlbie_rb(v, hpte[1], pte_index);
372 if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) { 424 if (global_invalidates(kvm, flags)) {
373 while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) 425 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
374 cpu_relax(); 426 cpu_relax();
375 asm volatile("ptesync" : : : "memory"); 427 asm volatile("ptesync" : : : "memory");
@@ -385,13 +437,22 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
385 /* Read PTE low word after tlbie to get final R/C values */ 437 /* Read PTE low word after tlbie to get final R/C values */
386 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); 438 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
387 } 439 }
388 r = rev->guest_rpte; 440 r = rev->guest_rpte & ~HPTE_GR_RESERVED;
441 note_hpte_modification(kvm, rev);
389 unlock_hpte(hpte, 0); 442 unlock_hpte(hpte, 0);
390 443
391 vcpu->arch.gpr[4] = v; 444 hpret[0] = v;
392 vcpu->arch.gpr[5] = r; 445 hpret[1] = r;
393 return H_SUCCESS; 446 return H_SUCCESS;
394} 447}
448EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
449
450long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
451 unsigned long pte_index, unsigned long avpn)
452{
453 return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
454 &vcpu->arch.gpr[4]);
455}
395 456
396long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) 457long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
397{ 458{
@@ -459,6 +520,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
459 520
460 args[j] = ((0x80 | flags) << 56) + pte_index; 521 args[j] = ((0x80 | flags) << 56) + pte_index;
461 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 522 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
523 note_hpte_modification(kvm, rev);
462 524
463 if (!(hp[0] & HPTE_V_VALID)) { 525 if (!(hp[0] & HPTE_V_VALID)) {
464 /* insert R and C bits from PTE */ 526 /* insert R and C bits from PTE */
@@ -534,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
534 return H_NOT_FOUND; 596 return H_NOT_FOUND;
535 } 597 }
536 598
537 if (atomic_read(&kvm->online_vcpus) == 1)
538 flags |= H_LOCAL;
539 v = hpte[0]; 599 v = hpte[0];
540 bits = (flags << 55) & HPTE_R_PP0; 600 bits = (flags << 55) & HPTE_R_PP0;
541 bits |= (flags << 48) & HPTE_R_KEY_HI; 601 bits |= (flags << 48) & HPTE_R_KEY_HI;
@@ -548,6 +608,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
548 if (rev) { 608 if (rev) {
549 r = (rev->guest_rpte & ~mask) | bits; 609 r = (rev->guest_rpte & ~mask) | bits;
550 rev->guest_rpte = r; 610 rev->guest_rpte = r;
611 note_hpte_modification(kvm, rev);
551 } 612 }
552 r = (hpte[1] & ~mask) | bits; 613 r = (hpte[1] & ~mask) | bits;
553 614
@@ -555,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
555 if (v & HPTE_V_VALID) { 616 if (v & HPTE_V_VALID) {
556 rb = compute_tlbie_rb(v, r, pte_index); 617 rb = compute_tlbie_rb(v, r, pte_index);
557 hpte[0] = v & ~HPTE_V_VALID; 618 hpte[0] = v & ~HPTE_V_VALID;
558 if (!(flags & H_LOCAL)) { 619 if (global_invalidates(kvm, flags)) {
559 while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) 620 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
560 cpu_relax(); 621 cpu_relax();
561 asm volatile("ptesync" : : : "memory"); 622 asm volatile("ptesync" : : : "memory");
@@ -568,6 +629,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
568 asm volatile("tlbiel %0" : : "r" (rb)); 629 asm volatile("tlbiel %0" : : "r" (rb));
569 asm volatile("ptesync" : : : "memory"); 630 asm volatile("ptesync" : : : "memory");
570 } 631 }
632 /*
633 * If the host has this page as readonly but the guest
634 * wants to make it read/write, reduce the permissions.
635 * Checking the host permissions involves finding the
636 * memslot and then the Linux PTE for the page.
637 */
638 if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
639 unsigned long psize, gfn, hva;
640 struct kvm_memory_slot *memslot;
641 pgd_t *pgdir = vcpu->arch.pgdir;
642 pte_t pte;
643
644 psize = hpte_page_size(v, r);
645 gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
646 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
647 if (memslot) {
648 hva = __gfn_to_hva_memslot(memslot, gfn);
649 pte = lookup_linux_pte(pgdir, hva, 1, &psize);
650 if (pte_present(pte) && !pte_write(pte))
651 r = hpte_make_readonly(r);
652 }
653 }
571 } 654 }
572 hpte[1] = r; 655 hpte[1] = r;
573 eieio(); 656 eieio();
@@ -599,8 +682,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
599 v &= ~HPTE_V_ABSENT; 682 v &= ~HPTE_V_ABSENT;
600 v |= HPTE_V_VALID; 683 v |= HPTE_V_VALID;
601 } 684 }
602 if (v & HPTE_V_VALID) 685 if (v & HPTE_V_VALID) {
603 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); 686 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
687 r &= ~HPTE_GR_RESERVED;
688 }
604 vcpu->arch.gpr[4 + i * 2] = v; 689 vcpu->arch.gpr[4 + i * 2] = v;
605 vcpu->arch.gpr[5 + i * 2] = r; 690 vcpu->arch.gpr[5 + i * 2] = r;
606 } 691 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 74a24bbb9637..10b6c358dd77 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -27,6 +27,7 @@
27#include <asm/asm-offsets.h> 27#include <asm/asm-offsets.h>
28#include <asm/exception-64s.h> 28#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h> 29#include <asm/kvm_book3s_asm.h>
30#include <asm/mmu-hash64.h>
30 31
31/***************************************************************************** 32/*****************************************************************************
32 * * 33 * *
@@ -134,8 +135,11 @@ kvm_start_guest:
134 135
13527: /* XXX should handle hypervisor maintenance interrupts etc. here */ 13627: /* XXX should handle hypervisor maintenance interrupts etc. here */
136 137
138 /* reload vcpu pointer after clearing the IPI */
139 ld r4,HSTATE_KVM_VCPU(r13)
140 cmpdi r4,0
137 /* if we have no vcpu to run, go back to sleep */ 141 /* if we have no vcpu to run, go back to sleep */
138 beq cr1,kvm_no_guest 142 beq kvm_no_guest
139 143
140 /* were we napping due to cede? */ 144 /* were we napping due to cede? */
141 lbz r0,HSTATE_NAPPING(r13) 145 lbz r0,HSTATE_NAPPING(r13)
@@ -310,7 +314,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
310 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 314 mtspr SPRN_SDR1,r6 /* switch to partition page table */
311 mtspr SPRN_LPID,r7 315 mtspr SPRN_LPID,r7
312 isync 316 isync
317
318 /* See if we need to flush the TLB */
319 lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
320 clrldi r7,r6,64-6 /* extract bit number (6 bits) */
321 srdi r6,r6,6 /* doubleword number */
322 sldi r6,r6,3 /* address offset */
323 add r6,r6,r9
324 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
313 li r0,1 325 li r0,1
326 sld r0,r0,r7
327 ld r7,0(r6)
328 and. r7,r7,r0
329 beq 22f
33023: ldarx r7,0,r6 /* if set, clear the bit */
331 andc r7,r7,r0
332 stdcx. r7,0,r6
333 bne 23b
334 li r6,128 /* and flush the TLB */
335 mtctr r6
336 li r7,0x800 /* IS field = 0b10 */
337 ptesync
33828: tlbiel r7
339 addi r7,r7,0x1000
340 bdnz 28b
341 ptesync
342
34322: li r0,1
314 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ 344 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
315 b 10f 345 b 10f
316 346
@@ -333,36 +363,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
333 mr r9,r4 363 mr r9,r4
334 blt hdec_soon 364 blt hdec_soon
335 365
336 /*
337 * Invalidate the TLB if we could possibly have stale TLB
338 * entries for this partition on this core due to the use
339 * of tlbiel.
340 * XXX maybe only need this on primary thread?
341 */
342 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
343 lwz r5,VCPU_VCPUID(r4)
344 lhz r6,PACAPACAINDEX(r13)
345 rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
346 lhz r8,VCPU_LAST_CPU(r4)
347 sldi r7,r6,1 /* see if this is the same vcpu */
348 add r7,r7,r9 /* as last ran on this pcpu */
349 lhz r0,KVM_LAST_VCPU(r7)
350 cmpw r6,r8 /* on the same cpu core as last time? */
351 bne 3f
352 cmpw r0,r5 /* same vcpu as this core last ran? */
353 beq 1f
3543: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
355 sth r5,KVM_LAST_VCPU(r7)
356 li r6,128
357 mtctr r6
358 li r7,0x800 /* IS field = 0b10 */
359 ptesync
3602: tlbiel r7
361 addi r7,r7,0x1000
362 bdnz 2b
363 ptesync
3641:
365
366 /* Save purr/spurr */ 366 /* Save purr/spurr */
367 mfspr r5,SPRN_PURR 367 mfspr r5,SPRN_PURR
368 mfspr r6,SPRN_SPURR 368 mfspr r6,SPRN_SPURR
@@ -679,8 +679,7 @@ BEGIN_FTR_SECTION
6791: 6791:
680END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 680END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
681 681
682nohpte_cont: 682guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
683hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
684 /* Save DEC */ 683 /* Save DEC */
685 mfspr r5,SPRN_DEC 684 mfspr r5,SPRN_DEC
686 mftb r6 685 mftb r6
@@ -701,6 +700,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
701 std r6, VCPU_FAULT_DAR(r9) 700 std r6, VCPU_FAULT_DAR(r9)
702 stw r7, VCPU_FAULT_DSISR(r9) 701 stw r7, VCPU_FAULT_DSISR(r9)
703 702
703 /* See if it is a machine check */
704 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
705 beq machine_check_realmode
706mc_cont:
707
704 /* Save guest CTRL register, set runlatch to 1 */ 708 /* Save guest CTRL register, set runlatch to 1 */
7056: mfspr r6,SPRN_CTRLF 7096: mfspr r6,SPRN_CTRLF
706 stw r6,VCPU_CTRL(r9) 710 stw r6,VCPU_CTRL(r9)
@@ -1113,38 +1117,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1113 /* 1117 /*
1114 * For external and machine check interrupts, we need 1118 * For external and machine check interrupts, we need
1115 * to call the Linux handler to process the interrupt. 1119 * to call the Linux handler to process the interrupt.
1116 * We do that by jumping to the interrupt vector address 1120 * We do that by jumping to absolute address 0x500 for
1117 * which we have in r12. The [h]rfid at the end of the 1121 * external interrupts, or the machine_check_fwnmi label
1122 * for machine checks (since firmware might have patched
1123 * the vector area at 0x200). The [h]rfid at the end of the
1118 * handler will return to the book3s_hv_interrupts.S code. 1124 * handler will return to the book3s_hv_interrupts.S code.
1119 * For other interrupts we do the rfid to get back 1125 * For other interrupts we do the rfid to get back
1120 * to the book3s_interrupts.S code here. 1126 * to the book3s_hv_interrupts.S code here.
1121 */ 1127 */
1122 ld r8, HSTATE_VMHANDLER(r13) 1128 ld r8, HSTATE_VMHANDLER(r13)
1123 ld r7, HSTATE_HOST_MSR(r13) 1129 ld r7, HSTATE_HOST_MSR(r13)
1124 1130
1131 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1125 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1132 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1133BEGIN_FTR_SECTION
1126 beq 11f 1134 beq 11f
1127 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1135END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1128 1136
1129 /* RFI into the highmem handler, or branch to interrupt handler */ 1137 /* RFI into the highmem handler, or branch to interrupt handler */
113012: mfmsr r6 1138 mfmsr r6
1131 mtctr r12
1132 li r0, MSR_RI 1139 li r0, MSR_RI
1133 andc r6, r6, r0 1140 andc r6, r6, r0
1134 mtmsrd r6, 1 /* Clear RI in MSR */ 1141 mtmsrd r6, 1 /* Clear RI in MSR */
1135 mtsrr0 r8 1142 mtsrr0 r8
1136 mtsrr1 r7 1143 mtsrr1 r7
1137 beqctr 1144 beqa 0x500 /* external interrupt (PPC970) */
1145 beq cr1, 13f /* machine check */
1138 RFI 1146 RFI
1139 1147
114011: 1148 /* On POWER7, we have external interrupts set to use HSRR0/1 */
1141BEGIN_FTR_SECTION 114911: mtspr SPRN_HSRR0, r8
1142 b 12b
1143END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1144 mtspr SPRN_HSRR0, r8
1145 mtspr SPRN_HSRR1, r7 1150 mtspr SPRN_HSRR1, r7
1146 ba 0x500 1151 ba 0x500
1147 1152
115313: b machine_check_fwnmi
1154
1148/* 1155/*
1149 * Check whether an HDSI is an HPTE not found fault or something else. 1156 * Check whether an HDSI is an HPTE not found fault or something else.
1150 * If it is an HPTE not found fault that is due to the guest accessing 1157 * If it is an HPTE not found fault that is due to the guest accessing
@@ -1177,7 +1184,7 @@ kvmppc_hdsi:
1177 cmpdi r3, 0 /* retry the instruction */ 1184 cmpdi r3, 0 /* retry the instruction */
1178 beq 6f 1185 beq 6f
1179 cmpdi r3, -1 /* handle in kernel mode */ 1186 cmpdi r3, -1 /* handle in kernel mode */
1180 beq nohpte_cont 1187 beq guest_exit_cont
1181 cmpdi r3, -2 /* MMIO emulation; need instr word */ 1188 cmpdi r3, -2 /* MMIO emulation; need instr word */
1182 beq 2f 1189 beq 2f
1183 1190
@@ -1191,6 +1198,7 @@ kvmppc_hdsi:
1191 li r10, BOOK3S_INTERRUPT_DATA_STORAGE 1198 li r10, BOOK3S_INTERRUPT_DATA_STORAGE
1192 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ 1199 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1193 rotldi r11, r11, 63 1200 rotldi r11, r11, 63
1201fast_interrupt_c_return:
11946: ld r7, VCPU_CTR(r9) 12026: ld r7, VCPU_CTR(r9)
1195 lwz r8, VCPU_XER(r9) 1203 lwz r8, VCPU_XER(r9)
1196 mtctr r7 1204 mtctr r7
@@ -1223,7 +1231,7 @@ kvmppc_hdsi:
1223 /* Unset guest mode. */ 1231 /* Unset guest mode. */
1224 li r0, KVM_GUEST_MODE_NONE 1232 li r0, KVM_GUEST_MODE_NONE
1225 stb r0, HSTATE_IN_GUEST(r13) 1233 stb r0, HSTATE_IN_GUEST(r13)
1226 b nohpte_cont 1234 b guest_exit_cont
1227 1235
1228/* 1236/*
1229 * Similarly for an HISI, reflect it to the guest as an ISI unless 1237 * Similarly for an HISI, reflect it to the guest as an ISI unless
@@ -1249,9 +1257,9 @@ kvmppc_hisi:
1249 ld r11, VCPU_MSR(r9) 1257 ld r11, VCPU_MSR(r9)
1250 li r12, BOOK3S_INTERRUPT_H_INST_STORAGE 1258 li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1251 cmpdi r3, 0 /* retry the instruction */ 1259 cmpdi r3, 0 /* retry the instruction */
1252 beq 6f 1260 beq fast_interrupt_c_return
1253 cmpdi r3, -1 /* handle in kernel mode */ 1261 cmpdi r3, -1 /* handle in kernel mode */
1254 beq nohpte_cont 1262 beq guest_exit_cont
1255 1263
1256 /* Synthesize an ISI for the guest */ 1264 /* Synthesize an ISI for the guest */
1257 mr r11, r3 1265 mr r11, r3
@@ -1260,12 +1268,7 @@ kvmppc_hisi:
1260 li r10, BOOK3S_INTERRUPT_INST_STORAGE 1268 li r10, BOOK3S_INTERRUPT_INST_STORAGE
1261 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ 1269 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1262 rotldi r11, r11, 63 1270 rotldi r11, r11, 63
12636: ld r7, VCPU_CTR(r9) 1271 b fast_interrupt_c_return
1264 lwz r8, VCPU_XER(r9)
1265 mtctr r7
1266 mtxer r8
1267 mr r4, r9
1268 b fast_guest_return
1269 1272
12703: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ 12733: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
1271 ld r5, KVM_VRMA_SLB_V(r6) 1274 ld r5, KVM_VRMA_SLB_V(r6)
@@ -1281,14 +1284,14 @@ kvmppc_hisi:
1281hcall_try_real_mode: 1284hcall_try_real_mode:
1282 ld r3,VCPU_GPR(R3)(r9) 1285 ld r3,VCPU_GPR(R3)(r9)
1283 andi. r0,r11,MSR_PR 1286 andi. r0,r11,MSR_PR
1284 bne hcall_real_cont 1287 bne guest_exit_cont
1285 clrrdi r3,r3,2 1288 clrrdi r3,r3,2
1286 cmpldi r3,hcall_real_table_end - hcall_real_table 1289 cmpldi r3,hcall_real_table_end - hcall_real_table
1287 bge hcall_real_cont 1290 bge guest_exit_cont
1288 LOAD_REG_ADDR(r4, hcall_real_table) 1291 LOAD_REG_ADDR(r4, hcall_real_table)
1289 lwzx r3,r3,r4 1292 lwzx r3,r3,r4
1290 cmpwi r3,0 1293 cmpwi r3,0
1291 beq hcall_real_cont 1294 beq guest_exit_cont
1292 add r3,r3,r4 1295 add r3,r3,r4
1293 mtctr r3 1296 mtctr r3
1294 mr r3,r9 /* get vcpu pointer */ 1297 mr r3,r9 /* get vcpu pointer */
@@ -1309,7 +1312,7 @@ hcall_real_fallback:
1309 li r12,BOOK3S_INTERRUPT_SYSCALL 1312 li r12,BOOK3S_INTERRUPT_SYSCALL
1310 ld r9, HSTATE_KVM_VCPU(r13) 1313 ld r9, HSTATE_KVM_VCPU(r13)
1311 1314
1312 b hcall_real_cont 1315 b guest_exit_cont
1313 1316
1314 .globl hcall_real_table 1317 .globl hcall_real_table
1315hcall_real_table: 1318hcall_real_table:
@@ -1568,6 +1571,21 @@ kvm_cede_exit:
1568 li r3,H_TOO_HARD 1571 li r3,H_TOO_HARD
1569 blr 1572 blr
1570 1573
1574 /* Try to handle a machine check in real mode */
1575machine_check_realmode:
1576 mr r3, r9 /* get vcpu pointer */
1577 bl .kvmppc_realmode_machine_check
1578 nop
1579 cmpdi r3, 0 /* continue exiting from guest? */
1580 ld r9, HSTATE_KVM_VCPU(r13)
1581 li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1582 beq mc_cont
1583 /* If not, deliver a machine check. SRR0/1 are already set */
1584 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
1585 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1586 rotldi r11, r11, 63
1587 b fast_interrupt_c_return
1588
1571secondary_too_late: 1589secondary_too_late:
1572 ld r5,HSTATE_KVM_VCORE(r13) 1590 ld r5,HSTATE_KVM_VCORE(r13)
1573 HMT_LOW 1591 HMT_LOW
@@ -1587,6 +1605,10 @@ secondary_too_late:
1587 .endr 1605 .endr
1588 1606
1589secondary_nap: 1607secondary_nap:
1608 /* Clear our vcpu pointer so we don't come back in early */
1609 li r0, 0
1610 std r0, HSTATE_KVM_VCPU(r13)
1611 lwsync
1590 /* Clear any pending IPI - assume we're a secondary thread */ 1612 /* Clear any pending IPI - assume we're a secondary thread */
1591 ld r5, HSTATE_XICS_PHYS(r13) 1613 ld r5, HSTATE_XICS_PHYS(r13)
1592 li r7, XICS_XIRR 1614 li r7, XICS_XIRR
@@ -1612,8 +1634,6 @@ secondary_nap:
1612kvm_no_guest: 1634kvm_no_guest:
1613 li r0, KVM_HWTHREAD_IN_NAP 1635 li r0, KVM_HWTHREAD_IN_NAP
1614 stb r0, HSTATE_HWTHREAD_STATE(r13) 1636 stb r0, HSTATE_HWTHREAD_STATE(r13)
1615 li r0, 0
1616 std r0, HSTATE_KVM_VCPU(r13)
1617 1637
1618 li r3, LPCR_PECE0 1638 li r3, LPCR_PECE0
1619 mfspr r4, SPRN_LPCR 1639 mfspr r4, SPRN_LPCR
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 41cb0017e757..2c86b0d63714 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
114 hlist_del_init_rcu(&pte->list_vpte); 114 hlist_del_init_rcu(&pte->list_vpte);
115 hlist_del_init_rcu(&pte->list_vpte_long); 115 hlist_del_init_rcu(&pte->list_vpte_long);
116 116
117 if (pte->pte.may_write)
118 kvm_release_pfn_dirty(pte->pfn);
119 else
120 kvm_release_pfn_clean(pte->pfn);
121
122 spin_unlock(&vcpu3s->mmu_lock); 117 spin_unlock(&vcpu3s->mmu_lock);
123 118
124 vcpu3s->hpte_cache_count--; 119 vcpu3s->hpte_cache_count--;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 05c28f59f77f..28d38adeca73 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
52#define MSR_USER32 MSR_USER 52#define MSR_USER32 MSR_USER
53#define MSR_USER64 MSR_USER 53#define MSR_USER64 MSR_USER
54#define HW_PAGE_SIZE PAGE_SIZE 54#define HW_PAGE_SIZE PAGE_SIZE
55#define __hard_irq_disable local_irq_disable
56#define __hard_irq_enable local_irq_enable
57#endif 55#endif
58 56
59void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 57void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -66,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
66 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; 64 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
67 svcpu_put(svcpu); 65 svcpu_put(svcpu);
68#endif 66#endif
69 67 vcpu->cpu = smp_processor_id();
70#ifdef CONFIG_PPC_BOOK3S_32 68#ifdef CONFIG_PPC_BOOK3S_32
71 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; 69 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
72#endif 70#endif
@@ -83,17 +81,71 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
83 svcpu_put(svcpu); 81 svcpu_put(svcpu);
84#endif 82#endif
85 83
86 kvmppc_giveup_ext(vcpu, MSR_FP); 84 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
87 kvmppc_giveup_ext(vcpu, MSR_VEC); 85 vcpu->cpu = -1;
88 kvmppc_giveup_ext(vcpu, MSR_VSX); 86}
87
88int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
89{
90 int r = 1; /* Indicate we want to get back into the guest */
91
92 /* We misuse TLB_FLUSH to indicate that we want to clear
93 all shadow cache entries */
94 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
95 kvmppc_mmu_pte_flush(vcpu, 0, 0);
96
97 return r;
98}
99
100/************* MMU Notifiers *************/
101
102int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
103{
104 trace_kvm_unmap_hva(hva);
105
106 /*
107 * Flush all shadow tlb entries everywhere. This is slow, but
108 * we are 100% sure that we catch the to be unmapped page
109 */
110 kvm_flush_remote_tlbs(kvm);
111
112 return 0;
113}
114
115int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
116{
117 /* kvm_unmap_hva flushes everything anyways */
118 kvm_unmap_hva(kvm, start);
119
120 return 0;
121}
122
123int kvm_age_hva(struct kvm *kvm, unsigned long hva)
124{
125 /* XXX could be more clever ;) */
126 return 0;
127}
128
129int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
130{
131 /* XXX could be more clever ;) */
132 return 0;
89} 133}
90 134
135void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
136{
137 /* The page will get remapped properly on its next fault */
138 kvm_unmap_hva(kvm, hva);
139}
140
141/*****************************************/
142
91static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) 143static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
92{ 144{
93 ulong smsr = vcpu->arch.shared->msr; 145 ulong smsr = vcpu->arch.shared->msr;
94 146
95 /* Guest MSR values */ 147 /* Guest MSR values */
96 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; 148 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
97 /* Process MSR values */ 149 /* Process MSR values */
98 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; 150 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
99 /* External providers the guest reserved */ 151 /* External providers the guest reserved */
@@ -379,10 +431,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
379 431
380static inline int get_fpr_index(int i) 432static inline int get_fpr_index(int i)
381{ 433{
382#ifdef CONFIG_VSX 434 return i * TS_FPRWIDTH;
383 i *= 2;
384#endif
385 return i;
386} 435}
387 436
388/* Give up external provider (FPU, Altivec, VSX) */ 437/* Give up external provider (FPU, Altivec, VSX) */
@@ -396,41 +445,49 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
396 u64 *thread_fpr = (u64*)t->fpr; 445 u64 *thread_fpr = (u64*)t->fpr;
397 int i; 446 int i;
398 447
399 if (!(vcpu->arch.guest_owned_ext & msr)) 448 /*
449 * VSX instructions can access FP and vector registers, so if
450 * we are giving up VSX, make sure we give up FP and VMX as well.
451 */
452 if (msr & MSR_VSX)
453 msr |= MSR_FP | MSR_VEC;
454
455 msr &= vcpu->arch.guest_owned_ext;
456 if (!msr)
400 return; 457 return;
401 458
402#ifdef DEBUG_EXT 459#ifdef DEBUG_EXT
403 printk(KERN_INFO "Giving up ext 0x%lx\n", msr); 460 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
404#endif 461#endif
405 462
406 switch (msr) { 463 if (msr & MSR_FP) {
407 case MSR_FP: 464 /*
465 * Note that on CPUs with VSX, giveup_fpu stores
466 * both the traditional FP registers and the added VSX
467 * registers into thread.fpr[].
468 */
408 giveup_fpu(current); 469 giveup_fpu(current);
409 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 470 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
410 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; 471 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
411 472
412 vcpu->arch.fpscr = t->fpscr.val; 473 vcpu->arch.fpscr = t->fpscr.val;
413 break; 474
414 case MSR_VEC: 475#ifdef CONFIG_VSX
476 if (cpu_has_feature(CPU_FTR_VSX))
477 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
478 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
479#endif
480 }
481
415#ifdef CONFIG_ALTIVEC 482#ifdef CONFIG_ALTIVEC
483 if (msr & MSR_VEC) {
416 giveup_altivec(current); 484 giveup_altivec(current);
417 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); 485 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
418 vcpu->arch.vscr = t->vscr; 486 vcpu->arch.vscr = t->vscr;
419#endif
420 break;
421 case MSR_VSX:
422#ifdef CONFIG_VSX
423 __giveup_vsx(current);
424 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
425 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
426#endif
427 break;
428 default:
429 BUG();
430 } 487 }
488#endif
431 489
432 vcpu->arch.guest_owned_ext &= ~msr; 490 vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
433 current->thread.regs->msr &= ~msr;
434 kvmppc_recalc_shadow_msr(vcpu); 491 kvmppc_recalc_shadow_msr(vcpu);
435} 492}
436 493
@@ -490,47 +547,56 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
490 return RESUME_GUEST; 547 return RESUME_GUEST;
491 } 548 }
492 549
493 /* We already own the ext */ 550 if (msr == MSR_VSX) {
494 if (vcpu->arch.guest_owned_ext & msr) { 551 /* No VSX? Give an illegal instruction interrupt */
495 return RESUME_GUEST; 552#ifdef CONFIG_VSX
553 if (!cpu_has_feature(CPU_FTR_VSX))
554#endif
555 {
556 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
557 return RESUME_GUEST;
558 }
559
560 /*
561 * We have to load up all the FP and VMX registers before
562 * we can let the guest use VSX instructions.
563 */
564 msr = MSR_FP | MSR_VEC | MSR_VSX;
496 } 565 }
497 566
567 /* See if we already own all the ext(s) needed */
568 msr &= ~vcpu->arch.guest_owned_ext;
569 if (!msr)
570 return RESUME_GUEST;
571
498#ifdef DEBUG_EXT 572#ifdef DEBUG_EXT
499 printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 573 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
500#endif 574#endif
501 575
502 current->thread.regs->msr |= msr; 576 current->thread.regs->msr |= msr;
503 577
504 switch (msr) { 578 if (msr & MSR_FP) {
505 case MSR_FP:
506 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 579 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
507 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; 580 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
508 581#ifdef CONFIG_VSX
582 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
583 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
584#endif
509 t->fpscr.val = vcpu->arch.fpscr; 585 t->fpscr.val = vcpu->arch.fpscr;
510 t->fpexc_mode = 0; 586 t->fpexc_mode = 0;
511 kvmppc_load_up_fpu(); 587 kvmppc_load_up_fpu();
512 break; 588 }
513 case MSR_VEC: 589
590 if (msr & MSR_VEC) {
514#ifdef CONFIG_ALTIVEC 591#ifdef CONFIG_ALTIVEC
515 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); 592 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
516 t->vscr = vcpu->arch.vscr; 593 t->vscr = vcpu->arch.vscr;
517 t->vrsave = -1; 594 t->vrsave = -1;
518 kvmppc_load_up_altivec(); 595 kvmppc_load_up_altivec();
519#endif 596#endif
520 break;
521 case MSR_VSX:
522#ifdef CONFIG_VSX
523 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
524 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
525 kvmppc_load_up_vsx();
526#endif
527 break;
528 default:
529 BUG();
530 } 597 }
531 598
532 vcpu->arch.guest_owned_ext |= msr; 599 vcpu->arch.guest_owned_ext |= msr;
533
534 kvmppc_recalc_shadow_msr(vcpu); 600 kvmppc_recalc_shadow_msr(vcpu);
535 601
536 return RESUME_GUEST; 602 return RESUME_GUEST;
@@ -540,18 +606,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
540 unsigned int exit_nr) 606 unsigned int exit_nr)
541{ 607{
542 int r = RESUME_HOST; 608 int r = RESUME_HOST;
609 int s;
543 610
544 vcpu->stat.sum_exits++; 611 vcpu->stat.sum_exits++;
545 612
546 run->exit_reason = KVM_EXIT_UNKNOWN; 613 run->exit_reason = KVM_EXIT_UNKNOWN;
547 run->ready_for_interrupt_injection = 1; 614 run->ready_for_interrupt_injection = 1;
548 615
549 /* We get here with MSR.EE=0, so enable it to be a nice citizen */ 616 /* We get here with MSR.EE=1 */
550 __hard_irq_enable(); 617
618 trace_kvm_exit(exit_nr, vcpu);
619 kvm_guest_exit();
551 620
552 trace_kvm_book3s_exit(exit_nr, vcpu);
553 preempt_enable();
554 kvm_resched(vcpu);
555 switch (exit_nr) { 621 switch (exit_nr) {
556 case BOOK3S_INTERRUPT_INST_STORAGE: 622 case BOOK3S_INTERRUPT_INST_STORAGE:
557 { 623 {
@@ -802,7 +868,6 @@ program_interrupt:
802 } 868 }
803 } 869 }
804 870
805 preempt_disable();
806 if (!(r & RESUME_HOST)) { 871 if (!(r & RESUME_HOST)) {
807 /* To avoid clobbering exit_reason, only check for signals if 872 /* To avoid clobbering exit_reason, only check for signals if
808 * we aren't already exiting to userspace for some other 873 * we aren't already exiting to userspace for some other
@@ -814,20 +879,13 @@ program_interrupt:
814 * and if we really did time things so badly, then we just exit 879 * and if we really did time things so badly, then we just exit
815 * again due to a host external interrupt. 880 * again due to a host external interrupt.
816 */ 881 */
817 __hard_irq_disable(); 882 local_irq_disable();
818 if (signal_pending(current)) { 883 s = kvmppc_prepare_to_enter(vcpu);
819 __hard_irq_enable(); 884 if (s <= 0) {
820#ifdef EXIT_DEBUG 885 local_irq_enable();
821 printk(KERN_EMERG "KVM: Going back to host\n"); 886 r = s;
822#endif
823 vcpu->stat.signal_exits++;
824 run->exit_reason = KVM_EXIT_INTR;
825 r = -EINTR;
826 } else { 887 } else {
827 /* In case an interrupt came in that was triggered 888 kvmppc_lazy_ee_enable();
828 * from userspace (like DEC), we need to check what
829 * to inject now! */
830 kvmppc_core_prepare_to_enter(vcpu);
831 } 889 }
832 } 890 }
833 891
@@ -899,34 +957,59 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
899 return 0; 957 return 0;
900} 958}
901 959
902int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 960int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
903{ 961{
904 int r = -EINVAL; 962 int r = 0;
905 963
906 switch (reg->id) { 964 switch (id) {
907 case KVM_REG_PPC_HIOR: 965 case KVM_REG_PPC_HIOR:
908 r = copy_to_user((u64 __user *)(long)reg->addr, 966 *val = get_reg_val(id, to_book3s(vcpu)->hior);
909 &to_book3s(vcpu)->hior, sizeof(u64));
910 break; 967 break;
968#ifdef CONFIG_VSX
969 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
970 long int i = id - KVM_REG_PPC_VSR0;
971
972 if (!cpu_has_feature(CPU_FTR_VSX)) {
973 r = -ENXIO;
974 break;
975 }
976 val->vsxval[0] = vcpu->arch.fpr[i];
977 val->vsxval[1] = vcpu->arch.vsr[i];
978 break;
979 }
980#endif /* CONFIG_VSX */
911 default: 981 default:
982 r = -EINVAL;
912 break; 983 break;
913 } 984 }
914 985
915 return r; 986 return r;
916} 987}
917 988
918int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 989int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
919{ 990{
920 int r = -EINVAL; 991 int r = 0;
921 992
922 switch (reg->id) { 993 switch (id) {
923 case KVM_REG_PPC_HIOR: 994 case KVM_REG_PPC_HIOR:
924 r = copy_from_user(&to_book3s(vcpu)->hior, 995 to_book3s(vcpu)->hior = set_reg_val(id, *val);
925 (u64 __user *)(long)reg->addr, sizeof(u64)); 996 to_book3s(vcpu)->hior_explicit = true;
926 if (!r) 997 break;
927 to_book3s(vcpu)->hior_explicit = true; 998#ifdef CONFIG_VSX
999 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
1000 long int i = id - KVM_REG_PPC_VSR0;
1001
1002 if (!cpu_has_feature(CPU_FTR_VSX)) {
1003 r = -ENXIO;
1004 break;
1005 }
1006 vcpu->arch.fpr[i] = val->vsxval[0];
1007 vcpu->arch.vsr[i] = val->vsxval[1];
928 break; 1008 break;
1009 }
1010#endif /* CONFIG_VSX */
929 default: 1011 default:
1012 r = -EINVAL;
930 break; 1013 break;
931 } 1014 }
932 1015
@@ -1020,8 +1103,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1020#endif 1103#endif
1021 ulong ext_msr; 1104 ulong ext_msr;
1022 1105
1023 preempt_disable();
1024
1025 /* Check if we can run the vcpu at all */ 1106 /* Check if we can run the vcpu at all */
1026 if (!vcpu->arch.sane) { 1107 if (!vcpu->arch.sane) {
1027 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1108 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -1029,21 +1110,16 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1029 goto out; 1110 goto out;
1030 } 1111 }
1031 1112
1032 kvmppc_core_prepare_to_enter(vcpu);
1033
1034 /* 1113 /*
1035 * Interrupts could be timers for the guest which we have to inject 1114 * Interrupts could be timers for the guest which we have to inject
1036 * again, so let's postpone them until we're in the guest and if we 1115 * again, so let's postpone them until we're in the guest and if we
1037 * really did time things so badly, then we just exit again due to 1116 * really did time things so badly, then we just exit again due to
1038 * a host external interrupt. 1117 * a host external interrupt.
1039 */ 1118 */
1040 __hard_irq_disable(); 1119 local_irq_disable();
1041 1120 ret = kvmppc_prepare_to_enter(vcpu);
1042 /* No need to go into the guest when all we do is going out */ 1121 if (ret <= 0) {
1043 if (signal_pending(current)) { 1122 local_irq_enable();
1044 __hard_irq_enable();
1045 kvm_run->exit_reason = KVM_EXIT_INTR;
1046 ret = -EINTR;
1047 goto out; 1123 goto out;
1048 } 1124 }
1049 1125
@@ -1070,7 +1146,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1070 /* Save VSX state in stack */ 1146 /* Save VSX state in stack */
1071 used_vsr = current->thread.used_vsr; 1147 used_vsr = current->thread.used_vsr;
1072 if (used_vsr && (current->thread.regs->msr & MSR_VSX)) 1148 if (used_vsr && (current->thread.regs->msr & MSR_VSX))
1073 __giveup_vsx(current); 1149 __giveup_vsx(current);
1074#endif 1150#endif
1075 1151
1076 /* Remember the MSR with disabled extensions */ 1152 /* Remember the MSR with disabled extensions */
@@ -1080,20 +1156,19 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1080 if (vcpu->arch.shared->msr & MSR_FP) 1156 if (vcpu->arch.shared->msr & MSR_FP)
1081 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 1157 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
1082 1158
1083 kvm_guest_enter(); 1159 kvmppc_lazy_ee_enable();
1084 1160
1085 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 1161 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
1086 1162
1087 kvm_guest_exit(); 1163 /* No need for kvm_guest_exit. It's done in handle_exit.
1088 1164 We also get here with interrupts enabled. */
1089 current->thread.regs->msr = ext_msr;
1090 1165
1091 /* Make sure we save the guest FPU/Altivec/VSX state */ 1166 /* Make sure we save the guest FPU/Altivec/VSX state */
1092 kvmppc_giveup_ext(vcpu, MSR_FP); 1167 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
1093 kvmppc_giveup_ext(vcpu, MSR_VEC); 1168
1094 kvmppc_giveup_ext(vcpu, MSR_VSX); 1169 current->thread.regs->msr = ext_msr;
1095 1170
1096 /* Restore FPU state from stack */ 1171 /* Restore FPU/VSX state from stack */
1097 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); 1172 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
1098 current->thread.fpscr.val = fpscr; 1173 current->thread.fpscr.val = fpscr;
1099 current->thread.fpexc_mode = fpexc_mode; 1174 current->thread.fpexc_mode = fpexc_mode;
@@ -1113,7 +1188,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1113#endif 1188#endif
1114 1189
1115out: 1190out:
1116 preempt_enable(); 1191 vcpu->mode = OUTSIDE_GUEST_MODE;
1117 return ret; 1192 return ret;
1118} 1193}
1119 1194
@@ -1181,14 +1256,31 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1181} 1256}
1182#endif /* CONFIG_PPC64 */ 1257#endif /* CONFIG_PPC64 */
1183 1258
1259void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1260 struct kvm_memory_slot *dont)
1261{
1262}
1263
1264int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1265 unsigned long npages)
1266{
1267 return 0;
1268}
1269
1184int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1270int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1271 struct kvm_memory_slot *memslot,
1185 struct kvm_userspace_memory_region *mem) 1272 struct kvm_userspace_memory_region *mem)
1186{ 1273{
1187 return 0; 1274 return 0;
1188} 1275}
1189 1276
1190void kvmppc_core_commit_memory_region(struct kvm *kvm, 1277void kvmppc_core_commit_memory_region(struct kvm *kvm,
1191 struct kvm_userspace_memory_region *mem) 1278 struct kvm_userspace_memory_region *mem,
1279 struct kvm_memory_slot old)
1280{
1281}
1282
1283void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
1192{ 1284{
1193} 1285}
1194 1286
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 9ecf6e35cd8d..8f7633e3afb8 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -170,20 +170,21 @@ kvmppc_handler_skip_ins:
170 * Call kvmppc_handler_trampoline_enter in real mode 170 * Call kvmppc_handler_trampoline_enter in real mode
171 * 171 *
172 * On entry, r4 contains the guest shadow MSR 172 * On entry, r4 contains the guest shadow MSR
173 * MSR.EE has to be 0 when calling this function
173 */ 174 */
174_GLOBAL(kvmppc_entry_trampoline) 175_GLOBAL(kvmppc_entry_trampoline)
175 mfmsr r5 176 mfmsr r5
176 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) 177 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
177 toreal(r7) 178 toreal(r7)
178 179
179 li r9, MSR_RI
180 ori r9, r9, MSR_EE
181 andc r9, r5, r9 /* Clear EE and RI in MSR value */
182 li r6, MSR_IR | MSR_DR 180 li r6, MSR_IR | MSR_DR
183 ori r6, r6, MSR_EE 181 andc r6, r5, r6 /* Clear DR and IR in MSR value */
184 andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */ 182 /*
185 MTMSR_EERI(r9) /* Clear EE and RI in MSR */ 183 * Set EE in HOST_MSR so that it's enabled when we get into our
186 mtsrr0 r7 /* before we set srr0/1 */ 184 * C exit handler function
185 */
186 ori r5, r5, MSR_EE
187 mtsrr0 r7
187 mtsrr1 r6 188 mtsrr1 r6
188 RFI 189 RFI
189 190
@@ -233,8 +234,5 @@ define_load_up(fpu)
233#ifdef CONFIG_ALTIVEC 234#ifdef CONFIG_ALTIVEC
234define_load_up(altivec) 235define_load_up(altivec)
235#endif 236#endif
236#ifdef CONFIG_VSX
237define_load_up(vsx)
238#endif
239 237
240#include "book3s_segment.S" 238#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097c852b..69f114015780 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,9 +36,11 @@
36#include <asm/dbell.h> 36#include <asm/dbell.h>
37#include <asm/hw_irq.h> 37#include <asm/hw_irq.h>
38#include <asm/irq.h> 38#include <asm/irq.h>
39#include <asm/time.h>
39 40
40#include "timing.h" 41#include "timing.h"
41#include "booke.h" 42#include "booke.h"
43#include "trace.h"
42 44
43unsigned long kvmppc_booke_handlers; 45unsigned long kvmppc_booke_handlers;
44 46
@@ -62,6 +64,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 64 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
63 { "doorbell", VCPU_STAT(dbell_exits) }, 65 { "doorbell", VCPU_STAT(dbell_exits) },
64 { "guest doorbell", VCPU_STAT(gdbell_exits) }, 66 { "guest doorbell", VCPU_STAT(gdbell_exits) },
67 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
65 { NULL } 68 { NULL }
66}; 69};
67 70
@@ -120,6 +123,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
120} 123}
121#endif 124#endif
122 125
126static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
127{
128#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
129 /* We always treat the FP bit as enabled from the host
130 perspective, so only need to adjust the shadow MSR */
131 vcpu->arch.shadow_msr &= ~MSR_FP;
132 vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
133#endif
134}
135
123/* 136/*
124 * Helper function for "full" MSR writes. No need to call this if only 137 * Helper function for "full" MSR writes. No need to call this if only
125 * EE/CE/ME/DE/RI are changing. 138 * EE/CE/ME/DE/RI are changing.
@@ -136,11 +149,13 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
136 149
137 kvmppc_mmu_msr_notify(vcpu, old_msr); 150 kvmppc_mmu_msr_notify(vcpu, old_msr);
138 kvmppc_vcpu_sync_spe(vcpu); 151 kvmppc_vcpu_sync_spe(vcpu);
152 kvmppc_vcpu_sync_fpu(vcpu);
139} 153}
140 154
141static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, 155static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
142 unsigned int priority) 156 unsigned int priority)
143{ 157{
158 trace_kvm_booke_queue_irqprio(vcpu, priority);
144 set_bit(priority, &vcpu->arch.pending_exceptions); 159 set_bit(priority, &vcpu->arch.pending_exceptions);
145} 160}
146 161
@@ -206,6 +221,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
206 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); 221 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
207} 222}
208 223
224static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
225{
226 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
227}
228
229static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
230{
231 clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
232}
233
209static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) 234static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
210{ 235{
211#ifdef CONFIG_KVM_BOOKE_HV 236#ifdef CONFIG_KVM_BOOKE_HV
@@ -287,6 +312,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
287 bool crit; 312 bool crit;
288 bool keep_irq = false; 313 bool keep_irq = false;
289 enum int_class int_class; 314 enum int_class int_class;
315 ulong new_msr = vcpu->arch.shared->msr;
290 316
291 /* Truncate crit indicators in 32 bit mode */ 317 /* Truncate crit indicators in 32 bit mode */
292 if (!(vcpu->arch.shared->msr & MSR_SF)) { 318 if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -325,6 +351,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
325 msr_mask = MSR_CE | MSR_ME | MSR_DE; 351 msr_mask = MSR_CE | MSR_ME | MSR_DE;
326 int_class = INT_CLASS_NONCRIT; 352 int_class = INT_CLASS_NONCRIT;
327 break; 353 break;
354 case BOOKE_IRQPRIO_WATCHDOG:
328 case BOOKE_IRQPRIO_CRITICAL: 355 case BOOKE_IRQPRIO_CRITICAL:
329 case BOOKE_IRQPRIO_DBELL_CRIT: 356 case BOOKE_IRQPRIO_DBELL_CRIT:
330 allowed = vcpu->arch.shared->msr & MSR_CE; 357 allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -381,7 +408,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
381 set_guest_esr(vcpu, vcpu->arch.queued_esr); 408 set_guest_esr(vcpu, vcpu->arch.queued_esr);
382 if (update_dear == true) 409 if (update_dear == true)
383 set_guest_dear(vcpu, vcpu->arch.queued_dear); 410 set_guest_dear(vcpu, vcpu->arch.queued_dear);
384 kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); 411
412 new_msr &= msr_mask;
413#if defined(CONFIG_64BIT)
414 if (vcpu->arch.epcr & SPRN_EPCR_ICM)
415 new_msr |= MSR_CM;
416#endif
417 kvmppc_set_msr(vcpu, new_msr);
385 418
386 if (!keep_irq) 419 if (!keep_irq)
387 clear_bit(priority, &vcpu->arch.pending_exceptions); 420 clear_bit(priority, &vcpu->arch.pending_exceptions);
@@ -404,12 +437,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
404 return allowed; 437 return allowed;
405} 438}
406 439
440/*
441 * Return the number of jiffies until the next timeout. If the timeout is
442 * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
443 * because the larger value can break the timer APIs.
444 */
445static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
446{
447 u64 tb, wdt_tb, wdt_ticks = 0;
448 u64 nr_jiffies = 0;
449 u32 period = TCR_GET_WP(vcpu->arch.tcr);
450
451 wdt_tb = 1ULL << (63 - period);
452 tb = get_tb();
453 /*
454 * The watchdog timeout will hapeen when TB bit corresponding
455 * to watchdog will toggle from 0 to 1.
456 */
457 if (tb & wdt_tb)
458 wdt_ticks = wdt_tb;
459
460 wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
461
462 /* Convert timebase ticks to jiffies */
463 nr_jiffies = wdt_ticks;
464
465 if (do_div(nr_jiffies, tb_ticks_per_jiffy))
466 nr_jiffies++;
467
468 return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
469}
470
471static void arm_next_watchdog(struct kvm_vcpu *vcpu)
472{
473 unsigned long nr_jiffies;
474 unsigned long flags;
475
476 /*
477 * If TSR_ENW and TSR_WIS are not set then no need to exit to
478 * userspace, so clear the KVM_REQ_WATCHDOG request.
479 */
480 if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
481 clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
482
483 spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
484 nr_jiffies = watchdog_next_timeout(vcpu);
485 /*
486 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
487 * then do not run the watchdog timer as this can break timer APIs.
488 */
489 if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
490 mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
491 else
492 del_timer(&vcpu->arch.wdt_timer);
493 spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
494}
495
496void kvmppc_watchdog_func(unsigned long data)
497{
498 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
499 u32 tsr, new_tsr;
500 int final;
501
502 do {
503 new_tsr = tsr = vcpu->arch.tsr;
504 final = 0;
505
506 /* Time out event */
507 if (tsr & TSR_ENW) {
508 if (tsr & TSR_WIS)
509 final = 1;
510 else
511 new_tsr = tsr | TSR_WIS;
512 } else {
513 new_tsr = tsr | TSR_ENW;
514 }
515 } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
516
517 if (new_tsr & TSR_WIS) {
518 smp_wmb();
519 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
520 kvm_vcpu_kick(vcpu);
521 }
522
523 /*
524 * If this is final watchdog expiry and some action is required
525 * then exit to userspace.
526 */
527 if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
528 vcpu->arch.watchdog_enabled) {
529 smp_wmb();
530 kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
531 kvm_vcpu_kick(vcpu);
532 }
533
534 /*
535 * Stop running the watchdog timer after final expiration to
536 * prevent the host from being flooded with timers if the
537 * guest sets a short period.
538 * Timers will resume when TSR/TCR is updated next time.
539 */
540 if (!final)
541 arm_next_watchdog(vcpu);
542}
543
407static void update_timer_ints(struct kvm_vcpu *vcpu) 544static void update_timer_ints(struct kvm_vcpu *vcpu)
408{ 545{
409 if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) 546 if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
410 kvmppc_core_queue_dec(vcpu); 547 kvmppc_core_queue_dec(vcpu);
411 else 548 else
412 kvmppc_core_dequeue_dec(vcpu); 549 kvmppc_core_dequeue_dec(vcpu);
550
551 if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
552 kvmppc_core_queue_watchdog(vcpu);
553 else
554 kvmppc_core_dequeue_watchdog(vcpu);
413} 555}
414 556
415static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) 557static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -417,13 +559,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
417 unsigned long *pending = &vcpu->arch.pending_exceptions; 559 unsigned long *pending = &vcpu->arch.pending_exceptions;
418 unsigned int priority; 560 unsigned int priority;
419 561
420 if (vcpu->requests) {
421 if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
422 smp_mb();
423 update_timer_ints(vcpu);
424 }
425 }
426
427 priority = __ffs(*pending); 562 priority = __ffs(*pending);
428 while (priority < BOOKE_IRQPRIO_MAX) { 563 while (priority < BOOKE_IRQPRIO_MAX) {
429 if (kvmppc_booke_irqprio_deliver(vcpu, priority)) 564 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -459,37 +594,20 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
459 return r; 594 return r;
460} 595}
461 596
462/* 597int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
463 * Common checks before entering the guest world. Call with interrupts
464 * disabled.
465 *
466 * returns !0 if a signal is pending and check_signal is true
467 */
468static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
469{ 598{
470 int r = 0; 599 int r = 1; /* Indicate we want to get back into the guest */
471 600
472 WARN_ON_ONCE(!irqs_disabled()); 601 if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
473 while (true) { 602 update_timer_ints(vcpu);
474 if (need_resched()) { 603#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
475 local_irq_enable(); 604 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
476 cond_resched(); 605 kvmppc_core_flush_tlb(vcpu);
477 local_irq_disable(); 606#endif
478 continue;
479 }
480
481 if (signal_pending(current)) {
482 r = 1;
483 break;
484 }
485
486 if (kvmppc_core_prepare_to_enter(vcpu)) {
487 /* interrupts got enabled in between, so we
488 are back at square 1 */
489 continue;
490 }
491 607
492 break; 608 if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
609 vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
610 r = 0;
493 } 611 }
494 612
495 return r; 613 return r;
@@ -497,7 +615,7 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
497 615
498int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 616int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
499{ 617{
500 int ret; 618 int ret, s;
501#ifdef CONFIG_PPC_FPU 619#ifdef CONFIG_PPC_FPU
502 unsigned int fpscr; 620 unsigned int fpscr;
503 int fpexc_mode; 621 int fpexc_mode;
@@ -510,11 +628,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
510 } 628 }
511 629
512 local_irq_disable(); 630 local_irq_disable();
513 if (kvmppc_prepare_to_enter(vcpu)) { 631 s = kvmppc_prepare_to_enter(vcpu);
514 kvm_run->exit_reason = KVM_EXIT_INTR; 632 if (s <= 0) {
515 ret = -EINTR; 633 local_irq_enable();
634 ret = s;
516 goto out; 635 goto out;
517 } 636 }
637 kvmppc_lazy_ee_enable();
518 638
519 kvm_guest_enter(); 639 kvm_guest_enter();
520 640
@@ -542,6 +662,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
542 662
543 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 663 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
544 664
665 /* No need for kvm_guest_exit. It's done in handle_exit.
666 We also get here with interrupts enabled. */
667
545#ifdef CONFIG_PPC_FPU 668#ifdef CONFIG_PPC_FPU
546 kvmppc_save_guest_fp(vcpu); 669 kvmppc_save_guest_fp(vcpu);
547 670
@@ -557,10 +680,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
557 current->thread.fpexc_mode = fpexc_mode; 680 current->thread.fpexc_mode = fpexc_mode;
558#endif 681#endif
559 682
560 kvm_guest_exit();
561
562out: 683out:
563 local_irq_enable(); 684 vcpu->mode = OUTSIDE_GUEST_MODE;
564 return ret; 685 return ret;
565} 686}
566 687
@@ -668,6 +789,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
668 unsigned int exit_nr) 789 unsigned int exit_nr)
669{ 790{
670 int r = RESUME_HOST; 791 int r = RESUME_HOST;
792 int s;
671 793
672 /* update before a new last_exit_type is rewritten */ 794 /* update before a new last_exit_type is rewritten */
673 kvmppc_update_timing_stats(vcpu); 795 kvmppc_update_timing_stats(vcpu);
@@ -677,6 +799,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
677 799
678 local_irq_enable(); 800 local_irq_enable();
679 801
802 trace_kvm_exit(exit_nr, vcpu);
803 kvm_guest_exit();
804
680 run->exit_reason = KVM_EXIT_UNKNOWN; 805 run->exit_reason = KVM_EXIT_UNKNOWN;
681 run->ready_for_interrupt_injection = 1; 806 run->ready_for_interrupt_injection = 1;
682 807
@@ -971,10 +1096,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
971 */ 1096 */
972 if (!(r & RESUME_HOST)) { 1097 if (!(r & RESUME_HOST)) {
973 local_irq_disable(); 1098 local_irq_disable();
974 if (kvmppc_prepare_to_enter(vcpu)) { 1099 s = kvmppc_prepare_to_enter(vcpu);
975 run->exit_reason = KVM_EXIT_INTR; 1100 if (s <= 0) {
976 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 1101 local_irq_enable();
977 kvmppc_account_exit(vcpu, SIGNAL_EXITS); 1102 r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
1103 } else {
1104 kvmppc_lazy_ee_enable();
978 } 1105 }
979 } 1106 }
980 1107
@@ -1011,6 +1138,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1011 return r; 1138 return r;
1012} 1139}
1013 1140
1141int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
1142{
1143 /* setup watchdog timer once */
1144 spin_lock_init(&vcpu->arch.wdt_lock);
1145 setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
1146 (unsigned long)vcpu);
1147
1148 return 0;
1149}
1150
1151void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
1152{
1153 del_timer_sync(&vcpu->arch.wdt_timer);
1154}
1155
1014int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1156int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1015{ 1157{
1016 int i; 1158 int i;
@@ -1106,7 +1248,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
1106 } 1248 }
1107 1249
1108 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { 1250 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
1251 u32 old_tsr = vcpu->arch.tsr;
1252
1109 vcpu->arch.tsr = sregs->u.e.tsr; 1253 vcpu->arch.tsr = sregs->u.e.tsr;
1254
1255 if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
1256 arm_next_watchdog(vcpu);
1257
1110 update_timer_ints(vcpu); 1258 update_timer_ints(vcpu);
1111 } 1259 }
1112 1260
@@ -1221,12 +1369,70 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1221 1369
1222int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1370int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1223{ 1371{
1224 return -EINVAL; 1372 int r = -EINVAL;
1373
1374 switch (reg->id) {
1375 case KVM_REG_PPC_IAC1:
1376 case KVM_REG_PPC_IAC2:
1377 case KVM_REG_PPC_IAC3:
1378 case KVM_REG_PPC_IAC4: {
1379 int iac = reg->id - KVM_REG_PPC_IAC1;
1380 r = copy_to_user((u64 __user *)(long)reg->addr,
1381 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
1382 break;
1383 }
1384 case KVM_REG_PPC_DAC1:
1385 case KVM_REG_PPC_DAC2: {
1386 int dac = reg->id - KVM_REG_PPC_DAC1;
1387 r = copy_to_user((u64 __user *)(long)reg->addr,
1388 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
1389 break;
1390 }
1391#if defined(CONFIG_64BIT)
1392 case KVM_REG_PPC_EPCR:
1393 r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
1394 break;
1395#endif
1396 default:
1397 break;
1398 }
1399 return r;
1225} 1400}
1226 1401
1227int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1402int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1228{ 1403{
1229 return -EINVAL; 1404 int r = -EINVAL;
1405
1406 switch (reg->id) {
1407 case KVM_REG_PPC_IAC1:
1408 case KVM_REG_PPC_IAC2:
1409 case KVM_REG_PPC_IAC3:
1410 case KVM_REG_PPC_IAC4: {
1411 int iac = reg->id - KVM_REG_PPC_IAC1;
1412 r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
1413 (u64 __user *)(long)reg->addr, sizeof(u64));
1414 break;
1415 }
1416 case KVM_REG_PPC_DAC1:
1417 case KVM_REG_PPC_DAC2: {
1418 int dac = reg->id - KVM_REG_PPC_DAC1;
1419 r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
1420 (u64 __user *)(long)reg->addr, sizeof(u64));
1421 break;
1422 }
1423#if defined(CONFIG_64BIT)
1424 case KVM_REG_PPC_EPCR: {
1425 u32 new_epcr;
1426 r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
1427 if (r == 0)
1428 kvmppc_set_epcr(vcpu, new_epcr);
1429 break;
1430 }
1431#endif
1432 default:
1433 break;
1434 }
1435 return r;
1230} 1436}
1231 1437
1232int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1438int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -1253,20 +1459,50 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1253 return -ENOTSUPP; 1459 return -ENOTSUPP;
1254} 1460}
1255 1461
1462void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1463 struct kvm_memory_slot *dont)
1464{
1465}
1466
1467int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1468 unsigned long npages)
1469{
1470 return 0;
1471}
1472
1256int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1473int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1474 struct kvm_memory_slot *memslot,
1257 struct kvm_userspace_memory_region *mem) 1475 struct kvm_userspace_memory_region *mem)
1258{ 1476{
1259 return 0; 1477 return 0;
1260} 1478}
1261 1479
1262void kvmppc_core_commit_memory_region(struct kvm *kvm, 1480void kvmppc_core_commit_memory_region(struct kvm *kvm,
1263 struct kvm_userspace_memory_region *mem) 1481 struct kvm_userspace_memory_region *mem,
1482 struct kvm_memory_slot old)
1483{
1484}
1485
1486void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
1487{
1488}
1489
1490void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
1264{ 1491{
1492#if defined(CONFIG_64BIT)
1493 vcpu->arch.epcr = new_epcr;
1494#ifdef CONFIG_KVM_BOOKE_HV
1495 vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
1496 if (vcpu->arch.epcr & SPRN_EPCR_ICM)
1497 vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
1498#endif
1499#endif
1265} 1500}
1266 1501
1267void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) 1502void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
1268{ 1503{
1269 vcpu->arch.tcr = new_tcr; 1504 vcpu->arch.tcr = new_tcr;
1505 arm_next_watchdog(vcpu);
1270 update_timer_ints(vcpu); 1506 update_timer_ints(vcpu);
1271} 1507}
1272 1508
@@ -1281,6 +1517,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
1281void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) 1517void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
1282{ 1518{
1283 clear_bits(tsr_bits, &vcpu->arch.tsr); 1519 clear_bits(tsr_bits, &vcpu->arch.tsr);
1520
1521 /*
1522 * We may have stopped the watchdog due to
1523 * being stuck on final expiration.
1524 */
1525 if (tsr_bits & (TSR_ENW | TSR_WIS))
1526 arm_next_watchdog(vcpu);
1527
1284 update_timer_ints(vcpu); 1528 update_timer_ints(vcpu);
1285} 1529}
1286 1530
@@ -1298,12 +1542,14 @@ void kvmppc_decrementer_func(unsigned long data)
1298 1542
1299void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1543void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1300{ 1544{
1545 vcpu->cpu = smp_processor_id();
1301 current->thread.kvm_vcpu = vcpu; 1546 current->thread.kvm_vcpu = vcpu;
1302} 1547}
1303 1548
1304void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) 1549void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
1305{ 1550{
1306 current->thread.kvm_vcpu = NULL; 1551 current->thread.kvm_vcpu = NULL;
1552 vcpu->cpu = -1;
1307} 1553}
1308 1554
1309int __init kvmppc_booke_init(void) 1555int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index ba61974c1e20..e9b88e433f64 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -69,6 +69,7 @@ extern unsigned long kvmppc_booke_handlers;
69void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); 69void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
70void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); 70void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
71 71
72void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
72void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); 73void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
73void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); 74void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
74void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); 75void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb608ab..4685b8cf2249 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
133 vcpu->arch.csrr1 = spr_val; 133 vcpu->arch.csrr1 = spr_val;
134 break; 134 break;
135 case SPRN_DBCR0: 135 case SPRN_DBCR0:
136 vcpu->arch.dbcr0 = spr_val; 136 vcpu->arch.dbg_reg.dbcr0 = spr_val;
137 break; 137 break;
138 case SPRN_DBCR1: 138 case SPRN_DBCR1:
139 vcpu->arch.dbcr1 = spr_val; 139 vcpu->arch.dbg_reg.dbcr1 = spr_val;
140 break; 140 break;
141 case SPRN_DBSR: 141 case SPRN_DBSR:
142 vcpu->arch.dbsr &= ~spr_val; 142 vcpu->arch.dbsr &= ~spr_val;
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
145 kvmppc_clr_tsr_bits(vcpu, spr_val); 145 kvmppc_clr_tsr_bits(vcpu, spr_val);
146 break; 146 break;
147 case SPRN_TCR: 147 case SPRN_TCR:
148 /*
149 * WRC is a 2-bit field that is supposed to preserve its
150 * value once written to non-zero.
151 */
152 if (vcpu->arch.tcr & TCR_WRC_MASK) {
153 spr_val &= ~TCR_WRC_MASK;
154 spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
155 }
148 kvmppc_set_tcr(vcpu, spr_val); 156 kvmppc_set_tcr(vcpu, spr_val);
149 break; 157 break;
150 158
@@ -229,7 +237,17 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
229 case SPRN_IVOR15: 237 case SPRN_IVOR15:
230 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; 238 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
231 break; 239 break;
232 240 case SPRN_MCSR:
241 vcpu->arch.mcsr &= ~spr_val;
242 break;
243#if defined(CONFIG_64BIT)
244 case SPRN_EPCR:
245 kvmppc_set_epcr(vcpu, spr_val);
246#ifdef CONFIG_KVM_BOOKE_HV
247 mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
248#endif
249 break;
250#endif
233 default: 251 default:
234 emulated = EMULATE_FAIL; 252 emulated = EMULATE_FAIL;
235 } 253 }
@@ -258,10 +276,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
258 *spr_val = vcpu->arch.csrr1; 276 *spr_val = vcpu->arch.csrr1;
259 break; 277 break;
260 case SPRN_DBCR0: 278 case SPRN_DBCR0:
261 *spr_val = vcpu->arch.dbcr0; 279 *spr_val = vcpu->arch.dbg_reg.dbcr0;
262 break; 280 break;
263 case SPRN_DBCR1: 281 case SPRN_DBCR1:
264 *spr_val = vcpu->arch.dbcr1; 282 *spr_val = vcpu->arch.dbg_reg.dbcr1;
265 break; 283 break;
266 case SPRN_DBSR: 284 case SPRN_DBSR:
267 *spr_val = vcpu->arch.dbsr; 285 *spr_val = vcpu->arch.dbsr;
@@ -321,6 +339,14 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
321 case SPRN_IVOR15: 339 case SPRN_IVOR15:
322 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 340 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
323 break; 341 break;
342 case SPRN_MCSR:
343 *spr_val = vcpu->arch.mcsr;
344 break;
345#if defined(CONFIG_64BIT)
346 case SPRN_EPCR:
347 *spr_val = vcpu->arch.epcr;
348 break;
349#endif
324 350
325 default: 351 default:
326 emulated = EMULATE_FAIL; 352 emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 099fe8272b57..e8ed7d659c55 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -16,6 +16,7 @@
16 * 16 *
17 * Author: Varun Sethi <varun.sethi@freescale.com> 17 * Author: Varun Sethi <varun.sethi@freescale.com>
18 * Author: Scott Wood <scotwood@freescale.com> 18 * Author: Scott Wood <scotwood@freescale.com>
19 * Author: Mihai Caraman <mihai.caraman@freescale.com>
19 * 20 *
20 * This file is derived from arch/powerpc/kvm/booke_interrupts.S 21 * This file is derived from arch/powerpc/kvm/booke_interrupts.S
21 */ 22 */
@@ -30,31 +31,33 @@
30#include <asm/bitsperlong.h> 31#include <asm/bitsperlong.h>
31#include <asm/thread_info.h> 32#include <asm/thread_info.h>
32 33
34#ifdef CONFIG_64BIT
35#include <asm/exception-64e.h>
36#else
33#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ 37#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
34 38#endif
35#define GET_VCPU(vcpu, thread) \
36 PPC_LL vcpu, THREAD_KVM_VCPU(thread)
37 39
38#define LONGBYTES (BITS_PER_LONG / 8) 40#define LONGBYTES (BITS_PER_LONG / 8)
39 41
40#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES)) 42#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
41 43
42/* The host stack layout: */ 44/* The host stack layout: */
43#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */ 45#define HOST_R1 0 /* Implied by stwu. */
44#define HOST_CALLEE_LR (1 * LONGBYTES) 46#define HOST_CALLEE_LR PPC_LR_STKOFF
45#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */ 47#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES)
46/* 48/*
47 * r2 is special: it holds 'current', and it made nonvolatile in the 49 * r2 is special: it holds 'current', and it made nonvolatile in the
48 * kernel with the -ffixed-r2 gcc option. 50 * kernel with the -ffixed-r2 gcc option.
49 */ 51 */
50#define HOST_R2 (3 * LONGBYTES) 52#define HOST_R2 (HOST_RUN + LONGBYTES)
51#define HOST_CR (4 * LONGBYTES) 53#define HOST_CR (HOST_R2 + LONGBYTES)
52#define HOST_NV_GPRS (5 * LONGBYTES) 54#define HOST_NV_GPRS (HOST_CR + LONGBYTES)
53#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES)) 55#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
54#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n) 56#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
55#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES) 57#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
56#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */ 58#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
57#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */ 59/* LR in caller stack frame. */
60#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF)
58 61
59#define NEED_EMU 0x00000001 /* emulation -- save nv regs */ 62#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
60#define NEED_DEAR 0x00000002 /* save faulting DEAR */ 63#define NEED_DEAR 0x00000002 /* save faulting DEAR */
@@ -201,12 +204,128 @@
201 b kvmppc_resume_host 204 b kvmppc_resume_host
202.endm 205.endm
203 206
207#ifdef CONFIG_64BIT
208/* Exception types */
209#define EX_GEN 1
210#define EX_GDBELL 2
211#define EX_DBG 3
212#define EX_MC 4
213#define EX_CRIT 5
214#define EX_TLB 6
215
216/*
217 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
218 */
219.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
220 _GLOBAL(kvmppc_handler_\intno\()_\srr1)
221 mr r11, r4
222 /*
223 * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
224 */
225 PPC_LL r4, PACACURRENT(r13)
226 PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
227 stw r10, VCPU_CR(r4)
228 PPC_STL r11, VCPU_GPR(R4)(r4)
229 PPC_STL r5, VCPU_GPR(R5)(r4)
230 .if \type == EX_CRIT
231 PPC_LL r5, (\paca_ex + EX_R13)(r13)
232 .else
233 mfspr r5, \scratch
234 .endif
235 PPC_STL r6, VCPU_GPR(R6)(r4)
236 PPC_STL r8, VCPU_GPR(R8)(r4)
237 PPC_STL r9, VCPU_GPR(R9)(r4)
238 PPC_STL r5, VCPU_GPR(R13)(r4)
239 PPC_LL r6, (\paca_ex + \ex_r10)(r13)
240 PPC_LL r8, (\paca_ex + \ex_r11)(r13)
241 PPC_STL r3, VCPU_GPR(R3)(r4)
242 PPC_STL r7, VCPU_GPR(R7)(r4)
243 PPC_STL r12, VCPU_GPR(R12)(r4)
244 PPC_STL r6, VCPU_GPR(R10)(r4)
245 PPC_STL r8, VCPU_GPR(R11)(r4)
246 mfctr r5
247 PPC_STL r5, VCPU_CTR(r4)
248 mfspr r5, \srr0
249 mfspr r6, \srr1
250 kvm_handler_common \intno, \srr0, \flags
251.endm
252
253#define EX_PARAMS(type) \
254 EX_##type, \
255 SPRN_SPRG_##type##_SCRATCH, \
256 PACA_EX##type, \
257 EX_R10, \
258 EX_R11
259
260#define EX_PARAMS_TLB \
261 EX_TLB, \
262 SPRN_SPRG_GEN_SCRATCH, \
263 PACA_EXTLB, \
264 EX_TLB_R10, \
265 EX_TLB_R11
266
267kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
268 SPRN_CSRR0, SPRN_CSRR1, 0
269kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
270 SPRN_MCSRR0, SPRN_MCSRR1, 0
271kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
272 SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
273kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
274 SPRN_SRR0, SPRN_SRR1, NEED_ESR
275kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
276 SPRN_SRR0, SPRN_SRR1, 0
277kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
278 SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
279kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
280 SPRN_SRR0, SPRN_SRR1,NEED_ESR
281kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
282 SPRN_SRR0, SPRN_SRR1, 0
283kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
284 SPRN_SRR0, SPRN_SRR1, 0
285kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
286 SPRN_SRR0, SPRN_SRR1, 0
287kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
288 SPRN_SRR0, SPRN_SRR1, 0
289kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
290 SPRN_CSRR0, SPRN_CSRR1, 0
291/*
292 * Only bolted TLB miss exception handlers are supported for now
293 */
294kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
295 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
296kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
297 SPRN_SRR0, SPRN_SRR1, 0
298kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
299 SPRN_SRR0, SPRN_SRR1, 0
300kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
301 SPRN_SRR0, SPRN_SRR1, 0
302kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
303 SPRN_SRR0, SPRN_SRR1, 0
304kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
305 SPRN_SRR0, SPRN_SRR1, 0
306kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
307 SPRN_SRR0, SPRN_SRR1, 0
308kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
309 SPRN_CSRR0, SPRN_CSRR1, 0
310kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
311 SPRN_SRR0, SPRN_SRR1, NEED_EMU
312kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
313 SPRN_SRR0, SPRN_SRR1, 0
314kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
315 SPRN_GSRR0, SPRN_GSRR1, 0
316kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
317 SPRN_CSRR0, SPRN_CSRR1, 0
318kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
319 SPRN_DSRR0, SPRN_DSRR1, 0
320kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
321 SPRN_CSRR0, SPRN_CSRR1, 0
322#else
204/* 323/*
205 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h 324 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
206 */ 325 */
207.macro kvm_handler intno srr0, srr1, flags 326.macro kvm_handler intno srr0, srr1, flags
208_GLOBAL(kvmppc_handler_\intno\()_\srr1) 327_GLOBAL(kvmppc_handler_\intno\()_\srr1)
209 GET_VCPU(r11, r10) 328 PPC_LL r11, THREAD_KVM_VCPU(r10)
210 PPC_STL r3, VCPU_GPR(R3)(r11) 329 PPC_STL r3, VCPU_GPR(R3)(r11)
211 mfspr r3, SPRN_SPRG_RSCRATCH0 330 mfspr r3, SPRN_SPRG_RSCRATCH0
212 PPC_STL r4, VCPU_GPR(R4)(r11) 331 PPC_STL r4, VCPU_GPR(R4)(r11)
@@ -233,7 +352,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
233.macro kvm_lvl_handler intno scratch srr0, srr1, flags 352.macro kvm_lvl_handler intno scratch srr0, srr1, flags
234_GLOBAL(kvmppc_handler_\intno\()_\srr1) 353_GLOBAL(kvmppc_handler_\intno\()_\srr1)
235 mfspr r10, SPRN_SPRG_THREAD 354 mfspr r10, SPRN_SPRG_THREAD
236 GET_VCPU(r11, r10) 355 PPC_LL r11, THREAD_KVM_VCPU(r10)
237 PPC_STL r3, VCPU_GPR(R3)(r11) 356 PPC_STL r3, VCPU_GPR(R3)(r11)
238 mfspr r3, \scratch 357 mfspr r3, \scratch
239 PPC_STL r4, VCPU_GPR(R4)(r11) 358 PPC_STL r4, VCPU_GPR(R4)(r11)
@@ -295,7 +414,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
295 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 414 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
296kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ 415kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
297 SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0 416 SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
298 417#endif
299 418
300/* Registers: 419/* Registers:
301 * SPRG_SCRATCH0: guest r10 420 * SPRG_SCRATCH0: guest r10
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index aa8b81428bf4..c70d37ed770a 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -27,8 +27,7 @@
27#define E500_TLB_NUM 2 27#define E500_TLB_NUM 2
28 28
29#define E500_TLB_VALID 1 29#define E500_TLB_VALID 1
30#define E500_TLB_DIRTY 2 30#define E500_TLB_BITMAP 2
31#define E500_TLB_BITMAP 4
32 31
33struct tlbe_ref { 32struct tlbe_ref {
34 pfn_t pfn; 33 pfn_t pfn;
@@ -130,9 +129,9 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
130 ulong value); 129 ulong value);
131int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu); 130int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
132int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu); 131int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
133int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb); 132int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
134int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb); 133int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
135int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb); 134int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
136int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500); 135int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
137void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); 136void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
138 137
@@ -155,7 +154,7 @@ get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
155 154
156static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) 155static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
157{ 156{
158 return tlbe->mas2 & 0xfffff000; 157 return tlbe->mas2 & MAS2_EPN;
159} 158}
160 159
161static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) 160static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e04b0ef55ce0..e78f353a836a 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -89,6 +89,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
89 int ra = get_ra(inst); 89 int ra = get_ra(inst);
90 int rb = get_rb(inst); 90 int rb = get_rb(inst);
91 int rt = get_rt(inst); 91 int rt = get_rt(inst);
92 gva_t ea;
92 93
93 switch (get_op(inst)) { 94 switch (get_op(inst)) {
94 case 31: 95 case 31:
@@ -113,15 +114,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
113 break; 114 break;
114 115
115 case XOP_TLBSX: 116 case XOP_TLBSX:
116 emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); 117 ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
118 emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
117 break; 119 break;
118 120
119 case XOP_TLBILX: 121 case XOP_TLBILX: {
120 emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb); 122 int type = rt & 0x3;
123 ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
124 emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
121 break; 125 break;
126 }
122 127
123 case XOP_TLBIVAX: 128 case XOP_TLBIVAX:
124 emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); 129 ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
130 emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
125 break; 131 break;
126 132
127 default: 133 default:
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index ff38b664195d..cf3f18012371 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -304,17 +304,13 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
304 ref->flags = E500_TLB_VALID; 304 ref->flags = E500_TLB_VALID;
305 305
306 if (tlbe_is_writable(gtlbe)) 306 if (tlbe_is_writable(gtlbe))
307 ref->flags |= E500_TLB_DIRTY; 307 kvm_set_pfn_dirty(pfn);
308} 308}
309 309
310static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) 310static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
311{ 311{
312 if (ref->flags & E500_TLB_VALID) { 312 if (ref->flags & E500_TLB_VALID) {
313 if (ref->flags & E500_TLB_DIRTY) 313 trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
314 kvm_release_pfn_dirty(ref->pfn);
315 else
316 kvm_release_pfn_clean(ref->pfn);
317
318 ref->flags = 0; 314 ref->flags = 0;
319 } 315 }
320} 316}
@@ -357,6 +353,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
357 clear_tlb_privs(vcpu_e500); 353 clear_tlb_privs(vcpu_e500);
358} 354}
359 355
356void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
357{
358 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
359 clear_tlb_refs(vcpu_e500);
360 clear_tlb1_bitmap(vcpu_e500);
361}
362
360static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, 363static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
361 unsigned int eaddr, int as) 364 unsigned int eaddr, int as)
362{ 365{
@@ -412,7 +415,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
412 struct tlbe_ref *ref) 415 struct tlbe_ref *ref)
413{ 416{
414 struct kvm_memory_slot *slot; 417 struct kvm_memory_slot *slot;
415 unsigned long pfn, hva; 418 unsigned long pfn = 0; /* silence GCC warning */
419 unsigned long hva;
416 int pfnmap = 0; 420 int pfnmap = 0;
417 int tsize = BOOK3E_PAGESZ_4K; 421 int tsize = BOOK3E_PAGESZ_4K;
418 422
@@ -521,7 +525,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
521 if (likely(!pfnmap)) { 525 if (likely(!pfnmap)) {
522 unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); 526 unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
523 pfn = gfn_to_pfn_memslot(slot, gfn); 527 pfn = gfn_to_pfn_memslot(slot, gfn);
524 if (is_error_pfn(pfn)) { 528 if (is_error_noslot_pfn(pfn)) {
525 printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", 529 printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
526 (long)gfn); 530 (long)gfn);
527 return; 531 return;
@@ -541,6 +545,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
541 545
542 /* Clear i-cache for new pages */ 546 /* Clear i-cache for new pages */
543 kvmppc_mmu_flush_icache(pfn); 547 kvmppc_mmu_flush_icache(pfn);
548
549 /* Drop refcount on page, so that mmu notifiers can clear it */
550 kvm_release_pfn_clean(pfn);
544} 551}
545 552
546/* XXX only map the one-one case, for now use TLB0 */ 553/* XXX only map the one-one case, for now use TLB0 */
@@ -682,14 +689,11 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
682 return EMULATE_DONE; 689 return EMULATE_DONE;
683} 690}
684 691
685int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) 692int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
686{ 693{
687 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 694 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
688 unsigned int ia; 695 unsigned int ia;
689 int esel, tlbsel; 696 int esel, tlbsel;
690 gva_t ea;
691
692 ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
693 697
694 ia = (ea >> 2) & 0x1; 698 ia = (ea >> 2) & 0x1;
695 699
@@ -716,7 +720,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
716} 720}
717 721
718static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, 722static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
719 int pid, int rt) 723 int pid, int type)
720{ 724{
721 struct kvm_book3e_206_tlb_entry *tlbe; 725 struct kvm_book3e_206_tlb_entry *tlbe;
722 int tid, esel; 726 int tid, esel;
@@ -725,7 +729,7 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
725 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) { 729 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
726 tlbe = get_entry(vcpu_e500, tlbsel, esel); 730 tlbe = get_entry(vcpu_e500, tlbsel, esel);
727 tid = get_tlb_tid(tlbe); 731 tid = get_tlb_tid(tlbe);
728 if (rt == 0 || tid == pid) { 732 if (type == 0 || tid == pid) {
729 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); 733 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
730 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); 734 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
731 } 735 }
@@ -733,14 +737,9 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
733} 737}
734 738
735static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, 739static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
736 int ra, int rb) 740 gva_t ea)
737{ 741{
738 int tlbsel, esel; 742 int tlbsel, esel;
739 gva_t ea;
740
741 ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
742 if (ra)
743 ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
744 743
745 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 744 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
746 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1); 745 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
@@ -752,16 +751,16 @@ static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
752 } 751 }
753} 752}
754 753
755int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb) 754int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
756{ 755{
757 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 756 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
758 int pid = get_cur_spid(vcpu); 757 int pid = get_cur_spid(vcpu);
759 758
760 if (rt == 0 || rt == 1) { 759 if (type == 0 || type == 1) {
761 tlbilx_all(vcpu_e500, 0, pid, rt); 760 tlbilx_all(vcpu_e500, 0, pid, type);
762 tlbilx_all(vcpu_e500, 1, pid, rt); 761 tlbilx_all(vcpu_e500, 1, pid, type);
763 } else if (rt == 3) { 762 } else if (type == 3) {
764 tlbilx_one(vcpu_e500, pid, ra, rb); 763 tlbilx_one(vcpu_e500, pid, ea);
765 } 764 }
766 765
767 return EMULATE_DONE; 766 return EMULATE_DONE;
@@ -786,16 +785,13 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
786 return EMULATE_DONE; 785 return EMULATE_DONE;
787} 786}
788 787
789int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) 788int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
790{ 789{
791 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 790 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
792 int as = !!get_cur_sas(vcpu); 791 int as = !!get_cur_sas(vcpu);
793 unsigned int pid = get_cur_spid(vcpu); 792 unsigned int pid = get_cur_spid(vcpu);
794 int esel, tlbsel; 793 int esel, tlbsel;
795 struct kvm_book3e_206_tlb_entry *gtlbe = NULL; 794 struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
796 gva_t ea;
797
798 ea = kvmppc_get_gpr(vcpu, rb);
799 795
800 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 796 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
801 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); 797 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -875,6 +871,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
875 871
876 gtlbe->mas1 = vcpu->arch.shared->mas1; 872 gtlbe->mas1 = vcpu->arch.shared->mas1;
877 gtlbe->mas2 = vcpu->arch.shared->mas2; 873 gtlbe->mas2 = vcpu->arch.shared->mas2;
874 if (!(vcpu->arch.shared->msr & MSR_CM))
875 gtlbe->mas2 &= 0xffffffffUL;
878 gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; 876 gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
879 877
880 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, 878 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
@@ -1039,8 +1037,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1039 sesel = 0; /* unused */ 1037 sesel = 0; /* unused */
1040 priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; 1038 priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
1041 1039
1042 kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, 1040 /* Only triggers after clear_tlb_refs */
1043 &priv->ref, eaddr, &stlbe); 1041 if (unlikely(!(priv->ref.flags & E500_TLB_VALID)))
1042 kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
1043 else
1044 kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
1045 &priv->ref, eaddr, &stlbe);
1044 break; 1046 break;
1045 1047
1046 case 1: { 1048 case 1: {
@@ -1060,6 +1062,49 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1060 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); 1062 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
1061} 1063}
1062 1064
1065/************* MMU Notifiers *************/
1066
1067int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
1068{
1069 trace_kvm_unmap_hva(hva);
1070
1071 /*
1072 * Flush all shadow tlb entries everywhere. This is slow, but
1073 * we are 100% sure that we catch the to be unmapped page
1074 */
1075 kvm_flush_remote_tlbs(kvm);
1076
1077 return 0;
1078}
1079
1080int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
1081{
1082 /* kvm_unmap_hva flushes everything anyways */
1083 kvm_unmap_hva(kvm, start);
1084
1085 return 0;
1086}
1087
1088int kvm_age_hva(struct kvm *kvm, unsigned long hva)
1089{
1090 /* XXX could be more clever ;) */
1091 return 0;
1092}
1093
1094int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
1095{
1096 /* XXX could be more clever ;) */
1097 return 0;
1098}
1099
1100void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
1101{
1102 /* The page will get remapped properly on its next fault */
1103 kvm_unmap_hva(kvm, hva);
1104}
1105
1106/*****************************************/
1107
1063static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) 1108static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
1064{ 1109{
1065 int i; 1110 int i;
@@ -1081,6 +1126,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
1081 } 1126 }
1082 1127
1083 vcpu_e500->num_shared_tlb_pages = 0; 1128 vcpu_e500->num_shared_tlb_pages = 0;
1129
1130 kfree(vcpu_e500->shared_tlb_pages);
1084 vcpu_e500->shared_tlb_pages = NULL; 1131 vcpu_e500->shared_tlb_pages = NULL;
1085 } else { 1132 } else {
1086 kfree(vcpu_e500->gtlb_arch); 1133 kfree(vcpu_e500->gtlb_arch);
@@ -1178,21 +1225,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1178 } 1225 }
1179 1226
1180 virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); 1227 virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
1181 if (!virt) 1228 if (!virt) {
1229 ret = -ENOMEM;
1182 goto err_put_page; 1230 goto err_put_page;
1231 }
1183 1232
1184 privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], 1233 privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
1185 GFP_KERNEL); 1234 GFP_KERNEL);
1186 privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], 1235 privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
1187 GFP_KERNEL); 1236 GFP_KERNEL);
1188 1237
1189 if (!privs[0] || !privs[1]) 1238 if (!privs[0] || !privs[1]) {
1190 goto err_put_page; 1239 ret = -ENOMEM;
1240 goto err_privs;
1241 }
1191 1242
1192 g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], 1243 g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
1193 GFP_KERNEL); 1244 GFP_KERNEL);
1194 if (!g2h_bitmap) 1245 if (!g2h_bitmap) {
1195 goto err_put_page; 1246 ret = -ENOMEM;
1247 goto err_privs;
1248 }
1196 1249
1197 free_gtlb(vcpu_e500); 1250 free_gtlb(vcpu_e500);
1198 1251
@@ -1232,10 +1285,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1232 kvmppc_recalc_tlb1map_range(vcpu_e500); 1285 kvmppc_recalc_tlb1map_range(vcpu_e500);
1233 return 0; 1286 return 0;
1234 1287
1235err_put_page: 1288err_privs:
1236 kfree(privs[0]); 1289 kfree(privs[0]);
1237 kfree(privs[1]); 1290 kfree(privs[1]);
1238 1291
1292err_put_page:
1239 for (i = 0; i < num_pages; i++) 1293 for (i = 0; i < num_pages; i++)
1240 put_page(pages[i]); 1294 put_page(pages[i]);
1241 1295
@@ -1332,7 +1386,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
1332 if (!vcpu_e500->gtlb_priv[1]) 1386 if (!vcpu_e500->gtlb_priv[1])
1333 goto err; 1387 goto err;
1334 1388
1335 vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) * 1389 vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
1336 vcpu_e500->gtlb_params[1].entries, 1390 vcpu_e500->gtlb_params[1].entries,
1337 GFP_KERNEL); 1391 GFP_KERNEL);
1338 if (!vcpu_e500->g2h_tlb1_map) 1392 if (!vcpu_e500->g2h_tlb1_map)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index ee04abaefe23..b0855e5d8905 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -131,6 +131,125 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
131 return vcpu->arch.dec - jd; 131 return vcpu->arch.dec - jd;
132} 132}
133 133
134static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
135{
136 enum emulation_result emulated = EMULATE_DONE;
137 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
138
139 switch (sprn) {
140 case SPRN_SRR0:
141 vcpu->arch.shared->srr0 = spr_val;
142 break;
143 case SPRN_SRR1:
144 vcpu->arch.shared->srr1 = spr_val;
145 break;
146
147 /* XXX We need to context-switch the timebase for
148 * watchdog and FIT. */
149 case SPRN_TBWL: break;
150 case SPRN_TBWU: break;
151
152 case SPRN_MSSSR0: break;
153
154 case SPRN_DEC:
155 vcpu->arch.dec = spr_val;
156 kvmppc_emulate_dec(vcpu);
157 break;
158
159 case SPRN_SPRG0:
160 vcpu->arch.shared->sprg0 = spr_val;
161 break;
162 case SPRN_SPRG1:
163 vcpu->arch.shared->sprg1 = spr_val;
164 break;
165 case SPRN_SPRG2:
166 vcpu->arch.shared->sprg2 = spr_val;
167 break;
168 case SPRN_SPRG3:
169 vcpu->arch.shared->sprg3 = spr_val;
170 break;
171
172 default:
173 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
174 spr_val);
175 if (emulated == EMULATE_FAIL)
176 printk(KERN_INFO "mtspr: unknown spr "
177 "0x%x\n", sprn);
178 break;
179 }
180
181 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
182
183 return emulated;
184}
185
186static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
187{
188 enum emulation_result emulated = EMULATE_DONE;
189 ulong spr_val = 0;
190
191 switch (sprn) {
192 case SPRN_SRR0:
193 spr_val = vcpu->arch.shared->srr0;
194 break;
195 case SPRN_SRR1:
196 spr_val = vcpu->arch.shared->srr1;
197 break;
198 case SPRN_PVR:
199 spr_val = vcpu->arch.pvr;
200 break;
201 case SPRN_PIR:
202 spr_val = vcpu->vcpu_id;
203 break;
204 case SPRN_MSSSR0:
205 spr_val = 0;
206 break;
207
208 /* Note: mftb and TBRL/TBWL are user-accessible, so
209 * the guest can always access the real TB anyways.
210 * In fact, we probably will never see these traps. */
211 case SPRN_TBWL:
212 spr_val = get_tb() >> 32;
213 break;
214 case SPRN_TBWU:
215 spr_val = get_tb();
216 break;
217
218 case SPRN_SPRG0:
219 spr_val = vcpu->arch.shared->sprg0;
220 break;
221 case SPRN_SPRG1:
222 spr_val = vcpu->arch.shared->sprg1;
223 break;
224 case SPRN_SPRG2:
225 spr_val = vcpu->arch.shared->sprg2;
226 break;
227 case SPRN_SPRG3:
228 spr_val = vcpu->arch.shared->sprg3;
229 break;
230 /* Note: SPRG4-7 are user-readable, so we don't get
231 * a trap. */
232
233 case SPRN_DEC:
234 spr_val = kvmppc_get_dec(vcpu, get_tb());
235 break;
236 default:
237 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
238 &spr_val);
239 if (unlikely(emulated == EMULATE_FAIL)) {
240 printk(KERN_INFO "mfspr: unknown spr "
241 "0x%x\n", sprn);
242 }
243 break;
244 }
245
246 if (emulated == EMULATE_DONE)
247 kvmppc_set_gpr(vcpu, rt, spr_val);
248 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
249
250 return emulated;
251}
252
134/* XXX to do: 253/* XXX to do:
135 * lhax 254 * lhax
136 * lhaux 255 * lhaux
@@ -156,7 +275,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
156 int sprn = get_sprn(inst); 275 int sprn = get_sprn(inst);
157 enum emulation_result emulated = EMULATE_DONE; 276 enum emulation_result emulated = EMULATE_DONE;
158 int advance = 1; 277 int advance = 1;
159 ulong spr_val = 0;
160 278
161 /* this default type might be overwritten by subcategories */ 279 /* this default type might be overwritten by subcategories */
162 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); 280 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -236,62 +354,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
236 break; 354 break;
237 355
238 case OP_31_XOP_MFSPR: 356 case OP_31_XOP_MFSPR:
239 switch (sprn) { 357 emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
240 case SPRN_SRR0:
241 spr_val = vcpu->arch.shared->srr0;
242 break;
243 case SPRN_SRR1:
244 spr_val = vcpu->arch.shared->srr1;
245 break;
246 case SPRN_PVR:
247 spr_val = vcpu->arch.pvr;
248 break;
249 case SPRN_PIR:
250 spr_val = vcpu->vcpu_id;
251 break;
252 case SPRN_MSSSR0:
253 spr_val = 0;
254 break;
255
256 /* Note: mftb and TBRL/TBWL are user-accessible, so
257 * the guest can always access the real TB anyways.
258 * In fact, we probably will never see these traps. */
259 case SPRN_TBWL:
260 spr_val = get_tb() >> 32;
261 break;
262 case SPRN_TBWU:
263 spr_val = get_tb();
264 break;
265
266 case SPRN_SPRG0:
267 spr_val = vcpu->arch.shared->sprg0;
268 break;
269 case SPRN_SPRG1:
270 spr_val = vcpu->arch.shared->sprg1;
271 break;
272 case SPRN_SPRG2:
273 spr_val = vcpu->arch.shared->sprg2;
274 break;
275 case SPRN_SPRG3:
276 spr_val = vcpu->arch.shared->sprg3;
277 break;
278 /* Note: SPRG4-7 are user-readable, so we don't get
279 * a trap. */
280
281 case SPRN_DEC:
282 spr_val = kvmppc_get_dec(vcpu, get_tb());
283 break;
284 default:
285 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
286 &spr_val);
287 if (unlikely(emulated == EMULATE_FAIL)) {
288 printk(KERN_INFO "mfspr: unknown spr "
289 "0x%x\n", sprn);
290 }
291 break;
292 }
293 kvmppc_set_gpr(vcpu, rt, spr_val);
294 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
295 break; 358 break;
296 359
297 case OP_31_XOP_STHX: 360 case OP_31_XOP_STHX:
@@ -308,49 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
308 break; 371 break;
309 372
310 case OP_31_XOP_MTSPR: 373 case OP_31_XOP_MTSPR:
311 spr_val = kvmppc_get_gpr(vcpu, rs); 374 emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
312 switch (sprn) {
313 case SPRN_SRR0:
314 vcpu->arch.shared->srr0 = spr_val;
315 break;
316 case SPRN_SRR1:
317 vcpu->arch.shared->srr1 = spr_val;
318 break;
319
320 /* XXX We need to context-switch the timebase for
321 * watchdog and FIT. */
322 case SPRN_TBWL: break;
323 case SPRN_TBWU: break;
324
325 case SPRN_MSSSR0: break;
326
327 case SPRN_DEC:
328 vcpu->arch.dec = spr_val;
329 kvmppc_emulate_dec(vcpu);
330 break;
331
332 case SPRN_SPRG0:
333 vcpu->arch.shared->sprg0 = spr_val;
334 break;
335 case SPRN_SPRG1:
336 vcpu->arch.shared->sprg1 = spr_val;
337 break;
338 case SPRN_SPRG2:
339 vcpu->arch.shared->sprg2 = spr_val;
340 break;
341 case SPRN_SPRG3:
342 vcpu->arch.shared->sprg3 = spr_val;
343 break;
344
345 default:
346 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
347 spr_val);
348 if (emulated == EMULATE_FAIL)
349 printk(KERN_INFO "mtspr: unknown spr "
350 "0x%x\n", sprn);
351 break;
352 }
353 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
354 break; 375 break;
355 376
356 case OP_31_XOP_DCBI: 377 case OP_31_XOP_DCBI:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4d213b8b0fb5..70739a089560 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
30#include <asm/kvm_ppc.h> 30#include <asm/kvm_ppc.h>
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
32#include <asm/cputhreads.h> 32#include <asm/cputhreads.h>
33#include <asm/irqflags.h>
33#include "timing.h" 34#include "timing.h"
34#include "../mm/mmu_decl.h" 35#include "../mm/mmu_decl.h"
35 36
@@ -38,8 +39,7 @@
38 39
39int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 40int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
40{ 41{
41 return !(v->arch.shared->msr & MSR_WE) || 42 return !!(v->arch.pending_exceptions) ||
42 !!(v->arch.pending_exceptions) ||
43 v->requests; 43 v->requests;
44} 44}
45 45
@@ -48,6 +48,85 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
48 return 1; 48 return 1;
49} 49}
50 50
51#ifndef CONFIG_KVM_BOOK3S_64_HV
52/*
53 * Common checks before entering the guest world. Call with interrupts
54 * disabled.
55 *
56 * returns:
57 *
58 * == 1 if we're ready to go into guest state
59 * <= 0 if we need to go back to the host with return value
60 */
61int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
62{
63 int r = 1;
64
65 WARN_ON_ONCE(!irqs_disabled());
66 while (true) {
67 if (need_resched()) {
68 local_irq_enable();
69 cond_resched();
70 local_irq_disable();
71 continue;
72 }
73
74 if (signal_pending(current)) {
75 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
76 vcpu->run->exit_reason = KVM_EXIT_INTR;
77 r = -EINTR;
78 break;
79 }
80
81 vcpu->mode = IN_GUEST_MODE;
82
83 /*
84 * Reading vcpu->requests must happen after setting vcpu->mode,
85 * so we don't miss a request because the requester sees
86 * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
87 * before next entering the guest (and thus doesn't IPI).
88 */
89 smp_mb();
90
91 if (vcpu->requests) {
92 /* Make sure we process requests preemptable */
93 local_irq_enable();
94 trace_kvm_check_requests(vcpu);
95 r = kvmppc_core_check_requests(vcpu);
96 local_irq_disable();
97 if (r > 0)
98 continue;
99 break;
100 }
101
102 if (kvmppc_core_prepare_to_enter(vcpu)) {
103 /* interrupts got enabled in between, so we
104 are back at square 1 */
105 continue;
106 }
107
108#ifdef CONFIG_PPC64
109 /* lazy EE magic */
110 hard_irq_disable();
111 if (lazy_irq_pending()) {
112 /* Got an interrupt in between, try again */
113 local_irq_enable();
114 local_irq_disable();
115 kvm_guest_exit();
116 continue;
117 }
118
119 trace_hardirqs_on();
120#endif
121
122 kvm_guest_enter();
123 break;
124 }
125
126 return r;
127}
128#endif /* CONFIG_KVM_BOOK3S_64_HV */
129
51int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 130int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
52{ 131{
53 int nr = kvmppc_get_gpr(vcpu, 11); 132 int nr = kvmppc_get_gpr(vcpu, 11);
@@ -67,18 +146,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
67 } 146 }
68 147
69 switch (nr) { 148 switch (nr) {
70 case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: 149 case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
71 { 150 {
72 vcpu->arch.magic_page_pa = param1; 151 vcpu->arch.magic_page_pa = param1;
73 vcpu->arch.magic_page_ea = param2; 152 vcpu->arch.magic_page_ea = param2;
74 153
75 r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; 154 r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
76 155
77 r = HC_EV_SUCCESS; 156 r = EV_SUCCESS;
78 break; 157 break;
79 } 158 }
80 case HC_VENDOR_KVM | KVM_HC_FEATURES: 159 case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
81 r = HC_EV_SUCCESS; 160 r = EV_SUCCESS;
82#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) 161#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
83 /* XXX Missing magic page on 44x */ 162 /* XXX Missing magic page on 44x */
84 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); 163 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
@@ -86,8 +165,13 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
86 165
87 /* Second return value is in r4 */ 166 /* Second return value is in r4 */
88 break; 167 break;
168 case EV_HCALL_TOKEN(EV_IDLE):
169 r = EV_SUCCESS;
170 kvm_vcpu_block(vcpu);
171 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
172 break;
89 default: 173 default:
90 r = HC_EV_UNIMPLEMENTED; 174 r = EV_UNIMPLEMENTED;
91 break; 175 break;
92 } 176 }
93 177
@@ -220,6 +304,7 @@ int kvm_dev_ioctl_check_extension(long ext)
220 switch (ext) { 304 switch (ext) {
221#ifdef CONFIG_BOOKE 305#ifdef CONFIG_BOOKE
222 case KVM_CAP_PPC_BOOKE_SREGS: 306 case KVM_CAP_PPC_BOOKE_SREGS:
307 case KVM_CAP_PPC_BOOKE_WATCHDOG:
223#else 308#else
224 case KVM_CAP_PPC_SEGSTATE: 309 case KVM_CAP_PPC_SEGSTATE:
225 case KVM_CAP_PPC_HIOR: 310 case KVM_CAP_PPC_HIOR:
@@ -229,6 +314,7 @@ int kvm_dev_ioctl_check_extension(long ext)
229 case KVM_CAP_PPC_IRQ_LEVEL: 314 case KVM_CAP_PPC_IRQ_LEVEL:
230 case KVM_CAP_ENABLE_CAP: 315 case KVM_CAP_ENABLE_CAP:
231 case KVM_CAP_ONE_REG: 316 case KVM_CAP_ONE_REG:
317 case KVM_CAP_IOEVENTFD:
232 r = 1; 318 r = 1;
233 break; 319 break;
234#ifndef CONFIG_KVM_BOOK3S_64_HV 320#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -260,10 +346,22 @@ int kvm_dev_ioctl_check_extension(long ext)
260 if (cpu_has_feature(CPU_FTR_ARCH_201)) 346 if (cpu_has_feature(CPU_FTR_ARCH_201))
261 r = 2; 347 r = 2;
262 break; 348 break;
349#endif
263 case KVM_CAP_SYNC_MMU: 350 case KVM_CAP_SYNC_MMU:
351#ifdef CONFIG_KVM_BOOK3S_64_HV
264 r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; 352 r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
353#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
354 r = 1;
355#else
356 r = 0;
357 break;
358#endif
359#ifdef CONFIG_KVM_BOOK3S_64_HV
360 case KVM_CAP_PPC_HTAB_FD:
361 r = 1;
265 break; 362 break;
266#endif 363#endif
364 break;
267 case KVM_CAP_NR_VCPUS: 365 case KVM_CAP_NR_VCPUS:
268 /* 366 /*
269 * Recommending a number of CPUs is somewhat arbitrary; we 367 * Recommending a number of CPUs is somewhat arbitrary; we
@@ -302,19 +400,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
302void kvm_arch_free_memslot(struct kvm_memory_slot *free, 400void kvm_arch_free_memslot(struct kvm_memory_slot *free,
303 struct kvm_memory_slot *dont) 401 struct kvm_memory_slot *dont)
304{ 402{
305 if (!dont || free->arch.rmap != dont->arch.rmap) { 403 kvmppc_core_free_memslot(free, dont);
306 vfree(free->arch.rmap);
307 free->arch.rmap = NULL;
308 }
309} 404}
310 405
311int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 406int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
312{ 407{
313 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 408 return kvmppc_core_create_memslot(slot, npages);
314 if (!slot->arch.rmap)
315 return -ENOMEM;
316
317 return 0;
318} 409}
319 410
320int kvm_arch_prepare_memory_region(struct kvm *kvm, 411int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -323,7 +414,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
323 struct kvm_userspace_memory_region *mem, 414 struct kvm_userspace_memory_region *mem,
324 int user_alloc) 415 int user_alloc)
325{ 416{
326 return kvmppc_core_prepare_memory_region(kvm, mem); 417 return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
327} 418}
328 419
329void kvm_arch_commit_memory_region(struct kvm *kvm, 420void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -331,7 +422,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
331 struct kvm_memory_slot old, 422 struct kvm_memory_slot old,
332 int user_alloc) 423 int user_alloc)
333{ 424{
334 kvmppc_core_commit_memory_region(kvm, mem); 425 kvmppc_core_commit_memory_region(kvm, mem, old);
335} 426}
336 427
337void kvm_arch_flush_shadow_all(struct kvm *kvm) 428void kvm_arch_flush_shadow_all(struct kvm *kvm)
@@ -341,6 +432,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
341void kvm_arch_flush_shadow_memslot(struct kvm *kvm, 432void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
342 struct kvm_memory_slot *slot) 433 struct kvm_memory_slot *slot)
343{ 434{
435 kvmppc_core_flush_memslot(kvm, slot);
344} 436}
345 437
346struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 438struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
@@ -354,6 +446,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
354 return vcpu; 446 return vcpu;
355} 447}
356 448
449int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
450{
451 return 0;
452}
453
357void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 454void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
358{ 455{
359 /* Make sure we're not using the vcpu anymore */ 456 /* Make sure we're not using the vcpu anymore */
@@ -390,6 +487,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
390 487
391int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 488int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
392{ 489{
490 int ret;
491
393 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 492 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
394 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); 493 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
395 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 494 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -398,13 +497,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
398#ifdef CONFIG_KVM_EXIT_TIMING 497#ifdef CONFIG_KVM_EXIT_TIMING
399 mutex_init(&vcpu->arch.exit_timing_lock); 498 mutex_init(&vcpu->arch.exit_timing_lock);
400#endif 499#endif
401 500 ret = kvmppc_subarch_vcpu_init(vcpu);
402 return 0; 501 return ret;
403} 502}
404 503
405void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 504void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
406{ 505{
407 kvmppc_mmu_destroy(vcpu); 506 kvmppc_mmu_destroy(vcpu);
507 kvmppc_subarch_vcpu_uninit(vcpu);
408} 508}
409 509
410void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 510void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -420,7 +520,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
420 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); 520 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
421#endif 521#endif
422 kvmppc_core_vcpu_load(vcpu, cpu); 522 kvmppc_core_vcpu_load(vcpu, cpu);
423 vcpu->cpu = smp_processor_id();
424} 523}
425 524
426void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 525void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -429,7 +528,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
429#ifdef CONFIG_BOOKE 528#ifdef CONFIG_BOOKE
430 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); 529 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
431#endif 530#endif
432 vcpu->cpu = -1;
433} 531}
434 532
435int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 533int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -527,6 +625,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
527 vcpu->mmio_is_write = 0; 625 vcpu->mmio_is_write = 0;
528 vcpu->arch.mmio_sign_extend = 0; 626 vcpu->arch.mmio_sign_extend = 0;
529 627
628 if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
629 bytes, &run->mmio.data)) {
630 kvmppc_complete_mmio_load(vcpu, run);
631 vcpu->mmio_needed = 0;
632 return EMULATE_DONE;
633 }
634
530 return EMULATE_DO_MMIO; 635 return EMULATE_DO_MMIO;
531} 636}
532 637
@@ -536,8 +641,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
536{ 641{
537 int r; 642 int r;
538 643
539 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
540 vcpu->arch.mmio_sign_extend = 1; 644 vcpu->arch.mmio_sign_extend = 1;
645 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
541 646
542 return r; 647 return r;
543} 648}
@@ -575,6 +680,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
575 } 680 }
576 } 681 }
577 682
683 if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
684 bytes, &run->mmio.data)) {
685 kvmppc_complete_mmio_load(vcpu, run);
686 vcpu->mmio_needed = 0;
687 return EMULATE_DONE;
688 }
689
578 return EMULATE_DO_MMIO; 690 return EMULATE_DO_MMIO;
579} 691}
580 692
@@ -649,6 +761,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
649 r = 0; 761 r = 0;
650 vcpu->arch.papr_enabled = true; 762 vcpu->arch.papr_enabled = true;
651 break; 763 break;
764#ifdef CONFIG_BOOKE
765 case KVM_CAP_PPC_BOOKE_WATCHDOG:
766 r = 0;
767 vcpu->arch.watchdog_enabled = true;
768 break;
769#endif
652#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) 770#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
653 case KVM_CAP_SW_TLB: { 771 case KVM_CAP_SW_TLB: {
654 struct kvm_config_tlb cfg; 772 struct kvm_config_tlb cfg;
@@ -751,9 +869,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
751 869
752static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) 870static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
753{ 871{
872 u32 inst_nop = 0x60000000;
873#ifdef CONFIG_KVM_BOOKE_HV
874 u32 inst_sc1 = 0x44000022;
875 pvinfo->hcall[0] = inst_sc1;
876 pvinfo->hcall[1] = inst_nop;
877 pvinfo->hcall[2] = inst_nop;
878 pvinfo->hcall[3] = inst_nop;
879#else
754 u32 inst_lis = 0x3c000000; 880 u32 inst_lis = 0x3c000000;
755 u32 inst_ori = 0x60000000; 881 u32 inst_ori = 0x60000000;
756 u32 inst_nop = 0x60000000;
757 u32 inst_sc = 0x44000002; 882 u32 inst_sc = 0x44000002;
758 u32 inst_imm_mask = 0xffff; 883 u32 inst_imm_mask = 0xffff;
759 884
@@ -770,6 +895,9 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
770 pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); 895 pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
771 pvinfo->hcall[2] = inst_sc; 896 pvinfo->hcall[2] = inst_sc;
772 pvinfo->hcall[3] = inst_nop; 897 pvinfo->hcall[3] = inst_nop;
898#endif
899
900 pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
773 901
774 return 0; 902 return 0;
775} 903}
@@ -832,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
832 r = 0; 960 r = 0;
833 break; 961 break;
834 } 962 }
963
964 case KVM_PPC_GET_HTAB_FD: {
965 struct kvm *kvm = filp->private_data;
966 struct kvm_get_htab_fd ghf;
967
968 r = -EFAULT;
969 if (copy_from_user(&ghf, argp, sizeof(ghf)))
970 break;
971 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
972 break;
973 }
835#endif /* CONFIG_KVM_BOOK3S_64_HV */ 974#endif /* CONFIG_KVM_BOOK3S_64_HV */
836 975
837#ifdef CONFIG_PPC_BOOK3S_64 976#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index ddb6a2149d44..e326489a5420 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,6 +31,126 @@ TRACE_EVENT(kvm_ppc_instr,
31 __entry->inst, __entry->pc, __entry->emulate) 31 __entry->inst, __entry->pc, __entry->emulate)
32); 32);
33 33
34#ifdef CONFIG_PPC_BOOK3S
35#define kvm_trace_symbol_exit \
36 {0x100, "SYSTEM_RESET"}, \
37 {0x200, "MACHINE_CHECK"}, \
38 {0x300, "DATA_STORAGE"}, \
39 {0x380, "DATA_SEGMENT"}, \
40 {0x400, "INST_STORAGE"}, \
41 {0x480, "INST_SEGMENT"}, \
42 {0x500, "EXTERNAL"}, \
43 {0x501, "EXTERNAL_LEVEL"}, \
44 {0x502, "EXTERNAL_HV"}, \
45 {0x600, "ALIGNMENT"}, \
46 {0x700, "PROGRAM"}, \
47 {0x800, "FP_UNAVAIL"}, \
48 {0x900, "DECREMENTER"}, \
49 {0x980, "HV_DECREMENTER"}, \
50 {0xc00, "SYSCALL"}, \
51 {0xd00, "TRACE"}, \
52 {0xe00, "H_DATA_STORAGE"}, \
53 {0xe20, "H_INST_STORAGE"}, \
54 {0xe40, "H_EMUL_ASSIST"}, \
55 {0xf00, "PERFMON"}, \
56 {0xf20, "ALTIVEC"}, \
57 {0xf40, "VSX"}
58#else
59#define kvm_trace_symbol_exit \
60 {0, "CRITICAL"}, \
61 {1, "MACHINE_CHECK"}, \
62 {2, "DATA_STORAGE"}, \
63 {3, "INST_STORAGE"}, \
64 {4, "EXTERNAL"}, \
65 {5, "ALIGNMENT"}, \
66 {6, "PROGRAM"}, \
67 {7, "FP_UNAVAIL"}, \
68 {8, "SYSCALL"}, \
69 {9, "AP_UNAVAIL"}, \
70 {10, "DECREMENTER"}, \
71 {11, "FIT"}, \
72 {12, "WATCHDOG"}, \
73 {13, "DTLB_MISS"}, \
74 {14, "ITLB_MISS"}, \
75 {15, "DEBUG"}, \
76 {32, "SPE_UNAVAIL"}, \
77 {33, "SPE_FP_DATA"}, \
78 {34, "SPE_FP_ROUND"}, \
79 {35, "PERFORMANCE_MONITOR"}, \
80 {36, "DOORBELL"}, \
81 {37, "DOORBELL_CRITICAL"}, \
82 {38, "GUEST_DBELL"}, \
83 {39, "GUEST_DBELL_CRIT"}, \
84 {40, "HV_SYSCALL"}, \
85 {41, "HV_PRIV"}
86#endif
87
88TRACE_EVENT(kvm_exit,
89 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
90 TP_ARGS(exit_nr, vcpu),
91
92 TP_STRUCT__entry(
93 __field( unsigned int, exit_nr )
94 __field( unsigned long, pc )
95 __field( unsigned long, msr )
96 __field( unsigned long, dar )
97#ifdef CONFIG_KVM_BOOK3S_PR
98 __field( unsigned long, srr1 )
99#endif
100 __field( unsigned long, last_inst )
101 ),
102
103 TP_fast_assign(
104#ifdef CONFIG_KVM_BOOK3S_PR
105 struct kvmppc_book3s_shadow_vcpu *svcpu;
106#endif
107 __entry->exit_nr = exit_nr;
108 __entry->pc = kvmppc_get_pc(vcpu);
109 __entry->dar = kvmppc_get_fault_dar(vcpu);
110 __entry->msr = vcpu->arch.shared->msr;
111#ifdef CONFIG_KVM_BOOK3S_PR
112 svcpu = svcpu_get(vcpu);
113 __entry->srr1 = svcpu->shadow_srr1;
114 svcpu_put(svcpu);
115#endif
116 __entry->last_inst = vcpu->arch.last_inst;
117 ),
118
119 TP_printk("exit=%s"
120 " | pc=0x%lx"
121 " | msr=0x%lx"
122 " | dar=0x%lx"
123#ifdef CONFIG_KVM_BOOK3S_PR
124 " | srr1=0x%lx"
125#endif
126 " | last_inst=0x%lx"
127 ,
128 __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
129 __entry->pc,
130 __entry->msr,
131 __entry->dar,
132#ifdef CONFIG_KVM_BOOK3S_PR
133 __entry->srr1,
134#endif
135 __entry->last_inst
136 )
137);
138
139TRACE_EVENT(kvm_unmap_hva,
140 TP_PROTO(unsigned long hva),
141 TP_ARGS(hva),
142
143 TP_STRUCT__entry(
144 __field( unsigned long, hva )
145 ),
146
147 TP_fast_assign(
148 __entry->hva = hva;
149 ),
150
151 TP_printk("unmap hva 0x%lx\n", __entry->hva)
152);
153
34TRACE_EVENT(kvm_stlb_inval, 154TRACE_EVENT(kvm_stlb_inval,
35 TP_PROTO(unsigned int stlb_index), 155 TP_PROTO(unsigned int stlb_index),
36 TP_ARGS(stlb_index), 156 TP_ARGS(stlb_index),
@@ -98,41 +218,31 @@ TRACE_EVENT(kvm_gtlb_write,
98 __entry->word1, __entry->word2) 218 __entry->word1, __entry->word2)
99); 219);
100 220
101 221TRACE_EVENT(kvm_check_requests,
102/************************************************************************* 222 TP_PROTO(struct kvm_vcpu *vcpu),
103 * Book3S trace points * 223 TP_ARGS(vcpu),
104 *************************************************************************/
105
106#ifdef CONFIG_KVM_BOOK3S_PR
107
108TRACE_EVENT(kvm_book3s_exit,
109 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
110 TP_ARGS(exit_nr, vcpu),
111 224
112 TP_STRUCT__entry( 225 TP_STRUCT__entry(
113 __field( unsigned int, exit_nr ) 226 __field( __u32, cpu_nr )
114 __field( unsigned long, pc ) 227 __field( __u32, requests )
115 __field( unsigned long, msr )
116 __field( unsigned long, dar )
117 __field( unsigned long, srr1 )
118 ), 228 ),
119 229
120 TP_fast_assign( 230 TP_fast_assign(
121 struct kvmppc_book3s_shadow_vcpu *svcpu; 231 __entry->cpu_nr = vcpu->vcpu_id;
122 __entry->exit_nr = exit_nr; 232 __entry->requests = vcpu->requests;
123 __entry->pc = kvmppc_get_pc(vcpu);
124 __entry->dar = kvmppc_get_fault_dar(vcpu);
125 __entry->msr = vcpu->arch.shared->msr;
126 svcpu = svcpu_get(vcpu);
127 __entry->srr1 = svcpu->shadow_srr1;
128 svcpu_put(svcpu);
129 ), 233 ),
130 234
131 TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", 235 TP_printk("vcpu=%x requests=%x",
132 __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, 236 __entry->cpu_nr, __entry->requests)
133 __entry->srr1)
134); 237);
135 238
239
240/*************************************************************************
241 * Book3S trace points *
242 *************************************************************************/
243
244#ifdef CONFIG_KVM_BOOK3S_PR
245
136TRACE_EVENT(kvm_book3s_reenter, 246TRACE_EVENT(kvm_book3s_reenter,
137 TP_PROTO(int r, struct kvm_vcpu *vcpu), 247 TP_PROTO(int r, struct kvm_vcpu *vcpu),
138 TP_ARGS(r, vcpu), 248 TP_ARGS(r, vcpu),
@@ -395,6 +505,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write,
395 __entry->mas2, __entry->mas7_3) 505 __entry->mas2, __entry->mas7_3)
396); 506);
397 507
508TRACE_EVENT(kvm_booke206_ref_release,
509 TP_PROTO(__u64 pfn, __u32 flags),
510 TP_ARGS(pfn, flags),
511
512 TP_STRUCT__entry(
513 __field( __u64, pfn )
514 __field( __u32, flags )
515 ),
516
517 TP_fast_assign(
518 __entry->pfn = pfn;
519 __entry->flags = flags;
520 ),
521
522 TP_printk("pfn=%llx flags=%x",
523 __entry->pfn, __entry->flags)
524);
525
526TRACE_EVENT(kvm_booke_queue_irqprio,
527 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
528 TP_ARGS(vcpu, priority),
529
530 TP_STRUCT__entry(
531 __field( __u32, cpu_nr )
532 __field( __u32, priority )
533 __field( unsigned long, pending )
534 ),
535
536 TP_fast_assign(
537 __entry->cpu_nr = vcpu->vcpu_id;
538 __entry->priority = priority;
539 __entry->pending = vcpu->arch.pending_exceptions;
540 ),
541
542 TP_printk("vcpu=%x prio=%x pending=%lx",
543 __entry->cpu_nr, __entry->priority, __entry->pending)
544);
545
398#endif 546#endif
399 547
400#endif /* _TRACE_KVM_H */ 548#endif /* _TRACE_KVM_H */