aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
authorJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
committerJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
commit8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
treea8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/powerpc/kvm
parent406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x.c16
-rw-r--r--arch/powerpc/kvm/44x_emulate.c163
-rw-r--r--arch/powerpc/kvm/44x_tlb.c1
-rw-r--r--arch/powerpc/kvm/Kconfig33
-rw-r--r--arch/powerpc/kvm/Makefile26
-rw-r--r--arch/powerpc/kvm/book3s.c180
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c37
-rw-r--r--arch/powerpc/kvm/book3s_32_sr.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c8
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c101
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c1468
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S4
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c150
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c156
-rw-r--r--arch/powerpc/kvm/book3s_exports.c9
-rw-r--r--arch/powerpc/kvm/book3s_hv.c1631
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c218
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S17
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c148
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c967
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S835
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S216
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c5
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c10
-rw-r--r--arch/powerpc/kvm/book3s_pr.c524
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c252
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S54
-rw-r--r--arch/powerpc/kvm/book3s_segment.S138
-rw-r--r--arch/powerpc/kvm/booke.c883
-rw-r--r--arch/powerpc/kvm/booke.h67
-rw-r--r--arch/powerpc/kvm/booke_emulate.c181
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S355
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S712
-rw-r--r--arch/powerpc/kvm/e500.c381
-rw-r--r--arch/powerpc/kvm/e500.h305
-rw-r--r--arch/powerpc/kvm/e500_emulate.c231
-rw-r--r--arch/powerpc/kvm/e500_tlb.c1403
-rw-r--r--arch/powerpc/kvm/e500mc.c346
-rw-r--r--arch/powerpc/kvm/emulate.c419
-rw-r--r--arch/powerpc/kvm/powerpc.c442
-rw-r--r--arch/powerpc/kvm/timing.h6
-rw-r--r--arch/powerpc/kvm/trace.h268
43 files changed, 2913 insertions, 10458 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 3d7fd21c65f..da3a1225c0a 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -20,7 +20,6 @@
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/err.h> 22#include <linux/err.h>
23#include <linux/export.h>
24 23
25#include <asm/reg.h> 24#include <asm/reg.h>
26#include <asm/cputable.h> 25#include <asm/cputable.h>
@@ -29,18 +28,15 @@
29#include <asm/kvm_ppc.h> 28#include <asm/kvm_ppc.h>
30 29
31#include "44x_tlb.h" 30#include "44x_tlb.h"
32#include "booke.h"
33 31
34void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 32void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
35{ 33{
36 kvmppc_booke_vcpu_load(vcpu, cpu);
37 kvmppc_44x_tlb_load(vcpu); 34 kvmppc_44x_tlb_load(vcpu);
38} 35}
39 36
40void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 37void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
41{ 38{
42 kvmppc_44x_tlb_put(vcpu); 39 kvmppc_44x_tlb_put(vcpu);
43 kvmppc_booke_vcpu_put(vcpu);
44} 40}
45 41
46int kvmppc_core_check_processor_compat(void) 42int kvmppc_core_check_processor_compat(void)
@@ -82,9 +78,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
82 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) 78 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
83 vcpu_44x->shadow_refs[i].gtlb_index = -1; 79 vcpu_44x->shadow_refs[i].gtlb_index = -1;
84 80
85 vcpu->arch.cpu_type = KVM_CPU_440;
86 vcpu->arch.pvr = mfspr(SPRN_PVR);
87
88 return 0; 81 return 0;
89} 82}
90 83
@@ -164,15 +157,6 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
164 kmem_cache_free(kvm_vcpu_cache, vcpu_44x); 157 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
165} 158}
166 159
167int kvmppc_core_init_vm(struct kvm *kvm)
168{
169 return 0;
170}
171
172void kvmppc_core_destroy_vm(struct kvm *kvm)
173{
174}
175
176static int __init kvmppc_44x_init(void) 160static int __init kvmppc_44x_init(void)
177{ 161{
178 int r; 162 int r;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 35ec0a8547d..549bb2c9a47 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,109 +27,98 @@
27#include "booke.h" 27#include "booke.h"
28#include "44x_tlb.h" 28#include "44x_tlb.h"
29 29
30#define XOP_MFDCRX 259
31#define XOP_MFDCR 323 30#define XOP_MFDCR 323
32#define XOP_MTDCRX 387
33#define XOP_MTDCR 451 31#define XOP_MTDCR 451
34#define XOP_TLBSX 914 32#define XOP_TLBSX 914
35#define XOP_ICCCI 966 33#define XOP_ICCCI 966
36#define XOP_TLBWE 978 34#define XOP_TLBWE 978
37 35
38static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
39{
40 /* emulate some access in kernel */
41 switch (dcrn) {
42 case DCRN_CPR0_CONFIG_ADDR:
43 vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
44 return EMULATE_DONE;
45 default:
46 vcpu->run->dcr.dcrn = dcrn;
47 vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
48 vcpu->run->dcr.is_write = 1;
49 vcpu->arch.dcr_is_write = 1;
50 vcpu->arch.dcr_needed = 1;
51 kvmppc_account_exit(vcpu, DCR_EXITS);
52 return EMULATE_DO_DCR;
53 }
54}
55
56static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
57{
58 /* The guest may access CPR0 registers to determine the timebase
59 * frequency, and it must know the real host frequency because it
60 * can directly access the timebase registers.
61 *
62 * It would be possible to emulate those accesses in userspace,
63 * but userspace can really only figure out the end frequency.
64 * We could decompose that into the factors that compute it, but
65 * that's tricky math, and it's easier to just report the real
66 * CPR0 values.
67 */
68 switch (dcrn) {
69 case DCRN_CPR0_CONFIG_ADDR:
70 kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
71 break;
72 case DCRN_CPR0_CONFIG_DATA:
73 local_irq_disable();
74 mtdcr(DCRN_CPR0_CONFIG_ADDR,
75 vcpu->arch.cpr0_cfgaddr);
76 kvmppc_set_gpr(vcpu, rt,
77 mfdcr(DCRN_CPR0_CONFIG_DATA));
78 local_irq_enable();
79 break;
80 default:
81 vcpu->run->dcr.dcrn = dcrn;
82 vcpu->run->dcr.data = 0;
83 vcpu->run->dcr.is_write = 0;
84 vcpu->arch.dcr_is_write = 0;
85 vcpu->arch.io_gpr = rt;
86 vcpu->arch.dcr_needed = 1;
87 kvmppc_account_exit(vcpu, DCR_EXITS);
88 return EMULATE_DO_DCR;
89 }
90
91 return EMULATE_DONE;
92}
93
94int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 36int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
95 unsigned int inst, int *advance) 37 unsigned int inst, int *advance)
96{ 38{
97 int emulated = EMULATE_DONE; 39 int emulated = EMULATE_DONE;
98 int dcrn = get_dcrn(inst); 40 int dcrn;
99 int ra = get_ra(inst); 41 int ra;
100 int rb = get_rb(inst); 42 int rb;
101 int rc = get_rc(inst); 43 int rc;
102 int rs = get_rs(inst); 44 int rs;
103 int rt = get_rt(inst); 45 int rt;
104 int ws = get_ws(inst); 46 int ws;
105 47
106 switch (get_op(inst)) { 48 switch (get_op(inst)) {
107 case 31: 49 case 31:
108 switch (get_xop(inst)) { 50 switch (get_xop(inst)) {
109 51
110 case XOP_MFDCR: 52 case XOP_MFDCR:
111 emulated = emulate_mfdcr(vcpu, rt, dcrn); 53 dcrn = get_dcrn(inst);
112 break; 54 rt = get_rt(inst);
55
56 /* The guest may access CPR0 registers to determine the timebase
57 * frequency, and it must know the real host frequency because it
58 * can directly access the timebase registers.
59 *
60 * It would be possible to emulate those accesses in userspace,
61 * but userspace can really only figure out the end frequency.
62 * We could decompose that into the factors that compute it, but
63 * that's tricky math, and it's easier to just report the real
64 * CPR0 values.
65 */
66 switch (dcrn) {
67 case DCRN_CPR0_CONFIG_ADDR:
68 kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
69 break;
70 case DCRN_CPR0_CONFIG_DATA:
71 local_irq_disable();
72 mtdcr(DCRN_CPR0_CONFIG_ADDR,
73 vcpu->arch.cpr0_cfgaddr);
74 kvmppc_set_gpr(vcpu, rt,
75 mfdcr(DCRN_CPR0_CONFIG_DATA));
76 local_irq_enable();
77 break;
78 default:
79 run->dcr.dcrn = dcrn;
80 run->dcr.data = 0;
81 run->dcr.is_write = 0;
82 vcpu->arch.io_gpr = rt;
83 vcpu->arch.dcr_needed = 1;
84 kvmppc_account_exit(vcpu, DCR_EXITS);
85 emulated = EMULATE_DO_DCR;
86 }
113 87
114 case XOP_MFDCRX:
115 emulated = emulate_mfdcr(vcpu, rt,
116 kvmppc_get_gpr(vcpu, ra));
117 break; 88 break;
118 89
119 case XOP_MTDCR: 90 case XOP_MTDCR:
120 emulated = emulate_mtdcr(vcpu, rs, dcrn); 91 dcrn = get_dcrn(inst);
121 break; 92 rs = get_rs(inst);
93
94 /* emulate some access in kernel */
95 switch (dcrn) {
96 case DCRN_CPR0_CONFIG_ADDR:
97 vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
98 break;
99 default:
100 run->dcr.dcrn = dcrn;
101 run->dcr.data = kvmppc_get_gpr(vcpu, rs);
102 run->dcr.is_write = 1;
103 vcpu->arch.dcr_needed = 1;
104 kvmppc_account_exit(vcpu, DCR_EXITS);
105 emulated = EMULATE_DO_DCR;
106 }
122 107
123 case XOP_MTDCRX:
124 emulated = emulate_mtdcr(vcpu, rs,
125 kvmppc_get_gpr(vcpu, ra));
126 break; 108 break;
127 109
128 case XOP_TLBWE: 110 case XOP_TLBWE:
111 ra = get_ra(inst);
112 rs = get_rs(inst);
113 ws = get_ws(inst);
129 emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); 114 emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
130 break; 115 break;
131 116
132 case XOP_TLBSX: 117 case XOP_TLBSX:
118 rt = get_rt(inst);
119 ra = get_ra(inst);
120 rb = get_rb(inst);
121 rc = get_rc(inst);
133 emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); 122 emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
134 break; 123 break;
135 124
@@ -152,41 +141,41 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
152 return emulated; 141 return emulated;
153} 142}
154 143
155int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 144int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
156{ 145{
157 int emulated = EMULATE_DONE; 146 int emulated = EMULATE_DONE;
158 147
159 switch (sprn) { 148 switch (sprn) {
160 case SPRN_PID: 149 case SPRN_PID:
161 kvmppc_set_pid(vcpu, spr_val); break; 150 kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
162 case SPRN_MMUCR: 151 case SPRN_MMUCR:
163 vcpu->arch.mmucr = spr_val; break; 152 vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
164 case SPRN_CCR0: 153 case SPRN_CCR0:
165 vcpu->arch.ccr0 = spr_val; break; 154 vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
166 case SPRN_CCR1: 155 case SPRN_CCR1:
167 vcpu->arch.ccr1 = spr_val; break; 156 vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
168 default: 157 default:
169 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); 158 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
170 } 159 }
171 160
172 return emulated; 161 return emulated;
173} 162}
174 163
175int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 164int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
176{ 165{
177 int emulated = EMULATE_DONE; 166 int emulated = EMULATE_DONE;
178 167
179 switch (sprn) { 168 switch (sprn) {
180 case SPRN_PID: 169 case SPRN_PID:
181 *spr_val = vcpu->arch.pid; break; 170 kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
182 case SPRN_MMUCR: 171 case SPRN_MMUCR:
183 *spr_val = vcpu->arch.mmucr; break; 172 kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
184 case SPRN_CCR0: 173 case SPRN_CCR0:
185 *spr_val = vcpu->arch.ccr0; break; 174 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
186 case SPRN_CCR1: 175 case SPRN_CCR1:
187 *spr_val = vcpu->arch.ccr1; break; 176 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
188 default: 177 default:
189 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); 178 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
190 } 179 }
191 180
192 return emulated; 181 return emulated;
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5dd3ab46997..33aa715dab2 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -319,6 +319,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
319 if (is_error_page(new_page)) { 319 if (is_error_page(new_page)) {
320 printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", 320 printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
321 (unsigned long long)gfn); 321 (unsigned long long)gfn);
322 kvm_release_page_clean(new_page);
322 return; 323 return;
323 } 324 }
324 hpaddr = page_to_phys(new_page); 325 hpaddr = page_to_phys(new_page);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 4730c953f43..78133deb4b6 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
20 bool 20 bool
21 select PREEMPT_NOTIFIERS 21 select PREEMPT_NOTIFIERS
22 select ANON_INODES 22 select ANON_INODES
23 select HAVE_KVM_EVENTFD
24 23
25config KVM_BOOK3S_HANDLER 24config KVM_BOOK3S_HANDLER
26 bool 25 bool
@@ -37,7 +36,6 @@ config KVM_BOOK3S_64_HANDLER
37config KVM_BOOK3S_PR 36config KVM_BOOK3S_PR
38 bool 37 bool
39 select KVM_MMIO 38 select KVM_MMIO
40 select MMU_NOTIFIER
41 39
42config KVM_BOOK3S_32 40config KVM_BOOK3S_32
43 tristate "KVM support for PowerPC book3s_32 processors" 41 tristate "KVM support for PowerPC book3s_32 processors"
@@ -71,7 +69,6 @@ config KVM_BOOK3S_64
71config KVM_BOOK3S_64_HV 69config KVM_BOOK3S_64_HV
72 bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" 70 bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
73 depends on KVM_BOOK3S_64 71 depends on KVM_BOOK3S_64
74 select MMU_NOTIFIER
75 ---help--- 72 ---help---
76 Support running unmodified book3s_64 guest kernels in 73 Support running unmodified book3s_64 guest kernels in
77 virtual machines on POWER7 and PPC970 processors that have 74 virtual machines on POWER7 and PPC970 processors that have
@@ -92,9 +89,6 @@ config KVM_BOOK3S_64_PR
92 depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV 89 depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
93 select KVM_BOOK3S_PR 90 select KVM_BOOK3S_PR
94 91
95config KVM_BOOKE_HV
96 bool
97
98config KVM_440 92config KVM_440
99 bool "KVM support for PowerPC 440 processors" 93 bool "KVM support for PowerPC 440 processors"
100 depends on EXPERIMENTAL && 44x 94 depends on EXPERIMENTAL && 44x
@@ -111,7 +105,7 @@ config KVM_440
111 105
112config KVM_EXIT_TIMING 106config KVM_EXIT_TIMING
113 bool "Detailed exit timing" 107 bool "Detailed exit timing"
114 depends on KVM_440 || KVM_E500V2 || KVM_E500MC 108 depends on KVM_440 || KVM_E500
115 ---help--- 109 ---help---
116 Calculate elapsed time for every exit/enter cycle. A per-vcpu 110 Calculate elapsed time for every exit/enter cycle. A per-vcpu
117 report is available in debugfs kvm/vm#_vcpu#_timing. 111 report is available in debugfs kvm/vm#_vcpu#_timing.
@@ -120,31 +114,14 @@ config KVM_EXIT_TIMING
120 114
121 If unsure, say N. 115 If unsure, say N.
122 116
123config KVM_E500V2 117config KVM_E500
124 bool "KVM support for PowerPC E500v2 processors" 118 bool "KVM support for PowerPC E500 processors"
125 depends on EXPERIMENTAL && E500 && !PPC_E500MC 119 depends on EXPERIMENTAL && E500
126 select KVM 120 select KVM
127 select KVM_MMIO 121 select KVM_MMIO
128 select MMU_NOTIFIER
129 ---help--- 122 ---help---
130 Support running unmodified E500 guest kernels in virtual machines on 123 Support running unmodified E500 guest kernels in virtual machines on
131 E500v2 host processors. 124 E500 host processors.
132
133 This module provides access to the hardware capabilities through
134 a character device node named /dev/kvm.
135
136 If unsure, say N.
137
138config KVM_E500MC
139 bool "KVM support for PowerPC E500MC/E5500 processors"
140 depends on EXPERIMENTAL && PPC_E500MC
141 select KVM
142 select KVM_MMIO
143 select KVM_BOOKE_HV
144 select MMU_NOTIFIER
145 ---help---
146 Support running unmodified E500MC/E5500 (32-bit) guest kernels in
147 virtual machines on E500MC/E5500 host processors.
148 125
149 This module provides access to the hardware capabilities through 126 This module provides access to the hardware capabilities through
150 a character device node named /dev/kvm. 127 a character device node named /dev/kvm.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 1e473d46322..08428e2c188 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,8 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
6 6
7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm 7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
8 8
9common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ 9common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
10 eventfd.o)
11 10
12CFLAGS_44x_tlb.o := -I. 11CFLAGS_44x_tlb.o := -I.
13CFLAGS_e500_tlb.o := -I. 12CFLAGS_e500_tlb.o := -I.
@@ -37,52 +36,34 @@ kvm-e500-objs := \
37 e500.o \ 36 e500.o \
38 e500_tlb.o \ 37 e500_tlb.o \
39 e500_emulate.o 38 e500_emulate.o
40kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) 39kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
41
42kvm-e500mc-objs := \
43 $(common-objs-y) \
44 booke.o \
45 booke_emulate.o \
46 bookehv_interrupts.o \
47 e500mc.o \
48 e500_tlb.o \
49 e500_emulate.o
50kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
51 40
52kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 41kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
53 ../../../virt/kvm/coalesced_mmio.o \ 42 ../../../virt/kvm/coalesced_mmio.o \
54 fpu.o \ 43 fpu.o \
55 book3s_paired_singles.o \ 44 book3s_paired_singles.o \
56 book3s_pr.o \ 45 book3s_pr.o \
57 book3s_pr_papr.o \
58 book3s_64_vio_hv.o \
59 book3s_emulate.o \ 46 book3s_emulate.o \
60 book3s_interrupts.o \ 47 book3s_interrupts.o \
61 book3s_mmu_hpte.o \ 48 book3s_mmu_hpte.o \
62 book3s_64_mmu_host.o \ 49 book3s_64_mmu_host.o \
63 book3s_64_mmu.o \ 50 book3s_64_mmu.o \
64 book3s_32_mmu.o 51 book3s_32_mmu.o
65kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
66 book3s_rmhandlers.o
67 52
68kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 53kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
69 book3s_hv.o \ 54 book3s_hv.o \
70 book3s_hv_interrupts.o \ 55 book3s_hv_interrupts.o \
71 book3s_64_mmu_hv.o 56 book3s_64_mmu_hv.o
72kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 57kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
73 book3s_hv_rmhandlers.o \
74 book3s_hv_rm_mmu.o \ 58 book3s_hv_rm_mmu.o \
75 book3s_64_vio_hv.o \ 59 book3s_64_vio_hv.o \
76 book3s_hv_ras.o \
77 book3s_hv_builtin.o 60 book3s_hv_builtin.o
78 61
79kvm-book3s_64-module-objs := \ 62kvm-book3s_64-module-objs := \
80 ../../../virt/kvm/kvm_main.o \ 63 ../../../virt/kvm/kvm_main.o \
81 ../../../virt/kvm/eventfd.o \
82 powerpc.o \ 64 powerpc.o \
83 emulate.o \ 65 emulate.o \
84 book3s.o \ 66 book3s.o \
85 book3s_64_vio.o \
86 $(kvm-book3s_64-objs-y) 67 $(kvm-book3s_64-objs-y)
87 68
88kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) 69kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -103,8 +84,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
103kvm-objs := $(kvm-objs-m) $(kvm-objs-y) 84kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
104 85
105obj-$(CONFIG_KVM_440) += kvm.o 86obj-$(CONFIG_KVM_440) += kvm.o
106obj-$(CONFIG_KVM_E500V2) += kvm.o 87obj-$(CONFIG_KVM_E500) += kvm.o
107obj-$(CONFIG_KVM_E500MC) += kvm.o
108obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 88obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
109obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o 89obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
110 90
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a4b64528524..f68a34d1603 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -16,7 +16,6 @@
16 16
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include <linux/err.h> 18#include <linux/err.h>
19#include <linux/export.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
21 20
22#include <asm/reg.h> 21#include <asm/reg.h>
@@ -258,7 +257,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
258 return true; 257 return true;
259} 258}
260 259
261int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) 260void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
262{ 261{
263 unsigned long *pending = &vcpu->arch.pending_exceptions; 262 unsigned long *pending = &vcpu->arch.pending_exceptions;
264 unsigned long old_pending = vcpu->arch.pending_exceptions; 263 unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -283,17 +282,12 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
283 282
284 /* Tell the guest about our interrupt status */ 283 /* Tell the guest about our interrupt status */
285 kvmppc_update_int_pending(vcpu, *pending, old_pending); 284 kvmppc_update_int_pending(vcpu, *pending, old_pending);
286
287 return 0;
288} 285}
289 286
290pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) 287pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
291{ 288{
292 ulong mp_pa = vcpu->arch.magic_page_pa; 289 ulong mp_pa = vcpu->arch.magic_page_pa;
293 290
294 if (!(vcpu->arch.shared->msr & MSR_SF))
295 mp_pa = (uint32_t)mp_pa;
296
297 /* Magic page override */ 291 /* Magic page override */
298 if (unlikely(mp_pa) && 292 if (unlikely(mp_pa) &&
299 unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == 293 unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
@@ -411,15 +405,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
411 return 0; 405 return 0;
412} 406}
413 407
414int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
415{
416 return 0;
417}
418
419void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
420{
421}
422
423int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 408int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
424{ 409{
425 int i; 410 int i;
@@ -437,10 +422,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
437 regs->sprg1 = vcpu->arch.shared->sprg1; 422 regs->sprg1 = vcpu->arch.shared->sprg1;
438 regs->sprg2 = vcpu->arch.shared->sprg2; 423 regs->sprg2 = vcpu->arch.shared->sprg2;
439 regs->sprg3 = vcpu->arch.shared->sprg3; 424 regs->sprg3 = vcpu->arch.shared->sprg3;
440 regs->sprg4 = vcpu->arch.shared->sprg4; 425 regs->sprg4 = vcpu->arch.sprg4;
441 regs->sprg5 = vcpu->arch.shared->sprg5; 426 regs->sprg5 = vcpu->arch.sprg5;
442 regs->sprg6 = vcpu->arch.shared->sprg6; 427 regs->sprg6 = vcpu->arch.sprg6;
443 regs->sprg7 = vcpu->arch.shared->sprg7; 428 regs->sprg7 = vcpu->arch.sprg7;
444 429
445 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 430 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
446 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 431 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -464,10 +449,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
464 vcpu->arch.shared->sprg1 = regs->sprg1; 449 vcpu->arch.shared->sprg1 = regs->sprg1;
465 vcpu->arch.shared->sprg2 = regs->sprg2; 450 vcpu->arch.shared->sprg2 = regs->sprg2;
466 vcpu->arch.shared->sprg3 = regs->sprg3; 451 vcpu->arch.shared->sprg3 = regs->sprg3;
467 vcpu->arch.shared->sprg4 = regs->sprg4; 452 vcpu->arch.sprg4 = regs->sprg4;
468 vcpu->arch.shared->sprg5 = regs->sprg5; 453 vcpu->arch.sprg5 = regs->sprg5;
469 vcpu->arch.shared->sprg6 = regs->sprg6; 454 vcpu->arch.sprg6 = regs->sprg6;
470 vcpu->arch.shared->sprg7 = regs->sprg7; 455 vcpu->arch.sprg7 = regs->sprg7;
471 456
472 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 457 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
473 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 458 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -485,132 +470,47 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
485 return -ENOTSUPP; 470 return -ENOTSUPP;
486} 471}
487 472
488int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 473int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
474 struct kvm_translation *tr)
489{ 475{
490 int r; 476 return 0;
491 union kvmppc_one_reg val;
492 int size;
493 long int i;
494
495 size = one_reg_size(reg->id);
496 if (size > sizeof(val))
497 return -EINVAL;
498
499 r = kvmppc_get_one_reg(vcpu, reg->id, &val);
500
501 if (r == -EINVAL) {
502 r = 0;
503 switch (reg->id) {
504 case KVM_REG_PPC_DAR:
505 val = get_reg_val(reg->id, vcpu->arch.shared->dar);
506 break;
507 case KVM_REG_PPC_DSISR:
508 val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
509 break;
510 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
511 i = reg->id - KVM_REG_PPC_FPR0;
512 val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
513 break;
514 case KVM_REG_PPC_FPSCR:
515 val = get_reg_val(reg->id, vcpu->arch.fpscr);
516 break;
517#ifdef CONFIG_ALTIVEC
518 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
519 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
520 r = -ENXIO;
521 break;
522 }
523 val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
524 break;
525 case KVM_REG_PPC_VSCR:
526 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
527 r = -ENXIO;
528 break;
529 }
530 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
531 break;
532#endif /* CONFIG_ALTIVEC */
533 default:
534 r = -EINVAL;
535 break;
536 }
537 }
538 if (r)
539 return r;
540
541 if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
542 r = -EFAULT;
543
544 return r;
545} 477}
546 478
547int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 479/*
480 * Get (and clear) the dirty memory log for a memory slot.
481 */
482int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
483 struct kvm_dirty_log *log)
548{ 484{
485 struct kvm_memory_slot *memslot;
486 struct kvm_vcpu *vcpu;
487 ulong ga, ga_end;
488 int is_dirty = 0;
549 int r; 489 int r;
550 union kvmppc_one_reg val; 490 unsigned long n;
551 int size;
552 long int i;
553 491
554 size = one_reg_size(reg->id); 492 mutex_lock(&kvm->slots_lock);
555 if (size > sizeof(val))
556 return -EINVAL;
557 493
558 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) 494 r = kvm_get_dirty_log(kvm, log, &is_dirty);
559 return -EFAULT; 495 if (r)
496 goto out;
560 497
561 r = kvmppc_set_one_reg(vcpu, reg->id, &val); 498 /* If nothing is dirty, don't bother messing with page tables. */
499 if (is_dirty) {
500 memslot = &kvm->memslots->memslots[log->slot];
562 501
563 if (r == -EINVAL) { 502 ga = memslot->base_gfn << PAGE_SHIFT;
564 r = 0; 503 ga_end = ga + (memslot->npages << PAGE_SHIFT);
565 switch (reg->id) {
566 case KVM_REG_PPC_DAR:
567 vcpu->arch.shared->dar = set_reg_val(reg->id, val);
568 break;
569 case KVM_REG_PPC_DSISR:
570 vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
571 break;
572 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
573 i = reg->id - KVM_REG_PPC_FPR0;
574 vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
575 break;
576 case KVM_REG_PPC_FPSCR:
577 vcpu->arch.fpscr = set_reg_val(reg->id, val);
578 break;
579#ifdef CONFIG_ALTIVEC
580 case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
581 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
582 r = -ENXIO;
583 break;
584 }
585 vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
586 break;
587 case KVM_REG_PPC_VSCR:
588 if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
589 r = -ENXIO;
590 break;
591 }
592 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
593 break;
594#endif /* CONFIG_ALTIVEC */
595 default:
596 r = -EINVAL;
597 break;
598 }
599 }
600 504
601 return r; 505 kvm_for_each_vcpu(n, vcpu, kvm)
602} 506 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
603 507
604int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 508 n = kvm_dirty_bitmap_bytes(memslot);
605 struct kvm_translation *tr) 509 memset(memslot->dirty_bitmap, 0, n);
606{ 510 }
607 return 0;
608}
609
610void kvmppc_decrementer_func(unsigned long data)
611{
612 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
613 511
614 kvmppc_core_queue_dec(vcpu); 512 r = 0;
615 kvm_vcpu_kick(vcpu); 513out:
514 mutex_unlock(&kvm->slots_lock);
515 return r;
616} 516}
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 00e619bf608..9fecbfbce77 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -141,7 +141,7 @@ extern char etext[];
141int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 141int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
142{ 142{
143 pfn_t hpaddr; 143 pfn_t hpaddr;
144 u64 vpn; 144 u64 va;
145 u64 vsid; 145 u64 vsid;
146 struct kvmppc_sid_map *map; 146 struct kvmppc_sid_map *map;
147 volatile u32 *pteg; 147 volatile u32 *pteg;
@@ -151,15 +151,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
151 bool primary = false; 151 bool primary = false;
152 bool evict = false; 152 bool evict = false;
153 struct hpte_cache *pte; 153 struct hpte_cache *pte;
154 int r = 0;
155 154
156 /* Get host physical address for gpa */ 155 /* Get host physical address for gpa */
157 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 156 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
158 if (is_error_noslot_pfn(hpaddr)) { 157 if (is_error_pfn(hpaddr)) {
159 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", 158 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
160 orig_pte->eaddr); 159 orig_pte->eaddr);
161 r = -EINVAL; 160 return -EINVAL;
162 goto out;
163 } 161 }
164 hpaddr <<= PAGE_SHIFT; 162 hpaddr <<= PAGE_SHIFT;
165 163
@@ -173,8 +171,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
173 BUG_ON(!map); 171 BUG_ON(!map);
174 172
175 vsid = map->host_vsid; 173 vsid = map->host_vsid;
176 vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) | 174 va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);
177 ((eaddr & ~ESID_MASK) >> VPN_SHIFT); 175
178next_pteg: 176next_pteg:
179 if (rr == 16) { 177 if (rr == 16) {
180 primary = !primary; 178 primary = !primary;
@@ -211,9 +209,6 @@ next_pteg:
211 pteg1 |= PP_RWRX; 209 pteg1 |= PP_RWRX;
212 } 210 }
213 211
214 if (orig_pte->may_execute)
215 kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
216
217 local_irq_disable(); 212 local_irq_disable();
218 213
219 if (pteg[rr]) { 214 if (pteg[rr]) {
@@ -244,19 +239,17 @@ next_pteg:
244 dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", 239 dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
245 orig_pte->may_write ? 'w' : '-', 240 orig_pte->may_write ? 'w' : '-',
246 orig_pte->may_execute ? 'x' : '-', 241 orig_pte->may_execute ? 'x' : '-',
247 orig_pte->eaddr, (ulong)pteg, vpn, 242 orig_pte->eaddr, (ulong)pteg, va,
248 orig_pte->vpage, hpaddr); 243 orig_pte->vpage, hpaddr);
249 244
250 pte->slot = (ulong)&pteg[rr]; 245 pte->slot = (ulong)&pteg[rr];
251 pte->host_vpn = vpn; 246 pte->host_va = va;
252 pte->pte = *orig_pte; 247 pte->pte = *orig_pte;
253 pte->pfn = hpaddr >> PAGE_SHIFT; 248 pte->pfn = hpaddr >> PAGE_SHIFT;
254 249
255 kvmppc_mmu_hpte_cache_map(vcpu, pte); 250 kvmppc_mmu_hpte_cache_map(vcpu, pte);
256 251
257 kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); 252 return 0;
258out:
259 return r;
260} 253}
261 254
262static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) 255static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -304,14 +297,12 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
304 u64 gvsid; 297 u64 gvsid;
305 u32 sr; 298 u32 sr;
306 struct kvmppc_sid_map *map; 299 struct kvmppc_sid_map *map;
307 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 300 struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
308 int r = 0;
309 301
310 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { 302 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
311 /* Invalidate an entry */ 303 /* Invalidate an entry */
312 svcpu->sr[esid] = SR_INVALID; 304 svcpu->sr[esid] = SR_INVALID;
313 r = -ENOENT; 305 return -ENOENT;
314 goto out;
315 } 306 }
316 307
317 map = find_sid_vsid(vcpu, gvsid); 308 map = find_sid_vsid(vcpu, gvsid);
@@ -324,21 +315,17 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
324 315
325 dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); 316 dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
326 317
327out: 318 return 0;
328 svcpu_put(svcpu);
329 return r;
330} 319}
331 320
332void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 321void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
333{ 322{
334 int i; 323 int i;
335 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 324 struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
336 325
337 dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); 326 dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
338 for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) 327 for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
339 svcpu->sr[i] = SR_INVALID; 328 svcpu->sr[i] = SR_INVALID;
340
341 svcpu_put(svcpu);
342} 329}
343 330
344void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 331void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index 7e06a6fc8d0..3608471ad2d 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -31,7 +31,7 @@
31 * R1 = host R1 31 * R1 = host R1
32 * R2 = host R2 32 * R2 = host R2
33 * R3 = shadow vcpu 33 * R3 = shadow vcpu
34 * all other volatile GPRS = free except R4, R6 34 * all other volatile GPRS = free
35 * SVCPU[CR] = guest CR 35 * SVCPU[CR] = guest CR
36 * SVCPU[XER] = guest XER 36 * SVCPU[XER] = guest XER
37 * SVCPU[CTR] = guest CTR 37 * SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index b871721c005..c6d3e194b6b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -128,13 +128,7 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
128 dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n", 128 dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
129 page, vcpu_book3s->sdr1, pteg, slbe->vsid); 129 page, vcpu_book3s->sdr1, pteg, slbe->vsid);
130 130
131 /* When running a PAPR guest, SDR1 contains a HVA address instead 131 r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
132 of a GPA */
133 if (vcpu_book3s->vcpu.arch.papr_enabled)
134 r = pteg;
135 else
136 r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
137
138 if (kvm_is_error_hva(r)) 132 if (kvm_is_error_hva(r))
139 return r; 133 return r;
140 return r | (pteg & ~PAGE_MASK); 134 return r | (pteg & ~PAGE_MASK);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index ead58e31729..fa2f08434ba 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -33,7 +33,7 @@
33 33
34void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 34void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
35{ 35{
36 ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, 36 ppc_md.hpte_invalidate(pte->slot, pte->host_va,
37 MMU_PAGE_4K, MMU_SEGSIZE_256M, 37 MMU_PAGE_4K, MMU_SEGSIZE_256M,
38 false); 38 false);
39} 39}
@@ -80,23 +80,20 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
80 80
81int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 81int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
82{ 82{
83 unsigned long vpn;
84 pfn_t hpaddr; 83 pfn_t hpaddr;
85 ulong hash, hpteg; 84 ulong hash, hpteg, va;
86 u64 vsid; 85 u64 vsid;
87 int ret; 86 int ret;
88 int rflags = 0x192; 87 int rflags = 0x192;
89 int vflags = 0; 88 int vflags = 0;
90 int attempt = 0; 89 int attempt = 0;
91 struct kvmppc_sid_map *map; 90 struct kvmppc_sid_map *map;
92 int r = 0;
93 91
94 /* Get host physical address for gpa */ 92 /* Get host physical address for gpa */
95 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 93 hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
96 if (is_error_noslot_pfn(hpaddr)) { 94 if (is_error_pfn(hpaddr)) {
97 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); 95 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
98 r = -EINVAL; 96 return -EINVAL;
99 goto out;
100 } 97 }
101 hpaddr <<= PAGE_SHIFT; 98 hpaddr <<= PAGE_SHIFT;
102 hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); 99 hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
@@ -113,12 +110,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
113 printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", 110 printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
114 vsid, orig_pte->eaddr); 111 vsid, orig_pte->eaddr);
115 WARN_ON(true); 112 WARN_ON(true);
116 r = -EINVAL; 113 return -EINVAL;
117 goto out;
118 } 114 }
119 115
120 vsid = map->host_vsid; 116 vsid = map->host_vsid;
121 vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); 117 va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
122 118
123 if (!orig_pte->may_write) 119 if (!orig_pte->may_write)
124 rflags |= HPTE_R_PP; 120 rflags |= HPTE_R_PP;
@@ -127,23 +123,18 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
127 123
128 if (!orig_pte->may_execute) 124 if (!orig_pte->may_execute)
129 rflags |= HPTE_R_N; 125 rflags |= HPTE_R_N;
130 else
131 kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
132 126
133 hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M); 127 hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
134 128
135map_again: 129map_again:
136 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 130 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
137 131
138 /* In case we tried normal mapping already, let's nuke old entries */ 132 /* In case we tried normal mapping already, let's nuke old entries */
139 if (attempt > 1) 133 if (attempt > 1)
140 if (ppc_md.hpte_remove(hpteg) < 0) { 134 if (ppc_md.hpte_remove(hpteg) < 0)
141 r = -1; 135 return -1;
142 goto out;
143 }
144 136
145 ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, 137 ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
146 MMU_PAGE_4K, MMU_SEGSIZE_256M);
147 138
148 if (ret < 0) { 139 if (ret < 0) {
149 /* If we couldn't map a primary PTE, try a secondary */ 140 /* If we couldn't map a primary PTE, try a secondary */
@@ -154,8 +145,7 @@ map_again:
154 } else { 145 } else {
155 struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); 146 struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
156 147
157 trace_kvm_book3s_64_mmu_map(rflags, hpteg, 148 trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
158 vpn, hpaddr, orig_pte);
159 149
160 /* The ppc_md code may give us a secondary entry even though we 150 /* The ppc_md code may give us a secondary entry even though we
161 asked for a primary. Fix up. */ 151 asked for a primary. Fix up. */
@@ -165,16 +155,14 @@ map_again:
165 } 155 }
166 156
167 pte->slot = hpteg + (ret & 7); 157 pte->slot = hpteg + (ret & 7);
168 pte->host_vpn = vpn; 158 pte->host_va = va;
169 pte->pte = *orig_pte; 159 pte->pte = *orig_pte;
170 pte->pfn = hpaddr >> PAGE_SHIFT; 160 pte->pfn = hpaddr >> PAGE_SHIFT;
171 161
172 kvmppc_mmu_hpte_cache_map(vcpu, pte); 162 kvmppc_mmu_hpte_cache_map(vcpu, pte);
173 } 163 }
174 kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
175 164
176out: 165 return 0;
177 return r;
178} 166}
179 167
180static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) 168static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -200,14 +188,14 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
200 backwards_map = !backwards_map; 188 backwards_map = !backwards_map;
201 189
202 /* Uh-oh ... out of mappings. Let's flush! */ 190 /* Uh-oh ... out of mappings. Let's flush! */
203 if (vcpu_book3s->proto_vsid_next == vcpu_book3s->proto_vsid_max) { 191 if (vcpu_book3s->vsid_next == vcpu_book3s->vsid_max) {
204 vcpu_book3s->proto_vsid_next = vcpu_book3s->proto_vsid_first; 192 vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
205 memset(vcpu_book3s->sid_map, 0, 193 memset(vcpu_book3s->sid_map, 0,
206 sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); 194 sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
207 kvmppc_mmu_pte_flush(vcpu, 0, 0); 195 kvmppc_mmu_pte_flush(vcpu, 0, 0);
208 kvmppc_mmu_flush_segments(vcpu); 196 kvmppc_mmu_flush_segments(vcpu);
209 } 197 }
210 map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M); 198 map->host_vsid = vcpu_book3s->vsid_next++;
211 199
212 map->guest_vsid = gvsid; 200 map->guest_vsid = gvsid;
213 map->valid = true; 201 map->valid = true;
@@ -219,30 +207,25 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
219 207
220static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) 208static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
221{ 209{
222 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
223 int i; 210 int i;
224 int max_slb_size = 64; 211 int max_slb_size = 64;
225 int found_inval = -1; 212 int found_inval = -1;
226 int r; 213 int r;
227 214
228 if (!svcpu->slb_max) 215 if (!to_svcpu(vcpu)->slb_max)
229 svcpu->slb_max = 1; 216 to_svcpu(vcpu)->slb_max = 1;
230 217
231 /* Are we overwriting? */ 218 /* Are we overwriting? */
232 for (i = 1; i < svcpu->slb_max; i++) { 219 for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) {
233 if (!(svcpu->slb[i].esid & SLB_ESID_V)) 220 if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V))
234 found_inval = i; 221 found_inval = i;
235 else if ((svcpu->slb[i].esid & ESID_MASK) == esid) { 222 else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid)
236 r = i; 223 return i;
237 goto out;
238 }
239 } 224 }
240 225
241 /* Found a spare entry that was invalidated before */ 226 /* Found a spare entry that was invalidated before */
242 if (found_inval > 0) { 227 if (found_inval > 0)
243 r = found_inval; 228 return found_inval;
244 goto out;
245 }
246 229
247 /* No spare invalid entry, so create one */ 230 /* No spare invalid entry, so create one */
248 231
@@ -250,35 +233,30 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
250 max_slb_size = mmu_slb_size; 233 max_slb_size = mmu_slb_size;
251 234
252 /* Overflowing -> purge */ 235 /* Overflowing -> purge */
253 if ((svcpu->slb_max) == max_slb_size) 236 if ((to_svcpu(vcpu)->slb_max) == max_slb_size)
254 kvmppc_mmu_flush_segments(vcpu); 237 kvmppc_mmu_flush_segments(vcpu);
255 238
256 r = svcpu->slb_max; 239 r = to_svcpu(vcpu)->slb_max;
257 svcpu->slb_max++; 240 to_svcpu(vcpu)->slb_max++;
258 241
259out:
260 svcpu_put(svcpu);
261 return r; 242 return r;
262} 243}
263 244
264int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) 245int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
265{ 246{
266 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
267 u64 esid = eaddr >> SID_SHIFT; 247 u64 esid = eaddr >> SID_SHIFT;
268 u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V; 248 u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;
269 u64 slb_vsid = SLB_VSID_USER; 249 u64 slb_vsid = SLB_VSID_USER;
270 u64 gvsid; 250 u64 gvsid;
271 int slb_index; 251 int slb_index;
272 struct kvmppc_sid_map *map; 252 struct kvmppc_sid_map *map;
273 int r = 0;
274 253
275 slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK); 254 slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
276 255
277 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { 256 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
278 /* Invalidate an entry */ 257 /* Invalidate an entry */
279 svcpu->slb[slb_index].esid = 0; 258 to_svcpu(vcpu)->slb[slb_index].esid = 0;
280 r = -ENOENT; 259 return -ENOENT;
281 goto out;
282 } 260 }
283 261
284 map = find_sid_vsid(vcpu, gvsid); 262 map = find_sid_vsid(vcpu, gvsid);
@@ -291,22 +269,18 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
291 slb_vsid &= ~SLB_VSID_KP; 269 slb_vsid &= ~SLB_VSID_KP;
292 slb_esid |= slb_index; 270 slb_esid |= slb_index;
293 271
294 svcpu->slb[slb_index].esid = slb_esid; 272 to_svcpu(vcpu)->slb[slb_index].esid = slb_esid;
295 svcpu->slb[slb_index].vsid = slb_vsid; 273 to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid;
296 274
297 trace_kvm_book3s_slbmte(slb_vsid, slb_esid); 275 trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
298 276
299out: 277 return 0;
300 svcpu_put(svcpu);
301 return r;
302} 278}
303 279
304void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 280void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
305{ 281{
306 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 282 to_svcpu(vcpu)->slb_max = 1;
307 svcpu->slb_max = 1; 283 to_svcpu(vcpu)->slb[0].esid = 0;
308 svcpu->slb[0].esid = 0;
309 svcpu_put(svcpu);
310} 284}
311 285
312void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 286void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
@@ -325,10 +299,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
325 return -1; 299 return -1;
326 vcpu3s->context_id[0] = err; 300 vcpu3s->context_id[0] = err;
327 301
328 vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) 302 vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1;
329 << USER_ESID_BITS) - 1; 303 vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS;
330 vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; 304 vcpu3s->vsid_next = vcpu3s->vsid_first;
331 vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
332 305
333 kvmppc_mmu_hpte_init(vcpu); 306 kvmppc_mmu_hpte_init(vcpu);
334 307
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8cc18abd6dd..bc3a2ea9421 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -23,10 +23,6 @@
23#include <linux/gfp.h> 23#include <linux/gfp.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
26#include <linux/vmalloc.h>
27#include <linux/srcu.h>
28#include <linux/anon_inodes.h>
29#include <linux/file.h>
30 26
31#include <asm/tlbflush.h> 27#include <asm/tlbflush.h>
32#include <asm/kvm_ppc.h> 28#include <asm/kvm_ppc.h>
@@ -37,201 +33,95 @@
37#include <asm/ppc-opcode.h> 33#include <asm/ppc-opcode.h>
38#include <asm/cputable.h> 34#include <asm/cputable.h>
39 35
40/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 36/* For now use fixed-size 16MB page table */
41#define MAX_LPID_970 63 37#define HPT_ORDER 24
38#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
39#define HPT_HASH_MASK (HPT_NPTEG - 1)
42 40
43/* Power architecture requires HPT is at least 256kB */ 41/* Pages in the VRMA are 16MB pages */
44#define PPC_MIN_HPT_ORDER 18 42#define VRMA_PAGE_ORDER 24
43#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
45 44
46static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, 45/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
47 long pte_index, unsigned long pteh, 46#define MAX_LPID_970 63
48 unsigned long ptel, unsigned long *pte_idx_ret); 47#define NR_LPIDS (LPID_RSVD + 1)
49static void kvmppc_rmap_reset(struct kvm *kvm); 48unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
50 49
51long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) 50long kvmppc_alloc_hpt(struct kvm *kvm)
52{ 51{
53 unsigned long hpt; 52 unsigned long hpt;
54 struct revmap_entry *rev; 53 unsigned long lpid;
55 struct kvmppc_linear_info *li;
56 long order = kvm_hpt_order;
57
58 if (htab_orderp) {
59 order = *htab_orderp;
60 if (order < PPC_MIN_HPT_ORDER)
61 order = PPC_MIN_HPT_ORDER;
62 }
63 54
64 /* 55 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
65 * If the user wants a different size from default, 56 HPT_ORDER - PAGE_SHIFT);
66 * try first to allocate it from the kernel page allocator.
67 */
68 hpt = 0;
69 if (order != kvm_hpt_order) {
70 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
71 __GFP_NOWARN, order - PAGE_SHIFT);
72 if (!hpt)
73 --order;
74 }
75
76 /* Next try to allocate from the preallocated pool */
77 if (!hpt) { 57 if (!hpt) {
78 li = kvm_alloc_hpt(); 58 pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
79 if (li) {
80 hpt = (ulong)li->base_virt;
81 kvm->arch.hpt_li = li;
82 order = kvm_hpt_order;
83 }
84 }
85
86 /* Lastly try successively smaller sizes from the page allocator */
87 while (!hpt && order > PPC_MIN_HPT_ORDER) {
88 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
89 __GFP_NOWARN, order - PAGE_SHIFT);
90 if (!hpt)
91 --order;
92 }
93
94 if (!hpt)
95 return -ENOMEM; 59 return -ENOMEM;
96 60 }
97 kvm->arch.hpt_virt = hpt; 61 kvm->arch.hpt_virt = hpt;
98 kvm->arch.hpt_order = order;
99 /* HPTEs are 2**4 bytes long */
100 kvm->arch.hpt_npte = 1ul << (order - 4);
101 /* 128 (2**7) bytes in each HPTEG */
102 kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
103 62
104 /* Allocate reverse map array */ 63 do {
105 rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); 64 lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
106 if (!rev) { 65 if (lpid >= NR_LPIDS) {
107 pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); 66 pr_err("kvm_alloc_hpt: No LPIDs free\n");
108 goto out_freehpt; 67 free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
109 } 68 return -ENOMEM;
110 kvm->arch.revmap = rev; 69 }
111 kvm->arch.sdr1 = __pa(hpt) | (order - 18); 70 } while (test_and_set_bit(lpid, lpid_inuse));
112 71
113 pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", 72 kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
114 hpt, order, kvm->arch.lpid); 73 kvm->arch.lpid = lpid;
115 74
116 if (htab_orderp) 75 pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
117 *htab_orderp = order;
118 return 0; 76 return 0;
119
120 out_freehpt:
121 if (kvm->arch.hpt_li)
122 kvm_release_hpt(kvm->arch.hpt_li);
123 else
124 free_pages(hpt, order - PAGE_SHIFT);
125 return -ENOMEM;
126}
127
128long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
129{
130 long err = -EBUSY;
131 long order;
132
133 mutex_lock(&kvm->lock);
134 if (kvm->arch.rma_setup_done) {
135 kvm->arch.rma_setup_done = 0;
136 /* order rma_setup_done vs. vcpus_running */
137 smp_mb();
138 if (atomic_read(&kvm->arch.vcpus_running)) {
139 kvm->arch.rma_setup_done = 1;
140 goto out;
141 }
142 }
143 if (kvm->arch.hpt_virt) {
144 order = kvm->arch.hpt_order;
145 /* Set the entire HPT to 0, i.e. invalid HPTEs */
146 memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
147 /*
148 * Reset all the reverse-mapping chains for all memslots
149 */
150 kvmppc_rmap_reset(kvm);
151 /* Ensure that each vcpu will flush its TLB on next entry. */
152 cpumask_setall(&kvm->arch.need_tlb_flush);
153 *htab_orderp = order;
154 err = 0;
155 } else {
156 err = kvmppc_alloc_hpt(kvm, htab_orderp);
157 order = *htab_orderp;
158 }
159 out:
160 mutex_unlock(&kvm->lock);
161 return err;
162} 77}
163 78
164void kvmppc_free_hpt(struct kvm *kvm) 79void kvmppc_free_hpt(struct kvm *kvm)
165{ 80{
166 kvmppc_free_lpid(kvm->arch.lpid); 81 clear_bit(kvm->arch.lpid, lpid_inuse);
167 vfree(kvm->arch.revmap); 82 free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
168 if (kvm->arch.hpt_li)
169 kvm_release_hpt(kvm->arch.hpt_li);
170 else
171 free_pages(kvm->arch.hpt_virt,
172 kvm->arch.hpt_order - PAGE_SHIFT);
173}
174
175/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
176static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
177{
178 return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
179}
180
181/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
182static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
183{
184 return (pgsize == 0x10000) ? 0x1000 : 0;
185} 83}
186 84
187void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, 85void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
188 unsigned long porder)
189{ 86{
190 unsigned long i; 87 unsigned long i;
191 unsigned long npages; 88 unsigned long npages = kvm->arch.ram_npages;
192 unsigned long hp_v, hp_r; 89 unsigned long pfn;
193 unsigned long addr, hash; 90 unsigned long *hpte;
194 unsigned long psize; 91 unsigned long hash;
195 unsigned long hp0, hp1; 92 struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
196 unsigned long idx_ret;
197 long ret;
198 struct kvm *kvm = vcpu->kvm;
199 93
200 psize = 1ul << porder; 94 if (!pginfo)
201 npages = memslot->npages >> (porder - PAGE_SHIFT); 95 return;
202 96
203 /* VRMA can't be > 1TB */ 97 /* VRMA can't be > 1TB */
204 if (npages > 1ul << (40 - porder)) 98 if (npages > 1ul << (40 - kvm->arch.ram_porder))
205 npages = 1ul << (40 - porder); 99 npages = 1ul << (40 - kvm->arch.ram_porder);
206 /* Can't use more than 1 HPTE per HPTEG */ 100 /* Can't use more than 1 HPTE per HPTEG */
207 if (npages > kvm->arch.hpt_mask + 1) 101 if (npages > HPT_NPTEG)
208 npages = kvm->arch.hpt_mask + 1; 102 npages = HPT_NPTEG;
209
210 hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
211 HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
212 hp1 = hpte1_pgsize_encoding(psize) |
213 HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
214 103
215 for (i = 0; i < npages; ++i) { 104 for (i = 0; i < npages; ++i) {
216 addr = i << porder; 105 pfn = pginfo[i].pfn;
106 if (!pfn)
107 break;
217 /* can't use hpt_hash since va > 64 bits */ 108 /* can't use hpt_hash since va > 64 bits */
218 hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; 109 hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
219 /* 110 /*
220 * We assume that the hash table is empty and no 111 * We assume that the hash table is empty and no
221 * vcpus are using it at this stage. Since we create 112 * vcpus are using it at this stage. Since we create
222 * at most one HPTE per HPTEG, we just assume entry 7 113 * at most one HPTE per HPTEG, we just assume entry 7
223 * is available and use it. 114 * is available and use it.
224 */ 115 */
225 hash = (hash << 3) + 7; 116 hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7));
226 hp_v = hp0 | ((addr >> 16) & ~0x7fUL); 117 hpte += 7 * 2;
227 hp_r = hp1 | addr; 118 /* HPTE low word - RPN, protection, etc. */
228 ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r, 119 hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
229 &idx_ret); 120 HPTE_R_M | PP_RWXX;
230 if (ret != H_SUCCESS) { 121 wmb();
231 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", 122 hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
232 addr, ret); 123 (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
233 break; 124 HPTE_V_LARGE | HPTE_V_VALID;
234 }
235 } 125 }
236} 126}
237 127
@@ -242,7 +132,8 @@ int kvmppc_mmu_hv_init(void)
242 if (!cpu_has_feature(CPU_FTR_HVMODE)) 132 if (!cpu_has_feature(CPU_FTR_HVMODE))
243 return -EINVAL; 133 return -EINVAL;
244 134
245 /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ 135 memset(lpid_inuse, 0, sizeof(lpid_inuse));
136
246 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 137 if (cpu_has_feature(CPU_FTR_ARCH_206)) {
247 host_lpid = mfspr(SPRN_LPID); /* POWER7 */ 138 host_lpid = mfspr(SPRN_LPID); /* POWER7 */
248 rsvd_lpid = LPID_RSVD; 139 rsvd_lpid = LPID_RSVD;
@@ -251,11 +142,9 @@ int kvmppc_mmu_hv_init(void)
251 rsvd_lpid = MAX_LPID_970; 142 rsvd_lpid = MAX_LPID_970;
252 } 143 }
253 144
254 kvmppc_init_lpid(rsvd_lpid + 1); 145 set_bit(host_lpid, lpid_inuse);
255
256 kvmppc_claim_lpid(host_lpid);
257 /* rsvd_lpid is reserved for use in partition switching */ 146 /* rsvd_lpid is reserved for use in partition switching */
258 kvmppc_claim_lpid(rsvd_lpid); 147 set_bit(rsvd_lpid, lpid_inuse);
259 148
260 return 0; 149 return 0;
261} 150}
@@ -269,1245 +158,10 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
269 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); 158 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
270} 159}
271 160
272/*
273 * This is called to get a reference to a guest page if there isn't
274 * one already in the memslot->arch.slot_phys[] array.
275 */
276static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
277 struct kvm_memory_slot *memslot,
278 unsigned long psize)
279{
280 unsigned long start;
281 long np, err;
282 struct page *page, *hpage, *pages[1];
283 unsigned long s, pgsize;
284 unsigned long *physp;
285 unsigned int is_io, got, pgorder;
286 struct vm_area_struct *vma;
287 unsigned long pfn, i, npages;
288
289 physp = memslot->arch.slot_phys;
290 if (!physp)
291 return -EINVAL;
292 if (physp[gfn - memslot->base_gfn])
293 return 0;
294
295 is_io = 0;
296 got = 0;
297 page = NULL;
298 pgsize = psize;
299 err = -EINVAL;
300 start = gfn_to_hva_memslot(memslot, gfn);
301
302 /* Instantiate and get the page we want access to */
303 np = get_user_pages_fast(start, 1, 1, pages);
304 if (np != 1) {
305 /* Look up the vma for the page */
306 down_read(&current->mm->mmap_sem);
307 vma = find_vma(current->mm, start);
308 if (!vma || vma->vm_start > start ||
309 start + psize > vma->vm_end ||
310 !(vma->vm_flags & VM_PFNMAP))
311 goto up_err;
312 is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
313 pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
314 /* check alignment of pfn vs. requested page size */
315 if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
316 goto up_err;
317 up_read(&current->mm->mmap_sem);
318
319 } else {
320 page = pages[0];
321 got = KVMPPC_GOT_PAGE;
322
323 /* See if this is a large page */
324 s = PAGE_SIZE;
325 if (PageHuge(page)) {
326 hpage = compound_head(page);
327 s <<= compound_order(hpage);
328 /* Get the whole large page if slot alignment is ok */
329 if (s > psize && slot_is_aligned(memslot, s) &&
330 !(memslot->userspace_addr & (s - 1))) {
331 start &= ~(s - 1);
332 pgsize = s;
333 get_page(hpage);
334 put_page(page);
335 page = hpage;
336 }
337 }
338 if (s < psize)
339 goto out;
340 pfn = page_to_pfn(page);
341 }
342
343 npages = pgsize >> PAGE_SHIFT;
344 pgorder = __ilog2(npages);
345 physp += (gfn - memslot->base_gfn) & ~(npages - 1);
346 spin_lock(&kvm->arch.slot_phys_lock);
347 for (i = 0; i < npages; ++i) {
348 if (!physp[i]) {
349 physp[i] = ((pfn + i) << PAGE_SHIFT) +
350 got + is_io + pgorder;
351 got = 0;
352 }
353 }
354 spin_unlock(&kvm->arch.slot_phys_lock);
355 err = 0;
356
357 out:
358 if (got)
359 put_page(page);
360 return err;
361
362 up_err:
363 up_read(&current->mm->mmap_sem);
364 return err;
365}
366
367long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
368 long pte_index, unsigned long pteh,
369 unsigned long ptel, unsigned long *pte_idx_ret)
370{
371 unsigned long psize, gpa, gfn;
372 struct kvm_memory_slot *memslot;
373 long ret;
374
375 if (kvm->arch.using_mmu_notifiers)
376 goto do_insert;
377
378 psize = hpte_page_size(pteh, ptel);
379 if (!psize)
380 return H_PARAMETER;
381
382 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
383
384 /* Find the memslot (if any) for this address */
385 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
386 gfn = gpa >> PAGE_SHIFT;
387 memslot = gfn_to_memslot(kvm, gfn);
388 if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
389 if (!slot_is_aligned(memslot, psize))
390 return H_PARAMETER;
391 if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
392 return H_PARAMETER;
393 }
394
395 do_insert:
396 /* Protect linux PTE lookup from page table destruction */
397 rcu_read_lock_sched(); /* this disables preemption too */
398 ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
399 current->mm->pgd, false, pte_idx_ret);
400 rcu_read_unlock_sched();
401 if (ret == H_TOO_HARD) {
402 /* this can't happen */
403 pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
404 ret = H_RESOURCE; /* or something */
405 }
406 return ret;
407
408}
409
410/*
411 * We come here on a H_ENTER call from the guest when we are not
412 * using mmu notifiers and we don't have the requested page pinned
413 * already.
414 */
415long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
416 long pte_index, unsigned long pteh,
417 unsigned long ptel)
418{
419 return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
420 pteh, ptel, &vcpu->arch.gpr[4]);
421}
422
423static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
424 gva_t eaddr)
425{
426 u64 mask;
427 int i;
428
429 for (i = 0; i < vcpu->arch.slb_nr; i++) {
430 if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
431 continue;
432
433 if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
434 mask = ESID_MASK_1T;
435 else
436 mask = ESID_MASK;
437
438 if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
439 return &vcpu->arch.slb[i];
440 }
441 return NULL;
442}
443
444static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
445 unsigned long ea)
446{
447 unsigned long ra_mask;
448
449 ra_mask = hpte_page_size(v, r) - 1;
450 return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
451}
452
453static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 161static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
454 struct kvmppc_pte *gpte, bool data) 162 struct kvmppc_pte *gpte, bool data)
455{
456 struct kvm *kvm = vcpu->kvm;
457 struct kvmppc_slb *slbe;
458 unsigned long slb_v;
459 unsigned long pp, key;
460 unsigned long v, gr;
461 unsigned long *hptep;
462 int index;
463 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
464
465 /* Get SLB entry */
466 if (virtmode) {
467 slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
468 if (!slbe)
469 return -EINVAL;
470 slb_v = slbe->origv;
471 } else {
472 /* real mode access */
473 slb_v = vcpu->kvm->arch.vrma_slb_v;
474 }
475
476 /* Find the HPTE in the hash table */
477 index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
478 HPTE_V_VALID | HPTE_V_ABSENT);
479 if (index < 0)
480 return -ENOENT;
481 hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
482 v = hptep[0] & ~HPTE_V_HVLOCK;
483 gr = kvm->arch.revmap[index].guest_rpte;
484
485 /* Unlock the HPTE */
486 asm volatile("lwsync" : : : "memory");
487 hptep[0] = v;
488
489 gpte->eaddr = eaddr;
490 gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
491
492 /* Get PP bits and key for permission check */
493 pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
494 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
495 key &= slb_v;
496
497 /* Calculate permissions */
498 gpte->may_read = hpte_read_permission(pp, key);
499 gpte->may_write = hpte_write_permission(pp, key);
500 gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
501
502 /* Storage key permission check for POWER7 */
503 if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
504 int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
505 if (amrfield & 1)
506 gpte->may_read = 0;
507 if (amrfield & 2)
508 gpte->may_write = 0;
509 }
510
511 /* Get the guest physical address */
512 gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
513 return 0;
514}
515
516/*
517 * Quick test for whether an instruction is a load or a store.
518 * If the instruction is a load or a store, then this will indicate
519 * which it is, at least on server processors. (Embedded processors
520 * have some external PID instructions that don't follow the rule
521 * embodied here.) If the instruction isn't a load or store, then
522 * this doesn't return anything useful.
523 */
524static int instruction_is_store(unsigned int instr)
525{
526 unsigned int mask;
527
528 mask = 0x10000000;
529 if ((instr & 0xfc000000) == 0x7c000000)
530 mask = 0x100; /* major opcode 31 */
531 return (instr & mask) != 0;
532}
533
/*
 * Handle a guest access to an emulated MMIO region, i.e. a guest
 * physical address not backed by any memslot.  Fetches the faulting
 * instruction if we don't already have it, sanity-checks its direction
 * (load vs. store) against the original fault, records the faulting
 * addresses and hands off to the generic MMIO emulator.
 * Returns a RESUME_* code for the exit handler.
 */
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
				unsigned long gpa, gva_t ea, int is_store)
{
	int ret;
	u32 last_inst;
	unsigned long srr0 = kvmppc_get_pc(vcpu);

	/* We try to load the last instruction.  We don't let
	 * emulate_instruction do it as it doesn't check what
	 * kvmppc_ld returns.
	 * If we fail, we just return to the guest and try executing it again.
	 */
	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
		ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
		if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
			return RESUME_GUEST;
		vcpu->arch.last_inst = last_inst;
	}

	/*
	 * WARNING: We do not know for sure whether the instruction we just
	 * read from memory is the same that caused the fault in the first
	 * place.  If the instruction we read is neither an load or a store,
	 * then it can't access memory, so we don't need to worry about
	 * enforcing access permissions.  So, assuming it is a load or
	 * store, we just check that its direction (load or store) is
	 * consistent with the original fault, since that's what we
	 * checked the access permissions against.  If there is a mismatch
	 * we just return and retry the instruction.
	 */

	if (instruction_is_store(vcpu->arch.last_inst) != !!is_store)
		return RESUME_GUEST;

	/*
	 * Emulated accesses are emulated by looking at the hash for
	 * translation once, then performing the access later.  The
	 * translation could be invalidated in the meantime in which
	 * point performing the subsequent memory access on the old
	 * physical address could possibly be a security hole for the
	 * guest (but not the host).
	 *
	 * This is less of an issue for MMIO stores since they aren't
	 * globally visible.  It could be an issue for MMIO loads to
	 * a certain extent but we'll ignore it for now.
	 */

	vcpu->arch.paddr_accessed = gpa;
	vcpu->arch.vaddr_accessed = ea;
	return kvmppc_emulate_mmio(run, vcpu);
}
585
/*
 * Handle a host-side page fault for a guest HPTE that the real-mode
 * handler could not resolve (HPTE marked absent, or MMIO).  Re-validates
 * the HPTE found in real mode, translates the guest physical address,
 * faults in the backing page (or I/O mapping), and fixes up the real
 * HPTE to point at the host pfn.  Returns a RESUME_* code or -EFAULT.
 */
int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
				unsigned long ea, unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long *hptep, hpte[3], r;
	unsigned long mmu_seq, psize, pte_size;
	unsigned long gpa, gfn, hva, pfn;
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	struct revmap_entry *rev;
	struct page *page, *pages[1];
	long index, ret, npages;
	unsigned long is_io;
	unsigned int writing, write_ok;
	struct vm_area_struct *vma;
	unsigned long rcbits;

	/*
	 * Real-mode code has already searched the HPT and found the
	 * entry we're interested in.  Lock the entry and check that
	 * it hasn't changed.  If it has, just return and re-execute the
	 * instruction.
	 */
	if (ea != vcpu->arch.pgfault_addr)
		return RESUME_GUEST;
	index = vcpu->arch.pgfault_index;
	/* each HPTE is 16 bytes, hence index << 4 */
	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
	rev = &kvm->arch.revmap[index];
	preempt_disable();
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	/* Snapshot the HPTE and the guest's view of it under the lock */
	hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
	hpte[1] = hptep[1];
	hpte[2] = r = rev->guest_rpte;
	/* order the reads above before the store that drops HPTE_V_HVLOCK */
	asm volatile("lwsync" : : : "memory");
	hptep[0] = hpte[0];
	preempt_enable();

	/* HPTE changed since the real-mode handler saw it? retry in guest */
	if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
	    hpte[1] != vcpu->arch.pgfault_hpte[1])
		return RESUME_GUEST;

	/* Translate the logical address and get the page */
	psize = hpte_page_size(hpte[0], r);
	gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
	gfn = gpa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(kvm, gfn);

	/* No memslot means it's an emulated MMIO region */
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
					      dsisr & DSISR_ISSTORE);

	if (!kvm->arch.using_mmu_notifiers)
		return -EFAULT;		/* should never get here */

	/* used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	is_io = 0;
	pfn = 0;
	page = NULL;
	pte_size = PAGE_SIZE;
	writing = (dsisr & DSISR_ISSTORE) != 0;
	/* If writing != 0, then the HPTE must allow writing, if we get here */
	write_ok = writing;
	hva = gfn_to_hva_memslot(memslot, gfn);
	npages = get_user_pages_fast(hva, 1, writing, pages);
	if (npages < 1) {
		/* Check if it's an I/O mapping */
		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, hva);
		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
		    (vma->vm_flags & VM_PFNMAP)) {
			pfn = vma->vm_pgoff +
				((hva - vma->vm_start) >> PAGE_SHIFT);
			pte_size = psize;
			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
			write_ok = vma->vm_flags & VM_WRITE;
		}
		up_read(&current->mm->mmap_sem);
		if (!pfn)
			return -EFAULT;
	} else {
		page = pages[0];
		/* For a huge page, account for the full backing page size */
		if (PageHuge(page)) {
			page = compound_head(page);
			pte_size <<= compound_order(page);
		}
		/* if the guest wants write access, see if that is OK */
		if (!writing && hpte_is_writable(r)) {
			pte_t *ptep, pte;

			/*
			 * We need to protect against page table destruction
			 * while looking up and updating the pte.
			 */
			rcu_read_lock_sched();
			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
							 hva, NULL);
			if (ptep && pte_present(*ptep)) {
				pte = kvmppc_read_update_linux_pte(ptep, 1);
				if (pte_write(pte))
					write_ok = 1;
			}
			rcu_read_unlock_sched();
		}
		pfn = page_to_pfn(page);
	}

	/* Guest page size larger than what backs it: refuse */
	ret = -EFAULT;
	if (psize > pte_size)
		goto out_put;

	/* Check WIMG vs. the actual page we're accessing */
	if (!hpte_cache_flags_ok(r, is_io)) {
		/* page == NULL on the is_io path, so nothing to put here */
		if (is_io)
			return -EFAULT;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
		 */
		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
	}

	/* Set the HPTE to point to pfn: replace the RPN field (at the
	 * actual backing page-size granularity) with the host pfn */
	r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
	if (hpte_is_writable(r) && !write_ok)
		r = hpte_make_readonly(r);
	ret = RESUME_GUEST;
	preempt_disable();
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
	    rev->guest_rpte != hpte[2])
		/* HPTE has been changed under us; let the guest retry */
		goto out_unlock;
	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;

	rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
	lock_rmap(rmap);

	/* Check if we might have been invalidated; let the guest retry if so */
	ret = RESUME_GUEST;
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
		unlock_rmap(rmap);
		goto out_unlock;
	}

	/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);

	if (hptep[0] & HPTE_V_VALID) {
		/* HPTE was previously valid, so we need to invalidate it */
		unlock_rmap(rmap);
		hptep[0] |= HPTE_V_ABSENT;
		kvmppc_invalidate_hpte(kvm, hptep, index);
		/* don't lose previous R and C bits */
		r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
	} else {
		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
	}

	/* publish the second doubleword before making the HPTE valid */
	hptep[1] = r;
	eieio();
	hptep[0] = hpte[0];	/* also drops HPTE_V_HVLOCK */
	asm volatile("ptesync" : : : "memory");
	preempt_enable();
	if (page && hpte_is_writable(r))
		SetPageDirty(page);

 out_put:
	if (page) {
		/*
		 * We drop pages[0] here, not page because page might
		 * have been set to the head page of a compound, but
		 * we have to drop the reference on the correct tail
		 * page to match the get inside gup()
		 */
		put_page(pages[0]);
	}
	return ret;

 out_unlock:
	hptep[0] &= ~HPTE_V_HVLOCK;
	preempt_enable();
	goto out_put;
}
776
777static void kvmppc_rmap_reset(struct kvm *kvm)
778{
779 struct kvm_memslots *slots;
780 struct kvm_memory_slot *memslot;
781 int srcu_idx;
782
783 srcu_idx = srcu_read_lock(&kvm->srcu);
784 slots = kvm->memslots;
785 kvm_for_each_memslot(memslot, slots) {
786 /*
787 * This assumes it is acceptable to lose reference and
788 * change bits across a reset.
789 */
790 memset(memslot->arch.rmap, 0,
791 memslot->npages * sizeof(*memslot->arch.rmap));
792 }
793 srcu_read_unlock(&kvm->srcu, srcu_idx);
794}
795
796static int kvm_handle_hva_range(struct kvm *kvm,
797 unsigned long start,
798 unsigned long end,
799 int (*handler)(struct kvm *kvm,
800 unsigned long *rmapp,
801 unsigned long gfn))
802{ 163{
803 int ret; 164 return -ENOENT;
804 int retval = 0;
805 struct kvm_memslots *slots;
806 struct kvm_memory_slot *memslot;
807
808 slots = kvm_memslots(kvm);
809 kvm_for_each_memslot(memslot, slots) {
810 unsigned long hva_start, hva_end;
811 gfn_t gfn, gfn_end;
812
813 hva_start = max(start, memslot->userspace_addr);
814 hva_end = min(end, memslot->userspace_addr +
815 (memslot->npages << PAGE_SHIFT));
816 if (hva_start >= hva_end)
817 continue;
818 /*
819 * {gfn(page) | page intersects with [hva_start, hva_end)} =
820 * {gfn, gfn+1, ..., gfn_end-1}.
821 */
822 gfn = hva_to_gfn_memslot(hva_start, memslot);
823 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
824
825 for (; gfn < gfn_end; ++gfn) {
826 gfn_t gfn_offset = gfn - memslot->base_gfn;
827
828 ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
829 retval |= ret;
830 }
831 }
832
833 return retval;
834}
835
/* Single-page convenience wrapper around kvm_handle_hva_range() */
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
					 unsigned long gfn))
{
	return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
}
842
/*
 * Unmap all HPTEs on the reverse-map chain at *rmapp that map @gfn:
 * remove each from the chain, invalidate it, and harvest its R/C bits
 * into the rmap entry and the guest view of the HPTE.  Always returns 0.
 */
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
			   unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long h, i, j;
	unsigned long *hptep;
	unsigned long ptel, psize, rcbits;

	/* Repeatedly pop the head of the chain until it is empty */
	for (;;) {
		lock_rmap(rmapp);
		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
			unlock_rmap(rmapp);
			break;
		}

		/*
		 * To avoid an ABBA deadlock with the HPTE lock bit,
		 * we can't spin on the HPTE lock while holding the
		 * rmap chain lock.
		 */
		i = *rmapp & KVMPPC_RMAP_INDEX;
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			continue;
		}
		j = rev[i].forw;
		if (j == i) {
			/* chain is now empty */
			*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		} else {
			/* remove i from chain */
			h = rev[i].back;
			rev[h].forw = j;
			rev[j].back = h;
			rev[i].forw = rev[i].back = i;
			*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
		}

		/* Now check and modify the HPTE */
		ptel = rev[i].guest_rpte;
		psize = hpte_page_size(hptep[0], ptel);
		if ((hptep[0] & HPTE_V_VALID) &&
		    hpte_rpn(ptel, psize) == gfn) {
			if (kvm->arch.using_mmu_notifiers)
				hptep[0] |= HPTE_V_ABSENT;
			kvmppc_invalidate_hpte(kvm, hptep, i);
			/* Harvest R and C */
			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
			rev[i].guest_rpte = ptel | rcbits;
		}
		unlock_rmap(rmapp);
		hptep[0] &= ~HPTE_V_HVLOCK;
	}
	return 0;
}
903
/* MMU notifier: unmap any guest mappings of the page at @hva */
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	if (kvm->arch.using_mmu_notifiers)
		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
	return 0;
}
910
/* MMU notifier: unmap any guest mappings of pages in [start, end) */
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
	if (kvm->arch.using_mmu_notifiers)
		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
	return 0;
}
917
918void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
919{
920 unsigned long *rmapp;
921 unsigned long gfn;
922 unsigned long n;
923
924 rmapp = memslot->arch.rmap;
925 gfn = memslot->base_gfn;
926 for (n = memslot->npages; n; --n) {
927 /*
928 * Testing the present bit without locking is OK because
929 * the memslot has been marked invalid already, and hence
930 * no new HPTEs referencing this page can be created,
931 * thus the present bit can't go from 0 to 1.
932 */
933 if (*rmapp & KVMPPC_RMAP_PRESENT)
934 kvm_unmap_rmapp(kvm, rmapp, gfn);
935 ++rmapp;
936 ++gfn;
937 }
938}
939
/*
 * Test and clear the "referenced" state for the page whose rmap chain
 * is at *rmapp: harvest the rmap-level referenced bit and the HPTE_R_R
 * bit of every HPTE on the chain.  Returns 1 if any reference was
 * found, 0 otherwise.
 */
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
			 unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long head, i, j;
	unsigned long *hptep;
	int ret = 0;

 retry:
	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
		*rmapp &= ~KVMPPC_RMAP_REFERENCED;
		ret = 1;
	}
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return ret;
	}

	/* Walk the circular revmap chain starting at the head */
	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		j = rev[i].forw;

		/* If this HPTE isn't referenced, ignore it */
		/* (continue re-evaluates the while condition, advancing i) */
		if (!(hptep[1] & HPTE_R_R))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
			kvmppc_clear_ref_hpte(kvm, hptep, i);
			rev[i].guest_rpte |= HPTE_R_R;
			ret = 1;
		}
		hptep[0] &= ~HPTE_V_HVLOCK;
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return ret;
}
988
/* MMU notifier: test-and-clear young state for the page at @hva */
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.using_mmu_notifiers)
		return 0;
	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
}
995
/*
 * Non-destructive referenced test for the page whose rmap chain is at
 * *rmapp: returns 1 if the rmap entry or any HPTE on the chain has the
 * referenced bit set, 0 otherwise.  Nothing is cleared.
 */
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
			      unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long head, i, j;
	unsigned long *hp;
	int ret = 1;

	/* Lockless fast path: referenced bit already set in the rmap */
	if (*rmapp & KVMPPC_RMAP_REFERENCED)
		return 1;

	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_REFERENCED)
		goto out;

	/* Otherwise scan the HPTE chain for a set HPTE_R_R bit */
	if (*rmapp & KVMPPC_RMAP_PRESENT) {
		i = head = *rmapp & KVMPPC_RMAP_INDEX;
		do {
			hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
			j = rev[i].forw;
			if (hp[1] & HPTE_R_R)
				goto out;
		} while ((i = j) != head);
	}
	ret = 0;

 out:
	unlock_rmap(rmapp);
	return ret;
}
1026
/* MMU notifier: non-destructive young test for the page at @hva */
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.using_mmu_notifiers)
		return 0;
	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
}
1033
/*
 * MMU notifier: the host pte for @hva is changing; unmap the old guest
 * mappings so they get re-faulted against the new pte.
 */
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	if (!kvm->arch.using_mmu_notifiers)
		return;
	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
}
1040
/*
 * Test and clear the "changed" (dirty) state for the page whose rmap
 * chain is at *rmapp: harvest the rmap-level changed bit and clear the
 * HPTE_R_C bit of every HPTE on the chain (saving it in guest_rpte).
 * Returns 1 if the page was dirty, 0 otherwise.
 */
static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
{
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long head, i, j;
	unsigned long *hptep;
	int ret = 0;

 retry:
	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_CHANGED) {
		*rmapp &= ~KVMPPC_RMAP_CHANGED;
		ret = 1;
	}
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return ret;
	}

	/* Walk the circular revmap chain starting at the head */
	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		j = rev[i].forw;

		/* continue re-evaluates the while condition, advancing i */
		if (!(hptep[1] & HPTE_R_C))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
			/* need to make it temporarily absent to clear C */
			hptep[0] |= HPTE_V_ABSENT;
			kvmppc_invalidate_hpte(kvm, hptep, i);
			hptep[1] &= ~HPTE_R_C;
			eieio();
			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
			rev[i].guest_rpte |= HPTE_R_C;
			ret = 1;
		}
		hptep[0] &= ~HPTE_V_HVLOCK;
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return ret;
}
1092
1093long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1094 unsigned long *map)
1095{
1096 unsigned long i;
1097 unsigned long *rmapp;
1098
1099 preempt_disable();
1100 rmapp = memslot->arch.rmap;
1101 for (i = 0; i < memslot->npages; ++i) {
1102 if (kvm_test_clear_dirty(kvm, rmapp) && map)
1103 __set_bit_le(i, map);
1104 ++rmapp;
1105 }
1106 preempt_enable();
1107 return 0;
1108}
1109
/*
 * Pin the guest page containing guest physical address @gpa and return
 * a kernel virtual address for it, or NULL on failure.  If @nb_ret is
 * non-NULL, *nb_ret is set to the number of bytes usable from the
 * returned address to the end of the backing page.  The caller must
 * release the page with kvmppc_unpin_guest_page().
 */
void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
			    unsigned long *nb_ret)
{
	struct kvm_memory_slot *memslot;
	unsigned long gfn = gpa >> PAGE_SHIFT;
	struct page *page, *pages[1];
	int npages;
	unsigned long hva, psize, offset;
	unsigned long pa;
	unsigned long *physp;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto err;
	if (!kvm->arch.using_mmu_notifiers) {
		/* Pages are preallocated/tracked in the slot_phys array */
		physp = memslot->arch.slot_phys;
		if (!physp)
			goto err;
		physp += gfn - memslot->base_gfn;
		pa = *physp;
		if (!pa) {
			/* Not yet populated: fault the page in first */
			if (kvmppc_get_guest_page(kvm, gfn, memslot,
						  PAGE_SIZE) < 0)
				goto err;
			pa = *physp;
		}
		page = pfn_to_page(pa >> PAGE_SHIFT);
		get_page(page);
	} else {
		/* Pin the backing page via get_user_pages */
		hva = gfn_to_hva_memslot(memslot, gfn);
		npages = get_user_pages_fast(hva, 1, 1, pages);
		if (npages < 1)
			goto err;
		page = pages[0];
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	psize = PAGE_SIZE;
	if (PageHuge(page)) {
		page = compound_head(page);
		psize <<= compound_order(page);
	}
	offset = gpa & (psize - 1);
	if (nb_ret)
		*nb_ret = psize - offset;
	return page_address(page) + offset;

 err:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return NULL;
}
1163
/* Release a page previously pinned by kvmppc_pin_guest_page() */
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
{
	put_page(virt_to_page(va));
}
1170
1171/*
1172 * Functions for reading and writing the hash table via reads and
1173 * writes on a file descriptor.
1174 *
1175 * Reads return the guest view of the hash table, which has to be
1176 * pieced together from the real hash table and the guest_rpte
1177 * values in the revmap array.
1178 *
1179 * On writes, each HPTE written is considered in turn, and if it
1180 * is valid, it is written to the HPT as if an H_ENTER with the
1181 * exact flag set was done. When the invalid count is non-zero
1182 * in the header written to the stream, the kernel will make
1183 * sure that that many HPTEs are invalid, and invalidate them
1184 * if not.
1185 */
1186
/* Per-open state for the HPT read/write file descriptor */
struct kvm_htab_ctx {
	unsigned long index;	/* next HPTE index to read/write */
	unsigned long flags;	/* KVM_GET_HTAB_* flags from the ioctl */
	struct kvm *kvm;	/* holds a reference, dropped on release */
	int first_pass;		/* still in the first full scan of the HPT? */
};

/* Size of one HPTE in the stream: two 64-bit doublewords */
#define HPTE_SIZE	(2 * sizeof(unsigned long))
1195
/*
 * Capture one HPTE for the read stream.  Fills hpte[0]/hpte[1] with
 * the guest view of the entry (valid rather than absent, guest_rpte
 * with current R/C merged in) and clears its MODIFIED flag when the
 * entry matches @want_valid.  Returns 1 if the entry should be emitted
 * as a @want_valid entry, 0 if it should be skipped.
 */
static long record_hpte(unsigned long flags, unsigned long *hptp,
			unsigned long *hpte, struct revmap_entry *revp,
			int want_valid, int first_pass)
{
	unsigned long v, r;
	int ok = 1;
	int valid, dirty;

	/* Unmodified entries are uninteresting except on the first pass */
	dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
	if (!first_pass && !dirty)
		return 0;

	/* Cheap unlocked pre-check of validity before taking the lock */
	valid = 0;
	if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
		valid = 1;
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
		    !(hptp[0] & HPTE_V_BOLTED))
			valid = 0;
	}
	if (valid != want_valid)
		return 0;

	v = r = 0;
	if (valid || dirty) {
		/* lock the HPTE so it's stable and read it */
		preempt_disable();
		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
			cpu_relax();
		v = hptp[0];
		/* present the guest view: absent entries read as valid */
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		/* re-evaluate valid and dirty from synchronized HPTE value */
		valid = !!(v & HPTE_V_VALID);
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
			valid = 0;
		r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
		/* only clear modified if this is the right sort of entry */
		if (valid == want_valid && dirty) {
			r &= ~HPTE_GR_MODIFIED;
			revp->guest_rpte = r;
		}
		/* order the updates above before dropping HPTE_V_HVLOCK */
		asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
		hptp[0] &= ~HPTE_V_HVLOCK;
		preempt_enable();
		if (!(valid == want_valid && (first_pass || dirty)))
			ok = 0;
	}
	hpte[0] = v;
	hpte[1] = r;
	return ok;
}
1251
/*
 * read() handler for the HPT fd.  Emits a sequence of
 * kvm_get_htab_header records, each followed by n_valid HPTEs; on the
 * first pass all entries are reported, on later passes only entries
 * modified since they were last read.  Returns the number of bytes
 * written to @buf, or -EFAULT.
 */
static ssize_t kvm_htab_read(struct file *file, char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct kvm_htab_ctx *ctx = file->private_data;
	struct kvm *kvm = ctx->kvm;
	struct kvm_get_htab_header hdr;
	unsigned long *hptp;
	struct revmap_entry *revp;
	unsigned long i, nb, nw;
	unsigned long __user *lbuf;
	struct kvm_get_htab_header __user *hptr;
	unsigned long flags;
	int first_pass;
	unsigned long hpte[2];

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	first_pass = ctx->first_pass;
	flags = ctx->flags;

	/* Resume where the previous read() left off */
	i = ctx->index;
	hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
	revp = kvm->arch.revmap + i;
	lbuf = (unsigned long __user *)buf;

	nb = 0;
	/* Emit header + entries while there is room for at least one more */
	while (nb + sizeof(hdr) + HPTE_SIZE < count) {
		/* Initialize header */
		hptr = (struct kvm_get_htab_header __user *)buf;
		hdr.n_valid = 0;
		hdr.n_invalid = 0;
		nw = nb;	/* remember byte count in case this header stays empty */
		nb += sizeof(hdr);
		lbuf = (unsigned long __user *)(buf + sizeof(hdr));

		/* Skip uninteresting entries, i.e. clean on not-first pass */
		if (!first_pass) {
			while (i < kvm->arch.hpt_npte &&
			       !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
				++i;
				hptp += 2;	/* 2 doublewords per HPTE */
				++revp;
			}
		}
		hdr.index = i;

		/* Grab a series of valid entries */
		while (i < kvm->arch.hpt_npte &&
		       hdr.n_valid < 0xffff &&
		       nb + HPTE_SIZE < count &&
		       record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
			/* valid entry, write it out */
			++hdr.n_valid;
			if (__put_user(hpte[0], lbuf) ||
			    __put_user(hpte[1], lbuf + 1))
				return -EFAULT;
			nb += HPTE_SIZE;
			lbuf += 2;
			++i;
			hptp += 2;
			++revp;
		}
		/* Now skip invalid entries while we can */
		while (i < kvm->arch.hpt_npte &&
		       hdr.n_invalid < 0xffff &&
		       record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
			/* found an invalid entry */
			++hdr.n_invalid;
			++i;
			hptp += 2;
			++revp;
		}

		if (hdr.n_valid || hdr.n_invalid) {
			/* write back the header */
			if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
				return -EFAULT;
			nw = nb;
			buf = (char __user *)lbuf;
		} else {
			/* empty header: rewind, don't emit it */
			nb = nw;
		}

		/* Check if we've wrapped around the hash table */
		if (i >= kvm->arch.hpt_npte) {
			i = 0;
			ctx->first_pass = 0;
			break;
		}
	}

	ctx->index = i;

	return nb;
}
1348
/*
 * write() handler for the HPT fd.  Parses a stream of
 * kvm_get_htab_header records (the same format kvm_htab_read emits)
 * and installs the valid HPTEs / removes the invalid ones.  vcpus must
 * not be running; rma_setup_done is temporarily cleared to enforce
 * that.  Returns bytes consumed, or a negative errno.
 */
static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct kvm_htab_ctx *ctx = file->private_data;
	struct kvm *kvm = ctx->kvm;
	struct kvm_get_htab_header hdr;
	unsigned long i, j;
	unsigned long v, r;
	unsigned long __user *lbuf;
	unsigned long *hptp;
	unsigned long tmp[2];
	ssize_t nb;
	long int err, ret;
	int rma_setup;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	/* lock out vcpus from running while we're doing this */
	mutex_lock(&kvm->lock);
	rma_setup = kvm->arch.rma_setup_done;
	if (rma_setup) {
		kvm->arch.rma_setup_done = 0;	/* temporarily */
		/* order rma_setup_done vs. vcpus_running */
		smp_mb();
		if (atomic_read(&kvm->arch.vcpus_running)) {
			kvm->arch.rma_setup_done = 1;
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}
	}

	err = 0;
	/* Process one header + its HPTEs per iteration */
	for (nb = 0; nb + sizeof(hdr) <= count; ) {
		err = -EFAULT;
		if (__copy_from_user(&hdr, buf, sizeof(hdr)))
			break;

		/* truncated record: stop without error */
		err = 0;
		if (nb + hdr.n_valid * HPTE_SIZE > count)
			break;

		nb += sizeof(hdr);
		buf += sizeof(hdr);

		err = -EINVAL;
		i = hdr.index;
		if (i >= kvm->arch.hpt_npte ||
		    i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
			break;

		hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
		lbuf = (unsigned long __user *)buf;
		for (j = 0; j < hdr.n_valid; ++j) {
			err = -EFAULT;
			if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
				goto out;
			err = -EINVAL;
			if (!(v & HPTE_V_VALID))
				goto out;
			lbuf += 2;
			nb += HPTE_SIZE;

			/* Evict whatever currently occupies this slot */
			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
			err = -EIO;
			ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
							 tmp);
			if (ret != H_SUCCESS) {
				pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
				       "r=%lx\n", ret, i, v, r);
				goto out;
			}
			/* First VRMA entry seen: recompute VRMA SLB/LPCR state */
			if (!rma_setup && is_vrma_hpte(v)) {
				unsigned long psize = hpte_page_size(v, r);
				unsigned long senc = slb_pgsize_encoding(psize);
				unsigned long lpcr;

				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
					(VRMA_VSID << SLB_VSID_SHIFT_1T);
				lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
				lpcr |= senc << (LPCR_VRMASD_SH - 4);
				kvm->arch.lpcr = lpcr;
				rma_setup = 1;
			}
			++i;
			hptp += 2;
		}

		/* Remove the run of entries declared invalid */
		for (j = 0; j < hdr.n_invalid; ++j) {
			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
			++i;
			hptp += 2;
		}
		err = 0;
	}

 out:
	/* Order HPTE updates vs. rma_setup_done */
	smp_wmb();
	kvm->arch.rma_setup_done = rma_setup;
	mutex_unlock(&kvm->lock);

	if (err)
		return err;
	return nb;
}
1457
1458static int kvm_htab_release(struct inode *inode, struct file *filp)
1459{
1460 struct kvm_htab_ctx *ctx = filp->private_data;
1461
1462 filp->private_data = NULL;
1463 if (!(ctx->flags & KVM_GET_HTAB_WRITE))
1464 atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
1465 kvm_put_kvm(ctx->kvm);
1466 kfree(ctx);
1467 return 0;
1468}
1469
/* File operations for the fd returned by kvm_vm_ioctl_get_htab_fd() */
static struct file_operations kvm_htab_fops = {
	.read = kvm_htab_read,
	.write = kvm_htab_write,
	.llseek = default_llseek,
	.release = kvm_htab_release,
};
1476
/*
 * Create a file descriptor for reading (migration save) or writing
 * (migration restore) the guest hash page table.  Returns the new fd
 * or a negative errno.
 */
int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
{
	int ret;
	struct kvm_htab_ctx *ctx;
	int rwflag;

	/* reject flags we don't recognize */
	if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
		return -EINVAL;
	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	/* hold a VM reference for the lifetime of the fd */
	kvm_get_kvm(kvm);
	ctx->kvm = kvm;
	ctx->index = ghf->start_index;
	ctx->flags = ghf->flags;
	ctx->first_pass = 1;

	rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
	if (ret < 0) {
		/* NOTE: ctx is owned by the fd on success; on failure
		 * only the VM reference is dropped here */
		kvm_put_kvm(kvm);
		return ret;
	}

	if (rwflag == O_RDONLY) {
		mutex_lock(&kvm->slots_lock);
		atomic_inc(&kvm->arch.hpte_mod_interest);
		/* make sure kvmppc_do_h_enter etc. see the increment */
		synchronize_srcu_expedited(&kvm->srcu);
		mutex_unlock(&kvm->slots_lock);
	}

	return ret;
}
1512 166
1513void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 167void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 56b983e7b73..04e7d3bbfe8 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -53,7 +53,7 @@ slb_exit_skip_ ## num:
53 * R1 = host R1 53 * R1 = host R1
54 * R2 = host R2 54 * R2 = host R2
55 * R3 = shadow vcpu 55 * R3 = shadow vcpu
56 * all other volatile GPRS = free except R4, R6 56 * all other volatile GPRS = free
57 * SVCPU[CR] = guest CR 57 * SVCPU[CR] = guest CR
58 * SVCPU[XER] = guest XER 58 * SVCPU[XER] = guest XER
59 * SVCPU[CTR] = guest CTR 59 * SVCPU[CTR] = guest CTR
@@ -90,6 +90,8 @@ slb_exit_skip_ ## num:
90 or r10, r10, r12 90 or r10, r10, r12
91 slbie r10 91 slbie r10
92 92
93 isync
94
93 /* Fill SLB with our shadow */ 95 /* Fill SLB with our shadow */
94 96
95 lbz r12, SVCPU_SLB_MAX(r3) 97 lbz r12, SVCPU_SLB_MAX(r3)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
deleted file mode 100644
index 72ffc899c08..00000000000
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ /dev/null
@@ -1,150 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
17 */
18
19#include <linux/types.h>
20#include <linux/string.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/highmem.h>
24#include <linux/gfp.h>
25#include <linux/slab.h>
26#include <linux/hugetlb.h>
27#include <linux/list.h>
28#include <linux/anon_inodes.h>
29
30#include <asm/tlbflush.h>
31#include <asm/kvm_ppc.h>
32#include <asm/kvm_book3s.h>
33#include <asm/mmu-hash64.h>
34#include <asm/hvcall.h>
35#include <asm/synch.h>
36#include <asm/ppc-opcode.h>
37#include <asm/kvm_host.h>
38#include <asm/udbg.h>
39
40#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
41
42static long kvmppc_stt_npages(unsigned long window_size)
43{
44 return ALIGN((window_size >> SPAPR_TCE_SHIFT)
45 * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
46}
47
48static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
49{
50 struct kvm *kvm = stt->kvm;
51 int i;
52
53 mutex_lock(&kvm->lock);
54 list_del(&stt->list);
55 for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
56 __free_page(stt->pages[i]);
57 kfree(stt);
58 mutex_unlock(&kvm->lock);
59
60 kvm_put_kvm(kvm);
61}
62
63static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
64{
65 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
66 struct page *page;
67
68 if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
69 return VM_FAULT_SIGBUS;
70
71 page = stt->pages[vmf->pgoff];
72 get_page(page);
73 vmf->page = page;
74 return 0;
75}
76
77static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
78 .fault = kvm_spapr_tce_fault,
79};
80
81static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
82{
83 vma->vm_ops = &kvm_spapr_tce_vm_ops;
84 return 0;
85}
86
87static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
88{
89 struct kvmppc_spapr_tce_table *stt = filp->private_data;
90
91 release_spapr_tce_table(stt);
92 return 0;
93}
94
95static struct file_operations kvm_spapr_tce_fops = {
96 .mmap = kvm_spapr_tce_mmap,
97 .release = kvm_spapr_tce_release,
98};
99
100long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
101 struct kvm_create_spapr_tce *args)
102{
103 struct kvmppc_spapr_tce_table *stt = NULL;
104 long npages;
105 int ret = -ENOMEM;
106 int i;
107
108 /* Check this LIOBN hasn't been previously allocated */
109 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
110 if (stt->liobn == args->liobn)
111 return -EBUSY;
112 }
113
114 npages = kvmppc_stt_npages(args->window_size);
115
116 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
117 GFP_KERNEL);
118 if (!stt)
119 goto fail;
120
121 stt->liobn = args->liobn;
122 stt->window_size = args->window_size;
123 stt->kvm = kvm;
124
125 for (i = 0; i < npages; i++) {
126 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
127 if (!stt->pages[i])
128 goto fail;
129 }
130
131 kvm_get_kvm(kvm);
132
133 mutex_lock(&kvm->lock);
134 list_add(&stt->list, &kvm->arch.spapr_tce_tables);
135
136 mutex_unlock(&kvm->lock);
137
138 return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
139 stt, O_RDWR);
140
141fail:
142 if (stt) {
143 for (i = 0; i < npages; i++)
144 if (stt->pages[i])
145 __free_page(stt->pages[i]);
146
147 kfree(stt);
148 }
149 return ret;
150}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 30c2f3b134c..ea0f8c537c2 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -38,9 +38,6 @@
38 38
39#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 39#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
40 40
41/* WARNING: This will be called in real-mode on HV KVM and virtual
42 * mode on PR KVM
43 */
44long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 41long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
45 unsigned long ioba, unsigned long tce) 42 unsigned long ioba, unsigned long tce)
46{ 43{
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index d31a716f7f2..46684655708 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -21,8 +21,6 @@
21#include <asm/disassemble.h> 21#include <asm/disassemble.h>
22#include <asm/kvm_book3s.h> 22#include <asm/kvm_book3s.h>
23#include <asm/reg.h> 23#include <asm/reg.h>
24#include <asm/switch_to.h>
25#include <asm/time.h>
26 24
27#define OP_19_XOP_RFID 18 25#define OP_19_XOP_RFID 18
28#define OP_19_XOP_RFI 50 26#define OP_19_XOP_RFI 50
@@ -65,33 +63,10 @@
65 * function pointers, so let's just disable the define. */ 63 * function pointers, so let's just disable the define. */
66#undef mfsrin 64#undef mfsrin
67 65
68enum priv_level {
69 PRIV_PROBLEM = 0,
70 PRIV_SUPER = 1,
71 PRIV_HYPER = 2,
72};
73
74static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
75{
76 /* PAPR VMs only access supervisor SPRs */
77 if (vcpu->arch.papr_enabled && (level > PRIV_SUPER))
78 return false;
79
80 /* Limit user space to its own small SPR set */
81 if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM)
82 return false;
83
84 return true;
85}
86
87int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 66int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
88 unsigned int inst, int *advance) 67 unsigned int inst, int *advance)
89{ 68{
90 int emulated = EMULATE_DONE; 69 int emulated = EMULATE_DONE;
91 int rt = get_rt(inst);
92 int rs = get_rs(inst);
93 int ra = get_ra(inst);
94 int rb = get_rb(inst);
95 70
96 switch (get_op(inst)) { 71 switch (get_op(inst)) {
97 case 19: 72 case 19:
@@ -111,22 +86,21 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
111 case 31: 86 case 31:
112 switch (get_xop(inst)) { 87 switch (get_xop(inst)) {
113 case OP_31_XOP_MFMSR: 88 case OP_31_XOP_MFMSR:
114 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); 89 kvmppc_set_gpr(vcpu, get_rt(inst),
90 vcpu->arch.shared->msr);
115 break; 91 break;
116 case OP_31_XOP_MTMSRD: 92 case OP_31_XOP_MTMSRD:
117 { 93 {
118 ulong rs_val = kvmppc_get_gpr(vcpu, rs); 94 ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
119 if (inst & 0x10000) { 95 if (inst & 0x10000) {
120 ulong new_msr = vcpu->arch.shared->msr; 96 vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE);
121 new_msr &= ~(MSR_RI | MSR_EE); 97 vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE);
122 new_msr |= rs_val & (MSR_RI | MSR_EE);
123 vcpu->arch.shared->msr = new_msr;
124 } else 98 } else
125 kvmppc_set_msr(vcpu, rs_val); 99 kvmppc_set_msr(vcpu, rs);
126 break; 100 break;
127 } 101 }
128 case OP_31_XOP_MTMSR: 102 case OP_31_XOP_MTMSR:
129 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); 103 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
130 break; 104 break;
131 case OP_31_XOP_MFSR: 105 case OP_31_XOP_MFSR:
132 { 106 {
@@ -136,7 +110,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
136 if (vcpu->arch.mmu.mfsrin) { 110 if (vcpu->arch.mmu.mfsrin) {
137 u32 sr; 111 u32 sr;
138 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 112 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
139 kvmppc_set_gpr(vcpu, rt, sr); 113 kvmppc_set_gpr(vcpu, get_rt(inst), sr);
140 } 114 }
141 break; 115 break;
142 } 116 }
@@ -144,29 +118,29 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
144 { 118 {
145 int srnum; 119 int srnum;
146 120
147 srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf; 121 srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
148 if (vcpu->arch.mmu.mfsrin) { 122 if (vcpu->arch.mmu.mfsrin) {
149 u32 sr; 123 u32 sr;
150 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 124 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
151 kvmppc_set_gpr(vcpu, rt, sr); 125 kvmppc_set_gpr(vcpu, get_rt(inst), sr);
152 } 126 }
153 break; 127 break;
154 } 128 }
155 case OP_31_XOP_MTSR: 129 case OP_31_XOP_MTSR:
156 vcpu->arch.mmu.mtsrin(vcpu, 130 vcpu->arch.mmu.mtsrin(vcpu,
157 (inst >> 16) & 0xf, 131 (inst >> 16) & 0xf,
158 kvmppc_get_gpr(vcpu, rs)); 132 kvmppc_get_gpr(vcpu, get_rs(inst)));
159 break; 133 break;
160 case OP_31_XOP_MTSRIN: 134 case OP_31_XOP_MTSRIN:
161 vcpu->arch.mmu.mtsrin(vcpu, 135 vcpu->arch.mmu.mtsrin(vcpu,
162 (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf, 136 (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
163 kvmppc_get_gpr(vcpu, rs)); 137 kvmppc_get_gpr(vcpu, get_rs(inst)));
164 break; 138 break;
165 case OP_31_XOP_TLBIE: 139 case OP_31_XOP_TLBIE:
166 case OP_31_XOP_TLBIEL: 140 case OP_31_XOP_TLBIEL:
167 { 141 {
168 bool large = (inst & 0x00200000) ? true : false; 142 bool large = (inst & 0x00200000) ? true : false;
169 ulong addr = kvmppc_get_gpr(vcpu, rb); 143 ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
170 vcpu->arch.mmu.tlbie(vcpu, addr, large); 144 vcpu->arch.mmu.tlbie(vcpu, addr, large);
171 break; 145 break;
172 } 146 }
@@ -177,15 +151,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
177 return EMULATE_FAIL; 151 return EMULATE_FAIL;
178 152
179 vcpu->arch.mmu.slbmte(vcpu, 153 vcpu->arch.mmu.slbmte(vcpu,
180 kvmppc_get_gpr(vcpu, rs), 154 kvmppc_get_gpr(vcpu, get_rs(inst)),
181 kvmppc_get_gpr(vcpu, rb)); 155 kvmppc_get_gpr(vcpu, get_rb(inst)));
182 break; 156 break;
183 case OP_31_XOP_SLBIE: 157 case OP_31_XOP_SLBIE:
184 if (!vcpu->arch.mmu.slbie) 158 if (!vcpu->arch.mmu.slbie)
185 return EMULATE_FAIL; 159 return EMULATE_FAIL;
186 160
187 vcpu->arch.mmu.slbie(vcpu, 161 vcpu->arch.mmu.slbie(vcpu,
188 kvmppc_get_gpr(vcpu, rb)); 162 kvmppc_get_gpr(vcpu, get_rb(inst)));
189 break; 163 break;
190 case OP_31_XOP_SLBIA: 164 case OP_31_XOP_SLBIA:
191 if (!vcpu->arch.mmu.slbia) 165 if (!vcpu->arch.mmu.slbia)
@@ -197,22 +171,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
197 if (!vcpu->arch.mmu.slbmfee) { 171 if (!vcpu->arch.mmu.slbmfee) {
198 emulated = EMULATE_FAIL; 172 emulated = EMULATE_FAIL;
199 } else { 173 } else {
200 ulong t, rb_val; 174 ulong t, rb;
201 175
202 rb_val = kvmppc_get_gpr(vcpu, rb); 176 rb = kvmppc_get_gpr(vcpu, get_rb(inst));
203 t = vcpu->arch.mmu.slbmfee(vcpu, rb_val); 177 t = vcpu->arch.mmu.slbmfee(vcpu, rb);
204 kvmppc_set_gpr(vcpu, rt, t); 178 kvmppc_set_gpr(vcpu, get_rt(inst), t);
205 } 179 }
206 break; 180 break;
207 case OP_31_XOP_SLBMFEV: 181 case OP_31_XOP_SLBMFEV:
208 if (!vcpu->arch.mmu.slbmfev) { 182 if (!vcpu->arch.mmu.slbmfev) {
209 emulated = EMULATE_FAIL; 183 emulated = EMULATE_FAIL;
210 } else { 184 } else {
211 ulong t, rb_val; 185 ulong t, rb;
212 186
213 rb_val = kvmppc_get_gpr(vcpu, rb); 187 rb = kvmppc_get_gpr(vcpu, get_rb(inst));
214 t = vcpu->arch.mmu.slbmfev(vcpu, rb_val); 188 t = vcpu->arch.mmu.slbmfev(vcpu, rb);
215 kvmppc_set_gpr(vcpu, rt, t); 189 kvmppc_set_gpr(vcpu, get_rt(inst), t);
216 } 190 }
217 break; 191 break;
218 case OP_31_XOP_DCBA: 192 case OP_31_XOP_DCBA:
@@ -220,29 +194,26 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
220 break; 194 break;
221 case OP_31_XOP_DCBZ: 195 case OP_31_XOP_DCBZ:
222 { 196 {
223 ulong rb_val = kvmppc_get_gpr(vcpu, rb); 197 ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
224 ulong ra_val = 0; 198 ulong ra = 0;
225 ulong addr, vaddr; 199 ulong addr, vaddr;
226 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 200 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
227 u32 dsisr; 201 u32 dsisr;
228 int r; 202 int r;
229 203
230 if (ra) 204 if (get_ra(inst))
231 ra_val = kvmppc_get_gpr(vcpu, ra); 205 ra = kvmppc_get_gpr(vcpu, get_ra(inst));
232 206
233 addr = (ra_val + rb_val) & ~31ULL; 207 addr = (ra + rb) & ~31ULL;
234 if (!(vcpu->arch.shared->msr & MSR_SF)) 208 if (!(vcpu->arch.shared->msr & MSR_SF))
235 addr &= 0xffffffff; 209 addr &= 0xffffffff;
236 vaddr = addr; 210 vaddr = addr;
237 211
238 r = kvmppc_st(vcpu, &addr, 32, zeros, true); 212 r = kvmppc_st(vcpu, &addr, 32, zeros, true);
239 if ((r == -ENOENT) || (r == -EPERM)) { 213 if ((r == -ENOENT) || (r == -EPERM)) {
240 struct kvmppc_book3s_shadow_vcpu *svcpu;
241
242 svcpu = svcpu_get(vcpu);
243 *advance = 0; 214 *advance = 0;
244 vcpu->arch.shared->dar = vaddr; 215 vcpu->arch.shared->dar = vaddr;
245 svcpu->fault_dar = vaddr; 216 to_svcpu(vcpu)->fault_dar = vaddr;
246 217
247 dsisr = DSISR_ISSTORE; 218 dsisr = DSISR_ISSTORE;
248 if (r == -ENOENT) 219 if (r == -ENOENT)
@@ -251,8 +222,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
251 dsisr |= DSISR_PROTFAULT; 222 dsisr |= DSISR_PROTFAULT;
252 223
253 vcpu->arch.shared->dsisr = dsisr; 224 vcpu->arch.shared->dsisr = dsisr;
254 svcpu->fault_dsisr = dsisr; 225 to_svcpu(vcpu)->fault_dsisr = dsisr;
255 svcpu_put(svcpu);
256 226
257 kvmppc_book3s_queue_irqprio(vcpu, 227 kvmppc_book3s_queue_irqprio(vcpu,
258 BOOK3S_INTERRUPT_DATA_STORAGE); 228 BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -319,14 +289,13 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
319 return bat; 289 return bat;
320} 290}
321 291
322int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 292int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
323{ 293{
324 int emulated = EMULATE_DONE; 294 int emulated = EMULATE_DONE;
295 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
325 296
326 switch (sprn) { 297 switch (sprn) {
327 case SPRN_SDR1: 298 case SPRN_SDR1:
328 if (!spr_allowed(vcpu, PRIV_HYPER))
329 goto unprivileged;
330 to_book3s(vcpu)->sdr1 = spr_val; 299 to_book3s(vcpu)->sdr1 = spr_val;
331 break; 300 break;
332 case SPRN_DSISR: 301 case SPRN_DSISR:
@@ -396,12 +365,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
396 (mfmsr() & MSR_HV)) 365 (mfmsr() & MSR_HV))
397 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 366 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
398 break; 367 break;
399 case SPRN_PURR:
400 to_book3s(vcpu)->purr_offset = spr_val - get_tb();
401 break;
402 case SPRN_SPURR:
403 to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
404 break;
405 case SPRN_GQR0: 368 case SPRN_GQR0:
406 case SPRN_GQR1: 369 case SPRN_GQR1:
407 case SPRN_GQR2: 370 case SPRN_GQR2:
@@ -419,7 +382,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
419 case SPRN_CTRLF: 382 case SPRN_CTRLF:
420 case SPRN_CTRLT: 383 case SPRN_CTRLT:
421 case SPRN_L2CR: 384 case SPRN_L2CR:
422 case SPRN_DSCR:
423 case SPRN_MMCR0_GEKKO: 385 case SPRN_MMCR0_GEKKO:
424 case SPRN_MMCR1_GEKKO: 386 case SPRN_MMCR1_GEKKO:
425 case SPRN_PMC1_GEKKO: 387 case SPRN_PMC1_GEKKO:
@@ -428,7 +390,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
428 case SPRN_PMC4_GEKKO: 390 case SPRN_PMC4_GEKKO:
429 case SPRN_WPAR_GEKKO: 391 case SPRN_WPAR_GEKKO:
430 break; 392 break;
431unprivileged:
432 default: 393 default:
433 printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); 394 printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
434#ifndef DEBUG_SPR 395#ifndef DEBUG_SPR
@@ -440,7 +401,7 @@ unprivileged:
440 return emulated; 401 return emulated;
441} 402}
442 403
443int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 404int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
444{ 405{
445 int emulated = EMULATE_DONE; 406 int emulated = EMULATE_DONE;
446 407
@@ -453,52 +414,40 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
453 struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); 414 struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
454 415
455 if (sprn % 2) 416 if (sprn % 2)
456 *spr_val = bat->raw >> 32; 417 kvmppc_set_gpr(vcpu, rt, bat->raw >> 32);
457 else 418 else
458 *spr_val = bat->raw; 419 kvmppc_set_gpr(vcpu, rt, bat->raw);
459 420
460 break; 421 break;
461 } 422 }
462 case SPRN_SDR1: 423 case SPRN_SDR1:
463 if (!spr_allowed(vcpu, PRIV_HYPER)) 424 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
464 goto unprivileged;
465 *spr_val = to_book3s(vcpu)->sdr1;
466 break; 425 break;
467 case SPRN_DSISR: 426 case SPRN_DSISR:
468 *spr_val = vcpu->arch.shared->dsisr; 427 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr);
469 break; 428 break;
470 case SPRN_DAR: 429 case SPRN_DAR:
471 *spr_val = vcpu->arch.shared->dar; 430 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
472 break; 431 break;
473 case SPRN_HIOR: 432 case SPRN_HIOR:
474 *spr_val = to_book3s(vcpu)->hior; 433 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
475 break; 434 break;
476 case SPRN_HID0: 435 case SPRN_HID0:
477 *spr_val = to_book3s(vcpu)->hid[0]; 436 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
478 break; 437 break;
479 case SPRN_HID1: 438 case SPRN_HID1:
480 *spr_val = to_book3s(vcpu)->hid[1]; 439 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
481 break; 440 break;
482 case SPRN_HID2: 441 case SPRN_HID2:
483 case SPRN_HID2_GEKKO: 442 case SPRN_HID2_GEKKO:
484 *spr_val = to_book3s(vcpu)->hid[2]; 443 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
485 break; 444 break;
486 case SPRN_HID4: 445 case SPRN_HID4:
487 case SPRN_HID4_GEKKO: 446 case SPRN_HID4_GEKKO:
488 *spr_val = to_book3s(vcpu)->hid[4]; 447 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
489 break; 448 break;
490 case SPRN_HID5: 449 case SPRN_HID5:
491 *spr_val = to_book3s(vcpu)->hid[5]; 450 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
492 break;
493 case SPRN_CFAR:
494 case SPRN_DSCR:
495 *spr_val = 0;
496 break;
497 case SPRN_PURR:
498 *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
499 break;
500 case SPRN_SPURR:
501 *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
502 break; 451 break;
503 case SPRN_GQR0: 452 case SPRN_GQR0:
504 case SPRN_GQR1: 453 case SPRN_GQR1:
@@ -508,7 +457,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
508 case SPRN_GQR5: 457 case SPRN_GQR5:
509 case SPRN_GQR6: 458 case SPRN_GQR6:
510 case SPRN_GQR7: 459 case SPRN_GQR7:
511 *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; 460 kvmppc_set_gpr(vcpu, rt,
461 to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]);
512 break; 462 break;
513 case SPRN_THRM1: 463 case SPRN_THRM1:
514 case SPRN_THRM2: 464 case SPRN_THRM2:
@@ -523,10 +473,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
523 case SPRN_PMC3_GEKKO: 473 case SPRN_PMC3_GEKKO:
524 case SPRN_PMC4_GEKKO: 474 case SPRN_PMC4_GEKKO:
525 case SPRN_WPAR_GEKKO: 475 case SPRN_WPAR_GEKKO:
526 *spr_val = 0; 476 kvmppc_set_gpr(vcpu, rt, 0);
527 break; 477 break;
528 default: 478 default:
529unprivileged:
530 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); 479 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
531#ifndef DEBUG_SPR 480#ifndef DEBUG_SPR
532 emulated = EMULATE_FAIL; 481 emulated = EMULATE_FAIL;
@@ -582,22 +531,23 @@ u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
582ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) 531ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
583{ 532{
584 ulong dar = 0; 533 ulong dar = 0;
585 ulong ra = get_ra(inst); 534 ulong ra;
586 ulong rb = get_rb(inst);
587 535
588 switch (get_op(inst)) { 536 switch (get_op(inst)) {
589 case OP_LFS: 537 case OP_LFS:
590 case OP_LFD: 538 case OP_LFD:
591 case OP_STFD: 539 case OP_STFD:
592 case OP_STFS: 540 case OP_STFS:
541 ra = get_ra(inst);
593 if (ra) 542 if (ra)
594 dar = kvmppc_get_gpr(vcpu, ra); 543 dar = kvmppc_get_gpr(vcpu, ra);
595 dar += (s32)((s16)inst); 544 dar += (s32)((s16)inst);
596 break; 545 break;
597 case 31: 546 case 31:
547 ra = get_ra(inst);
598 if (ra) 548 if (ra)
599 dar = kvmppc_get_gpr(vcpu, ra); 549 dar = kvmppc_get_gpr(vcpu, ra);
600 dar += kvmppc_get_gpr(vcpu, rb); 550 dar += kvmppc_get_gpr(vcpu, get_rb(inst));
601 break; 551 break;
602 default: 552 default:
603 printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); 553 printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 7057a02f090..88c8f26add0 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -17,16 +17,21 @@
17 * Authors: Alexander Graf <agraf@suse.de> 17 * Authors: Alexander Graf <agraf@suse.de>
18 */ 18 */
19 19
20#include <linux/export.h> 20#include <linux/module.h>
21#include <asm/kvm_book3s.h> 21#include <asm/kvm_book3s.h>
22 22
23#ifdef CONFIG_KVM_BOOK3S_64_HV 23#ifdef CONFIG_KVM_BOOK3S_64_HV
24EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); 24EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
25#else 25#else
26EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); 26EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
27EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
28EXPORT_SYMBOL_GPL(kvmppc_rmcall);
27EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); 29EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
28#ifdef CONFIG_ALTIVEC 30#ifdef CONFIG_ALTIVEC
29EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); 31EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
30#endif 32#endif
33#ifdef CONFIG_VSX
34EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
35#endif
31#endif 36#endif
32 37
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 71d0c90b62b..0e85639d48a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -24,13 +24,11 @@
24#include <linux/preempt.h> 24#include <linux/preempt.h>
25#include <linux/sched.h> 25#include <linux/sched.h>
26#include <linux/delay.h> 26#include <linux/delay.h>
27#include <linux/export.h>
28#include <linux/fs.h> 27#include <linux/fs.h>
29#include <linux/anon_inodes.h> 28#include <linux/anon_inodes.h>
30#include <linux/cpumask.h> 29#include <linux/cpumask.h>
31#include <linux/spinlock.h> 30#include <linux/spinlock.h>
32#include <linux/page-flags.h> 31#include <linux/page-flags.h>
33#include <linux/srcu.h>
34 32
35#include <asm/reg.h> 33#include <asm/reg.h>
36#include <asm/cputable.h> 34#include <asm/cputable.h>
@@ -46,94 +44,69 @@
46#include <asm/cputhreads.h> 44#include <asm/cputhreads.h>
47#include <asm/page.h> 45#include <asm/page.h>
48#include <asm/hvcall.h> 46#include <asm/hvcall.h>
49#include <asm/switch_to.h>
50#include <asm/smp.h>
51#include <linux/gfp.h> 47#include <linux/gfp.h>
48#include <linux/sched.h>
52#include <linux/vmalloc.h> 49#include <linux/vmalloc.h>
53#include <linux/highmem.h> 50#include <linux/highmem.h>
54#include <linux/hugetlb.h> 51
52/*
53 * For now, limit memory to 64GB and require it to be large pages.
54 * This value is chosen because it makes the ram_pginfo array be
55 * 64kB in size, which is about as large as we want to be trying
56 * to allocate with kmalloc.
57 */
58#define MAX_MEM_ORDER 36
59
60#define LARGE_PAGE_ORDER 24 /* 16MB pages */
55 61
56/* #define EXIT_DEBUG */ 62/* #define EXIT_DEBUG */
57/* #define EXIT_DEBUG_SIMPLE */ 63/* #define EXIT_DEBUG_SIMPLE */
58/* #define EXIT_DEBUG_INT */ 64/* #define EXIT_DEBUG_INT */
59 65
60/* Used to indicate that a guest page fault needs to be handled */ 66void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
61#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) 67{
62 68 local_paca->kvm_hstate.kvm_vcpu = vcpu;
63/* Used as a "null" value for timebase values */ 69 local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
64#define TB_NIL (~(u64)0) 70}
65 71
66static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 72void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
67static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 73{
74}
68 75
69/* 76static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
70 * We use the vcpu_load/put functions to measure stolen time. 77static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
71 * Stolen time is counted as time when either the vcpu is able to
72 * run as part of a virtual core, but the task running the vcore
73 * is preempted or sleeping, or when the vcpu needs something done
74 * in the kernel by the task running the vcpu, but that task is
75 * preempted or sleeping. Those two things have to be counted
76 * separately, since one of the vcpu tasks will take on the job
77 * of running the core, and the other vcpu tasks in the vcore will
78 * sleep waiting for it to do that, but that sleep shouldn't count
79 * as stolen time.
80 *
81 * Hence we accumulate stolen time when the vcpu can run as part of
82 * a vcore using vc->stolen_tb, and the stolen time when the vcpu
83 * needs its task to do other things in the kernel (for example,
84 * service a page fault) in busy_stolen. We don't accumulate
85 * stolen time for a vcore when it is inactive, or for a vcpu
86 * when it is in state RUNNING or NOTREADY. NOTREADY is a bit of
87 * a misnomer; it means that the vcpu task is not executing in
88 * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
89 * the kernel. We don't have any way of dividing up that time
90 * between time that the vcpu is genuinely stopped, time that
91 * the task is actively working on behalf of the vcpu, and time
92 * that the task is preempted, so we don't count any of it as
93 * stolen.
94 *
95 * Updates to busy_stolen are protected by arch.tbacct_lock;
96 * updates to vc->stolen_tb are protected by the arch.tbacct_lock
97 * of the vcpu that has taken responsibility for running the vcore
98 * (i.e. vc->runner). The stolen times are measured in units of
99 * timebase ticks. (Note that the != TB_NIL checks below are
100 * purely defensive; they should never fail.)
101 */
102 78
103void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 79void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
104{ 80{
105 struct kvmppc_vcore *vc = vcpu->arch.vcore; 81 u64 now;
82 unsigned long dec_nsec;
106 83
107 spin_lock(&vcpu->arch.tbacct_lock); 84 now = get_tb();
108 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && 85 if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
109 vc->preempt_tb != TB_NIL) { 86 kvmppc_core_queue_dec(vcpu);
110 vc->stolen_tb += mftb() - vc->preempt_tb; 87 if (vcpu->arch.pending_exceptions)
111 vc->preempt_tb = TB_NIL; 88 return;
112 } 89 if (vcpu->arch.dec_expires != ~(u64)0) {
113 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && 90 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
114 vcpu->arch.busy_preempt != TB_NIL) { 91 tb_ticks_per_sec;
115 vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; 92 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
116 vcpu->arch.busy_preempt = TB_NIL; 93 HRTIMER_MODE_REL);
117 } 94 }
118 spin_unlock(&vcpu->arch.tbacct_lock);
119}
120 95
121void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 96 kvmppc_vcpu_blocked(vcpu);
122{ 97
123 struct kvmppc_vcore *vc = vcpu->arch.vcore; 98 kvm_vcpu_block(vcpu);
99 vcpu->stat.halt_wakeup++;
124 100
125 spin_lock(&vcpu->arch.tbacct_lock); 101 if (vcpu->arch.dec_expires != ~(u64)0)
126 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) 102 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
127 vc->preempt_tb = mftb(); 103
128 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) 104 kvmppc_vcpu_unblocked(vcpu);
129 vcpu->arch.busy_preempt = mftb();
130 spin_unlock(&vcpu->arch.tbacct_lock);
131} 105}
132 106
133void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 107void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
134{ 108{
135 vcpu->arch.shregs.msr = msr; 109 vcpu->arch.shregs.msr = msr;
136 kvmppc_end_cede(vcpu);
137} 110}
138 111
139void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 112void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
@@ -196,282 +169,85 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
196 vpa->yield_count = 1; 169 vpa->yield_count = 1;
197} 170}
198 171
199static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
200 unsigned long addr, unsigned long len)
201{
202 /* check address is cacheline aligned */
203 if (addr & (L1_CACHE_BYTES - 1))
204 return -EINVAL;
205 spin_lock(&vcpu->arch.vpa_update_lock);
206 if (v->next_gpa != addr || v->len != len) {
207 v->next_gpa = addr;
208 v->len = addr ? len : 0;
209 v->update_pending = 1;
210 }
211 spin_unlock(&vcpu->arch.vpa_update_lock);
212 return 0;
213}
214
215/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
216struct reg_vpa {
217 u32 dummy;
218 union {
219 u16 hword;
220 u32 word;
221 } length;
222};
223
224static int vpa_is_registered(struct kvmppc_vpa *vpap)
225{
226 if (vpap->update_pending)
227 return vpap->next_gpa != 0;
228 return vpap->pinned_addr != NULL;
229}
230
231static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, 172static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
232 unsigned long flags, 173 unsigned long flags,
233 unsigned long vcpuid, unsigned long vpa) 174 unsigned long vcpuid, unsigned long vpa)
234{ 175{
235 struct kvm *kvm = vcpu->kvm; 176 struct kvm *kvm = vcpu->kvm;
236 unsigned long len, nb; 177 unsigned long pg_index, ra, len;
178 unsigned long pg_offset;
237 void *va; 179 void *va;
238 struct kvm_vcpu *tvcpu; 180 struct kvm_vcpu *tvcpu;
239 int err;
240 int subfunc;
241 struct kvmppc_vpa *vpap;
242 181
243 tvcpu = kvmppc_find_vcpu(kvm, vcpuid); 182 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
244 if (!tvcpu) 183 if (!tvcpu)
245 return H_PARAMETER; 184 return H_PARAMETER;
246 185
247 subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK; 186 flags >>= 63 - 18;
248 if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL || 187 flags &= 7;
249 subfunc == H_VPA_REG_SLB) { 188 if (flags == 0 || flags == 4)
250 /* Registering new area - address must be cache-line aligned */ 189 return H_PARAMETER;
251 if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa) 190 if (flags < 4) {
191 if (vpa & 0x7f)
252 return H_PARAMETER; 192 return H_PARAMETER;
253 193 /* registering new area; convert logical addr to real */
254 /* convert logical addr to kernel addr and read length */ 194 pg_index = vpa >> kvm->arch.ram_porder;
255 va = kvmppc_pin_guest_page(kvm, vpa, &nb); 195 pg_offset = vpa & (kvm->arch.ram_psize - 1);
256 if (va == NULL) 196 if (pg_index >= kvm->arch.ram_npages)
197 return H_PARAMETER;
198 if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
257 return H_PARAMETER; 199 return H_PARAMETER;
258 if (subfunc == H_VPA_REG_VPA) 200 ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
259 len = ((struct reg_vpa *)va)->length.hword; 201 ra |= pg_offset;
202 va = __va(ra);
203 if (flags <= 1)
204 len = *(unsigned short *)(va + 4);
260 else 205 else
261 len = ((struct reg_vpa *)va)->length.word; 206 len = *(unsigned int *)(va + 4);
262 kvmppc_unpin_guest_page(kvm, va); 207 if (pg_offset + len > kvm->arch.ram_psize)
263
264 /* Check length */
265 if (len > nb || len < sizeof(struct reg_vpa))
266 return H_PARAMETER; 208 return H_PARAMETER;
267 } else { 209 switch (flags) {
268 vpa = 0; 210 case 1: /* register VPA */
269 len = 0; 211 if (len < 640)
270 } 212 return H_PARAMETER;
271 213 tvcpu->arch.vpa = va;
272 err = H_PARAMETER; 214 init_vpa(vcpu, va);
273 vpap = NULL;
274 spin_lock(&tvcpu->arch.vpa_update_lock);
275
276 switch (subfunc) {
277 case H_VPA_REG_VPA: /* register VPA */
278 if (len < sizeof(struct lppaca))
279 break; 215 break;
280 vpap = &tvcpu->arch.vpa; 216 case 2: /* register DTL */
281 err = 0; 217 if (len < 48)
282 break; 218 return H_PARAMETER;
283 219 if (!tvcpu->arch.vpa)
284 case H_VPA_REG_DTL: /* register DTL */ 220 return H_RESOURCE;
285 if (len < sizeof(struct dtl_entry)) 221 len -= len % 48;
222 tvcpu->arch.dtl = va;
223 tvcpu->arch.dtl_end = va + len;
286 break; 224 break;
287 len -= len % sizeof(struct dtl_entry); 225 case 3: /* register SLB shadow buffer */
288 226 if (len < 8)
289 /* Check that they have previously registered a VPA */ 227 return H_PARAMETER;
290 err = H_RESOURCE; 228 if (!tvcpu->arch.vpa)
291 if (!vpa_is_registered(&tvcpu->arch.vpa)) 229 return H_RESOURCE;
230 tvcpu->arch.slb_shadow = va;
231 len = (len - 16) / 16;
232 tvcpu->arch.slb_shadow = va;
292 break; 233 break;
293 234 }
294 vpap = &tvcpu->arch.dtl; 235 } else {
295 err = 0; 236 switch (flags) {
296 break; 237 case 5: /* unregister VPA */
297 238 if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
298 case H_VPA_REG_SLB: /* register SLB shadow buffer */ 239 return H_RESOURCE;
299 /* Check that they have previously registered a VPA */ 240 tvcpu->arch.vpa = NULL;
300 err = H_RESOURCE;
301 if (!vpa_is_registered(&tvcpu->arch.vpa))
302 break; 241 break;
303 242 case 6: /* unregister DTL */
304 vpap = &tvcpu->arch.slb_shadow; 243 tvcpu->arch.dtl = NULL;
305 err = 0;
306 break;
307
308 case H_VPA_DEREG_VPA: /* deregister VPA */
309 /* Check they don't still have a DTL or SLB buf registered */
310 err = H_RESOURCE;
311 if (vpa_is_registered(&tvcpu->arch.dtl) ||
312 vpa_is_registered(&tvcpu->arch.slb_shadow))
313 break; 244 break;
314 245 case 7: /* unregister SLB shadow buffer */
315 vpap = &tvcpu->arch.vpa; 246 tvcpu->arch.slb_shadow = NULL;
316 err = 0;
317 break;
318
319 case H_VPA_DEREG_DTL: /* deregister DTL */
320 vpap = &tvcpu->arch.dtl;
321 err = 0;
322 break;
323
324 case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
325 vpap = &tvcpu->arch.slb_shadow;
326 err = 0;
327 break;
328 }
329
330 if (vpap) {
331 vpap->next_gpa = vpa;
332 vpap->len = len;
333 vpap->update_pending = 1;
334 }
335
336 spin_unlock(&tvcpu->arch.vpa_update_lock);
337
338 return err;
339}
340
341static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
342{
343 struct kvm *kvm = vcpu->kvm;
344 void *va;
345 unsigned long nb;
346 unsigned long gpa;
347
348 /*
349 * We need to pin the page pointed to by vpap->next_gpa,
350 * but we can't call kvmppc_pin_guest_page under the lock
351 * as it does get_user_pages() and down_read(). So we
352 * have to drop the lock, pin the page, then get the lock
353 * again and check that a new area didn't get registered
354 * in the meantime.
355 */
356 for (;;) {
357 gpa = vpap->next_gpa;
358 spin_unlock(&vcpu->arch.vpa_update_lock);
359 va = NULL;
360 nb = 0;
361 if (gpa)
362 va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
363 spin_lock(&vcpu->arch.vpa_update_lock);
364 if (gpa == vpap->next_gpa)
365 break; 247 break;
366 /* sigh... unpin that one and try again */ 248 }
367 if (va)
368 kvmppc_unpin_guest_page(kvm, va);
369 }
370
371 vpap->update_pending = 0;
372 if (va && nb < vpap->len) {
373 /*
374 * If it's now too short, it must be that userspace
375 * has changed the mappings underlying guest memory,
376 * so unregister the region.
377 */
378 kvmppc_unpin_guest_page(kvm, va);
379 va = NULL;
380 }
381 if (vpap->pinned_addr)
382 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
383 vpap->pinned_addr = va;
384 if (va)
385 vpap->pinned_end = va + vpap->len;
386}
387
388static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
389{
390 if (!(vcpu->arch.vpa.update_pending ||
391 vcpu->arch.slb_shadow.update_pending ||
392 vcpu->arch.dtl.update_pending))
393 return;
394
395 spin_lock(&vcpu->arch.vpa_update_lock);
396 if (vcpu->arch.vpa.update_pending) {
397 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
398 if (vcpu->arch.vpa.pinned_addr)
399 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
400 }
401 if (vcpu->arch.dtl.update_pending) {
402 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
403 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
404 vcpu->arch.dtl_index = 0;
405 }
406 if (vcpu->arch.slb_shadow.update_pending)
407 kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
408 spin_unlock(&vcpu->arch.vpa_update_lock);
409}
410
411/*
412 * Return the accumulated stolen time for the vcore up until `now'.
413 * The caller should hold the vcore lock.
414 */
415static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
416{
417 u64 p;
418
419 /*
420 * If we are the task running the vcore, then since we hold
421 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
422 * can't be updated, so we don't need the tbacct_lock.
423 * If the vcore is inactive, it can't become active (since we
424 * hold the vcore lock), so the vcpu load/put functions won't
425 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
426 */
427 if (vc->vcore_state != VCORE_INACTIVE &&
428 vc->runner->arch.run_task != current) {
429 spin_lock(&vc->runner->arch.tbacct_lock);
430 p = vc->stolen_tb;
431 if (vc->preempt_tb != TB_NIL)
432 p += now - vc->preempt_tb;
433 spin_unlock(&vc->runner->arch.tbacct_lock);
434 } else {
435 p = vc->stolen_tb;
436 } 249 }
437 return p; 250 return H_SUCCESS;
438}
439
440static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
441 struct kvmppc_vcore *vc)
442{
443 struct dtl_entry *dt;
444 struct lppaca *vpa;
445 unsigned long stolen;
446 unsigned long core_stolen;
447 u64 now;
448
449 dt = vcpu->arch.dtl_ptr;
450 vpa = vcpu->arch.vpa.pinned_addr;
451 now = mftb();
452 core_stolen = vcore_stolen_time(vc, now);
453 stolen = core_stolen - vcpu->arch.stolen_logged;
454 vcpu->arch.stolen_logged = core_stolen;
455 spin_lock(&vcpu->arch.tbacct_lock);
456 stolen += vcpu->arch.busy_stolen;
457 vcpu->arch.busy_stolen = 0;
458 spin_unlock(&vcpu->arch.tbacct_lock);
459 if (!dt || !vpa)
460 return;
461 memset(dt, 0, sizeof(struct dtl_entry));
462 dt->dispatch_reason = 7;
463 dt->processor_id = vc->pcpu + vcpu->arch.ptid;
464 dt->timebase = now;
465 dt->enqueue_to_dispatch_time = stolen;
466 dt->srr0 = kvmppc_get_pc(vcpu);
467 dt->srr1 = vcpu->arch.shregs.msr;
468 ++dt;
469 if (dt == vcpu->arch.dtl.pinned_end)
470 dt = vcpu->arch.dtl.pinned_addr;
471 vcpu->arch.dtl_ptr = dt;
472 /* order writing *dt vs. writing vpa->dtl_idx */
473 smp_wmb();
474 vpa->dtl_idx = ++vcpu->arch.dtl_index;
475} 251}
476 252
477int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) 253int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -479,18 +255,18 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
479 unsigned long req = kvmppc_get_gpr(vcpu, 3); 255 unsigned long req = kvmppc_get_gpr(vcpu, 3);
480 unsigned long target, ret = H_SUCCESS; 256 unsigned long target, ret = H_SUCCESS;
481 struct kvm_vcpu *tvcpu; 257 struct kvm_vcpu *tvcpu;
482 int idx;
483 258
484 switch (req) { 259 switch (req) {
485 case H_ENTER:
486 idx = srcu_read_lock(&vcpu->kvm->srcu);
487 ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
488 kvmppc_get_gpr(vcpu, 5),
489 kvmppc_get_gpr(vcpu, 6),
490 kvmppc_get_gpr(vcpu, 7));
491 srcu_read_unlock(&vcpu->kvm->srcu, idx);
492 break;
493 case H_CEDE: 260 case H_CEDE:
261 vcpu->arch.shregs.msr |= MSR_EE;
262 vcpu->arch.ceded = 1;
263 smp_mb();
264 if (!vcpu->arch.prodded)
265 kvmppc_vcpu_block(vcpu);
266 else
267 vcpu->arch.prodded = 0;
268 smp_mb();
269 vcpu->arch.ceded = 0;
494 break; 270 break;
495 case H_PROD: 271 case H_PROD:
496 target = kvmppc_get_gpr(vcpu, 4); 272 target = kvmppc_get_gpr(vcpu, 4);
@@ -545,17 +321,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
545 case BOOK3S_INTERRUPT_PERFMON: 321 case BOOK3S_INTERRUPT_PERFMON:
546 r = RESUME_GUEST; 322 r = RESUME_GUEST;
547 break; 323 break;
548 case BOOK3S_INTERRUPT_MACHINE_CHECK:
549 /*
550 * Deliver a machine check interrupt to the guest.
551 * We have to do this, even if the host has handled the
552 * machine check, because machine checks use SRR0/1 and
553 * the interrupt might have trashed guest state in them.
554 */
555 kvmppc_book3s_queue_irqprio(vcpu,
556 BOOK3S_INTERRUPT_MACHINE_CHECK);
557 r = RESUME_GUEST;
558 break;
559 case BOOK3S_INTERRUPT_PROGRAM: 324 case BOOK3S_INTERRUPT_PROGRAM:
560 { 325 {
561 ulong flags; 326 ulong flags;
@@ -590,19 +355,20 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
590 break; 355 break;
591 } 356 }
592 /* 357 /*
593 * We get these next two if the guest accesses a page which it thinks 358 * We get these next two if the guest does a bad real-mode access,
594 * it has mapped but which is not actually present, either because 359 * as we have enabled VRMA (virtualized real mode area) mode in the
595 * it is for an emulated I/O device or because the corresonding 360 * LPCR. We just generate an appropriate DSI/ISI to the guest.
596 * host page has been paged out. Any other HDSI/HISI interrupts
597 * have been handled already.
598 */ 361 */
599 case BOOK3S_INTERRUPT_H_DATA_STORAGE: 362 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
600 r = RESUME_PAGE_FAULT; 363 vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
364 vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
365 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
366 r = RESUME_GUEST;
601 break; 367 break;
602 case BOOK3S_INTERRUPT_H_INST_STORAGE: 368 case BOOK3S_INTERRUPT_H_INST_STORAGE:
603 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu); 369 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
604 vcpu->arch.fault_dsisr = 0; 370 0x08000000);
605 r = RESUME_PAGE_FAULT; 371 r = RESUME_GUEST;
606 break; 372 break;
607 /* 373 /*
608 * This occurs if the guest executes an illegal instruction. 374 * This occurs if the guest executes an illegal instruction.
@@ -623,6 +389,20 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
623 break; 389 break;
624 } 390 }
625 391
392
393 if (!(r & RESUME_HOST)) {
394 /* To avoid clobbering exit_reason, only check for signals if
395 * we aren't already exiting to userspace for some other
396 * reason. */
397 if (signal_pending(tsk)) {
398 vcpu->stat.signal_exits++;
399 run->exit_reason = KVM_EXIT_INTR;
400 r = -EINTR;
401 } else {
402 kvmppc_core_deliver_interrupts(vcpu);
403 }
404 }
405
626 return r; 406 return r;
627} 407}
628 408
@@ -662,180 +442,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
662 return 0; 442 return 0;
663} 443}
664 444
665int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
666{
667 int r = 0;
668 long int i;
669
670 switch (id) {
671 case KVM_REG_PPC_HIOR:
672 *val = get_reg_val(id, 0);
673 break;
674 case KVM_REG_PPC_DABR:
675 *val = get_reg_val(id, vcpu->arch.dabr);
676 break;
677 case KVM_REG_PPC_DSCR:
678 *val = get_reg_val(id, vcpu->arch.dscr);
679 break;
680 case KVM_REG_PPC_PURR:
681 *val = get_reg_val(id, vcpu->arch.purr);
682 break;
683 case KVM_REG_PPC_SPURR:
684 *val = get_reg_val(id, vcpu->arch.spurr);
685 break;
686 case KVM_REG_PPC_AMR:
687 *val = get_reg_val(id, vcpu->arch.amr);
688 break;
689 case KVM_REG_PPC_UAMOR:
690 *val = get_reg_val(id, vcpu->arch.uamor);
691 break;
692 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
693 i = id - KVM_REG_PPC_MMCR0;
694 *val = get_reg_val(id, vcpu->arch.mmcr[i]);
695 break;
696 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
697 i = id - KVM_REG_PPC_PMC1;
698 *val = get_reg_val(id, vcpu->arch.pmc[i]);
699 break;
700#ifdef CONFIG_VSX
701 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
702 if (cpu_has_feature(CPU_FTR_VSX)) {
703 /* VSX => FP reg i is stored in arch.vsr[2*i] */
704 long int i = id - KVM_REG_PPC_FPR0;
705 *val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
706 } else {
707 /* let generic code handle it */
708 r = -EINVAL;
709 }
710 break;
711 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
712 if (cpu_has_feature(CPU_FTR_VSX)) {
713 long int i = id - KVM_REG_PPC_VSR0;
714 val->vsxval[0] = vcpu->arch.vsr[2 * i];
715 val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
716 } else {
717 r = -ENXIO;
718 }
719 break;
720#endif /* CONFIG_VSX */
721 case KVM_REG_PPC_VPA_ADDR:
722 spin_lock(&vcpu->arch.vpa_update_lock);
723 *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
724 spin_unlock(&vcpu->arch.vpa_update_lock);
725 break;
726 case KVM_REG_PPC_VPA_SLB:
727 spin_lock(&vcpu->arch.vpa_update_lock);
728 val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
729 val->vpaval.length = vcpu->arch.slb_shadow.len;
730 spin_unlock(&vcpu->arch.vpa_update_lock);
731 break;
732 case KVM_REG_PPC_VPA_DTL:
733 spin_lock(&vcpu->arch.vpa_update_lock);
734 val->vpaval.addr = vcpu->arch.dtl.next_gpa;
735 val->vpaval.length = vcpu->arch.dtl.len;
736 spin_unlock(&vcpu->arch.vpa_update_lock);
737 break;
738 default:
739 r = -EINVAL;
740 break;
741 }
742
743 return r;
744}
745
746int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
747{
748 int r = 0;
749 long int i;
750 unsigned long addr, len;
751
752 switch (id) {
753 case KVM_REG_PPC_HIOR:
754 /* Only allow this to be set to zero */
755 if (set_reg_val(id, *val))
756 r = -EINVAL;
757 break;
758 case KVM_REG_PPC_DABR:
759 vcpu->arch.dabr = set_reg_val(id, *val);
760 break;
761 case KVM_REG_PPC_DSCR:
762 vcpu->arch.dscr = set_reg_val(id, *val);
763 break;
764 case KVM_REG_PPC_PURR:
765 vcpu->arch.purr = set_reg_val(id, *val);
766 break;
767 case KVM_REG_PPC_SPURR:
768 vcpu->arch.spurr = set_reg_val(id, *val);
769 break;
770 case KVM_REG_PPC_AMR:
771 vcpu->arch.amr = set_reg_val(id, *val);
772 break;
773 case KVM_REG_PPC_UAMOR:
774 vcpu->arch.uamor = set_reg_val(id, *val);
775 break;
776 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
777 i = id - KVM_REG_PPC_MMCR0;
778 vcpu->arch.mmcr[i] = set_reg_val(id, *val);
779 break;
780 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
781 i = id - KVM_REG_PPC_PMC1;
782 vcpu->arch.pmc[i] = set_reg_val(id, *val);
783 break;
784#ifdef CONFIG_VSX
785 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
786 if (cpu_has_feature(CPU_FTR_VSX)) {
787 /* VSX => FP reg i is stored in arch.vsr[2*i] */
788 long int i = id - KVM_REG_PPC_FPR0;
789 vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
790 } else {
791 /* let generic code handle it */
792 r = -EINVAL;
793 }
794 break;
795 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
796 if (cpu_has_feature(CPU_FTR_VSX)) {
797 long int i = id - KVM_REG_PPC_VSR0;
798 vcpu->arch.vsr[2 * i] = val->vsxval[0];
799 vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
800 } else {
801 r = -ENXIO;
802 }
803 break;
804#endif /* CONFIG_VSX */
805 case KVM_REG_PPC_VPA_ADDR:
806 addr = set_reg_val(id, *val);
807 r = -EINVAL;
808 if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
809 vcpu->arch.dtl.next_gpa))
810 break;
811 r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
812 break;
813 case KVM_REG_PPC_VPA_SLB:
814 addr = val->vpaval.addr;
815 len = val->vpaval.length;
816 r = -EINVAL;
817 if (addr && !vcpu->arch.vpa.next_gpa)
818 break;
819 r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
820 break;
821 case KVM_REG_PPC_VPA_DTL:
822 addr = val->vpaval.addr;
823 len = val->vpaval.length;
824 r = -EINVAL;
825 if (addr && (len < sizeof(struct dtl_entry) ||
826 !vcpu->arch.vpa.next_gpa))
827 break;
828 len -= len % sizeof(struct dtl_entry);
829 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
830 break;
831 default:
832 r = -EINVAL;
833 break;
834 }
835
836 return r;
837}
838
839int kvmppc_core_check_processor_compat(void) 445int kvmppc_core_check_processor_compat(void)
840{ 446{
841 if (cpu_has_feature(CPU_FTR_HVMODE)) 447 if (cpu_has_feature(CPU_FTR_HVMODE))
@@ -855,7 +461,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
855 goto out; 461 goto out;
856 462
857 err = -ENOMEM; 463 err = -ENOMEM;
858 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 464 vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
859 if (!vcpu) 465 if (!vcpu)
860 goto out; 466 goto out;
861 467
@@ -864,18 +470,23 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
864 goto free_vcpu; 470 goto free_vcpu;
865 471
866 vcpu->arch.shared = &vcpu->arch.shregs; 472 vcpu->arch.shared = &vcpu->arch.shregs;
473 vcpu->arch.last_cpu = -1;
867 vcpu->arch.mmcr[0] = MMCR0_FC; 474 vcpu->arch.mmcr[0] = MMCR0_FC;
868 vcpu->arch.ctrl = CTRL_RUNLATCH; 475 vcpu->arch.ctrl = CTRL_RUNLATCH;
869 /* default to host PVR, since we can't spoof it */ 476 /* default to host PVR, since we can't spoof it */
870 vcpu->arch.pvr = mfspr(SPRN_PVR); 477 vcpu->arch.pvr = mfspr(SPRN_PVR);
871 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 478 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
872 spin_lock_init(&vcpu->arch.vpa_update_lock);
873 spin_lock_init(&vcpu->arch.tbacct_lock);
874 vcpu->arch.busy_preempt = TB_NIL;
875 479
876 kvmppc_mmu_book3s_hv_init(vcpu); 480 kvmppc_mmu_book3s_hv_init(vcpu);
877 481
878 vcpu->arch.state = KVMPPC_VCPU_NOTREADY; 482 /*
483 * Some vcpus may start out in stopped state. If we initialize
484 * them to busy-in-host state they will stop other vcpus in the
485 * vcore from running. Instead we initialize them to blocked
486 * state, effectively considering them to be stopped until we
487 * see the first run ioctl for them.
488 */
489 vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
879 490
880 init_waitqueue_head(&vcpu->arch.cpu_run); 491 init_waitqueue_head(&vcpu->arch.cpu_run);
881 492
@@ -886,11 +497,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
886 if (vcore) { 497 if (vcore) {
887 INIT_LIST_HEAD(&vcore->runnable_threads); 498 INIT_LIST_HEAD(&vcore->runnable_threads);
888 spin_lock_init(&vcore->lock); 499 spin_lock_init(&vcore->lock);
889 init_waitqueue_head(&vcore->wq);
890 vcore->preempt_tb = TB_NIL;
891 } 500 }
892 kvm->arch.vcores[core] = vcore; 501 kvm->arch.vcores[core] = vcore;
893 kvm->arch.online_vcores++;
894 } 502 }
895 mutex_unlock(&kvm->lock); 503 mutex_unlock(&kvm->lock);
896 504
@@ -899,59 +507,48 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
899 507
900 spin_lock(&vcore->lock); 508 spin_lock(&vcore->lock);
901 ++vcore->num_threads; 509 ++vcore->num_threads;
510 ++vcore->n_blocked;
902 spin_unlock(&vcore->lock); 511 spin_unlock(&vcore->lock);
903 vcpu->arch.vcore = vcore; 512 vcpu->arch.vcore = vcore;
904 513
905 vcpu->arch.cpu_type = KVM_CPU_3S_64;
906 kvmppc_sanity_check(vcpu);
907
908 return vcpu; 514 return vcpu;
909 515
910free_vcpu: 516free_vcpu:
911 kmem_cache_free(kvm_vcpu_cache, vcpu); 517 kfree(vcpu);
912out: 518out:
913 return ERR_PTR(err); 519 return ERR_PTR(err);
914} 520}
915 521
916void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 522void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
917{ 523{
918 spin_lock(&vcpu->arch.vpa_update_lock);
919 if (vcpu->arch.dtl.pinned_addr)
920 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
921 if (vcpu->arch.slb_shadow.pinned_addr)
922 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
923 if (vcpu->arch.vpa.pinned_addr)
924 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
925 spin_unlock(&vcpu->arch.vpa_update_lock);
926 kvm_vcpu_uninit(vcpu); 524 kvm_vcpu_uninit(vcpu);
927 kmem_cache_free(kvm_vcpu_cache, vcpu); 525 kfree(vcpu);
928} 526}
929 527
930static void kvmppc_set_timer(struct kvm_vcpu *vcpu) 528static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
931{ 529{
932 unsigned long dec_nsec, now; 530 struct kvmppc_vcore *vc = vcpu->arch.vcore;
933 531
934 now = get_tb(); 532 spin_lock(&vc->lock);
935 if (now > vcpu->arch.dec_expires) { 533 vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
936 /* decrementer has already gone negative */ 534 ++vc->n_blocked;
937 kvmppc_core_queue_dec(vcpu); 535 if (vc->n_runnable > 0 &&
938 kvmppc_core_prepare_to_enter(vcpu); 536 vc->n_runnable + vc->n_blocked == vc->num_threads) {
939 return; 537 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
538 arch.run_list);
539 wake_up(&vcpu->arch.cpu_run);
940 } 540 }
941 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC 541 spin_unlock(&vc->lock);
942 / tb_ticks_per_sec;
943 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
944 HRTIMER_MODE_REL);
945 vcpu->arch.timer_running = 1;
946} 542}
947 543
948static void kvmppc_end_cede(struct kvm_vcpu *vcpu) 544static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
949{ 545{
950 vcpu->arch.ceded = 0; 546 struct kvmppc_vcore *vc = vcpu->arch.vcore;
951 if (vcpu->arch.timer_running) { 547
952 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 548 spin_lock(&vc->lock);
953 vcpu->arch.timer_running = 0; 549 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
954 } 550 --vc->n_blocked;
551 spin_unlock(&vc->lock);
955} 552}
956 553
957extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 554extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -960,80 +557,35 @@ extern void xics_wake_cpu(int cpu);
960static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, 557static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
961 struct kvm_vcpu *vcpu) 558 struct kvm_vcpu *vcpu)
962{ 559{
963 u64 now; 560 struct kvm_vcpu *v;
964 561
965 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) 562 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
966 return; 563 return;
967 spin_lock(&vcpu->arch.tbacct_lock);
968 now = mftb();
969 vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
970 vcpu->arch.stolen_logged;
971 vcpu->arch.busy_preempt = now;
972 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 564 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
973 spin_unlock(&vcpu->arch.tbacct_lock);
974 --vc->n_runnable; 565 --vc->n_runnable;
566 /* decrement the physical thread id of each following vcpu */
567 v = vcpu;
568 list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
569 --v->arch.ptid;
975 list_del(&vcpu->arch.run_list); 570 list_del(&vcpu->arch.run_list);
976} 571}
977 572
978static int kvmppc_grab_hwthread(int cpu)
979{
980 struct paca_struct *tpaca;
981 long timeout = 1000;
982
983 tpaca = &paca[cpu];
984
985 /* Ensure the thread won't go into the kernel if it wakes */
986 tpaca->kvm_hstate.hwthread_req = 1;
987 tpaca->kvm_hstate.kvm_vcpu = NULL;
988
989 /*
990 * If the thread is already executing in the kernel (e.g. handling
991 * a stray interrupt), wait for it to get back to nap mode.
992 * The smp_mb() is to ensure that our setting of hwthread_req
993 * is visible before we look at hwthread_state, so if this
994 * races with the code at system_reset_pSeries and the thread
995 * misses our setting of hwthread_req, we are sure to see its
996 * setting of hwthread_state, and vice versa.
997 */
998 smp_mb();
999 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
1000 if (--timeout <= 0) {
1001 pr_err("KVM: couldn't grab cpu %d\n", cpu);
1002 return -EBUSY;
1003 }
1004 udelay(1);
1005 }
1006 return 0;
1007}
1008
1009static void kvmppc_release_hwthread(int cpu)
1010{
1011 struct paca_struct *tpaca;
1012
1013 tpaca = &paca[cpu];
1014 tpaca->kvm_hstate.hwthread_req = 0;
1015 tpaca->kvm_hstate.kvm_vcpu = NULL;
1016}
1017
1018static void kvmppc_start_thread(struct kvm_vcpu *vcpu) 573static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
1019{ 574{
1020 int cpu; 575 int cpu;
1021 struct paca_struct *tpaca; 576 struct paca_struct *tpaca;
1022 struct kvmppc_vcore *vc = vcpu->arch.vcore; 577 struct kvmppc_vcore *vc = vcpu->arch.vcore;
1023 578
1024 if (vcpu->arch.timer_running) {
1025 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
1026 vcpu->arch.timer_running = 0;
1027 }
1028 cpu = vc->pcpu + vcpu->arch.ptid; 579 cpu = vc->pcpu + vcpu->arch.ptid;
1029 tpaca = &paca[cpu]; 580 tpaca = &paca[cpu];
1030 tpaca->kvm_hstate.kvm_vcpu = vcpu; 581 tpaca->kvm_hstate.kvm_vcpu = vcpu;
1031 tpaca->kvm_hstate.kvm_vcore = vc; 582 tpaca->kvm_hstate.kvm_vcore = vc;
1032 tpaca->kvm_hstate.napping = 0;
1033 vcpu->cpu = vc->pcpu;
1034 smp_wmb(); 583 smp_wmb();
1035#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 584#ifdef CONFIG_PPC_ICP_NATIVE
1036 if (vcpu->arch.ptid) { 585 if (vcpu->arch.ptid) {
586 tpaca->cpu_start = 0x80;
587 tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
588 wmb();
1037 xics_wake_cpu(cpu); 589 xics_wake_cpu(cpu);
1038 ++vc->n_woken; 590 ++vc->n_woken;
1039 } 591 }
@@ -1059,8 +611,7 @@ static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
1059 611
1060/* 612/*
1061 * Check that we are on thread 0 and that any other threads in 613 * Check that we are on thread 0 and that any other threads in
1062 * this core are off-line. Then grab the threads so they can't 614 * this core are off-line.
1063 * enter the kernel.
1064 */ 615 */
1065static int on_primary_thread(void) 616static int on_primary_thread(void)
1066{ 617{
@@ -1072,17 +623,6 @@ static int on_primary_thread(void)
1072 while (++thr < threads_per_core) 623 while (++thr < threads_per_core)
1073 if (cpu_online(cpu + thr)) 624 if (cpu_online(cpu + thr))
1074 return 0; 625 return 0;
1075
1076 /* Grab all hw threads so they can't go into the kernel */
1077 for (thr = 1; thr < threads_per_core; ++thr) {
1078 if (kvmppc_grab_hwthread(cpu + thr)) {
1079 /* Couldn't grab one; let the others go */
1080 do {
1081 kvmppc_release_hwthread(cpu + thr);
1082 } while (--thr > 0);
1083 return 0;
1084 }
1085 }
1086 return 1; 626 return 1;
1087} 627}
1088 628
@@ -1090,70 +630,22 @@ static int on_primary_thread(void)
1090 * Run a set of guest threads on a physical core. 630 * Run a set of guest threads on a physical core.
1091 * Called with vc->lock held. 631 * Called with vc->lock held.
1092 */ 632 */
1093static void kvmppc_run_core(struct kvmppc_vcore *vc) 633static int kvmppc_run_core(struct kvmppc_vcore *vc)
1094{ 634{
1095 struct kvm_vcpu *vcpu, *vcpu0, *vnext; 635 struct kvm_vcpu *vcpu, *vnext;
1096 long ret; 636 long ret;
1097 u64 now; 637 u64 now;
1098 int ptid, i, need_vpa_update;
1099 int srcu_idx;
1100 struct kvm_vcpu *vcpus_to_update[threads_per_core];
1101 638
1102 /* don't start if any threads have a signal pending */ 639 /* don't start if any threads have a signal pending */
1103 need_vpa_update = 0;
1104 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
1105 if (signal_pending(vcpu->arch.run_task))
1106 return;
1107 if (vcpu->arch.vpa.update_pending ||
1108 vcpu->arch.slb_shadow.update_pending ||
1109 vcpu->arch.dtl.update_pending)
1110 vcpus_to_update[need_vpa_update++] = vcpu;
1111 }
1112
1113 /*
1114 * Initialize *vc, in particular vc->vcore_state, so we can
1115 * drop the vcore lock if necessary.
1116 */
1117 vc->n_woken = 0;
1118 vc->nap_count = 0;
1119 vc->entry_exit_count = 0;
1120 vc->vcore_state = VCORE_STARTING;
1121 vc->in_guest = 0;
1122 vc->napping_threads = 0;
1123
1124 /*
1125 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
1126 * which can't be called with any spinlocks held.
1127 */
1128 if (need_vpa_update) {
1129 spin_unlock(&vc->lock);
1130 for (i = 0; i < need_vpa_update; ++i)
1131 kvmppc_update_vpas(vcpus_to_update[i]);
1132 spin_lock(&vc->lock);
1133 }
1134
1135 /*
1136 * Assign physical thread IDs, first to non-ceded vcpus
1137 * and then to ceded ones.
1138 */
1139 ptid = 0;
1140 vcpu0 = NULL;
1141 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
1142 if (!vcpu->arch.ceded) {
1143 if (!ptid)
1144 vcpu0 = vcpu;
1145 vcpu->arch.ptid = ptid++;
1146 }
1147 }
1148 if (!vcpu0)
1149 goto out; /* nothing to run; should never happen */
1150 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 640 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
1151 if (vcpu->arch.ceded) 641 if (signal_pending(vcpu->arch.run_task))
1152 vcpu->arch.ptid = ptid++; 642 return 0;
1153 643
1154 /* 644 /*
1155 * Make sure we are running on thread 0, and that 645 * Make sure we are running on thread 0, and that
1156 * secondary threads are offline. 646 * secondary threads are offline.
647 * XXX we should also block attempts to bring any
648 * secondary threads online.
1157 */ 649 */
1158 if (threads_per_core > 1 && !on_primary_thread()) { 650 if (threads_per_core > 1 && !on_primary_thread()) {
1159 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 651 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -1161,37 +653,31 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1161 goto out; 653 goto out;
1162 } 654 }
1163 655
656 vc->n_woken = 0;
657 vc->nap_count = 0;
658 vc->entry_exit_count = 0;
659 vc->vcore_running = 1;
660 vc->in_guest = 0;
1164 vc->pcpu = smp_processor_id(); 661 vc->pcpu = smp_processor_id();
1165 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 662 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
1166 kvmppc_start_thread(vcpu); 663 kvmppc_start_thread(vcpu);
1167 kvmppc_create_dtl_entry(vcpu, vc); 664 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
1168 } 665 arch.run_list);
1169 666
1170 vc->vcore_state = VCORE_RUNNING;
1171 preempt_disable();
1172 spin_unlock(&vc->lock); 667 spin_unlock(&vc->lock);
1173 668
669 preempt_disable();
1174 kvm_guest_enter(); 670 kvm_guest_enter();
671 __kvmppc_vcore_entry(NULL, vcpu);
1175 672
1176 srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
1177
1178 __kvmppc_vcore_entry(NULL, vcpu0);
1179
1180 spin_lock(&vc->lock);
1181 /* disable sending of IPIs on virtual external irqs */
1182 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
1183 vcpu->cpu = -1;
1184 /* wait for secondary threads to finish writing their state to memory */ 673 /* wait for secondary threads to finish writing their state to memory */
674 spin_lock(&vc->lock);
1185 if (vc->nap_count < vc->n_woken) 675 if (vc->nap_count < vc->n_woken)
1186 kvmppc_wait_for_nap(vc); 676 kvmppc_wait_for_nap(vc);
1187 for (i = 0; i < threads_per_core; ++i)
1188 kvmppc_release_hwthread(vc->pcpu + i);
1189 /* prevent other vcpu threads from doing kvmppc_start_thread() now */ 677 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
1190 vc->vcore_state = VCORE_EXITING; 678 vc->vcore_running = 2;
1191 spin_unlock(&vc->lock); 679 spin_unlock(&vc->lock);
1192 680
1193 srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
1194
1195 /* make sure updates to secondary vcpu structs are visible now */ 681 /* make sure updates to secondary vcpu structs are visible now */
1196 smp_mb(); 682 smp_mb();
1197 kvm_guest_exit(); 683 kvm_guest_exit();
@@ -1199,32 +685,28 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1199 preempt_enable(); 685 preempt_enable();
1200 kvm_resched(vcpu); 686 kvm_resched(vcpu);
1201 687
1202 spin_lock(&vc->lock);
1203 now = get_tb(); 688 now = get_tb();
1204 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 689 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
1205 /* cancel pending dec exception if dec is positive */ 690 /* cancel pending dec exception if dec is positive */
1206 if (now < vcpu->arch.dec_expires && 691 if (now < vcpu->arch.dec_expires &&
1207 kvmppc_core_pending_dec(vcpu)) 692 kvmppc_core_pending_dec(vcpu))
1208 kvmppc_core_dequeue_dec(vcpu); 693 kvmppc_core_dequeue_dec(vcpu);
1209 694 if (!vcpu->arch.trap) {
1210 ret = RESUME_GUEST; 695 if (signal_pending(vcpu->arch.run_task)) {
1211 if (vcpu->arch.trap) 696 vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
1212 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, 697 vcpu->arch.ret = -EINTR;
1213 vcpu->arch.run_task); 698 }
1214 699 continue; /* didn't get to run */
700 }
701 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
702 vcpu->arch.run_task);
1215 vcpu->arch.ret = ret; 703 vcpu->arch.ret = ret;
1216 vcpu->arch.trap = 0; 704 vcpu->arch.trap = 0;
1217
1218 if (vcpu->arch.ceded) {
1219 if (ret != RESUME_GUEST)
1220 kvmppc_end_cede(vcpu);
1221 else
1222 kvmppc_set_timer(vcpu);
1223 }
1224 } 705 }
1225 706
707 spin_lock(&vc->lock);
1226 out: 708 out:
1227 vc->vcore_state = VCORE_INACTIVE; 709 vc->vcore_running = 0;
1228 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 710 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
1229 arch.run_list) { 711 arch.run_list) {
1230 if (vcpu->arch.ret != RESUME_GUEST) { 712 if (vcpu->arch.ret != RESUME_GUEST) {
@@ -1232,175 +714,92 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1232 wake_up(&vcpu->arch.cpu_run); 714 wake_up(&vcpu->arch.cpu_run);
1233 } 715 }
1234 } 716 }
1235}
1236
1237/*
1238 * Wait for some other vcpu thread to execute us, and
1239 * wake us up when we need to handle something in the host.
1240 */
1241static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
1242{
1243 DEFINE_WAIT(wait);
1244 717
1245 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); 718 return 1;
1246 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
1247 schedule();
1248 finish_wait(&vcpu->arch.cpu_run, &wait);
1249} 719}
1250 720
1251/* 721static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1252 * All the vcpus in this vcore are idle, so wait for a decrementer
1253 * or external interrupt to one of the vcpus. vc->lock is held.
1254 */
1255static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
1256{ 722{
723 int ptid;
724 int wait_state;
725 struct kvmppc_vcore *vc;
1257 DEFINE_WAIT(wait); 726 DEFINE_WAIT(wait);
1258 727
1259 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); 728 /* No need to go into the guest when all we do is going out */
1260 vc->vcore_state = VCORE_SLEEPING; 729 if (signal_pending(current)) {
1261 spin_unlock(&vc->lock); 730 kvm_run->exit_reason = KVM_EXIT_INTR;
1262 schedule(); 731 return -EINTR;
1263 finish_wait(&vc->wq, &wait); 732 }
1264 spin_lock(&vc->lock);
1265 vc->vcore_state = VCORE_INACTIVE;
1266}
1267 733
1268static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 734 /* On PPC970, check that we have an RMA region */
1269{ 735 if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
1270 int n_ceded; 736 return -EPERM;
1271 struct kvmppc_vcore *vc;
1272 struct kvm_vcpu *v, *vn;
1273 737
1274 kvm_run->exit_reason = 0; 738 kvm_run->exit_reason = 0;
1275 vcpu->arch.ret = RESUME_GUEST; 739 vcpu->arch.ret = RESUME_GUEST;
1276 vcpu->arch.trap = 0; 740 vcpu->arch.trap = 0;
1277 kvmppc_update_vpas(vcpu); 741
742 flush_fp_to_thread(current);
743 flush_altivec_to_thread(current);
744 flush_vsx_to_thread(current);
1278 745
1279 /* 746 /*
1280 * Synchronize with other threads in this virtual core 747 * Synchronize with other threads in this virtual core
1281 */ 748 */
1282 vc = vcpu->arch.vcore; 749 vc = vcpu->arch.vcore;
1283 spin_lock(&vc->lock); 750 spin_lock(&vc->lock);
1284 vcpu->arch.ceded = 0; 751 /* This happens the first time this is called for a vcpu */
752 if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
753 --vc->n_blocked;
754 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
755 ptid = vc->n_runnable;
1285 vcpu->arch.run_task = current; 756 vcpu->arch.run_task = current;
1286 vcpu->arch.kvm_run = kvm_run; 757 vcpu->arch.kvm_run = kvm_run;
1287 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); 758 vcpu->arch.ptid = ptid;
1288 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
1289 vcpu->arch.busy_preempt = TB_NIL;
1290 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); 759 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
1291 ++vc->n_runnable; 760 ++vc->n_runnable;
1292 761
1293 /* 762 wait_state = TASK_INTERRUPTIBLE;
1294 * This happens the first time this is called for a vcpu. 763 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
1295 * If the vcore is already running, we may be able to start 764 if (signal_pending(current)) {
1296 * this thread straight away and have it join in. 765 if (!vc->vcore_running) {
1297 */ 766 kvm_run->exit_reason = KVM_EXIT_INTR;
1298 if (!signal_pending(current)) { 767 vcpu->arch.ret = -EINTR;
1299 if (vc->vcore_state == VCORE_RUNNING && 768 break;
1300 VCORE_EXIT_COUNT(vc) == 0) { 769 }
1301 vcpu->arch.ptid = vc->n_runnable - 1; 770 /* have to wait for vcore to stop executing guest */
1302 kvmppc_create_dtl_entry(vcpu, vc); 771 wait_state = TASK_UNINTERRUPTIBLE;
1303 kvmppc_start_thread(vcpu); 772 smp_send_reschedule(vc->pcpu);
1304 } else if (vc->vcore_state == VCORE_SLEEPING) {
1305 wake_up(&vc->wq);
1306 } 773 }
1307 774
1308 } 775 if (!vc->vcore_running &&
1309 776 vc->n_runnable + vc->n_blocked == vc->num_threads) {
1310 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 777 /* we can run now */
1311 !signal_pending(current)) { 778 if (kvmppc_run_core(vc))
1312 if (vc->vcore_state != VCORE_INACTIVE) { 779 continue;
1313 spin_unlock(&vc->lock);
1314 kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
1315 spin_lock(&vc->lock);
1316 continue;
1317 } 780 }
1318 list_for_each_entry_safe(v, vn, &vc->runnable_threads,
1319 arch.run_list) {
1320 kvmppc_core_prepare_to_enter(v);
1321 if (signal_pending(v->arch.run_task)) {
1322 kvmppc_remove_runnable(vc, v);
1323 v->stat.signal_exits++;
1324 v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
1325 v->arch.ret = -EINTR;
1326 wake_up(&v->arch.cpu_run);
1327 }
1328 }
1329 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
1330 break;
1331 vc->runner = vcpu;
1332 n_ceded = 0;
1333 list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
1334 if (!v->arch.pending_exceptions)
1335 n_ceded += v->arch.ceded;
1336 if (n_ceded == vc->n_runnable)
1337 kvmppc_vcore_blocked(vc);
1338 else
1339 kvmppc_run_core(vc);
1340 vc->runner = NULL;
1341 }
1342 781
1343 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 782 if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
1344 (vc->vcore_state == VCORE_RUNNING || 783 kvmppc_start_thread(vcpu);
1345 vc->vcore_state == VCORE_EXITING)) { 784
785 /* wait for other threads to come in, or wait for vcore */
786 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
1346 spin_unlock(&vc->lock); 787 spin_unlock(&vc->lock);
1347 kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); 788 schedule();
789 finish_wait(&vcpu->arch.cpu_run, &wait);
1348 spin_lock(&vc->lock); 790 spin_lock(&vc->lock);
1349 } 791 }
1350 792
1351 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 793 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
1352 kvmppc_remove_runnable(vc, vcpu); 794 kvmppc_remove_runnable(vc, vcpu);
1353 vcpu->stat.signal_exits++;
1354 kvm_run->exit_reason = KVM_EXIT_INTR;
1355 vcpu->arch.ret = -EINTR;
1356 }
1357
1358 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
1359 /* Wake up some vcpu to run the core */
1360 v = list_first_entry(&vc->runnable_threads,
1361 struct kvm_vcpu, arch.run_list);
1362 wake_up(&v->arch.cpu_run);
1363 }
1364
1365 spin_unlock(&vc->lock); 795 spin_unlock(&vc->lock);
796
1366 return vcpu->arch.ret; 797 return vcpu->arch.ret;
1367} 798}
1368 799
1369int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) 800int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
1370{ 801{
1371 int r; 802 int r;
1372 int srcu_idx;
1373
1374 if (!vcpu->arch.sane) {
1375 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1376 return -EINVAL;
1377 }
1378
1379 kvmppc_core_prepare_to_enter(vcpu);
1380
1381 /* No need to go into the guest when all we'll do is come back out */
1382 if (signal_pending(current)) {
1383 run->exit_reason = KVM_EXIT_INTR;
1384 return -EINTR;
1385 }
1386
1387 atomic_inc(&vcpu->kvm->arch.vcpus_running);
1388 /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
1389 smp_mb();
1390
1391 /* On the first time here, set up HTAB and VRMA or RMA */
1392 if (!vcpu->kvm->arch.rma_setup_done) {
1393 r = kvmppc_hv_setup_htab_rma(vcpu);
1394 if (r)
1395 goto out;
1396 }
1397
1398 flush_fp_to_thread(current);
1399 flush_altivec_to_thread(current);
1400 flush_vsx_to_thread(current);
1401 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
1402 vcpu->arch.pgdir = current->mm->pgd;
1403 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
1404 803
1405 do { 804 do {
1406 r = kvmppc_run_vcpu(run, vcpu); 805 r = kvmppc_run_vcpu(run, vcpu);
@@ -1408,21 +807,121 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
1408 if (run->exit_reason == KVM_EXIT_PAPR_HCALL && 807 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
1409 !(vcpu->arch.shregs.msr & MSR_PR)) { 808 !(vcpu->arch.shregs.msr & MSR_PR)) {
1410 r = kvmppc_pseries_do_hcall(vcpu); 809 r = kvmppc_pseries_do_hcall(vcpu);
1411 kvmppc_core_prepare_to_enter(vcpu); 810 kvmppc_core_deliver_interrupts(vcpu);
1412 } else if (r == RESUME_PAGE_FAULT) {
1413 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1414 r = kvmppc_book3s_hv_page_fault(run, vcpu,
1415 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
1416 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1417 } 811 }
1418 } while (r == RESUME_GUEST); 812 } while (r == RESUME_GUEST);
1419
1420 out:
1421 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
1422 atomic_dec(&vcpu->kvm->arch.vcpus_running);
1423 return r; 813 return r;
1424} 814}
1425 815
816static long kvmppc_stt_npages(unsigned long window_size)
817{
818 return ALIGN((window_size >> SPAPR_TCE_SHIFT)
819 * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
820}
821
822static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
823{
824 struct kvm *kvm = stt->kvm;
825 int i;
826
827 mutex_lock(&kvm->lock);
828 list_del(&stt->list);
829 for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
830 __free_page(stt->pages[i]);
831 kfree(stt);
832 mutex_unlock(&kvm->lock);
833
834 kvm_put_kvm(kvm);
835}
836
837static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
838{
839 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
840 struct page *page;
841
842 if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
843 return VM_FAULT_SIGBUS;
844
845 page = stt->pages[vmf->pgoff];
846 get_page(page);
847 vmf->page = page;
848 return 0;
849}
850
851static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
852 .fault = kvm_spapr_tce_fault,
853};
854
855static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
856{
857 vma->vm_ops = &kvm_spapr_tce_vm_ops;
858 return 0;
859}
860
861static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
862{
863 struct kvmppc_spapr_tce_table *stt = filp->private_data;
864
865 release_spapr_tce_table(stt);
866 return 0;
867}
868
869static struct file_operations kvm_spapr_tce_fops = {
870 .mmap = kvm_spapr_tce_mmap,
871 .release = kvm_spapr_tce_release,
872};
873
874long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
875 struct kvm_create_spapr_tce *args)
876{
877 struct kvmppc_spapr_tce_table *stt = NULL;
878 long npages;
879 int ret = -ENOMEM;
880 int i;
881
882 /* Check this LIOBN hasn't been previously allocated */
883 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
884 if (stt->liobn == args->liobn)
885 return -EBUSY;
886 }
887
888 npages = kvmppc_stt_npages(args->window_size);
889
890 stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
891 GFP_KERNEL);
892 if (!stt)
893 goto fail;
894
895 stt->liobn = args->liobn;
896 stt->window_size = args->window_size;
897 stt->kvm = kvm;
898
899 for (i = 0; i < npages; i++) {
900 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
901 if (!stt->pages[i])
902 goto fail;
903 }
904
905 kvm_get_kvm(kvm);
906
907 mutex_lock(&kvm->lock);
908 list_add(&stt->list, &kvm->arch.spapr_tce_tables);
909
910 mutex_unlock(&kvm->lock);
911
912 return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
913 stt, O_RDWR);
914
915fail:
916 if (stt) {
917 for (i = 0; i < npages; i++)
918 if (stt->pages[i])
919 __free_page(stt->pages[i]);
920
921 kfree(stt);
922 }
923 return ret;
924}
1426 925
1427/* Work out RMLS (real mode limit selector) field value for a given RMA size. 926/* Work out RMLS (real mode limit selector) field value for a given RMA size.
1428 Assumes POWER7 or PPC970. */ 927 Assumes POWER7 or PPC970. */
@@ -1452,7 +951,7 @@ static inline int lpcr_rmls(unsigned long rma_size)
1452 951
1453static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 952static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1454{ 953{
1455 struct kvmppc_linear_info *ri = vma->vm_file->private_data; 954 struct kvmppc_rma_info *ri = vma->vm_file->private_data;
1456 struct page *page; 955 struct page *page;
1457 956
1458 if (vmf->pgoff >= ri->npages) 957 if (vmf->pgoff >= ri->npages)
@@ -1470,14 +969,14 @@ static const struct vm_operations_struct kvm_rma_vm_ops = {
1470 969
1471static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) 970static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
1472{ 971{
1473 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 972 vma->vm_flags |= VM_RESERVED;
1474 vma->vm_ops = &kvm_rma_vm_ops; 973 vma->vm_ops = &kvm_rma_vm_ops;
1475 return 0; 974 return 0;
1476} 975}
1477 976
1478static int kvm_rma_release(struct inode *inode, struct file *filp) 977static int kvm_rma_release(struct inode *inode, struct file *filp)
1479{ 978{
1480 struct kvmppc_linear_info *ri = filp->private_data; 979 struct kvmppc_rma_info *ri = filp->private_data;
1481 980
1482 kvm_release_rma(ri); 981 kvm_release_rma(ri);
1483 return 0; 982 return 0;
@@ -1490,7 +989,7 @@ static struct file_operations kvm_rma_fops = {
1490 989
1491long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) 990long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
1492{ 991{
1493 struct kvmppc_linear_info *ri; 992 struct kvmppc_rma_info *ri;
1494 long fd; 993 long fd;
1495 994
1496 ri = kvm_alloc_rma(); 995 ri = kvm_alloc_rma();
@@ -1505,251 +1004,89 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
1505 return fd; 1004 return fd;
1506} 1005}
1507 1006
1508static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, 1007static struct page *hva_to_page(unsigned long addr)
1509 int linux_psize)
1510{
1511 struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
1512
1513 if (!def->shift)
1514 return;
1515 (*sps)->page_shift = def->shift;
1516 (*sps)->slb_enc = def->sllp;
1517 (*sps)->enc[0].page_shift = def->shift;
1518 (*sps)->enc[0].pte_enc = def->penc;
1519 (*sps)++;
1520}
1521
1522int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1523{ 1008{
1524 struct kvm_ppc_one_seg_page_size *sps; 1009 struct page *page[1];
1010 int npages;
1525 1011
1526 info->flags = KVM_PPC_PAGE_SIZES_REAL; 1012 might_sleep();
1527 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1528 info->flags |= KVM_PPC_1T_SEGMENTS;
1529 info->slb_size = mmu_slb_size;
1530 1013
1531 /* We only support these sizes for now, and no muti-size segments */ 1014 npages = get_user_pages_fast(addr, 1, 1, page);
1532 sps = &info->sps[0];
1533 kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
1534 kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
1535 kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
1536
1537 return 0;
1538}
1539
1540/*
1541 * Get (and clear) the dirty memory log for a memory slot.
1542 */
1543int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1544{
1545 struct kvm_memory_slot *memslot;
1546 int r;
1547 unsigned long n;
1548 1015
1549 mutex_lock(&kvm->slots_lock); 1016 if (unlikely(npages != 1))
1550 1017 return 0;
1551 r = -EINVAL;
1552 if (log->slot >= KVM_MEMORY_SLOTS)
1553 goto out;
1554
1555 memslot = id_to_memslot(kvm->memslots, log->slot);
1556 r = -ENOENT;
1557 if (!memslot->dirty_bitmap)
1558 goto out;
1559
1560 n = kvm_dirty_bitmap_bytes(memslot);
1561 memset(memslot->dirty_bitmap, 0, n);
1562
1563 r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
1564 if (r)
1565 goto out;
1566
1567 r = -EFAULT;
1568 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
1569 goto out;
1570
1571 r = 0;
1572out:
1573 mutex_unlock(&kvm->slots_lock);
1574 return r;
1575}
1576
1577static void unpin_slot(struct kvm_memory_slot *memslot)
1578{
1579 unsigned long *physp;
1580 unsigned long j, npages, pfn;
1581 struct page *page;
1582 1018
1583 physp = memslot->arch.slot_phys; 1019 return page[0];
1584 npages = memslot->npages;
1585 if (!physp)
1586 return;
1587 for (j = 0; j < npages; j++) {
1588 if (!(physp[j] & KVMPPC_GOT_PAGE))
1589 continue;
1590 pfn = physp[j] >> PAGE_SHIFT;
1591 page = pfn_to_page(pfn);
1592 SetPageDirty(page);
1593 put_page(page);
1594 }
1595}
1596
1597void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1598 struct kvm_memory_slot *dont)
1599{
1600 if (!dont || free->arch.rmap != dont->arch.rmap) {
1601 vfree(free->arch.rmap);
1602 free->arch.rmap = NULL;
1603 }
1604 if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
1605 unpin_slot(free);
1606 vfree(free->arch.slot_phys);
1607 free->arch.slot_phys = NULL;
1608 }
1609}
1610
1611int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1612 unsigned long npages)
1613{
1614 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
1615 if (!slot->arch.rmap)
1616 return -ENOMEM;
1617 slot->arch.slot_phys = NULL;
1618
1619 return 0;
1620} 1020}
1621 1021
1622int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1022int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1623 struct kvm_memory_slot *memslot, 1023 struct kvm_userspace_memory_region *mem)
1624 struct kvm_userspace_memory_region *mem)
1625{
1626 unsigned long *phys;
1627
1628 /* Allocate a slot_phys array if needed */
1629 phys = memslot->arch.slot_phys;
1630 if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
1631 phys = vzalloc(memslot->npages * sizeof(unsigned long));
1632 if (!phys)
1633 return -ENOMEM;
1634 memslot->arch.slot_phys = phys;
1635 }
1636
1637 return 0;
1638}
1639
1640void kvmppc_core_commit_memory_region(struct kvm *kvm,
1641 struct kvm_userspace_memory_region *mem,
1642 struct kvm_memory_slot old)
1643{ 1024{
1644 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
1645 struct kvm_memory_slot *memslot;
1646
1647 if (npages && old.npages) {
1648 /*
1649 * If modifying a memslot, reset all the rmap dirty bits.
1650 * If this is a new memslot, we don't need to do anything
1651 * since the rmap array starts out as all zeroes,
1652 * i.e. no pages are dirty.
1653 */
1654 memslot = id_to_memslot(kvm->memslots, mem->slot);
1655 kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
1656 }
1657}
1658
1659static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1660{
1661 int err = 0;
1662 struct kvm *kvm = vcpu->kvm;
1663 struct kvmppc_linear_info *ri = NULL;
1664 unsigned long hva;
1665 struct kvm_memory_slot *memslot;
1666 struct vm_area_struct *vma;
1667 unsigned long lpcr, senc;
1668 unsigned long psize, porder; 1025 unsigned long psize, porder;
1669 unsigned long rma_size; 1026 unsigned long i, npages, totalpages;
1670 unsigned long rmls; 1027 unsigned long pg_ix;
1671 unsigned long *physp; 1028 struct kvmppc_pginfo *pginfo;
1672 unsigned long i, npages; 1029 unsigned long hva;
1673 int srcu_idx; 1030 struct kvmppc_rma_info *ri = NULL;
1031 struct page *page;
1674 1032
1675 mutex_lock(&kvm->lock); 1033 /* For now, only allow 16MB pages */
1676 if (kvm->arch.rma_setup_done) 1034 porder = LARGE_PAGE_ORDER;
1677 goto out; /* another vcpu beat us to it */ 1035 psize = 1ul << porder;
1678 1036 if ((mem->memory_size & (psize - 1)) ||
1679 /* Allocate hashed page table (if not done already) and reset it */ 1037 (mem->guest_phys_addr & (psize - 1))) {
1680 if (!kvm->arch.hpt_virt) { 1038 pr_err("bad memory_size=%llx @ %llx\n",
1681 err = kvmppc_alloc_hpt(kvm, NULL); 1039 mem->memory_size, mem->guest_phys_addr);
1682 if (err) { 1040 return -EINVAL;
1683 pr_err("KVM: Couldn't alloc HPT\n");
1684 goto out;
1685 }
1686 } 1041 }
1687 1042
1688 /* Look up the memslot for guest physical address 0 */ 1043 npages = mem->memory_size >> porder;
1689 srcu_idx = srcu_read_lock(&kvm->srcu); 1044 totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
1690 memslot = gfn_to_memslot(kvm, 0);
1691 1045
1692 /* We must have some memory at 0 by now */ 1046 /* More memory than we have space to track? */
1693 err = -EINVAL; 1047 if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
1694 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 1048 return -EINVAL;
1695 goto out_srcu;
1696 1049
1697 /* Look up the VMA for the start of this memory slot */ 1050 /* Do we already have an RMA registered? */
1698 hva = memslot->userspace_addr; 1051 if (mem->guest_phys_addr == 0 && kvm->arch.rma)
1699 down_read(&current->mm->mmap_sem); 1052 return -EINVAL;
1700 vma = find_vma(current->mm, hva);
1701 if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
1702 goto up_out;
1703 1053
1704 psize = vma_kernel_pagesize(vma); 1054 if (totalpages > kvm->arch.ram_npages)
1705 porder = __ilog2(psize); 1055 kvm->arch.ram_npages = totalpages;
1706 1056
1707 /* Is this one of our preallocated RMAs? */ 1057 /* Is this one of our preallocated RMAs? */
1708 if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && 1058 if (mem->guest_phys_addr == 0) {
1709 hva == vma->vm_start) 1059 struct vm_area_struct *vma;
1710 ri = vma->vm_file->private_data; 1060
1711 1061 down_read(&current->mm->mmap_sem);
1712 up_read(&current->mm->mmap_sem); 1062 vma = find_vma(current->mm, mem->userspace_addr);
1713 1063 if (vma && vma->vm_file &&
1714 if (!ri) { 1064 vma->vm_file->f_op == &kvm_rma_fops &&
1715 /* On POWER7, use VRMA; on PPC970, give up */ 1065 mem->userspace_addr == vma->vm_start)
1716 err = -EPERM; 1066 ri = vma->vm_file->private_data;
1717 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 1067 up_read(&current->mm->mmap_sem);
1718 pr_err("KVM: CPU requires an RMO\n"); 1068 if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
1719 goto out_srcu; 1069 pr_err("CPU requires an RMO\n");
1070 return -EINVAL;
1720 } 1071 }
1072 }
1721 1073
1722 /* We can handle 4k, 64k or 16M pages in the VRMA */ 1074 if (ri) {
1723 err = -EINVAL; 1075 unsigned long rma_size;
1724 if (!(psize == 0x1000 || psize == 0x10000 || 1076 unsigned long lpcr;
1725 psize == 0x1000000)) 1077 long rmls;
1726 goto out_srcu;
1727
1728 /* Update VRMASD field in the LPCR */
1729 senc = slb_pgsize_encoding(psize);
1730 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1731 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1732 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
1733 lpcr |= senc << (LPCR_VRMASD_SH - 4);
1734 kvm->arch.lpcr = lpcr;
1735
1736 /* Create HPTEs in the hash page table for the VRMA */
1737 kvmppc_map_vrma(vcpu, memslot, porder);
1738 1078
1739 } else { 1079 rma_size = ri->npages << PAGE_SHIFT;
1740 /* Set up to use an RMO region */ 1080 if (rma_size > mem->memory_size)
1741 rma_size = ri->npages; 1081 rma_size = mem->memory_size;
1742 if (rma_size > memslot->npages)
1743 rma_size = memslot->npages;
1744 rma_size <<= PAGE_SHIFT;
1745 rmls = lpcr_rmls(rma_size); 1082 rmls = lpcr_rmls(rma_size);
1746 err = -EINVAL;
1747 if (rmls < 0) { 1083 if (rmls < 0) {
1748 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); 1084 pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
1749 goto out_srcu; 1085 return -EINVAL;
1750 } 1086 }
1751 atomic_inc(&ri->use_count); 1087 atomic_inc(&ri->use_count);
1752 kvm->arch.rma = ri; 1088 kvm->arch.rma = ri;
1089 kvm->arch.n_rma_pages = rma_size >> porder;
1753 1090
1754 /* Update LPCR and RMOR */ 1091 /* Update LPCR and RMOR */
1755 lpcr = kvm->arch.lpcr; 1092 lpcr = kvm->arch.lpcr;
@@ -1769,65 +1106,81 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1769 kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; 1106 kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
1770 } 1107 }
1771 kvm->arch.lpcr = lpcr; 1108 kvm->arch.lpcr = lpcr;
1772 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", 1109 pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
1773 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); 1110 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
1111 }
1774 1112
1775 /* Initialize phys addrs of pages in RMO */ 1113 pg_ix = mem->guest_phys_addr >> porder;
1776 npages = ri->npages; 1114 pginfo = kvm->arch.ram_pginfo + pg_ix;
1777 porder = __ilog2(npages); 1115 for (i = 0; i < npages; ++i, ++pg_ix) {
1778 physp = memslot->arch.slot_phys; 1116 if (ri && pg_ix < kvm->arch.n_rma_pages) {
1779 if (physp) { 1117 pginfo[i].pfn = ri->base_pfn +
1780 if (npages > memslot->npages) 1118 (pg_ix << (porder - PAGE_SHIFT));
1781 npages = memslot->npages; 1119 continue;
1782 spin_lock(&kvm->arch.slot_phys_lock); 1120 }
1783 for (i = 0; i < npages; ++i) 1121 hva = mem->userspace_addr + (i << porder);
1784 physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + 1122 page = hva_to_page(hva);
1785 porder; 1123 if (!page) {
1786 spin_unlock(&kvm->arch.slot_phys_lock); 1124 pr_err("oops, no pfn for hva %lx\n", hva);
1125 goto err;
1126 }
1127 /* Check it's a 16MB page */
1128 if (!PageHead(page) ||
1129 compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
1130 pr_err("page at %lx isn't 16MB (o=%d)\n",
1131 hva, compound_order(page));
1132 goto err;
1787 } 1133 }
1134 pginfo[i].pfn = page_to_pfn(page);
1788 } 1135 }
1789 1136
1790 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ 1137 return 0;
1791 smp_wmb();
1792 kvm->arch.rma_setup_done = 1;
1793 err = 0;
1794 out_srcu:
1795 srcu_read_unlock(&kvm->srcu, srcu_idx);
1796 out:
1797 mutex_unlock(&kvm->lock);
1798 return err;
1799 1138
1800 up_out: 1139 err:
1801 up_read(&current->mm->mmap_sem); 1140 return -EINVAL;
1802 goto out;
1803} 1141}
1804 1142
1805int kvmppc_core_init_vm(struct kvm *kvm) 1143void kvmppc_core_commit_memory_region(struct kvm *kvm,
1144 struct kvm_userspace_memory_region *mem)
1806{ 1145{
1807 unsigned long lpcr, lpid; 1146 if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
1808 1147 !kvm->arch.rma)
1809 /* Allocate the guest's logical partition ID */ 1148 kvmppc_map_vrma(kvm, mem);
1149}
1810 1150
1811 lpid = kvmppc_alloc_lpid(); 1151int kvmppc_core_init_vm(struct kvm *kvm)
1812 if (lpid < 0) 1152{
1813 return -ENOMEM; 1153 long r;
1814 kvm->arch.lpid = lpid; 1154 unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
1155 long err = -ENOMEM;
1156 unsigned long lpcr;
1815 1157
1816 /* 1158 /* Allocate hashed page table */
1817 * Since we don't flush the TLB when tearing down a VM, 1159 r = kvmppc_alloc_hpt(kvm);
1818 * and this lpid might have previously been used, 1160 if (r)
1819 * make sure we flush on each core before running the new VM. 1161 return r;
1820 */
1821 cpumask_setall(&kvm->arch.need_tlb_flush);
1822 1162
1823 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1163 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1824 1164
1165 kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
1166 GFP_KERNEL);
1167 if (!kvm->arch.ram_pginfo) {
1168 pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
1169 npages * sizeof(struct kvmppc_pginfo));
1170 goto out_free;
1171 }
1172
1173 kvm->arch.ram_npages = 0;
1174 kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
1175 kvm->arch.ram_porder = LARGE_PAGE_ORDER;
1825 kvm->arch.rma = NULL; 1176 kvm->arch.rma = NULL;
1177 kvm->arch.n_rma_pages = 0;
1826 1178
1827 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); 1179 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
1828 1180
1829 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 1181 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1830 /* PPC970; HID4 is effectively the LPCR */ 1182 /* PPC970; HID4 is effectively the LPCR */
1183 unsigned long lpid = kvm->arch.lpid;
1831 kvm->arch.host_lpid = 0; 1184 kvm->arch.host_lpid = 0;
1832 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); 1185 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
1833 lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); 1186 lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
@@ -1839,28 +1192,30 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1839 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); 1192 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
1840 lpcr &= LPCR_PECE | LPCR_LPES; 1193 lpcr &= LPCR_PECE | LPCR_LPES;
1841 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | 1194 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
1842 LPCR_VPM0 | LPCR_VPM1; 1195 LPCR_VPM0 | LPCR_VRMA_L;
1843 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
1844 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1845 } 1196 }
1846 kvm->arch.lpcr = lpcr; 1197 kvm->arch.lpcr = lpcr;
1847 1198
1848 kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
1849 spin_lock_init(&kvm->arch.slot_phys_lock);
1850
1851 /*
1852 * Don't allow secondary CPU threads to come online
1853 * while any KVM VMs exist.
1854 */
1855 inhibit_secondary_onlining();
1856
1857 return 0; 1199 return 0;
1200
1201 out_free:
1202 kvmppc_free_hpt(kvm);
1203 return err;
1858} 1204}
1859 1205
1860void kvmppc_core_destroy_vm(struct kvm *kvm) 1206void kvmppc_core_destroy_vm(struct kvm *kvm)
1861{ 1207{
1862 uninhibit_secondary_onlining(); 1208 struct kvmppc_pginfo *pginfo;
1863 1209 unsigned long i;
1210
1211 if (kvm->arch.ram_pginfo) {
1212 pginfo = kvm->arch.ram_pginfo;
1213 kvm->arch.ram_pginfo = NULL;
1214 for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
1215 if (pginfo[i].pfn)
1216 put_page(pfn_to_page(pginfo[i].pfn));
1217 kfree(pginfo);
1218 }
1864 if (kvm->arch.rma) { 1219 if (kvm->arch.rma) {
1865 kvm_release_rma(kvm->arch.rma); 1220 kvm_release_rma(kvm->arch.rma);
1866 kvm->arch.rma = NULL; 1221 kvm->arch.rma = NULL;
@@ -1882,12 +1237,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
1882 return EMULATE_FAIL; 1237 return EMULATE_FAIL;
1883} 1238}
1884 1239
1885int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 1240int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
1886{ 1241{
1887 return EMULATE_FAIL; 1242 return EMULATE_FAIL;
1888} 1243}
1889 1244
1890int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 1245int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
1891{ 1246{
1892 return EMULATE_FAIL; 1247 return EMULATE_FAIL;
1893} 1248}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5de10..d43120355ee 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -8,7 +8,6 @@
8 8
9#include <linux/kvm_host.h> 9#include <linux/kvm_host.h>
10#include <linux/preempt.h> 10#include <linux/preempt.h>
11#include <linux/export.h>
12#include <linux/sched.h> 11#include <linux/sched.h>
13#include <linux/spinlock.h> 12#include <linux/spinlock.h>
14#include <linux/bootmem.h> 13#include <linux/bootmem.h>
@@ -18,18 +17,6 @@
18#include <asm/kvm_ppc.h> 17#include <asm/kvm_ppc.h>
19#include <asm/kvm_book3s.h> 18#include <asm/kvm_book3s.h>
20 19
21#define KVM_LINEAR_RMA 0
22#define KVM_LINEAR_HPT 1
23
24static void __init kvm_linear_init_one(ulong size, int count, int type);
25static struct kvmppc_linear_info *kvm_alloc_linear(int type);
26static void kvm_release_linear(struct kvmppc_linear_info *ri);
27
28int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
29EXPORT_SYMBOL_GPL(kvm_hpt_order);
30
31/*************** RMA *************/
32
33/* 20/*
34 * This maintains a list of RMAs (real mode areas) for KVM guests to use. 21 * This maintains a list of RMAs (real mode areas) for KVM guests to use.
35 * Each RMA has to be physically contiguous and of a size that the 22 * Each RMA has to be physically contiguous and of a size that the
@@ -41,32 +28,6 @@ EXPORT_SYMBOL_GPL(kvm_hpt_order);
41static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ 28static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
42static unsigned long kvm_rma_count; 29static unsigned long kvm_rma_count;
43 30
44/* Work out RMLS (real mode limit selector) field value for a given RMA size.
45 Assumes POWER7 or PPC970. */
46static inline int lpcr_rmls(unsigned long rma_size)
47{
48 switch (rma_size) {
49 case 32ul << 20: /* 32 MB */
50 if (cpu_has_feature(CPU_FTR_ARCH_206))
51 return 8; /* only supported on POWER7 */
52 return -1;
53 case 64ul << 20: /* 64 MB */
54 return 3;
55 case 128ul << 20: /* 128 MB */
56 return 7;
57 case 256ul << 20: /* 256 MB */
58 return 4;
59 case 1ul << 30: /* 1 GB */
60 return 2;
61 case 16ul << 30: /* 16 GB */
62 return 1;
63 case 256ul << 30: /* 256 GB */
64 return 0;
65 default:
66 return -1;
67 }
68}
69
70static int __init early_parse_rma_size(char *p) 31static int __init early_parse_rma_size(char *p)
71{ 32{
72 if (!p) 33 if (!p)
@@ -89,84 +50,75 @@ static int __init early_parse_rma_count(char *p)
89} 50}
90early_param("kvm_rma_count", early_parse_rma_count); 51early_param("kvm_rma_count", early_parse_rma_count);
91 52
92struct kvmppc_linear_info *kvm_alloc_rma(void) 53static struct kvmppc_rma_info *rma_info;
93{ 54static LIST_HEAD(free_rmas);
94 return kvm_alloc_linear(KVM_LINEAR_RMA); 55static DEFINE_SPINLOCK(rma_lock);
95}
96EXPORT_SYMBOL_GPL(kvm_alloc_rma);
97 56
98void kvm_release_rma(struct kvmppc_linear_info *ri) 57/* Work out RMLS (real mode limit selector) field value for a given RMA size.
58 Assumes POWER7 or PPC970. */
59static inline int lpcr_rmls(unsigned long rma_size)
99{ 60{
100 kvm_release_linear(ri); 61 switch (rma_size) {
62 case 32ul << 20: /* 32 MB */
63 if (cpu_has_feature(CPU_FTR_ARCH_206))
64 return 8; /* only supported on POWER7 */
65 return -1;
66 case 64ul << 20: /* 64 MB */
67 return 3;
68 case 128ul << 20: /* 128 MB */
69 return 7;
70 case 256ul << 20: /* 256 MB */
71 return 4;
72 case 1ul << 30: /* 1 GB */
73 return 2;
74 case 16ul << 30: /* 16 GB */
75 return 1;
76 case 256ul << 30: /* 256 GB */
77 return 0;
78 default:
79 return -1;
80 }
101} 81}
102EXPORT_SYMBOL_GPL(kvm_release_rma);
103
104/*************** HPT *************/
105 82
106/* 83/*
107 * This maintains a list of big linear HPT tables that contain the GVA->HPA 84 * Called at boot time while the bootmem allocator is active,
108 * memory mappings. If we don't reserve those early on, we might not be able 85 * to allocate contiguous physical memory for the real memory
109 * to get a big (usually 16MB) linear memory region from the kernel anymore. 86 * areas for guests.
110 */ 87 */
111 88void kvm_rma_init(void)
112static unsigned long kvm_hpt_count;
113
114static int __init early_parse_hpt_count(char *p)
115{
116 if (!p)
117 return 1;
118
119 kvm_hpt_count = simple_strtoul(p, NULL, 0);
120
121 return 0;
122}
123early_param("kvm_hpt_count", early_parse_hpt_count);
124
125struct kvmppc_linear_info *kvm_alloc_hpt(void)
126{
127 return kvm_alloc_linear(KVM_LINEAR_HPT);
128}
129EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
130
131void kvm_release_hpt(struct kvmppc_linear_info *li)
132{
133 kvm_release_linear(li);
134}
135EXPORT_SYMBOL_GPL(kvm_release_hpt);
136
137/*************** generic *************/
138
139static LIST_HEAD(free_linears);
140static DEFINE_SPINLOCK(linear_lock);
141
142static void __init kvm_linear_init_one(ulong size, int count, int type)
143{ 89{
144 unsigned long i; 90 unsigned long i;
145 unsigned long j, npages; 91 unsigned long j, npages;
146 void *linear; 92 void *rma;
147 struct page *pg; 93 struct page *pg;
148 const char *typestr;
149 struct kvmppc_linear_info *linear_info;
150 94
151 if (!count) 95 /* Only do this on PPC970 in HV mode */
96 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
97 !cpu_has_feature(CPU_FTR_ARCH_201))
98 return;
99
100 if (!kvm_rma_size || !kvm_rma_count)
101 return;
102
103 /* Check that the requested size is one supported in hardware */
104 if (lpcr_rmls(kvm_rma_size) < 0) {
105 pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
152 return; 106 return;
107 }
153 108
154 typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; 109 npages = kvm_rma_size >> PAGE_SHIFT;
155 110 rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info));
156 npages = size >> PAGE_SHIFT; 111 for (i = 0; i < kvm_rma_count; ++i) {
157 linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); 112 rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size);
158 for (i = 0; i < count; ++i) { 113 pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma,
159 linear = alloc_bootmem_align(size, size); 114 kvm_rma_size >> 20);
160 pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, 115 rma_info[i].base_virt = rma;
161 size >> 20); 116 rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT;
162 linear_info[i].base_virt = linear; 117 rma_info[i].npages = npages;
163 linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; 118 list_add_tail(&rma_info[i].list, &free_rmas);
164 linear_info[i].npages = npages; 119 atomic_set(&rma_info[i].use_count, 0);
165 linear_info[i].type = type; 120
166 list_add_tail(&linear_info[i].list, &free_linears); 121 pg = pfn_to_page(rma_info[i].base_pfn);
167 atomic_set(&linear_info[i].use_count, 0);
168
169 pg = pfn_to_page(linear_info[i].base_pfn);
170 for (j = 0; j < npages; ++j) { 122 for (j = 0; j < npages; ++j) {
171 atomic_inc(&pg->_count); 123 atomic_inc(&pg->_count);
172 ++pg; 124 ++pg;
@@ -174,60 +126,30 @@ static void __init kvm_linear_init_one(ulong size, int count, int type)
174 } 126 }
175} 127}
176 128
177static struct kvmppc_linear_info *kvm_alloc_linear(int type) 129struct kvmppc_rma_info *kvm_alloc_rma(void)
178{ 130{
179 struct kvmppc_linear_info *ri, *ret; 131 struct kvmppc_rma_info *ri;
180
181 ret = NULL;
182 spin_lock(&linear_lock);
183 list_for_each_entry(ri, &free_linears, list) {
184 if (ri->type != type)
185 continue;
186 132
133 ri = NULL;
134 spin_lock(&rma_lock);
135 if (!list_empty(&free_rmas)) {
136 ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list);
187 list_del(&ri->list); 137 list_del(&ri->list);
188 atomic_inc(&ri->use_count); 138 atomic_inc(&ri->use_count);
189 memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
190 ret = ri;
191 break;
192 } 139 }
193 spin_unlock(&linear_lock); 140 spin_unlock(&rma_lock);
194 return ret; 141 return ri;
195} 142}
143EXPORT_SYMBOL_GPL(kvm_alloc_rma);
196 144
197static void kvm_release_linear(struct kvmppc_linear_info *ri) 145void kvm_release_rma(struct kvmppc_rma_info *ri)
198{ 146{
199 if (atomic_dec_and_test(&ri->use_count)) { 147 if (atomic_dec_and_test(&ri->use_count)) {
200 spin_lock(&linear_lock); 148 spin_lock(&rma_lock);
201 list_add_tail(&ri->list, &free_linears); 149 list_add_tail(&ri->list, &free_rmas);
202 spin_unlock(&linear_lock); 150 spin_unlock(&rma_lock);
203 151
204 } 152 }
205} 153}
154EXPORT_SYMBOL_GPL(kvm_release_rma);
206 155
207/*
208 * Called at boot time while the bootmem allocator is active,
209 * to allocate contiguous physical memory for the hash page
210 * tables for guests.
211 */
212void __init kvm_linear_init(void)
213{
214 /* HPT */
215 kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
216
217 /* RMA */
218 /* Only do this on PPC970 in HV mode */
219 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
220 !cpu_has_feature(CPU_FTR_ARCH_201))
221 return;
222
223 if (!kvm_rma_size || !kvm_rma_count)
224 return;
225
226 /* Check that the requested size is one supported in hardware */
227 if (lpcr_rmls(kvm_rma_size) < 0) {
228 pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
229 return;
230 }
231
232 kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
233}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 84035a528c8..3f7b674dd4b 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -46,10 +46,8 @@ _GLOBAL(__kvmppc_vcore_entry)
46 /* Save host state to the stack */ 46 /* Save host state to the stack */
47 stdu r1, -SWITCH_FRAME_SIZE(r1) 47 stdu r1, -SWITCH_FRAME_SIZE(r1)
48 48
49 /* Save non-volatile registers (r14 - r31) and CR */ 49 /* Save non-volatile registers (r14 - r31) */
50 SAVE_NVGPRS(r1) 50 SAVE_NVGPRS(r1)
51 mfcr r3
52 std r3, _CCR(r1)
53 51
54 /* Save host DSCR */ 52 /* Save host DSCR */
55BEGIN_FTR_SECTION 53BEGIN_FTR_SECTION
@@ -68,24 +66,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
68 rotldi r10,r10,16 66 rotldi r10,r10,16
69 mtmsrd r10,1 67 mtmsrd r10,1
70 68
71 /* Save host PMU registers */ 69 /* Save host PMU registers and load guest PMU registers */
72 /* R4 is live here (vcpu pointer) but not r3 or r5 */ 70 /* R4 is live here (vcpu pointer) but not r3 or r5 */
73 li r3, 1 71 li r3, 1
74 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 72 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
75 mfspr r7, SPRN_MMCR0 /* save MMCR0 */ 73 mfspr r7, SPRN_MMCR0 /* save MMCR0 */
76 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ 74 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
77 mfspr r6, SPRN_MMCRA
78BEGIN_FTR_SECTION
79 /* On P7, clear MMCRA in order to disable SDAR updates */
80 li r5, 0
81 mtspr SPRN_MMCRA, r5
82END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
83 isync 75 isync
84 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 76 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
85 lbz r5, LPPACA_PMCINUSE(r3) 77 lbz r5, LPPACA_PMCINUSE(r3)
86 cmpwi r5, 0 78 cmpwi r5, 0
87 beq 31f /* skip if not */ 79 beq 31f /* skip if not */
88 mfspr r5, SPRN_MMCR1 80 mfspr r5, SPRN_MMCR1
81 mfspr r6, SPRN_MMCRA
89 std r7, HSTATE_MMCR(r13) 82 std r7, HSTATE_MMCR(r13)
90 std r5, HSTATE_MMCR + 8(r13) 83 std r5, HSTATE_MMCR + 8(r13)
91 std r6, HSTATE_MMCR + 16(r13) 84 std r6, HSTATE_MMCR + 16(r13)
@@ -164,10 +157,8 @@ kvmppc_handler_highmem:
164 * R13 = PACA 157 * R13 = PACA
165 */ 158 */
166 159
167 /* Restore non-volatile host registers (r14 - r31) and CR */ 160 /* Restore non-volatile host registers (r14 - r31) */
168 REST_NVGPRS(r1) 161 REST_NVGPRS(r1)
169 ld r4, _CCR(r1)
170 mtcr r4
171 162
172 addi r1, r1, SWITCH_FRAME_SIZE 163 addi r1, r1, SWITCH_FRAME_SIZE
173 ld r0, PPC_LR_STKOFF(r1) 164 ld r0, PPC_LR_STKOFF(r1)
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
deleted file mode 100644
index a353c485808..00000000000
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ /dev/null
@@ -1,148 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
7 */
8
9#include <linux/types.h>
10#include <linux/string.h>
11#include <linux/kvm.h>
12#include <linux/kvm_host.h>
13#include <linux/kernel.h>
14#include <asm/opal.h>
15
16/* SRR1 bits for machine check on POWER7 */
17#define SRR1_MC_LDSTERR (1ul << (63-42))
18#define SRR1_MC_IFETCH_SH (63-45)
19#define SRR1_MC_IFETCH_MASK 0x7
20#define SRR1_MC_IFETCH_SLBPAR 2 /* SLB parity error */
21#define SRR1_MC_IFETCH_SLBMULTI 3 /* SLB multi-hit */
22#define SRR1_MC_IFETCH_SLBPARMULTI 4 /* SLB parity + multi-hit */
23#define SRR1_MC_IFETCH_TLBMULTI 5 /* I-TLB multi-hit */
24
25/* DSISR bits for machine check on POWER7 */
26#define DSISR_MC_DERAT_MULTI 0x800 /* D-ERAT multi-hit */
27#define DSISR_MC_TLB_MULTI 0x400 /* D-TLB multi-hit */
28#define DSISR_MC_SLB_PARITY 0x100 /* SLB parity error */
29#define DSISR_MC_SLB_MULTI 0x080 /* SLB multi-hit */
30#define DSISR_MC_SLB_PARMULTI 0x040 /* SLB parity + multi-hit */
31
32/* POWER7 SLB flush and reload */
33static void reload_slb(struct kvm_vcpu *vcpu)
34{
35 struct slb_shadow *slb;
36 unsigned long i, n;
37
38 /* First clear out SLB */
39 asm volatile("slbmte %0,%0; slbia" : : "r" (0));
40
41 /* Do they have an SLB shadow buffer registered? */
42 slb = vcpu->arch.slb_shadow.pinned_addr;
43 if (!slb)
44 return;
45
46 /* Sanity check */
47 n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
48 if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
49 return;
50
51 /* Load up the SLB from that */
52 for (i = 0; i < n; ++i) {
53 unsigned long rb = slb->save_area[i].esid;
54 unsigned long rs = slb->save_area[i].vsid;
55
56 rb = (rb & ~0xFFFul) | i; /* insert entry number */
57 asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
58 }
59}
60
61/* POWER7 TLB flush */
62static void flush_tlb_power7(struct kvm_vcpu *vcpu)
63{
64 unsigned long i, rb;
65
66 rb = TLBIEL_INVAL_SET_LPID;
67 for (i = 0; i < POWER7_TLB_SETS; ++i) {
68 asm volatile("tlbiel %0" : : "r" (rb));
69 rb += 1 << TLBIEL_INVAL_SET_SHIFT;
70 }
71}
72
73/*
74 * On POWER7, see if we can handle a machine check that occurred inside
75 * the guest in real mode, without switching to the host partition.
76 *
77 * Returns: 0 => exit guest, 1 => deliver machine check to guest
78 */
79static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
80{
81 unsigned long srr1 = vcpu->arch.shregs.msr;
82#ifdef CONFIG_PPC_POWERNV
83 struct opal_machine_check_event *opal_evt;
84#endif
85 long handled = 1;
86
87 if (srr1 & SRR1_MC_LDSTERR) {
88 /* error on load/store */
89 unsigned long dsisr = vcpu->arch.shregs.dsisr;
90
91 if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
92 DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
93 /* flush and reload SLB; flushes D-ERAT too */
94 reload_slb(vcpu);
95 dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
96 DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
97 }
98 if (dsisr & DSISR_MC_TLB_MULTI) {
99 flush_tlb_power7(vcpu);
100 dsisr &= ~DSISR_MC_TLB_MULTI;
101 }
102 /* Any other errors we don't understand? */
103 if (dsisr & 0xffffffffUL)
104 handled = 0;
105 }
106
107 switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
108 case 0:
109 break;
110 case SRR1_MC_IFETCH_SLBPAR:
111 case SRR1_MC_IFETCH_SLBMULTI:
112 case SRR1_MC_IFETCH_SLBPARMULTI:
113 reload_slb(vcpu);
114 break;
115 case SRR1_MC_IFETCH_TLBMULTI:
116 flush_tlb_power7(vcpu);
117 break;
118 default:
119 handled = 0;
120 }
121
122#ifdef CONFIG_PPC_POWERNV
123 /*
124 * See if OPAL has already handled the condition.
125 * We assume that if the condition is recovered then OPAL
126 * will have generated an error log event that we will pick
127 * up and log later.
128 */
129 opal_evt = local_paca->opal_mc_evt;
130 if (opal_evt->version == OpalMCE_V1 &&
131 (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
132 opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
133 handled = 1;
134
135 if (handled)
136 opal_evt->in_use = 0;
137#endif
138
139 return handled;
140}
141
142long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
143{
144 if (cpu_has_feature(CPU_FTR_ARCH_206))
145 return kvmppc_realmode_mc_power7(vcpu);
146
147 return 0;
148}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 19c93bae1ae..fcfe6b05555 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -11,7 +11,6 @@
11#include <linux/kvm.h> 11#include <linux/kvm.h>
12#include <linux/kvm_host.h> 12#include <linux/kvm_host.h>
13#include <linux/hugetlb.h> 13#include <linux/hugetlb.h>
14#include <linux/module.h>
15 14
16#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
17#include <asm/kvm_ppc.h> 16#include <asm/kvm_ppc.h>
@@ -21,359 +20,127 @@
21#include <asm/synch.h> 20#include <asm/synch.h>
22#include <asm/ppc-opcode.h> 21#include <asm/ppc-opcode.h>
23 22
24/* Translate address of a vmalloc'd thing to a linear map address */ 23/* For now use fixed-size 16MB page table */
25static void *real_vmalloc_addr(void *x) 24#define HPT_ORDER 24
26{ 25#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
27 unsigned long addr = (unsigned long) x; 26#define HPT_HASH_MASK (HPT_NPTEG - 1)
28 pte_t *p;
29
30 p = find_linux_pte(swapper_pg_dir, addr);
31 if (!p || !pte_present(*p))
32 return NULL;
33 /* assume we don't have huge pages in vmalloc space... */
34 addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
35 return __va(addr);
36}
37
38/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
39static int global_invalidates(struct kvm *kvm, unsigned long flags)
40{
41 int global;
42
43 /*
44 * If there is only one vcore, and it's currently running,
45 * we can use tlbiel as long as we mark all other physical
46 * cores as potentially having stale TLB entries for this lpid.
47 * If we're not using MMU notifiers, we never take pages away
48 * from the guest, so we can use tlbiel if requested.
49 * Otherwise, don't use tlbiel.
50 */
51 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
52 global = 0;
53 else if (kvm->arch.using_mmu_notifiers)
54 global = 1;
55 else
56 global = !(flags & H_LOCAL);
57
58 if (!global) {
59 /* any other core might now have stale TLB entries... */
60 smp_wmb();
61 cpumask_setall(&kvm->arch.need_tlb_flush);
62 cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
63 &kvm->arch.need_tlb_flush);
64 }
65
66 return global;
67}
68
69/*
70 * Add this HPTE into the chain for the real page.
71 * Must be called with the chain locked; it unlocks the chain.
72 */
73void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
74 unsigned long *rmap, long pte_index, int realmode)
75{
76 struct revmap_entry *head, *tail;
77 unsigned long i;
78
79 if (*rmap & KVMPPC_RMAP_PRESENT) {
80 i = *rmap & KVMPPC_RMAP_INDEX;
81 head = &kvm->arch.revmap[i];
82 if (realmode)
83 head = real_vmalloc_addr(head);
84 tail = &kvm->arch.revmap[head->back];
85 if (realmode)
86 tail = real_vmalloc_addr(tail);
87 rev->forw = i;
88 rev->back = head->back;
89 tail->forw = pte_index;
90 head->back = pte_index;
91 } else {
92 rev->forw = rev->back = pte_index;
93 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
94 pte_index | KVMPPC_RMAP_PRESENT;
95 }
96 unlock_rmap(rmap);
97}
98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
99 27
100/* 28#define HPTE_V_HVLOCK 0x40UL
101 * Note modification of an HPTE; set the HPTE modified bit
102 * if anyone is interested.
103 */
104static inline void note_hpte_modification(struct kvm *kvm,
105 struct revmap_entry *rev)
106{
107 if (atomic_read(&kvm->arch.hpte_mod_interest))
108 rev->guest_rpte |= HPTE_GR_MODIFIED;
109}
110 29
111/* Remove this HPTE from the chain for a real page */ 30static inline long lock_hpte(unsigned long *hpte, unsigned long bits)
112static void remove_revmap_chain(struct kvm *kvm, long pte_index,
113 struct revmap_entry *rev,
114 unsigned long hpte_v, unsigned long hpte_r)
115{ 31{
116 struct revmap_entry *next, *prev; 32 unsigned long tmp, old;
117 unsigned long gfn, ptel, head;
118 struct kvm_memory_slot *memslot;
119 unsigned long *rmap;
120 unsigned long rcbits;
121
122 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
123 ptel = rev->guest_rpte |= rcbits;
124 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
125 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
126 if (!memslot)
127 return;
128 33
129 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 34 asm volatile(" ldarx %0,0,%2\n"
130 lock_rmap(rmap); 35 " and. %1,%0,%3\n"
131 36 " bne 2f\n"
132 head = *rmap & KVMPPC_RMAP_INDEX; 37 " ori %0,%0,%4\n"
133 next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]); 38 " stdcx. %0,0,%2\n"
134 prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]); 39 " beq+ 2f\n"
135 next->back = rev->back; 40 " li %1,%3\n"
136 prev->forw = rev->forw; 41 "2: isync"
137 if (head == pte_index) { 42 : "=&r" (tmp), "=&r" (old)
138 head = rev->forw; 43 : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
139 if (head == pte_index) 44 : "cc", "memory");
140 *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 45 return old == 0;
141 else
142 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
143 }
144 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
145 unlock_rmap(rmap);
146}
147
148static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
149 int writing, unsigned long *pte_sizep)
150{
151 pte_t *ptep;
152 unsigned long ps = *pte_sizep;
153 unsigned int shift;
154
155 ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
156 if (!ptep)
157 return __pte(0);
158 if (shift)
159 *pte_sizep = 1ul << shift;
160 else
161 *pte_sizep = PAGE_SIZE;
162 if (ps > *pte_sizep)
163 return __pte(0);
164 if (!pte_present(*ptep))
165 return __pte(0);
166 return kvmppc_read_update_linux_pte(ptep, writing);
167}
168
169static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
170{
171 asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
172 hpte[0] = hpte_v;
173} 46}
174 47
175long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, 48long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
176 long pte_index, unsigned long pteh, unsigned long ptel, 49 long pte_index, unsigned long pteh, unsigned long ptel)
177 pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
178{ 50{
179 unsigned long i, pa, gpa, gfn, psize; 51 unsigned long porder;
180 unsigned long slot_fn, hva; 52 struct kvm *kvm = vcpu->kvm;
53 unsigned long i, lpn, pa;
181 unsigned long *hpte; 54 unsigned long *hpte;
182 struct revmap_entry *rev;
183 unsigned long g_ptel;
184 struct kvm_memory_slot *memslot;
185 unsigned long *physp, pte_size;
186 unsigned long is_io;
187 unsigned long *rmap;
188 pte_t pte;
189 unsigned int writing;
190 unsigned long mmu_seq;
191 unsigned long rcbits;
192 55
193 psize = hpte_page_size(pteh, ptel); 56 /* only handle 4k, 64k and 16M pages for now */
194 if (!psize) 57 porder = 12;
195 return H_PARAMETER; 58 if (pteh & HPTE_V_LARGE) {
196 writing = hpte_is_writable(ptel); 59 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
197 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 60 (ptel & 0xf000) == 0x1000) {
198 ptel &= ~HPTE_GR_RESERVED; 61 /* 64k page */
199 g_ptel = ptel; 62 porder = 16;
200 63 } else if ((ptel & 0xff000) == 0) {
201 /* used later to detect if we might have been invalidated */ 64 /* 16M page */
202 mmu_seq = kvm->mmu_notifier_seq; 65 porder = 24;
203 smp_rmb(); 66 /* lowest AVA bit must be 0 for 16M pages */
204 67 if (pteh & 0x80)
205 /* Find the memslot (if any) for this address */ 68 return H_PARAMETER;
206 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 69 } else
207 gfn = gpa >> PAGE_SHIFT;
208 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
209 pa = 0;
210 is_io = ~0ul;
211 rmap = NULL;
212 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
213 /* PPC970 can't do emulated MMIO */
214 if (!cpu_has_feature(CPU_FTR_ARCH_206))
215 return H_PARAMETER; 70 return H_PARAMETER;
216 /* Emulated MMIO - mark this with key=31 */
217 pteh |= HPTE_V_ABSENT;
218 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
219 goto do_insert;
220 } 71 }
221 72 lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
222 /* Check if the requested page fits entirely in the memslot. */ 73 if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
223 if (!slot_is_aligned(memslot, psize))
224 return H_PARAMETER; 74 return H_PARAMETER;
225 slot_fn = gfn - memslot->base_gfn; 75 pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
226 rmap = &memslot->arch.rmap[slot_fn]; 76 if (!pa)
227
228 if (!kvm->arch.using_mmu_notifiers) {
229 physp = memslot->arch.slot_phys;
230 if (!physp)
231 return H_PARAMETER;
232 physp += slot_fn;
233 if (realmode)
234 physp = real_vmalloc_addr(physp);
235 pa = *physp;
236 if (!pa)
237 return H_TOO_HARD;
238 is_io = pa & (HPTE_R_I | HPTE_R_W);
239 pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
240 pa &= PAGE_MASK;
241 } else {
242 /* Translate to host virtual address */
243 hva = __gfn_to_hva_memslot(memslot, gfn);
244
245 /* Look up the Linux PTE for the backing page */
246 pte_size = psize;
247 pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
248 if (pte_present(pte)) {
249 if (writing && !pte_write(pte))
250 /* make the actual HPTE be read-only */
251 ptel = hpte_make_readonly(ptel);
252 is_io = hpte_cache_bits(pte_val(pte));
253 pa = pte_pfn(pte) << PAGE_SHIFT;
254 }
255 }
256
257 if (pte_size < psize)
258 return H_PARAMETER; 77 return H_PARAMETER;
259 if (pa && pte_size > psize)
260 pa |= gpa & (pte_size - 1);
261
262 ptel &= ~(HPTE_R_PP0 - psize);
263 ptel |= pa;
264
265 if (pa)
266 pteh |= HPTE_V_VALID;
267 else
268 pteh |= HPTE_V_ABSENT;
269
270 /* Check WIMG */ 78 /* Check WIMG */
271 if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) { 79 if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
272 if (is_io) 80 (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
273 return H_PARAMETER; 81 return H_PARAMETER;
274 /* 82 pteh &= ~0x60UL;
275 * Allow guest to map emulated device memory as 83 ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
276 * uncacheable, but actually make it cacheable. 84 ptel |= pa;
277 */ 85 if (pte_index >= (HPT_NPTEG << 3))
278 ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
279 ptel |= HPTE_R_M;
280 }
281
282 /* Find and lock the HPTEG slot to use */
283 do_insert:
284 if (pte_index >= kvm->arch.hpt_npte)
285 return H_PARAMETER; 86 return H_PARAMETER;
286 if (likely((flags & H_EXACT) == 0)) { 87 if (likely((flags & H_EXACT) == 0)) {
287 pte_index &= ~7UL; 88 pte_index &= ~7UL;
288 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 89 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
289 for (i = 0; i < 8; ++i) { 90 for (i = 0; ; ++i) {
91 if (i == 8)
92 return H_PTEG_FULL;
290 if ((*hpte & HPTE_V_VALID) == 0 && 93 if ((*hpte & HPTE_V_VALID) == 0 &&
291 try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 94 lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
292 HPTE_V_ABSENT))
293 break; 95 break;
294 hpte += 2; 96 hpte += 2;
295 } 97 }
296 if (i == 8) {
297 /*
298 * Since try_lock_hpte doesn't retry (not even stdcx.
299 * failures), it could be that there is a free slot
300 * but we transiently failed to lock it. Try again,
301 * actually locking each slot and checking it.
302 */
303 hpte -= 16;
304 for (i = 0; i < 8; ++i) {
305 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
306 cpu_relax();
307 if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
308 break;
309 *hpte &= ~HPTE_V_HVLOCK;
310 hpte += 2;
311 }
312 if (i == 8)
313 return H_PTEG_FULL;
314 }
315 pte_index += i;
316 } else { 98 } else {
99 i = 0;
317 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 100 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
318 if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 101 if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
319 HPTE_V_ABSENT)) { 102 return H_PTEG_FULL;
320 /* Lock the slot and check again */
321 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
322 cpu_relax();
323 if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
324 *hpte &= ~HPTE_V_HVLOCK;
325 return H_PTEG_FULL;
326 }
327 }
328 } 103 }
329
330 /* Save away the guest's idea of the second HPTE dword */
331 rev = &kvm->arch.revmap[pte_index];
332 if (realmode)
333 rev = real_vmalloc_addr(rev);
334 if (rev) {
335 rev->guest_rpte = g_ptel;
336 note_hpte_modification(kvm, rev);
337 }
338
339 /* Link HPTE into reverse-map chain */
340 if (pteh & HPTE_V_VALID) {
341 if (realmode)
342 rmap = real_vmalloc_addr(rmap);
343 lock_rmap(rmap);
344 /* Check for pending invalidations under the rmap chain lock */
345 if (kvm->arch.using_mmu_notifiers &&
346 mmu_notifier_retry(kvm, mmu_seq)) {
347 /* inval in progress, write a non-present HPTE */
348 pteh |= HPTE_V_ABSENT;
349 pteh &= ~HPTE_V_VALID;
350 unlock_rmap(rmap);
351 } else {
352 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
353 realmode);
354 /* Only set R/C in real HPTE if already set in *rmap */
355 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
356 ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
357 }
358 }
359
360 hpte[1] = ptel; 104 hpte[1] = ptel;
361
362 /* Write the first HPTE dword, unlocking the HPTE and making it valid */
363 eieio(); 105 eieio();
364 hpte[0] = pteh; 106 hpte[0] = pteh;
365 asm volatile("ptesync" : : : "memory"); 107 asm volatile("ptesync" : : : "memory");
366 108 atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt);
367 *pte_idx_ret = pte_index; 109 vcpu->arch.gpr[4] = pte_index + i;
368 return H_SUCCESS; 110 return H_SUCCESS;
369} 111}
370EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
371 112
372long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 113static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
373 long pte_index, unsigned long pteh, unsigned long ptel) 114 unsigned long pte_index)
374{ 115{
375 return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, 116 unsigned long rb, va_low;
376 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]); 117
118 rb = (v & ~0x7fUL) << 16; /* AVA field */
119 va_low = pte_index >> 3;
120 if (v & HPTE_V_SECONDARY)
121 va_low = ~va_low;
122 /* xor vsid from AVA */
123 if (!(v & HPTE_V_1TB_SEG))
124 va_low ^= v >> 12;
125 else
126 va_low ^= v >> 24;
127 va_low &= 0x7ff;
128 if (v & HPTE_V_LARGE) {
129 rb |= 1; /* L field */
130 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
131 (r & 0xff000)) {
132 /* non-16MB large page, must be 64k */
133 /* (masks depend on page size) */
134 rb |= 0x1000; /* page encoding in LP field */
135 rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
136 rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
137 }
138 } else {
139 /* 4kB page */
140 rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
141 }
142 rb |= (v >> 54) & 0x300; /* B field */
143 return rb;
377} 144}
378 145
379#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 146#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -396,182 +163,123 @@ static inline int try_lock_tlbie(unsigned int *lock)
396 return old == 0; 163 return old == 0;
397} 164}
398 165
399long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, 166long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
400 unsigned long pte_index, unsigned long avpn, 167 unsigned long pte_index, unsigned long avpn,
401 unsigned long *hpret) 168 unsigned long va)
402{ 169{
170 struct kvm *kvm = vcpu->kvm;
403 unsigned long *hpte; 171 unsigned long *hpte;
404 unsigned long v, r, rb; 172 unsigned long v, r, rb;
405 struct revmap_entry *rev;
406 173
407 if (pte_index >= kvm->arch.hpt_npte) 174 if (pte_index >= (HPT_NPTEG << 3))
408 return H_PARAMETER; 175 return H_PARAMETER;
409 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 176 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
410 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 177 while (!lock_hpte(hpte, HPTE_V_HVLOCK))
411 cpu_relax(); 178 cpu_relax();
412 if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 179 if ((hpte[0] & HPTE_V_VALID) == 0 ||
413 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || 180 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
414 ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { 181 ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
415 hpte[0] &= ~HPTE_V_HVLOCK; 182 hpte[0] &= ~HPTE_V_HVLOCK;
416 return H_NOT_FOUND; 183 return H_NOT_FOUND;
417 } 184 }
418 185 if (atomic_read(&kvm->online_vcpus) == 1)
419 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 186 flags |= H_LOCAL;
420 v = hpte[0] & ~HPTE_V_HVLOCK; 187 vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
421 if (v & HPTE_V_VALID) { 188 vcpu->arch.gpr[5] = r = hpte[1];
422 hpte[0] &= ~HPTE_V_VALID; 189 rb = compute_tlbie_rb(v, r, pte_index);
423 rb = compute_tlbie_rb(v, hpte[1], pte_index); 190 hpte[0] = 0;
424 if (global_invalidates(kvm, flags)) { 191 if (!(flags & H_LOCAL)) {
425 while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) 192 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
426 cpu_relax(); 193 cpu_relax();
427 asm volatile("ptesync" : : : "memory"); 194 asm volatile("ptesync" : : : "memory");
428 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" 195 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
429 : : "r" (rb), "r" (kvm->arch.lpid)); 196 : : "r" (rb), "r" (kvm->arch.lpid));
430 asm volatile("ptesync" : : : "memory"); 197 asm volatile("ptesync" : : : "memory");
431 kvm->arch.tlbie_lock = 0; 198 kvm->arch.tlbie_lock = 0;
432 } else { 199 } else {
433 asm volatile("ptesync" : : : "memory"); 200 asm volatile("ptesync" : : : "memory");
434 asm volatile("tlbiel %0" : : "r" (rb)); 201 asm volatile("tlbiel %0" : : "r" (rb));
435 asm volatile("ptesync" : : : "memory"); 202 asm volatile("ptesync" : : : "memory");
436 }
437 /* Read PTE low word after tlbie to get final R/C values */
438 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
439 } 203 }
440 r = rev->guest_rpte & ~HPTE_GR_RESERVED;
441 note_hpte_modification(kvm, rev);
442 unlock_hpte(hpte, 0);
443
444 hpret[0] = v;
445 hpret[1] = r;
446 return H_SUCCESS; 204 return H_SUCCESS;
447} 205}
448EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
449
450long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
451 unsigned long pte_index, unsigned long avpn)
452{
453 return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
454 &vcpu->arch.gpr[4]);
455}
456 206
457long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) 207long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
458{ 208{
459 struct kvm *kvm = vcpu->kvm; 209 struct kvm *kvm = vcpu->kvm;
460 unsigned long *args = &vcpu->arch.gpr[4]; 210 unsigned long *args = &vcpu->arch.gpr[4];
461 unsigned long *hp, *hptes[4], tlbrb[4]; 211 unsigned long *hp, tlbrb[4];
462 long int i, j, k, n, found, indexes[4]; 212 long int i, found;
463 unsigned long flags, req, pte_index, rcbits; 213 long int n_inval = 0;
214 unsigned long flags, req, pte_index;
464 long int local = 0; 215 long int local = 0;
465 long int ret = H_SUCCESS; 216 long int ret = H_SUCCESS;
466 struct revmap_entry *rev, *revs[4];
467 217
468 if (atomic_read(&kvm->online_vcpus) == 1) 218 if (atomic_read(&kvm->online_vcpus) == 1)
469 local = 1; 219 local = 1;
470 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 220 for (i = 0; i < 4; ++i) {
471 n = 0; 221 pte_index = args[i * 2];
472 for (; i < 4; ++i) { 222 flags = pte_index >> 56;
473 j = i * 2; 223 pte_index &= ((1ul << 56) - 1);
474 pte_index = args[j]; 224 req = flags >> 6;
475 flags = pte_index >> 56; 225 flags &= 3;
476 pte_index &= ((1ul << 56) - 1); 226 if (req == 3)
477 req = flags >> 6; 227 break;
478 flags &= 3; 228 if (req != 1 || flags == 3 ||
479 if (req == 3) { /* no more requests */ 229 pte_index >= (HPT_NPTEG << 3)) {
480 i = 4; 230 /* parameter error */
231 args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
232 ret = H_PARAMETER;
233 break;
234 }
235 hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
236 while (!lock_hpte(hp, HPTE_V_HVLOCK))
237 cpu_relax();
238 found = 0;
239 if (hp[0] & HPTE_V_VALID) {
240 switch (flags & 3) {
241 case 0: /* absolute */
242 found = 1;
481 break; 243 break;
482 } 244 case 1: /* andcond */
483 if (req != 1 || flags == 3 || 245 if (!(hp[0] & args[i * 2 + 1]))
484 pte_index >= kvm->arch.hpt_npte) { 246 found = 1;
485 /* parameter error */
486 args[j] = ((0xa0 | flags) << 56) + pte_index;
487 ret = H_PARAMETER;
488 break; 247 break;
489 } 248 case 2: /* AVPN */
490 hp = (unsigned long *) 249 if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
491 (kvm->arch.hpt_virt + (pte_index << 4));
492 /* to avoid deadlock, don't spin except for first */
493 if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
494 if (n)
495 break;
496 while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
497 cpu_relax();
498 }
499 found = 0;
500 if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
501 switch (flags & 3) {
502 case 0: /* absolute */
503 found = 1; 250 found = 1;
504 break; 251 break;
505 case 1: /* andcond */
506 if (!(hp[0] & args[j + 1]))
507 found = 1;
508 break;
509 case 2: /* AVPN */
510 if ((hp[0] & ~0x7fUL) == args[j + 1])
511 found = 1;
512 break;
513 }
514 }
515 if (!found) {
516 hp[0] &= ~HPTE_V_HVLOCK;
517 args[j] = ((0x90 | flags) << 56) + pte_index;
518 continue;
519 }
520
521 args[j] = ((0x80 | flags) << 56) + pte_index;
522 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
523 note_hpte_modification(kvm, rev);
524
525 if (!(hp[0] & HPTE_V_VALID)) {
526 /* insert R and C bits from PTE */
527 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
528 args[j] |= rcbits << (56 - 5);
529 hp[0] = 0;
530 continue;
531 } 252 }
532
533 hp[0] &= ~HPTE_V_VALID; /* leave it locked */
534 tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
535 indexes[n] = j;
536 hptes[n] = hp;
537 revs[n] = rev;
538 ++n;
539 }
540
541 if (!n)
542 break;
543
544 /* Now that we've collected a batch, do the tlbies */
545 if (!local) {
546 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
547 cpu_relax();
548 asm volatile("ptesync" : : : "memory");
549 for (k = 0; k < n; ++k)
550 asm volatile(PPC_TLBIE(%1,%0) : :
551 "r" (tlbrb[k]),
552 "r" (kvm->arch.lpid));
553 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
554 kvm->arch.tlbie_lock = 0;
555 } else {
556 asm volatile("ptesync" : : : "memory");
557 for (k = 0; k < n; ++k)
558 asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
559 asm volatile("ptesync" : : : "memory");
560 } 253 }
561 254 if (!found) {
562 /* Read PTE low words after tlbie to get final R/C values */ 255 hp[0] &= ~HPTE_V_HVLOCK;
563 for (k = 0; k < n; ++k) { 256 args[i * 2] = ((0x90 | flags) << 56) + pte_index;
564 j = indexes[k]; 257 continue;
565 pte_index = args[j] & ((1ul << 56) - 1);
566 hp = hptes[k];
567 rev = revs[k];
568 remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
569 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
570 args[j] |= rcbits << (56 - 5);
571 hp[0] = 0;
572 } 258 }
259 /* insert R and C bits from PTE */
260 flags |= (hp[1] >> 5) & 0x0c;
261 args[i * 2] = ((0x80 | flags) << 56) + pte_index;
262 tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
263 hp[0] = 0;
264 }
265 if (n_inval == 0)
266 return ret;
267
268 if (!local) {
269 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
270 cpu_relax();
271 asm volatile("ptesync" : : : "memory");
272 for (i = 0; i < n_inval; ++i)
273 asm volatile(PPC_TLBIE(%1,%0)
274 : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
275 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
276 kvm->arch.tlbie_lock = 0;
277 } else {
278 asm volatile("ptesync" : : : "memory");
279 for (i = 0; i < n_inval; ++i)
280 asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
281 asm volatile("ptesync" : : : "memory");
573 } 282 }
574
575 return ret; 283 return ret;
576} 284}
577 285
@@ -581,76 +289,40 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
581{ 289{
582 struct kvm *kvm = vcpu->kvm; 290 struct kvm *kvm = vcpu->kvm;
583 unsigned long *hpte; 291 unsigned long *hpte;
584 struct revmap_entry *rev; 292 unsigned long v, r, rb;
585 unsigned long v, r, rb, mask, bits;
586 293
587 if (pte_index >= kvm->arch.hpt_npte) 294 if (pte_index >= (HPT_NPTEG << 3))
588 return H_PARAMETER; 295 return H_PARAMETER;
589
590 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 296 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
591 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 297 while (!lock_hpte(hpte, HPTE_V_HVLOCK))
592 cpu_relax(); 298 cpu_relax();
593 if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 299 if ((hpte[0] & HPTE_V_VALID) == 0 ||
594 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { 300 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
595 hpte[0] &= ~HPTE_V_HVLOCK; 301 hpte[0] &= ~HPTE_V_HVLOCK;
596 return H_NOT_FOUND; 302 return H_NOT_FOUND;
597 } 303 }
598 304 if (atomic_read(&kvm->online_vcpus) == 1)
305 flags |= H_LOCAL;
599 v = hpte[0]; 306 v = hpte[0];
600 bits = (flags << 55) & HPTE_R_PP0; 307 r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
601 bits |= (flags << 48) & HPTE_R_KEY_HI; 308 HPTE_R_KEY_HI | HPTE_R_KEY_LO);
602 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 309 r |= (flags << 55) & HPTE_R_PP0;
603 310 r |= (flags << 48) & HPTE_R_KEY_HI;
604 /* Update guest view of 2nd HPTE dword */ 311 r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
605 mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | 312 rb = compute_tlbie_rb(v, r, pte_index);
606 HPTE_R_KEY_HI | HPTE_R_KEY_LO; 313 hpte[0] = v & ~HPTE_V_VALID;
607 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 314 if (!(flags & H_LOCAL)) {
608 if (rev) { 315 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
609 r = (rev->guest_rpte & ~mask) | bits; 316 cpu_relax();
610 rev->guest_rpte = r; 317 asm volatile("ptesync" : : : "memory");
611 note_hpte_modification(kvm, rev); 318 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
612 } 319 : : "r" (rb), "r" (kvm->arch.lpid));
613 r = (hpte[1] & ~mask) | bits; 320 asm volatile("ptesync" : : : "memory");
614 321 kvm->arch.tlbie_lock = 0;
615 /* Update HPTE */ 322 } else {
616 if (v & HPTE_V_VALID) { 323 asm volatile("ptesync" : : : "memory");
617 rb = compute_tlbie_rb(v, r, pte_index); 324 asm volatile("tlbiel %0" : : "r" (rb));
618 hpte[0] = v & ~HPTE_V_VALID; 325 asm volatile("ptesync" : : : "memory");
619 if (global_invalidates(kvm, flags)) {
620 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
621 cpu_relax();
622 asm volatile("ptesync" : : : "memory");
623 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
624 : : "r" (rb), "r" (kvm->arch.lpid));
625 asm volatile("ptesync" : : : "memory");
626 kvm->arch.tlbie_lock = 0;
627 } else {
628 asm volatile("ptesync" : : : "memory");
629 asm volatile("tlbiel %0" : : "r" (rb));
630 asm volatile("ptesync" : : : "memory");
631 }
632 /*
633 * If the host has this page as readonly but the guest
634 * wants to make it read/write, reduce the permissions.
635 * Checking the host permissions involves finding the
636 * memslot and then the Linux PTE for the page.
637 */
638 if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
639 unsigned long psize, gfn, hva;
640 struct kvm_memory_slot *memslot;
641 pgd_t *pgdir = vcpu->arch.pgdir;
642 pte_t pte;
643
644 psize = hpte_page_size(v, r);
645 gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
646 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
647 if (memslot) {
648 hva = __gfn_to_hva_memslot(memslot, gfn);
649 pte = lookup_linux_pte(pgdir, hva, 1, &psize);
650 if (pte_present(pte) && !pte_write(pte))
651 r = hpte_make_readonly(r);
652 }
653 }
654 } 326 }
655 hpte[1] = r; 327 hpte[1] = r;
656 eieio(); 328 eieio();
@@ -659,245 +331,40 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
659 return H_SUCCESS; 331 return H_SUCCESS;
660} 332}
661 333
334static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
335{
336 long int i;
337 unsigned long offset, rpn;
338
339 offset = realaddr & (kvm->arch.ram_psize - 1);
340 rpn = (realaddr - offset) >> PAGE_SHIFT;
341 for (i = 0; i < kvm->arch.ram_npages; ++i)
342 if (rpn == kvm->arch.ram_pginfo[i].pfn)
343 return (i << PAGE_SHIFT) + offset;
344 return HPTE_R_RPN; /* all 1s in the RPN field */
345}
346
662long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, 347long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
663 unsigned long pte_index) 348 unsigned long pte_index)
664{ 349{
665 struct kvm *kvm = vcpu->kvm; 350 struct kvm *kvm = vcpu->kvm;
666 unsigned long *hpte, v, r; 351 unsigned long *hpte, r;
667 int i, n = 1; 352 int i, n = 1;
668 struct revmap_entry *rev = NULL;
669 353
670 if (pte_index >= kvm->arch.hpt_npte) 354 if (pte_index >= (HPT_NPTEG << 3))
671 return H_PARAMETER; 355 return H_PARAMETER;
672 if (flags & H_READ_4) { 356 if (flags & H_READ_4) {
673 pte_index &= ~3; 357 pte_index &= ~3;
674 n = 4; 358 n = 4;
675 } 359 }
676 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
677 for (i = 0; i < n; ++i, ++pte_index) { 360 for (i = 0; i < n; ++i, ++pte_index) {
678 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); 361 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
679 v = hpte[0] & ~HPTE_V_HVLOCK;
680 r = hpte[1]; 362 r = hpte[1];
681 if (v & HPTE_V_ABSENT) { 363 if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID))
682 v &= ~HPTE_V_ABSENT; 364 r = reverse_xlate(kvm, r & HPTE_R_RPN) |
683 v |= HPTE_V_VALID; 365 (r & ~HPTE_R_RPN);
684 } 366 vcpu->arch.gpr[4 + i * 2] = hpte[0];
685 if (v & HPTE_V_VALID) {
686 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
687 r &= ~HPTE_GR_RESERVED;
688 }
689 vcpu->arch.gpr[4 + i * 2] = v;
690 vcpu->arch.gpr[5 + i * 2] = r; 367 vcpu->arch.gpr[5 + i * 2] = r;
691 } 368 }
692 return H_SUCCESS; 369 return H_SUCCESS;
693} 370}
694
695void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
696 unsigned long pte_index)
697{
698 unsigned long rb;
699
700 hptep[0] &= ~HPTE_V_VALID;
701 rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
702 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
703 cpu_relax();
704 asm volatile("ptesync" : : : "memory");
705 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
706 : : "r" (rb), "r" (kvm->arch.lpid));
707 asm volatile("ptesync" : : : "memory");
708 kvm->arch.tlbie_lock = 0;
709}
710EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
711
712void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
713 unsigned long pte_index)
714{
715 unsigned long rb;
716 unsigned char rbyte;
717
718 rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
719 rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
720 /* modify only the second-last byte, which contains the ref bit */
721 *((char *)hptep + 14) = rbyte;
722 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
723 cpu_relax();
724 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
725 : : "r" (rb), "r" (kvm->arch.lpid));
726 asm volatile("ptesync" : : : "memory");
727 kvm->arch.tlbie_lock = 0;
728}
729EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
730
731static int slb_base_page_shift[4] = {
732 24, /* 16M */
733 16, /* 64k */
734 34, /* 16G */
735 20, /* 1M, unsupported */
736};
737
738long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
739 unsigned long valid)
740{
741 unsigned int i;
742 unsigned int pshift;
743 unsigned long somask;
744 unsigned long vsid, hash;
745 unsigned long avpn;
746 unsigned long *hpte;
747 unsigned long mask, val;
748 unsigned long v, r;
749
750 /* Get page shift, work out hash and AVPN etc. */
751 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
752 val = 0;
753 pshift = 12;
754 if (slb_v & SLB_VSID_L) {
755 mask |= HPTE_V_LARGE;
756 val |= HPTE_V_LARGE;
757 pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
758 }
759 if (slb_v & SLB_VSID_B_1T) {
760 somask = (1UL << 40) - 1;
761 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
762 vsid ^= vsid << 25;
763 } else {
764 somask = (1UL << 28) - 1;
765 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
766 }
767 hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
768 avpn = slb_v & ~(somask >> 16); /* also includes B */
769 avpn |= (eaddr & somask) >> 16;
770
771 if (pshift >= 24)
772 avpn &= ~((1UL << (pshift - 16)) - 1);
773 else
774 avpn &= ~0x7fUL;
775 val |= avpn;
776
777 for (;;) {
778 hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));
779
780 for (i = 0; i < 16; i += 2) {
781 /* Read the PTE racily */
782 v = hpte[i] & ~HPTE_V_HVLOCK;
783
784 /* Check valid/absent, hash, segment size and AVPN */
785 if (!(v & valid) || (v & mask) != val)
786 continue;
787
788 /* Lock the PTE and read it under the lock */
789 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
790 cpu_relax();
791 v = hpte[i] & ~HPTE_V_HVLOCK;
792 r = hpte[i+1];
793
794 /*
795 * Check the HPTE again, including large page size
796 * Since we don't currently allow any MPSS (mixed
797 * page-size segment) page sizes, it is sufficient
798 * to check against the actual page size.
799 */
800 if ((v & valid) && (v & mask) == val &&
801 hpte_page_size(v, r) == (1ul << pshift))
802 /* Return with the HPTE still locked */
803 return (hash << 3) + (i >> 1);
804
805 /* Unlock and move on */
806 hpte[i] = v;
807 }
808
809 if (val & HPTE_V_SECONDARY)
810 break;
811 val |= HPTE_V_SECONDARY;
812 hash = hash ^ kvm->arch.hpt_mask;
813 }
814 return -1;
815}
816EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
817
818/*
819 * Called in real mode to check whether an HPTE not found fault
820 * is due to accessing a paged-out page or an emulated MMIO page,
821 * or if a protection fault is due to accessing a page that the
822 * guest wanted read/write access to but which we made read-only.
823 * Returns a possibly modified status (DSISR) value if not
824 * (i.e. pass the interrupt to the guest),
825 * -1 to pass the fault up to host kernel mode code, -2 to do that
826 * and also load the instruction word (for MMIO emulation),
827 * or 0 if we should make the guest retry the access.
828 */
829long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
830 unsigned long slb_v, unsigned int status, bool data)
831{
832 struct kvm *kvm = vcpu->kvm;
833 long int index;
834 unsigned long v, r, gr;
835 unsigned long *hpte;
836 unsigned long valid;
837 struct revmap_entry *rev;
838 unsigned long pp, key;
839
840 /* For protection fault, expect to find a valid HPTE */
841 valid = HPTE_V_VALID;
842 if (status & DSISR_NOHPTE)
843 valid |= HPTE_V_ABSENT;
844
845 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
846 if (index < 0) {
847 if (status & DSISR_NOHPTE)
848 return status; /* there really was no HPTE */
849 return 0; /* for prot fault, HPTE disappeared */
850 }
851 hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
852 v = hpte[0] & ~HPTE_V_HVLOCK;
853 r = hpte[1];
854 rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
855 gr = rev->guest_rpte;
856
857 unlock_hpte(hpte, v);
858
859 /* For not found, if the HPTE is valid by now, retry the instruction */
860 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
861 return 0;
862
863 /* Check access permissions to the page */
864 pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
865 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
866 status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
867 if (!data) {
868 if (gr & (HPTE_R_N | HPTE_R_G))
869 return status | SRR1_ISI_N_OR_G;
870 if (!hpte_read_permission(pp, slb_v & key))
871 return status | SRR1_ISI_PROT;
872 } else if (status & DSISR_ISSTORE) {
873 /* check write permission */
874 if (!hpte_write_permission(pp, slb_v & key))
875 return status | DSISR_PROTFAULT;
876 } else {
877 if (!hpte_read_permission(pp, slb_v & key))
878 return status | DSISR_PROTFAULT;
879 }
880
881 /* Check storage key, if applicable */
882 if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
883 unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
884 if (status & DSISR_ISSTORE)
885 perm >>= 1;
886 if (perm & 1)
887 return status | DSISR_KEYFAULT;
888 }
889
890 /* Save HPTE info for virtual-mode handler */
891 vcpu->arch.pgfault_addr = addr;
892 vcpu->arch.pgfault_index = index;
893 vcpu->arch.pgfault_hpte[0] = v;
894 vcpu->arch.pgfault_hpte[1] = r;
895
896 /* Check the storage key to see if it is possibly emulated MMIO */
897 if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
898 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
899 (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
900 return -2; /* MMIO emulation - load instr word */
901
902 return -1; /* send fault up to host kernel mode */
903}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 10b6c358dd7..de2950135e6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -20,14 +20,9 @@
20#include <asm/ppc_asm.h> 20#include <asm/ppc_asm.h>
21#include <asm/kvm_asm.h> 21#include <asm/kvm_asm.h>
22#include <asm/reg.h> 22#include <asm/reg.h>
23#include <asm/mmu.h>
24#include <asm/page.h> 23#include <asm/page.h>
25#include <asm/ptrace.h>
26#include <asm/hvcall.h>
27#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
28#include <asm/exception-64s.h> 25#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h>
30#include <asm/mmu-hash64.h>
31 26
32/***************************************************************************** 27/*****************************************************************************
33 * * 28 * *
@@ -54,7 +49,7 @@ kvmppc_skip_Hinterrupt:
54 b . 49 b .
55 50
56/* 51/*
57 * Call kvmppc_hv_entry in real mode. 52 * Call kvmppc_handler_trampoline_enter in real mode.
58 * Must be called with interrupts hard-disabled. 53 * Must be called with interrupts hard-disabled.
59 * 54 *
60 * Input Registers: 55 * Input Registers:
@@ -73,6 +68,9 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
73 mtsrr1 r6 68 mtsrr1 r6
74 RFI 69 RFI
75 70
71#define ULONG_SIZE 8
72#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
73
76/****************************************************************************** 74/******************************************************************************
77 * * 75 * *
78 * Entry code * 76 * Entry code *
@@ -81,7 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
81 79
82#define XICS_XIRR 4 80#define XICS_XIRR 4
83#define XICS_QIRR 0xc 81#define XICS_QIRR 0xc
84#define XICS_IPI 2 /* interrupt source # for IPIs */
85 82
86/* 83/*
87 * We come in here when wakened from nap mode on a secondary hw thread. 84 * We come in here when wakened from nap mode on a secondary hw thread.
@@ -92,59 +89,19 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
92kvm_start_guest: 89kvm_start_guest:
93 ld r1,PACAEMERGSP(r13) 90 ld r1,PACAEMERGSP(r13)
94 subi r1,r1,STACK_FRAME_OVERHEAD 91 subi r1,r1,STACK_FRAME_OVERHEAD
95 ld r2,PACATOC(r13)
96
97 li r0,KVM_HWTHREAD_IN_KVM
98 stb r0,HSTATE_HWTHREAD_STATE(r13)
99 92
100 /* NV GPR values from power7_idle() will no longer be valid */ 93 /* get vcpu pointer */
101 li r0,1 94 ld r4, HSTATE_KVM_VCPU(r13)
102 stb r0,PACA_NAPSTATELOST(r13)
103 95
104 /* get vcpu pointer, NULL if we have no vcpu to run */ 96 /* We got here with an IPI; clear it */
105 ld r4,HSTATE_KVM_VCPU(r13) 97 ld r5, HSTATE_XICS_PHYS(r13)
106 cmpdi cr1,r4,0 98 li r0, 0xff
107 99 li r6, XICS_QIRR
108 /* Check the wake reason in SRR1 to see why we got here */ 100 li r7, XICS_XIRR
109 mfspr r3,SPRN_SRR1 101 lwzcix r8, r5, r7 /* ack the interrupt */
110 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
111 cmpwi r3,4 /* was it an external interrupt? */
112 bne 27f
113
114 /*
115 * External interrupt - for now assume it is an IPI, since we
116 * should never get any other interrupts sent to offline threads.
117 * Only do this for secondary threads.
118 */
119 beq cr1,25f
120 lwz r3,VCPU_PTID(r4)
121 cmpwi r3,0
122 beq 27f
12325: ld r5,HSTATE_XICS_PHYS(r13)
124 li r0,0xff
125 li r6,XICS_QIRR
126 li r7,XICS_XIRR
127 lwzcix r8,r5,r7 /* get and ack the interrupt */
128 sync 102 sync
129 clrldi. r9,r8,40 /* get interrupt source ID. */ 103 stbcix r0, r5, r6 /* clear it */
130 beq 27f /* none there? */ 104 stwcix r8, r5, r7 /* EOI it */
131 cmpwi r9,XICS_IPI
132 bne 26f
133 stbcix r0,r5,r6 /* clear IPI */
13426: stwcix r8,r5,r7 /* EOI the interrupt */
135
13627: /* XXX should handle hypervisor maintenance interrupts etc. here */
137
138 /* reload vcpu pointer after clearing the IPI */
139 ld r4,HSTATE_KVM_VCPU(r13)
140 cmpdi r4,0
141 /* if we have no vcpu to run, go back to sleep */
142 beq kvm_no_guest
143
144 /* were we napping due to cede? */
145 lbz r0,HSTATE_NAPPING(r13)
146 cmpwi r0,0
147 bne kvm_end_cede
148 105
149.global kvmppc_hv_entry 106.global kvmppc_hv_entry
150kvmppc_hv_entry: 107kvmppc_hv_entry:
@@ -160,15 +117,24 @@ kvmppc_hv_entry:
160 mflr r0 117 mflr r0
161 std r0, HSTATE_VMHANDLER(r13) 118 std r0, HSTATE_VMHANDLER(r13)
162 119
163 /* Set partition DABR */ 120 ld r14, VCPU_GPR(r14)(r4)
164 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ 121 ld r15, VCPU_GPR(r15)(r4)
165 li r5,3 122 ld r16, VCPU_GPR(r16)(r4)
166 ld r6,VCPU_DABR(r4) 123 ld r17, VCPU_GPR(r17)(r4)
167 mtspr SPRN_DABRX,r5 124 ld r18, VCPU_GPR(r18)(r4)
168 mtspr SPRN_DABR,r6 125 ld r19, VCPU_GPR(r19)(r4)
169BEGIN_FTR_SECTION 126 ld r20, VCPU_GPR(r20)(r4)
170 isync 127 ld r21, VCPU_GPR(r21)(r4)
171END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 128 ld r22, VCPU_GPR(r22)(r4)
129 ld r23, VCPU_GPR(r23)(r4)
130 ld r24, VCPU_GPR(r24)(r4)
131 ld r25, VCPU_GPR(r25)(r4)
132 ld r26, VCPU_GPR(r26)(r4)
133 ld r27, VCPU_GPR(r27)(r4)
134 ld r28, VCPU_GPR(r28)(r4)
135 ld r29, VCPU_GPR(r29)(r4)
136 ld r30, VCPU_GPR(r30)(r4)
137 ld r31, VCPU_GPR(r31)(r4)
172 138
173 /* Load guest PMU registers */ 139 /* Load guest PMU registers */
174 /* R4 is live here (vcpu pointer) */ 140 /* R4 is live here (vcpu pointer) */
@@ -207,25 +173,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
207 /* Load up FP, VMX and VSX registers */ 173 /* Load up FP, VMX and VSX registers */
208 bl kvmppc_load_fp 174 bl kvmppc_load_fp
209 175
210 ld r14, VCPU_GPR(R14)(r4)
211 ld r15, VCPU_GPR(R15)(r4)
212 ld r16, VCPU_GPR(R16)(r4)
213 ld r17, VCPU_GPR(R17)(r4)
214 ld r18, VCPU_GPR(R18)(r4)
215 ld r19, VCPU_GPR(R19)(r4)
216 ld r20, VCPU_GPR(R20)(r4)
217 ld r21, VCPU_GPR(R21)(r4)
218 ld r22, VCPU_GPR(R22)(r4)
219 ld r23, VCPU_GPR(R23)(r4)
220 ld r24, VCPU_GPR(R24)(r4)
221 ld r25, VCPU_GPR(R25)(r4)
222 ld r26, VCPU_GPR(R26)(r4)
223 ld r27, VCPU_GPR(R27)(r4)
224 ld r28, VCPU_GPR(R28)(r4)
225 ld r29, VCPU_GPR(R29)(r4)
226 ld r30, VCPU_GPR(R30)(r4)
227 ld r31, VCPU_GPR(R31)(r4)
228
229BEGIN_FTR_SECTION 176BEGIN_FTR_SECTION
230 /* Switch DSCR to guest value */ 177 /* Switch DSCR to guest value */
231 ld r5, VCPU_DSCR(r4) 178 ld r5, VCPU_DSCR(r4)
@@ -267,6 +214,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
267 mtspr SPRN_DAR, r5 214 mtspr SPRN_DAR, r5
268 mtspr SPRN_DSISR, r6 215 mtspr SPRN_DSISR, r6
269 216
217 /* Set partition DABR */
218 li r5,3
219 ld r6,VCPU_DABR(r4)
220 mtspr SPRN_DABRX,r5
221 mtspr SPRN_DABR,r6
222
270BEGIN_FTR_SECTION 223BEGIN_FTR_SECTION
271 /* Restore AMR and UAMOR, set AMOR to all 1s */ 224 /* Restore AMR and UAMOR, set AMOR to all 1s */
272 ld r5,VCPU_AMR(r4) 225 ld r5,VCPU_AMR(r4)
@@ -314,33 +267,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
314 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 267 mtspr SPRN_SDR1,r6 /* switch to partition page table */
315 mtspr SPRN_LPID,r7 268 mtspr SPRN_LPID,r7
316 isync 269 isync
317
318 /* See if we need to flush the TLB */
319 lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
320 clrldi r7,r6,64-6 /* extract bit number (6 bits) */
321 srdi r6,r6,6 /* doubleword number */
322 sldi r6,r6,3 /* address offset */
323 add r6,r6,r9
324 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
325 li r0,1 270 li r0,1
326 sld r0,r0,r7
327 ld r7,0(r6)
328 and. r7,r7,r0
329 beq 22f
33023: ldarx r7,0,r6 /* if set, clear the bit */
331 andc r7,r7,r0
332 stdcx. r7,0,r6
333 bne 23b
334 li r6,128 /* and flush the TLB */
335 mtctr r6
336 li r7,0x800 /* IS field = 0b10 */
337 ptesync
33828: tlbiel r7
339 addi r7,r7,0x1000
340 bdnz 28b
341 ptesync
342
34322: li r0,1
344 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ 271 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
345 b 10f 272 b 10f
346 273
@@ -349,9 +276,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
349 cmpwi r0,0 276 cmpwi r0,0
350 beq 20b 277 beq 20b
351 278
352 /* Set LPCR and RMOR. */ 279 /* Set LPCR. Set the MER bit if there is a pending external irq. */
35310: ld r8,KVM_LPCR(r9) 28010: ld r8,KVM_LPCR(r9)
354 mtspr SPRN_LPCR,r8 281 ld r0,VCPU_PENDING_EXC(r4)
282 li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
283 oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
284 and. r0,r0,r7
285 beq 11f
286 ori r8,r8,LPCR_MER
28711: mtspr SPRN_LPCR,r8
355 ld r8,KVM_RMOR(r9) 288 ld r8,KVM_RMOR(r9)
356 mtspr SPRN_RMOR,r8 289 mtspr SPRN_RMOR,r8
357 isync 290 isync
@@ -363,6 +296,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
363 mr r9,r4 296 mr r9,r4
364 blt hdec_soon 297 blt hdec_soon
365 298
299 /*
300 * Invalidate the TLB if we could possibly have stale TLB
301 * entries for this partition on this core due to the use
302 * of tlbiel.
303 * XXX maybe only need this on primary thread?
304 */
305 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
306 lwz r5,VCPU_VCPUID(r4)
307 lhz r6,PACAPACAINDEX(r13)
308 rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
309 lhz r8,VCPU_LAST_CPU(r4)
310 sldi r7,r6,1 /* see if this is the same vcpu */
311 add r7,r7,r9 /* as last ran on this pcpu */
312 lhz r0,KVM_LAST_VCPU(r7)
313 cmpw r6,r8 /* on the same cpu core as last time? */
314 bne 3f
315 cmpw r0,r5 /* same vcpu as this core last ran? */
316 beq 1f
3173: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
318 sth r5,KVM_LAST_VCPU(r7)
319 li r6,128
320 mtctr r6
321 li r7,0x800 /* IS field = 0b10 */
322 ptesync
3232: tlbiel r7
324 addi r7,r7,0x1000
325 bdnz 2b
326 ptesync
3271:
328
366 /* Save purr/spurr */ 329 /* Save purr/spurr */
367 mfspr r5,SPRN_PURR 330 mfspr r5,SPRN_PURR
368 mfspr r6,SPRN_SPURR 331 mfspr r6,SPRN_SPURR
@@ -485,50 +448,19 @@ toc_tlbie_lock:
485 mtctr r6 448 mtctr r6
486 mtxer r7 449 mtxer r7
487 450
488kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ 451 /* Move SRR0 and SRR1 into the respective regs */
489 ld r6, VCPU_SRR0(r4) 452 ld r6, VCPU_SRR0(r4)
490 ld r7, VCPU_SRR1(r4) 453 ld r7, VCPU_SRR1(r4)
454 mtspr SPRN_SRR0, r6
455 mtspr SPRN_SRR1, r7
456
491 ld r10, VCPU_PC(r4) 457 ld r10, VCPU_PC(r4)
492 ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
493 458
459 ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */
494 rldicl r11, r11, 63 - MSR_HV_LG, 1 460 rldicl r11, r11, 63 - MSR_HV_LG, 1
495 rotldi r11, r11, 1 + MSR_HV_LG 461 rotldi r11, r11, 1 + MSR_HV_LG
496 ori r11, r11, MSR_ME 462 ori r11, r11, MSR_ME
497 463
498 /* Check if we can deliver an external or decrementer interrupt now */
499 ld r0,VCPU_PENDING_EXC(r4)
500 li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
501 oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
502 and r0,r0,r8
503 cmpdi cr1,r0,0
504 andi. r0,r11,MSR_EE
505 beq cr1,11f
506BEGIN_FTR_SECTION
507 mfspr r8,SPRN_LPCR
508 ori r8,r8,LPCR_MER
509 mtspr SPRN_LPCR,r8
510 isync
511END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
512 beq 5f
513 li r0,BOOK3S_INTERRUPT_EXTERNAL
51412: mr r6,r10
515 mr r10,r0
516 mr r7,r11
517 li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
518 rotldi r11,r11,63
519 b 5f
52011: beq 5f
521 mfspr r0,SPRN_DEC
522 cmpwi r0,0
523 li r0,BOOK3S_INTERRUPT_DECREMENTER
524 blt 12b
525
526 /* Move SRR0 and SRR1 into the respective regs */
5275: mtspr SPRN_SRR0, r6
528 mtspr SPRN_SRR1, r7
529 li r0,0
530 stb r0,VCPU_CEDED(r4) /* cancel cede */
531
532fast_guest_return: 464fast_guest_return:
533 mtspr SPRN_HSRR0,r10 465 mtspr SPRN_HSRR0,r10
534 mtspr SPRN_HSRR1,r11 466 mtspr SPRN_HSRR1,r11
@@ -544,21 +476,21 @@ fast_guest_return:
544 mtlr r5 476 mtlr r5
545 mtcr r6 477 mtcr r6
546 478
547 ld r0, VCPU_GPR(R0)(r4) 479 ld r0, VCPU_GPR(r0)(r4)
548 ld r1, VCPU_GPR(R1)(r4) 480 ld r1, VCPU_GPR(r1)(r4)
549 ld r2, VCPU_GPR(R2)(r4) 481 ld r2, VCPU_GPR(r2)(r4)
550 ld r3, VCPU_GPR(R3)(r4) 482 ld r3, VCPU_GPR(r3)(r4)
551 ld r5, VCPU_GPR(R5)(r4) 483 ld r5, VCPU_GPR(r5)(r4)
552 ld r6, VCPU_GPR(R6)(r4) 484 ld r6, VCPU_GPR(r6)(r4)
553 ld r7, VCPU_GPR(R7)(r4) 485 ld r7, VCPU_GPR(r7)(r4)
554 ld r8, VCPU_GPR(R8)(r4) 486 ld r8, VCPU_GPR(r8)(r4)
555 ld r9, VCPU_GPR(R9)(r4) 487 ld r9, VCPU_GPR(r9)(r4)
556 ld r10, VCPU_GPR(R10)(r4) 488 ld r10, VCPU_GPR(r10)(r4)
557 ld r11, VCPU_GPR(R11)(r4) 489 ld r11, VCPU_GPR(r11)(r4)
558 ld r12, VCPU_GPR(R12)(r4) 490 ld r12, VCPU_GPR(r12)(r4)
559 ld r13, VCPU_GPR(R13)(r4) 491 ld r13, VCPU_GPR(r13)(r4)
560 492
561 ld r4, VCPU_GPR(R4)(r4) 493 ld r4, VCPU_GPR(r4)(r4)
562 494
563 hrfid 495 hrfid
564 b . 496 b .
@@ -587,22 +519,22 @@ kvmppc_interrupt:
587 519
588 /* Save registers */ 520 /* Save registers */
589 521
590 std r0, VCPU_GPR(R0)(r9) 522 std r0, VCPU_GPR(r0)(r9)
591 std r1, VCPU_GPR(R1)(r9) 523 std r1, VCPU_GPR(r1)(r9)
592 std r2, VCPU_GPR(R2)(r9) 524 std r2, VCPU_GPR(r2)(r9)
593 std r3, VCPU_GPR(R3)(r9) 525 std r3, VCPU_GPR(r3)(r9)
594 std r4, VCPU_GPR(R4)(r9) 526 std r4, VCPU_GPR(r4)(r9)
595 std r5, VCPU_GPR(R5)(r9) 527 std r5, VCPU_GPR(r5)(r9)
596 std r6, VCPU_GPR(R6)(r9) 528 std r6, VCPU_GPR(r6)(r9)
597 std r7, VCPU_GPR(R7)(r9) 529 std r7, VCPU_GPR(r7)(r9)
598 std r8, VCPU_GPR(R8)(r9) 530 std r8, VCPU_GPR(r8)(r9)
599 ld r0, HSTATE_HOST_R2(r13) 531 ld r0, HSTATE_HOST_R2(r13)
600 std r0, VCPU_GPR(R9)(r9) 532 std r0, VCPU_GPR(r9)(r9)
601 std r10, VCPU_GPR(R10)(r9) 533 std r10, VCPU_GPR(r10)(r9)
602 std r11, VCPU_GPR(R11)(r9) 534 std r11, VCPU_GPR(r11)(r9)
603 ld r3, HSTATE_SCRATCH0(r13) 535 ld r3, HSTATE_SCRATCH0(r13)
604 lwz r4, HSTATE_SCRATCH1(r13) 536 lwz r4, HSTATE_SCRATCH1(r13)
605 std r3, VCPU_GPR(R12)(r9) 537 std r3, VCPU_GPR(r12)(r9)
606 stw r4, VCPU_CR(r9) 538 stw r4, VCPU_CR(r9)
607 539
608 /* Restore R1/R2 so we can handle faults */ 540 /* Restore R1/R2 so we can handle faults */
@@ -623,7 +555,7 @@ kvmppc_interrupt:
623 555
624 GET_SCRATCH0(r3) 556 GET_SCRATCH0(r3)
625 mflr r4 557 mflr r4
626 std r3, VCPU_GPR(R13)(r9) 558 std r3, VCPU_GPR(r13)(r9)
627 std r4, VCPU_LR(r9) 559 std r4, VCPU_LR(r9)
628 560
629 /* Unset guest mode */ 561 /* Unset guest mode */
@@ -632,30 +564,6 @@ kvmppc_interrupt:
632 564
633 stw r12,VCPU_TRAP(r9) 565 stw r12,VCPU_TRAP(r9)
634 566
635 /* Save HEIR (HV emulation assist reg) in last_inst
636 if this is an HEI (HV emulation interrupt, e40) */
637 li r3,KVM_INST_FETCH_FAILED
638BEGIN_FTR_SECTION
639 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
640 bne 11f
641 mfspr r3,SPRN_HEIR
642END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
64311: stw r3,VCPU_LAST_INST(r9)
644
645 /* these are volatile across C function calls */
646 mfctr r3
647 mfxer r4
648 std r3, VCPU_CTR(r9)
649 stw r4, VCPU_XER(r9)
650
651BEGIN_FTR_SECTION
652 /* If this is a page table miss then see if it's theirs or ours */
653 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
654 beq kvmppc_hdsi
655 cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
656 beq kvmppc_hisi
657END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
658
659 /* See if this is a leftover HDEC interrupt */ 567 /* See if this is a leftover HDEC interrupt */
660 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 568 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
661 bne 2f 569 bne 2f
@@ -663,23 +571,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
663 cmpwi r3,0 571 cmpwi r3,0
664 bge ignore_hdec 572 bge ignore_hdec
6652: 5732:
666 /* See if this is an hcall we can handle in real mode */ 574 /* See if this is something we can handle in real mode */
667 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL 575 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
668 beq hcall_try_real_mode 576 beq hcall_try_real_mode
577hcall_real_cont:
669 578
670 /* Check for mediated interrupts (could be done earlier really ...) */ 579 /* Check for mediated interrupts (could be done earlier really ...) */
671BEGIN_FTR_SECTION 580BEGIN_FTR_SECTION
672 cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL 581 cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
673 bne+ 1f 582 bne+ 1f
583 ld r5,VCPU_KVM(r9)
584 ld r5,KVM_LPCR(r5)
674 andi. r0,r11,MSR_EE 585 andi. r0,r11,MSR_EE
675 beq 1f 586 beq 1f
676 mfspr r5,SPRN_LPCR
677 andi. r0,r5,LPCR_MER 587 andi. r0,r5,LPCR_MER
678 bne bounce_ext_interrupt 588 bne bounce_ext_interrupt
6791: 5891:
680END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 590END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
681 591
682guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
683 /* Save DEC */ 592 /* Save DEC */
684 mfspr r5,SPRN_DEC 593 mfspr r5,SPRN_DEC
685 mftb r6 594 mftb r6
@@ -687,26 +596,36 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
687 add r5,r5,r6 596 add r5,r5,r6
688 std r5,VCPU_DEC_EXPIRES(r9) 597 std r5,VCPU_DEC_EXPIRES(r9)
689 598
599 /* Save HEIR (HV emulation assist reg) in last_inst
600 if this is an HEI (HV emulation interrupt, e40) */
601 li r3,-1
602BEGIN_FTR_SECTION
603 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
604 bne 11f
605 mfspr r3,SPRN_HEIR
606END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
60711: stw r3,VCPU_LAST_INST(r9)
608
690 /* Save more register state */ 609 /* Save more register state */
610 mfxer r5
691 mfdar r6 611 mfdar r6
692 mfdsisr r7 612 mfdsisr r7
613 mfctr r8
614
615 stw r5, VCPU_XER(r9)
693 std r6, VCPU_DAR(r9) 616 std r6, VCPU_DAR(r9)
694 stw r7, VCPU_DSISR(r9) 617 stw r7, VCPU_DSISR(r9)
618 std r8, VCPU_CTR(r9)
619 /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */
695BEGIN_FTR_SECTION 620BEGIN_FTR_SECTION
696 /* don't overwrite fault_dar/fault_dsisr if HDSI */
697 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE 621 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
698 beq 6f 622 beq 6f
699END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 623END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
700 std r6, VCPU_FAULT_DAR(r9) 6247: std r6, VCPU_FAULT_DAR(r9)
701 stw r7, VCPU_FAULT_DSISR(r9) 625 stw r7, VCPU_FAULT_DSISR(r9)
702 626
703 /* See if it is a machine check */
704 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
705 beq machine_check_realmode
706mc_cont:
707
708 /* Save guest CTRL register, set runlatch to 1 */ 627 /* Save guest CTRL register, set runlatch to 1 */
7096: mfspr r6,SPRN_CTRLF 628 mfspr r6,SPRN_CTRLF
710 stw r6,VCPU_CTRL(r9) 629 stw r6,VCPU_CTRL(r9)
711 andi. r0,r6,1 630 andi. r0,r6,1
712 bne 4f 631 bne 4f
@@ -763,7 +682,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
763 slbia 682 slbia
764 ptesync 683 ptesync
765 684
766hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */ 685hdec_soon:
767BEGIN_FTR_SECTION 686BEGIN_FTR_SECTION
768 b 32f 687 b 32f
769END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 688END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
@@ -781,7 +700,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
781 addi r0,r3,0x100 700 addi r0,r3,0x100
782 stwcx. r0,0,r6 701 stwcx. r0,0,r6
783 bne 41b 702 bne 41b
784 lwsync
785 703
786 /* 704 /*
787 * At this point we have an interrupt that we have to pass 705 * At this point we have an interrupt that we have to pass
@@ -795,39 +713,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
795 * interrupt, since the other threads will already be on their 713 * interrupt, since the other threads will already be on their
796 * way here in that case. 714 * way here in that case.
797 */ 715 */
798 cmpwi r3,0x100 /* Are we the first here? */
799 bge 43f
800 cmpwi r3,1 /* Are any other threads in the guest? */
801 ble 43f
802 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 716 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
803 beq 40f 717 beq 40f
718 cmpwi r3,0x100 /* Are we the first here? */
719 bge 40f
720 cmpwi r3,1
721 ble 40f
804 li r0,0 722 li r0,0
805 mtspr SPRN_HDEC,r0 723 mtspr SPRN_HDEC,r0
80640: 72440:
807 /*
808 * Send an IPI to any napping threads, since an HDEC interrupt
809 * doesn't wake CPUs up from nap.
810 */
811 lwz r3,VCORE_NAPPING_THREADS(r5)
812 lwz r4,VCPU_PTID(r9)
813 li r0,1
814 sld r0,r0,r4
815 andc. r3,r3,r0 /* no sense IPI'ing ourselves */
816 beq 43f
817 mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
818 subf r6,r4,r13
81942: andi. r0,r3,1
820 beq 44f
821 ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
822 li r0,IPI_PRIORITY
823 li r7,XICS_QIRR
824 stbcix r0,r7,r8 /* trigger the IPI */
82544: srdi. r3,r3,1
826 addi r6,r6,PACA_SIZE
827 bne 42b
828 725
829 /* Secondary threads wait for primary to do partition switch */ 726 /* Secondary threads wait for primary to do partition switch */
83043: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ 727 ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
831 ld r5,HSTATE_KVM_VCORE(r13) 728 ld r5,HSTATE_KVM_VCORE(r13)
832 lwz r3,VCPU_PTID(r9) 729 lwz r3,VCPU_PTID(r9)
833 cmpwi r3,0 730 cmpwi r3,0
@@ -960,6 +857,12 @@ BEGIN_FTR_SECTION
960 mtspr SPRN_AMR,r6 857 mtspr SPRN_AMR,r6
961END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 858END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
962 859
860 /* Restore host DABR and DABRX */
861 ld r5,HSTATE_DABR(r13)
862 li r6,7
863 mtspr SPRN_DABR,r5
864 mtspr SPRN_DABRX,r6
865
963 /* Switch DSCR back to host value */ 866 /* Switch DSCR back to host value */
964BEGIN_FTR_SECTION 867BEGIN_FTR_SECTION
965 mfspr r8, SPRN_DSCR 868 mfspr r8, SPRN_DSCR
@@ -969,24 +872,24 @@ BEGIN_FTR_SECTION
969END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 872END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
970 873
971 /* Save non-volatile GPRs */ 874 /* Save non-volatile GPRs */
972 std r14, VCPU_GPR(R14)(r9) 875 std r14, VCPU_GPR(r14)(r9)
973 std r15, VCPU_GPR(R15)(r9) 876 std r15, VCPU_GPR(r15)(r9)
974 std r16, VCPU_GPR(R16)(r9) 877 std r16, VCPU_GPR(r16)(r9)
975 std r17, VCPU_GPR(R17)(r9) 878 std r17, VCPU_GPR(r17)(r9)
976 std r18, VCPU_GPR(R18)(r9) 879 std r18, VCPU_GPR(r18)(r9)
977 std r19, VCPU_GPR(R19)(r9) 880 std r19, VCPU_GPR(r19)(r9)
978 std r20, VCPU_GPR(R20)(r9) 881 std r20, VCPU_GPR(r20)(r9)
979 std r21, VCPU_GPR(R21)(r9) 882 std r21, VCPU_GPR(r21)(r9)
980 std r22, VCPU_GPR(R22)(r9) 883 std r22, VCPU_GPR(r22)(r9)
981 std r23, VCPU_GPR(R23)(r9) 884 std r23, VCPU_GPR(r23)(r9)
982 std r24, VCPU_GPR(R24)(r9) 885 std r24, VCPU_GPR(r24)(r9)
983 std r25, VCPU_GPR(R25)(r9) 886 std r25, VCPU_GPR(r25)(r9)
984 std r26, VCPU_GPR(R26)(r9) 887 std r26, VCPU_GPR(r26)(r9)
985 std r27, VCPU_GPR(R27)(r9) 888 std r27, VCPU_GPR(r27)(r9)
986 std r28, VCPU_GPR(R28)(r9) 889 std r28, VCPU_GPR(r28)(r9)
987 std r29, VCPU_GPR(R29)(r9) 890 std r29, VCPU_GPR(r29)(r9)
988 std r30, VCPU_GPR(R30)(r9) 891 std r30, VCPU_GPR(r30)(r9)
989 std r31, VCPU_GPR(R31)(r9) 892 std r31, VCPU_GPR(r31)(r9)
990 893
991 /* Save SPRGs */ 894 /* Save SPRGs */
992 mfspr r3, SPRN_SPRG0 895 mfspr r3, SPRN_SPRG0
@@ -998,10 +901,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
998 std r5, VCPU_SPRG2(r9) 901 std r5, VCPU_SPRG2(r9)
999 std r6, VCPU_SPRG3(r9) 902 std r6, VCPU_SPRG3(r9)
1000 903
1001 /* save FP state */
1002 mr r3, r9
1003 bl .kvmppc_save_fp
1004
1005 /* Increment yield count if they have a VPA */ 904 /* Increment yield count if they have a VPA */
1006 ld r8, VCPU_VPA(r9) /* do they have a VPA? */ 905 ld r8, VCPU_VPA(r9) /* do they have a VPA? */
1007 cmpdi r8, 0 906 cmpdi r8, 0
@@ -1016,12 +915,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1016 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 915 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
1017 mfspr r4, SPRN_MMCR0 /* save MMCR0 */ 916 mfspr r4, SPRN_MMCR0 /* save MMCR0 */
1018 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ 917 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
1019 mfspr r6, SPRN_MMCRA
1020BEGIN_FTR_SECTION
1021 /* On P7, clear MMCRA in order to disable SDAR updates */
1022 li r7, 0
1023 mtspr SPRN_MMCRA, r7
1024END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1025 isync 918 isync
1026 beq 21f /* if no VPA, save PMU stuff anyway */ 919 beq 21f /* if no VPA, save PMU stuff anyway */
1027 lbz r7, LPPACA_PMCINUSE(r8) 920 lbz r7, LPPACA_PMCINUSE(r8)
@@ -1030,6 +923,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1030 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ 923 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
1031 b 22f 924 b 22f
103221: mfspr r5, SPRN_MMCR1 92521: mfspr r5, SPRN_MMCR1
926 mfspr r6, SPRN_MMCRA
1033 std r4, VCPU_MMCR(r9) 927 std r4, VCPU_MMCR(r9)
1034 std r5, VCPU_MMCR + 8(r9) 928 std r5, VCPU_MMCR + 8(r9)
1035 std r6, VCPU_MMCR + 16(r9) 929 std r6, VCPU_MMCR + 16(r9)
@@ -1054,24 +948,17 @@ BEGIN_FTR_SECTION
1054 stw r11, VCPU_PMC + 28(r9) 948 stw r11, VCPU_PMC + 28(r9)
1055END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 949END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
105622: 95022:
951 /* save FP state */
952 mr r3, r9
953 bl .kvmppc_save_fp
1057 954
1058 /* Secondary threads go off to take a nap on POWER7 */ 955 /* Secondary threads go off to take a nap on POWER7 */
1059BEGIN_FTR_SECTION 956BEGIN_FTR_SECTION
1060 lwz r0,VCPU_PTID(r9) 957 lwz r0,VCPU_PTID(r3)
1061 cmpwi r0,0 958 cmpwi r0,0
1062 bne secondary_nap 959 bne secondary_nap
1063END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 960END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1064 961
1065 /* Restore host DABR and DABRX */
1066 ld r5,HSTATE_DABR(r13)
1067 li r6,7
1068 mtspr SPRN_DABR,r5
1069 mtspr SPRN_DABRX,r6
1070
1071 /* Restore SPRG3 */
1072 ld r3,PACA_SPRG3(r13)
1073 mtspr SPRN_SPRG3,r3
1074
1075 /* 962 /*
1076 * Reload DEC. HDEC interrupts were disabled when 963 * Reload DEC. HDEC interrupts were disabled when
1077 * we reloaded the host's LPCR value. 964 * we reloaded the host's LPCR value.
@@ -1117,162 +1004,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1117 /* 1004 /*
1118 * For external and machine check interrupts, we need 1005 * For external and machine check interrupts, we need
1119 * to call the Linux handler to process the interrupt. 1006 * to call the Linux handler to process the interrupt.
1120 * We do that by jumping to absolute address 0x500 for 1007 * We do that by jumping to the interrupt vector address
1121 * external interrupts, or the machine_check_fwnmi label 1008 * which we have in r12. The [h]rfid at the end of the
1122 * for machine checks (since firmware might have patched
1123 * the vector area at 0x200). The [h]rfid at the end of the
1124 * handler will return to the book3s_hv_interrupts.S code. 1009 * handler will return to the book3s_hv_interrupts.S code.
1125 * For other interrupts we do the rfid to get back 1010 * For other interrupts we do the rfid to get back
1126 * to the book3s_hv_interrupts.S code here. 1011 * to the book3s_interrupts.S code here.
1127 */ 1012 */
1128 ld r8, HSTATE_VMHANDLER(r13) 1013 ld r8, HSTATE_VMHANDLER(r13)
1129 ld r7, HSTATE_HOST_MSR(r13) 1014 ld r7, HSTATE_HOST_MSR(r13)
1130 1015
1131 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1132 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1016 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1133BEGIN_FTR_SECTION
1134 beq 11f 1017 beq 11f
1135END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1018 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1136 1019
1137 /* RFI into the highmem handler, or branch to interrupt handler */ 1020 /* RFI into the highmem handler, or branch to interrupt handler */
1138 mfmsr r6 102112: mfmsr r6
1022 mtctr r12
1139 li r0, MSR_RI 1023 li r0, MSR_RI
1140 andc r6, r6, r0 1024 andc r6, r6, r0
1141 mtmsrd r6, 1 /* Clear RI in MSR */ 1025 mtmsrd r6, 1 /* Clear RI in MSR */
1142 mtsrr0 r8 1026 mtsrr0 r8
1143 mtsrr1 r7 1027 mtsrr1 r7
1144 beqa 0x500 /* external interrupt (PPC970) */ 1028 beqctr
1145 beq cr1, 13f /* machine check */
1146 RFI 1029 RFI
1147 1030
1148 /* On POWER7, we have external interrupts set to use HSRR0/1 */ 103111:
114911: mtspr SPRN_HSRR0, r8 1032BEGIN_FTR_SECTION
1033 b 12b
1034END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1035 mtspr SPRN_HSRR0, r8
1150 mtspr SPRN_HSRR1, r7 1036 mtspr SPRN_HSRR1, r7
1151 ba 0x500 1037 ba 0x500
1152 1038
115313: b machine_check_fwnmi 10396: mfspr r6,SPRN_HDAR
1154 1040 mfspr r7,SPRN_HDSISR
1155/* 1041 b 7b
1156 * Check whether an HDSI is an HPTE not found fault or something else.
1157 * If it is an HPTE not found fault that is due to the guest accessing
1158 * a page that they have mapped but which we have paged out, then
1159 * we continue on with the guest exit path. In all other cases,
1160 * reflect the HDSI to the guest as a DSI.
1161 */
1162kvmppc_hdsi:
1163 mfspr r4, SPRN_HDAR
1164 mfspr r6, SPRN_HDSISR
1165 /* HPTE not found fault or protection fault? */
1166 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
1167 beq 1f /* if not, send it to the guest */
1168 andi. r0, r11, MSR_DR /* data relocation enabled? */
1169 beq 3f
1170 clrrdi r0, r4, 28
1171 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
1172 bne 1f /* if no SLB entry found */
11734: std r4, VCPU_FAULT_DAR(r9)
1174 stw r6, VCPU_FAULT_DSISR(r9)
1175
1176 /* Search the hash table. */
1177 mr r3, r9 /* vcpu pointer */
1178 li r7, 1 /* data fault */
1179 bl .kvmppc_hpte_hv_fault
1180 ld r9, HSTATE_KVM_VCPU(r13)
1181 ld r10, VCPU_PC(r9)
1182 ld r11, VCPU_MSR(r9)
1183 li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1184 cmpdi r3, 0 /* retry the instruction */
1185 beq 6f
1186 cmpdi r3, -1 /* handle in kernel mode */
1187 beq guest_exit_cont
1188 cmpdi r3, -2 /* MMIO emulation; need instr word */
1189 beq 2f
1190
1191 /* Synthesize a DSI for the guest */
1192 ld r4, VCPU_FAULT_DAR(r9)
1193 mr r6, r3
11941: mtspr SPRN_DAR, r4
1195 mtspr SPRN_DSISR, r6
1196 mtspr SPRN_SRR0, r10
1197 mtspr SPRN_SRR1, r11
1198 li r10, BOOK3S_INTERRUPT_DATA_STORAGE
1199 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1200 rotldi r11, r11, 63
1201fast_interrupt_c_return:
12026: ld r7, VCPU_CTR(r9)
1203 lwz r8, VCPU_XER(r9)
1204 mtctr r7
1205 mtxer r8
1206 mr r4, r9
1207 b fast_guest_return
1208
12093: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */
1210 ld r5, KVM_VRMA_SLB_V(r5)
1211 b 4b
1212
1213 /* If this is for emulated MMIO, load the instruction word */
12142: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */
1215
1216 /* Set guest mode to 'jump over instruction' so if lwz faults
1217 * we'll just continue at the next IP. */
1218 li r0, KVM_GUEST_MODE_SKIP
1219 stb r0, HSTATE_IN_GUEST(r13)
1220
1221 /* Do the access with MSR:DR enabled */
1222 mfmsr r3
1223 ori r4, r3, MSR_DR /* Enable paging for data */
1224 mtmsrd r4
1225 lwz r8, 0(r10)
1226 mtmsrd r3
1227
1228 /* Store the result */
1229 stw r8, VCPU_LAST_INST(r9)
1230
1231 /* Unset guest mode. */
1232 li r0, KVM_GUEST_MODE_NONE
1233 stb r0, HSTATE_IN_GUEST(r13)
1234 b guest_exit_cont
1235
1236/*
1237 * Similarly for an HISI, reflect it to the guest as an ISI unless
1238 * it is an HPTE not found fault for a page that we have paged out.
1239 */
1240kvmppc_hisi:
1241 andis. r0, r11, SRR1_ISI_NOPT@h
1242 beq 1f
1243 andi. r0, r11, MSR_IR /* instruction relocation enabled? */
1244 beq 3f
1245 clrrdi r0, r10, 28
1246 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
1247 bne 1f /* if no SLB entry found */
12484:
1249 /* Search the hash table. */
1250 mr r3, r9 /* vcpu pointer */
1251 mr r4, r10
1252 mr r6, r11
1253 li r7, 0 /* instruction fault */
1254 bl .kvmppc_hpte_hv_fault
1255 ld r9, HSTATE_KVM_VCPU(r13)
1256 ld r10, VCPU_PC(r9)
1257 ld r11, VCPU_MSR(r9)
1258 li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1259 cmpdi r3, 0 /* retry the instruction */
1260 beq fast_interrupt_c_return
1261 cmpdi r3, -1 /* handle in kernel mode */
1262 beq guest_exit_cont
1263
1264 /* Synthesize an ISI for the guest */
1265 mr r11, r3
12661: mtspr SPRN_SRR0, r10
1267 mtspr SPRN_SRR1, r11
1268 li r10, BOOK3S_INTERRUPT_INST_STORAGE
1269 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1270 rotldi r11, r11, 63
1271 b fast_interrupt_c_return
1272
12733: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
1274 ld r5, KVM_VRMA_SLB_V(r6)
1275 b 4b
1276 1042
1277/* 1043/*
1278 * Try to handle an hcall in real mode. 1044 * Try to handle an hcall in real mode.
@@ -1282,25 +1048,25 @@ kvmppc_hisi:
1282 */ 1048 */
1283 .globl hcall_try_real_mode 1049 .globl hcall_try_real_mode
1284hcall_try_real_mode: 1050hcall_try_real_mode:
1285 ld r3,VCPU_GPR(R3)(r9) 1051 ld r3,VCPU_GPR(r3)(r9)
1286 andi. r0,r11,MSR_PR 1052 andi. r0,r11,MSR_PR
1287 bne guest_exit_cont 1053 bne hcall_real_cont
1288 clrrdi r3,r3,2 1054 clrrdi r3,r3,2
1289 cmpldi r3,hcall_real_table_end - hcall_real_table 1055 cmpldi r3,hcall_real_table_end - hcall_real_table
1290 bge guest_exit_cont 1056 bge hcall_real_cont
1291 LOAD_REG_ADDR(r4, hcall_real_table) 1057 LOAD_REG_ADDR(r4, hcall_real_table)
1292 lwzx r3,r3,r4 1058 lwzx r3,r3,r4
1293 cmpwi r3,0 1059 cmpwi r3,0
1294 beq guest_exit_cont 1060 beq hcall_real_cont
1295 add r3,r3,r4 1061 add r3,r3,r4
1296 mtctr r3 1062 mtctr r3
1297 mr r3,r9 /* get vcpu pointer */ 1063 mr r3,r9 /* get vcpu pointer */
1298 ld r4,VCPU_GPR(R4)(r9) 1064 ld r4,VCPU_GPR(r4)(r9)
1299 bctrl 1065 bctrl
1300 cmpdi r3,H_TOO_HARD 1066 cmpdi r3,H_TOO_HARD
1301 beq hcall_real_fallback 1067 beq hcall_real_fallback
1302 ld r4,HSTATE_KVM_VCPU(r13) 1068 ld r4,HSTATE_KVM_VCPU(r13)
1303 std r3,VCPU_GPR(R3)(r4) 1069 std r3,VCPU_GPR(r3)(r4)
1304 ld r10,VCPU_PC(r4) 1070 ld r10,VCPU_PC(r4)
1305 ld r11,VCPU_MSR(r4) 1071 ld r11,VCPU_MSR(r4)
1306 b fast_guest_return 1072 b fast_guest_return
@@ -1311,8 +1077,9 @@ hcall_try_real_mode:
1311hcall_real_fallback: 1077hcall_real_fallback:
1312 li r12,BOOK3S_INTERRUPT_SYSCALL 1078 li r12,BOOK3S_INTERRUPT_SYSCALL
1313 ld r9, HSTATE_KVM_VCPU(r13) 1079 ld r9, HSTATE_KVM_VCPU(r13)
1080 ld r11, VCPU_MSR(r9)
1314 1081
1315 b guest_exit_cont 1082 b hcall_real_cont
1316 1083
1317 .globl hcall_real_table 1084 .globl hcall_real_table
1318hcall_real_table: 1085hcall_real_table:
@@ -1372,7 +1139,7 @@ hcall_real_table:
1372 .long 0 /* 0xd4 */ 1139 .long 0 /* 0xd4 */
1373 .long 0 /* 0xd8 */ 1140 .long 0 /* 0xd8 */
1374 .long 0 /* 0xdc */ 1141 .long 0 /* 0xdc */
1375 .long .kvmppc_h_cede - hcall_real_table 1142 .long 0 /* 0xe0 */
1376 .long 0 /* 0xe4 */ 1143 .long 0 /* 0xe4 */
1377 .long 0 /* 0xe8 */ 1144 .long 0 /* 0xe8 */
1378 .long 0 /* 0xec */ 1145 .long 0 /* 0xec */
@@ -1401,191 +1168,15 @@ bounce_ext_interrupt:
1401 mtspr SPRN_SRR0,r10 1168 mtspr SPRN_SRR0,r10
1402 mtspr SPRN_SRR1,r11 1169 mtspr SPRN_SRR1,r11
1403 li r10,BOOK3S_INTERRUPT_EXTERNAL 1170 li r10,BOOK3S_INTERRUPT_EXTERNAL
1404 li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ 1171 LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME);
1405 rotldi r11,r11,63
1406 b fast_guest_return 1172 b fast_guest_return
1407 1173
1408_GLOBAL(kvmppc_h_set_dabr) 1174_GLOBAL(kvmppc_h_set_dabr)
1409 std r4,VCPU_DABR(r3) 1175 std r4,VCPU_DABR(r3)
1410 /* Work around P7 bug where DABR can get corrupted on mtspr */ 1176 mtspr SPRN_DABR,r4
14111: mtspr SPRN_DABR,r4
1412 mfspr r5, SPRN_DABR
1413 cmpd r4, r5
1414 bne 1b
1415 isync
1416 li r3,0 1177 li r3,0
1417 blr 1178 blr
1418 1179
1419_GLOBAL(kvmppc_h_cede)
1420 ori r11,r11,MSR_EE
1421 std r11,VCPU_MSR(r3)
1422 li r0,1
1423 stb r0,VCPU_CEDED(r3)
1424 sync /* order setting ceded vs. testing prodded */
1425 lbz r5,VCPU_PRODDED(r3)
1426 cmpwi r5,0
1427 bne kvm_cede_prodded
1428 li r0,0 /* set trap to 0 to say hcall is handled */
1429 stw r0,VCPU_TRAP(r3)
1430 li r0,H_SUCCESS
1431 std r0,VCPU_GPR(R3)(r3)
1432BEGIN_FTR_SECTION
1433 b kvm_cede_exit /* just send it up to host on 970 */
1434END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1435
1436 /*
1437 * Set our bit in the bitmask of napping threads unless all the
1438 * other threads are already napping, in which case we send this
1439 * up to the host.
1440 */
1441 ld r5,HSTATE_KVM_VCORE(r13)
1442 lwz r6,VCPU_PTID(r3)
1443 lwz r8,VCORE_ENTRY_EXIT(r5)
1444 clrldi r8,r8,56
1445 li r0,1
1446 sld r0,r0,r6
1447 addi r6,r5,VCORE_NAPPING_THREADS
144831: lwarx r4,0,r6
1449 or r4,r4,r0
1450 PPC_POPCNTW(R7,R4)
1451 cmpw r7,r8
1452 bge kvm_cede_exit
1453 stwcx. r4,0,r6
1454 bne 31b
1455 li r0,1
1456 stb r0,HSTATE_NAPPING(r13)
1457 /* order napping_threads update vs testing entry_exit_count */
1458 lwsync
1459 mr r4,r3
1460 lwz r7,VCORE_ENTRY_EXIT(r5)
1461 cmpwi r7,0x100
1462 bge 33f /* another thread already exiting */
1463
1464/*
1465 * Although not specifically required by the architecture, POWER7
1466 * preserves the following registers in nap mode, even if an SMT mode
1467 * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
1468 * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
1469 */
1470 /* Save non-volatile GPRs */
1471 std r14, VCPU_GPR(R14)(r3)
1472 std r15, VCPU_GPR(R15)(r3)
1473 std r16, VCPU_GPR(R16)(r3)
1474 std r17, VCPU_GPR(R17)(r3)
1475 std r18, VCPU_GPR(R18)(r3)
1476 std r19, VCPU_GPR(R19)(r3)
1477 std r20, VCPU_GPR(R20)(r3)
1478 std r21, VCPU_GPR(R21)(r3)
1479 std r22, VCPU_GPR(R22)(r3)
1480 std r23, VCPU_GPR(R23)(r3)
1481 std r24, VCPU_GPR(R24)(r3)
1482 std r25, VCPU_GPR(R25)(r3)
1483 std r26, VCPU_GPR(R26)(r3)
1484 std r27, VCPU_GPR(R27)(r3)
1485 std r28, VCPU_GPR(R28)(r3)
1486 std r29, VCPU_GPR(R29)(r3)
1487 std r30, VCPU_GPR(R30)(r3)
1488 std r31, VCPU_GPR(R31)(r3)
1489
1490 /* save FP state */
1491 bl .kvmppc_save_fp
1492
1493 /*
1494 * Take a nap until a decrementer or external interrupt occurs,
1495 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
1496 */
1497 li r0,1
1498 stb r0,HSTATE_HWTHREAD_REQ(r13)
1499 mfspr r5,SPRN_LPCR
1500 ori r5,r5,LPCR_PECE0 | LPCR_PECE1
1501 mtspr SPRN_LPCR,r5
1502 isync
1503 li r0, 0
1504 std r0, HSTATE_SCRATCH0(r13)
1505 ptesync
1506 ld r0, HSTATE_SCRATCH0(r13)
15071: cmpd r0, r0
1508 bne 1b
1509 nap
1510 b .
1511
1512kvm_end_cede:
1513 /* Woken by external or decrementer interrupt */
1514 ld r1, HSTATE_HOST_R1(r13)
1515
1516 /* load up FP state */
1517 bl kvmppc_load_fp
1518
1519 /* Load NV GPRS */
1520 ld r14, VCPU_GPR(R14)(r4)
1521 ld r15, VCPU_GPR(R15)(r4)
1522 ld r16, VCPU_GPR(R16)(r4)
1523 ld r17, VCPU_GPR(R17)(r4)
1524 ld r18, VCPU_GPR(R18)(r4)
1525 ld r19, VCPU_GPR(R19)(r4)
1526 ld r20, VCPU_GPR(R20)(r4)
1527 ld r21, VCPU_GPR(R21)(r4)
1528 ld r22, VCPU_GPR(R22)(r4)
1529 ld r23, VCPU_GPR(R23)(r4)
1530 ld r24, VCPU_GPR(R24)(r4)
1531 ld r25, VCPU_GPR(R25)(r4)
1532 ld r26, VCPU_GPR(R26)(r4)
1533 ld r27, VCPU_GPR(R27)(r4)
1534 ld r28, VCPU_GPR(R28)(r4)
1535 ld r29, VCPU_GPR(R29)(r4)
1536 ld r30, VCPU_GPR(R30)(r4)
1537 ld r31, VCPU_GPR(R31)(r4)
1538
1539 /* clear our bit in vcore->napping_threads */
154033: ld r5,HSTATE_KVM_VCORE(r13)
1541 lwz r3,VCPU_PTID(r4)
1542 li r0,1
1543 sld r0,r0,r3
1544 addi r6,r5,VCORE_NAPPING_THREADS
154532: lwarx r7,0,r6
1546 andc r7,r7,r0
1547 stwcx. r7,0,r6
1548 bne 32b
1549 li r0,0
1550 stb r0,HSTATE_NAPPING(r13)
1551
1552 /* see if any other thread is already exiting */
1553 lwz r0,VCORE_ENTRY_EXIT(r5)
1554 cmpwi r0,0x100
1555 blt kvmppc_cede_reentry /* if not go back to guest */
1556
1557 /* some threads are exiting, so go to the guest exit path */
1558 b hcall_real_fallback
1559
1560 /* cede when already previously prodded case */
1561kvm_cede_prodded:
1562 li r0,0
1563 stb r0,VCPU_PRODDED(r3)
1564 sync /* order testing prodded vs. clearing ceded */
1565 stb r0,VCPU_CEDED(r3)
1566 li r3,H_SUCCESS
1567 blr
1568
1569 /* we've ceded but we want to give control to the host */
1570kvm_cede_exit:
1571 li r3,H_TOO_HARD
1572 blr
1573
1574 /* Try to handle a machine check in real mode */
1575machine_check_realmode:
1576 mr r3, r9 /* get vcpu pointer */
1577 bl .kvmppc_realmode_machine_check
1578 nop
1579 cmpdi r3, 0 /* continue exiting from guest? */
1580 ld r9, HSTATE_KVM_VCPU(r13)
1581 li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1582 beq mc_cont
1583 /* If not, deliver a machine check. SRR0/1 are already set */
1584 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
1585 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1586 rotldi r11, r11, 63
1587 b fast_interrupt_c_return
1588
1589secondary_too_late: 1180secondary_too_late:
1590 ld r5,HSTATE_KVM_VCORE(r13) 1181 ld r5,HSTATE_KVM_VCORE(r13)
1591 HMT_LOW 1182 HMT_LOW
@@ -1603,24 +1194,14 @@ secondary_too_late:
1603 slbmte r6,r5 1194 slbmte r6,r5
16041: addi r11,r11,16 11951: addi r11,r11,16
1605 .endr 1196 .endr
1197 b 50f
1606 1198
1607secondary_nap: 1199secondary_nap:
1608 /* Clear our vcpu pointer so we don't come back in early */ 1200 /* Clear any pending IPI */
1609 li r0, 0 120150: ld r5, HSTATE_XICS_PHYS(r13)
1610 std r0, HSTATE_KVM_VCPU(r13)
1611 lwsync
1612 /* Clear any pending IPI - assume we're a secondary thread */
1613 ld r5, HSTATE_XICS_PHYS(r13)
1614 li r7, XICS_XIRR
1615 lwzcix r3, r5, r7 /* ack any pending interrupt */
1616 rlwinm. r0, r3, 0, 0xffffff /* any pending? */
1617 beq 37f
1618 sync
1619 li r0, 0xff 1202 li r0, 0xff
1620 li r6, XICS_QIRR 1203 li r6, XICS_QIRR
1621 stbcix r0, r5, r6 /* clear the IPI */ 1204 stbcix r0, r5, r6
1622 stwcix r3, r5, r7 /* EOI it */
162337: sync
1624 1205
1625 /* increment the nap count and then go to nap mode */ 1206 /* increment the nap count and then go to nap mode */
1626 ld r4, HSTATE_KVM_VCORE(r13) 1207 ld r4, HSTATE_KVM_VCORE(r13)
@@ -1630,16 +1211,14 @@ secondary_nap:
1630 addi r3, r3, 1 1211 addi r3, r3, 1
1631 stwcx. r3, 0, r4 1212 stwcx. r3, 0, r4
1632 bne 51b 1213 bne 51b
1214 isync
1633 1215
1634kvm_no_guest:
1635 li r0, KVM_HWTHREAD_IN_NAP
1636 stb r0, HSTATE_HWTHREAD_STATE(r13)
1637
1638 li r3, LPCR_PECE0
1639 mfspr r4, SPRN_LPCR 1216 mfspr r4, SPRN_LPCR
1640 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 1217 li r0, LPCR_PECE
1218 andc r4, r4, r0
1219 ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */
1641 mtspr SPRN_LPCR, r4 1220 mtspr SPRN_LPCR, r4
1642 isync 1221 li r0, 0
1643 std r0, HSTATE_SCRATCH0(r13) 1222 std r0, HSTATE_SCRATCH0(r13)
1644 ptesync 1223 ptesync
1645 ld r0, HSTATE_SCRATCH0(r13) 1224 ld r0, HSTATE_SCRATCH0(r13)
@@ -1653,8 +1232,8 @@ kvm_no_guest:
1653 * r3 = vcpu pointer 1232 * r3 = vcpu pointer
1654 */ 1233 */
1655_GLOBAL(kvmppc_save_fp) 1234_GLOBAL(kvmppc_save_fp)
1656 mfmsr r5 1235 mfmsr r9
1657 ori r8,r5,MSR_FP 1236 ori r8,r9,MSR_FP
1658#ifdef CONFIG_ALTIVEC 1237#ifdef CONFIG_ALTIVEC
1659BEGIN_FTR_SECTION 1238BEGIN_FTR_SECTION
1660 oris r8,r8,MSR_VEC@h 1239 oris r8,r8,MSR_VEC@h
@@ -1672,7 +1251,7 @@ BEGIN_FTR_SECTION
1672 reg = 0 1251 reg = 0
1673 .rept 32 1252 .rept 32
1674 li r6,reg*16+VCPU_VSRS 1253 li r6,reg*16+VCPU_VSRS
1675 STXVD2X(reg,R6,R3) 1254 STXVD2X(reg,r6,r3)
1676 reg = reg + 1 1255 reg = reg + 1
1677 .endr 1256 .endr
1678FTR_SECTION_ELSE 1257FTR_SECTION_ELSE
@@ -1703,7 +1282,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1703#endif 1282#endif
1704 mfspr r6,SPRN_VRSAVE 1283 mfspr r6,SPRN_VRSAVE
1705 stw r6,VCPU_VRSAVE(r3) 1284 stw r6,VCPU_VRSAVE(r3)
1706 mtmsrd r5 1285 mtmsrd r9
1707 isync 1286 isync
1708 blr 1287 blr
1709 1288
@@ -1734,7 +1313,7 @@ BEGIN_FTR_SECTION
1734 reg = 0 1313 reg = 0
1735 .rept 32 1314 .rept 32
1736 li r7,reg*16+VCPU_VSRS 1315 li r7,reg*16+VCPU_VSRS
1737 LXVD2X(reg,R7,R4) 1316 LXVD2X(reg,r7,r4)
1738 reg = reg + 1 1317 reg = reg + 1
1739 .endr 1318 .endr
1740FTR_SECTION_ELSE 1319FTR_SECTION_ELSE
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 48cbbf86295..c54b0e30cf3 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -25,30 +25,54 @@
25#include <asm/exception-64s.h> 25#include <asm/exception-64s.h>
26 26
27#if defined(CONFIG_PPC_BOOK3S_64) 27#if defined(CONFIG_PPC_BOOK3S_64)
28
29#define ULONG_SIZE 8
28#define FUNC(name) GLUE(.,name) 30#define FUNC(name) GLUE(.,name)
31
32#define GET_SHADOW_VCPU_R13
33
34#define DISABLE_INTERRUPTS \
35 mfmsr r0; \
36 rldicl r0,r0,48,1; \
37 rotldi r0,r0,16; \
38 mtmsrd r0,1; \
39
29#elif defined(CONFIG_PPC_BOOK3S_32) 40#elif defined(CONFIG_PPC_BOOK3S_32)
41
42#define ULONG_SIZE 4
30#define FUNC(name) name 43#define FUNC(name) name
44
45#define GET_SHADOW_VCPU_R13 \
46 lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2)
47
48#define DISABLE_INTERRUPTS \
49 mfmsr r0; \
50 rlwinm r0,r0,0,17,15; \
51 mtmsr r0; \
52
31#endif /* CONFIG_PPC_BOOK3S_XX */ 53#endif /* CONFIG_PPC_BOOK3S_XX */
32 54
55
56#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
33#define VCPU_LOAD_NVGPRS(vcpu) \ 57#define VCPU_LOAD_NVGPRS(vcpu) \
34 PPC_LL r14, VCPU_GPR(R14)(vcpu); \ 58 PPC_LL r14, VCPU_GPR(r14)(vcpu); \
35 PPC_LL r15, VCPU_GPR(R15)(vcpu); \ 59 PPC_LL r15, VCPU_GPR(r15)(vcpu); \
36 PPC_LL r16, VCPU_GPR(R16)(vcpu); \ 60 PPC_LL r16, VCPU_GPR(r16)(vcpu); \
37 PPC_LL r17, VCPU_GPR(R17)(vcpu); \ 61 PPC_LL r17, VCPU_GPR(r17)(vcpu); \
38 PPC_LL r18, VCPU_GPR(R18)(vcpu); \ 62 PPC_LL r18, VCPU_GPR(r18)(vcpu); \
39 PPC_LL r19, VCPU_GPR(R19)(vcpu); \ 63 PPC_LL r19, VCPU_GPR(r19)(vcpu); \
40 PPC_LL r20, VCPU_GPR(R20)(vcpu); \ 64 PPC_LL r20, VCPU_GPR(r20)(vcpu); \
41 PPC_LL r21, VCPU_GPR(R21)(vcpu); \ 65 PPC_LL r21, VCPU_GPR(r21)(vcpu); \
42 PPC_LL r22, VCPU_GPR(R22)(vcpu); \ 66 PPC_LL r22, VCPU_GPR(r22)(vcpu); \
43 PPC_LL r23, VCPU_GPR(R23)(vcpu); \ 67 PPC_LL r23, VCPU_GPR(r23)(vcpu); \
44 PPC_LL r24, VCPU_GPR(R24)(vcpu); \ 68 PPC_LL r24, VCPU_GPR(r24)(vcpu); \
45 PPC_LL r25, VCPU_GPR(R25)(vcpu); \ 69 PPC_LL r25, VCPU_GPR(r25)(vcpu); \
46 PPC_LL r26, VCPU_GPR(R26)(vcpu); \ 70 PPC_LL r26, VCPU_GPR(r26)(vcpu); \
47 PPC_LL r27, VCPU_GPR(R27)(vcpu); \ 71 PPC_LL r27, VCPU_GPR(r27)(vcpu); \
48 PPC_LL r28, VCPU_GPR(R28)(vcpu); \ 72 PPC_LL r28, VCPU_GPR(r28)(vcpu); \
49 PPC_LL r29, VCPU_GPR(R29)(vcpu); \ 73 PPC_LL r29, VCPU_GPR(r29)(vcpu); \
50 PPC_LL r30, VCPU_GPR(R30)(vcpu); \ 74 PPC_LL r30, VCPU_GPR(r30)(vcpu); \
51 PPC_LL r31, VCPU_GPR(R31)(vcpu); \ 75 PPC_LL r31, VCPU_GPR(r31)(vcpu); \
52 76
53/***************************************************************************** 77/*****************************************************************************
54 * * 78 * *
@@ -76,10 +100,6 @@ kvm_start_entry:
76 /* Save non-volatile registers (r14 - r31) */ 100 /* Save non-volatile registers (r14 - r31) */
77 SAVE_NVGPRS(r1) 101 SAVE_NVGPRS(r1)
78 102
79 /* Save CR */
80 mfcr r14
81 stw r14, _CCR(r1)
82
83 /* Save LR */ 103 /* Save LR */
84 PPC_STL r0, _LINK(r1) 104 PPC_STL r0, _LINK(r1)
85 105
@@ -88,17 +108,44 @@ kvm_start_entry:
88 108
89kvm_start_lightweight: 109kvm_start_lightweight:
90 110
111 GET_SHADOW_VCPU_R13
112 PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
113 PPC_STL r3, HSTATE_VMHANDLER(r13)
114
115 PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
116
117 DISABLE_INTERRUPTS
118
91#ifdef CONFIG_PPC_BOOK3S_64 119#ifdef CONFIG_PPC_BOOK3S_64
120 /* Some guests may need to have dcbz set to 32 byte length.
121 *
122 * Usually we ensure that by patching the guest's instructions
123 * to trap on dcbz and emulate it in the hypervisor.
124 *
125 * If we can, we should tell the CPU to use 32 byte dcbz though,
126 * because that's a lot faster.
127 */
128
92 PPC_LL r3, VCPU_HFLAGS(r4) 129 PPC_LL r3, VCPU_HFLAGS(r4)
93 rldicl r3, r3, 0, 63 /* r3 &= 1 */ 130 rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */
94 stb r3, HSTATE_RESTORE_HID5(r13) 131 beq no_dcbz32_on
132
133 mfspr r3,SPRN_HID5
134 ori r3, r3, 0x80 /* XXX HID5_dcbz32 = 0x80 */
135 mtspr SPRN_HID5,r3
136
137no_dcbz32_on:
138
95#endif /* CONFIG_PPC_BOOK3S_64 */ 139#endif /* CONFIG_PPC_BOOK3S_64 */
96 140
97 PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ 141 PPC_LL r6, VCPU_RMCALL(r4)
142 mtctr r6
143
144 PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4)
145 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
98 146
99 /* Jump to segment patching handler and into our guest */ 147 /* Jump to segment patching handler and into our guest */
100 bl FUNC(kvmppc_entry_trampoline) 148 bctr
101 nop
102 149
103/* 150/*
104 * This is the handler in module memory. It gets jumped at from the 151 * This is the handler in module memory. It gets jumped at from the
@@ -123,24 +170,100 @@ kvmppc_handler_highmem:
123 /* R7 = vcpu */ 170 /* R7 = vcpu */
124 PPC_LL r7, GPR4(r1) 171 PPC_LL r7, GPR4(r1)
125 172
126 PPC_STL r14, VCPU_GPR(R14)(r7) 173#ifdef CONFIG_PPC_BOOK3S_64
127 PPC_STL r15, VCPU_GPR(R15)(r7) 174
128 PPC_STL r16, VCPU_GPR(R16)(r7) 175 PPC_LL r5, VCPU_HFLAGS(r7)
129 PPC_STL r17, VCPU_GPR(R17)(r7) 176 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
130 PPC_STL r18, VCPU_GPR(R18)(r7) 177 beq no_dcbz32_off
131 PPC_STL r19, VCPU_GPR(R19)(r7) 178
132 PPC_STL r20, VCPU_GPR(R20)(r7) 179 li r4, 0
133 PPC_STL r21, VCPU_GPR(R21)(r7) 180 mfspr r5,SPRN_HID5
134 PPC_STL r22, VCPU_GPR(R22)(r7) 181 rldimi r5,r4,6,56
135 PPC_STL r23, VCPU_GPR(R23)(r7) 182 mtspr SPRN_HID5,r5
136 PPC_STL r24, VCPU_GPR(R24)(r7) 183
137 PPC_STL r25, VCPU_GPR(R25)(r7) 184no_dcbz32_off:
138 PPC_STL r26, VCPU_GPR(R26)(r7) 185
139 PPC_STL r27, VCPU_GPR(R27)(r7) 186#endif /* CONFIG_PPC_BOOK3S_64 */
140 PPC_STL r28, VCPU_GPR(R28)(r7) 187
141 PPC_STL r29, VCPU_GPR(R29)(r7) 188 PPC_STL r14, VCPU_GPR(r14)(r7)
142 PPC_STL r30, VCPU_GPR(R30)(r7) 189 PPC_STL r15, VCPU_GPR(r15)(r7)
143 PPC_STL r31, VCPU_GPR(R31)(r7) 190 PPC_STL r16, VCPU_GPR(r16)(r7)
191 PPC_STL r17, VCPU_GPR(r17)(r7)
192 PPC_STL r18, VCPU_GPR(r18)(r7)
193 PPC_STL r19, VCPU_GPR(r19)(r7)
194 PPC_STL r20, VCPU_GPR(r20)(r7)
195 PPC_STL r21, VCPU_GPR(r21)(r7)
196 PPC_STL r22, VCPU_GPR(r22)(r7)
197 PPC_STL r23, VCPU_GPR(r23)(r7)
198 PPC_STL r24, VCPU_GPR(r24)(r7)
199 PPC_STL r25, VCPU_GPR(r25)(r7)
200 PPC_STL r26, VCPU_GPR(r26)(r7)
201 PPC_STL r27, VCPU_GPR(r27)(r7)
202 PPC_STL r28, VCPU_GPR(r28)(r7)
203 PPC_STL r29, VCPU_GPR(r29)(r7)
204 PPC_STL r30, VCPU_GPR(r30)(r7)
205 PPC_STL r31, VCPU_GPR(r31)(r7)
206
207 /* Restore host msr -> SRR1 */
208 PPC_LL r6, VCPU_HOST_MSR(r7)
209
210 /*
211 * For some interrupts, we need to call the real Linux
212 * handler, so it can do work for us. This has to happen
213 * as if the interrupt arrived from the kernel though,
214 * so let's fake it here where most state is restored.
215 *
216 * Call Linux for hardware interrupts/decrementer
217 * r3 = address of interrupt handler (exit reason)
218 */
219
220 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
221 beq call_linux_handler
222 cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
223 beq call_linux_handler
224 cmpwi r12, BOOK3S_INTERRUPT_PERFMON
225 beq call_linux_handler
226
227 /* Back to EE=1 */
228 mtmsr r6
229 sync
230 b kvm_return_point
231
232call_linux_handler:
233
234 /*
235 * If we land here we need to jump back to the handler we
236 * came from.
237 *
238 * We have a page that we can access from real mode, so let's
239 * jump back to that and use it as a trampoline to get back into the
240 * interrupt handler!
241 *
242 * R3 still contains the exit code,
243 * R5 VCPU_HOST_RETIP and
244 * R6 VCPU_HOST_MSR
245 */
246
247 /* Restore host IP -> SRR0 */
248 PPC_LL r5, VCPU_HOST_RETIP(r7)
249
250 /* XXX Better move to a safe function?
251 * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
252
253 mtlr r12
254
255 PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7)
256 mtsrr0 r4
257 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
258 mtsrr1 r3
259
260 RFI
261
262.global kvm_return_point
263kvm_return_point:
264
265 /* Jump back to lightweight entry if we're supposed to */
266 /* go back into the guest */
144 267
145 /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ 268 /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
146 mr r5, r12 269 mr r5, r12
@@ -161,9 +284,6 @@ kvm_exit_loop:
161 PPC_LL r4, _LINK(r1) 284 PPC_LL r4, _LINK(r1)
162 mtlr r4 285 mtlr r4
163 286
164 lwz r14, _CCR(r1)
165 mtcr r14
166
167 /* Restore non-volatile host registers (r14 - r31) */ 287 /* Restore non-volatile host registers (r14 - r31) */
168 REST_NVGPRS(r1) 288 REST_NVGPRS(r1)
169 289
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 2c86b0d6371..41cb0017e75 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -114,6 +114,11 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
114 hlist_del_init_rcu(&pte->list_vpte); 114 hlist_del_init_rcu(&pte->list_vpte);
115 hlist_del_init_rcu(&pte->list_vpte_long); 115 hlist_del_init_rcu(&pte->list_vpte_long);
116 116
117 if (pte->pte.may_write)
118 kvm_release_pfn_dirty(pte->pfn);
119 else
120 kvm_release_pfn_clean(pte->pfn);
121
117 spin_unlock(&vcpu3s->mmu_lock); 122 spin_unlock(&vcpu3s->mmu_lock);
118 123
119 vcpu3s->hpte_cache_count--; 124 vcpu3s->hpte_cache_count--;
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index a59a25a1321..7b0ee96c1be 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -24,7 +24,6 @@
24#include <asm/kvm_fpu.h> 24#include <asm/kvm_fpu.h>
25#include <asm/reg.h> 25#include <asm/reg.h>
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/switch_to.h>
28#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
29 28
30/* #define DEBUG */ 29/* #define DEBUG */
@@ -197,8 +196,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
197 kvmppc_inject_pf(vcpu, addr, false); 196 kvmppc_inject_pf(vcpu, addr, false);
198 goto done_load; 197 goto done_load;
199 } else if (r == EMULATE_DO_MMIO) { 198 } else if (r == EMULATE_DO_MMIO) {
200 emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, 199 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1);
201 len, 1);
202 goto done_load; 200 goto done_load;
203 } 201 }
204 202
@@ -288,13 +286,11 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
288 kvmppc_inject_pf(vcpu, addr, false); 286 kvmppc_inject_pf(vcpu, addr, false);
289 goto done_load; 287 goto done_load;
290 } else if ((r == EMULATE_DO_MMIO) && w) { 288 } else if ((r == EMULATE_DO_MMIO) && w) {
291 emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, 289 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1);
292 4, 1);
293 vcpu->arch.qpr[rs] = tmp[1]; 290 vcpu->arch.qpr[rs] = tmp[1];
294 goto done_load; 291 goto done_load;
295 } else if (r == EMULATE_DO_MMIO) { 292 } else if (r == EMULATE_DO_MMIO) {
296 emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs, 293 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1);
297 8, 1);
298 goto done_load; 294 goto done_load;
299 } 295 }
300 296
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 28d38adeca7..0c0d3f27443 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -20,7 +20,6 @@
20 */ 20 */
21 21
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23#include <linux/export.h>
24#include <linux/err.h> 23#include <linux/err.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
26 25
@@ -33,7 +32,6 @@
33#include <asm/kvm_ppc.h> 32#include <asm/kvm_ppc.h>
34#include <asm/kvm_book3s.h> 33#include <asm/kvm_book3s.h>
35#include <asm/mmu_context.h> 34#include <asm/mmu_context.h>
36#include <asm/switch_to.h>
37#include <linux/gfp.h> 35#include <linux/gfp.h>
38#include <linux/sched.h> 36#include <linux/sched.h>
39#include <linux/vmalloc.h> 37#include <linux/vmalloc.h>
@@ -57,14 +55,12 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
57void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 55void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
58{ 56{
59#ifdef CONFIG_PPC_BOOK3S_64 57#ifdef CONFIG_PPC_BOOK3S_64
60 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 58 memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
61 memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
62 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, 59 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
63 sizeof(get_paca()->shadow_vcpu)); 60 sizeof(get_paca()->shadow_vcpu));
64 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; 61 to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
65 svcpu_put(svcpu);
66#endif 62#endif
67 vcpu->cpu = smp_processor_id(); 63
68#ifdef CONFIG_PPC_BOOK3S_32 64#ifdef CONFIG_PPC_BOOK3S_32
69 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; 65 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
70#endif 66#endif
@@ -73,79 +69,23 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
73void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 69void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
74{ 70{
75#ifdef CONFIG_PPC_BOOK3S_64 71#ifdef CONFIG_PPC_BOOK3S_64
76 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 72 memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
77 memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
78 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 73 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
79 sizeof(get_paca()->shadow_vcpu)); 74 sizeof(get_paca()->shadow_vcpu));
80 to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; 75 to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
81 svcpu_put(svcpu);
82#endif 76#endif
83 77
84 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); 78 kvmppc_giveup_ext(vcpu, MSR_FP);
85 vcpu->cpu = -1; 79 kvmppc_giveup_ext(vcpu, MSR_VEC);
86} 80 kvmppc_giveup_ext(vcpu, MSR_VSX);
87
88int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
89{
90 int r = 1; /* Indicate we want to get back into the guest */
91
92 /* We misuse TLB_FLUSH to indicate that we want to clear
93 all shadow cache entries */
94 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
95 kvmppc_mmu_pte_flush(vcpu, 0, 0);
96
97 return r;
98}
99
100/************* MMU Notifiers *************/
101
102int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
103{
104 trace_kvm_unmap_hva(hva);
105
106 /*
107 * Flush all shadow tlb entries everywhere. This is slow, but
108 * we are 100% sure that we catch the to be unmapped page
109 */
110 kvm_flush_remote_tlbs(kvm);
111
112 return 0;
113}
114
115int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
116{
117 /* kvm_unmap_hva flushes everything anyways */
118 kvm_unmap_hva(kvm, start);
119
120 return 0;
121}
122
123int kvm_age_hva(struct kvm *kvm, unsigned long hva)
124{
125 /* XXX could be more clever ;) */
126 return 0;
127}
128
129int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
130{
131 /* XXX could be more clever ;) */
132 return 0;
133}
134
135void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
136{
137 /* The page will get remapped properly on its next fault */
138 kvm_unmap_hva(kvm, hva);
139} 81}
140 82
141/*****************************************/
142
143static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) 83static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
144{ 84{
145 ulong smsr = vcpu->arch.shared->msr; 85 ulong smsr = vcpu->arch.shared->msr;
146 86
147 /* Guest MSR values */ 87 /* Guest MSR values */
148 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE; 88 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
149 /* Process MSR values */ 89 /* Process MSR values */
150 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; 90 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
151 /* External providers the guest reserved */ 91 /* External providers the guest reserved */
@@ -172,7 +112,6 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
172 if (msr & MSR_POW) { 112 if (msr & MSR_POW) {
173 if (!vcpu->arch.pending_exceptions) { 113 if (!vcpu->arch.pending_exceptions) {
174 kvm_vcpu_block(vcpu); 114 kvm_vcpu_block(vcpu);
175 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
176 vcpu->stat.halt_wakeup++; 115 vcpu->stat.halt_wakeup++;
177 116
178 /* Unset POW bit after we woke up */ 117 /* Unset POW bit after we woke up */
@@ -197,21 +136,6 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
197 } 136 }
198 } 137 }
199 138
200 /*
201 * When switching from 32 to 64-bit, we may have a stale 32-bit
202 * magic page around, we need to flush it. Typically 32-bit magic
203 * page will be instanciated when calling into RTAS. Note: We
204 * assume that such transition only happens while in kernel mode,
205 * ie, we never transition from user 32-bit to kernel 64-bit with
206 * a 32-bit magic page around.
207 */
208 if (vcpu->arch.magic_page_pa &&
209 !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
210 /* going from RTAS to normal kernel code */
211 kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
212 ~0xFFFUL);
213 }
214
215 /* Preload FPU if it's enabled */ 139 /* Preload FPU if it's enabled */
216 if (vcpu->arch.shared->msr & MSR_FP) 140 if (vcpu->arch.shared->msr & MSR_FP)
217 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 141 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
@@ -226,22 +150,16 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
226#ifdef CONFIG_PPC_BOOK3S_64 150#ifdef CONFIG_PPC_BOOK3S_64
227 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { 151 if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
228 kvmppc_mmu_book3s_64_init(vcpu); 152 kvmppc_mmu_book3s_64_init(vcpu);
229 if (!to_book3s(vcpu)->hior_explicit) 153 to_book3s(vcpu)->hior = 0xfff00000;
230 to_book3s(vcpu)->hior = 0xfff00000;
231 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; 154 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
232 vcpu->arch.cpu_type = KVM_CPU_3S_64;
233 } else 155 } else
234#endif 156#endif
235 { 157 {
236 kvmppc_mmu_book3s_32_init(vcpu); 158 kvmppc_mmu_book3s_32_init(vcpu);
237 if (!to_book3s(vcpu)->hior_explicit) 159 to_book3s(vcpu)->hior = 0;
238 to_book3s(vcpu)->hior = 0;
239 to_book3s(vcpu)->msr_mask = 0xffffffffULL; 160 to_book3s(vcpu)->msr_mask = 0xffffffffULL;
240 vcpu->arch.cpu_type = KVM_CPU_3S_32;
241 } 161 }
242 162
243 kvmppc_sanity_check(vcpu);
244
245 /* If we are in hypervisor level on 970, we can tell the CPU to 163 /* If we are in hypervisor level on 970, we can tell the CPU to
246 * treat DCBZ as 32 bytes store */ 164 * treat DCBZ as 32 bytes store */
247 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; 165 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
@@ -294,22 +212,24 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
294 int i; 212 int i;
295 213
296 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 214 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
297 if (is_error_page(hpage)) 215 if (is_error_page(hpage)) {
216 kvm_release_page_clean(hpage);
298 return; 217 return;
218 }
299 219
300 hpage_offset = pte->raddr & ~PAGE_MASK; 220 hpage_offset = pte->raddr & ~PAGE_MASK;
301 hpage_offset &= ~0xFFFULL; 221 hpage_offset &= ~0xFFFULL;
302 hpage_offset /= 4; 222 hpage_offset /= 4;
303 223
304 get_page(hpage); 224 get_page(hpage);
305 page = kmap_atomic(hpage); 225 page = kmap_atomic(hpage, KM_USER0);
306 226
307 /* patch dcbz into reserved instruction, so we trap */ 227 /* patch dcbz into reserved instruction, so we trap */
308 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) 228 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
309 if ((page[i] & 0xff0007ff) == INS_DCBZ) 229 if ((page[i] & 0xff0007ff) == INS_DCBZ)
310 page[i] &= 0xfffffff7; 230 page[i] &= 0xfffffff7;
311 231
312 kunmap_atomic(page); 232 kunmap_atomic(page, KM_USER0);
313 put_page(hpage); 233 put_page(hpage);
314} 234}
315 235
@@ -317,9 +237,6 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
317{ 237{
318 ulong mp_pa = vcpu->arch.magic_page_pa; 238 ulong mp_pa = vcpu->arch.magic_page_pa;
319 239
320 if (!(vcpu->arch.shared->msr & MSR_SF))
321 mp_pa = (uint32_t)mp_pa;
322
323 if (unlikely(mp_pa) && 240 if (unlikely(mp_pa) &&
324 unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { 241 unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
325 return 1; 242 return 1;
@@ -386,22 +303,19 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
386 303
387 if (page_found == -ENOENT) { 304 if (page_found == -ENOENT) {
388 /* Page not found in guest PTE entries */ 305 /* Page not found in guest PTE entries */
389 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
390 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 306 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
391 vcpu->arch.shared->dsisr = svcpu->fault_dsisr; 307 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
392 vcpu->arch.shared->msr |= 308 vcpu->arch.shared->msr |=
393 (svcpu->shadow_srr1 & 0x00000000f8000000ULL); 309 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
394 svcpu_put(svcpu);
395 kvmppc_book3s_queue_irqprio(vcpu, vec); 310 kvmppc_book3s_queue_irqprio(vcpu, vec);
396 } else if (page_found == -EPERM) { 311 } else if (page_found == -EPERM) {
397 /* Storage protection */ 312 /* Storage protection */
398 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
399 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 313 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
400 vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; 314 vcpu->arch.shared->dsisr =
315 to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
401 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; 316 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
402 vcpu->arch.shared->msr |= 317 vcpu->arch.shared->msr |=
403 svcpu->shadow_srr1 & 0x00000000f8000000ULL; 318 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
404 svcpu_put(svcpu);
405 kvmppc_book3s_queue_irqprio(vcpu, vec); 319 kvmppc_book3s_queue_irqprio(vcpu, vec);
406 } else if (page_found == -EINVAL) { 320 } else if (page_found == -EINVAL) {
407 /* Page not found in guest SLB */ 321 /* Page not found in guest SLB */
@@ -420,7 +334,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
420 /* MMIO */ 334 /* MMIO */
421 vcpu->stat.mmio_exits++; 335 vcpu->stat.mmio_exits++;
422 vcpu->arch.paddr_accessed = pte.raddr; 336 vcpu->arch.paddr_accessed = pte.raddr;
423 vcpu->arch.vaddr_accessed = pte.eaddr;
424 r = kvmppc_emulate_mmio(run, vcpu); 337 r = kvmppc_emulate_mmio(run, vcpu);
425 if ( r == RESUME_HOST_NV ) 338 if ( r == RESUME_HOST_NV )
426 r = RESUME_HOST; 339 r = RESUME_HOST;
@@ -431,7 +344,10 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
431 344
432static inline int get_fpr_index(int i) 345static inline int get_fpr_index(int i)
433{ 346{
434 return i * TS_FPRWIDTH; 347#ifdef CONFIG_VSX
348 i *= 2;
349#endif
350 return i;
435} 351}
436 352
437/* Give up external provider (FPU, Altivec, VSX) */ 353/* Give up external provider (FPU, Altivec, VSX) */
@@ -445,49 +361,41 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
445 u64 *thread_fpr = (u64*)t->fpr; 361 u64 *thread_fpr = (u64*)t->fpr;
446 int i; 362 int i;
447 363
448 /* 364 if (!(vcpu->arch.guest_owned_ext & msr))
449 * VSX instructions can access FP and vector registers, so if
450 * we are giving up VSX, make sure we give up FP and VMX as well.
451 */
452 if (msr & MSR_VSX)
453 msr |= MSR_FP | MSR_VEC;
454
455 msr &= vcpu->arch.guest_owned_ext;
456 if (!msr)
457 return; 365 return;
458 366
459#ifdef DEBUG_EXT 367#ifdef DEBUG_EXT
460 printk(KERN_INFO "Giving up ext 0x%lx\n", msr); 368 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
461#endif 369#endif
462 370
463 if (msr & MSR_FP) { 371 switch (msr) {
464 /* 372 case MSR_FP:
465 * Note that on CPUs with VSX, giveup_fpu stores
466 * both the traditional FP registers and the added VSX
467 * registers into thread.fpr[].
468 */
469 giveup_fpu(current); 373 giveup_fpu(current);
470 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 374 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
471 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; 375 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
472 376
473 vcpu->arch.fpscr = t->fpscr.val; 377 vcpu->arch.fpscr = t->fpscr.val;
474 378 break;
475#ifdef CONFIG_VSX 379 case MSR_VEC:
476 if (cpu_has_feature(CPU_FTR_VSX))
477 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
478 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
479#endif
480 }
481
482#ifdef CONFIG_ALTIVEC 380#ifdef CONFIG_ALTIVEC
483 if (msr & MSR_VEC) {
484 giveup_altivec(current); 381 giveup_altivec(current);
485 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); 382 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
486 vcpu->arch.vscr = t->vscr; 383 vcpu->arch.vscr = t->vscr;
487 }
488#endif 384#endif
385 break;
386 case MSR_VSX:
387#ifdef CONFIG_VSX
388 __giveup_vsx(current);
389 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
390 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
391#endif
392 break;
393 default:
394 BUG();
395 }
489 396
490 vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX); 397 vcpu->arch.guest_owned_ext &= ~msr;
398 current->thread.regs->msr &= ~msr;
491 kvmppc_recalc_shadow_msr(vcpu); 399 kvmppc_recalc_shadow_msr(vcpu);
492} 400}
493 401
@@ -547,27 +455,10 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
547 return RESUME_GUEST; 455 return RESUME_GUEST;
548 } 456 }
549 457
550 if (msr == MSR_VSX) { 458 /* We already own the ext */
551 /* No VSX? Give an illegal instruction interrupt */ 459 if (vcpu->arch.guest_owned_ext & msr) {
552#ifdef CONFIG_VSX
553 if (!cpu_has_feature(CPU_FTR_VSX))
554#endif
555 {
556 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
557 return RESUME_GUEST;
558 }
559
560 /*
561 * We have to load up all the FP and VMX registers before
562 * we can let the guest use VSX instructions.
563 */
564 msr = MSR_FP | MSR_VEC | MSR_VSX;
565 }
566
567 /* See if we already own all the ext(s) needed */
568 msr &= ~vcpu->arch.guest_owned_ext;
569 if (!msr)
570 return RESUME_GUEST; 460 return RESUME_GUEST;
461 }
571 462
572#ifdef DEBUG_EXT 463#ifdef DEBUG_EXT
573 printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 464 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
@@ -575,28 +466,36 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
575 466
576 current->thread.regs->msr |= msr; 467 current->thread.regs->msr |= msr;
577 468
578 if (msr & MSR_FP) { 469 switch (msr) {
470 case MSR_FP:
579 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 471 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
580 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; 472 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
581#ifdef CONFIG_VSX 473
582 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
583 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
584#endif
585 t->fpscr.val = vcpu->arch.fpscr; 474 t->fpscr.val = vcpu->arch.fpscr;
586 t->fpexc_mode = 0; 475 t->fpexc_mode = 0;
587 kvmppc_load_up_fpu(); 476 kvmppc_load_up_fpu();
588 } 477 break;
589 478 case MSR_VEC:
590 if (msr & MSR_VEC) {
591#ifdef CONFIG_ALTIVEC 479#ifdef CONFIG_ALTIVEC
592 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); 480 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
593 t->vscr = vcpu->arch.vscr; 481 t->vscr = vcpu->arch.vscr;
594 t->vrsave = -1; 482 t->vrsave = -1;
595 kvmppc_load_up_altivec(); 483 kvmppc_load_up_altivec();
596#endif 484#endif
485 break;
486 case MSR_VSX:
487#ifdef CONFIG_VSX
488 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
489 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
490 kvmppc_load_up_vsx();
491#endif
492 break;
493 default:
494 BUG();
597 } 495 }
598 496
599 vcpu->arch.guest_owned_ext |= msr; 497 vcpu->arch.guest_owned_ext |= msr;
498
600 kvmppc_recalc_shadow_msr(vcpu); 499 kvmppc_recalc_shadow_msr(vcpu);
601 500
602 return RESUME_GUEST; 501 return RESUME_GUEST;
@@ -606,39 +505,31 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
606 unsigned int exit_nr) 505 unsigned int exit_nr)
607{ 506{
608 int r = RESUME_HOST; 507 int r = RESUME_HOST;
609 int s;
610 508
611 vcpu->stat.sum_exits++; 509 vcpu->stat.sum_exits++;
612 510
613 run->exit_reason = KVM_EXIT_UNKNOWN; 511 run->exit_reason = KVM_EXIT_UNKNOWN;
614 run->ready_for_interrupt_injection = 1; 512 run->ready_for_interrupt_injection = 1;
615 513
616 /* We get here with MSR.EE=1 */ 514 trace_kvm_book3s_exit(exit_nr, vcpu);
617 515 kvm_resched(vcpu);
618 trace_kvm_exit(exit_nr, vcpu);
619 kvm_guest_exit();
620
621 switch (exit_nr) { 516 switch (exit_nr) {
622 case BOOK3S_INTERRUPT_INST_STORAGE: 517 case BOOK3S_INTERRUPT_INST_STORAGE:
623 {
624 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
625 ulong shadow_srr1 = svcpu->shadow_srr1;
626 vcpu->stat.pf_instruc++; 518 vcpu->stat.pf_instruc++;
627 519
628#ifdef CONFIG_PPC_BOOK3S_32 520#ifdef CONFIG_PPC_BOOK3S_32
629 /* We set segments as unused segments when invalidating them. So 521 /* We set segments as unused segments when invalidating them. So
630 * treat the respective fault as segment fault. */ 522 * treat the respective fault as segment fault. */
631 if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { 523 if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
524 == SR_INVALID) {
632 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 525 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
633 r = RESUME_GUEST; 526 r = RESUME_GUEST;
634 svcpu_put(svcpu);
635 break; 527 break;
636 } 528 }
637#endif 529#endif
638 svcpu_put(svcpu);
639 530
640 /* only care about PTEG not found errors, but leave NX alone */ 531 /* only care about PTEG not found errors, but leave NX alone */
641 if (shadow_srr1 & 0x40000000) { 532 if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
642 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 533 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
643 vcpu->stat.sp_instruc++; 534 vcpu->stat.sp_instruc++;
644 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 535 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -651,37 +542,33 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
651 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 542 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
652 r = RESUME_GUEST; 543 r = RESUME_GUEST;
653 } else { 544 } else {
654 vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000; 545 vcpu->arch.shared->msr |=
546 to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
655 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 547 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
656 r = RESUME_GUEST; 548 r = RESUME_GUEST;
657 } 549 }
658 break; 550 break;
659 }
660 case BOOK3S_INTERRUPT_DATA_STORAGE: 551 case BOOK3S_INTERRUPT_DATA_STORAGE:
661 { 552 {
662 ulong dar = kvmppc_get_fault_dar(vcpu); 553 ulong dar = kvmppc_get_fault_dar(vcpu);
663 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
664 u32 fault_dsisr = svcpu->fault_dsisr;
665 vcpu->stat.pf_storage++; 554 vcpu->stat.pf_storage++;
666 555
667#ifdef CONFIG_PPC_BOOK3S_32 556#ifdef CONFIG_PPC_BOOK3S_32
668 /* We set segments as unused segments when invalidating them. So 557 /* We set segments as unused segments when invalidating them. So
669 * treat the respective fault as segment fault. */ 558 * treat the respective fault as segment fault. */
670 if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { 559 if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
671 kvmppc_mmu_map_segment(vcpu, dar); 560 kvmppc_mmu_map_segment(vcpu, dar);
672 r = RESUME_GUEST; 561 r = RESUME_GUEST;
673 svcpu_put(svcpu);
674 break; 562 break;
675 } 563 }
676#endif 564#endif
677 svcpu_put(svcpu);
678 565
679 /* The only case we need to handle is missing shadow PTEs */ 566 /* The only case we need to handle is missing shadow PTEs */
680 if (fault_dsisr & DSISR_NOHPTE) { 567 if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
681 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 568 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
682 } else { 569 } else {
683 vcpu->arch.shared->dar = dar; 570 vcpu->arch.shared->dar = dar;
684 vcpu->arch.shared->dsisr = fault_dsisr; 571 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
685 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 572 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
686 r = RESUME_GUEST; 573 r = RESUME_GUEST;
687 } 574 }
@@ -704,13 +591,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
704 break; 591 break;
705 /* We're good on these - the host merely wanted to get our attention */ 592 /* We're good on these - the host merely wanted to get our attention */
706 case BOOK3S_INTERRUPT_DECREMENTER: 593 case BOOK3S_INTERRUPT_DECREMENTER:
707 case BOOK3S_INTERRUPT_HV_DECREMENTER:
708 vcpu->stat.dec_exits++; 594 vcpu->stat.dec_exits++;
709 r = RESUME_GUEST; 595 r = RESUME_GUEST;
710 break; 596 break;
711 case BOOK3S_INTERRUPT_EXTERNAL: 597 case BOOK3S_INTERRUPT_EXTERNAL:
712 case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
713 case BOOK3S_INTERRUPT_EXTERNAL_HV:
714 vcpu->stat.ext_intr_exits++; 598 vcpu->stat.ext_intr_exits++;
715 r = RESUME_GUEST; 599 r = RESUME_GUEST;
716 break; 600 break;
@@ -718,16 +602,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
718 r = RESUME_GUEST; 602 r = RESUME_GUEST;
719 break; 603 break;
720 case BOOK3S_INTERRUPT_PROGRAM: 604 case BOOK3S_INTERRUPT_PROGRAM:
721 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
722 { 605 {
723 enum emulation_result er; 606 enum emulation_result er;
724 struct kvmppc_book3s_shadow_vcpu *svcpu;
725 ulong flags; 607 ulong flags;
726 608
727program_interrupt: 609program_interrupt:
728 svcpu = svcpu_get(vcpu); 610 flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
729 flags = svcpu->shadow_srr1 & 0x1f0000ull;
730 svcpu_put(svcpu);
731 611
732 if (vcpu->arch.shared->msr & MSR_PR) { 612 if (vcpu->arch.shared->msr & MSR_PR) {
733#ifdef EXIT_DEBUG 613#ifdef EXIT_DEBUG
@@ -766,29 +646,7 @@ program_interrupt:
766 break; 646 break;
767 } 647 }
768 case BOOK3S_INTERRUPT_SYSCALL: 648 case BOOK3S_INTERRUPT_SYSCALL:
769 if (vcpu->arch.papr_enabled && 649 if (vcpu->arch.osi_enabled &&
770 (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
771 !(vcpu->arch.shared->msr & MSR_PR)) {
772 /* SC 1 papr hypercalls */
773 ulong cmd = kvmppc_get_gpr(vcpu, 3);
774 int i;
775
776#ifdef CONFIG_KVM_BOOK3S_64_PR
777 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
778 r = RESUME_GUEST;
779 break;
780 }
781#endif
782
783 run->papr_hcall.nr = cmd;
784 for (i = 0; i < 9; ++i) {
785 ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
786 run->papr_hcall.args[i] = gpr;
787 }
788 run->exit_reason = KVM_EXIT_PAPR_HCALL;
789 vcpu->arch.hcall_needed = 1;
790 r = RESUME_HOST;
791 } else if (vcpu->arch.osi_enabled &&
792 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && 650 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
793 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { 651 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
794 /* MOL hypercalls */ 652 /* MOL hypercalls */
@@ -855,37 +713,31 @@ program_interrupt:
855 r = RESUME_GUEST; 713 r = RESUME_GUEST;
856 break; 714 break;
857 default: 715 default:
858 {
859 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
860 ulong shadow_srr1 = svcpu->shadow_srr1;
861 svcpu_put(svcpu);
862 /* Ugh - bork here! What did we get? */ 716 /* Ugh - bork here! What did we get? */
863 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 717 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
864 exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); 718 exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
865 r = RESUME_HOST; 719 r = RESUME_HOST;
866 BUG(); 720 BUG();
867 break; 721 break;
868 } 722 }
869 } 723
870 724
871 if (!(r & RESUME_HOST)) { 725 if (!(r & RESUME_HOST)) {
872 /* To avoid clobbering exit_reason, only check for signals if 726 /* To avoid clobbering exit_reason, only check for signals if
873 * we aren't already exiting to userspace for some other 727 * we aren't already exiting to userspace for some other
874 * reason. */ 728 * reason. */
875 729 if (signal_pending(current)) {
876 /* 730#ifdef EXIT_DEBUG
877 * Interrupts could be timers for the guest which we have to 731 printk(KERN_EMERG "KVM: Going back to host\n");
878 * inject again, so let's postpone them until we're in the guest 732#endif
879 * and if we really did time things so badly, then we just exit 733 vcpu->stat.signal_exits++;
880 * again due to a host external interrupt. 734 run->exit_reason = KVM_EXIT_INTR;
881 */ 735 r = -EINTR;
882 local_irq_disable();
883 s = kvmppc_prepare_to_enter(vcpu);
884 if (s <= 0) {
885 local_irq_enable();
886 r = s;
887 } else { 736 } else {
888 kvmppc_lazy_ee_enable(); 737 /* In case an interrupt came in that was triggered
738 * from userspace (like DEC), we need to check what
739 * to inject now! */
740 kvmppc_core_deliver_interrupts(vcpu);
889 } 741 }
890 } 742 }
891 743
@@ -957,65 +809,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
957 return 0; 809 return 0;
958} 810}
959 811
960int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
961{
962 int r = 0;
963
964 switch (id) {
965 case KVM_REG_PPC_HIOR:
966 *val = get_reg_val(id, to_book3s(vcpu)->hior);
967 break;
968#ifdef CONFIG_VSX
969 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
970 long int i = id - KVM_REG_PPC_VSR0;
971
972 if (!cpu_has_feature(CPU_FTR_VSX)) {
973 r = -ENXIO;
974 break;
975 }
976 val->vsxval[0] = vcpu->arch.fpr[i];
977 val->vsxval[1] = vcpu->arch.vsr[i];
978 break;
979 }
980#endif /* CONFIG_VSX */
981 default:
982 r = -EINVAL;
983 break;
984 }
985
986 return r;
987}
988
989int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
990{
991 int r = 0;
992
993 switch (id) {
994 case KVM_REG_PPC_HIOR:
995 to_book3s(vcpu)->hior = set_reg_val(id, *val);
996 to_book3s(vcpu)->hior_explicit = true;
997 break;
998#ifdef CONFIG_VSX
999 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
1000 long int i = id - KVM_REG_PPC_VSR0;
1001
1002 if (!cpu_has_feature(CPU_FTR_VSX)) {
1003 r = -ENXIO;
1004 break;
1005 }
1006 vcpu->arch.fpr[i] = val->vsxval[0];
1007 vcpu->arch.vsr[i] = val->vsxval[1];
1008 break;
1009 }
1010#endif /* CONFIG_VSX */
1011 default:
1012 r = -EINVAL;
1013 break;
1014 }
1015
1016 return r;
1017}
1018
1019int kvmppc_core_check_processor_compat(void) 812int kvmppc_core_check_processor_compat(void)
1020{ 813{
1021 return 0; 814 return 0;
@@ -1048,6 +841,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1048 if (!p) 841 if (!p)
1049 goto uninit_vcpu; 842 goto uninit_vcpu;
1050 843
844 vcpu->arch.host_retip = kvm_return_point;
845 vcpu->arch.host_msr = mfmsr();
1051#ifdef CONFIG_PPC_BOOK3S_64 846#ifdef CONFIG_PPC_BOOK3S_64
1052 /* default to book3s_64 (970fx) */ 847 /* default to book3s_64 (970fx) */
1053 vcpu->arch.pvr = 0x3C0301; 848 vcpu->arch.pvr = 0x3C0301;
@@ -1058,6 +853,16 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1058 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 853 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
1059 vcpu->arch.slb_nr = 64; 854 vcpu->arch.slb_nr = 64;
1060 855
856 /* remember where some real-mode handlers are */
857 vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
858 vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
859 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
860#ifdef CONFIG_PPC_BOOK3S_64
861 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
862#else
863 vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
864#endif
865
1061 vcpu->arch.shadow_msr = MSR_USER64; 866 vcpu->arch.shadow_msr = MSR_USER64;
1062 867
1063 err = kvmppc_mmu_init(vcpu); 868 err = kvmppc_mmu_init(vcpu);
@@ -1103,24 +908,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1103#endif 908#endif
1104 ulong ext_msr; 909 ulong ext_msr;
1105 910
1106 /* Check if we can run the vcpu at all */ 911 /* No need to go into the guest when all we do is going out */
1107 if (!vcpu->arch.sane) { 912 if (signal_pending(current)) {
1108 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 913 kvm_run->exit_reason = KVM_EXIT_INTR;
1109 ret = -EINVAL; 914 return -EINTR;
1110 goto out;
1111 }
1112
1113 /*
1114 * Interrupts could be timers for the guest which we have to inject
1115 * again, so let's postpone them until we're in the guest and if we
1116 * really did time things so badly, then we just exit again due to
1117 * a host external interrupt.
1118 */
1119 local_irq_disable();
1120 ret = kvmppc_prepare_to_enter(vcpu);
1121 if (ret <= 0) {
1122 local_irq_enable();
1123 goto out;
1124 } 915 }
1125 916
1126 /* Save FPU state in stack */ 917 /* Save FPU state in stack */
@@ -1146,7 +937,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1146 /* Save VSX state in stack */ 937 /* Save VSX state in stack */
1147 used_vsr = current->thread.used_vsr; 938 used_vsr = current->thread.used_vsr;
1148 if (used_vsr && (current->thread.regs->msr & MSR_VSX)) 939 if (used_vsr && (current->thread.regs->msr & MSR_VSX))
1149 __giveup_vsx(current); 940 __giveup_vsx(current);
1150#endif 941#endif
1151 942
1152 /* Remember the MSR with disabled extensions */ 943 /* Remember the MSR with disabled extensions */
@@ -1156,19 +947,22 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1156 if (vcpu->arch.shared->msr & MSR_FP) 947 if (vcpu->arch.shared->msr & MSR_FP)
1157 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 948 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
1158 949
1159 kvmppc_lazy_ee_enable(); 950 kvm_guest_enter();
1160 951
1161 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 952 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
1162 953
1163 /* No need for kvm_guest_exit. It's done in handle_exit. 954 kvm_guest_exit();
1164 We also get here with interrupts enabled. */
1165 955
1166 /* Make sure we save the guest FPU/Altivec/VSX state */ 956 local_irq_disable();
1167 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
1168 957
1169 current->thread.regs->msr = ext_msr; 958 current->thread.regs->msr = ext_msr;
1170 959
1171 /* Restore FPU/VSX state from stack */ 960 /* Make sure we save the guest FPU/Altivec/VSX state */
961 kvmppc_giveup_ext(vcpu, MSR_FP);
962 kvmppc_giveup_ext(vcpu, MSR_VEC);
963 kvmppc_giveup_ext(vcpu, MSR_VSX);
964
965 /* Restore FPU state from stack */
1172 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); 966 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
1173 current->thread.fpscr.val = fpscr; 967 current->thread.fpscr.val = fpscr;
1174 current->thread.fpexc_mode = fpexc_mode; 968 current->thread.fpexc_mode = fpexc_mode;
@@ -1187,117 +981,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1187 current->thread.used_vsr = used_vsr; 981 current->thread.used_vsr = used_vsr;
1188#endif 982#endif
1189 983
1190out:
1191 vcpu->mode = OUTSIDE_GUEST_MODE;
1192 return ret; 984 return ret;
1193} 985}
1194 986
1195/*
1196 * Get (and clear) the dirty memory log for a memory slot.
1197 */
1198int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1199 struct kvm_dirty_log *log)
1200{
1201 struct kvm_memory_slot *memslot;
1202 struct kvm_vcpu *vcpu;
1203 ulong ga, ga_end;
1204 int is_dirty = 0;
1205 int r;
1206 unsigned long n;
1207
1208 mutex_lock(&kvm->slots_lock);
1209
1210 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1211 if (r)
1212 goto out;
1213
1214 /* If nothing is dirty, don't bother messing with page tables. */
1215 if (is_dirty) {
1216 memslot = id_to_memslot(kvm->memslots, log->slot);
1217
1218 ga = memslot->base_gfn << PAGE_SHIFT;
1219 ga_end = ga + (memslot->npages << PAGE_SHIFT);
1220
1221 kvm_for_each_vcpu(n, vcpu, kvm)
1222 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
1223
1224 n = kvm_dirty_bitmap_bytes(memslot);
1225 memset(memslot->dirty_bitmap, 0, n);
1226 }
1227
1228 r = 0;
1229out:
1230 mutex_unlock(&kvm->slots_lock);
1231 return r;
1232}
1233
1234#ifdef CONFIG_PPC64
1235int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1236{
1237 /* No flags */
1238 info->flags = 0;
1239
1240 /* SLB is always 64 entries */
1241 info->slb_size = 64;
1242
1243 /* Standard 4k base page size segment */
1244 info->sps[0].page_shift = 12;
1245 info->sps[0].slb_enc = 0;
1246 info->sps[0].enc[0].page_shift = 12;
1247 info->sps[0].enc[0].pte_enc = 0;
1248
1249 /* Standard 16M large page size segment */
1250 info->sps[1].page_shift = 24;
1251 info->sps[1].slb_enc = SLB_VSID_L;
1252 info->sps[1].enc[0].page_shift = 24;
1253 info->sps[1].enc[0].pte_enc = 0;
1254
1255 return 0;
1256}
1257#endif /* CONFIG_PPC64 */
1258
1259void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1260 struct kvm_memory_slot *dont)
1261{
1262}
1263
1264int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1265 unsigned long npages)
1266{
1267 return 0;
1268}
1269
1270int kvmppc_core_prepare_memory_region(struct kvm *kvm, 987int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1271 struct kvm_memory_slot *memslot,
1272 struct kvm_userspace_memory_region *mem) 988 struct kvm_userspace_memory_region *mem)
1273{ 989{
1274 return 0; 990 return 0;
1275} 991}
1276 992
1277void kvmppc_core_commit_memory_region(struct kvm *kvm, 993void kvmppc_core_commit_memory_region(struct kvm *kvm,
1278 struct kvm_userspace_memory_region *mem, 994 struct kvm_userspace_memory_region *mem)
1279 struct kvm_memory_slot old)
1280{
1281}
1282
1283void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
1284{ 995{
1285} 996}
1286 997
1287int kvmppc_core_init_vm(struct kvm *kvm) 998int kvmppc_core_init_vm(struct kvm *kvm)
1288{ 999{
1289#ifdef CONFIG_PPC64
1290 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1291#endif
1292
1293 return 0; 1000 return 0;
1294} 1001}
1295 1002
1296void kvmppc_core_destroy_vm(struct kvm *kvm) 1003void kvmppc_core_destroy_vm(struct kvm *kvm)
1297{ 1004{
1298#ifdef CONFIG_PPC64
1299 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
1300#endif
1301} 1005}
1302 1006
1303static int kvmppc_book3s_init(void) 1007static int kvmppc_book3s_init(void)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
deleted file mode 100644
index ee02b30878e..00000000000
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ /dev/null
@@ -1,252 +0,0 @@
1/*
2 * Copyright (C) 2011. Freescale Inc. All rights reserved.
3 *
4 * Authors:
5 * Alexander Graf <agraf@suse.de>
6 * Paul Mackerras <paulus@samba.org>
7 *
8 * Description:
9 *
10 * Hypercall handling for running PAPR guests in PR KVM on Book 3S
11 * processors.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License, version 2, as
15 * published by the Free Software Foundation.
16 */
17
18#include <linux/anon_inodes.h>
19
20#include <asm/uaccess.h>
21#include <asm/kvm_ppc.h>
22#include <asm/kvm_book3s.h>
23
24static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
25{
26 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
27 unsigned long pteg_addr;
28
29 pte_index <<= 4;
30 pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
31 pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
32 pteg_addr |= pte_index;
33
34 return pteg_addr;
35}
36
37static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
38{
39 long flags = kvmppc_get_gpr(vcpu, 4);
40 long pte_index = kvmppc_get_gpr(vcpu, 5);
41 unsigned long pteg[2 * 8];
42 unsigned long pteg_addr, i, *hpte;
43
44 pte_index &= ~7UL;
45 pteg_addr = get_pteg_addr(vcpu, pte_index);
46
47 copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
48 hpte = pteg;
49
50 if (likely((flags & H_EXACT) == 0)) {
51 pte_index &= ~7UL;
52 for (i = 0; ; ++i) {
53 if (i == 8)
54 return H_PTEG_FULL;
55 if ((*hpte & HPTE_V_VALID) == 0)
56 break;
57 hpte += 2;
58 }
59 } else {
60 i = kvmppc_get_gpr(vcpu, 5) & 7UL;
61 hpte += i * 2;
62 }
63
64 hpte[0] = kvmppc_get_gpr(vcpu, 6);
65 hpte[1] = kvmppc_get_gpr(vcpu, 7);
66 copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
67 kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
68 kvmppc_set_gpr(vcpu, 4, pte_index | i);
69
70 return EMULATE_DONE;
71}
72
73static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
74{
75 unsigned long flags= kvmppc_get_gpr(vcpu, 4);
76 unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
77 unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
78 unsigned long v = 0, pteg, rb;
79 unsigned long pte[2];
80
81 pteg = get_pteg_addr(vcpu, pte_index);
82 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
83
84 if ((pte[0] & HPTE_V_VALID) == 0 ||
85 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
86 ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
87 kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
88 return EMULATE_DONE;
89 }
90
91 copy_to_user((void __user *)pteg, &v, sizeof(v));
92
93 rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
94 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
95
96 kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
97 kvmppc_set_gpr(vcpu, 4, pte[0]);
98 kvmppc_set_gpr(vcpu, 5, pte[1]);
99
100 return EMULATE_DONE;
101}
102
103/* Request defs for kvmppc_h_pr_bulk_remove() */
104#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL
105#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL
106#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL
107#define H_BULK_REMOVE_END 0xc000000000000000ULL
108#define H_BULK_REMOVE_CODE 0x3000000000000000ULL
109#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL
110#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL
111#define H_BULK_REMOVE_PARM 0x2000000000000000ULL
112#define H_BULK_REMOVE_HW 0x3000000000000000ULL
113#define H_BULK_REMOVE_RC 0x0c00000000000000ULL
114#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL
115#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL
116#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL
117#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL
118#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL
119#define H_BULK_REMOVE_MAX_BATCH 4
120
121static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
122{
123 int i;
124 int paramnr = 4;
125 int ret = H_SUCCESS;
126
127 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
128 unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
129 unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
130 unsigned long pteg, rb, flags;
131 unsigned long pte[2];
132 unsigned long v = 0;
133
134 if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
135 break; /* Exit success */
136 } else if ((tsh & H_BULK_REMOVE_TYPE) !=
137 H_BULK_REMOVE_REQUEST) {
138 ret = H_PARAMETER;
139 break; /* Exit fail */
140 }
141
142 tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
143 tsh |= H_BULK_REMOVE_RESPONSE;
144
145 if ((tsh & H_BULK_REMOVE_ANDCOND) &&
146 (tsh & H_BULK_REMOVE_AVPN)) {
147 tsh |= H_BULK_REMOVE_PARM;
148 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
149 ret = H_PARAMETER;
150 break; /* Exit fail */
151 }
152
153 pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
154 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
155
156 /* tsl = AVPN */
157 flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
158
159 if ((pte[0] & HPTE_V_VALID) == 0 ||
160 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) ||
161 ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) {
162 tsh |= H_BULK_REMOVE_NOT_FOUND;
163 } else {
164 /* Splat the pteg in (userland) hpt */
165 copy_to_user((void __user *)pteg, &v, sizeof(v));
166
167 rb = compute_tlbie_rb(pte[0], pte[1],
168 tsh & H_BULK_REMOVE_PTEX);
169 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
170 tsh |= H_BULK_REMOVE_SUCCESS;
171 tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 43;
172 }
173 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
174 }
175 kvmppc_set_gpr(vcpu, 3, ret);
176
177 return EMULATE_DONE;
178}
179
180static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
181{
182 unsigned long flags = kvmppc_get_gpr(vcpu, 4);
183 unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
184 unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
185 unsigned long rb, pteg, r, v;
186 unsigned long pte[2];
187
188 pteg = get_pteg_addr(vcpu, pte_index);
189 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
190
191 if ((pte[0] & HPTE_V_VALID) == 0 ||
192 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
193 kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
194 return EMULATE_DONE;
195 }
196
197 v = pte[0];
198 r = pte[1];
199 r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
200 HPTE_R_KEY_LO);
201 r |= (flags << 55) & HPTE_R_PP0;
202 r |= (flags << 48) & HPTE_R_KEY_HI;
203 r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
204
205 pte[1] = r;
206
207 rb = compute_tlbie_rb(v, r, pte_index);
208 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
209 copy_to_user((void __user *)pteg, pte, sizeof(pte));
210
211 kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
212
213 return EMULATE_DONE;
214}
215
216static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
217{
218 unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
219 unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
220 unsigned long tce = kvmppc_get_gpr(vcpu, 6);
221 long rc;
222
223 rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
224 if (rc == H_TOO_HARD)
225 return EMULATE_FAIL;
226 kvmppc_set_gpr(vcpu, 3, rc);
227 return EMULATE_DONE;
228}
229
230int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
231{
232 switch (cmd) {
233 case H_ENTER:
234 return kvmppc_h_pr_enter(vcpu);
235 case H_REMOVE:
236 return kvmppc_h_pr_remove(vcpu);
237 case H_PROTECT:
238 return kvmppc_h_pr_protect(vcpu);
239 case H_BULK_REMOVE:
240 return kvmppc_h_pr_bulk_remove(vcpu);
241 case H_PUT_TCE:
242 return kvmppc_h_pr_put_tce(vcpu);
243 case H_CEDE:
244 vcpu->arch.shared->msr |= MSR_EE;
245 kvm_vcpu_block(vcpu);
246 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
247 vcpu->stat.halt_wakeup++;
248 return EMULATE_DONE;
249 }
250
251 return EMULATE_FAIL;
252}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 8f7633e3afb..c1f877c4a88 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -20,7 +20,6 @@
20#include <asm/ppc_asm.h> 20#include <asm/ppc_asm.h>
21#include <asm/kvm_asm.h> 21#include <asm/kvm_asm.h>
22#include <asm/reg.h> 22#include <asm/reg.h>
23#include <asm/mmu.h>
24#include <asm/page.h> 23#include <asm/page.h>
25#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
26 25
@@ -36,9 +35,10 @@
36 35
37#if defined(CONFIG_PPC_BOOK3S_64) 36#if defined(CONFIG_PPC_BOOK3S_64)
38 37
38#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg)
39#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
39#define FUNC(name) GLUE(.,name) 40#define FUNC(name) GLUE(.,name)
40 41
41 .globl kvmppc_skip_interrupt
42kvmppc_skip_interrupt: 42kvmppc_skip_interrupt:
43 /* 43 /*
44 * Here all GPRs are unchanged from when the interrupt happened 44 * Here all GPRs are unchanged from when the interrupt happened
@@ -51,7 +51,6 @@ kvmppc_skip_interrupt:
51 rfid 51 rfid
52 b . 52 b .
53 53
54 .globl kvmppc_skip_Hinterrupt
55kvmppc_skip_Hinterrupt: 54kvmppc_skip_Hinterrupt:
56 /* 55 /*
57 * Here all GPRs are unchanged from when the interrupt happened 56 * Here all GPRs are unchanged from when the interrupt happened
@@ -66,6 +65,7 @@ kvmppc_skip_Hinterrupt:
66 65
67#elif defined(CONFIG_PPC_BOOK3S_32) 66#elif defined(CONFIG_PPC_BOOK3S_32)
68 67
68#define MSR_NOIRQ MSR_KERNEL
69#define FUNC(name) name 69#define FUNC(name) name
70 70
71.macro INTERRUPT_TRAMPOLINE intno 71.macro INTERRUPT_TRAMPOLINE intno
@@ -167,25 +167,40 @@ kvmppc_handler_skip_ins:
167#endif 167#endif
168 168
169/* 169/*
170 * Call kvmppc_handler_trampoline_enter in real mode 170 * This trampoline brings us back to a real mode handler
171 *
172 * Input Registers:
173 *
174 * R5 = SRR0
175 * R6 = SRR1
176 * LR = real-mode IP
171 * 177 *
172 * On entry, r4 contains the guest shadow MSR
173 * MSR.EE has to be 0 when calling this function
174 */ 178 */
175_GLOBAL(kvmppc_entry_trampoline) 179.global kvmppc_handler_lowmem_trampoline
176 mfmsr r5 180kvmppc_handler_lowmem_trampoline:
177 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
178 toreal(r7)
179 181
180 li r6, MSR_IR | MSR_DR 182 mtsrr0 r5
181 andc r6, r5, r6 /* Clear DR and IR in MSR value */
182 /*
183 * Set EE in HOST_MSR so that it's enabled when we get into our
184 * C exit handler function
185 */
186 ori r5, r5, MSR_EE
187 mtsrr0 r7
188 mtsrr1 r6 183 mtsrr1 r6
184 blr
185kvmppc_handler_lowmem_trampoline_end:
186
187/*
188 * Call a function in real mode
189 *
190 * Input Registers:
191 *
192 * R3 = function
193 * R4 = MSR
194 * R5 = scratch register
195 *
196 */
197_GLOBAL(kvmppc_rmcall)
198 LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
199 mtmsr r5 /* Disable relocation and interrupts, so mtsrr
200 doesn't get interrupted */
201 sync
202 mtsrr0 r3
203 mtsrr1 r4
189 RFI 204 RFI
190 205
191#if defined(CONFIG_PPC_BOOK3S_32) 206#if defined(CONFIG_PPC_BOOK3S_32)
@@ -234,5 +249,8 @@ define_load_up(fpu)
234#ifdef CONFIG_ALTIVEC 249#ifdef CONFIG_ALTIVEC
235define_load_up(altivec) 250define_load_up(altivec)
236#endif 251#endif
252#ifdef CONFIG_VSX
253define_load_up(vsx)
254#endif
237 255
238#include "book3s_segment.S" 256#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1abe4788191..aed32e51721 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -57,12 +57,10 @@ kvmppc_handler_trampoline_enter:
57 /* Required state: 57 /* Required state:
58 * 58 *
59 * MSR = ~IR|DR 59 * MSR = ~IR|DR
60 * R13 = PACA
60 * R1 = host R1 61 * R1 = host R1
61 * R2 = host R2 62 * R2 = host R2
62 * R4 = guest shadow MSR 63 * R10 = guest MSR
63 * R5 = normal host MSR
64 * R6 = current host MSR (EE, IR, DR off)
65 * LR = highmem guest exit code
66 * all other volatile GPRS = free 64 * all other volatile GPRS = free
67 * SVCPU[CR] = guest CR 65 * SVCPU[CR] = guest CR
68 * SVCPU[XER] = guest XER 66 * SVCPU[XER] = guest XER
@@ -73,15 +71,15 @@ kvmppc_handler_trampoline_enter:
73 /* r3 = shadow vcpu */ 71 /* r3 = shadow vcpu */
74 GET_SHADOW_VCPU(r3) 72 GET_SHADOW_VCPU(r3)
75 73
76 /* Save guest exit handler address and MSR */
77 mflr r0
78 PPC_STL r0, HSTATE_VMHANDLER(r3)
79 PPC_STL r5, HSTATE_HOST_MSR(r3)
80
81 /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */ 74 /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
82 PPC_STL r1, HSTATE_HOST_R1(r3) 75 PPC_STL r1, HSTATE_HOST_R1(r3)
83 PPC_STL r2, HSTATE_HOST_R2(r3) 76 PPC_STL r2, HSTATE_HOST_R2(r3)
84 77
78 /* Move SRR0 and SRR1 into the respective regs */
79 PPC_LL r9, SVCPU_PC(r3)
80 mtsrr0 r9
81 mtsrr1 r10
82
85 /* Activate guest mode, so faults get handled by KVM */ 83 /* Activate guest mode, so faults get handled by KVM */
86 li r11, KVM_GUEST_MODE_GUEST 84 li r11, KVM_GUEST_MODE_GUEST
87 stb r11, HSTATE_IN_GUEST(r3) 85 stb r11, HSTATE_IN_GUEST(r3)
@@ -89,62 +87,32 @@ kvmppc_handler_trampoline_enter:
89 /* Switch to guest segment. This is subarch specific. */ 87 /* Switch to guest segment. This is subarch specific. */
90 LOAD_GUEST_SEGMENTS 88 LOAD_GUEST_SEGMENTS
91 89
92#ifdef CONFIG_PPC_BOOK3S_64
93 /* Some guests may need to have dcbz set to 32 byte length.
94 *
95 * Usually we ensure that by patching the guest's instructions
96 * to trap on dcbz and emulate it in the hypervisor.
97 *
98 * If we can, we should tell the CPU to use 32 byte dcbz though,
99 * because that's a lot faster.
100 */
101 lbz r0, HSTATE_RESTORE_HID5(r3)
102 cmpwi r0, 0
103 beq no_dcbz32_on
104
105 mfspr r0,SPRN_HID5
106 ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */
107 mtspr SPRN_HID5,r0
108no_dcbz32_on:
109
110#endif /* CONFIG_PPC_BOOK3S_64 */
111
112 /* Enter guest */ 90 /* Enter guest */
113 91
114 PPC_LL r8, SVCPU_CTR(r3) 92 PPC_LL r4, SVCPU_CTR(r3)
115 PPC_LL r9, SVCPU_LR(r3) 93 PPC_LL r5, SVCPU_LR(r3)
116 lwz r10, SVCPU_CR(r3) 94 lwz r6, SVCPU_CR(r3)
117 lwz r11, SVCPU_XER(r3) 95 lwz r7, SVCPU_XER(r3)
118 96
119 mtctr r8 97 mtctr r4
120 mtlr r9 98 mtlr r5
121 mtcr r10 99 mtcr r6
122 mtxer r11 100 mtxer r7
123
124 /* Move SRR0 and SRR1 into the respective regs */
125 PPC_LL r9, SVCPU_PC(r3)
126 /* First clear RI in our current MSR value */
127 li r0, MSR_RI
128 andc r6, r6, r0
129 101
130 PPC_LL r0, SVCPU_R0(r3) 102 PPC_LL r0, SVCPU_R0(r3)
131 PPC_LL r1, SVCPU_R1(r3) 103 PPC_LL r1, SVCPU_R1(r3)
132 PPC_LL r2, SVCPU_R2(r3) 104 PPC_LL r2, SVCPU_R2(r3)
105 PPC_LL r4, SVCPU_R4(r3)
133 PPC_LL r5, SVCPU_R5(r3) 106 PPC_LL r5, SVCPU_R5(r3)
107 PPC_LL r6, SVCPU_R6(r3)
134 PPC_LL r7, SVCPU_R7(r3) 108 PPC_LL r7, SVCPU_R7(r3)
135 PPC_LL r8, SVCPU_R8(r3) 109 PPC_LL r8, SVCPU_R8(r3)
110 PPC_LL r9, SVCPU_R9(r3)
136 PPC_LL r10, SVCPU_R10(r3) 111 PPC_LL r10, SVCPU_R10(r3)
137 PPC_LL r11, SVCPU_R11(r3) 112 PPC_LL r11, SVCPU_R11(r3)
138 PPC_LL r12, SVCPU_R12(r3) 113 PPC_LL r12, SVCPU_R12(r3)
139 PPC_LL r13, SVCPU_R13(r3) 114 PPC_LL r13, SVCPU_R13(r3)
140 115
141 MTMSR_EERI(r6)
142 mtsrr0 r9
143 mtsrr1 r4
144
145 PPC_LL r4, SVCPU_R4(r3)
146 PPC_LL r6, SVCPU_R6(r3)
147 PPC_LL r9, SVCPU_R9(r3)
148 PPC_LL r3, (SVCPU_R3)(r3) 116 PPC_LL r3, (SVCPU_R3)(r3)
149 117
150 RFI 118 RFI
@@ -196,8 +164,7 @@ kvmppc_interrupt:
196 /* Save guest PC and MSR */ 164 /* Save guest PC and MSR */
197#ifdef CONFIG_PPC64 165#ifdef CONFIG_PPC64
198BEGIN_FTR_SECTION 166BEGIN_FTR_SECTION
199 andi. r0, r12, 0x2 167 andi. r0,r12,0x2
200 cmpwi cr1, r0, 0
201 beq 1f 168 beq 1f
202 mfspr r3,SPRN_HSRR0 169 mfspr r3,SPRN_HSRR0
203 mfspr r4,SPRN_HSRR1 170 mfspr r4,SPRN_HSRR1
@@ -246,22 +213,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
246 beq ld_last_inst 213 beq ld_last_inst
247 cmpwi r12, BOOK3S_INTERRUPT_PROGRAM 214 cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
248 beq ld_last_inst 215 beq ld_last_inst
249 cmpwi r12, BOOK3S_INTERRUPT_SYSCALL
250 beq ld_last_prev_inst
251 cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT 216 cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
252 beq- ld_last_inst 217 beq- ld_last_inst
253#ifdef CONFIG_PPC64
254BEGIN_FTR_SECTION
255 cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
256 beq- ld_last_inst
257END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
258#endif
259 218
260 b no_ld_last_inst 219 b no_ld_last_inst
261 220
262ld_last_prev_inst:
263 addi r3, r3, -4
264
265ld_last_inst: 221ld_last_inst:
266 /* Save off the guest instruction we're at */ 222 /* Save off the guest instruction we're at */
267 223
@@ -298,69 +254,23 @@ no_ld_last_inst:
298 /* Switch back to host MMU */ 254 /* Switch back to host MMU */
299 LOAD_HOST_SEGMENTS 255 LOAD_HOST_SEGMENTS
300 256
301#ifdef CONFIG_PPC_BOOK3S_64
302
303 lbz r5, HSTATE_RESTORE_HID5(r13)
304 cmpwi r5, 0
305 beq no_dcbz32_off
306
307 li r4, 0
308 mfspr r5,SPRN_HID5
309 rldimi r5,r4,6,56
310 mtspr SPRN_HID5,r5
311
312no_dcbz32_off:
313
314#endif /* CONFIG_PPC_BOOK3S_64 */
315
316 /*
317 * For some interrupts, we need to call the real Linux
318 * handler, so it can do work for us. This has to happen
319 * as if the interrupt arrived from the kernel though,
320 * so let's fake it here where most state is restored.
321 *
322 * Having set up SRR0/1 with the address where we want
323 * to continue with relocation on (potentially in module
324 * space), we either just go straight there with rfi[d],
325 * or we jump to an interrupt handler if there is an
326 * interrupt to be handled first. In the latter case,
327 * the rfi[d] at the end of the interrupt handler will
328 * get us back to where we want to continue.
329 */
330
331 /* Register usage at this point: 257 /* Register usage at this point:
332 * 258 *
333 * R1 = host R1 259 * R1 = host R1
334 * R2 = host R2 260 * R2 = host R2
335 * R10 = raw exit handler id
336 * R12 = exit handler id 261 * R12 = exit handler id
337 * R13 = shadow vcpu (32-bit) or PACA (64-bit) 262 * R13 = shadow vcpu (32-bit) or PACA (64-bit)
338 * SVCPU.* = guest * 263 * SVCPU.* = guest *
339 * 264 *
340 */ 265 */
341 266
342 PPC_LL r6, HSTATE_HOST_MSR(r13) 267 /* RFI into the highmem handler */
343 PPC_LL r8, HSTATE_VMHANDLER(r13) 268 mfmsr r7
344 269 ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
345#ifdef CONFIG_PPC64 270 mtsrr1 r7
346BEGIN_FTR_SECTION
347 beq cr1, 1f
348 mtspr SPRN_HSRR1, r6
349 mtspr SPRN_HSRR0, r8
350END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
351#endif
3521: /* Restore host msr -> SRR1 */
353 mtsrr1 r6
354 /* Load highmem handler address */ 271 /* Load highmem handler address */
272 PPC_LL r8, HSTATE_VMHANDLER(r13)
355 mtsrr0 r8 273 mtsrr0 r8
356 274
357 /* RFI into the highmem handler, or jump to interrupt handler */
358 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
359 beqa BOOK3S_INTERRUPT_EXTERNAL
360 cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
361 beqa BOOK3S_INTERRUPT_DECREMENTER
362 cmpwi r12, BOOK3S_INTERRUPT_PERFMON
363 beqa BOOK3S_INTERRUPT_PERFMON
364
365 RFI 275 RFI
366kvmppc_handler_trampoline_exit_end: 276kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 69f11401578..ee45fa01220 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -17,8 +17,6 @@
17 * 17 *
18 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
19 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 19 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
20 * Scott Wood <scottwood@freescale.com>
21 * Varun Sethi <varun.sethi@freescale.com>
22 */ 20 */
23 21
24#include <linux/errno.h> 22#include <linux/errno.h>
@@ -32,15 +30,10 @@
32#include <asm/cputable.h> 30#include <asm/cputable.h>
33#include <asm/uaccess.h> 31#include <asm/uaccess.h>
34#include <asm/kvm_ppc.h> 32#include <asm/kvm_ppc.h>
33#include "timing.h"
35#include <asm/cacheflush.h> 34#include <asm/cacheflush.h>
36#include <asm/dbell.h>
37#include <asm/hw_irq.h>
38#include <asm/irq.h>
39#include <asm/time.h>
40 35
41#include "timing.h"
42#include "booke.h" 36#include "booke.h"
43#include "trace.h"
44 37
45unsigned long kvmppc_booke_handlers; 38unsigned long kvmppc_booke_handlers;
46 39
@@ -62,9 +55,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { "dec", VCPU_STAT(dec_exits) }, 55 { "dec", VCPU_STAT(dec_exits) },
63 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 56 { "ext_intr", VCPU_STAT(ext_intr_exits) },
64 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 57 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
65 { "doorbell", VCPU_STAT(dbell_exits) },
66 { "guest doorbell", VCPU_STAT(gdbell_exits) },
67 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
68 { NULL } 58 { NULL }
69}; 59};
70 60
@@ -123,16 +113,6 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
123} 113}
124#endif 114#endif
125 115
126static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
127{
128#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
129 /* We always treat the FP bit as enabled from the host
130 perspective, so only need to adjust the shadow MSR */
131 vcpu->arch.shadow_msr &= ~MSR_FP;
132 vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
133#endif
134}
135
136/* 116/*
137 * Helper function for "full" MSR writes. No need to call this if only 117 * Helper function for "full" MSR writes. No need to call this if only
138 * EE/CE/ME/DE/RI are changing. 118 * EE/CE/ME/DE/RI are changing.
@@ -141,21 +121,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
141{ 121{
142 u32 old_msr = vcpu->arch.shared->msr; 122 u32 old_msr = vcpu->arch.shared->msr;
143 123
144#ifdef CONFIG_KVM_BOOKE_HV
145 new_msr |= MSR_GS;
146#endif
147
148 vcpu->arch.shared->msr = new_msr; 124 vcpu->arch.shared->msr = new_msr;
149 125
150 kvmppc_mmu_msr_notify(vcpu, old_msr); 126 kvmppc_mmu_msr_notify(vcpu, old_msr);
127
128 if (vcpu->arch.shared->msr & MSR_WE) {
129 kvm_vcpu_block(vcpu);
130 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
131 };
132
151 kvmppc_vcpu_sync_spe(vcpu); 133 kvmppc_vcpu_sync_spe(vcpu);
152 kvmppc_vcpu_sync_fpu(vcpu);
153} 134}
154 135
155static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, 136static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
156 unsigned int priority) 137 unsigned int priority)
157{ 138{
158 trace_kvm_booke_queue_irqprio(vcpu, priority);
159 set_bit(priority, &vcpu->arch.pending_exceptions); 139 set_bit(priority, &vcpu->arch.pending_exceptions);
160} 140}
161 141
@@ -221,98 +201,17 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
221 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); 201 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
222} 202}
223 203
224static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
225{
226 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
227}
228
229static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
230{
231 clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
232}
233
234static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
235{
236#ifdef CONFIG_KVM_BOOKE_HV
237 mtspr(SPRN_GSRR0, srr0);
238 mtspr(SPRN_GSRR1, srr1);
239#else
240 vcpu->arch.shared->srr0 = srr0;
241 vcpu->arch.shared->srr1 = srr1;
242#endif
243}
244
245static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
246{
247 vcpu->arch.csrr0 = srr0;
248 vcpu->arch.csrr1 = srr1;
249}
250
251static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
252{
253 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
254 vcpu->arch.dsrr0 = srr0;
255 vcpu->arch.dsrr1 = srr1;
256 } else {
257 set_guest_csrr(vcpu, srr0, srr1);
258 }
259}
260
261static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
262{
263 vcpu->arch.mcsrr0 = srr0;
264 vcpu->arch.mcsrr1 = srr1;
265}
266
267static unsigned long get_guest_dear(struct kvm_vcpu *vcpu)
268{
269#ifdef CONFIG_KVM_BOOKE_HV
270 return mfspr(SPRN_GDEAR);
271#else
272 return vcpu->arch.shared->dar;
273#endif
274}
275
276static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear)
277{
278#ifdef CONFIG_KVM_BOOKE_HV
279 mtspr(SPRN_GDEAR, dear);
280#else
281 vcpu->arch.shared->dar = dear;
282#endif
283}
284
285static unsigned long get_guest_esr(struct kvm_vcpu *vcpu)
286{
287#ifdef CONFIG_KVM_BOOKE_HV
288 return mfspr(SPRN_GESR);
289#else
290 return vcpu->arch.shared->esr;
291#endif
292}
293
294static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
295{
296#ifdef CONFIG_KVM_BOOKE_HV
297 mtspr(SPRN_GESR, esr);
298#else
299 vcpu->arch.shared->esr = esr;
300#endif
301}
302
303/* Deliver the interrupt of the corresponding priority, if possible. */ 204/* Deliver the interrupt of the corresponding priority, if possible. */
304static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, 205static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
305 unsigned int priority) 206 unsigned int priority)
306{ 207{
307 int allowed = 0; 208 int allowed = 0;
308 ulong msr_mask = 0; 209 ulong uninitialized_var(msr_mask);
309 bool update_esr = false, update_dear = false; 210 bool update_esr = false, update_dear = false;
310 ulong crit_raw = vcpu->arch.shared->critical; 211 ulong crit_raw = vcpu->arch.shared->critical;
311 ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); 212 ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
312 bool crit; 213 bool crit;
313 bool keep_irq = false; 214 bool keep_irq = false;
314 enum int_class int_class;
315 ulong new_msr = vcpu->arch.shared->msr;
316 215
317 /* Truncate crit indicators in 32 bit mode */ 216 /* Truncate crit indicators in 32 bit mode */
318 if (!(vcpu->arch.shared->msr & MSR_SF)) { 217 if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -348,219 +247,56 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
348 case BOOKE_IRQPRIO_AP_UNAVAIL: 247 case BOOKE_IRQPRIO_AP_UNAVAIL:
349 case BOOKE_IRQPRIO_ALIGNMENT: 248 case BOOKE_IRQPRIO_ALIGNMENT:
350 allowed = 1; 249 allowed = 1;
351 msr_mask = MSR_CE | MSR_ME | MSR_DE; 250 msr_mask = MSR_CE|MSR_ME|MSR_DE;
352 int_class = INT_CLASS_NONCRIT;
353 break; 251 break;
354 case BOOKE_IRQPRIO_WATCHDOG:
355 case BOOKE_IRQPRIO_CRITICAL: 252 case BOOKE_IRQPRIO_CRITICAL:
356 case BOOKE_IRQPRIO_DBELL_CRIT: 253 case BOOKE_IRQPRIO_WATCHDOG:
357 allowed = vcpu->arch.shared->msr & MSR_CE; 254 allowed = vcpu->arch.shared->msr & MSR_CE;
358 allowed = allowed && !crit;
359 msr_mask = MSR_ME; 255 msr_mask = MSR_ME;
360 int_class = INT_CLASS_CRIT;
361 break; 256 break;
362 case BOOKE_IRQPRIO_MACHINE_CHECK: 257 case BOOKE_IRQPRIO_MACHINE_CHECK:
363 allowed = vcpu->arch.shared->msr & MSR_ME; 258 allowed = vcpu->arch.shared->msr & MSR_ME;
364 allowed = allowed && !crit; 259 msr_mask = 0;
365 int_class = INT_CLASS_MC;
366 break; 260 break;
261 case BOOKE_IRQPRIO_EXTERNAL:
367 case BOOKE_IRQPRIO_DECREMENTER: 262 case BOOKE_IRQPRIO_DECREMENTER:
368 case BOOKE_IRQPRIO_FIT: 263 case BOOKE_IRQPRIO_FIT:
369 keep_irq = true;
370 /* fall through */
371 case BOOKE_IRQPRIO_EXTERNAL:
372 case BOOKE_IRQPRIO_DBELL:
373 allowed = vcpu->arch.shared->msr & MSR_EE; 264 allowed = vcpu->arch.shared->msr & MSR_EE;
374 allowed = allowed && !crit; 265 allowed = allowed && !crit;
375 msr_mask = MSR_CE | MSR_ME | MSR_DE; 266 msr_mask = MSR_CE|MSR_ME|MSR_DE;
376 int_class = INT_CLASS_NONCRIT;
377 break; 267 break;
378 case BOOKE_IRQPRIO_DEBUG: 268 case BOOKE_IRQPRIO_DEBUG:
379 allowed = vcpu->arch.shared->msr & MSR_DE; 269 allowed = vcpu->arch.shared->msr & MSR_DE;
380 allowed = allowed && !crit;
381 msr_mask = MSR_ME; 270 msr_mask = MSR_ME;
382 int_class = INT_CLASS_CRIT;
383 break; 271 break;
384 } 272 }
385 273
386 if (allowed) { 274 if (allowed) {
387 switch (int_class) { 275 vcpu->arch.shared->srr0 = vcpu->arch.pc;
388 case INT_CLASS_NONCRIT: 276 vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
389 set_guest_srr(vcpu, vcpu->arch.pc,
390 vcpu->arch.shared->msr);
391 break;
392 case INT_CLASS_CRIT:
393 set_guest_csrr(vcpu, vcpu->arch.pc,
394 vcpu->arch.shared->msr);
395 break;
396 case INT_CLASS_DBG:
397 set_guest_dsrr(vcpu, vcpu->arch.pc,
398 vcpu->arch.shared->msr);
399 break;
400 case INT_CLASS_MC:
401 set_guest_mcsrr(vcpu, vcpu->arch.pc,
402 vcpu->arch.shared->msr);
403 break;
404 }
405
406 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 277 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
407 if (update_esr == true) 278 if (update_esr == true)
408 set_guest_esr(vcpu, vcpu->arch.queued_esr); 279 vcpu->arch.esr = vcpu->arch.queued_esr;
409 if (update_dear == true) 280 if (update_dear == true)
410 set_guest_dear(vcpu, vcpu->arch.queued_dear); 281 vcpu->arch.shared->dar = vcpu->arch.queued_dear;
411 282 kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
412 new_msr &= msr_mask;
413#if defined(CONFIG_64BIT)
414 if (vcpu->arch.epcr & SPRN_EPCR_ICM)
415 new_msr |= MSR_CM;
416#endif
417 kvmppc_set_msr(vcpu, new_msr);
418 283
419 if (!keep_irq) 284 if (!keep_irq)
420 clear_bit(priority, &vcpu->arch.pending_exceptions); 285 clear_bit(priority, &vcpu->arch.pending_exceptions);
421 } 286 }
422 287
423#ifdef CONFIG_KVM_BOOKE_HV
424 /*
425 * If an interrupt is pending but masked, raise a guest doorbell
426 * so that we are notified when the guest enables the relevant
427 * MSR bit.
428 */
429 if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE)
430 kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT);
431 if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE)
432 kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT);
433 if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK)
434 kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC);
435#endif
436
437 return allowed; 288 return allowed;
438} 289}
439 290
440/* 291/* Check pending exceptions and deliver one, if possible. */
441 * Return the number of jiffies until the next timeout. If the timeout is 292void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
442 * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
443 * because the larger value can break the timer APIs.
444 */
445static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
446{
447 u64 tb, wdt_tb, wdt_ticks = 0;
448 u64 nr_jiffies = 0;
449 u32 period = TCR_GET_WP(vcpu->arch.tcr);
450
451 wdt_tb = 1ULL << (63 - period);
452 tb = get_tb();
453 /*
454 * The watchdog timeout will hapeen when TB bit corresponding
455 * to watchdog will toggle from 0 to 1.
456 */
457 if (tb & wdt_tb)
458 wdt_ticks = wdt_tb;
459
460 wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
461
462 /* Convert timebase ticks to jiffies */
463 nr_jiffies = wdt_ticks;
464
465 if (do_div(nr_jiffies, tb_ticks_per_jiffy))
466 nr_jiffies++;
467
468 return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
469}
470
471static void arm_next_watchdog(struct kvm_vcpu *vcpu)
472{
473 unsigned long nr_jiffies;
474 unsigned long flags;
475
476 /*
477 * If TSR_ENW and TSR_WIS are not set then no need to exit to
478 * userspace, so clear the KVM_REQ_WATCHDOG request.
479 */
480 if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
481 clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
482
483 spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
484 nr_jiffies = watchdog_next_timeout(vcpu);
485 /*
486 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
487 * then do not run the watchdog timer as this can break timer APIs.
488 */
489 if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
490 mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
491 else
492 del_timer(&vcpu->arch.wdt_timer);
493 spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
494}
495
496void kvmppc_watchdog_func(unsigned long data)
497{
498 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
499 u32 tsr, new_tsr;
500 int final;
501
502 do {
503 new_tsr = tsr = vcpu->arch.tsr;
504 final = 0;
505
506 /* Time out event */
507 if (tsr & TSR_ENW) {
508 if (tsr & TSR_WIS)
509 final = 1;
510 else
511 new_tsr = tsr | TSR_WIS;
512 } else {
513 new_tsr = tsr | TSR_ENW;
514 }
515 } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
516
517 if (new_tsr & TSR_WIS) {
518 smp_wmb();
519 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
520 kvm_vcpu_kick(vcpu);
521 }
522
523 /*
524 * If this is final watchdog expiry and some action is required
525 * then exit to userspace.
526 */
527 if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
528 vcpu->arch.watchdog_enabled) {
529 smp_wmb();
530 kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
531 kvm_vcpu_kick(vcpu);
532 }
533
534 /*
535 * Stop running the watchdog timer after final expiration to
536 * prevent the host from being flooded with timers if the
537 * guest sets a short period.
538 * Timers will resume when TSR/TCR is updated next time.
539 */
540 if (!final)
541 arm_next_watchdog(vcpu);
542}
543
544static void update_timer_ints(struct kvm_vcpu *vcpu)
545{
546 if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
547 kvmppc_core_queue_dec(vcpu);
548 else
549 kvmppc_core_dequeue_dec(vcpu);
550
551 if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
552 kvmppc_core_queue_watchdog(vcpu);
553 else
554 kvmppc_core_dequeue_watchdog(vcpu);
555}
556
557static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
558{ 293{
559 unsigned long *pending = &vcpu->arch.pending_exceptions; 294 unsigned long *pending = &vcpu->arch.pending_exceptions;
295 unsigned long old_pending = vcpu->arch.pending_exceptions;
560 unsigned int priority; 296 unsigned int priority;
561 297
562 priority = __ffs(*pending); 298 priority = __ffs(*pending);
563 while (priority < BOOKE_IRQPRIO_MAX) { 299 while (priority <= BOOKE_IRQPRIO_MAX) {
564 if (kvmppc_booke_irqprio_deliver(vcpu, priority)) 300 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
565 break; 301 break;
566 302
@@ -570,216 +306,25 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
570 } 306 }
571 307
572 /* Tell the guest about our interrupt status */ 308 /* Tell the guest about our interrupt status */
573 vcpu->arch.shared->int_pending = !!*pending; 309 if (*pending)
574} 310 vcpu->arch.shared->int_pending = 1;
575 311 else if (old_pending)
576/* Check pending exceptions and deliver one, if possible. */ 312 vcpu->arch.shared->int_pending = 0;
577int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
578{
579 int r = 0;
580 WARN_ON_ONCE(!irqs_disabled());
581
582 kvmppc_core_check_exceptions(vcpu);
583
584 if (vcpu->arch.shared->msr & MSR_WE) {
585 local_irq_enable();
586 kvm_vcpu_block(vcpu);
587 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
588 local_irq_disable();
589
590 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
591 r = 1;
592 };
593
594 return r;
595}
596
597int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
598{
599 int r = 1; /* Indicate we want to get back into the guest */
600
601 if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
602 update_timer_ints(vcpu);
603#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
604 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
605 kvmppc_core_flush_tlb(vcpu);
606#endif
607
608 if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
609 vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
610 r = 0;
611 }
612
613 return r;
614} 313}
615 314
616int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 315int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
617{ 316{
618 int ret, s; 317 int ret;
619#ifdef CONFIG_PPC_FPU
620 unsigned int fpscr;
621 int fpexc_mode;
622 u64 fpr[32];
623#endif
624
625 if (!vcpu->arch.sane) {
626 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
627 return -EINVAL;
628 }
629 318
630 local_irq_disable(); 319 local_irq_disable();
631 s = kvmppc_prepare_to_enter(vcpu);
632 if (s <= 0) {
633 local_irq_enable();
634 ret = s;
635 goto out;
636 }
637 kvmppc_lazy_ee_enable();
638
639 kvm_guest_enter(); 320 kvm_guest_enter();
640
641#ifdef CONFIG_PPC_FPU
642 /* Save userspace FPU state in stack */
643 enable_kernel_fp();
644 memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
645 fpscr = current->thread.fpscr.val;
646 fpexc_mode = current->thread.fpexc_mode;
647
648 /* Restore guest FPU state to thread */
649 memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
650 current->thread.fpscr.val = vcpu->arch.fpscr;
651
652 /*
653 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
654 * as always using the FPU. Kernel usage of FP (via
655 * enable_kernel_fp()) in this thread must not occur while
656 * vcpu->fpu_active is set.
657 */
658 vcpu->fpu_active = 1;
659
660 kvmppc_load_guest_fp(vcpu);
661#endif
662
663 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 321 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
322 kvm_guest_exit();
323 local_irq_enable();
664 324
665 /* No need for kvm_guest_exit. It's done in handle_exit.
666 We also get here with interrupts enabled. */
667
668#ifdef CONFIG_PPC_FPU
669 kvmppc_save_guest_fp(vcpu);
670
671 vcpu->fpu_active = 0;
672
673 /* Save guest FPU state from thread */
674 memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
675 vcpu->arch.fpscr = current->thread.fpscr.val;
676
677 /* Restore userspace FPU state from stack */
678 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
679 current->thread.fpscr.val = fpscr;
680 current->thread.fpexc_mode = fpexc_mode;
681#endif
682
683out:
684 vcpu->mode = OUTSIDE_GUEST_MODE;
685 return ret; 325 return ret;
686} 326}
687 327
688static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
689{
690 enum emulation_result er;
691
692 er = kvmppc_emulate_instruction(run, vcpu);
693 switch (er) {
694 case EMULATE_DONE:
695 /* don't overwrite subtypes, just account kvm_stats */
696 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
697 /* Future optimization: only reload non-volatiles if
698 * they were actually modified by emulation. */
699 return RESUME_GUEST_NV;
700
701 case EMULATE_DO_DCR:
702 run->exit_reason = KVM_EXIT_DCR;
703 return RESUME_HOST;
704
705 case EMULATE_FAIL:
706 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
707 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
708 /* For debugging, encode the failing instruction and
709 * report it to userspace. */
710 run->hw.hardware_exit_reason = ~0ULL << 32;
711 run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
712 kvmppc_core_queue_program(vcpu, ESR_PIL);
713 return RESUME_HOST;
714
715 default:
716 BUG();
717 }
718}
719
720static void kvmppc_fill_pt_regs(struct pt_regs *regs)
721{
722 ulong r1, ip, msr, lr;
723
724 asm("mr %0, 1" : "=r"(r1));
725 asm("mflr %0" : "=r"(lr));
726 asm("mfmsr %0" : "=r"(msr));
727 asm("bl 1f; 1: mflr %0" : "=r"(ip));
728
729 memset(regs, 0, sizeof(*regs));
730 regs->gpr[1] = r1;
731 regs->nip = ip;
732 regs->msr = msr;
733 regs->link = lr;
734}
735
736/*
737 * For interrupts needed to be handled by host interrupt handlers,
738 * corresponding host handler are called from here in similar way
739 * (but not exact) as they are called from low level handler
740 * (such as from arch/powerpc/kernel/head_fsl_booke.S).
741 */
742static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
743 unsigned int exit_nr)
744{
745 struct pt_regs regs;
746
747 switch (exit_nr) {
748 case BOOKE_INTERRUPT_EXTERNAL:
749 kvmppc_fill_pt_regs(&regs);
750 do_IRQ(&regs);
751 break;
752 case BOOKE_INTERRUPT_DECREMENTER:
753 kvmppc_fill_pt_regs(&regs);
754 timer_interrupt(&regs);
755 break;
756#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
757 case BOOKE_INTERRUPT_DOORBELL:
758 kvmppc_fill_pt_regs(&regs);
759 doorbell_exception(&regs);
760 break;
761#endif
762 case BOOKE_INTERRUPT_MACHINE_CHECK:
763 /* FIXME */
764 break;
765 case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
766 kvmppc_fill_pt_regs(&regs);
767 performance_monitor_exception(&regs);
768 break;
769 case BOOKE_INTERRUPT_WATCHDOG:
770 kvmppc_fill_pt_regs(&regs);
771#ifdef CONFIG_BOOKE_WDT
772 WatchdogException(&regs);
773#else
774 unknown_exception(&regs);
775#endif
776 break;
777 case BOOKE_INTERRUPT_CRITICAL:
778 unknown_exception(&regs);
779 break;
780 }
781}
782
783/** 328/**
784 * kvmppc_handle_exit 329 * kvmppc_handle_exit
785 * 330 *
@@ -788,20 +333,14 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
788int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 333int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
789 unsigned int exit_nr) 334 unsigned int exit_nr)
790{ 335{
336 enum emulation_result er;
791 int r = RESUME_HOST; 337 int r = RESUME_HOST;
792 int s;
793 338
794 /* update before a new last_exit_type is rewritten */ 339 /* update before a new last_exit_type is rewritten */
795 kvmppc_update_timing_stats(vcpu); 340 kvmppc_update_timing_stats(vcpu);
796 341
797 /* restart interrupts if they were meant for the host */
798 kvmppc_restart_interrupt(vcpu, exit_nr);
799
800 local_irq_enable(); 342 local_irq_enable();
801 343
802 trace_kvm_exit(exit_nr, vcpu);
803 kvm_guest_exit();
804
805 run->exit_reason = KVM_EXIT_UNKNOWN; 344 run->exit_reason = KVM_EXIT_UNKNOWN;
806 run->ready_for_interrupt_injection = 1; 345 run->ready_for_interrupt_injection = 1;
807 346
@@ -809,78 +348,62 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
809 case BOOKE_INTERRUPT_MACHINE_CHECK: 348 case BOOKE_INTERRUPT_MACHINE_CHECK:
810 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); 349 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
811 kvmppc_dump_vcpu(vcpu); 350 kvmppc_dump_vcpu(vcpu);
812 /* For debugging, send invalid exit reason to user space */
813 run->hw.hardware_exit_reason = ~1ULL << 32;
814 run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR);
815 r = RESUME_HOST; 351 r = RESUME_HOST;
816 break; 352 break;
817 353
818 case BOOKE_INTERRUPT_EXTERNAL: 354 case BOOKE_INTERRUPT_EXTERNAL:
819 kvmppc_account_exit(vcpu, EXT_INTR_EXITS); 355 kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
356 if (need_resched())
357 cond_resched();
820 r = RESUME_GUEST; 358 r = RESUME_GUEST;
821 break; 359 break;
822 360
823 case BOOKE_INTERRUPT_DECREMENTER: 361 case BOOKE_INTERRUPT_DECREMENTER:
362 /* Since we switched IVPR back to the host's value, the host
363 * handled this interrupt the moment we enabled interrupts.
364 * Now we just offer it a chance to reschedule the guest. */
824 kvmppc_account_exit(vcpu, DEC_EXITS); 365 kvmppc_account_exit(vcpu, DEC_EXITS);
366 if (need_resched())
367 cond_resched();
825 r = RESUME_GUEST; 368 r = RESUME_GUEST;
826 break; 369 break;
827 370
828 case BOOKE_INTERRUPT_WATCHDOG:
829 r = RESUME_GUEST;
830 break;
831
832 case BOOKE_INTERRUPT_DOORBELL:
833 kvmppc_account_exit(vcpu, DBELL_EXITS);
834 r = RESUME_GUEST;
835 break;
836
837 case BOOKE_INTERRUPT_GUEST_DBELL_CRIT:
838 kvmppc_account_exit(vcpu, GDBELL_EXITS);
839
840 /*
841 * We are here because there is a pending guest interrupt
842 * which could not be delivered as MSR_CE or MSR_ME was not
843 * set. Once we break from here we will retry delivery.
844 */
845 r = RESUME_GUEST;
846 break;
847
848 case BOOKE_INTERRUPT_GUEST_DBELL:
849 kvmppc_account_exit(vcpu, GDBELL_EXITS);
850
851 /*
852 * We are here because there is a pending guest interrupt
853 * which could not be delivered as MSR_EE was not set. Once
854 * we break from here we will retry delivery.
855 */
856 r = RESUME_GUEST;
857 break;
858
859 case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
860 r = RESUME_GUEST;
861 break;
862
863 case BOOKE_INTERRUPT_HV_PRIV:
864 r = emulation_exit(run, vcpu);
865 break;
866
867 case BOOKE_INTERRUPT_PROGRAM: 371 case BOOKE_INTERRUPT_PROGRAM:
868 if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { 372 if (vcpu->arch.shared->msr & MSR_PR) {
869 /* 373 /* Program traps generated by user-level software must be handled
870 * Program traps generated by user-level software must 374 * by the guest kernel. */
871 * be handled by the guest kernel.
872 *
873 * In GS mode, hypervisor privileged instructions trap
874 * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are
875 * actual program interrupts, handled by the guest.
876 */
877 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); 375 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
878 r = RESUME_GUEST; 376 r = RESUME_GUEST;
879 kvmppc_account_exit(vcpu, USR_PR_INST); 377 kvmppc_account_exit(vcpu, USR_PR_INST);
880 break; 378 break;
881 } 379 }
882 380
883 r = emulation_exit(run, vcpu); 381 er = kvmppc_emulate_instruction(run, vcpu);
382 switch (er) {
383 case EMULATE_DONE:
384 /* don't overwrite subtypes, just account kvm_stats */
385 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
386 /* Future optimization: only reload non-volatiles if
387 * they were actually modified by emulation. */
388 r = RESUME_GUEST_NV;
389 break;
390 case EMULATE_DO_DCR:
391 run->exit_reason = KVM_EXIT_DCR;
392 r = RESUME_HOST;
393 break;
394 case EMULATE_FAIL:
395 /* XXX Deliver Program interrupt to guest. */
396 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
397 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
398 /* For debugging, encode the failing instruction and
399 * report it to userspace. */
400 run->hw.hardware_exit_reason = ~0ULL << 32;
401 run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
402 r = RESUME_HOST;
403 break;
404 default:
405 BUG();
406 }
884 break; 407 break;
885 408
886 case BOOKE_INTERRUPT_FP_UNAVAIL: 409 case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -945,21 +468,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
945 r = RESUME_GUEST; 468 r = RESUME_GUEST;
946 break; 469 break;
947 470
948#ifdef CONFIG_KVM_BOOKE_HV
949 case BOOKE_INTERRUPT_HV_SYSCALL:
950 if (!(vcpu->arch.shared->msr & MSR_PR)) {
951 kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
952 } else {
953 /*
954 * hcall from guest userspace -- send privileged
955 * instruction program check.
956 */
957 kvmppc_core_queue_program(vcpu, ESR_PPR);
958 }
959
960 r = RESUME_GUEST;
961 break;
962#else
963 case BOOKE_INTERRUPT_SYSCALL: 471 case BOOKE_INTERRUPT_SYSCALL:
964 if (!(vcpu->arch.shared->msr & MSR_PR) && 472 if (!(vcpu->arch.shared->msr & MSR_PR) &&
965 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { 473 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
@@ -973,7 +481,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
973 kvmppc_account_exit(vcpu, SYSCALL_EXITS); 481 kvmppc_account_exit(vcpu, SYSCALL_EXITS);
974 r = RESUME_GUEST; 482 r = RESUME_GUEST;
975 break; 483 break;
976#endif
977 484
978 case BOOKE_INTERRUPT_DTLB_MISS: { 485 case BOOKE_INTERRUPT_DTLB_MISS: {
979 unsigned long eaddr = vcpu->arch.fault_dear; 486 unsigned long eaddr = vcpu->arch.fault_dear;
@@ -981,7 +488,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
981 gpa_t gpaddr; 488 gpa_t gpaddr;
982 gfn_t gfn; 489 gfn_t gfn;
983 490
984#ifdef CONFIG_KVM_E500V2 491#ifdef CONFIG_KVM_E500
985 if (!(vcpu->arch.shared->msr & MSR_PR) && 492 if (!(vcpu->arch.shared->msr & MSR_PR) &&
986 (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { 493 (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
987 kvmppc_map_magic(vcpu); 494 kvmppc_map_magic(vcpu);
@@ -1022,7 +529,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
1022 /* Guest has mapped and accessed a page which is not 529 /* Guest has mapped and accessed a page which is not
1023 * actually RAM. */ 530 * actually RAM. */
1024 vcpu->arch.paddr_accessed = gpaddr; 531 vcpu->arch.paddr_accessed = gpaddr;
1025 vcpu->arch.vaddr_accessed = eaddr;
1026 r = kvmppc_emulate_mmio(run, vcpu); 532 r = kvmppc_emulate_mmio(run, vcpu);
1027 kvmppc_account_exit(vcpu, MMIO_EXITS); 533 kvmppc_account_exit(vcpu, MMIO_EXITS);
1028 } 534 }
@@ -1090,18 +596,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
1090 BUG(); 596 BUG();
1091 } 597 }
1092 598
1093 /* 599 local_irq_disable();
1094 * To avoid clobbering exit_reason, only check for signals if we 600
1095 * aren't already exiting to userspace for some other reason. 601 kvmppc_core_deliver_interrupts(vcpu);
1096 */ 602
1097 if (!(r & RESUME_HOST)) { 603 if (!(r & RESUME_HOST)) {
1098 local_irq_disable(); 604 /* To avoid clobbering exit_reason, only check for signals if
1099 s = kvmppc_prepare_to_enter(vcpu); 605 * we aren't already exiting to userspace for some other
1100 if (s <= 0) { 606 * reason. */
1101 local_irq_enable(); 607 if (signal_pending(current)) {
1102 r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 608 run->exit_reason = KVM_EXIT_INTR;
1103 } else { 609 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
1104 kvmppc_lazy_ee_enable(); 610 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
1105 } 611 }
1106 } 612 }
1107 613
@@ -1112,18 +618,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
1112int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 618int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1113{ 619{
1114 int i; 620 int i;
1115 int r;
1116 621
1117 vcpu->arch.pc = 0; 622 vcpu->arch.pc = 0;
1118 vcpu->arch.shared->pir = vcpu->vcpu_id; 623 vcpu->arch.shared->msr = 0;
624 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
1119 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ 625 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
1120 kvmppc_set_msr(vcpu, 0);
1121 626
1122#ifndef CONFIG_KVM_BOOKE_HV
1123 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
1124 vcpu->arch.shadow_pid = 1; 627 vcpu->arch.shadow_pid = 1;
1125 vcpu->arch.shared->msr = 0;
1126#endif
1127 628
1128 /* Eye-catching numbers so we know if the guest takes an interrupt 629 /* Eye-catching numbers so we know if the guest takes an interrupt
1129 * before it's programmed its own IVPR/IVORs. */ 630 * before it's programmed its own IVPR/IVORs. */
@@ -1133,24 +634,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1133 634
1134 kvmppc_init_timing_stats(vcpu); 635 kvmppc_init_timing_stats(vcpu);
1135 636
1136 r = kvmppc_core_vcpu_setup(vcpu); 637 return kvmppc_core_vcpu_setup(vcpu);
1137 kvmppc_sanity_check(vcpu);
1138 return r;
1139}
1140
1141int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
1142{
1143 /* setup watchdog timer once */
1144 spin_lock_init(&vcpu->arch.wdt_lock);
1145 setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
1146 (unsigned long)vcpu);
1147
1148 return 0;
1149}
1150
1151void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
1152{
1153 del_timer_sync(&vcpu->arch.wdt_timer);
1154} 638}
1155 639
1156int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 640int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -1170,10 +654,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1170 regs->sprg1 = vcpu->arch.shared->sprg1; 654 regs->sprg1 = vcpu->arch.shared->sprg1;
1171 regs->sprg2 = vcpu->arch.shared->sprg2; 655 regs->sprg2 = vcpu->arch.shared->sprg2;
1172 regs->sprg3 = vcpu->arch.shared->sprg3; 656 regs->sprg3 = vcpu->arch.shared->sprg3;
1173 regs->sprg4 = vcpu->arch.shared->sprg4; 657 regs->sprg4 = vcpu->arch.sprg4;
1174 regs->sprg5 = vcpu->arch.shared->sprg5; 658 regs->sprg5 = vcpu->arch.sprg5;
1175 regs->sprg6 = vcpu->arch.shared->sprg6; 659 regs->sprg6 = vcpu->arch.sprg6;
1176 regs->sprg7 = vcpu->arch.shared->sprg7; 660 regs->sprg7 = vcpu->arch.sprg7;
1177 661
1178 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 662 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
1179 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 663 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -1198,10 +682,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1198 vcpu->arch.shared->sprg1 = regs->sprg1; 682 vcpu->arch.shared->sprg1 = regs->sprg1;
1199 vcpu->arch.shared->sprg2 = regs->sprg2; 683 vcpu->arch.shared->sprg2 = regs->sprg2;
1200 vcpu->arch.shared->sprg3 = regs->sprg3; 684 vcpu->arch.shared->sprg3 = regs->sprg3;
1201 vcpu->arch.shared->sprg4 = regs->sprg4; 685 vcpu->arch.sprg4 = regs->sprg4;
1202 vcpu->arch.shared->sprg5 = regs->sprg5; 686 vcpu->arch.sprg5 = regs->sprg5;
1203 vcpu->arch.shared->sprg6 = regs->sprg6; 687 vcpu->arch.sprg6 = regs->sprg6;
1204 vcpu->arch.shared->sprg7 = regs->sprg7; 688 vcpu->arch.sprg7 = regs->sprg7;
1205 689
1206 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 690 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
1207 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 691 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -1219,8 +703,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
1219 sregs->u.e.csrr0 = vcpu->arch.csrr0; 703 sregs->u.e.csrr0 = vcpu->arch.csrr0;
1220 sregs->u.e.csrr1 = vcpu->arch.csrr1; 704 sregs->u.e.csrr1 = vcpu->arch.csrr1;
1221 sregs->u.e.mcsr = vcpu->arch.mcsr; 705 sregs->u.e.mcsr = vcpu->arch.mcsr;
1222 sregs->u.e.esr = get_guest_esr(vcpu); 706 sregs->u.e.esr = vcpu->arch.esr;
1223 sregs->u.e.dear = get_guest_dear(vcpu); 707 sregs->u.e.dear = vcpu->arch.shared->dar;
1224 sregs->u.e.tsr = vcpu->arch.tsr; 708 sregs->u.e.tsr = vcpu->arch.tsr;
1225 sregs->u.e.tcr = vcpu->arch.tcr; 709 sregs->u.e.tcr = vcpu->arch.tcr;
1226 sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); 710 sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
@@ -1237,25 +721,28 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
1237 vcpu->arch.csrr0 = sregs->u.e.csrr0; 721 vcpu->arch.csrr0 = sregs->u.e.csrr0;
1238 vcpu->arch.csrr1 = sregs->u.e.csrr1; 722 vcpu->arch.csrr1 = sregs->u.e.csrr1;
1239 vcpu->arch.mcsr = sregs->u.e.mcsr; 723 vcpu->arch.mcsr = sregs->u.e.mcsr;
1240 set_guest_esr(vcpu, sregs->u.e.esr); 724 vcpu->arch.esr = sregs->u.e.esr;
1241 set_guest_dear(vcpu, sregs->u.e.dear); 725 vcpu->arch.shared->dar = sregs->u.e.dear;
1242 vcpu->arch.vrsave = sregs->u.e.vrsave; 726 vcpu->arch.vrsave = sregs->u.e.vrsave;
1243 kvmppc_set_tcr(vcpu, sregs->u.e.tcr); 727 vcpu->arch.tcr = sregs->u.e.tcr;
1244 728
1245 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) { 729 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
1246 vcpu->arch.dec = sregs->u.e.dec; 730 vcpu->arch.dec = sregs->u.e.dec;
1247 kvmppc_emulate_dec(vcpu);
1248 }
1249 731
1250 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { 732 kvmppc_emulate_dec(vcpu);
1251 u32 old_tsr = vcpu->arch.tsr;
1252 733
1253 vcpu->arch.tsr = sregs->u.e.tsr; 734 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
1254 735 /*
1255 if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) 736 * FIXME: existing KVM timer handling is incomplete.
1256 arm_next_watchdog(vcpu); 737 * TSR cannot be read by the guest, and its value in
738 * vcpu->arch is always zero. For now, just handle
739 * the case where the caller is trying to inject a
740 * decrementer interrupt.
741 */
1257 742
1258 update_timer_ints(vcpu); 743 if ((sregs->u.e.tsr & TSR_DIS) &&
744 (vcpu->arch.tcr & TCR_DIE))
745 kvmppc_core_queue_dec(vcpu);
1259 } 746 }
1260 747
1261 return 0; 748 return 0;
@@ -1266,7 +753,7 @@ static void get_sregs_arch206(struct kvm_vcpu *vcpu,
1266{ 753{
1267 sregs->u.e.features |= KVM_SREGS_E_ARCH206; 754 sregs->u.e.features |= KVM_SREGS_E_ARCH206;
1268 755
1269 sregs->u.e.pir = vcpu->vcpu_id; 756 sregs->u.e.pir = 0;
1270 sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; 757 sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
1271 sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; 758 sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
1272 sregs->u.e.decar = vcpu->arch.decar; 759 sregs->u.e.decar = vcpu->arch.decar;
@@ -1279,7 +766,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
1279 if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) 766 if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
1280 return 0; 767 return 0;
1281 768
1282 if (sregs->u.e.pir != vcpu->vcpu_id) 769 if (sregs->u.e.pir != 0)
1283 return -EINVAL; 770 return -EINVAL;
1284 771
1285 vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; 772 vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
@@ -1367,74 +854,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1367 return kvmppc_core_set_sregs(vcpu, sregs); 854 return kvmppc_core_set_sregs(vcpu, sregs);
1368} 855}
1369 856
1370int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1371{
1372 int r = -EINVAL;
1373
1374 switch (reg->id) {
1375 case KVM_REG_PPC_IAC1:
1376 case KVM_REG_PPC_IAC2:
1377 case KVM_REG_PPC_IAC3:
1378 case KVM_REG_PPC_IAC4: {
1379 int iac = reg->id - KVM_REG_PPC_IAC1;
1380 r = copy_to_user((u64 __user *)(long)reg->addr,
1381 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
1382 break;
1383 }
1384 case KVM_REG_PPC_DAC1:
1385 case KVM_REG_PPC_DAC2: {
1386 int dac = reg->id - KVM_REG_PPC_DAC1;
1387 r = copy_to_user((u64 __user *)(long)reg->addr,
1388 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
1389 break;
1390 }
1391#if defined(CONFIG_64BIT)
1392 case KVM_REG_PPC_EPCR:
1393 r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
1394 break;
1395#endif
1396 default:
1397 break;
1398 }
1399 return r;
1400}
1401
1402int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1403{
1404 int r = -EINVAL;
1405
1406 switch (reg->id) {
1407 case KVM_REG_PPC_IAC1:
1408 case KVM_REG_PPC_IAC2:
1409 case KVM_REG_PPC_IAC3:
1410 case KVM_REG_PPC_IAC4: {
1411 int iac = reg->id - KVM_REG_PPC_IAC1;
1412 r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
1413 (u64 __user *)(long)reg->addr, sizeof(u64));
1414 break;
1415 }
1416 case KVM_REG_PPC_DAC1:
1417 case KVM_REG_PPC_DAC2: {
1418 int dac = reg->id - KVM_REG_PPC_DAC1;
1419 r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
1420 (u64 __user *)(long)reg->addr, sizeof(u64));
1421 break;
1422 }
1423#if defined(CONFIG_64BIT)
1424 case KVM_REG_PPC_EPCR: {
1425 u32 new_epcr;
1426 r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
1427 if (r == 0)
1428 kvmppc_set_epcr(vcpu, new_epcr);
1429 break;
1430 }
1431#endif
1432 default:
1433 break;
1434 }
1435 return r;
1436}
1437
1438int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 857int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1439{ 858{
1440 return -ENOTSUPP; 859 return -ENOTSUPP;
@@ -1459,102 +878,28 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1459 return -ENOTSUPP; 878 return -ENOTSUPP;
1460} 879}
1461 880
1462void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1463 struct kvm_memory_slot *dont)
1464{
1465}
1466
1467int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1468 unsigned long npages)
1469{
1470 return 0;
1471}
1472
1473int kvmppc_core_prepare_memory_region(struct kvm *kvm, 881int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1474 struct kvm_memory_slot *memslot,
1475 struct kvm_userspace_memory_region *mem) 882 struct kvm_userspace_memory_region *mem)
1476{ 883{
1477 return 0; 884 return 0;
1478} 885}
1479 886
1480void kvmppc_core_commit_memory_region(struct kvm *kvm, 887void kvmppc_core_commit_memory_region(struct kvm *kvm,
1481 struct kvm_userspace_memory_region *mem, 888 struct kvm_userspace_memory_region *mem)
1482 struct kvm_memory_slot old)
1483{
1484}
1485
1486void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
1487{
1488}
1489
1490void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
1491{
1492#if defined(CONFIG_64BIT)
1493 vcpu->arch.epcr = new_epcr;
1494#ifdef CONFIG_KVM_BOOKE_HV
1495 vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
1496 if (vcpu->arch.epcr & SPRN_EPCR_ICM)
1497 vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
1498#endif
1499#endif
1500}
1501
1502void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
1503{
1504 vcpu->arch.tcr = new_tcr;
1505 arm_next_watchdog(vcpu);
1506 update_timer_ints(vcpu);
1507}
1508
1509void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
1510{
1511 set_bits(tsr_bits, &vcpu->arch.tsr);
1512 smp_wmb();
1513 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1514 kvm_vcpu_kick(vcpu);
1515}
1516
1517void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
1518{
1519 clear_bits(tsr_bits, &vcpu->arch.tsr);
1520
1521 /*
1522 * We may have stopped the watchdog due to
1523 * being stuck on final expiration.
1524 */
1525 if (tsr_bits & (TSR_ENW | TSR_WIS))
1526 arm_next_watchdog(vcpu);
1527
1528 update_timer_ints(vcpu);
1529}
1530
1531void kvmppc_decrementer_func(unsigned long data)
1532{ 889{
1533 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
1534
1535 if (vcpu->arch.tcr & TCR_ARE) {
1536 vcpu->arch.dec = vcpu->arch.decar;
1537 kvmppc_emulate_dec(vcpu);
1538 }
1539
1540 kvmppc_set_tsr_bits(vcpu, TSR_DIS);
1541} 890}
1542 891
1543void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 892int kvmppc_core_init_vm(struct kvm *kvm)
1544{ 893{
1545 vcpu->cpu = smp_processor_id(); 894 return 0;
1546 current->thread.kvm_vcpu = vcpu;
1547} 895}
1548 896
1549void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) 897void kvmppc_core_destroy_vm(struct kvm *kvm)
1550{ 898{
1551 current->thread.kvm_vcpu = NULL;
1552 vcpu->cpu = -1;
1553} 899}
1554 900
1555int __init kvmppc_booke_init(void) 901int __init kvmppc_booke_init(void)
1556{ 902{
1557#ifndef CONFIG_KVM_BOOKE_HV
1558 unsigned long ivor[16]; 903 unsigned long ivor[16];
1559 unsigned long max_ivor = 0; 904 unsigned long max_ivor = 0;
1560 int i; 905 int i;
@@ -1597,7 +942,7 @@ int __init kvmppc_booke_init(void)
1597 } 942 }
1598 flush_icache_range(kvmppc_booke_handlers, 943 flush_icache_range(kvmppc_booke_handlers,
1599 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); 944 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
1600#endif /* !BOOKE_HV */ 945
1601 return 0; 946 return 0;
1602} 947}
1603 948
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index e9b88e433f6..8e1fe33d64e 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -23,7 +23,6 @@
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25#include <asm/kvm_ppc.h> 25#include <asm/kvm_ppc.h>
26#include <asm/switch_to.h>
27#include "timing.h" 26#include "timing.h"
28 27
29/* interrupt priortity ordering */ 28/* interrupt priortity ordering */
@@ -49,35 +48,17 @@
49#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 48#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
50/* Internal pseudo-irqprio for level triggered externals */ 49/* Internal pseudo-irqprio for level triggered externals */
51#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 50#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
52#define BOOKE_IRQPRIO_DBELL 21 51#define BOOKE_IRQPRIO_MAX 20
53#define BOOKE_IRQPRIO_DBELL_CRIT 22
54#define BOOKE_IRQPRIO_MAX 23
55
56#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \
57 (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \
58 (1 << BOOKE_IRQPRIO_DBELL) | \
59 (1 << BOOKE_IRQPRIO_DECREMENTER) | \
60 (1 << BOOKE_IRQPRIO_FIT) | \
61 (1 << BOOKE_IRQPRIO_EXTERNAL))
62
63#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \
64 (1 << BOOKE_IRQPRIO_WATCHDOG) | \
65 (1 << BOOKE_IRQPRIO_CRITICAL))
66 52
67extern unsigned long kvmppc_booke_handlers; 53extern unsigned long kvmppc_booke_handlers;
68 54
69void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); 55void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
70void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); 56void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
71 57
72void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
73void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
74void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
75void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
76
77int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 58int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
78 unsigned int inst, int *advance); 59 unsigned int inst, int *advance);
79int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); 60int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
80int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); 61int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
81 62
82/* low-level asm code to transfer guest state */ 63/* low-level asm code to transfer guest state */
83void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu); 64void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
@@ -86,46 +67,4 @@ void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
86/* high-level function, manages flags, host state */ 67/* high-level function, manages flags, host state */
87void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); 68void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
88 69
89void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
90void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu);
91
92enum int_class {
93 INT_CLASS_NONCRIT,
94 INT_CLASS_CRIT,
95 INT_CLASS_MC,
96 INT_CLASS_DBG,
97};
98
99void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
100
101/*
102 * Load up guest vcpu FP state if it's needed.
103 * It also set the MSR_FP in thread so that host know
104 * we're holding FPU, and then host can help to save
105 * guest vcpu FP state if other threads require to use FPU.
106 * This simulates an FP unavailable fault.
107 *
108 * It requires to be called with preemption disabled.
109 */
110static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
111{
112#ifdef CONFIG_PPC_FPU
113 if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
114 load_up_fpu();
115 current->thread.regs->msr |= MSR_FP;
116 }
117#endif
118}
119
120/*
121 * Save guest vcpu FP state into thread.
122 * It requires to be called with preemption disabled.
123 */
124static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
125{
126#ifdef CONFIG_PPC_FPU
127 if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
128 giveup_fpu(current);
129#endif
130}
131#endif /* __KVM_BOOKE_H__ */ 70#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 4685b8cf224..1260f5f24c0 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -13,7 +13,6 @@
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2008 15 * Copyright IBM Corp. 2008
16 * Copyright 2011 Freescale Semiconductor, Inc.
17 * 16 *
18 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
19 */ 18 */
@@ -24,7 +23,6 @@
24#include "booke.h" 23#include "booke.h"
25 24
26#define OP_19_XOP_RFI 50 25#define OP_19_XOP_RFI 50
27#define OP_19_XOP_RFCI 51
28 26
29#define OP_31_XOP_MFMSR 83 27#define OP_31_XOP_MFMSR 83
30#define OP_31_XOP_WRTEE 131 28#define OP_31_XOP_WRTEE 131
@@ -37,18 +35,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
37 kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); 35 kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
38} 36}
39 37
40static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
41{
42 vcpu->arch.pc = vcpu->arch.csrr0;
43 kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
44}
45
46int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 38int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
47 unsigned int inst, int *advance) 39 unsigned int inst, int *advance)
48{ 40{
49 int emulated = EMULATE_DONE; 41 int emulated = EMULATE_DONE;
50 int rs = get_rs(inst); 42 int rs;
51 int rt = get_rt(inst); 43 int rt;
52 44
53 switch (get_op(inst)) { 45 switch (get_op(inst)) {
54 case 19: 46 case 19:
@@ -59,12 +51,6 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
59 *advance = 0; 51 *advance = 0;
60 break; 52 break;
61 53
62 case OP_19_XOP_RFCI:
63 kvmppc_emul_rfci(vcpu);
64 kvmppc_set_exit_type(vcpu, EMULATED_RFCI_EXITS);
65 *advance = 0;
66 break;
67
68 default: 54 default:
69 emulated = EMULATE_FAIL; 55 emulated = EMULATE_FAIL;
70 break; 56 break;
@@ -75,16 +61,19 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
75 switch (get_xop(inst)) { 61 switch (get_xop(inst)) {
76 62
77 case OP_31_XOP_MFMSR: 63 case OP_31_XOP_MFMSR:
64 rt = get_rt(inst);
78 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); 65 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
79 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); 66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
80 break; 67 break;
81 68
82 case OP_31_XOP_MTMSR: 69 case OP_31_XOP_MTMSR:
70 rs = get_rs(inst);
83 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); 71 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
84 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); 72 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
85 break; 73 break;
86 74
87 case OP_31_XOP_WRTEE: 75 case OP_31_XOP_WRTEE:
76 rs = get_rs(inst);
88 vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) 77 vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
89 | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); 78 | (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
90 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); 79 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
@@ -109,79 +98,43 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
109 return emulated; 98 return emulated;
110} 99}
111 100
112/* 101int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
113 * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode).
114 * Their backing store is in real registers, and these functions
115 * will return the wrong result if called for them in another context
116 * (such as debugging).
117 */
118int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
119{ 102{
120 int emulated = EMULATE_DONE; 103 int emulated = EMULATE_DONE;
104 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
121 105
122 switch (sprn) { 106 switch (sprn) {
123 case SPRN_DEAR: 107 case SPRN_DEAR:
124 vcpu->arch.shared->dar = spr_val; 108 vcpu->arch.shared->dar = spr_val; break;
125 break;
126 case SPRN_ESR: 109 case SPRN_ESR:
127 vcpu->arch.shared->esr = spr_val; 110 vcpu->arch.esr = spr_val; break;
128 break;
129 case SPRN_CSRR0:
130 vcpu->arch.csrr0 = spr_val;
131 break;
132 case SPRN_CSRR1:
133 vcpu->arch.csrr1 = spr_val;
134 break;
135 case SPRN_DBCR0: 111 case SPRN_DBCR0:
136 vcpu->arch.dbg_reg.dbcr0 = spr_val; 112 vcpu->arch.dbcr0 = spr_val; break;
137 break;
138 case SPRN_DBCR1: 113 case SPRN_DBCR1:
139 vcpu->arch.dbg_reg.dbcr1 = spr_val; 114 vcpu->arch.dbcr1 = spr_val; break;
140 break;
141 case SPRN_DBSR: 115 case SPRN_DBSR:
142 vcpu->arch.dbsr &= ~spr_val; 116 vcpu->arch.dbsr &= ~spr_val; break;
143 break;
144 case SPRN_TSR: 117 case SPRN_TSR:
145 kvmppc_clr_tsr_bits(vcpu, spr_val); 118 vcpu->arch.tsr &= ~spr_val; break;
146 break;
147 case SPRN_TCR: 119 case SPRN_TCR:
148 /* 120 vcpu->arch.tcr = spr_val;
149 * WRC is a 2-bit field that is supposed to preserve its 121 kvmppc_emulate_dec(vcpu);
150 * value once written to non-zero.
151 */
152 if (vcpu->arch.tcr & TCR_WRC_MASK) {
153 spr_val &= ~TCR_WRC_MASK;
154 spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
155 }
156 kvmppc_set_tcr(vcpu, spr_val);
157 break; 122 break;
158 123
159 case SPRN_DECAR: 124 /* Note: SPRG4-7 are user-readable. These values are
160 vcpu->arch.decar = spr_val; 125 * loaded into the real SPRGs when resuming the
161 break; 126 * guest. */
162 /*
163 * Note: SPRG4-7 are user-readable.
164 * These values are loaded into the real SPRGs when resuming the
165 * guest (PR-mode only).
166 */
167 case SPRN_SPRG4: 127 case SPRN_SPRG4:
168 vcpu->arch.shared->sprg4 = spr_val; 128 vcpu->arch.sprg4 = spr_val; break;
169 break;
170 case SPRN_SPRG5: 129 case SPRN_SPRG5:
171 vcpu->arch.shared->sprg5 = spr_val; 130 vcpu->arch.sprg5 = spr_val; break;
172 break;
173 case SPRN_SPRG6: 131 case SPRN_SPRG6:
174 vcpu->arch.shared->sprg6 = spr_val; 132 vcpu->arch.sprg6 = spr_val; break;
175 break;
176 case SPRN_SPRG7: 133 case SPRN_SPRG7:
177 vcpu->arch.shared->sprg7 = spr_val; 134 vcpu->arch.sprg7 = spr_val; break;
178 break;
179 135
180 case SPRN_IVPR: 136 case SPRN_IVPR:
181 vcpu->arch.ivpr = spr_val; 137 vcpu->arch.ivpr = spr_val;
182#ifdef CONFIG_KVM_BOOKE_HV
183 mtspr(SPRN_GIVPR, spr_val);
184#endif
185 break; 138 break;
186 case SPRN_IVOR0: 139 case SPRN_IVOR0:
187 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; 140 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
@@ -191,9 +144,6 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
191 break; 144 break;
192 case SPRN_IVOR2: 145 case SPRN_IVOR2:
193 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; 146 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
194#ifdef CONFIG_KVM_BOOKE_HV
195 mtspr(SPRN_GIVOR2, spr_val);
196#endif
197 break; 147 break;
198 case SPRN_IVOR3: 148 case SPRN_IVOR3:
199 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; 149 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
@@ -212,9 +162,6 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
212 break; 162 break;
213 case SPRN_IVOR8: 163 case SPRN_IVOR8:
214 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; 164 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
215#ifdef CONFIG_KVM_BOOKE_HV
216 mtspr(SPRN_GIVOR8, spr_val);
217#endif
218 break; 165 break;
219 case SPRN_IVOR9: 166 case SPRN_IVOR9:
220 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; 167 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
@@ -237,17 +184,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
237 case SPRN_IVOR15: 184 case SPRN_IVOR15:
238 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; 185 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
239 break; 186 break;
240 case SPRN_MCSR: 187
241 vcpu->arch.mcsr &= ~spr_val;
242 break;
243#if defined(CONFIG_64BIT)
244 case SPRN_EPCR:
245 kvmppc_set_epcr(vcpu, spr_val);
246#ifdef CONFIG_KVM_BOOKE_HV
247 mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
248#endif
249 break;
250#endif
251 default: 188 default:
252 emulated = EMULATE_FAIL; 189 emulated = EMULATE_FAIL;
253 } 190 }
@@ -255,98 +192,72 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
255 return emulated; 192 return emulated;
256} 193}
257 194
258int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 195int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
259{ 196{
260 int emulated = EMULATE_DONE; 197 int emulated = EMULATE_DONE;
261 198
262 switch (sprn) { 199 switch (sprn) {
263 case SPRN_IVPR: 200 case SPRN_IVPR:
264 *spr_val = vcpu->arch.ivpr; 201 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
265 break;
266 case SPRN_DEAR: 202 case SPRN_DEAR:
267 *spr_val = vcpu->arch.shared->dar; 203 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
268 break;
269 case SPRN_ESR: 204 case SPRN_ESR:
270 *spr_val = vcpu->arch.shared->esr; 205 kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
271 break;
272 case SPRN_CSRR0:
273 *spr_val = vcpu->arch.csrr0;
274 break;
275 case SPRN_CSRR1:
276 *spr_val = vcpu->arch.csrr1;
277 break;
278 case SPRN_DBCR0: 206 case SPRN_DBCR0:
279 *spr_val = vcpu->arch.dbg_reg.dbcr0; 207 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
280 break;
281 case SPRN_DBCR1: 208 case SPRN_DBCR1:
282 *spr_val = vcpu->arch.dbg_reg.dbcr1; 209 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
283 break;
284 case SPRN_DBSR: 210 case SPRN_DBSR:
285 *spr_val = vcpu->arch.dbsr; 211 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
286 break;
287 case SPRN_TSR:
288 *spr_val = vcpu->arch.tsr;
289 break;
290 case SPRN_TCR:
291 *spr_val = vcpu->arch.tcr;
292 break;
293 212
294 case SPRN_IVOR0: 213 case SPRN_IVOR0:
295 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; 214 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
296 break; 215 break;
297 case SPRN_IVOR1: 216 case SPRN_IVOR1:
298 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; 217 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
299 break; 218 break;
300 case SPRN_IVOR2: 219 case SPRN_IVOR2:
301 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; 220 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
302 break; 221 break;
303 case SPRN_IVOR3: 222 case SPRN_IVOR3:
304 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; 223 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
305 break; 224 break;
306 case SPRN_IVOR4: 225 case SPRN_IVOR4:
307 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; 226 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
308 break; 227 break;
309 case SPRN_IVOR5: 228 case SPRN_IVOR5:
310 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; 229 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
311 break; 230 break;
312 case SPRN_IVOR6: 231 case SPRN_IVOR6:
313 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; 232 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
314 break; 233 break;
315 case SPRN_IVOR7: 234 case SPRN_IVOR7:
316 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; 235 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
317 break; 236 break;
318 case SPRN_IVOR8: 237 case SPRN_IVOR8:
319 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; 238 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
320 break; 239 break;
321 case SPRN_IVOR9: 240 case SPRN_IVOR9:
322 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; 241 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
323 break; 242 break;
324 case SPRN_IVOR10: 243 case SPRN_IVOR10:
325 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; 244 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
326 break; 245 break;
327 case SPRN_IVOR11: 246 case SPRN_IVOR11:
328 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; 247 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
329 break; 248 break;
330 case SPRN_IVOR12: 249 case SPRN_IVOR12:
331 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; 250 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
332 break; 251 break;
333 case SPRN_IVOR13: 252 case SPRN_IVOR13:
334 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 253 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
335 break; 254 break;
336 case SPRN_IVOR14: 255 case SPRN_IVOR14:
337 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 256 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
338 break; 257 break;
339 case SPRN_IVOR15: 258 case SPRN_IVOR15:
340 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 259 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
341 break;
342 case SPRN_MCSR:
343 *spr_val = vcpu->arch.mcsr;
344 break;
345#if defined(CONFIG_64BIT)
346 case SPRN_EPCR:
347 *spr_val = vcpu->arch.epcr;
348 break; 260 break;
349#endif
350 261
351 default: 262 default:
352 emulated = EMULATE_FAIL; 263 emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index bb46b32f981..42f2fb1f66e 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -25,6 +25,8 @@
25#include <asm/page.h> 25#include <asm/page.h>
26#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
27 27
28#define VCPU_GPR(n) (VCPU_GPRS + (n * 4))
29
28/* The host stack layout: */ 30/* The host stack layout: */
29#define HOST_R1 0 /* Implied by stwu. */ 31#define HOST_R1 0 /* Implied by stwu. */
30#define HOST_CALLEE_LR 4 32#define HOST_CALLEE_LR 4
@@ -32,11 +34,9 @@
32/* r2 is special: it holds 'current', and it made nonvolatile in the 34/* r2 is special: it holds 'current', and it made nonvolatile in the
33 * kernel with the -ffixed-r2 gcc option. */ 35 * kernel with the -ffixed-r2 gcc option. */
34#define HOST_R2 12 36#define HOST_R2 12
35#define HOST_CR 16 37#define HOST_NV_GPRS 16
36#define HOST_NV_GPRS 20 38#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4))
37#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) 39#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4)
38#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
39#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4)
40#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ 40#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
41#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ 41#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */
42 42
@@ -52,21 +52,16 @@
52 (1<<BOOKE_INTERRUPT_PROGRAM) | \ 52 (1<<BOOKE_INTERRUPT_PROGRAM) | \
53 (1<<BOOKE_INTERRUPT_DTLB_MISS)) 53 (1<<BOOKE_INTERRUPT_DTLB_MISS))
54 54
55.macro KVM_HANDLER ivor_nr scratch srr0 55.macro KVM_HANDLER ivor_nr
56_GLOBAL(kvmppc_handler_\ivor_nr) 56_GLOBAL(kvmppc_handler_\ivor_nr)
57 /* Get pointer to vcpu and record exit number. */ 57 /* Get pointer to vcpu and record exit number. */
58 mtspr \scratch , r4 58 mtspr SPRN_SPRG_WSCRATCH0, r4
59 mfspr r4, SPRN_SPRG_RVCPU 59 mfspr r4, SPRN_SPRG_RVCPU
60 stw r3, VCPU_GPR(R3)(r4) 60 stw r5, VCPU_GPR(r5)(r4)
61 stw r5, VCPU_GPR(R5)(r4) 61 stw r6, VCPU_GPR(r6)(r4)
62 stw r6, VCPU_GPR(R6)(r4)
63 mfspr r3, \scratch
64 mfctr r5 62 mfctr r5
65 stw r3, VCPU_GPR(R4)(r4)
66 stw r5, VCPU_CTR(r4)
67 mfspr r3, \srr0
68 lis r6, kvmppc_resume_host@h 63 lis r6, kvmppc_resume_host@h
69 stw r3, VCPU_PC(r4) 64 stw r5, VCPU_CTR(r4)
70 li r5, \ivor_nr 65 li r5, \ivor_nr
71 ori r6, r6, kvmppc_resume_host@l 66 ori r6, r6, kvmppc_resume_host@l
72 mtctr r6 67 mtctr r6
@@ -74,40 +69,42 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
74.endm 69.endm
75 70
76_GLOBAL(kvmppc_handlers_start) 71_GLOBAL(kvmppc_handlers_start)
77KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 72KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
78KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0 73KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
79KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0 74KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
80KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0 75KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
81KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 76KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
82KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT SPRN_SPRG_RSCRATCH0 SPRN_SRR0 77KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
83KVM_HANDLER BOOKE_INTERRUPT_PROGRAM SPRN_SPRG_RSCRATCH0 SPRN_SRR0 78KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
84KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 79KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
85KVM_HANDLER BOOKE_INTERRUPT_SYSCALL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 80KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
86KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 81KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
87KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER SPRN_SPRG_RSCRATCH0 SPRN_SRR0 82KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
88KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0 83KVM_HANDLER BOOKE_INTERRUPT_FIT
89KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 84KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
90KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 85KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
91KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 86KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
92KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 87KVM_HANDLER BOOKE_INTERRUPT_DEBUG
93KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 88KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
94KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 89KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
95KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 90KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
96 91
97_GLOBAL(kvmppc_handler_len) 92_GLOBAL(kvmppc_handler_len)
98 .long kvmppc_handler_1 - kvmppc_handler_0 93 .long kvmppc_handler_1 - kvmppc_handler_0
99 94
95
100/* Registers: 96/* Registers:
101 * SPRG_SCRATCH0: guest r4 97 * SPRG_SCRATCH0: guest r4
102 * r4: vcpu pointer 98 * r4: vcpu pointer
103 * r5: KVM exit number 99 * r5: KVM exit number
104 */ 100 */
105_GLOBAL(kvmppc_resume_host) 101_GLOBAL(kvmppc_resume_host)
102 stw r3, VCPU_GPR(r3)(r4)
106 mfcr r3 103 mfcr r3
107 stw r3, VCPU_CR(r4) 104 stw r3, VCPU_CR(r4)
108 stw r7, VCPU_GPR(R7)(r4) 105 stw r7, VCPU_GPR(r7)(r4)
109 stw r8, VCPU_GPR(R8)(r4) 106 stw r8, VCPU_GPR(r8)(r4)
110 stw r9, VCPU_GPR(R9)(r4) 107 stw r9, VCPU_GPR(r9)(r4)
111 108
112 li r6, 1 109 li r6, 1
113 slw r6, r6, r5 110 slw r6, r6, r5
@@ -137,23 +134,23 @@ _GLOBAL(kvmppc_resume_host)
137 isync 134 isync
138 stw r9, VCPU_LAST_INST(r4) 135 stw r9, VCPU_LAST_INST(r4)
139 136
140 stw r15, VCPU_GPR(R15)(r4) 137 stw r15, VCPU_GPR(r15)(r4)
141 stw r16, VCPU_GPR(R16)(r4) 138 stw r16, VCPU_GPR(r16)(r4)
142 stw r17, VCPU_GPR(R17)(r4) 139 stw r17, VCPU_GPR(r17)(r4)
143 stw r18, VCPU_GPR(R18)(r4) 140 stw r18, VCPU_GPR(r18)(r4)
144 stw r19, VCPU_GPR(R19)(r4) 141 stw r19, VCPU_GPR(r19)(r4)
145 stw r20, VCPU_GPR(R20)(r4) 142 stw r20, VCPU_GPR(r20)(r4)
146 stw r21, VCPU_GPR(R21)(r4) 143 stw r21, VCPU_GPR(r21)(r4)
147 stw r22, VCPU_GPR(R22)(r4) 144 stw r22, VCPU_GPR(r22)(r4)
148 stw r23, VCPU_GPR(R23)(r4) 145 stw r23, VCPU_GPR(r23)(r4)
149 stw r24, VCPU_GPR(R24)(r4) 146 stw r24, VCPU_GPR(r24)(r4)
150 stw r25, VCPU_GPR(R25)(r4) 147 stw r25, VCPU_GPR(r25)(r4)
151 stw r26, VCPU_GPR(R26)(r4) 148 stw r26, VCPU_GPR(r26)(r4)
152 stw r27, VCPU_GPR(R27)(r4) 149 stw r27, VCPU_GPR(r27)(r4)
153 stw r28, VCPU_GPR(R28)(r4) 150 stw r28, VCPU_GPR(r28)(r4)
154 stw r29, VCPU_GPR(R29)(r4) 151 stw r29, VCPU_GPR(r29)(r4)
155 stw r30, VCPU_GPR(R30)(r4) 152 stw r30, VCPU_GPR(r30)(r4)
156 stw r31, VCPU_GPR(R31)(r4) 153 stw r31, VCPU_GPR(r31)(r4)
157..skip_inst_copy: 154..skip_inst_copy:
158 155
159 /* Also grab DEAR and ESR before the host can clobber them. */ 156 /* Also grab DEAR and ESR before the host can clobber them. */
@@ -171,18 +168,22 @@ _GLOBAL(kvmppc_resume_host)
171..skip_esr: 168..skip_esr:
172 169
173 /* Save remaining volatile guest register state to vcpu. */ 170 /* Save remaining volatile guest register state to vcpu. */
174 stw r0, VCPU_GPR(R0)(r4) 171 stw r0, VCPU_GPR(r0)(r4)
175 stw r1, VCPU_GPR(R1)(r4) 172 stw r1, VCPU_GPR(r1)(r4)
176 stw r2, VCPU_GPR(R2)(r4) 173 stw r2, VCPU_GPR(r2)(r4)
177 stw r10, VCPU_GPR(R10)(r4) 174 stw r10, VCPU_GPR(r10)(r4)
178 stw r11, VCPU_GPR(R11)(r4) 175 stw r11, VCPU_GPR(r11)(r4)
179 stw r12, VCPU_GPR(R12)(r4) 176 stw r12, VCPU_GPR(r12)(r4)
180 stw r13, VCPU_GPR(R13)(r4) 177 stw r13, VCPU_GPR(r13)(r4)
181 stw r14, VCPU_GPR(R14)(r4) /* We need a NV GPR below. */ 178 stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */
182 mflr r3 179 mflr r3
183 stw r3, VCPU_LR(r4) 180 stw r3, VCPU_LR(r4)
184 mfxer r3 181 mfxer r3
185 stw r3, VCPU_XER(r4) 182 stw r3, VCPU_XER(r4)
183 mfspr r3, SPRN_SPRG_RSCRATCH0
184 stw r3, VCPU_GPR(r4)(r4)
185 mfspr r3, SPRN_SRR0
186 stw r3, VCPU_PC(r4)
186 187
187 /* Restore host stack pointer and PID before IVPR, since the host 188 /* Restore host stack pointer and PID before IVPR, since the host
188 * exception handlers use them. */ 189 * exception handlers use them. */
@@ -212,28 +213,28 @@ _GLOBAL(kvmppc_resume_host)
212 213
213 /* Restore vcpu pointer and the nonvolatiles we used. */ 214 /* Restore vcpu pointer and the nonvolatiles we used. */
214 mr r4, r14 215 mr r4, r14
215 lwz r14, VCPU_GPR(R14)(r4) 216 lwz r14, VCPU_GPR(r14)(r4)
216 217
217 /* Sometimes instruction emulation must restore complete GPR state. */ 218 /* Sometimes instruction emulation must restore complete GPR state. */
218 andi. r5, r3, RESUME_FLAG_NV 219 andi. r5, r3, RESUME_FLAG_NV
219 beq ..skip_nv_load 220 beq ..skip_nv_load
220 lwz r15, VCPU_GPR(R15)(r4) 221 lwz r15, VCPU_GPR(r15)(r4)
221 lwz r16, VCPU_GPR(R16)(r4) 222 lwz r16, VCPU_GPR(r16)(r4)
222 lwz r17, VCPU_GPR(R17)(r4) 223 lwz r17, VCPU_GPR(r17)(r4)
223 lwz r18, VCPU_GPR(R18)(r4) 224 lwz r18, VCPU_GPR(r18)(r4)
224 lwz r19, VCPU_GPR(R19)(r4) 225 lwz r19, VCPU_GPR(r19)(r4)
225 lwz r20, VCPU_GPR(R20)(r4) 226 lwz r20, VCPU_GPR(r20)(r4)
226 lwz r21, VCPU_GPR(R21)(r4) 227 lwz r21, VCPU_GPR(r21)(r4)
227 lwz r22, VCPU_GPR(R22)(r4) 228 lwz r22, VCPU_GPR(r22)(r4)
228 lwz r23, VCPU_GPR(R23)(r4) 229 lwz r23, VCPU_GPR(r23)(r4)
229 lwz r24, VCPU_GPR(R24)(r4) 230 lwz r24, VCPU_GPR(r24)(r4)
230 lwz r25, VCPU_GPR(R25)(r4) 231 lwz r25, VCPU_GPR(r25)(r4)
231 lwz r26, VCPU_GPR(R26)(r4) 232 lwz r26, VCPU_GPR(r26)(r4)
232 lwz r27, VCPU_GPR(R27)(r4) 233 lwz r27, VCPU_GPR(r27)(r4)
233 lwz r28, VCPU_GPR(R28)(r4) 234 lwz r28, VCPU_GPR(r28)(r4)
234 lwz r29, VCPU_GPR(R29)(r4) 235 lwz r29, VCPU_GPR(r29)(r4)
235 lwz r30, VCPU_GPR(R30)(r4) 236 lwz r30, VCPU_GPR(r30)(r4)
236 lwz r31, VCPU_GPR(R31)(r4) 237 lwz r31, VCPU_GPR(r31)(r4)
237..skip_nv_load: 238..skip_nv_load:
238 239
239 /* Should we return to the guest? */ 240 /* Should we return to the guest? */
@@ -255,50 +256,48 @@ heavyweight_exit:
255 256
256 /* We already saved guest volatile register state; now save the 257 /* We already saved guest volatile register state; now save the
257 * non-volatiles. */ 258 * non-volatiles. */
258 stw r15, VCPU_GPR(R15)(r4) 259 stw r15, VCPU_GPR(r15)(r4)
259 stw r16, VCPU_GPR(R16)(r4) 260 stw r16, VCPU_GPR(r16)(r4)
260 stw r17, VCPU_GPR(R17)(r4) 261 stw r17, VCPU_GPR(r17)(r4)
261 stw r18, VCPU_GPR(R18)(r4) 262 stw r18, VCPU_GPR(r18)(r4)
262 stw r19, VCPU_GPR(R19)(r4) 263 stw r19, VCPU_GPR(r19)(r4)
263 stw r20, VCPU_GPR(R20)(r4) 264 stw r20, VCPU_GPR(r20)(r4)
264 stw r21, VCPU_GPR(R21)(r4) 265 stw r21, VCPU_GPR(r21)(r4)
265 stw r22, VCPU_GPR(R22)(r4) 266 stw r22, VCPU_GPR(r22)(r4)
266 stw r23, VCPU_GPR(R23)(r4) 267 stw r23, VCPU_GPR(r23)(r4)
267 stw r24, VCPU_GPR(R24)(r4) 268 stw r24, VCPU_GPR(r24)(r4)
268 stw r25, VCPU_GPR(R25)(r4) 269 stw r25, VCPU_GPR(r25)(r4)
269 stw r26, VCPU_GPR(R26)(r4) 270 stw r26, VCPU_GPR(r26)(r4)
270 stw r27, VCPU_GPR(R27)(r4) 271 stw r27, VCPU_GPR(r27)(r4)
271 stw r28, VCPU_GPR(R28)(r4) 272 stw r28, VCPU_GPR(r28)(r4)
272 stw r29, VCPU_GPR(R29)(r4) 273 stw r29, VCPU_GPR(r29)(r4)
273 stw r30, VCPU_GPR(R30)(r4) 274 stw r30, VCPU_GPR(r30)(r4)
274 stw r31, VCPU_GPR(R31)(r4) 275 stw r31, VCPU_GPR(r31)(r4)
275 276
276 /* Load host non-volatile register state from host stack. */ 277 /* Load host non-volatile register state from host stack. */
277 lwz r14, HOST_NV_GPR(R14)(r1) 278 lwz r14, HOST_NV_GPR(r14)(r1)
278 lwz r15, HOST_NV_GPR(R15)(r1) 279 lwz r15, HOST_NV_GPR(r15)(r1)
279 lwz r16, HOST_NV_GPR(R16)(r1) 280 lwz r16, HOST_NV_GPR(r16)(r1)
280 lwz r17, HOST_NV_GPR(R17)(r1) 281 lwz r17, HOST_NV_GPR(r17)(r1)
281 lwz r18, HOST_NV_GPR(R18)(r1) 282 lwz r18, HOST_NV_GPR(r18)(r1)
282 lwz r19, HOST_NV_GPR(R19)(r1) 283 lwz r19, HOST_NV_GPR(r19)(r1)
283 lwz r20, HOST_NV_GPR(R20)(r1) 284 lwz r20, HOST_NV_GPR(r20)(r1)
284 lwz r21, HOST_NV_GPR(R21)(r1) 285 lwz r21, HOST_NV_GPR(r21)(r1)
285 lwz r22, HOST_NV_GPR(R22)(r1) 286 lwz r22, HOST_NV_GPR(r22)(r1)
286 lwz r23, HOST_NV_GPR(R23)(r1) 287 lwz r23, HOST_NV_GPR(r23)(r1)
287 lwz r24, HOST_NV_GPR(R24)(r1) 288 lwz r24, HOST_NV_GPR(r24)(r1)
288 lwz r25, HOST_NV_GPR(R25)(r1) 289 lwz r25, HOST_NV_GPR(r25)(r1)
289 lwz r26, HOST_NV_GPR(R26)(r1) 290 lwz r26, HOST_NV_GPR(r26)(r1)
290 lwz r27, HOST_NV_GPR(R27)(r1) 291 lwz r27, HOST_NV_GPR(r27)(r1)
291 lwz r28, HOST_NV_GPR(R28)(r1) 292 lwz r28, HOST_NV_GPR(r28)(r1)
292 lwz r29, HOST_NV_GPR(R29)(r1) 293 lwz r29, HOST_NV_GPR(r29)(r1)
293 lwz r30, HOST_NV_GPR(R30)(r1) 294 lwz r30, HOST_NV_GPR(r30)(r1)
294 lwz r31, HOST_NV_GPR(R31)(r1) 295 lwz r31, HOST_NV_GPR(r31)(r1)
295 296
296 /* Return to kvm_vcpu_run(). */ 297 /* Return to kvm_vcpu_run(). */
297 lwz r4, HOST_STACK_LR(r1) 298 lwz r4, HOST_STACK_LR(r1)
298 lwz r5, HOST_CR(r1)
299 addi r1, r1, HOST_STACK_SIZE 299 addi r1, r1, HOST_STACK_SIZE
300 mtlr r4 300 mtlr r4
301 mtcr r5
302 /* r3 still contains the return code from kvmppc_handle_exit(). */ 301 /* r3 still contains the return code from kvmppc_handle_exit(). */
303 blr 302 blr
304 303
@@ -315,48 +314,46 @@ _GLOBAL(__kvmppc_vcpu_run)
315 stw r3, HOST_RUN(r1) 314 stw r3, HOST_RUN(r1)
316 mflr r3 315 mflr r3
317 stw r3, HOST_STACK_LR(r1) 316 stw r3, HOST_STACK_LR(r1)
318 mfcr r5
319 stw r5, HOST_CR(r1)
320 317
321 /* Save host non-volatile register state to stack. */ 318 /* Save host non-volatile register state to stack. */
322 stw r14, HOST_NV_GPR(R14)(r1) 319 stw r14, HOST_NV_GPR(r14)(r1)
323 stw r15, HOST_NV_GPR(R15)(r1) 320 stw r15, HOST_NV_GPR(r15)(r1)
324 stw r16, HOST_NV_GPR(R16)(r1) 321 stw r16, HOST_NV_GPR(r16)(r1)
325 stw r17, HOST_NV_GPR(R17)(r1) 322 stw r17, HOST_NV_GPR(r17)(r1)
326 stw r18, HOST_NV_GPR(R18)(r1) 323 stw r18, HOST_NV_GPR(r18)(r1)
327 stw r19, HOST_NV_GPR(R19)(r1) 324 stw r19, HOST_NV_GPR(r19)(r1)
328 stw r20, HOST_NV_GPR(R20)(r1) 325 stw r20, HOST_NV_GPR(r20)(r1)
329 stw r21, HOST_NV_GPR(R21)(r1) 326 stw r21, HOST_NV_GPR(r21)(r1)
330 stw r22, HOST_NV_GPR(R22)(r1) 327 stw r22, HOST_NV_GPR(r22)(r1)
331 stw r23, HOST_NV_GPR(R23)(r1) 328 stw r23, HOST_NV_GPR(r23)(r1)
332 stw r24, HOST_NV_GPR(R24)(r1) 329 stw r24, HOST_NV_GPR(r24)(r1)
333 stw r25, HOST_NV_GPR(R25)(r1) 330 stw r25, HOST_NV_GPR(r25)(r1)
334 stw r26, HOST_NV_GPR(R26)(r1) 331 stw r26, HOST_NV_GPR(r26)(r1)
335 stw r27, HOST_NV_GPR(R27)(r1) 332 stw r27, HOST_NV_GPR(r27)(r1)
336 stw r28, HOST_NV_GPR(R28)(r1) 333 stw r28, HOST_NV_GPR(r28)(r1)
337 stw r29, HOST_NV_GPR(R29)(r1) 334 stw r29, HOST_NV_GPR(r29)(r1)
338 stw r30, HOST_NV_GPR(R30)(r1) 335 stw r30, HOST_NV_GPR(r30)(r1)
339 stw r31, HOST_NV_GPR(R31)(r1) 336 stw r31, HOST_NV_GPR(r31)(r1)
340 337
341 /* Load guest non-volatiles. */ 338 /* Load guest non-volatiles. */
342 lwz r14, VCPU_GPR(R14)(r4) 339 lwz r14, VCPU_GPR(r14)(r4)
343 lwz r15, VCPU_GPR(R15)(r4) 340 lwz r15, VCPU_GPR(r15)(r4)
344 lwz r16, VCPU_GPR(R16)(r4) 341 lwz r16, VCPU_GPR(r16)(r4)
345 lwz r17, VCPU_GPR(R17)(r4) 342 lwz r17, VCPU_GPR(r17)(r4)
346 lwz r18, VCPU_GPR(R18)(r4) 343 lwz r18, VCPU_GPR(r18)(r4)
347 lwz r19, VCPU_GPR(R19)(r4) 344 lwz r19, VCPU_GPR(r19)(r4)
348 lwz r20, VCPU_GPR(R20)(r4) 345 lwz r20, VCPU_GPR(r20)(r4)
349 lwz r21, VCPU_GPR(R21)(r4) 346 lwz r21, VCPU_GPR(r21)(r4)
350 lwz r22, VCPU_GPR(R22)(r4) 347 lwz r22, VCPU_GPR(r22)(r4)
351 lwz r23, VCPU_GPR(R23)(r4) 348 lwz r23, VCPU_GPR(r23)(r4)
352 lwz r24, VCPU_GPR(R24)(r4) 349 lwz r24, VCPU_GPR(r24)(r4)
353 lwz r25, VCPU_GPR(R25)(r4) 350 lwz r25, VCPU_GPR(r25)(r4)
354 lwz r26, VCPU_GPR(R26)(r4) 351 lwz r26, VCPU_GPR(r26)(r4)
355 lwz r27, VCPU_GPR(R27)(r4) 352 lwz r27, VCPU_GPR(r27)(r4)
356 lwz r28, VCPU_GPR(R28)(r4) 353 lwz r28, VCPU_GPR(r28)(r4)
357 lwz r29, VCPU_GPR(R29)(r4) 354 lwz r29, VCPU_GPR(r29)(r4)
358 lwz r30, VCPU_GPR(R30)(r4) 355 lwz r30, VCPU_GPR(r30)(r4)
359 lwz r31, VCPU_GPR(R31)(r4) 356 lwz r31, VCPU_GPR(r31)(r4)
360 357
361#ifdef CONFIG_SPE 358#ifdef CONFIG_SPE
362 /* save host SPEFSCR and load guest SPEFSCR */ 359 /* save host SPEFSCR and load guest SPEFSCR */
@@ -384,13 +381,13 @@ lightweight_exit:
384#endif 381#endif
385 382
386 /* Load some guest volatiles. */ 383 /* Load some guest volatiles. */
387 lwz r0, VCPU_GPR(R0)(r4) 384 lwz r0, VCPU_GPR(r0)(r4)
388 lwz r2, VCPU_GPR(R2)(r4) 385 lwz r2, VCPU_GPR(r2)(r4)
389 lwz r9, VCPU_GPR(R9)(r4) 386 lwz r9, VCPU_GPR(r9)(r4)
390 lwz r10, VCPU_GPR(R10)(r4) 387 lwz r10, VCPU_GPR(r10)(r4)
391 lwz r11, VCPU_GPR(R11)(r4) 388 lwz r11, VCPU_GPR(r11)(r4)
392 lwz r12, VCPU_GPR(R12)(r4) 389 lwz r12, VCPU_GPR(r12)(r4)
393 lwz r13, VCPU_GPR(R13)(r4) 390 lwz r13, VCPU_GPR(r13)(r4)
394 lwz r3, VCPU_LR(r4) 391 lwz r3, VCPU_LR(r4)
395 mtlr r3 392 mtlr r3
396 lwz r3, VCPU_XER(r4) 393 lwz r3, VCPU_XER(r4)
@@ -405,25 +402,19 @@ lightweight_exit:
405 /* Save vcpu pointer for the exception handlers. */ 402 /* Save vcpu pointer for the exception handlers. */
406 mtspr SPRN_SPRG_WVCPU, r4 403 mtspr SPRN_SPRG_WVCPU, r4
407 404
408 lwz r5, VCPU_SHARED(r4)
409
410 /* Can't switch the stack pointer until after IVPR is switched, 405 /* Can't switch the stack pointer until after IVPR is switched,
411 * because host interrupt handlers would get confused. */ 406 * because host interrupt handlers would get confused. */
412 lwz r1, VCPU_GPR(R1)(r4) 407 lwz r1, VCPU_GPR(r1)(r4)
413 408
414 /* 409 /* Host interrupt handlers may have clobbered these guest-readable
415 * Host interrupt handlers may have clobbered these 410 * SPRGs, so we need to reload them here with the guest's values. */
416 * guest-readable SPRGs, or the guest kernel may have 411 lwz r3, VCPU_SPRG4(r4)
417 * written directly to the shared area, so we
418 * need to reload them here with the guest's values.
419 */
420 PPC_LD(r3, VCPU_SHARED_SPRG4, r5)
421 mtspr SPRN_SPRG4W, r3 412 mtspr SPRN_SPRG4W, r3
422 PPC_LD(r3, VCPU_SHARED_SPRG5, r5) 413 lwz r3, VCPU_SPRG5(r4)
423 mtspr SPRN_SPRG5W, r3 414 mtspr SPRN_SPRG5W, r3
424 PPC_LD(r3, VCPU_SHARED_SPRG6, r5) 415 lwz r3, VCPU_SPRG6(r4)
425 mtspr SPRN_SPRG6W, r3 416 mtspr SPRN_SPRG6W, r3
426 PPC_LD(r3, VCPU_SHARED_SPRG7, r5) 417 lwz r3, VCPU_SPRG7(r4)
427 mtspr SPRN_SPRG7W, r3 418 mtspr SPRN_SPRG7W, r3
428 419
429#ifdef CONFIG_KVM_EXIT_TIMING 420#ifdef CONFIG_KVM_EXIT_TIMING
@@ -447,10 +438,10 @@ lightweight_exit:
447 mtcr r5 438 mtcr r5
448 mtsrr0 r6 439 mtsrr0 r6
449 mtsrr1 r7 440 mtsrr1 r7
450 lwz r5, VCPU_GPR(R5)(r4) 441 lwz r5, VCPU_GPR(r5)(r4)
451 lwz r6, VCPU_GPR(R6)(r4) 442 lwz r6, VCPU_GPR(r6)(r4)
452 lwz r7, VCPU_GPR(R7)(r4) 443 lwz r7, VCPU_GPR(r7)(r4)
453 lwz r8, VCPU_GPR(R8)(r4) 444 lwz r8, VCPU_GPR(r8)(r4)
454 445
455 /* Clear any debug events which occurred since we disabled MSR[DE]. 446 /* Clear any debug events which occurred since we disabled MSR[DE].
456 * XXX This gives us a 3-instruction window in which a breakpoint 447 * XXX This gives us a 3-instruction window in which a breakpoint
@@ -459,8 +450,8 @@ lightweight_exit:
459 ori r3, r3, 0xffff 450 ori r3, r3, 0xffff
460 mtspr SPRN_DBSR, r3 451 mtspr SPRN_DBSR, r3
461 452
462 lwz r3, VCPU_GPR(R3)(r4) 453 lwz r3, VCPU_GPR(r3)(r4)
463 lwz r4, VCPU_GPR(R4)(r4) 454 lwz r4, VCPU_GPR(r4)(r4)
464 rfi 455 rfi
465 456
466#ifdef CONFIG_SPE 457#ifdef CONFIG_SPE
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
deleted file mode 100644
index e8ed7d659c5..00000000000
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ /dev/null
@@ -1,712 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
16 *
17 * Author: Varun Sethi <varun.sethi@freescale.com>
18 * Author: Scott Wood <scotwood@freescale.com>
19 * Author: Mihai Caraman <mihai.caraman@freescale.com>
20 *
21 * This file is derived from arch/powerpc/kvm/booke_interrupts.S
22 */
23
24#include <asm/ppc_asm.h>
25#include <asm/kvm_asm.h>
26#include <asm/reg.h>
27#include <asm/mmu-44x.h>
28#include <asm/page.h>
29#include <asm/asm-compat.h>
30#include <asm/asm-offsets.h>
31#include <asm/bitsperlong.h>
32#include <asm/thread_info.h>
33
34#ifdef CONFIG_64BIT
35#include <asm/exception-64e.h>
36#else
37#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
38#endif
39
40#define LONGBYTES (BITS_PER_LONG / 8)
41
42#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
43
44/* The host stack layout: */
45#define HOST_R1 0 /* Implied by stwu. */
46#define HOST_CALLEE_LR PPC_LR_STKOFF
47#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES)
48/*
49 * r2 is special: it holds 'current', and it made nonvolatile in the
50 * kernel with the -ffixed-r2 gcc option.
51 */
52#define HOST_R2 (HOST_RUN + LONGBYTES)
53#define HOST_CR (HOST_R2 + LONGBYTES)
54#define HOST_NV_GPRS (HOST_CR + LONGBYTES)
55#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
56#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
57#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
58#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
59/* LR in caller stack frame. */
60#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF)
61
62#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
63#define NEED_DEAR 0x00000002 /* save faulting DEAR */
64#define NEED_ESR 0x00000004 /* save faulting ESR */
65
66/*
67 * On entry:
68 * r4 = vcpu, r5 = srr0, r6 = srr1
69 * saved in vcpu: cr, ctr, r3-r13
70 */
71.macro kvm_handler_common intno, srr0, flags
72 /* Restore host stack pointer */
73 PPC_STL r1, VCPU_GPR(R1)(r4)
74 PPC_STL r2, VCPU_GPR(R2)(r4)
75 PPC_LL r1, VCPU_HOST_STACK(r4)
76 PPC_LL r2, HOST_R2(r1)
77
78 mfspr r10, SPRN_PID
79 lwz r8, VCPU_HOST_PID(r4)
80 PPC_LL r11, VCPU_SHARED(r4)
81 PPC_STL r14, VCPU_GPR(R14)(r4) /* We need a non-volatile GPR. */
82 li r14, \intno
83
84 stw r10, VCPU_GUEST_PID(r4)
85 mtspr SPRN_PID, r8
86
87#ifdef CONFIG_KVM_EXIT_TIMING
88 /* save exit time */
891: mfspr r7, SPRN_TBRU
90 mfspr r8, SPRN_TBRL
91 mfspr r9, SPRN_TBRU
92 cmpw r9, r7
93 stw r8, VCPU_TIMING_EXIT_TBL(r4)
94 bne- 1b
95 stw r9, VCPU_TIMING_EXIT_TBU(r4)
96#endif
97
98 oris r8, r6, MSR_CE@h
99 PPC_STD(r6, VCPU_SHARED_MSR, r11)
100 ori r8, r8, MSR_ME | MSR_RI
101 PPC_STL r5, VCPU_PC(r4)
102
103 /*
104 * Make sure CE/ME/RI are set (if appropriate for exception type)
105 * whether or not the guest had it set. Since mfmsr/mtmsr are
106 * somewhat expensive, skip in the common case where the guest
107 * had all these bits set (and thus they're still set if
108 * appropriate for the exception type).
109 */
110 cmpw r6, r8
111 beq 1f
112 mfmsr r7
113 .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0
114 oris r7, r7, MSR_CE@h
115 .endif
116 .if \srr0 != SPRN_MCSRR0
117 ori r7, r7, MSR_ME | MSR_RI
118 .endif
119 mtmsr r7
1201:
121
122 .if \flags & NEED_EMU
123 /*
124 * This assumes you have external PID support.
125 * To support a bookehv CPU without external PID, you'll
126 * need to look up the TLB entry and create a temporary mapping.
127 *
128 * FIXME: we don't currently handle if the lwepx faults. PR-mode
129 * booke doesn't handle it either. Since Linux doesn't use
130 * broadcast tlbivax anymore, the only way this should happen is
131 * if the guest maps its memory execute-but-not-read, or if we
132 * somehow take a TLB miss in the middle of this entry code and
133 * evict the relevant entry. On e500mc, all kernel lowmem is
134 * bolted into TLB1 large page mappings, and we don't use
135 * broadcast invalidates, so we should not take a TLB miss here.
136 *
137 * Later we'll need to deal with faults here. Disallowing guest
138 * mappings that are execute-but-not-read could be an option on
139 * e500mc, but not on chips with an LRAT if it is used.
140 */
141
142 mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */
143 PPC_STL r15, VCPU_GPR(R15)(r4)
144 PPC_STL r16, VCPU_GPR(R16)(r4)
145 PPC_STL r17, VCPU_GPR(R17)(r4)
146 PPC_STL r18, VCPU_GPR(R18)(r4)
147 PPC_STL r19, VCPU_GPR(R19)(r4)
148 mr r8, r3
149 PPC_STL r20, VCPU_GPR(R20)(r4)
150 rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
151 PPC_STL r21, VCPU_GPR(R21)(r4)
152 rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
153 PPC_STL r22, VCPU_GPR(R22)(r4)
154 rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID
155 PPC_STL r23, VCPU_GPR(R23)(r4)
156 PPC_STL r24, VCPU_GPR(R24)(r4)
157 PPC_STL r25, VCPU_GPR(R25)(r4)
158 PPC_STL r26, VCPU_GPR(R26)(r4)
159 PPC_STL r27, VCPU_GPR(R27)(r4)
160 PPC_STL r28, VCPU_GPR(R28)(r4)
161 PPC_STL r29, VCPU_GPR(R29)(r4)
162 PPC_STL r30, VCPU_GPR(R30)(r4)
163 PPC_STL r31, VCPU_GPR(R31)(r4)
164 mtspr SPRN_EPLC, r8
165
166 /* disable preemption, so we are sure we hit the fixup handler */
167 CURRENT_THREAD_INFO(r8, r1)
168 li r7, 1
169 stw r7, TI_PREEMPT(r8)
170
171 isync
172
173 /*
174 * In case the read goes wrong, we catch it and write an invalid value
175 * in LAST_INST instead.
176 */
1771: lwepx r9, 0, r5
1782:
179.section .fixup, "ax"
1803: li r9, KVM_INST_FETCH_FAILED
181 b 2b
182.previous
183.section __ex_table,"a"
184 PPC_LONG_ALIGN
185 PPC_LONG 1b,3b
186.previous
187
188 mtspr SPRN_EPLC, r3
189 li r7, 0
190 stw r7, TI_PREEMPT(r8)
191 stw r9, VCPU_LAST_INST(r4)
192 .endif
193
194 .if \flags & NEED_ESR
195 mfspr r8, SPRN_ESR
196 PPC_STL r8, VCPU_FAULT_ESR(r4)
197 .endif
198
199 .if \flags & NEED_DEAR
200 mfspr r9, SPRN_DEAR
201 PPC_STL r9, VCPU_FAULT_DEAR(r4)
202 .endif
203
204 b kvmppc_resume_host
205.endm
206
207#ifdef CONFIG_64BIT
208/* Exception types */
209#define EX_GEN 1
210#define EX_GDBELL 2
211#define EX_DBG 3
212#define EX_MC 4
213#define EX_CRIT 5
214#define EX_TLB 6
215
216/*
217 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
218 */
219.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
220 _GLOBAL(kvmppc_handler_\intno\()_\srr1)
221 mr r11, r4
222 /*
223 * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
224 */
225 PPC_LL r4, PACACURRENT(r13)
226 PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
227 stw r10, VCPU_CR(r4)
228 PPC_STL r11, VCPU_GPR(R4)(r4)
229 PPC_STL r5, VCPU_GPR(R5)(r4)
230 .if \type == EX_CRIT
231 PPC_LL r5, (\paca_ex + EX_R13)(r13)
232 .else
233 mfspr r5, \scratch
234 .endif
235 PPC_STL r6, VCPU_GPR(R6)(r4)
236 PPC_STL r8, VCPU_GPR(R8)(r4)
237 PPC_STL r9, VCPU_GPR(R9)(r4)
238 PPC_STL r5, VCPU_GPR(R13)(r4)
239 PPC_LL r6, (\paca_ex + \ex_r10)(r13)
240 PPC_LL r8, (\paca_ex + \ex_r11)(r13)
241 PPC_STL r3, VCPU_GPR(R3)(r4)
242 PPC_STL r7, VCPU_GPR(R7)(r4)
243 PPC_STL r12, VCPU_GPR(R12)(r4)
244 PPC_STL r6, VCPU_GPR(R10)(r4)
245 PPC_STL r8, VCPU_GPR(R11)(r4)
246 mfctr r5
247 PPC_STL r5, VCPU_CTR(r4)
248 mfspr r5, \srr0
249 mfspr r6, \srr1
250 kvm_handler_common \intno, \srr0, \flags
251.endm
252
253#define EX_PARAMS(type) \
254 EX_##type, \
255 SPRN_SPRG_##type##_SCRATCH, \
256 PACA_EX##type, \
257 EX_R10, \
258 EX_R11
259
260#define EX_PARAMS_TLB \
261 EX_TLB, \
262 SPRN_SPRG_GEN_SCRATCH, \
263 PACA_EXTLB, \
264 EX_TLB_R10, \
265 EX_TLB_R11
266
267kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
268 SPRN_CSRR0, SPRN_CSRR1, 0
269kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
270 SPRN_MCSRR0, SPRN_MCSRR1, 0
271kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
272 SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
273kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
274 SPRN_SRR0, SPRN_SRR1, NEED_ESR
275kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
276 SPRN_SRR0, SPRN_SRR1, 0
277kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
278 SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
279kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
280 SPRN_SRR0, SPRN_SRR1,NEED_ESR
281kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
282 SPRN_SRR0, SPRN_SRR1, 0
283kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
284 SPRN_SRR0, SPRN_SRR1, 0
285kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
286 SPRN_SRR0, SPRN_SRR1, 0
287kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
288 SPRN_SRR0, SPRN_SRR1, 0
289kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
290 SPRN_CSRR0, SPRN_CSRR1, 0
291/*
292 * Only bolted TLB miss exception handlers are supported for now
293 */
294kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
295 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
296kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
297 SPRN_SRR0, SPRN_SRR1, 0
298kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
299 SPRN_SRR0, SPRN_SRR1, 0
300kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
301 SPRN_SRR0, SPRN_SRR1, 0
302kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
303 SPRN_SRR0, SPRN_SRR1, 0
304kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
305 SPRN_SRR0, SPRN_SRR1, 0
306kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
307 SPRN_SRR0, SPRN_SRR1, 0
308kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
309 SPRN_CSRR0, SPRN_CSRR1, 0
310kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
311 SPRN_SRR0, SPRN_SRR1, NEED_EMU
312kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
313 SPRN_SRR0, SPRN_SRR1, 0
314kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
315 SPRN_GSRR0, SPRN_GSRR1, 0
316kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
317 SPRN_CSRR0, SPRN_CSRR1, 0
318kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
319 SPRN_DSRR0, SPRN_DSRR1, 0
320kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
321 SPRN_CSRR0, SPRN_CSRR1, 0
322#else
323/*
324 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
325 */
326.macro kvm_handler intno srr0, srr1, flags
327_GLOBAL(kvmppc_handler_\intno\()_\srr1)
328 PPC_LL r11, THREAD_KVM_VCPU(r10)
329 PPC_STL r3, VCPU_GPR(R3)(r11)
330 mfspr r3, SPRN_SPRG_RSCRATCH0
331 PPC_STL r4, VCPU_GPR(R4)(r11)
332 PPC_LL r4, THREAD_NORMSAVE(0)(r10)
333 PPC_STL r5, VCPU_GPR(R5)(r11)
334 stw r13, VCPU_CR(r11)
335 mfspr r5, \srr0
336 PPC_STL r3, VCPU_GPR(R10)(r11)
337 PPC_LL r3, THREAD_NORMSAVE(2)(r10)
338 PPC_STL r6, VCPU_GPR(R6)(r11)
339 PPC_STL r4, VCPU_GPR(R11)(r11)
340 mfspr r6, \srr1
341 PPC_STL r7, VCPU_GPR(R7)(r11)
342 PPC_STL r8, VCPU_GPR(R8)(r11)
343 PPC_STL r9, VCPU_GPR(R9)(r11)
344 PPC_STL r3, VCPU_GPR(R13)(r11)
345 mfctr r7
346 PPC_STL r12, VCPU_GPR(R12)(r11)
347 PPC_STL r7, VCPU_CTR(r11)
348 mr r4, r11
349 kvm_handler_common \intno, \srr0, \flags
350.endm
351
352.macro kvm_lvl_handler intno scratch srr0, srr1, flags
353_GLOBAL(kvmppc_handler_\intno\()_\srr1)
354 mfspr r10, SPRN_SPRG_THREAD
355 PPC_LL r11, THREAD_KVM_VCPU(r10)
356 PPC_STL r3, VCPU_GPR(R3)(r11)
357 mfspr r3, \scratch
358 PPC_STL r4, VCPU_GPR(R4)(r11)
359 PPC_LL r4, GPR9(r8)
360 PPC_STL r5, VCPU_GPR(R5)(r11)
361 stw r9, VCPU_CR(r11)
362 mfspr r5, \srr0
363 PPC_STL r3, VCPU_GPR(R8)(r11)
364 PPC_LL r3, GPR10(r8)
365 PPC_STL r6, VCPU_GPR(R6)(r11)
366 PPC_STL r4, VCPU_GPR(R9)(r11)
367 mfspr r6, \srr1
368 PPC_LL r4, GPR11(r8)
369 PPC_STL r7, VCPU_GPR(R7)(r11)
370 PPC_STL r3, VCPU_GPR(R10)(r11)
371 mfctr r7
372 PPC_STL r12, VCPU_GPR(R12)(r11)
373 PPC_STL r13, VCPU_GPR(R13)(r11)
374 PPC_STL r4, VCPU_GPR(R11)(r11)
375 PPC_STL r7, VCPU_CTR(r11)
376 mr r4, r11
377 kvm_handler_common \intno, \srr0, \flags
378.endm
379
380kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \
381 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
382kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \
383 SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0
384kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \
385 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
386kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
387kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
388kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
389 SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
390kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
391kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
392kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
393kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
394kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0
395kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0
396kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
397 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
398kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
399 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
400kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
401kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
402kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
403kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
404kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
405kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
406kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
407 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
408kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU
409kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
410kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0
411kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \
412 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
413kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
414 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
415kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
416 SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
417#endif
418
419/* Registers:
420 * SPRG_SCRATCH0: guest r10
421 * r4: vcpu pointer
422 * r11: vcpu->arch.shared
423 * r14: KVM exit number
424 */
425_GLOBAL(kvmppc_resume_host)
426 /* Save remaining volatile guest register state to vcpu. */
427 mfspr r3, SPRN_VRSAVE
428 PPC_STL r0, VCPU_GPR(R0)(r4)
429 mflr r5
430 mfspr r6, SPRN_SPRG4
431 PPC_STL r5, VCPU_LR(r4)
432 mfspr r7, SPRN_SPRG5
433 stw r3, VCPU_VRSAVE(r4)
434 PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
435 mfspr r8, SPRN_SPRG6
436 PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
437 mfspr r9, SPRN_SPRG7
438 PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
439 mfxer r3
440 PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
441
442 /* save guest MAS registers and restore host mas4 & mas6 */
443 mfspr r5, SPRN_MAS0
444 PPC_STL r3, VCPU_XER(r4)
445 mfspr r6, SPRN_MAS1
446 stw r5, VCPU_SHARED_MAS0(r11)
447 mfspr r7, SPRN_MAS2
448 stw r6, VCPU_SHARED_MAS1(r11)
449 PPC_STD(r7, VCPU_SHARED_MAS2, r11)
450 mfspr r5, SPRN_MAS3
451 mfspr r6, SPRN_MAS4
452 stw r5, VCPU_SHARED_MAS7_3+4(r11)
453 mfspr r7, SPRN_MAS6
454 stw r6, VCPU_SHARED_MAS4(r11)
455 mfspr r5, SPRN_MAS7
456 lwz r6, VCPU_HOST_MAS4(r4)
457 stw r7, VCPU_SHARED_MAS6(r11)
458 lwz r8, VCPU_HOST_MAS6(r4)
459 mtspr SPRN_MAS4, r6
460 stw r5, VCPU_SHARED_MAS7_3+0(r11)
461 mtspr SPRN_MAS6, r8
462 /* Enable MAS register updates via exception */
463 mfspr r3, SPRN_EPCR
464 rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH
465 mtspr SPRN_EPCR, r3
466 isync
467
468 /* Switch to kernel stack and jump to handler. */
469 PPC_LL r3, HOST_RUN(r1)
470 mr r5, r14 /* intno */
471 mr r14, r4 /* Save vcpu pointer. */
472 bl kvmppc_handle_exit
473
474 /* Restore vcpu pointer and the nonvolatiles we used. */
475 mr r4, r14
476 PPC_LL r14, VCPU_GPR(R14)(r4)
477
478 andi. r5, r3, RESUME_FLAG_NV
479 beq skip_nv_load
480 PPC_LL r15, VCPU_GPR(R15)(r4)
481 PPC_LL r16, VCPU_GPR(R16)(r4)
482 PPC_LL r17, VCPU_GPR(R17)(r4)
483 PPC_LL r18, VCPU_GPR(R18)(r4)
484 PPC_LL r19, VCPU_GPR(R19)(r4)
485 PPC_LL r20, VCPU_GPR(R20)(r4)
486 PPC_LL r21, VCPU_GPR(R21)(r4)
487 PPC_LL r22, VCPU_GPR(R22)(r4)
488 PPC_LL r23, VCPU_GPR(R23)(r4)
489 PPC_LL r24, VCPU_GPR(R24)(r4)
490 PPC_LL r25, VCPU_GPR(R25)(r4)
491 PPC_LL r26, VCPU_GPR(R26)(r4)
492 PPC_LL r27, VCPU_GPR(R27)(r4)
493 PPC_LL r28, VCPU_GPR(R28)(r4)
494 PPC_LL r29, VCPU_GPR(R29)(r4)
495 PPC_LL r30, VCPU_GPR(R30)(r4)
496 PPC_LL r31, VCPU_GPR(R31)(r4)
497skip_nv_load:
498 /* Should we return to the guest? */
499 andi. r5, r3, RESUME_FLAG_HOST
500 beq lightweight_exit
501
502 srawi r3, r3, 2 /* Shift -ERR back down. */
503
504heavyweight_exit:
505 /* Not returning to guest. */
506 PPC_LL r5, HOST_STACK_LR(r1)
507 lwz r6, HOST_CR(r1)
508
509 /*
510 * We already saved guest volatile register state; now save the
511 * non-volatiles.
512 */
513
514 PPC_STL r15, VCPU_GPR(R15)(r4)
515 PPC_STL r16, VCPU_GPR(R16)(r4)
516 PPC_STL r17, VCPU_GPR(R17)(r4)
517 PPC_STL r18, VCPU_GPR(R18)(r4)
518 PPC_STL r19, VCPU_GPR(R19)(r4)
519 PPC_STL r20, VCPU_GPR(R20)(r4)
520 PPC_STL r21, VCPU_GPR(R21)(r4)
521 PPC_STL r22, VCPU_GPR(R22)(r4)
522 PPC_STL r23, VCPU_GPR(R23)(r4)
523 PPC_STL r24, VCPU_GPR(R24)(r4)
524 PPC_STL r25, VCPU_GPR(R25)(r4)
525 PPC_STL r26, VCPU_GPR(R26)(r4)
526 PPC_STL r27, VCPU_GPR(R27)(r4)
527 PPC_STL r28, VCPU_GPR(R28)(r4)
528 PPC_STL r29, VCPU_GPR(R29)(r4)
529 PPC_STL r30, VCPU_GPR(R30)(r4)
530 PPC_STL r31, VCPU_GPR(R31)(r4)
531
532 /* Load host non-volatile register state from host stack. */
533 PPC_LL r14, HOST_NV_GPR(R14)(r1)
534 PPC_LL r15, HOST_NV_GPR(R15)(r1)
535 PPC_LL r16, HOST_NV_GPR(R16)(r1)
536 PPC_LL r17, HOST_NV_GPR(R17)(r1)
537 PPC_LL r18, HOST_NV_GPR(R18)(r1)
538 PPC_LL r19, HOST_NV_GPR(R19)(r1)
539 PPC_LL r20, HOST_NV_GPR(R20)(r1)
540 PPC_LL r21, HOST_NV_GPR(R21)(r1)
541 PPC_LL r22, HOST_NV_GPR(R22)(r1)
542 PPC_LL r23, HOST_NV_GPR(R23)(r1)
543 PPC_LL r24, HOST_NV_GPR(R24)(r1)
544 PPC_LL r25, HOST_NV_GPR(R25)(r1)
545 PPC_LL r26, HOST_NV_GPR(R26)(r1)
546 PPC_LL r27, HOST_NV_GPR(R27)(r1)
547 PPC_LL r28, HOST_NV_GPR(R28)(r1)
548 PPC_LL r29, HOST_NV_GPR(R29)(r1)
549 PPC_LL r30, HOST_NV_GPR(R30)(r1)
550 PPC_LL r31, HOST_NV_GPR(R31)(r1)
551
552 /* Return to kvm_vcpu_run(). */
553 mtlr r5
554 mtcr r6
555 addi r1, r1, HOST_STACK_SIZE
556 /* r3 still contains the return code from kvmppc_handle_exit(). */
557 blr
558
559/* Registers:
560 * r3: kvm_run pointer
561 * r4: vcpu pointer
562 */
563_GLOBAL(__kvmppc_vcpu_run)
564 stwu r1, -HOST_STACK_SIZE(r1)
565 PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
566
567 /* Save host state to stack. */
568 PPC_STL r3, HOST_RUN(r1)
569 mflr r3
570 mfcr r5
571 PPC_STL r3, HOST_STACK_LR(r1)
572
573 stw r5, HOST_CR(r1)
574
575 /* Save host non-volatile register state to stack. */
576 PPC_STL r14, HOST_NV_GPR(R14)(r1)
577 PPC_STL r15, HOST_NV_GPR(R15)(r1)
578 PPC_STL r16, HOST_NV_GPR(R16)(r1)
579 PPC_STL r17, HOST_NV_GPR(R17)(r1)
580 PPC_STL r18, HOST_NV_GPR(R18)(r1)
581 PPC_STL r19, HOST_NV_GPR(R19)(r1)
582 PPC_STL r20, HOST_NV_GPR(R20)(r1)
583 PPC_STL r21, HOST_NV_GPR(R21)(r1)
584 PPC_STL r22, HOST_NV_GPR(R22)(r1)
585 PPC_STL r23, HOST_NV_GPR(R23)(r1)
586 PPC_STL r24, HOST_NV_GPR(R24)(r1)
587 PPC_STL r25, HOST_NV_GPR(R25)(r1)
588 PPC_STL r26, HOST_NV_GPR(R26)(r1)
589 PPC_STL r27, HOST_NV_GPR(R27)(r1)
590 PPC_STL r28, HOST_NV_GPR(R28)(r1)
591 PPC_STL r29, HOST_NV_GPR(R29)(r1)
592 PPC_STL r30, HOST_NV_GPR(R30)(r1)
593 PPC_STL r31, HOST_NV_GPR(R31)(r1)
594
595 /* Load guest non-volatiles. */
596 PPC_LL r14, VCPU_GPR(R14)(r4)
597 PPC_LL r15, VCPU_GPR(R15)(r4)
598 PPC_LL r16, VCPU_GPR(R16)(r4)
599 PPC_LL r17, VCPU_GPR(R17)(r4)
600 PPC_LL r18, VCPU_GPR(R18)(r4)
601 PPC_LL r19, VCPU_GPR(R19)(r4)
602 PPC_LL r20, VCPU_GPR(R20)(r4)
603 PPC_LL r21, VCPU_GPR(R21)(r4)
604 PPC_LL r22, VCPU_GPR(R22)(r4)
605 PPC_LL r23, VCPU_GPR(R23)(r4)
606 PPC_LL r24, VCPU_GPR(R24)(r4)
607 PPC_LL r25, VCPU_GPR(R25)(r4)
608 PPC_LL r26, VCPU_GPR(R26)(r4)
609 PPC_LL r27, VCPU_GPR(R27)(r4)
610 PPC_LL r28, VCPU_GPR(R28)(r4)
611 PPC_LL r29, VCPU_GPR(R29)(r4)
612 PPC_LL r30, VCPU_GPR(R30)(r4)
613 PPC_LL r31, VCPU_GPR(R31)(r4)
614
615
616lightweight_exit:
617 PPC_STL r2, HOST_R2(r1)
618
619 mfspr r3, SPRN_PID
620 stw r3, VCPU_HOST_PID(r4)
621 lwz r3, VCPU_GUEST_PID(r4)
622 mtspr SPRN_PID, r3
623
624 PPC_LL r11, VCPU_SHARED(r4)
625 /* Disable MAS register updates via exception */
626 mfspr r3, SPRN_EPCR
627 oris r3, r3, SPRN_EPCR_DMIUH@h
628 mtspr SPRN_EPCR, r3
629 isync
630 /* Save host mas4 and mas6 and load guest MAS registers */
631 mfspr r3, SPRN_MAS4
632 stw r3, VCPU_HOST_MAS4(r4)
633 mfspr r3, SPRN_MAS6
634 stw r3, VCPU_HOST_MAS6(r4)
635 lwz r3, VCPU_SHARED_MAS0(r11)
636 lwz r5, VCPU_SHARED_MAS1(r11)
637 PPC_LD(r6, VCPU_SHARED_MAS2, r11)
638 lwz r7, VCPU_SHARED_MAS7_3+4(r11)
639 lwz r8, VCPU_SHARED_MAS4(r11)
640 mtspr SPRN_MAS0, r3
641 mtspr SPRN_MAS1, r5
642 mtspr SPRN_MAS2, r6
643 mtspr SPRN_MAS3, r7
644 mtspr SPRN_MAS4, r8
645 lwz r3, VCPU_SHARED_MAS6(r11)
646 lwz r5, VCPU_SHARED_MAS7_3+0(r11)
647 mtspr SPRN_MAS6, r3
648 mtspr SPRN_MAS7, r5
649
650 /*
651 * Host interrupt handlers may have clobbered these guest-readable
652 * SPRGs, so we need to reload them here with the guest's values.
653 */
654 lwz r3, VCPU_VRSAVE(r4)
655 PPC_LD(r5, VCPU_SHARED_SPRG4, r11)
656 mtspr SPRN_VRSAVE, r3
657 PPC_LD(r6, VCPU_SHARED_SPRG5, r11)
658 mtspr SPRN_SPRG4W, r5
659 PPC_LD(r7, VCPU_SHARED_SPRG6, r11)
660 mtspr SPRN_SPRG5W, r6
661 PPC_LD(r8, VCPU_SHARED_SPRG7, r11)
662 mtspr SPRN_SPRG6W, r7
663 mtspr SPRN_SPRG7W, r8
664
665 /* Load some guest volatiles. */
666 PPC_LL r3, VCPU_LR(r4)
667 PPC_LL r5, VCPU_XER(r4)
668 PPC_LL r6, VCPU_CTR(r4)
669 lwz r7, VCPU_CR(r4)
670 PPC_LL r8, VCPU_PC(r4)
671 PPC_LD(r9, VCPU_SHARED_MSR, r11)
672 PPC_LL r0, VCPU_GPR(R0)(r4)
673 PPC_LL r1, VCPU_GPR(R1)(r4)
674 PPC_LL r2, VCPU_GPR(R2)(r4)
675 PPC_LL r10, VCPU_GPR(R10)(r4)
676 PPC_LL r11, VCPU_GPR(R11)(r4)
677 PPC_LL r12, VCPU_GPR(R12)(r4)
678 PPC_LL r13, VCPU_GPR(R13)(r4)
679 mtlr r3
680 mtxer r5
681 mtctr r6
682 mtsrr0 r8
683 mtsrr1 r9
684
685#ifdef CONFIG_KVM_EXIT_TIMING
686 /* save enter time */
6871:
688 mfspr r6, SPRN_TBRU
689 mfspr r9, SPRN_TBRL
690 mfspr r8, SPRN_TBRU
691 cmpw r8, r6
692 stw r9, VCPU_TIMING_LAST_ENTER_TBL(r4)
693 bne 1b
694 stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
695#endif
696
697 /*
698 * Don't execute any instruction which can change CR after
699 * below instruction.
700 */
701 mtcr r7
702
703 /* Finish loading guest volatiles and jump to guest. */
704 PPC_LL r5, VCPU_GPR(R5)(r4)
705 PPC_LL r6, VCPU_GPR(R6)(r4)
706 PPC_LL r7, VCPU_GPR(R7)(r4)
707 PPC_LL r8, VCPU_GPR(R8)(r4)
708 PPC_LL r9, VCPU_GPR(R9)(r4)
709
710 PPC_LL r3, VCPU_GPR(R3)(r4)
711 PPC_LL r4, VCPU_GPR(R4)(r4)
712 rfi
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index b479ed77c51..797a7447c26 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -15,287 +15,15 @@
15#include <linux/kvm_host.h> 15#include <linux/kvm_host.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/export.h>
19 18
20#include <asm/reg.h> 19#include <asm/reg.h>
21#include <asm/cputable.h> 20#include <asm/cputable.h>
22#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
22#include <asm/kvm_e500.h>
23#include <asm/kvm_ppc.h> 23#include <asm/kvm_ppc.h>
24 24
25#include "../mm/mmu_decl.h"
26#include "booke.h" 25#include "booke.h"
27#include "e500.h" 26#include "e500_tlb.h"
28
29struct id {
30 unsigned long val;
31 struct id **pentry;
32};
33
34#define NUM_TIDS 256
35
36/*
37 * This table provide mappings from:
38 * (guestAS,guestTID,guestPR) --> ID of physical cpu
39 * guestAS [0..1]
40 * guestTID [0..255]
41 * guestPR [0..1]
42 * ID [1..255]
43 * Each vcpu keeps one vcpu_id_table.
44 */
45struct vcpu_id_table {
46 struct id id[2][NUM_TIDS][2];
47};
48
49/*
50 * This table provide reversed mappings of vcpu_id_table:
51 * ID --> address of vcpu_id_table item.
52 * Each physical core has one pcpu_id_table.
53 */
54struct pcpu_id_table {
55 struct id *entry[NUM_TIDS];
56};
57
58static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
59
60/* This variable keeps last used shadow ID on local core.
61 * The valid range of shadow ID is [1..255] */
62static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
63
64/*
65 * Allocate a free shadow id and setup a valid sid mapping in given entry.
66 * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
67 *
68 * The caller must have preemption disabled, and keep it that way until
69 * it has finished with the returned shadow id (either written into the
70 * TLB or arch.shadow_pid, or discarded).
71 */
72static inline int local_sid_setup_one(struct id *entry)
73{
74 unsigned long sid;
75 int ret = -1;
76
77 sid = ++(__get_cpu_var(pcpu_last_used_sid));
78 if (sid < NUM_TIDS) {
79 __get_cpu_var(pcpu_sids).entry[sid] = entry;
80 entry->val = sid;
81 entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
82 ret = sid;
83 }
84
85 /*
86 * If sid == NUM_TIDS, we've run out of sids. We return -1, and
87 * the caller will invalidate everything and start over.
88 *
89 * sid > NUM_TIDS indicates a race, which we disable preemption to
90 * avoid.
91 */
92 WARN_ON(sid > NUM_TIDS);
93
94 return ret;
95}
96
97/*
98 * Check if given entry contain a valid shadow id mapping.
99 * An ID mapping is considered valid only if
100 * both vcpu and pcpu know this mapping.
101 *
102 * The caller must have preemption disabled, and keep it that way until
103 * it has finished with the returned shadow id (either written into the
104 * TLB or arch.shadow_pid, or discarded).
105 */
106static inline int local_sid_lookup(struct id *entry)
107{
108 if (entry && entry->val != 0 &&
109 __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
110 entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
111 return entry->val;
112 return -1;
113}
114
115/* Invalidate all id mappings on local core -- call with preempt disabled */
116static inline void local_sid_destroy_all(void)
117{
118 __get_cpu_var(pcpu_last_used_sid) = 0;
119 memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
120}
121
122static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
123{
124 vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
125 return vcpu_e500->idt;
126}
127
128static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
129{
130 kfree(vcpu_e500->idt);
131 vcpu_e500->idt = NULL;
132}
133
134/* Map guest pid to shadow.
135 * We use PID to keep shadow of current guest non-zero PID,
136 * and use PID1 to keep shadow of guest zero PID.
137 * So that guest tlbe with TID=0 can be accessed at any time */
138static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
139{
140 preempt_disable();
141 vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
142 get_cur_as(&vcpu_e500->vcpu),
143 get_cur_pid(&vcpu_e500->vcpu),
144 get_cur_pr(&vcpu_e500->vcpu), 1);
145 vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
146 get_cur_as(&vcpu_e500->vcpu), 0,
147 get_cur_pr(&vcpu_e500->vcpu), 1);
148 preempt_enable();
149}
150
151/* Invalidate all mappings on vcpu */
152static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
153{
154 memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
155
156 /* Update shadow pid when mappings are changed */
157 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
158}
159
160/* Invalidate one ID mapping on vcpu */
161static inline void kvmppc_e500_id_table_reset_one(
162 struct kvmppc_vcpu_e500 *vcpu_e500,
163 int as, int pid, int pr)
164{
165 struct vcpu_id_table *idt = vcpu_e500->idt;
166
167 BUG_ON(as >= 2);
168 BUG_ON(pid >= NUM_TIDS);
169 BUG_ON(pr >= 2);
170
171 idt->id[as][pid][pr].val = 0;
172 idt->id[as][pid][pr].pentry = NULL;
173
174 /* Update shadow pid when mappings are changed */
175 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
176}
177
178/*
179 * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
180 * This function first lookup if a valid mapping exists,
181 * if not, then creates a new one.
182 *
183 * The caller must have preemption disabled, and keep it that way until
184 * it has finished with the returned shadow id (either written into the
185 * TLB or arch.shadow_pid, or discarded).
186 */
187unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
188 unsigned int as, unsigned int gid,
189 unsigned int pr, int avoid_recursion)
190{
191 struct vcpu_id_table *idt = vcpu_e500->idt;
192 int sid;
193
194 BUG_ON(as >= 2);
195 BUG_ON(gid >= NUM_TIDS);
196 BUG_ON(pr >= 2);
197
198 sid = local_sid_lookup(&idt->id[as][gid][pr]);
199
200 while (sid <= 0) {
201 /* No mapping yet */
202 sid = local_sid_setup_one(&idt->id[as][gid][pr]);
203 if (sid <= 0) {
204 _tlbil_all();
205 local_sid_destroy_all();
206 }
207
208 /* Update shadow pid when mappings are changed */
209 if (!avoid_recursion)
210 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
211 }
212
213 return sid;
214}
215
216unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
217 struct kvm_book3e_206_tlb_entry *gtlbe)
218{
219 return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe),
220 get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0);
221}
222
223void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
224{
225 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
226
227 if (vcpu->arch.pid != pid) {
228 vcpu_e500->pid[0] = vcpu->arch.pid = pid;
229 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
230 }
231}
232
233/* gtlbe must not be mapped by more than one host tlbe */
234void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
235 struct kvm_book3e_206_tlb_entry *gtlbe)
236{
237 struct vcpu_id_table *idt = vcpu_e500->idt;
238 unsigned int pr, tid, ts, pid;
239 u32 val, eaddr;
240 unsigned long flags;
241
242 ts = get_tlb_ts(gtlbe);
243 tid = get_tlb_tid(gtlbe);
244
245 preempt_disable();
246
247 /* One guest ID may be mapped to two shadow IDs */
248 for (pr = 0; pr < 2; pr++) {
249 /*
250 * The shadow PID can have a valid mapping on at most one
251 * host CPU. In the common case, it will be valid on this
252 * CPU, in which case we do a local invalidation of the
253 * specific address.
254 *
255 * If the shadow PID is not valid on the current host CPU,
256 * we invalidate the entire shadow PID.
257 */
258 pid = local_sid_lookup(&idt->id[ts][tid][pr]);
259 if (pid <= 0) {
260 kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
261 continue;
262 }
263
264 /*
265 * The guest is invalidating a 4K entry which is in a PID
266 * that has a valid shadow mapping on this host CPU. We
267 * search host TLB to invalidate it's shadow TLB entry,
268 * similar to __tlbil_va except that we need to look in AS1.
269 */
270 val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
271 eaddr = get_tlb_eaddr(gtlbe);
272
273 local_irq_save(flags);
274
275 mtspr(SPRN_MAS6, val);
276 asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
277 val = mfspr(SPRN_MAS1);
278 if (val & MAS1_VALID) {
279 mtspr(SPRN_MAS1, val & ~MAS1_VALID);
280 asm volatile("tlbwe");
281 }
282
283 local_irq_restore(flags);
284 }
285
286 preempt_enable();
287}
288
289void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
290{
291 kvmppc_e500_id_table_reset_all(vcpu_e500);
292}
293
294void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
295{
296 /* Recalc shadow pid since MSR changes */
297 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
298}
299 27
300void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) 28void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
301{ 29{
@@ -307,20 +35,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
307 35
308void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 36void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
309{ 37{
310 kvmppc_booke_vcpu_load(vcpu, cpu); 38 kvmppc_e500_tlb_load(vcpu, cpu);
311
312 /* Shadow PID may be expired on local core */
313 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
314} 39}
315 40
316void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 41void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
317{ 42{
43 kvmppc_e500_tlb_put(vcpu);
44
318#ifdef CONFIG_SPE 45#ifdef CONFIG_SPE
319 if (vcpu->arch.shadow_msr & MSR_SPE) 46 if (vcpu->arch.shadow_msr & MSR_SPE)
320 kvmppc_vcpu_disable_spe(vcpu); 47 kvmppc_vcpu_disable_spe(vcpu);
321#endif 48#endif
322
323 kvmppc_booke_vcpu_put(vcpu);
324} 49}
325 50
326int kvmppc_core_check_processor_compat(void) 51int kvmppc_core_check_processor_compat(void)
@@ -335,23 +60,6 @@ int kvmppc_core_check_processor_compat(void)
335 return r; 60 return r;
336} 61}
337 62
338static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
339{
340 struct kvm_book3e_206_tlb_entry *tlbe;
341
342 /* Insert large initial mapping for guest. */
343 tlbe = get_entry(vcpu_e500, 1, 0);
344 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
345 tlbe->mas2 = 0;
346 tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
347
348 /* 4K map for serial output. Used by kernel wrapper. */
349 tlbe = get_entry(vcpu_e500, 1, 1);
350 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
351 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
352 tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
353}
354
355int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) 63int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
356{ 64{
357 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 65 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -362,7 +70,34 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
362 vcpu->arch.pvr = mfspr(SPRN_PVR); 70 vcpu->arch.pvr = mfspr(SPRN_PVR);
363 vcpu_e500->svr = mfspr(SPRN_SVR); 71 vcpu_e500->svr = mfspr(SPRN_SVR);
364 72
365 vcpu->arch.cpu_type = KVM_CPU_E500V2; 73 /* Since booke kvm only support one core, update all vcpus' PIR to 0 */
74 vcpu->vcpu_id = 0;
75
76 return 0;
77}
78
79/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
80int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
81 struct kvm_translation *tr)
82{
83 int index;
84 gva_t eaddr;
85 u8 pid;
86 u8 as;
87
88 eaddr = tr->linear_address;
89 pid = (tr->linear_address >> 32) & 0xff;
90 as = (tr->linear_address >> 40) & 0x1;
91
92 index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
93 if (index < 0) {
94 tr->valid = 0;
95 return 0;
96 }
97
98 tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
99 /* XXX what does "writeable" and "usermode" even mean? */
100 tr->valid = 1;
366 101
367 return 0; 102 return 0;
368} 103}
@@ -380,6 +115,19 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
380 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; 115 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
381 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; 116 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
382 117
118 sregs->u.e.mas0 = vcpu_e500->mas0;
119 sregs->u.e.mas1 = vcpu_e500->mas1;
120 sregs->u.e.mas2 = vcpu_e500->mas2;
121 sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
122 sregs->u.e.mas4 = vcpu_e500->mas4;
123 sregs->u.e.mas6 = vcpu_e500->mas6;
124
125 sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
126 sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
127 sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
128 sregs->u.e.tlbcfg[2] = 0;
129 sregs->u.e.tlbcfg[3] = 0;
130
383 sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 131 sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
384 sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; 132 sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
385 sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 133 sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
@@ -387,13 +135,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
387 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 135 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
388 136
389 kvmppc_get_sregs_ivor(vcpu, sregs); 137 kvmppc_get_sregs_ivor(vcpu, sregs);
390 kvmppc_get_sregs_e500_tlb(vcpu, sregs);
391} 138}
392 139
393int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 140int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
394{ 141{
395 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 142 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
396 int ret;
397 143
398 if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 144 if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
399 vcpu_e500->svr = sregs->u.e.impl.fsl.svr; 145 vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
@@ -401,9 +147,15 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
401 vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; 147 vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
402 } 148 }
403 149
404 ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs); 150 if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
405 if (ret < 0) 151 vcpu_e500->mas0 = sregs->u.e.mas0;
406 return ret; 152 vcpu_e500->mas1 = sregs->u.e.mas1;
153 vcpu_e500->mas2 = sregs->u.e.mas2;
154 vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
155 vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
156 vcpu_e500->mas4 = sregs->u.e.mas4;
157 vcpu_e500->mas6 = sregs->u.e.mas6;
158 }
407 159
408 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) 160 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
409 return 0; 161 return 0;
@@ -442,12 +194,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
442 if (err) 194 if (err)
443 goto free_vcpu; 195 goto free_vcpu;
444 196
445 if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
446 goto uninit_vcpu;
447
448 err = kvmppc_e500_tlb_init(vcpu_e500); 197 err = kvmppc_e500_tlb_init(vcpu_e500);
449 if (err) 198 if (err)
450 goto uninit_id; 199 goto uninit_vcpu;
451 200
452 vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); 201 vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
453 if (!vcpu->arch.shared) 202 if (!vcpu->arch.shared)
@@ -457,8 +206,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
457 206
458uninit_tlb: 207uninit_tlb:
459 kvmppc_e500_tlb_uninit(vcpu_e500); 208 kvmppc_e500_tlb_uninit(vcpu_e500);
460uninit_id:
461 kvmppc_e500_id_table_free(vcpu_e500);
462uninit_vcpu: 209uninit_vcpu:
463 kvm_vcpu_uninit(vcpu); 210 kvm_vcpu_uninit(vcpu);
464free_vcpu: 211free_vcpu:
@@ -472,31 +219,17 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
472 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 219 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
473 220
474 free_page((unsigned long)vcpu->arch.shared); 221 free_page((unsigned long)vcpu->arch.shared);
475 kvmppc_e500_tlb_uninit(vcpu_e500);
476 kvmppc_e500_id_table_free(vcpu_e500);
477 kvm_vcpu_uninit(vcpu); 222 kvm_vcpu_uninit(vcpu);
223 kvmppc_e500_tlb_uninit(vcpu_e500);
478 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 224 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
479} 225}
480 226
481int kvmppc_core_init_vm(struct kvm *kvm)
482{
483 return 0;
484}
485
486void kvmppc_core_destroy_vm(struct kvm *kvm)
487{
488}
489
490static int __init kvmppc_e500_init(void) 227static int __init kvmppc_e500_init(void)
491{ 228{
492 int r, i; 229 int r, i;
493 unsigned long ivor[3]; 230 unsigned long ivor[3];
494 unsigned long max_ivor = 0; 231 unsigned long max_ivor = 0;
495 232
496 r = kvmppc_core_check_processor_compat();
497 if (r)
498 return r;
499
500 r = kvmppc_booke_init(); 233 r = kvmppc_booke_init();
501 if (r) 234 if (r)
502 return r; 235 return r;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
deleted file mode 100644
index c70d37ed770..00000000000
--- a/arch/powerpc/kvm/e500.h
+++ /dev/null
@@ -1,305 +0,0 @@
1/*
2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu <yu.liu@freescale.com>
5 * Scott Wood <scottwood@freescale.com>
6 * Ashish Kalra <ashish.kalra@freescale.com>
7 * Varun Sethi <varun.sethi@freescale.com>
8 *
9 * Description:
10 * This file is based on arch/powerpc/kvm/44x_tlb.h and
11 * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>,
12 * Copyright IBM Corp. 2007-2008
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License, version 2, as
16 * published by the Free Software Foundation.
17 */
18
19#ifndef KVM_E500_H
20#define KVM_E500_H
21
22#include <linux/kvm_host.h>
23#include <asm/mmu-book3e.h>
24#include <asm/tlb.h>
25
26#define E500_PID_NUM 3
27#define E500_TLB_NUM 2
28
29#define E500_TLB_VALID 1
30#define E500_TLB_BITMAP 2
31
32struct tlbe_ref {
33 pfn_t pfn;
34 unsigned int flags; /* E500_TLB_* */
35};
36
37struct tlbe_priv {
38 struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
39};
40
41#ifdef CONFIG_KVM_E500V2
42struct vcpu_id_table;
43#endif
44
45struct kvmppc_e500_tlb_params {
46 int entries, ways, sets;
47};
48
49struct kvmppc_vcpu_e500 {
50 struct kvm_vcpu vcpu;
51
52 /* Unmodified copy of the guest's TLB -- shared with host userspace. */
53 struct kvm_book3e_206_tlb_entry *gtlb_arch;
54
55 /* Starting entry number in gtlb_arch[] */
56 int gtlb_offset[E500_TLB_NUM];
57
58 /* KVM internal information associated with each guest TLB entry */
59 struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
60
61 struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
62
63 unsigned int gtlb_nv[E500_TLB_NUM];
64
65 /*
66 * information associated with each host TLB entry --
67 * TLB1 only for now. If/when guest TLB1 entries can be
68 * mapped with host TLB0, this will be used for that too.
69 *
70 * We don't want to use this for guest TLB0 because then we'd
71 * have the overhead of doing the translation again even if
72 * the entry is still in the guest TLB (e.g. we swapped out
73 * and back, and our host TLB entries got evicted).
74 */
75 struct tlbe_ref *tlb_refs[E500_TLB_NUM];
76 unsigned int host_tlb1_nv;
77
78 u32 svr;
79 u32 l1csr0;
80 u32 l1csr1;
81 u32 hid0;
82 u32 hid1;
83 u64 mcar;
84
85 struct page **shared_tlb_pages;
86 int num_shared_tlb_pages;
87
88 u64 *g2h_tlb1_map;
89 unsigned int *h2g_tlb1_rmap;
90
91 /* Minimum and maximum address mapped my TLB1 */
92 unsigned long tlb1_min_eaddr;
93 unsigned long tlb1_max_eaddr;
94
95#ifdef CONFIG_KVM_E500V2
96 u32 pid[E500_PID_NUM];
97
98 /* vcpu id table */
99 struct vcpu_id_table *idt;
100#endif
101};
102
103static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
104{
105 return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
106}
107
108
109/* This geometry is the legacy default -- can be overridden by userspace */
110#define KVM_E500_TLB0_WAY_SIZE 128
111#define KVM_E500_TLB0_WAY_NUM 2
112
113#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
114#define KVM_E500_TLB1_SIZE 16
115
116#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
117#define tlbsel_of(index) ((index) >> 16)
118#define esel_of(index) ((index) & 0xFFFF)
119
120#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
121#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
122#define MAS2_ATTRIB_MASK \
123 (MAS2_X0 | MAS2_X1)
124#define MAS3_ATTRIB_MASK \
125 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
126 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
127
128int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
129 ulong value);
130int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
131int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
132int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
133int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
134int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
135int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
136void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
137
138void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
139int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
140
141
142#ifdef CONFIG_KVM_E500V2
143unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
144 unsigned int as, unsigned int gid,
145 unsigned int pr, int avoid_recursion);
146#endif
147
148/* TLB helper functions */
149static inline unsigned int
150get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
151{
152 return (tlbe->mas1 >> 7) & 0x1f;
153}
154
155static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
156{
157 return tlbe->mas2 & MAS2_EPN;
158}
159
160static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
161{
162 unsigned int pgsize = get_tlb_size(tlbe);
163 return 1ULL << 10 << pgsize;
164}
165
166static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
167{
168 u64 bytes = get_tlb_bytes(tlbe);
169 return get_tlb_eaddr(tlbe) + bytes - 1;
170}
171
172static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
173{
174 return tlbe->mas7_3 & ~0xfffULL;
175}
176
177static inline unsigned int
178get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
179{
180 return (tlbe->mas1 >> 16) & 0xff;
181}
182
183static inline unsigned int
184get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
185{
186 return (tlbe->mas1 >> 12) & 0x1;
187}
188
189static inline unsigned int
190get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
191{
192 return (tlbe->mas1 >> 31) & 0x1;
193}
194
195static inline unsigned int
196get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
197{
198 return (tlbe->mas1 >> 30) & 0x1;
199}
200
201static inline unsigned int
202get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe)
203{
204 return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
205}
206
207static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
208{
209 return vcpu->arch.pid & 0xff;
210}
211
212static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
213{
214 return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
215}
216
217static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
218{
219 return !!(vcpu->arch.shared->msr & MSR_PR);
220}
221
222static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
223{
224 return (vcpu->arch.shared->mas6 >> 16) & 0xff;
225}
226
227static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
228{
229 return vcpu->arch.shared->mas6 & 0x1;
230}
231
232static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
233{
234 /*
235 * Manual says that tlbsel has 2 bits wide.
236 * Since we only have two TLBs, only lower bit is used.
237 */
238 return (vcpu->arch.shared->mas0 >> 28) & 0x1;
239}
240
241static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
242{
243 return vcpu->arch.shared->mas0 & 0xfff;
244}
245
246static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
247{
248 return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
249}
250
251static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
252 const struct kvm_book3e_206_tlb_entry *tlbe)
253{
254 gpa_t gpa;
255
256 if (!get_tlb_v(tlbe))
257 return 0;
258
259#ifndef CONFIG_KVM_BOOKE_HV
260 /* Does it match current guest AS? */
261 /* XXX what about IS != DS? */
262 if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
263 return 0;
264#endif
265
266 gpa = get_tlb_raddr(tlbe);
267 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
268 /* Mapping is not for RAM. */
269 return 0;
270
271 return 1;
272}
273
274static inline struct kvm_book3e_206_tlb_entry *get_entry(
275 struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
276{
277 int offset = vcpu_e500->gtlb_offset[tlbsel];
278 return &vcpu_e500->gtlb_arch[offset + entry];
279}
280
281void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
282 struct kvm_book3e_206_tlb_entry *gtlbe);
283void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
284
285#ifdef CONFIG_KVM_BOOKE_HV
286#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
287#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
288#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
289#else
290unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
291 struct kvm_book3e_206_tlb_entry *gtlbe);
292
293static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
294{
295 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
296 unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf;
297
298 return vcpu_e500->pid[tidseld];
299}
300
301/* Force TS=1 for all guest mappings. */
302#define get_tlb_sts(gtlbe) (MAS1_TS)
303#endif /* !BOOKE_HV */
304
305#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353a836..d48ae396f41 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -14,97 +14,27 @@
14 14
15#include <asm/kvm_ppc.h> 15#include <asm/kvm_ppc.h>
16#include <asm/disassemble.h> 16#include <asm/disassemble.h>
17#include <asm/dbell.h> 17#include <asm/kvm_e500.h>
18 18
19#include "booke.h" 19#include "booke.h"
20#include "e500.h" 20#include "e500_tlb.h"
21 21
22#define XOP_MSGSND 206
23#define XOP_MSGCLR 238
24#define XOP_TLBIVAX 786 22#define XOP_TLBIVAX 786
25#define XOP_TLBSX 914 23#define XOP_TLBSX 914
26#define XOP_TLBRE 946 24#define XOP_TLBRE 946
27#define XOP_TLBWE 978 25#define XOP_TLBWE 978
28#define XOP_TLBILX 18
29
30#ifdef CONFIG_KVM_E500MC
31static int dbell2prio(ulong param)
32{
33 int msg = param & PPC_DBELL_TYPE_MASK;
34 int prio = -1;
35
36 switch (msg) {
37 case PPC_DBELL_TYPE(PPC_DBELL):
38 prio = BOOKE_IRQPRIO_DBELL;
39 break;
40 case PPC_DBELL_TYPE(PPC_DBELL_CRIT):
41 prio = BOOKE_IRQPRIO_DBELL_CRIT;
42 break;
43 default:
44 break;
45 }
46
47 return prio;
48}
49
50static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
51{
52 ulong param = vcpu->arch.gpr[rb];
53 int prio = dbell2prio(param);
54
55 if (prio < 0)
56 return EMULATE_FAIL;
57
58 clear_bit(prio, &vcpu->arch.pending_exceptions);
59 return EMULATE_DONE;
60}
61
62static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
63{
64 ulong param = vcpu->arch.gpr[rb];
65 int prio = dbell2prio(rb);
66 int pir = param & PPC_DBELL_PIR_MASK;
67 int i;
68 struct kvm_vcpu *cvcpu;
69
70 if (prio < 0)
71 return EMULATE_FAIL;
72
73 kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) {
74 int cpir = cvcpu->arch.shared->pir;
75 if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) {
76 set_bit(prio, &cvcpu->arch.pending_exceptions);
77 kvm_vcpu_kick(cvcpu);
78 }
79 }
80
81 return EMULATE_DONE;
82}
83#endif
84 26
85int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 27int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
86 unsigned int inst, int *advance) 28 unsigned int inst, int *advance)
87{ 29{
88 int emulated = EMULATE_DONE; 30 int emulated = EMULATE_DONE;
89 int ra = get_ra(inst); 31 int ra;
90 int rb = get_rb(inst); 32 int rb;
91 int rt = get_rt(inst);
92 gva_t ea;
93 33
94 switch (get_op(inst)) { 34 switch (get_op(inst)) {
95 case 31: 35 case 31:
96 switch (get_xop(inst)) { 36 switch (get_xop(inst)) {
97 37
98#ifdef CONFIG_KVM_E500MC
99 case XOP_MSGSND:
100 emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
101 break;
102
103 case XOP_MSGCLR:
104 emulated = kvmppc_e500_emul_msgclr(vcpu, rb);
105 break;
106#endif
107
108 case XOP_TLBRE: 38 case XOP_TLBRE:
109 emulated = kvmppc_e500_emul_tlbre(vcpu); 39 emulated = kvmppc_e500_emul_tlbre(vcpu);
110 break; 40 break;
@@ -114,20 +44,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
114 break; 44 break;
115 45
116 case XOP_TLBSX: 46 case XOP_TLBSX:
117 ea = kvmppc_get_ea_indexed(vcpu, ra, rb); 47 rb = get_rb(inst);
118 emulated = kvmppc_e500_emul_tlbsx(vcpu, ea); 48 emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
119 break;
120
121 case XOP_TLBILX: {
122 int type = rt & 0x3;
123 ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
124 emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
125 break; 49 break;
126 }
127 50
128 case XOP_TLBIVAX: 51 case XOP_TLBIVAX:
129 ea = kvmppc_get_ea_indexed(vcpu, ra, rb); 52 ra = get_ra(inst);
130 emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); 53 rb = get_rb(inst);
54 emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
131 break; 55 break;
132 56
133 default: 57 default:
@@ -146,63 +70,48 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
146 return emulated; 70 return emulated;
147} 71}
148 72
149int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 73int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
150{ 74{
151 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
152 int emulated = EMULATE_DONE; 76 int emulated = EMULATE_DONE;
77 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
153 78
154 switch (sprn) { 79 switch (sprn) {
155#ifndef CONFIG_KVM_BOOKE_HV
156 case SPRN_PID: 80 case SPRN_PID:
157 kvmppc_set_pid(vcpu, spr_val); 81 kvmppc_set_pid(vcpu, spr_val);
158 break; 82 break;
159 case SPRN_PID1: 83 case SPRN_PID1:
160 if (spr_val != 0) 84 if (spr_val != 0)
161 return EMULATE_FAIL; 85 return EMULATE_FAIL;
162 vcpu_e500->pid[1] = spr_val; 86 vcpu_e500->pid[1] = spr_val; break;
163 break;
164 case SPRN_PID2: 87 case SPRN_PID2:
165 if (spr_val != 0) 88 if (spr_val != 0)
166 return EMULATE_FAIL; 89 return EMULATE_FAIL;
167 vcpu_e500->pid[2] = spr_val; 90 vcpu_e500->pid[2] = spr_val; break;
168 break;
169 case SPRN_MAS0: 91 case SPRN_MAS0:
170 vcpu->arch.shared->mas0 = spr_val; 92 vcpu_e500->mas0 = spr_val; break;
171 break;
172 case SPRN_MAS1: 93 case SPRN_MAS1:
173 vcpu->arch.shared->mas1 = spr_val; 94 vcpu_e500->mas1 = spr_val; break;
174 break;
175 case SPRN_MAS2: 95 case SPRN_MAS2:
176 vcpu->arch.shared->mas2 = spr_val; 96 vcpu_e500->mas2 = spr_val; break;
177 break;
178 case SPRN_MAS3: 97 case SPRN_MAS3:
179 vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff; 98 vcpu_e500->mas3 = spr_val; break;
180 vcpu->arch.shared->mas7_3 |= spr_val;
181 break;
182 case SPRN_MAS4: 99 case SPRN_MAS4:
183 vcpu->arch.shared->mas4 = spr_val; 100 vcpu_e500->mas4 = spr_val; break;
184 break;
185 case SPRN_MAS6: 101 case SPRN_MAS6:
186 vcpu->arch.shared->mas6 = spr_val; 102 vcpu_e500->mas6 = spr_val; break;
187 break;
188 case SPRN_MAS7: 103 case SPRN_MAS7:
189 vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; 104 vcpu_e500->mas7 = spr_val; break;
190 vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
191 break;
192#endif
193 case SPRN_L1CSR0: 105 case SPRN_L1CSR0:
194 vcpu_e500->l1csr0 = spr_val; 106 vcpu_e500->l1csr0 = spr_val;
195 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); 107 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
196 break; 108 break;
197 case SPRN_L1CSR1: 109 case SPRN_L1CSR1:
198 vcpu_e500->l1csr1 = spr_val; 110 vcpu_e500->l1csr1 = spr_val; break;
199 break;
200 case SPRN_HID0: 111 case SPRN_HID0:
201 vcpu_e500->hid0 = spr_val; 112 vcpu_e500->hid0 = spr_val; break;
202 break;
203 case SPRN_HID1: 113 case SPRN_HID1:
204 vcpu_e500->hid1 = spr_val; 114 vcpu_e500->hid1 = spr_val; break;
205 break;
206 115
207 case SPRN_MMUCSR0: 116 case SPRN_MMUCSR0:
208 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, 117 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
@@ -222,115 +131,77 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
222 case SPRN_IVOR35: 131 case SPRN_IVOR35:
223 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; 132 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
224 break; 133 break;
225#ifdef CONFIG_KVM_BOOKE_HV 134
226 case SPRN_IVOR36:
227 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val;
228 break;
229 case SPRN_IVOR37:
230 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val;
231 break;
232#endif
233 default: 135 default:
234 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); 136 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
235 } 137 }
236 138
237 return emulated; 139 return emulated;
238} 140}
239 141
240int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 142int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
241{ 143{
242 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 144 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
243 int emulated = EMULATE_DONE; 145 int emulated = EMULATE_DONE;
244 146
245 switch (sprn) { 147 switch (sprn) {
246#ifndef CONFIG_KVM_BOOKE_HV
247 case SPRN_PID: 148 case SPRN_PID:
248 *spr_val = vcpu_e500->pid[0]; 149 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
249 break;
250 case SPRN_PID1: 150 case SPRN_PID1:
251 *spr_val = vcpu_e500->pid[1]; 151 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
252 break;
253 case SPRN_PID2: 152 case SPRN_PID2:
254 *spr_val = vcpu_e500->pid[2]; 153 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
255 break;
256 case SPRN_MAS0: 154 case SPRN_MAS0:
257 *spr_val = vcpu->arch.shared->mas0; 155 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
258 break;
259 case SPRN_MAS1: 156 case SPRN_MAS1:
260 *spr_val = vcpu->arch.shared->mas1; 157 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
261 break;
262 case SPRN_MAS2: 158 case SPRN_MAS2:
263 *spr_val = vcpu->arch.shared->mas2; 159 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
264 break;
265 case SPRN_MAS3: 160 case SPRN_MAS3:
266 *spr_val = (u32)vcpu->arch.shared->mas7_3; 161 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
267 break;
268 case SPRN_MAS4: 162 case SPRN_MAS4:
269 *spr_val = vcpu->arch.shared->mas4; 163 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
270 break;
271 case SPRN_MAS6: 164 case SPRN_MAS6:
272 *spr_val = vcpu->arch.shared->mas6; 165 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
273 break;
274 case SPRN_MAS7: 166 case SPRN_MAS7:
275 *spr_val = vcpu->arch.shared->mas7_3 >> 32; 167 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
276 break; 168
277#endif
278 case SPRN_DECAR:
279 *spr_val = vcpu->arch.decar;
280 break;
281 case SPRN_TLB0CFG: 169 case SPRN_TLB0CFG:
282 *spr_val = vcpu->arch.tlbcfg[0]; 170 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
283 break;
284 case SPRN_TLB1CFG: 171 case SPRN_TLB1CFG:
285 *spr_val = vcpu->arch.tlbcfg[1]; 172 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
286 break;
287 case SPRN_L1CSR0: 173 case SPRN_L1CSR0:
288 *spr_val = vcpu_e500->l1csr0; 174 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
289 break;
290 case SPRN_L1CSR1: 175 case SPRN_L1CSR1:
291 *spr_val = vcpu_e500->l1csr1; 176 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
292 break;
293 case SPRN_HID0: 177 case SPRN_HID0:
294 *spr_val = vcpu_e500->hid0; 178 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
295 break;
296 case SPRN_HID1: 179 case SPRN_HID1:
297 *spr_val = vcpu_e500->hid1; 180 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
298 break;
299 case SPRN_SVR: 181 case SPRN_SVR:
300 *spr_val = vcpu_e500->svr; 182 kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
301 break;
302 183
303 case SPRN_MMUCSR0: 184 case SPRN_MMUCSR0:
304 *spr_val = 0; 185 kvmppc_set_gpr(vcpu, rt, 0); break;
305 break;
306 186
307 case SPRN_MMUCFG: 187 case SPRN_MMUCFG:
308 *spr_val = vcpu->arch.mmucfg; 188 kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
309 break;
310 189
311 /* extra exceptions */ 190 /* extra exceptions */
312 case SPRN_IVOR32: 191 case SPRN_IVOR32:
313 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 192 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
314 break; 193 break;
315 case SPRN_IVOR33: 194 case SPRN_IVOR33:
316 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; 195 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
317 break; 196 break;
318 case SPRN_IVOR34: 197 case SPRN_IVOR34:
319 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 198 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
320 break; 199 break;
321 case SPRN_IVOR35: 200 case SPRN_IVOR35:
322 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 201 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
323 break;
324#ifdef CONFIG_KVM_BOOKE_HV
325 case SPRN_IVOR36:
326 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
327 break;
328 case SPRN_IVOR37:
329 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
330 break; 202 break;
331#endif
332 default: 203 default:
333 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); 204 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
334 } 205 }
335 206
336 return emulated; 207 return emulated;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index cf3f1801237..13c432ea2fa 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -2,9 +2,6 @@
2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. 2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 * 3 *
4 * Author: Yu Liu, yu.liu@freescale.com 4 * Author: Yu Liu, yu.liu@freescale.com
5 * Scott Wood, scottwood@freescale.com
6 * Ashish Kalra, ashish.kalra@freescale.com
7 * Varun Sethi, varun.sethi@freescale.com
8 * 5 *
9 * Description: 6 * Description:
10 * This file is based on arch/powerpc/kvm/44x_tlb.c, 7 * This file is based on arch/powerpc/kvm/44x_tlb.c,
@@ -15,36 +12,240 @@
15 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
16 */ 13 */
17 14
18#include <linux/kernel.h>
19#include <linux/types.h> 15#include <linux/types.h>
20#include <linux/slab.h> 16#include <linux/slab.h>
21#include <linux/string.h> 17#include <linux/string.h>
22#include <linux/kvm.h> 18#include <linux/kvm.h>
23#include <linux/kvm_host.h> 19#include <linux/kvm_host.h>
24#include <linux/highmem.h> 20#include <linux/highmem.h>
25#include <linux/log2.h>
26#include <linux/uaccess.h>
27#include <linux/sched.h>
28#include <linux/rwsem.h>
29#include <linux/vmalloc.h>
30#include <linux/hugetlb.h>
31#include <asm/kvm_ppc.h> 21#include <asm/kvm_ppc.h>
22#include <asm/kvm_e500.h>
32 23
33#include "e500.h" 24#include "../mm/mmu_decl.h"
25#include "e500_tlb.h"
34#include "trace.h" 26#include "trace.h"
35#include "timing.h" 27#include "timing.h"
36 28
37#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) 29#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
38 30
39static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; 31struct id {
32 unsigned long val;
33 struct id **pentry;
34};
40 35
41static inline unsigned int gtlb0_get_next_victim( 36#define NUM_TIDS 256
37
38/*
39 * This table provide mappings from:
40 * (guestAS,guestTID,guestPR) --> ID of physical cpu
41 * guestAS [0..1]
42 * guestTID [0..255]
43 * guestPR [0..1]
44 * ID [1..255]
45 * Each vcpu keeps one vcpu_id_table.
46 */
47struct vcpu_id_table {
48 struct id id[2][NUM_TIDS][2];
49};
50
51/*
52 * This table provide reversed mappings of vcpu_id_table:
53 * ID --> address of vcpu_id_table item.
54 * Each physical core has one pcpu_id_table.
55 */
56struct pcpu_id_table {
57 struct id *entry[NUM_TIDS];
58};
59
60static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
61
62/* This variable keeps last used shadow ID on local core.
63 * The valid range of shadow ID is [1..255] */
64static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
65
66static unsigned int tlb1_entry_num;
67
68/*
69 * Allocate a free shadow id and setup a valid sid mapping in given entry.
70 * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
71 *
72 * The caller must have preemption disabled, and keep it that way until
73 * it has finished with the returned shadow id (either written into the
74 * TLB or arch.shadow_pid, or discarded).
75 */
76static inline int local_sid_setup_one(struct id *entry)
77{
78 unsigned long sid;
79 int ret = -1;
80
81 sid = ++(__get_cpu_var(pcpu_last_used_sid));
82 if (sid < NUM_TIDS) {
83 __get_cpu_var(pcpu_sids).entry[sid] = entry;
84 entry->val = sid;
85 entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
86 ret = sid;
87 }
88
89 /*
90 * If sid == NUM_TIDS, we've run out of sids. We return -1, and
91 * the caller will invalidate everything and start over.
92 *
93 * sid > NUM_TIDS indicates a race, which we disable preemption to
94 * avoid.
95 */
96 WARN_ON(sid > NUM_TIDS);
97
98 return ret;
99}
100
101/*
102 * Check if given entry contain a valid shadow id mapping.
103 * An ID mapping is considered valid only if
104 * both vcpu and pcpu know this mapping.
105 *
106 * The caller must have preemption disabled, and keep it that way until
107 * it has finished with the returned shadow id (either written into the
108 * TLB or arch.shadow_pid, or discarded).
109 */
110static inline int local_sid_lookup(struct id *entry)
111{
112 if (entry && entry->val != 0 &&
113 __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
114 entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
115 return entry->val;
116 return -1;
117}
118
119/* Invalidate all id mappings on local core */
120static inline void local_sid_destroy_all(void)
121{
122 preempt_disable();
123 __get_cpu_var(pcpu_last_used_sid) = 0;
124 memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
125 preempt_enable();
126}
127
128static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
129{
130 vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
131 return vcpu_e500->idt;
132}
133
134static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
135{
136 kfree(vcpu_e500->idt);
137}
138
139/* Invalidate all mappings on vcpu */
140static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
141{
142 memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
143
144 /* Update shadow pid when mappings are changed */
145 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
146}
147
148/* Invalidate one ID mapping on vcpu */
149static inline void kvmppc_e500_id_table_reset_one(
150 struct kvmppc_vcpu_e500 *vcpu_e500,
151 int as, int pid, int pr)
152{
153 struct vcpu_id_table *idt = vcpu_e500->idt;
154
155 BUG_ON(as >= 2);
156 BUG_ON(pid >= NUM_TIDS);
157 BUG_ON(pr >= 2);
158
159 idt->id[as][pid][pr].val = 0;
160 idt->id[as][pid][pr].pentry = NULL;
161
162 /* Update shadow pid when mappings are changed */
163 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
164}
165
166/*
167 * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
168 * This function first lookup if a valid mapping exists,
169 * if not, then creates a new one.
170 *
171 * The caller must have preemption disabled, and keep it that way until
172 * it has finished with the returned shadow id (either written into the
173 * TLB or arch.shadow_pid, or discarded).
174 */
175static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
176 unsigned int as, unsigned int gid,
177 unsigned int pr, int avoid_recursion)
178{
179 struct vcpu_id_table *idt = vcpu_e500->idt;
180 int sid;
181
182 BUG_ON(as >= 2);
183 BUG_ON(gid >= NUM_TIDS);
184 BUG_ON(pr >= 2);
185
186 sid = local_sid_lookup(&idt->id[as][gid][pr]);
187
188 while (sid <= 0) {
189 /* No mapping yet */
190 sid = local_sid_setup_one(&idt->id[as][gid][pr]);
191 if (sid <= 0) {
192 _tlbil_all();
193 local_sid_destroy_all();
194 }
195
196 /* Update shadow pid when mappings are changed */
197 if (!avoid_recursion)
198 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
199 }
200
201 return sid;
202}
203
204/* Map guest pid to shadow.
205 * We use PID to keep shadow of current guest non-zero PID,
206 * and use PID1 to keep shadow of guest zero PID.
207 * So that guest tlbe with TID=0 can be accessed at any time */
208void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
209{
210 preempt_disable();
211 vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
212 get_cur_as(&vcpu_e500->vcpu),
213 get_cur_pid(&vcpu_e500->vcpu),
214 get_cur_pr(&vcpu_e500->vcpu), 1);
215 vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
216 get_cur_as(&vcpu_e500->vcpu), 0,
217 get_cur_pr(&vcpu_e500->vcpu), 1);
218 preempt_enable();
219}
220
221void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
222{
223 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
224 struct tlbe *tlbe;
225 int i, tlbsel;
226
227 printk("| %8s | %8s | %8s | %8s | %8s |\n",
228 "nr", "mas1", "mas2", "mas3", "mas7");
229
230 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
231 printk("Guest TLB%d:\n", tlbsel);
232 for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
233 tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
234 if (tlbe->mas1 & MAS1_VALID)
235 printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
236 tlbsel, i, tlbe->mas1, tlbe->mas2,
237 tlbe->mas3, tlbe->mas7);
238 }
239 }
240}
241
242static inline unsigned int tlb0_get_next_victim(
42 struct kvmppc_vcpu_e500 *vcpu_e500) 243 struct kvmppc_vcpu_e500 *vcpu_e500)
43{ 244{
44 unsigned int victim; 245 unsigned int victim;
45 246
46 victim = vcpu_e500->gtlb_nv[0]++; 247 victim = vcpu_e500->gtlb_nv[0]++;
47 if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways)) 248 if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
48 vcpu_e500->gtlb_nv[0] = 0; 249 vcpu_e500->gtlb_nv[0] = 0;
49 250
50 return victim; 251 return victim;
@@ -53,12 +254,12 @@ static inline unsigned int gtlb0_get_next_victim(
53static inline unsigned int tlb1_max_shadow_size(void) 254static inline unsigned int tlb1_max_shadow_size(void)
54{ 255{
55 /* reserve one entry for magic page */ 256 /* reserve one entry for magic page */
56 return host_tlb_params[1].entries - tlbcam_index - 1; 257 return tlb1_entry_num - tlbcam_index - 1;
57} 258}
58 259
59static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) 260static inline int tlbe_is_writable(struct tlbe *tlbe)
60{ 261{
61 return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); 262 return tlbe->mas3 & (MAS3_SW|MAS3_UW);
62} 263}
63 264
64static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) 265static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
@@ -66,7 +267,6 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
66 /* Mask off reserved bits. */ 267 /* Mask off reserved bits. */
67 mas3 &= MAS3_ATTRIB_MASK; 268 mas3 &= MAS3_ATTRIB_MASK;
68 269
69#ifndef CONFIG_KVM_BOOKE_HV
70 if (!usermode) { 270 if (!usermode) {
71 /* Guest is in supervisor mode, 271 /* Guest is in supervisor mode,
72 * so we need to translate guest 272 * so we need to translate guest
@@ -74,9 +274,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
74 mas3 &= ~E500_TLB_USER_PERM_MASK; 274 mas3 &= ~E500_TLB_USER_PERM_MASK;
75 mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; 275 mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
76 } 276 }
77 mas3 |= E500_TLB_SUPER_PERM_MASK; 277
78#endif 278 return mas3 | E500_TLB_SUPER_PERM_MASK;
79 return mas3;
80} 279}
81 280
82static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) 281static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
@@ -91,76 +290,40 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
91/* 290/*
92 * writing shadow tlb entry to host TLB 291 * writing shadow tlb entry to host TLB
93 */ 292 */
94static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, 293static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
95 uint32_t mas0)
96{ 294{
97 unsigned long flags; 295 unsigned long flags;
98 296
99 local_irq_save(flags); 297 local_irq_save(flags);
100 mtspr(SPRN_MAS0, mas0); 298 mtspr(SPRN_MAS0, mas0);
101 mtspr(SPRN_MAS1, stlbe->mas1); 299 mtspr(SPRN_MAS1, stlbe->mas1);
102 mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); 300 mtspr(SPRN_MAS2, stlbe->mas2);
103 mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); 301 mtspr(SPRN_MAS3, stlbe->mas3);
104 mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); 302 mtspr(SPRN_MAS7, stlbe->mas7);
105#ifdef CONFIG_KVM_BOOKE_HV
106 mtspr(SPRN_MAS8, stlbe->mas8);
107#endif
108 asm volatile("isync; tlbwe" : : : "memory"); 303 asm volatile("isync; tlbwe" : : : "memory");
109
110#ifdef CONFIG_KVM_BOOKE_HV
111 /* Must clear mas8 for other host tlbwe's */
112 mtspr(SPRN_MAS8, 0);
113 isync();
114#endif
115 local_irq_restore(flags);
116
117 trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
118 stlbe->mas2, stlbe->mas7_3);
119}
120
121/*
122 * Acquire a mas0 with victim hint, as if we just took a TLB miss.
123 *
124 * We don't care about the address we're searching for, other than that it's
125 * in the right set and is not present in the TLB. Using a zero PID and a
126 * userspace address means we don't have to set and then restore MAS5, or
127 * calculate a proper MAS6 value.
128 */
129static u32 get_host_mas0(unsigned long eaddr)
130{
131 unsigned long flags;
132 u32 mas0;
133
134 local_irq_save(flags);
135 mtspr(SPRN_MAS6, 0);
136 asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
137 mas0 = mfspr(SPRN_MAS0);
138 local_irq_restore(flags); 304 local_irq_restore(flags);
139
140 return mas0;
141} 305}
142 306
143/* sesel is for tlb1 only */
144static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, 307static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
145 int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) 308 int tlbsel, int esel, struct tlbe *stlbe)
146{ 309{
147 u32 mas0;
148
149 if (tlbsel == 0) { 310 if (tlbsel == 0) {
150 mas0 = get_host_mas0(stlbe->mas2); 311 __write_host_tlbe(stlbe,
151 __write_host_tlbe(stlbe, mas0); 312 MAS0_TLBSEL(0) |
313 MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
152 } else { 314 } else {
153 __write_host_tlbe(stlbe, 315 __write_host_tlbe(stlbe,
154 MAS0_TLBSEL(1) | 316 MAS0_TLBSEL(1) |
155 MAS0_ESEL(to_htlb1_esel(sesel))); 317 MAS0_ESEL(to_htlb1_esel(esel)));
156 } 318 }
319 trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
320 stlbe->mas3, stlbe->mas7);
157} 321}
158 322
159#ifdef CONFIG_KVM_E500V2
160void kvmppc_map_magic(struct kvm_vcpu *vcpu) 323void kvmppc_map_magic(struct kvm_vcpu *vcpu)
161{ 324{
162 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 325 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
163 struct kvm_book3e_206_tlb_entry magic; 326 struct tlbe magic;
164 ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; 327 ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
165 unsigned int stid; 328 unsigned int stid;
166 pfn_t pfn; 329 pfn_t pfn;
@@ -174,104 +337,102 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
174 magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | 337 magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
175 MAS1_TSIZE(BOOK3E_PAGESZ_4K); 338 MAS1_TSIZE(BOOK3E_PAGESZ_4K);
176 magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; 339 magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
177 magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | 340 magic.mas3 = (pfn << PAGE_SHIFT) |
178 MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; 341 MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
179 magic.mas8 = 0; 342 magic.mas7 = pfn >> (32 - PAGE_SHIFT);
180 343
181 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); 344 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
182 preempt_enable(); 345 preempt_enable();
183} 346}
184#endif
185 347
186static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, 348void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
187 int tlbsel, int esel)
188{ 349{
189 struct kvm_book3e_206_tlb_entry *gtlbe = 350 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
190 get_entry(vcpu_e500, tlbsel, esel);
191
192 if (tlbsel == 1 &&
193 vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) {
194 u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
195 int hw_tlb_indx;
196 unsigned long flags;
197
198 local_irq_save(flags);
199 while (tmp) {
200 hw_tlb_indx = __ilog2_u64(tmp & -tmp);
201 mtspr(SPRN_MAS0,
202 MAS0_TLBSEL(1) |
203 MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
204 mtspr(SPRN_MAS1, 0);
205 asm volatile("tlbwe");
206 vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
207 tmp &= tmp - 1;
208 }
209 mb();
210 vcpu_e500->g2h_tlb1_map[esel] = 0;
211 vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP;
212 local_irq_restore(flags);
213 351
214 return; 352 /* Shadow PID may be expired on local core */
215 } 353 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
354}
216 355
217 /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ 356void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
218 kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); 357{
219} 358}
220 359
221static int tlb0_set_base(gva_t addr, int sets, int ways) 360static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
361 int tlbsel, int esel)
222{ 362{
223 int set_base; 363 struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
364 struct vcpu_id_table *idt = vcpu_e500->idt;
365 unsigned int pr, tid, ts, pid;
366 u32 val, eaddr;
367 unsigned long flags;
224 368
225 set_base = (addr >> PAGE_SHIFT) & (sets - 1); 369 ts = get_tlb_ts(gtlbe);
226 set_base *= ways; 370 tid = get_tlb_tid(gtlbe);
227 371
228 return set_base; 372 preempt_disable();
229}
230 373
231static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr) 374 /* One guest ID may be mapped to two shadow IDs */
232{ 375 for (pr = 0; pr < 2; pr++) {
233 return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets, 376 /*
234 vcpu_e500->gtlb_params[0].ways); 377 * The shadow PID can have a valid mapping on at most one
235} 378 * host CPU. In the common case, it will be valid on this
379 * CPU, in which case (for TLB0) we do a local invalidation
380 * of the specific address.
381 *
382 * If the shadow PID is not valid on the current host CPU, or
383 * if we're invalidating a TLB1 entry, we invalidate the
384 * entire shadow PID.
385 */
386 if (tlbsel == 1 ||
387 (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) {
388 kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
389 continue;
390 }
236 391
237static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel) 392 /*
238{ 393 * The guest is invalidating a TLB0 entry which is in a PID
239 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 394 * that has a valid shadow mapping on this host CPU. We
240 int esel = get_tlb_esel_bit(vcpu); 395 * search host TLB0 to invalidate it's shadow TLB entry,
396 * similar to __tlbil_va except that we need to look in AS1.
397 */
398 val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
399 eaddr = get_tlb_eaddr(gtlbe);
241 400
242 if (tlbsel == 0) { 401 local_irq_save(flags);
243 esel &= vcpu_e500->gtlb_params[0].ways - 1; 402
244 esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2); 403 mtspr(SPRN_MAS6, val);
245 } else { 404 asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
246 esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; 405 val = mfspr(SPRN_MAS1);
406 if (val & MAS1_VALID) {
407 mtspr(SPRN_MAS1, val & ~MAS1_VALID);
408 asm volatile("tlbwe");
409 }
410
411 local_irq_restore(flags);
247 } 412 }
248 413
249 return esel; 414 preempt_enable();
250} 415}
251 416
252/* Search the guest TLB for a matching entry. */ 417/* Search the guest TLB for a matching entry. */
253static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, 418static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
254 gva_t eaddr, int tlbsel, unsigned int pid, int as) 419 gva_t eaddr, int tlbsel, unsigned int pid, int as)
255{ 420{
256 int size = vcpu_e500->gtlb_params[tlbsel].entries; 421 int size = vcpu_e500->gtlb_size[tlbsel];
257 unsigned int set_base, offset; 422 int set_base;
258 int i; 423 int i;
259 424
260 if (tlbsel == 0) { 425 if (tlbsel == 0) {
261 set_base = gtlb0_set_base(vcpu_e500, eaddr); 426 int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
262 size = vcpu_e500->gtlb_params[0].ways; 427 set_base = (eaddr >> PAGE_SHIFT) & mask;
428 set_base *= KVM_E500_TLB0_WAY_NUM;
429 size = KVM_E500_TLB0_WAY_NUM;
263 } else { 430 } else {
264 if (eaddr < vcpu_e500->tlb1_min_eaddr ||
265 eaddr > vcpu_e500->tlb1_max_eaddr)
266 return -1;
267 set_base = 0; 431 set_base = 0;
268 } 432 }
269 433
270 offset = vcpu_e500->gtlb_offset[tlbsel];
271
272 for (i = 0; i < size; i++) { 434 for (i = 0; i < size; i++) {
273 struct kvm_book3e_206_tlb_entry *tlbe = 435 struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
274 &vcpu_e500->gtlb_arch[offset + set_base + i];
275 unsigned int tid; 436 unsigned int tid;
276 437
277 if (eaddr < get_tlb_eaddr(tlbe)) 438 if (eaddr < get_tlb_eaddr(tlbe))
@@ -296,129 +457,90 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
296 return -1; 457 return -1;
297} 458}
298 459
299static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, 460static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
300 struct kvm_book3e_206_tlb_entry *gtlbe, 461 struct tlbe *gtlbe,
301 pfn_t pfn) 462 pfn_t pfn)
302{ 463{
303 ref->pfn = pfn; 464 priv->pfn = pfn;
304 ref->flags = E500_TLB_VALID; 465 priv->flags = E500_TLB_VALID;
305 466
306 if (tlbe_is_writable(gtlbe)) 467 if (tlbe_is_writable(gtlbe))
307 kvm_set_pfn_dirty(pfn); 468 priv->flags |= E500_TLB_DIRTY;
308} 469}
309 470
310static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) 471static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv)
311{ 472{
312 if (ref->flags & E500_TLB_VALID) { 473 if (priv->flags & E500_TLB_VALID) {
313 trace_kvm_booke206_ref_release(ref->pfn, ref->flags); 474 if (priv->flags & E500_TLB_DIRTY)
314 ref->flags = 0; 475 kvm_release_pfn_dirty(priv->pfn);
315 } 476 else
316} 477 kvm_release_pfn_clean(priv->pfn);
317
318static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
319{
320 if (vcpu_e500->g2h_tlb1_map)
321 memset(vcpu_e500->g2h_tlb1_map, 0,
322 sizeof(u64) * vcpu_e500->gtlb_params[1].entries);
323 if (vcpu_e500->h2g_tlb1_rmap)
324 memset(vcpu_e500->h2g_tlb1_rmap, 0,
325 sizeof(unsigned int) * host_tlb_params[1].entries);
326}
327
328static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
329{
330 int tlbsel = 0;
331 int i;
332
333 for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) {
334 struct tlbe_ref *ref =
335 &vcpu_e500->gtlb_priv[tlbsel][i].ref;
336 kvmppc_e500_ref_release(ref);
337 }
338}
339
340static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
341{
342 int stlbsel = 1;
343 int i;
344
345 kvmppc_e500_tlbil_all(vcpu_e500);
346 478
347 for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { 479 priv->flags = 0;
348 struct tlbe_ref *ref =
349 &vcpu_e500->tlb_refs[stlbsel][i];
350 kvmppc_e500_ref_release(ref);
351 } 480 }
352
353 clear_tlb_privs(vcpu_e500);
354}
355
356void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
357{
358 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
359 clear_tlb_refs(vcpu_e500);
360 clear_tlb1_bitmap(vcpu_e500);
361} 481}
362 482
363static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, 483static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
364 unsigned int eaddr, int as) 484 unsigned int eaddr, int as)
365{ 485{
366 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 486 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
367 unsigned int victim, tsized; 487 unsigned int victim, pidsel, tsized;
368 int tlbsel; 488 int tlbsel;
369 489
370 /* since we only have two TLBs, only lower bit is used. */ 490 /* since we only have two TLBs, only lower bit is used. */
371 tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; 491 tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
372 victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; 492 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
373 tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; 493 pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
494 tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
374 495
375 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) 496 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
376 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 497 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
377 vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) 498 vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
378 | MAS1_TID(get_tlbmiss_tid(vcpu)) 499 | MAS1_TID(vcpu_e500->pid[pidsel])
379 | MAS1_TSIZE(tsized); 500 | MAS1_TSIZE(tsized);
380 vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) 501 vcpu_e500->mas2 = (eaddr & MAS2_EPN)
381 | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); 502 | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
382 vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; 503 vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
383 vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1) 504 vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
384 | (get_cur_pid(vcpu) << 16) 505 | (get_cur_pid(vcpu) << 16)
385 | (as ? MAS6_SAS : 0); 506 | (as ? MAS6_SAS : 0);
507 vcpu_e500->mas7 = 0;
386} 508}
387 509
388/* TID must be supplied by the caller */ 510static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
389static inline void kvmppc_e500_setup_stlbe( 511 struct tlbe *gtlbe, int tsize,
390 struct kvm_vcpu *vcpu, 512 struct tlbe_priv *priv,
391 struct kvm_book3e_206_tlb_entry *gtlbe, 513 u64 gvaddr, struct tlbe *stlbe)
392 int tsize, struct tlbe_ref *ref, u64 gvaddr,
393 struct kvm_book3e_206_tlb_entry *stlbe)
394{ 514{
395 pfn_t pfn = ref->pfn; 515 pfn_t pfn = priv->pfn;
396 u32 pr = vcpu->arch.shared->msr & MSR_PR; 516 unsigned int stid;
397
398 BUG_ON(!(ref->flags & E500_TLB_VALID));
399
400 /* Force IPROT=0 for all guest mappings. */
401 stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
402 stlbe->mas2 = (gvaddr & MAS2_EPN) |
403 e500_shadow_mas2_attrib(gtlbe->mas2, pr);
404 stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
405 e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
406 517
407#ifdef CONFIG_KVM_BOOKE_HV 518 stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
408 stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; 519 get_tlb_tid(gtlbe),
409#endif 520 get_cur_pr(&vcpu_e500->vcpu), 0);
521
522 /* Force TS=1 IPROT=0 for all guest mappings. */
523 stlbe->mas1 = MAS1_TSIZE(tsize)
524 | MAS1_TID(stid) | MAS1_TS | MAS1_VALID;
525 stlbe->mas2 = (gvaddr & MAS2_EPN)
526 | e500_shadow_mas2_attrib(gtlbe->mas2,
527 vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
528 stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
529 | e500_shadow_mas3_attrib(gtlbe->mas3,
530 vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
531 stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
410} 532}
411 533
534
412static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, 535static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
413 u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, 536 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
414 int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, 537 struct tlbe *stlbe)
415 struct tlbe_ref *ref)
416{ 538{
417 struct kvm_memory_slot *slot; 539 struct kvm_memory_slot *slot;
418 unsigned long pfn = 0; /* silence GCC warning */ 540 unsigned long pfn, hva;
419 unsigned long hva;
420 int pfnmap = 0; 541 int pfnmap = 0;
421 int tsize = BOOK3E_PAGESZ_4K; 542 int tsize = BOOK3E_PAGESZ_4K;
543 struct tlbe_priv *priv;
422 544
423 /* 545 /*
424 * Translate guest physical to true physical, acquiring 546 * Translate guest physical to true physical, acquiring
@@ -499,174 +621,79 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
499 pfn &= ~(tsize_pages - 1); 621 pfn &= ~(tsize_pages - 1);
500 break; 622 break;
501 } 623 }
502 } else if (vma && hva >= vma->vm_start &&
503 (vma->vm_flags & VM_HUGETLB)) {
504 unsigned long psize = vma_kernel_pagesize(vma);
505
506 tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
507 MAS1_TSIZE_SHIFT;
508
509 /*
510 * Take the largest page size that satisfies both host
511 * and guest mapping
512 */
513 tsize = min(__ilog2(psize) - 10, tsize);
514
515 /*
516 * e500 doesn't implement the lowest tsize bit,
517 * or 1K pages.
518 */
519 tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
520 } 624 }
521 625
522 up_read(&current->mm->mmap_sem); 626 up_read(&current->mm->mmap_sem);
523 } 627 }
524 628
525 if (likely(!pfnmap)) { 629 if (likely(!pfnmap)) {
526 unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); 630 pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
527 pfn = gfn_to_pfn_memslot(slot, gfn); 631 if (is_error_pfn(pfn)) {
528 if (is_error_noslot_pfn(pfn)) {
529 printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", 632 printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
530 (long)gfn); 633 (long)gfn);
634 kvm_release_pfn_clean(pfn);
531 return; 635 return;
532 } 636 }
533
534 /* Align guest and physical address to page map boundaries */
535 pfn &= ~(tsize_pages - 1);
536 gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
537 } 637 }
538 638
539 /* Drop old ref and setup new one. */ 639 /* Drop old priv and setup new one. */
540 kvmppc_e500_ref_release(ref); 640 priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
541 kvmppc_e500_ref_setup(ref, gtlbe, pfn); 641 kvmppc_e500_priv_release(priv);
642 kvmppc_e500_priv_setup(priv, gtlbe, pfn);
542 643
543 kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, 644 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, priv, gvaddr, stlbe);
544 ref, gvaddr, stlbe);
545
546 /* Clear i-cache for new pages */
547 kvmppc_mmu_flush_icache(pfn);
548
549 /* Drop refcount on page, so that mmu notifiers can clear it */
550 kvm_release_pfn_clean(pfn);
551} 645}
552 646
553/* XXX only map the one-one case, for now use TLB0 */ 647/* XXX only map the one-one case, for now use TLB0 */
554static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, 648static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
555 int esel, 649 int esel, struct tlbe *stlbe)
556 struct kvm_book3e_206_tlb_entry *stlbe)
557{ 650{
558 struct kvm_book3e_206_tlb_entry *gtlbe; 651 struct tlbe *gtlbe;
559 struct tlbe_ref *ref;
560 652
561 gtlbe = get_entry(vcpu_e500, 0, esel); 653 gtlbe = &vcpu_e500->gtlb_arch[0][esel];
562 ref = &vcpu_e500->gtlb_priv[0][esel].ref;
563 654
564 kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), 655 kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
565 get_tlb_raddr(gtlbe) >> PAGE_SHIFT, 656 get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
566 gtlbe, 0, stlbe, ref); 657 gtlbe, 0, esel, stlbe);
658
659 return esel;
567} 660}
568 661
569/* Caller must ensure that the specified guest TLB entry is safe to insert into 662/* Caller must ensure that the specified guest TLB entry is safe to insert into
570 * the shadow TLB. */ 663 * the shadow TLB. */
571/* XXX for both one-one and one-to-many , for now use TLB1 */ 664/* XXX for both one-one and one-to-many , for now use TLB1 */
572static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, 665static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
573 u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, 666 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
574 struct kvm_book3e_206_tlb_entry *stlbe, int esel)
575{ 667{
576 struct tlbe_ref *ref;
577 unsigned int victim; 668 unsigned int victim;
578 669
579 victim = vcpu_e500->host_tlb1_nv++; 670 victim = vcpu_e500->gtlb_nv[1]++;
580
581 if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
582 vcpu_e500->host_tlb1_nv = 0;
583 671
584 ref = &vcpu_e500->tlb_refs[1][victim]; 672 if (unlikely(vcpu_e500->gtlb_nv[1] >= tlb1_max_shadow_size()))
585 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); 673 vcpu_e500->gtlb_nv[1] = 0;
586 674
587 vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim; 675 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim, stlbe);
588 vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
589 if (vcpu_e500->h2g_tlb1_rmap[victim]) {
590 unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim];
591 vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim);
592 }
593 vcpu_e500->h2g_tlb1_rmap[victim] = esel;
594 676
595 return victim; 677 return victim;
596} 678}
597 679
598static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) 680void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
599{ 681{
600 int size = vcpu_e500->gtlb_params[1].entries;
601 unsigned int offset;
602 gva_t eaddr;
603 int i;
604
605 vcpu_e500->tlb1_min_eaddr = ~0UL;
606 vcpu_e500->tlb1_max_eaddr = 0;
607 offset = vcpu_e500->gtlb_offset[1];
608
609 for (i = 0; i < size; i++) {
610 struct kvm_book3e_206_tlb_entry *tlbe =
611 &vcpu_e500->gtlb_arch[offset + i];
612
613 if (!get_tlb_v(tlbe))
614 continue;
615
616 eaddr = get_tlb_eaddr(tlbe);
617 vcpu_e500->tlb1_min_eaddr =
618 min(vcpu_e500->tlb1_min_eaddr, eaddr);
619
620 eaddr = get_tlb_end(tlbe);
621 vcpu_e500->tlb1_max_eaddr =
622 max(vcpu_e500->tlb1_max_eaddr, eaddr);
623 }
624}
625
626static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500,
627 struct kvm_book3e_206_tlb_entry *gtlbe)
628{
629 unsigned long start, end, size;
630
631 size = get_tlb_bytes(gtlbe);
632 start = get_tlb_eaddr(gtlbe) & ~(size - 1);
633 end = start + size - 1;
634
635 return vcpu_e500->tlb1_min_eaddr == start ||
636 vcpu_e500->tlb1_max_eaddr == end;
637}
638
639/* This function is supposed to be called for a adding a new valid tlb entry */
640static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu,
641 struct kvm_book3e_206_tlb_entry *gtlbe)
642{
643 unsigned long start, end, size;
644 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 682 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
645 683
646 if (!get_tlb_v(gtlbe)) 684 /* Recalc shadow pid since MSR changes */
647 return; 685 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
648
649 size = get_tlb_bytes(gtlbe);
650 start = get_tlb_eaddr(gtlbe) & ~(size - 1);
651 end = start + size - 1;
652
653 vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start);
654 vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end);
655} 686}
656 687
657static inline int kvmppc_e500_gtlbe_invalidate( 688static inline int kvmppc_e500_gtlbe_invalidate(
658 struct kvmppc_vcpu_e500 *vcpu_e500, 689 struct kvmppc_vcpu_e500 *vcpu_e500,
659 int tlbsel, int esel) 690 int tlbsel, int esel)
660{ 691{
661 struct kvm_book3e_206_tlb_entry *gtlbe = 692 struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
662 get_entry(vcpu_e500, tlbsel, esel);
663 693
664 if (unlikely(get_tlb_iprot(gtlbe))) 694 if (unlikely(get_tlb_iprot(gtlbe)))
665 return -1; 695 return -1;
666 696
667 if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
668 kvmppc_recalc_tlb1map_range(vcpu_e500);
669
670 gtlbe->mas1 = 0; 697 gtlbe->mas1 = 0;
671 698
672 return 0; 699 return 0;
@@ -677,23 +704,26 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
677 int esel; 704 int esel;
678 705
679 if (value & MMUCSR0_TLB0FI) 706 if (value & MMUCSR0_TLB0FI)
680 for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++) 707 for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++)
681 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); 708 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
682 if (value & MMUCSR0_TLB1FI) 709 if (value & MMUCSR0_TLB1FI)
683 for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) 710 for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++)
684 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); 711 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
685 712
686 /* Invalidate all vcpu id mappings */ 713 /* Invalidate all vcpu id mappings */
687 kvmppc_e500_tlbil_all(vcpu_e500); 714 kvmppc_e500_id_table_reset_all(vcpu_e500);
688 715
689 return EMULATE_DONE; 716 return EMULATE_DONE;
690} 717}
691 718
692int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) 719int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
693{ 720{
694 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 721 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
695 unsigned int ia; 722 unsigned int ia;
696 int esel, tlbsel; 723 int esel, tlbsel;
724 gva_t ea;
725
726 ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
697 727
698 ia = (ea >> 2) & 0x1; 728 ia = (ea >> 2) & 0x1;
699 729
@@ -702,8 +732,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
702 732
703 if (ia) { 733 if (ia) {
704 /* invalidate all entries */ 734 /* invalidate all entries */
705 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; 735 for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++)
706 esel++)
707 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); 736 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
708 } else { 737 } else {
709 ea &= 0xfffff000; 738 ea &= 0xfffff000;
@@ -714,54 +743,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
714 } 743 }
715 744
716 /* Invalidate all vcpu id mappings */ 745 /* Invalidate all vcpu id mappings */
717 kvmppc_e500_tlbil_all(vcpu_e500); 746 kvmppc_e500_id_table_reset_all(vcpu_e500);
718
719 return EMULATE_DONE;
720}
721
722static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
723 int pid, int type)
724{
725 struct kvm_book3e_206_tlb_entry *tlbe;
726 int tid, esel;
727
728 /* invalidate all entries */
729 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
730 tlbe = get_entry(vcpu_e500, tlbsel, esel);
731 tid = get_tlb_tid(tlbe);
732 if (type == 0 || tid == pid) {
733 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
734 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
735 }
736 }
737}
738
739static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
740 gva_t ea)
741{
742 int tlbsel, esel;
743
744 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
745 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
746 if (esel >= 0) {
747 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
748 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
749 break;
750 }
751 }
752}
753
754int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
755{
756 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
757 int pid = get_cur_spid(vcpu);
758
759 if (type == 0 || type == 1) {
760 tlbilx_all(vcpu_e500, 0, pid, type);
761 tlbilx_all(vcpu_e500, 1, pid, type);
762 } else if (type == 3) {
763 tlbilx_one(vcpu_e500, pid, ea);
764 }
765 747
766 return EMULATE_DONE; 748 return EMULATE_DONE;
767} 749}
@@ -770,131 +752,100 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
770{ 752{
771 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 753 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
772 int tlbsel, esel; 754 int tlbsel, esel;
773 struct kvm_book3e_206_tlb_entry *gtlbe; 755 struct tlbe *gtlbe;
774 756
775 tlbsel = get_tlb_tlbsel(vcpu); 757 tlbsel = get_tlb_tlbsel(vcpu_e500);
776 esel = get_tlb_esel(vcpu, tlbsel); 758 esel = get_tlb_esel(vcpu_e500, tlbsel);
777 759
778 gtlbe = get_entry(vcpu_e500, tlbsel, esel); 760 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
779 vcpu->arch.shared->mas0 &= ~MAS0_NV(~0); 761 vcpu_e500->mas0 &= ~MAS0_NV(~0);
780 vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 762 vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
781 vcpu->arch.shared->mas1 = gtlbe->mas1; 763 vcpu_e500->mas1 = gtlbe->mas1;
782 vcpu->arch.shared->mas2 = gtlbe->mas2; 764 vcpu_e500->mas2 = gtlbe->mas2;
783 vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; 765 vcpu_e500->mas3 = gtlbe->mas3;
766 vcpu_e500->mas7 = gtlbe->mas7;
784 767
785 return EMULATE_DONE; 768 return EMULATE_DONE;
786} 769}
787 770
788int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) 771int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
789{ 772{
790 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 773 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
791 int as = !!get_cur_sas(vcpu); 774 int as = !!get_cur_sas(vcpu_e500);
792 unsigned int pid = get_cur_spid(vcpu); 775 unsigned int pid = get_cur_spid(vcpu_e500);
793 int esel, tlbsel; 776 int esel, tlbsel;
794 struct kvm_book3e_206_tlb_entry *gtlbe = NULL; 777 struct tlbe *gtlbe = NULL;
778 gva_t ea;
779
780 ea = kvmppc_get_gpr(vcpu, rb);
795 781
796 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 782 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
797 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); 783 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
798 if (esel >= 0) { 784 if (esel >= 0) {
799 gtlbe = get_entry(vcpu_e500, tlbsel, esel); 785 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
800 break; 786 break;
801 } 787 }
802 } 788 }
803 789
804 if (gtlbe) { 790 if (gtlbe) {
805 esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1; 791 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
806
807 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
808 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 792 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
809 vcpu->arch.shared->mas1 = gtlbe->mas1; 793 vcpu_e500->mas1 = gtlbe->mas1;
810 vcpu->arch.shared->mas2 = gtlbe->mas2; 794 vcpu_e500->mas2 = gtlbe->mas2;
811 vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; 795 vcpu_e500->mas3 = gtlbe->mas3;
796 vcpu_e500->mas7 = gtlbe->mas7;
812 } else { 797 } else {
813 int victim; 798 int victim;
814 799
815 /* since we only have two TLBs, only lower bit is used. */ 800 /* since we only have two TLBs, only lower bit is used. */
816 tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1; 801 tlbsel = vcpu_e500->mas4 >> 28 & 0x1;
817 victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; 802 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
818 803
819 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) 804 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
820 | MAS0_ESEL(victim)
821 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 805 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
822 vcpu->arch.shared->mas1 = 806 vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
823 (vcpu->arch.shared->mas6 & MAS6_SPID0) 807 | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
824 | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) 808 | (vcpu_e500->mas4 & MAS4_TSIZED(~0));
825 | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); 809 vcpu_e500->mas2 &= MAS2_EPN;
826 vcpu->arch.shared->mas2 &= MAS2_EPN; 810 vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
827 vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & 811 vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
828 MAS2_ATTRIB_MASK; 812 vcpu_e500->mas7 = 0;
829 vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 |
830 MAS3_U2 | MAS3_U3;
831 } 813 }
832 814
833 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); 815 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
834 return EMULATE_DONE; 816 return EMULATE_DONE;
835} 817}
836 818
837/* sesel is for tlb1 only */
838static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
839 struct kvm_book3e_206_tlb_entry *gtlbe,
840 struct kvm_book3e_206_tlb_entry *stlbe,
841 int stlbsel, int sesel)
842{
843 int stid;
844
845 preempt_disable();
846 stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
847
848 stlbe->mas1 |= MAS1_TID(stid);
849 write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
850 preempt_enable();
851}
852
853int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) 819int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
854{ 820{
855 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 821 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
856 struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; 822 struct tlbe *gtlbe;
857 int tlbsel, esel, stlbsel, sesel; 823 int tlbsel, esel;
858 int recal = 0;
859 824
860 tlbsel = get_tlb_tlbsel(vcpu); 825 tlbsel = get_tlb_tlbsel(vcpu_e500);
861 esel = get_tlb_esel(vcpu, tlbsel); 826 esel = get_tlb_esel(vcpu_e500, tlbsel);
862 827
863 gtlbe = get_entry(vcpu_e500, tlbsel, esel); 828 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
864 829
865 if (get_tlb_v(gtlbe)) { 830 if (get_tlb_v(gtlbe))
866 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); 831 kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
867 if ((tlbsel == 1) &&
868 kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
869 recal = 1;
870 }
871 832
872 gtlbe->mas1 = vcpu->arch.shared->mas1; 833 gtlbe->mas1 = vcpu_e500->mas1;
873 gtlbe->mas2 = vcpu->arch.shared->mas2; 834 gtlbe->mas2 = vcpu_e500->mas2;
874 if (!(vcpu->arch.shared->msr & MSR_CM)) 835 gtlbe->mas3 = vcpu_e500->mas3;
875 gtlbe->mas2 &= 0xffffffffUL; 836 gtlbe->mas7 = vcpu_e500->mas7;
876 gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
877 837
878 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, 838 trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
879 gtlbe->mas2, gtlbe->mas7_3); 839 gtlbe->mas3, gtlbe->mas7);
880
881 if (tlbsel == 1) {
882 /*
883 * If a valid tlb1 entry is overwritten then recalculate the
884 * min/max TLB1 map address range otherwise no need to look
885 * in tlb1 array.
886 */
887 if (recal)
888 kvmppc_recalc_tlb1map_range(vcpu_e500);
889 else
890 kvmppc_set_tlb1map_range(vcpu, gtlbe);
891 }
892 840
893 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ 841 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
894 if (tlbe_is_host_safe(vcpu, gtlbe)) { 842 if (tlbe_is_host_safe(vcpu, gtlbe)) {
843 struct tlbe stlbe;
844 int stlbsel, sesel;
895 u64 eaddr; 845 u64 eaddr;
896 u64 raddr; 846 u64 raddr;
897 847
848 preempt_disable();
898 switch (tlbsel) { 849 switch (tlbsel) {
899 case 0: 850 case 0:
900 /* TLB0 */ 851 /* TLB0 */
@@ -902,8 +853,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
902 gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); 853 gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
903 854
904 stlbsel = 0; 855 stlbsel = 0;
905 kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); 856 sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
906 sesel = 0; /* unused */
907 857
908 break; 858 break;
909 859
@@ -918,62 +868,20 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
918 * are mapped on the fly. */ 868 * are mapped on the fly. */
919 stlbsel = 1; 869 stlbsel = 1;
920 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, 870 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
921 raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel); 871 raddr >> PAGE_SHIFT, gtlbe, &stlbe);
922 break; 872 break;
923 873
924 default: 874 default:
925 BUG(); 875 BUG();
926 } 876 }
927 877 write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
928 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); 878 preempt_enable();
929 } 879 }
930 880
931 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); 881 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
932 return EMULATE_DONE; 882 return EMULATE_DONE;
933} 883}
934 884
935static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
936 gva_t eaddr, unsigned int pid, int as)
937{
938 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
939 int esel, tlbsel;
940
941 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
942 esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
943 if (esel >= 0)
944 return index_of(tlbsel, esel);
945 }
946
947 return -1;
948}
949
950/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
951int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
952 struct kvm_translation *tr)
953{
954 int index;
955 gva_t eaddr;
956 u8 pid;
957 u8 as;
958
959 eaddr = tr->linear_address;
960 pid = (tr->linear_address >> 32) & 0xff;
961 as = (tr->linear_address >> 40) & 0x1;
962
963 index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
964 if (index < 0) {
965 tr->valid = 0;
966 return 0;
967 }
968
969 tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
970 /* XXX what does "writeable" and "usermode" even mean? */
971 tr->valid = 1;
972
973 return 0;
974}
975
976
977int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) 885int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
978{ 886{
979 unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); 887 unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
@@ -1006,11 +914,9 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
1006 gva_t eaddr) 914 gva_t eaddr)
1007{ 915{
1008 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 916 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1009 struct kvm_book3e_206_tlb_entry *gtlbe; 917 struct tlbe *gtlbe =
1010 u64 pgmask; 918 &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
1011 919 u64 pgmask = get_tlb_bytes(gtlbe) - 1;
1012 gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
1013 pgmask = get_tlb_bytes(gtlbe) - 1;
1014 920
1015 return get_tlb_raddr(gtlbe) | (eaddr & pgmask); 921 return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
1016} 922}
@@ -1024,25 +930,22 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1024{ 930{
1025 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 931 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1026 struct tlbe_priv *priv; 932 struct tlbe_priv *priv;
1027 struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; 933 struct tlbe *gtlbe, stlbe;
1028 int tlbsel = tlbsel_of(index); 934 int tlbsel = tlbsel_of(index);
1029 int esel = esel_of(index); 935 int esel = esel_of(index);
1030 int stlbsel, sesel; 936 int stlbsel, sesel;
1031 937
1032 gtlbe = get_entry(vcpu_e500, tlbsel, esel); 938 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
1033 939
940 preempt_disable();
1034 switch (tlbsel) { 941 switch (tlbsel) {
1035 case 0: 942 case 0:
1036 stlbsel = 0; 943 stlbsel = 0;
1037 sesel = 0; /* unused */ 944 sesel = esel;
1038 priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; 945 priv = &vcpu_e500->gtlb_priv[stlbsel][sesel];
1039 946
1040 /* Only triggers after clear_tlb_refs */ 947 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
1041 if (unlikely(!(priv->ref.flags & E500_TLB_VALID))) 948 priv, eaddr, &stlbe);
1042 kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
1043 else
1044 kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
1045 &priv->ref, eaddr, &stlbe);
1046 break; 949 break;
1047 950
1048 case 1: { 951 case 1: {
@@ -1050,7 +953,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1050 953
1051 stlbsel = 1; 954 stlbsel = 1;
1052 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, 955 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
1053 gtlbe, &stlbe, esel); 956 gtlbe, &stlbe);
1054 break; 957 break;
1055 } 958 }
1056 959
@@ -1059,372 +962,116 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1059 break; 962 break;
1060 } 963 }
1061 964
1062 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); 965 write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
1063} 966 preempt_enable();
1064
1065/************* MMU Notifiers *************/
1066
1067int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
1068{
1069 trace_kvm_unmap_hva(hva);
1070
1071 /*
1072 * Flush all shadow tlb entries everywhere. This is slow, but
1073 * we are 100% sure that we catch the to be unmapped page
1074 */
1075 kvm_flush_remote_tlbs(kvm);
1076
1077 return 0;
1078}
1079
1080int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
1081{
1082 /* kvm_unmap_hva flushes everything anyways */
1083 kvm_unmap_hva(kvm, start);
1084
1085 return 0;
1086}
1087
1088int kvm_age_hva(struct kvm *kvm, unsigned long hva)
1089{
1090 /* XXX could be more clever ;) */
1091 return 0;
1092}
1093
1094int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
1095{
1096 /* XXX could be more clever ;) */
1097 return 0;
1098}
1099
1100void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
1101{
1102 /* The page will get remapped properly on its next fault */
1103 kvm_unmap_hva(kvm, hva);
1104}
1105
1106/*****************************************/
1107
1108static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
1109{
1110 int i;
1111
1112 clear_tlb1_bitmap(vcpu_e500);
1113 kfree(vcpu_e500->g2h_tlb1_map);
1114
1115 clear_tlb_refs(vcpu_e500);
1116 kfree(vcpu_e500->gtlb_priv[0]);
1117 kfree(vcpu_e500->gtlb_priv[1]);
1118
1119 if (vcpu_e500->shared_tlb_pages) {
1120 vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
1121 PAGE_SIZE)));
1122
1123 for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) {
1124 set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
1125 put_page(vcpu_e500->shared_tlb_pages[i]);
1126 }
1127
1128 vcpu_e500->num_shared_tlb_pages = 0;
1129
1130 kfree(vcpu_e500->shared_tlb_pages);
1131 vcpu_e500->shared_tlb_pages = NULL;
1132 } else {
1133 kfree(vcpu_e500->gtlb_arch);
1134 }
1135
1136 vcpu_e500->gtlb_arch = NULL;
1137} 967}
1138 968
1139void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 969int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
970 gva_t eaddr, unsigned int pid, int as)
1140{ 971{
1141 sregs->u.e.mas0 = vcpu->arch.shared->mas0; 972 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1142 sregs->u.e.mas1 = vcpu->arch.shared->mas1; 973 int esel, tlbsel;
1143 sregs->u.e.mas2 = vcpu->arch.shared->mas2;
1144 sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
1145 sregs->u.e.mas4 = vcpu->arch.shared->mas4;
1146 sregs->u.e.mas6 = vcpu->arch.shared->mas6;
1147
1148 sregs->u.e.mmucfg = vcpu->arch.mmucfg;
1149 sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0];
1150 sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1];
1151 sregs->u.e.tlbcfg[2] = 0;
1152 sregs->u.e.tlbcfg[3] = 0;
1153}
1154 974
1155int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 975 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
1156{ 976 esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
1157 if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { 977 if (esel >= 0)
1158 vcpu->arch.shared->mas0 = sregs->u.e.mas0; 978 return index_of(tlbsel, esel);
1159 vcpu->arch.shared->mas1 = sregs->u.e.mas1;
1160 vcpu->arch.shared->mas2 = sregs->u.e.mas2;
1161 vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
1162 vcpu->arch.shared->mas4 = sregs->u.e.mas4;
1163 vcpu->arch.shared->mas6 = sregs->u.e.mas6;
1164 } 979 }
1165 980
1166 return 0; 981 return -1;
1167} 982}
1168 983
1169int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, 984void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
1170 struct kvm_config_tlb *cfg)
1171{ 985{
1172 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 986 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1173 struct kvm_book3e_206_tlb_params params;
1174 char *virt;
1175 struct page **pages;
1176 struct tlbe_priv *privs[2] = {};
1177 u64 *g2h_bitmap = NULL;
1178 size_t array_len;
1179 u32 sets;
1180 int num_pages, ret, i;
1181
1182 if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
1183 return -EINVAL;
1184
1185 if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
1186 sizeof(params)))
1187 return -EFAULT;
1188
1189 if (params.tlb_sizes[1] > 64)
1190 return -EINVAL;
1191 if (params.tlb_ways[1] != params.tlb_sizes[1])
1192 return -EINVAL;
1193 if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
1194 return -EINVAL;
1195 if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0)
1196 return -EINVAL;
1197
1198 if (!is_power_of_2(params.tlb_ways[0]))
1199 return -EINVAL;
1200
1201 sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
1202 if (!is_power_of_2(sets))
1203 return -EINVAL;
1204
1205 array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
1206 array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
1207
1208 if (cfg->array_len < array_len)
1209 return -EINVAL;
1210
1211 num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
1212 cfg->array / PAGE_SIZE;
1213 pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
1214 if (!pages)
1215 return -ENOMEM;
1216
1217 ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
1218 if (ret < 0)
1219 goto err_pages;
1220
1221 if (ret != num_pages) {
1222 num_pages = ret;
1223 ret = -EFAULT;
1224 goto err_put_page;
1225 }
1226 987
1227 virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); 988 if (vcpu->arch.pid != pid) {
1228 if (!virt) { 989 vcpu_e500->pid[0] = vcpu->arch.pid = pid;
1229 ret = -ENOMEM; 990 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
1230 goto err_put_page;
1231 } 991 }
1232
1233 privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
1234 GFP_KERNEL);
1235 privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
1236 GFP_KERNEL);
1237
1238 if (!privs[0] || !privs[1]) {
1239 ret = -ENOMEM;
1240 goto err_privs;
1241 }
1242
1243 g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
1244 GFP_KERNEL);
1245 if (!g2h_bitmap) {
1246 ret = -ENOMEM;
1247 goto err_privs;
1248 }
1249
1250 free_gtlb(vcpu_e500);
1251
1252 vcpu_e500->gtlb_priv[0] = privs[0];
1253 vcpu_e500->gtlb_priv[1] = privs[1];
1254 vcpu_e500->g2h_tlb1_map = g2h_bitmap;
1255
1256 vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
1257 (virt + (cfg->array & (PAGE_SIZE - 1)));
1258
1259 vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0];
1260 vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1];
1261
1262 vcpu_e500->gtlb_offset[0] = 0;
1263 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
1264
1265 vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
1266
1267 vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1268 if (params.tlb_sizes[0] <= 2048)
1269 vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
1270 vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
1271
1272 vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1273 vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
1274 vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
1275
1276 vcpu_e500->shared_tlb_pages = pages;
1277 vcpu_e500->num_shared_tlb_pages = num_pages;
1278
1279 vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0];
1280 vcpu_e500->gtlb_params[0].sets = sets;
1281
1282 vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
1283 vcpu_e500->gtlb_params[1].sets = 1;
1284
1285 kvmppc_recalc_tlb1map_range(vcpu_e500);
1286 return 0;
1287
1288err_privs:
1289 kfree(privs[0]);
1290 kfree(privs[1]);
1291
1292err_put_page:
1293 for (i = 0; i < num_pages; i++)
1294 put_page(pages[i]);
1295
1296err_pages:
1297 kfree(pages);
1298 return ret;
1299} 992}
1300 993
1301int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, 994void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
1302 struct kvm_dirty_tlb *dirty)
1303{ 995{
1304 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 996 struct tlbe *tlbe;
1305 kvmppc_recalc_tlb1map_range(vcpu_e500); 997
1306 clear_tlb_refs(vcpu_e500); 998 /* Insert large initial mapping for guest. */
1307 return 0; 999 tlbe = &vcpu_e500->gtlb_arch[1][0];
1000 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
1001 tlbe->mas2 = 0;
1002 tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
1003 tlbe->mas7 = 0;
1004
1005 /* 4K map for serial output. Used by kernel wrapper. */
1006 tlbe = &vcpu_e500->gtlb_arch[1][1];
1007 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
1008 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
1009 tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
1010 tlbe->mas7 = 0;
1308} 1011}
1309 1012
1310int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) 1013int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
1311{ 1014{
1312 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; 1015 tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
1313 int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
1314 int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
1315 1016
1316 host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; 1017 vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE;
1317 host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; 1018 vcpu_e500->gtlb_arch[0] =
1019 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
1020 if (vcpu_e500->gtlb_arch[0] == NULL)
1021 goto err_out;
1318 1022
1319 /* 1023 vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE;
1320 * This should never happen on real e500 hardware, but is 1024 vcpu_e500->gtlb_arch[1] =
1321 * architecturally possible -- e.g. in some weird nested 1025 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
1322 * virtualization case. 1026 if (vcpu_e500->gtlb_arch[1] == NULL)
1323 */ 1027 goto err_out_guest0;
1324 if (host_tlb_params[0].entries == 0 ||
1325 host_tlb_params[1].entries == 0) {
1326 pr_err("%s: need to know host tlb size\n", __func__);
1327 return -ENODEV;
1328 }
1329 1028
1330 host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> 1029 vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *)
1331 TLBnCFG_ASSOC_SHIFT; 1030 kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
1332 host_tlb_params[1].ways = host_tlb_params[1].entries; 1031 if (vcpu_e500->gtlb_priv[0] == NULL)
1333 1032 goto err_out_guest1;
1334 if (!is_power_of_2(host_tlb_params[0].entries) || 1033 vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *)
1335 !is_power_of_2(host_tlb_params[0].ways) || 1034 kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
1336 host_tlb_params[0].entries < host_tlb_params[0].ways || 1035
1337 host_tlb_params[0].ways == 0) { 1036 if (vcpu_e500->gtlb_priv[1] == NULL)
1338 pr_err("%s: bad tlb0 host config: %u entries %u ways\n", 1037 goto err_out_priv0;
1339 __func__, host_tlb_params[0].entries,
1340 host_tlb_params[0].ways);
1341 return -ENODEV;
1342 }
1343 1038
1344 host_tlb_params[0].sets = 1039 if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
1345 host_tlb_params[0].entries / host_tlb_params[0].ways; 1040 goto err_out_priv1;
1346 host_tlb_params[1].sets = 1;
1347
1348 vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
1349 vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
1350
1351 vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM;
1352 vcpu_e500->gtlb_params[0].sets =
1353 KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
1354
1355 vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
1356 vcpu_e500->gtlb_params[1].sets = 1;
1357
1358 vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
1359 if (!vcpu_e500->gtlb_arch)
1360 return -ENOMEM;
1361
1362 vcpu_e500->gtlb_offset[0] = 0;
1363 vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
1364
1365 vcpu_e500->tlb_refs[0] =
1366 kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries,
1367 GFP_KERNEL);
1368 if (!vcpu_e500->tlb_refs[0])
1369 goto err;
1370
1371 vcpu_e500->tlb_refs[1] =
1372 kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries,
1373 GFP_KERNEL);
1374 if (!vcpu_e500->tlb_refs[1])
1375 goto err;
1376
1377 vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
1378 vcpu_e500->gtlb_params[0].entries,
1379 GFP_KERNEL);
1380 if (!vcpu_e500->gtlb_priv[0])
1381 goto err;
1382
1383 vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
1384 vcpu_e500->gtlb_params[1].entries,
1385 GFP_KERNEL);
1386 if (!vcpu_e500->gtlb_priv[1])
1387 goto err;
1388
1389 vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
1390 vcpu_e500->gtlb_params[1].entries,
1391 GFP_KERNEL);
1392 if (!vcpu_e500->g2h_tlb1_map)
1393 goto err;
1394
1395 vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
1396 host_tlb_params[1].entries,
1397 GFP_KERNEL);
1398 if (!vcpu_e500->h2g_tlb1_rmap)
1399 goto err;
1400 1041
1401 /* Init TLB configuration register */ 1042 /* Init TLB configuration register */
1402 vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & 1043 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
1403 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); 1044 vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0];
1404 vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; 1045 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
1405 vcpu->arch.tlbcfg[0] |= 1046 vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1];
1406 vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; 1047
1407
1408 vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
1409 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1410 vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
1411 vcpu->arch.tlbcfg[1] |=
1412 vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
1413
1414 kvmppc_recalc_tlb1map_range(vcpu_e500);
1415 return 0; 1048 return 0;
1416 1049
1417err: 1050err_out_priv1:
1418 free_gtlb(vcpu_e500); 1051 kfree(vcpu_e500->gtlb_priv[1]);
1419 kfree(vcpu_e500->tlb_refs[0]); 1052err_out_priv0:
1420 kfree(vcpu_e500->tlb_refs[1]); 1053 kfree(vcpu_e500->gtlb_priv[0]);
1054err_out_guest1:
1055 kfree(vcpu_e500->gtlb_arch[1]);
1056err_out_guest0:
1057 kfree(vcpu_e500->gtlb_arch[0]);
1058err_out:
1421 return -1; 1059 return -1;
1422} 1060}
1423 1061
1424void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) 1062void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
1425{ 1063{
1426 free_gtlb(vcpu_e500); 1064 int stlbsel, i;
1427 kfree(vcpu_e500->h2g_tlb1_rmap); 1065
1428 kfree(vcpu_e500->tlb_refs[0]); 1066 /* release all privs */
1429 kfree(vcpu_e500->tlb_refs[1]); 1067 for (stlbsel = 0; stlbsel < 2; stlbsel++)
1068 for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
1069 struct tlbe_priv *priv =
1070 &vcpu_e500->gtlb_priv[stlbsel][i];
1071 kvmppc_e500_priv_release(priv);
1072 }
1073
1074 kvmppc_e500_id_table_free(vcpu_e500);
1075 kfree(vcpu_e500->gtlb_arch[1]);
1076 kfree(vcpu_e500->gtlb_arch[0]);
1430} 1077}
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
deleted file mode 100644
index 1f89d26e65f..00000000000
--- a/arch/powerpc/kvm/e500mc.c
+++ /dev/null
@@ -1,346 +0,0 @@
1/*
2 * Copyright (C) 2010,2012 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Varun Sethi, <varun.sethi@freescale.com>
5 *
6 * Description:
7 * This file is derived from arch/powerpc/kvm/e500.c,
8 * by Yu Liu <yu.liu@freescale.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/kvm_host.h>
16#include <linux/slab.h>
17#include <linux/err.h>
18#include <linux/export.h>
19
20#include <asm/reg.h>
21#include <asm/cputable.h>
22#include <asm/tlbflush.h>
23#include <asm/kvm_ppc.h>
24#include <asm/dbell.h>
25
26#include "booke.h"
27#include "e500.h"
28
29void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
30{
31 enum ppc_dbell dbell_type;
32 unsigned long tag;
33
34 switch (type) {
35 case INT_CLASS_NONCRIT:
36 dbell_type = PPC_G_DBELL;
37 break;
38 case INT_CLASS_CRIT:
39 dbell_type = PPC_G_DBELL_CRIT;
40 break;
41 case INT_CLASS_MC:
42 dbell_type = PPC_G_DBELL_MC;
43 break;
44 default:
45 WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type);
46 return;
47 }
48
49
50 tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
51 mb();
52 ppc_msgsnd(dbell_type, 0, tag);
53}
54
55/* gtlbe must not be mapped by more than one host tlb entry */
56void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
57 struct kvm_book3e_206_tlb_entry *gtlbe)
58{
59 unsigned int tid, ts;
60 gva_t eaddr;
61 u32 val, lpid;
62 unsigned long flags;
63
64 ts = get_tlb_ts(gtlbe);
65 tid = get_tlb_tid(gtlbe);
66 lpid = vcpu_e500->vcpu.kvm->arch.lpid;
67
68 /* We search the host TLB to invalidate its shadow TLB entry */
69 val = (tid << 16) | ts;
70 eaddr = get_tlb_eaddr(gtlbe);
71
72 local_irq_save(flags);
73
74 mtspr(SPRN_MAS6, val);
75 mtspr(SPRN_MAS5, MAS5_SGS | lpid);
76
77 asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
78 val = mfspr(SPRN_MAS1);
79 if (val & MAS1_VALID) {
80 mtspr(SPRN_MAS1, val & ~MAS1_VALID);
81 asm volatile("tlbwe");
82 }
83 mtspr(SPRN_MAS5, 0);
84 /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */
85 mtspr(SPRN_MAS8, 0);
86 isync();
87
88 local_irq_restore(flags);
89}
90
91void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
92{
93 unsigned long flags;
94
95 local_irq_save(flags);
96 mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
97 asm volatile("tlbilxlpid");
98 mtspr(SPRN_MAS5, 0);
99 local_irq_restore(flags);
100}
101
102void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
103{
104 vcpu->arch.pid = pid;
105}
106
107void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
108{
109}
110
111void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
112{
113 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
114
115 kvmppc_booke_vcpu_load(vcpu, cpu);
116
117 mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
118 mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
119 mtspr(SPRN_GPIR, vcpu->vcpu_id);
120 mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
121 mtspr(SPRN_EPLC, vcpu->arch.eplc);
122 mtspr(SPRN_EPSC, vcpu->arch.epsc);
123
124 mtspr(SPRN_GIVPR, vcpu->arch.ivpr);
125 mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
126 mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
127 mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0);
128 mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1);
129 mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2);
130 mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3);
131
132 mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0);
133 mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1);
134
135 mtspr(SPRN_GEPR, vcpu->arch.epr);
136 mtspr(SPRN_GDEAR, vcpu->arch.shared->dar);
137 mtspr(SPRN_GESR, vcpu->arch.shared->esr);
138
139 if (vcpu->arch.oldpir != mfspr(SPRN_PIR))
140 kvmppc_e500_tlbil_all(vcpu_e500);
141
142 kvmppc_load_guest_fp(vcpu);
143}
144
145void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
146{
147 vcpu->arch.eplc = mfspr(SPRN_EPLC);
148 vcpu->arch.epsc = mfspr(SPRN_EPSC);
149
150 vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0);
151 vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1);
152 vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2);
153 vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3);
154
155 vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0);
156 vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1);
157
158 vcpu->arch.epr = mfspr(SPRN_GEPR);
159 vcpu->arch.shared->dar = mfspr(SPRN_GDEAR);
160 vcpu->arch.shared->esr = mfspr(SPRN_GESR);
161
162 vcpu->arch.oldpir = mfspr(SPRN_PIR);
163
164 kvmppc_booke_vcpu_put(vcpu);
165}
166
167int kvmppc_core_check_processor_compat(void)
168{
169 int r;
170
171 if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0)
172 r = 0;
173 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
174 r = 0;
175 else
176 r = -ENOTSUPP;
177
178 return r;
179}
180
181int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
182{
183 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
184
185 vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \
186 SPRN_EPCR_DUVD;
187#ifdef CONFIG_64BIT
188 vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
189#endif
190 vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
191 vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
192 vcpu->arch.epsc = vcpu->arch.eplc;
193
194 vcpu->arch.pvr = mfspr(SPRN_PVR);
195 vcpu_e500->svr = mfspr(SPRN_SVR);
196
197 vcpu->arch.cpu_type = KVM_CPU_E500MC;
198
199 return 0;
200}
201
202void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
203{
204 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
205
206 sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM |
207 KVM_SREGS_E_PC;
208 sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
209
210 sregs->u.e.impl.fsl.features = 0;
211 sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
212 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
213 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
214
215 kvmppc_get_sregs_e500_tlb(vcpu, sregs);
216
217 sregs->u.e.ivor_high[3] =
218 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
219 sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
220 sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
221
222 kvmppc_get_sregs_ivor(vcpu, sregs);
223}
224
225int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
226{
227 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
228 int ret;
229
230 if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
231 vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
232 vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
233 vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
234 }
235
236 ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
237 if (ret < 0)
238 return ret;
239
240 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
241 return 0;
242
243 if (sregs->u.e.features & KVM_SREGS_E_PM) {
244 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
245 sregs->u.e.ivor_high[3];
246 }
247
248 if (sregs->u.e.features & KVM_SREGS_E_PC) {
249 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] =
250 sregs->u.e.ivor_high[4];
251 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] =
252 sregs->u.e.ivor_high[5];
253 }
254
255 return kvmppc_set_sregs_ivor(vcpu, sregs);
256}
257
258struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
259{
260 struct kvmppc_vcpu_e500 *vcpu_e500;
261 struct kvm_vcpu *vcpu;
262 int err;
263
264 vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
265 if (!vcpu_e500) {
266 err = -ENOMEM;
267 goto out;
268 }
269 vcpu = &vcpu_e500->vcpu;
270
271 /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
272 vcpu->arch.oldpir = 0xffffffff;
273
274 err = kvm_vcpu_init(vcpu, kvm, id);
275 if (err)
276 goto free_vcpu;
277
278 err = kvmppc_e500_tlb_init(vcpu_e500);
279 if (err)
280 goto uninit_vcpu;
281
282 vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
283 if (!vcpu->arch.shared)
284 goto uninit_tlb;
285
286 return vcpu;
287
288uninit_tlb:
289 kvmppc_e500_tlb_uninit(vcpu_e500);
290uninit_vcpu:
291 kvm_vcpu_uninit(vcpu);
292
293free_vcpu:
294 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
295out:
296 return ERR_PTR(err);
297}
298
299void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
300{
301 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
302
303 free_page((unsigned long)vcpu->arch.shared);
304 kvmppc_e500_tlb_uninit(vcpu_e500);
305 kvm_vcpu_uninit(vcpu);
306 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
307}
308
309int kvmppc_core_init_vm(struct kvm *kvm)
310{
311 int lpid;
312
313 lpid = kvmppc_alloc_lpid();
314 if (lpid < 0)
315 return lpid;
316
317 kvm->arch.lpid = lpid;
318 return 0;
319}
320
321void kvmppc_core_destroy_vm(struct kvm *kvm)
322{
323 kvmppc_free_lpid(kvm->arch.lpid);
324}
325
326static int __init kvmppc_e500mc_init(void)
327{
328 int r;
329
330 r = kvmppc_booke_init();
331 if (r)
332 return r;
333
334 kvmppc_init_lpid(64);
335 kvmppc_claim_lpid(0); /* host */
336
337 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
338}
339
340static void __exit kvmppc_e500mc_exit(void)
341{
342 kvmppc_booke_exit();
343}
344
345module_init(kvmppc_e500mc_init);
346module_exit(kvmppc_e500mc_exit);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index b0855e5d890..141dce3c681 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -13,7 +13,6 @@
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2007 15 * Copyright IBM Corp. 2007
16 * Copyright 2011 Freescale Semiconductor, Inc.
17 * 16 *
18 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
19 */ 18 */
@@ -23,7 +22,6 @@
23#include <linux/types.h> 22#include <linux/types.h>
24#include <linux/string.h> 23#include <linux/string.h>
25#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
26#include <linux/clockchips.h>
27 25
28#include <asm/reg.h> 26#include <asm/reg.h>
29#include <asm/time.h> 27#include <asm/time.h>
@@ -36,9 +34,7 @@
36#define OP_TRAP 3 34#define OP_TRAP 3
37#define OP_TRAP_64 2 35#define OP_TRAP_64 2
38 36
39#define OP_31_XOP_TRAP 4
40#define OP_31_XOP_LWZX 23 37#define OP_31_XOP_LWZX 23
41#define OP_31_XOP_TRAP_64 68
42#define OP_31_XOP_LBZX 87 38#define OP_31_XOP_LBZX 87
43#define OP_31_XOP_STWX 151 39#define OP_31_XOP_STWX 151
44#define OP_31_XOP_STBX 215 40#define OP_31_XOP_STBX 215
@@ -59,13 +55,11 @@
59#define OP_31_XOP_STHBRX 918 55#define OP_31_XOP_STHBRX 918
60 56
61#define OP_LWZ 32 57#define OP_LWZ 32
62#define OP_LD 58
63#define OP_LWZU 33 58#define OP_LWZU 33
64#define OP_LBZ 34 59#define OP_LBZ 34
65#define OP_LBZU 35 60#define OP_LBZU 35
66#define OP_STW 36 61#define OP_STW 36
67#define OP_STWU 37 62#define OP_STWU 37
68#define OP_STD 62
69#define OP_STB 38 63#define OP_STB 38
70#define OP_STBU 39 64#define OP_STBU 39
71#define OP_LHZ 40 65#define OP_LHZ 40
@@ -75,181 +69,57 @@
75#define OP_STH 44 69#define OP_STH 44
76#define OP_STHU 45 70#define OP_STHU 45
77 71
72#ifdef CONFIG_PPC_BOOK3S
73static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
74{
75 return 1;
76}
77#else
78static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
79{
80 return vcpu->arch.tcr & TCR_DIE;
81}
82#endif
83
78void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) 84void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
79{ 85{
80 unsigned long dec_nsec; 86 unsigned long dec_nsec;
81 unsigned long long dec_time;
82 87
83 pr_debug("mtDEC: %x\n", vcpu->arch.dec); 88 pr_debug("mtDEC: %x\n", vcpu->arch.dec);
84 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
85
86#ifdef CONFIG_PPC_BOOK3S 89#ifdef CONFIG_PPC_BOOK3S
87 /* mtdec lowers the interrupt line when positive. */ 90 /* mtdec lowers the interrupt line when positive. */
88 kvmppc_core_dequeue_dec(vcpu); 91 kvmppc_core_dequeue_dec(vcpu);
89 92
90 /* POWER4+ triggers a dec interrupt if the value is < 0 */ 93 /* POWER4+ triggers a dec interrupt if the value is < 0 */
91 if (vcpu->arch.dec & 0x80000000) { 94 if (vcpu->arch.dec & 0x80000000) {
95 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
92 kvmppc_core_queue_dec(vcpu); 96 kvmppc_core_queue_dec(vcpu);
93 return; 97 return;
94 } 98 }
95#endif 99#endif
96 100 if (kvmppc_dec_enabled(vcpu)) {
97#ifdef CONFIG_BOOKE 101 /* The decrementer ticks at the same rate as the timebase, so
98 /* On BOOKE, DEC = 0 is as good as decrementer not enabled */ 102 * that's how we convert the guest DEC value to the number of
99 if (vcpu->arch.dec == 0) 103 * host ticks. */
100 return; 104
101#endif 105 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
102 106 dec_nsec = vcpu->arch.dec;
103 /* 107 dec_nsec *= 1000;
104 * The decrementer ticks at the same rate as the timebase, so 108 dec_nsec /= tb_ticks_per_usec;
105 * that's how we convert the guest DEC value to the number of 109 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
106 * host ticks. 110 HRTIMER_MODE_REL);
107 */ 111 vcpu->arch.dec_jiffies = get_tb();
108 112 } else {
109 dec_time = vcpu->arch.dec; 113 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
110 /* 114 }
111 * Guest timebase ticks at the same frequency as host decrementer.
112 * So use the host decrementer calculations for decrementer emulation.
113 */
114 dec_time = dec_time << decrementer_clockevent.shift;
115 do_div(dec_time, decrementer_clockevent.mult);
116 dec_nsec = do_div(dec_time, NSEC_PER_SEC);
117 hrtimer_start(&vcpu->arch.dec_timer,
118 ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
119 vcpu->arch.dec_jiffies = get_tb();
120} 115}
121 116
122u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) 117u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
123{ 118{
124 u64 jd = tb - vcpu->arch.dec_jiffies; 119 u64 jd = tb - vcpu->arch.dec_jiffies;
125
126#ifdef CONFIG_BOOKE
127 if (vcpu->arch.dec < jd)
128 return 0;
129#endif
130
131 return vcpu->arch.dec - jd; 120 return vcpu->arch.dec - jd;
132} 121}
133 122
134static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
135{
136 enum emulation_result emulated = EMULATE_DONE;
137 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
138
139 switch (sprn) {
140 case SPRN_SRR0:
141 vcpu->arch.shared->srr0 = spr_val;
142 break;
143 case SPRN_SRR1:
144 vcpu->arch.shared->srr1 = spr_val;
145 break;
146
147 /* XXX We need to context-switch the timebase for
148 * watchdog and FIT. */
149 case SPRN_TBWL: break;
150 case SPRN_TBWU: break;
151
152 case SPRN_MSSSR0: break;
153
154 case SPRN_DEC:
155 vcpu->arch.dec = spr_val;
156 kvmppc_emulate_dec(vcpu);
157 break;
158
159 case SPRN_SPRG0:
160 vcpu->arch.shared->sprg0 = spr_val;
161 break;
162 case SPRN_SPRG1:
163 vcpu->arch.shared->sprg1 = spr_val;
164 break;
165 case SPRN_SPRG2:
166 vcpu->arch.shared->sprg2 = spr_val;
167 break;
168 case SPRN_SPRG3:
169 vcpu->arch.shared->sprg3 = spr_val;
170 break;
171
172 default:
173 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
174 spr_val);
175 if (emulated == EMULATE_FAIL)
176 printk(KERN_INFO "mtspr: unknown spr "
177 "0x%x\n", sprn);
178 break;
179 }
180
181 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
182
183 return emulated;
184}
185
186static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
187{
188 enum emulation_result emulated = EMULATE_DONE;
189 ulong spr_val = 0;
190
191 switch (sprn) {
192 case SPRN_SRR0:
193 spr_val = vcpu->arch.shared->srr0;
194 break;
195 case SPRN_SRR1:
196 spr_val = vcpu->arch.shared->srr1;
197 break;
198 case SPRN_PVR:
199 spr_val = vcpu->arch.pvr;
200 break;
201 case SPRN_PIR:
202 spr_val = vcpu->vcpu_id;
203 break;
204 case SPRN_MSSSR0:
205 spr_val = 0;
206 break;
207
208 /* Note: mftb and TBRL/TBWL are user-accessible, so
209 * the guest can always access the real TB anyways.
210 * In fact, we probably will never see these traps. */
211 case SPRN_TBWL:
212 spr_val = get_tb() >> 32;
213 break;
214 case SPRN_TBWU:
215 spr_val = get_tb();
216 break;
217
218 case SPRN_SPRG0:
219 spr_val = vcpu->arch.shared->sprg0;
220 break;
221 case SPRN_SPRG1:
222 spr_val = vcpu->arch.shared->sprg1;
223 break;
224 case SPRN_SPRG2:
225 spr_val = vcpu->arch.shared->sprg2;
226 break;
227 case SPRN_SPRG3:
228 spr_val = vcpu->arch.shared->sprg3;
229 break;
230 /* Note: SPRG4-7 are user-readable, so we don't get
231 * a trap. */
232
233 case SPRN_DEC:
234 spr_val = kvmppc_get_dec(vcpu, get_tb());
235 break;
236 default:
237 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
238 &spr_val);
239 if (unlikely(emulated == EMULATE_FAIL)) {
240 printk(KERN_INFO "mfspr: unknown spr "
241 "0x%x\n", sprn);
242 }
243 break;
244 }
245
246 if (emulated == EMULATE_DONE)
247 kvmppc_set_gpr(vcpu, rt, spr_val);
248 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
249
250 return emulated;
251}
252
253/* XXX to do: 123/* XXX to do:
254 * lhax 124 * lhax
255 * lhaux 125 * lhaux
@@ -269,10 +139,12 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
269int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 139int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
270{ 140{
271 u32 inst = kvmppc_get_last_inst(vcpu); 141 u32 inst = kvmppc_get_last_inst(vcpu);
272 int ra = get_ra(inst); 142 u32 ea;
273 int rs = get_rs(inst); 143 int ra;
274 int rt = get_rt(inst); 144 int rb;
275 int sprn = get_sprn(inst); 145 int rs;
146 int rt;
147 int sprn;
276 enum emulation_result emulated = EMULATE_DONE; 148 enum emulation_result emulated = EMULATE_DONE;
277 int advance = 1; 149 int advance = 1;
278 150
@@ -287,8 +159,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
287 case OP_TRAP_64: 159 case OP_TRAP_64:
288 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); 160 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
289#else 161#else
290 kvmppc_core_queue_program(vcpu, 162 kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
291 vcpu->arch.shared->esr | ESR_PTR);
292#endif 163#endif
293 advance = 0; 164 advance = 0;
294 break; 165 break;
@@ -296,82 +167,207 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
296 case 31: 167 case 31:
297 switch (get_xop(inst)) { 168 switch (get_xop(inst)) {
298 169
299 case OP_31_XOP_TRAP:
300#ifdef CONFIG_64BIT
301 case OP_31_XOP_TRAP_64:
302#endif
303#ifdef CONFIG_PPC_BOOK3S
304 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
305#else
306 kvmppc_core_queue_program(vcpu,
307 vcpu->arch.shared->esr | ESR_PTR);
308#endif
309 advance = 0;
310 break;
311 case OP_31_XOP_LWZX: 170 case OP_31_XOP_LWZX:
171 rt = get_rt(inst);
312 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 172 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
313 break; 173 break;
314 174
315 case OP_31_XOP_LBZX: 175 case OP_31_XOP_LBZX:
176 rt = get_rt(inst);
316 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 177 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
317 break; 178 break;
318 179
319 case OP_31_XOP_LBZUX: 180 case OP_31_XOP_LBZUX:
181 rt = get_rt(inst);
182 ra = get_ra(inst);
183 rb = get_rb(inst);
184
185 ea = kvmppc_get_gpr(vcpu, rb);
186 if (ra)
187 ea += kvmppc_get_gpr(vcpu, ra);
188
320 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 189 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
321 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 190 kvmppc_set_gpr(vcpu, ra, ea);
322 break; 191 break;
323 192
324 case OP_31_XOP_STWX: 193 case OP_31_XOP_STWX:
194 rs = get_rs(inst);
325 emulated = kvmppc_handle_store(run, vcpu, 195 emulated = kvmppc_handle_store(run, vcpu,
326 kvmppc_get_gpr(vcpu, rs), 196 kvmppc_get_gpr(vcpu, rs),
327 4, 1); 197 4, 1);
328 break; 198 break;
329 199
330 case OP_31_XOP_STBX: 200 case OP_31_XOP_STBX:
201 rs = get_rs(inst);
331 emulated = kvmppc_handle_store(run, vcpu, 202 emulated = kvmppc_handle_store(run, vcpu,
332 kvmppc_get_gpr(vcpu, rs), 203 kvmppc_get_gpr(vcpu, rs),
333 1, 1); 204 1, 1);
334 break; 205 break;
335 206
336 case OP_31_XOP_STBUX: 207 case OP_31_XOP_STBUX:
208 rs = get_rs(inst);
209 ra = get_ra(inst);
210 rb = get_rb(inst);
211
212 ea = kvmppc_get_gpr(vcpu, rb);
213 if (ra)
214 ea += kvmppc_get_gpr(vcpu, ra);
215
337 emulated = kvmppc_handle_store(run, vcpu, 216 emulated = kvmppc_handle_store(run, vcpu,
338 kvmppc_get_gpr(vcpu, rs), 217 kvmppc_get_gpr(vcpu, rs),
339 1, 1); 218 1, 1);
340 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 219 kvmppc_set_gpr(vcpu, rs, ea);
341 break; 220 break;
342 221
343 case OP_31_XOP_LHAX: 222 case OP_31_XOP_LHAX:
223 rt = get_rt(inst);
344 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 224 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
345 break; 225 break;
346 226
347 case OP_31_XOP_LHZX: 227 case OP_31_XOP_LHZX:
228 rt = get_rt(inst);
348 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 229 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
349 break; 230 break;
350 231
351 case OP_31_XOP_LHZUX: 232 case OP_31_XOP_LHZUX:
233 rt = get_rt(inst);
234 ra = get_ra(inst);
235 rb = get_rb(inst);
236
237 ea = kvmppc_get_gpr(vcpu, rb);
238 if (ra)
239 ea += kvmppc_get_gpr(vcpu, ra);
240
352 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 241 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
353 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 242 kvmppc_set_gpr(vcpu, ra, ea);
354 break; 243 break;
355 244
356 case OP_31_XOP_MFSPR: 245 case OP_31_XOP_MFSPR:
357 emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); 246 sprn = get_sprn(inst);
247 rt = get_rt(inst);
248
249 switch (sprn) {
250 case SPRN_SRR0:
251 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0);
252 break;
253 case SPRN_SRR1:
254 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1);
255 break;
256 case SPRN_PVR:
257 kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
258 case SPRN_PIR:
259 kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
260 case SPRN_MSSSR0:
261 kvmppc_set_gpr(vcpu, rt, 0); break;
262
263 /* Note: mftb and TBRL/TBWL are user-accessible, so
264 * the guest can always access the real TB anyways.
265 * In fact, we probably will never see these traps. */
266 case SPRN_TBWL:
267 kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
268 case SPRN_TBWU:
269 kvmppc_set_gpr(vcpu, rt, get_tb()); break;
270
271 case SPRN_SPRG0:
272 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0);
273 break;
274 case SPRN_SPRG1:
275 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1);
276 break;
277 case SPRN_SPRG2:
278 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2);
279 break;
280 case SPRN_SPRG3:
281 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3);
282 break;
283 /* Note: SPRG4-7 are user-readable, so we don't get
284 * a trap. */
285
286 case SPRN_DEC:
287 {
288 kvmppc_set_gpr(vcpu, rt,
289 kvmppc_get_dec(vcpu, get_tb()));
290 break;
291 }
292 default:
293 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
294 if (emulated == EMULATE_FAIL) {
295 printk("mfspr: unknown spr %x\n", sprn);
296 kvmppc_set_gpr(vcpu, rt, 0);
297 }
298 break;
299 }
300 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
358 break; 301 break;
359 302
360 case OP_31_XOP_STHX: 303 case OP_31_XOP_STHX:
304 rs = get_rs(inst);
305 ra = get_ra(inst);
306 rb = get_rb(inst);
307
361 emulated = kvmppc_handle_store(run, vcpu, 308 emulated = kvmppc_handle_store(run, vcpu,
362 kvmppc_get_gpr(vcpu, rs), 309 kvmppc_get_gpr(vcpu, rs),
363 2, 1); 310 2, 1);
364 break; 311 break;
365 312
366 case OP_31_XOP_STHUX: 313 case OP_31_XOP_STHUX:
314 rs = get_rs(inst);
315 ra = get_ra(inst);
316 rb = get_rb(inst);
317
318 ea = kvmppc_get_gpr(vcpu, rb);
319 if (ra)
320 ea += kvmppc_get_gpr(vcpu, ra);
321
367 emulated = kvmppc_handle_store(run, vcpu, 322 emulated = kvmppc_handle_store(run, vcpu,
368 kvmppc_get_gpr(vcpu, rs), 323 kvmppc_get_gpr(vcpu, rs),
369 2, 1); 324 2, 1);
370 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 325 kvmppc_set_gpr(vcpu, ra, ea);
371 break; 326 break;
372 327
373 case OP_31_XOP_MTSPR: 328 case OP_31_XOP_MTSPR:
374 emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); 329 sprn = get_sprn(inst);
330 rs = get_rs(inst);
331 switch (sprn) {
332 case SPRN_SRR0:
333 vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs);
334 break;
335 case SPRN_SRR1:
336 vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs);
337 break;
338
339 /* XXX We need to context-switch the timebase for
340 * watchdog and FIT. */
341 case SPRN_TBWL: break;
342 case SPRN_TBWU: break;
343
344 case SPRN_MSSSR0: break;
345
346 case SPRN_DEC:
347 vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
348 kvmppc_emulate_dec(vcpu);
349 break;
350
351 case SPRN_SPRG0:
352 vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs);
353 break;
354 case SPRN_SPRG1:
355 vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs);
356 break;
357 case SPRN_SPRG2:
358 vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs);
359 break;
360 case SPRN_SPRG3:
361 vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs);
362 break;
363
364 default:
365 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
366 if (emulated == EMULATE_FAIL)
367 printk("mtspr: unknown spr %x\n", sprn);
368 break;
369 }
370 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
375 break; 371 break;
376 372
377 case OP_31_XOP_DCBI: 373 case OP_31_XOP_DCBI:
@@ -383,6 +379,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
383 break; 379 break;
384 380
385 case OP_31_XOP_LWBRX: 381 case OP_31_XOP_LWBRX:
382 rt = get_rt(inst);
386 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); 383 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
387 break; 384 break;
388 385
@@ -390,16 +387,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
390 break; 387 break;
391 388
392 case OP_31_XOP_STWBRX: 389 case OP_31_XOP_STWBRX:
390 rs = get_rs(inst);
391 ra = get_ra(inst);
392 rb = get_rb(inst);
393
393 emulated = kvmppc_handle_store(run, vcpu, 394 emulated = kvmppc_handle_store(run, vcpu,
394 kvmppc_get_gpr(vcpu, rs), 395 kvmppc_get_gpr(vcpu, rs),
395 4, 0); 396 4, 0);
396 break; 397 break;
397 398
398 case OP_31_XOP_LHBRX: 399 case OP_31_XOP_LHBRX:
400 rt = get_rt(inst);
399 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); 401 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
400 break; 402 break;
401 403
402 case OP_31_XOP_STHBRX: 404 case OP_31_XOP_STHBRX:
405 rs = get_rs(inst);
406 ra = get_ra(inst);
407 rb = get_rb(inst);
408
403 emulated = kvmppc_handle_store(run, vcpu, 409 emulated = kvmppc_handle_store(run, vcpu,
404 kvmppc_get_gpr(vcpu, rs), 410 kvmppc_get_gpr(vcpu, rs),
405 2, 0); 411 2, 0);
@@ -412,92 +418,99 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
412 break; 418 break;
413 419
414 case OP_LWZ: 420 case OP_LWZ:
415 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
416 break;
417
418 /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */
419 case OP_LD:
420 rt = get_rt(inst); 421 rt = get_rt(inst);
421 emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); 422 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
422 break; 423 break;
423 424
424 case OP_LWZU: 425 case OP_LWZU:
426 ra = get_ra(inst);
427 rt = get_rt(inst);
425 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 428 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
426 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 429 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
427 break; 430 break;
428 431
429 case OP_LBZ: 432 case OP_LBZ:
433 rt = get_rt(inst);
430 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 434 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
431 break; 435 break;
432 436
433 case OP_LBZU: 437 case OP_LBZU:
438 ra = get_ra(inst);
439 rt = get_rt(inst);
434 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 440 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
435 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 441 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
436 break; 442 break;
437 443
438 case OP_STW: 444 case OP_STW:
439 emulated = kvmppc_handle_store(run, vcpu,
440 kvmppc_get_gpr(vcpu, rs),
441 4, 1);
442 break;
443
444 /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
445 case OP_STD:
446 rs = get_rs(inst); 445 rs = get_rs(inst);
447 emulated = kvmppc_handle_store(run, vcpu, 446 emulated = kvmppc_handle_store(run, vcpu,
448 kvmppc_get_gpr(vcpu, rs), 447 kvmppc_get_gpr(vcpu, rs),
449 8, 1); 448 4, 1);
450 break; 449 break;
451 450
452 case OP_STWU: 451 case OP_STWU:
452 ra = get_ra(inst);
453 rs = get_rs(inst);
453 emulated = kvmppc_handle_store(run, vcpu, 454 emulated = kvmppc_handle_store(run, vcpu,
454 kvmppc_get_gpr(vcpu, rs), 455 kvmppc_get_gpr(vcpu, rs),
455 4, 1); 456 4, 1);
456 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 457 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
457 break; 458 break;
458 459
459 case OP_STB: 460 case OP_STB:
461 rs = get_rs(inst);
460 emulated = kvmppc_handle_store(run, vcpu, 462 emulated = kvmppc_handle_store(run, vcpu,
461 kvmppc_get_gpr(vcpu, rs), 463 kvmppc_get_gpr(vcpu, rs),
462 1, 1); 464 1, 1);
463 break; 465 break;
464 466
465 case OP_STBU: 467 case OP_STBU:
468 ra = get_ra(inst);
469 rs = get_rs(inst);
466 emulated = kvmppc_handle_store(run, vcpu, 470 emulated = kvmppc_handle_store(run, vcpu,
467 kvmppc_get_gpr(vcpu, rs), 471 kvmppc_get_gpr(vcpu, rs),
468 1, 1); 472 1, 1);
469 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 473 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
470 break; 474 break;
471 475
472 case OP_LHZ: 476 case OP_LHZ:
477 rt = get_rt(inst);
473 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 478 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
474 break; 479 break;
475 480
476 case OP_LHZU: 481 case OP_LHZU:
482 ra = get_ra(inst);
483 rt = get_rt(inst);
477 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 484 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
478 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 485 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
479 break; 486 break;
480 487
481 case OP_LHA: 488 case OP_LHA:
489 rt = get_rt(inst);
482 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 490 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
483 break; 491 break;
484 492
485 case OP_LHAU: 493 case OP_LHAU:
494 ra = get_ra(inst);
495 rt = get_rt(inst);
486 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 496 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
487 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 497 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
488 break; 498 break;
489 499
490 case OP_STH: 500 case OP_STH:
501 rs = get_rs(inst);
491 emulated = kvmppc_handle_store(run, vcpu, 502 emulated = kvmppc_handle_store(run, vcpu,
492 kvmppc_get_gpr(vcpu, rs), 503 kvmppc_get_gpr(vcpu, rs),
493 2, 1); 504 2, 1);
494 break; 505 break;
495 506
496 case OP_STHU: 507 case OP_STHU:
508 ra = get_ra(inst);
509 rs = get_rs(inst);
497 emulated = kvmppc_handle_store(run, vcpu, 510 emulated = kvmppc_handle_store(run, vcpu,
498 kvmppc_get_gpr(vcpu, rs), 511 kvmppc_get_gpr(vcpu, rs),
499 2, 1); 512 2, 1);
500 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 513 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
501 break; 514 break;
502 515
503 default: 516 default:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 70739a08956..a107c9be0fb 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -21,6 +21,7 @@
21#include <linux/errno.h> 21#include <linux/errno.h>
22#include <linux/err.h> 22#include <linux/err.h>
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24#include <linux/module.h>
24#include <linux/vmalloc.h> 25#include <linux/vmalloc.h>
25#include <linux/hrtimer.h> 26#include <linux/hrtimer.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
@@ -30,7 +31,6 @@
30#include <asm/kvm_ppc.h> 31#include <asm/kvm_ppc.h>
31#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
32#include <asm/cputhreads.h> 33#include <asm/cputhreads.h>
33#include <asm/irqflags.h>
34#include "timing.h" 34#include "timing.h"
35#include "../mm/mmu_decl.h" 35#include "../mm/mmu_decl.h"
36 36
@@ -39,93 +39,13 @@
39 39
40int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 40int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
41{ 41{
42 return !!(v->arch.pending_exceptions) ||
43 v->requests;
44}
45
46int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
47{
48 return 1;
49}
50
51#ifndef CONFIG_KVM_BOOK3S_64_HV 42#ifndef CONFIG_KVM_BOOK3S_64_HV
52/* 43 return !(v->arch.shared->msr & MSR_WE) ||
53 * Common checks before entering the guest world. Call with interrupts 44 !!(v->arch.pending_exceptions);
54 * disabled. 45#else
55 * 46 return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
56 * returns:
57 *
58 * == 1 if we're ready to go into guest state
59 * <= 0 if we need to go back to the host with return value
60 */
61int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
62{
63 int r = 1;
64
65 WARN_ON_ONCE(!irqs_disabled());
66 while (true) {
67 if (need_resched()) {
68 local_irq_enable();
69 cond_resched();
70 local_irq_disable();
71 continue;
72 }
73
74 if (signal_pending(current)) {
75 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
76 vcpu->run->exit_reason = KVM_EXIT_INTR;
77 r = -EINTR;
78 break;
79 }
80
81 vcpu->mode = IN_GUEST_MODE;
82
83 /*
84 * Reading vcpu->requests must happen after setting vcpu->mode,
85 * so we don't miss a request because the requester sees
86 * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
87 * before next entering the guest (and thus doesn't IPI).
88 */
89 smp_mb();
90
91 if (vcpu->requests) {
92 /* Make sure we process requests preemptable */
93 local_irq_enable();
94 trace_kvm_check_requests(vcpu);
95 r = kvmppc_core_check_requests(vcpu);
96 local_irq_disable();
97 if (r > 0)
98 continue;
99 break;
100 }
101
102 if (kvmppc_core_prepare_to_enter(vcpu)) {
103 /* interrupts got enabled in between, so we
104 are back at square 1 */
105 continue;
106 }
107
108#ifdef CONFIG_PPC64
109 /* lazy EE magic */
110 hard_irq_disable();
111 if (lazy_irq_pending()) {
112 /* Got an interrupt in between, try again */
113 local_irq_enable();
114 local_irq_disable();
115 kvm_guest_exit();
116 continue;
117 }
118
119 trace_hardirqs_on();
120#endif 47#endif
121
122 kvm_guest_enter();
123 break;
124 }
125
126 return r;
127} 48}
128#endif /* CONFIG_KVM_BOOK3S_64_HV */
129 49
130int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 50int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
131{ 51{
@@ -146,32 +66,27 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
146 } 66 }
147 67
148 switch (nr) { 68 switch (nr) {
149 case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE): 69 case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
150 { 70 {
151 vcpu->arch.magic_page_pa = param1; 71 vcpu->arch.magic_page_pa = param1;
152 vcpu->arch.magic_page_ea = param2; 72 vcpu->arch.magic_page_ea = param2;
153 73
154 r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; 74 r2 = KVM_MAGIC_FEAT_SR;
155 75
156 r = EV_SUCCESS; 76 r = HC_EV_SUCCESS;
157 break; 77 break;
158 } 78 }
159 case KVM_HCALL_TOKEN(KVM_HC_FEATURES): 79 case HC_VENDOR_KVM | KVM_HC_FEATURES:
160 r = EV_SUCCESS; 80 r = HC_EV_SUCCESS;
161#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) 81#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500)
162 /* XXX Missing magic page on 44x */ 82 /* XXX Missing magic page on 44x */
163 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); 83 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
164#endif 84#endif
165 85
166 /* Second return value is in r4 */ 86 /* Second return value is in r4 */
167 break; 87 break;
168 case EV_HCALL_TOKEN(EV_IDLE):
169 r = EV_SUCCESS;
170 kvm_vcpu_block(vcpu);
171 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
172 break;
173 default: 88 default:
174 r = EV_UNIMPLEMENTED; 89 r = HC_EV_UNIMPLEMENTED;
175 break; 90 break;
176 } 91 }
177 92
@@ -180,36 +95,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
180 return r; 95 return r;
181} 96}
182 97
183int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
184{
185 int r = false;
186
187 /* We have to know what CPU to virtualize */
188 if (!vcpu->arch.pvr)
189 goto out;
190
191 /* PAPR only works with book3s_64 */
192 if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
193 goto out;
194
195#ifdef CONFIG_KVM_BOOK3S_64_HV
196 /* HV KVM can only do PAPR mode for now */
197 if (!vcpu->arch.papr_enabled)
198 goto out;
199#endif
200
201#ifdef CONFIG_KVM_BOOKE_HV
202 if (!cpu_has_feature(CPU_FTR_EMB_HV))
203 goto out;
204#endif
205
206 r = true;
207
208out:
209 vcpu->arch.sane = r;
210 return r ? 0 : -EINVAL;
211}
212
213int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) 98int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
214{ 99{
215 enum emulation_result er; 100 enum emulation_result er;
@@ -266,11 +151,8 @@ void kvm_arch_check_processor_compat(void *rtn)
266 *(int *)rtn = kvmppc_core_check_processor_compat(); 151 *(int *)rtn = kvmppc_core_check_processor_compat();
267} 152}
268 153
269int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 154int kvm_arch_init_vm(struct kvm *kvm)
270{ 155{
271 if (type)
272 return -EINVAL;
273
274 return kvmppc_core_init_vm(kvm); 156 return kvmppc_core_init_vm(kvm);
275} 157}
276 158
@@ -304,39 +186,28 @@ int kvm_dev_ioctl_check_extension(long ext)
304 switch (ext) { 186 switch (ext) {
305#ifdef CONFIG_BOOKE 187#ifdef CONFIG_BOOKE
306 case KVM_CAP_PPC_BOOKE_SREGS: 188 case KVM_CAP_PPC_BOOKE_SREGS:
307 case KVM_CAP_PPC_BOOKE_WATCHDOG:
308#else 189#else
309 case KVM_CAP_PPC_SEGSTATE: 190 case KVM_CAP_PPC_SEGSTATE:
310 case KVM_CAP_PPC_HIOR:
311 case KVM_CAP_PPC_PAPR:
312#endif 191#endif
313 case KVM_CAP_PPC_UNSET_IRQ: 192 case KVM_CAP_PPC_UNSET_IRQ:
314 case KVM_CAP_PPC_IRQ_LEVEL: 193 case KVM_CAP_PPC_IRQ_LEVEL:
315 case KVM_CAP_ENABLE_CAP: 194 case KVM_CAP_ENABLE_CAP:
316 case KVM_CAP_ONE_REG:
317 case KVM_CAP_IOEVENTFD:
318 r = 1; 195 r = 1;
319 break; 196 break;
320#ifndef CONFIG_KVM_BOOK3S_64_HV 197#ifndef CONFIG_KVM_BOOK3S_64_HV
321 case KVM_CAP_PPC_PAIRED_SINGLES: 198 case KVM_CAP_PPC_PAIRED_SINGLES:
322 case KVM_CAP_PPC_OSI: 199 case KVM_CAP_PPC_OSI:
323 case KVM_CAP_PPC_GET_PVINFO: 200 case KVM_CAP_PPC_GET_PVINFO:
324#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
325 case KVM_CAP_SW_TLB:
326#endif
327 r = 1; 201 r = 1;
328 break; 202 break;
329 case KVM_CAP_COALESCED_MMIO: 203 case KVM_CAP_COALESCED_MMIO:
330 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 204 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
331 break; 205 break;
332#endif 206#endif
333#ifdef CONFIG_PPC_BOOK3S_64 207#ifdef CONFIG_KVM_BOOK3S_64_HV
334 case KVM_CAP_SPAPR_TCE: 208 case KVM_CAP_SPAPR_TCE:
335 case KVM_CAP_PPC_ALLOC_HTAB:
336 r = 1; 209 r = 1;
337 break; 210 break;
338#endif /* CONFIG_PPC_BOOK3S_64 */
339#ifdef CONFIG_KVM_BOOK3S_64_HV
340 case KVM_CAP_PPC_SMT: 211 case KVM_CAP_PPC_SMT:
341 r = threads_per_core; 212 r = threads_per_core;
342 break; 213 break;
@@ -347,42 +218,6 @@ int kvm_dev_ioctl_check_extension(long ext)
347 r = 2; 218 r = 2;
348 break; 219 break;
349#endif 220#endif
350 case KVM_CAP_SYNC_MMU:
351#ifdef CONFIG_KVM_BOOK3S_64_HV
352 r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
353#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
354 r = 1;
355#else
356 r = 0;
357 break;
358#endif
359#ifdef CONFIG_KVM_BOOK3S_64_HV
360 case KVM_CAP_PPC_HTAB_FD:
361 r = 1;
362 break;
363#endif
364 break;
365 case KVM_CAP_NR_VCPUS:
366 /*
367 * Recommending a number of CPUs is somewhat arbitrary; we
368 * return the number of present CPUs for -HV (since a host
369 * will have secondary threads "offline"), and for other KVM
370 * implementations just count online CPUs.
371 */
372#ifdef CONFIG_KVM_BOOK3S_64_HV
373 r = num_present_cpus();
374#else
375 r = num_online_cpus();
376#endif
377 break;
378 case KVM_CAP_MAX_VCPUS:
379 r = KVM_MAX_VCPUS;
380 break;
381#ifdef CONFIG_PPC_BOOK3S_64
382 case KVM_CAP_PPC_GET_SMMU_INFO:
383 r = 1;
384 break;
385#endif
386 default: 221 default:
387 r = 0; 222 r = 0;
388 break; 223 break;
@@ -397,24 +232,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
397 return -EINVAL; 232 return -EINVAL;
398} 233}
399 234
400void kvm_arch_free_memslot(struct kvm_memory_slot *free,
401 struct kvm_memory_slot *dont)
402{
403 kvmppc_core_free_memslot(free, dont);
404}
405
406int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
407{
408 return kvmppc_core_create_memslot(slot, npages);
409}
410
411int kvm_arch_prepare_memory_region(struct kvm *kvm, 235int kvm_arch_prepare_memory_region(struct kvm *kvm,
412 struct kvm_memory_slot *memslot, 236 struct kvm_memory_slot *memslot,
413 struct kvm_memory_slot old, 237 struct kvm_memory_slot old,
414 struct kvm_userspace_memory_region *mem, 238 struct kvm_userspace_memory_region *mem,
415 int user_alloc) 239 int user_alloc)
416{ 240{
417 return kvmppc_core_prepare_memory_region(kvm, memslot, mem); 241 return kvmppc_core_prepare_memory_region(kvm, mem);
418} 242}
419 243
420void kvm_arch_commit_memory_region(struct kvm *kvm, 244void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -422,35 +246,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
422 struct kvm_memory_slot old, 246 struct kvm_memory_slot old,
423 int user_alloc) 247 int user_alloc)
424{ 248{
425 kvmppc_core_commit_memory_region(kvm, mem, old); 249 kvmppc_core_commit_memory_region(kvm, mem);
426} 250}
427 251
428void kvm_arch_flush_shadow_all(struct kvm *kvm)
429{
430}
431 252
432void kvm_arch_flush_shadow_memslot(struct kvm *kvm, 253void kvm_arch_flush_shadow(struct kvm *kvm)
433 struct kvm_memory_slot *slot)
434{ 254{
435 kvmppc_core_flush_memslot(kvm, slot);
436} 255}
437 256
438struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 257struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
439{ 258{
440 struct kvm_vcpu *vcpu; 259 struct kvm_vcpu *vcpu;
441 vcpu = kvmppc_core_vcpu_create(kvm, id); 260 vcpu = kvmppc_core_vcpu_create(kvm, id);
442 if (!IS_ERR(vcpu)) { 261 if (!IS_ERR(vcpu))
443 vcpu->arch.wqp = &vcpu->wq;
444 kvmppc_create_vcpu_debugfs(vcpu, id); 262 kvmppc_create_vcpu_debugfs(vcpu, id);
445 }
446 return vcpu; 263 return vcpu;
447} 264}
448 265
449int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
450{
451 return 0;
452}
453
454void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 266void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
455{ 267{
456 /* Make sure we're not using the vcpu anymore */ 268 /* Make sure we're not using the vcpu anymore */
@@ -471,6 +283,18 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
471 return kvmppc_core_pending_dec(vcpu); 283 return kvmppc_core_pending_dec(vcpu);
472} 284}
473 285
286static void kvmppc_decrementer_func(unsigned long data)
287{
288 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
289
290 kvmppc_core_queue_dec(vcpu);
291
292 if (waitqueue_active(&vcpu->wq)) {
293 wake_up_interruptible(&vcpu->wq);
294 vcpu->stat.halt_wakeup++;
295 }
296}
297
474/* 298/*
475 * low level hrtimer wake routine. Because this runs in hardirq context 299 * low level hrtimer wake routine. Because this runs in hardirq context
476 * we schedule a tasklet to do the real work. 300 * we schedule a tasklet to do the real work.
@@ -487,8 +311,6 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
487 311
488int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 312int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
489{ 313{
490 int ret;
491
492 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 314 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
493 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); 315 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
494 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 316 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -497,14 +319,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
497#ifdef CONFIG_KVM_EXIT_TIMING 319#ifdef CONFIG_KVM_EXIT_TIMING
498 mutex_init(&vcpu->arch.exit_timing_lock); 320 mutex_init(&vcpu->arch.exit_timing_lock);
499#endif 321#endif
500 ret = kvmppc_subarch_vcpu_init(vcpu); 322
501 return ret; 323 return 0;
502} 324}
503 325
504void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 326void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
505{ 327{
506 kvmppc_mmu_destroy(vcpu); 328 kvmppc_mmu_destroy(vcpu);
507 kvmppc_subarch_vcpu_uninit(vcpu);
508} 329}
509 330
510void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 331void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -520,6 +341,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
520 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); 341 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
521#endif 342#endif
522 kvmppc_core_vcpu_load(vcpu, cpu); 343 kvmppc_core_vcpu_load(vcpu, cpu);
344 vcpu->cpu = smp_processor_id();
523} 345}
524 346
525void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 347void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -528,6 +350,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
528#ifdef CONFIG_BOOKE 350#ifdef CONFIG_BOOKE
529 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); 351 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
530#endif 352#endif
353 vcpu->cpu = -1;
531} 354}
532 355
533int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 356int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -586,20 +409,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
586 409
587 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 410 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
588 411
589 switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { 412 switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
590 case KVM_MMIO_REG_GPR: 413 case KVM_REG_GPR:
591 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 414 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
592 break; 415 break;
593 case KVM_MMIO_REG_FPR: 416 case KVM_REG_FPR:
594 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; 417 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
595 break; 418 break;
596#ifdef CONFIG_PPC_BOOK3S 419#ifdef CONFIG_PPC_BOOK3S
597 case KVM_MMIO_REG_QPR: 420 case KVM_REG_QPR:
598 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; 421 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
599 break; 422 break;
600 case KVM_MMIO_REG_FQPR: 423 case KVM_REG_FQPR:
601 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; 424 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
602 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; 425 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
603 break; 426 break;
604#endif 427#endif
605 default: 428 default:
@@ -625,13 +448,6 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
625 vcpu->mmio_is_write = 0; 448 vcpu->mmio_is_write = 0;
626 vcpu->arch.mmio_sign_extend = 0; 449 vcpu->arch.mmio_sign_extend = 0;
627 450
628 if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
629 bytes, &run->mmio.data)) {
630 kvmppc_complete_mmio_load(vcpu, run);
631 vcpu->mmio_needed = 0;
632 return EMULATE_DONE;
633 }
634
635 return EMULATE_DO_MMIO; 451 return EMULATE_DO_MMIO;
636} 452}
637 453
@@ -641,8 +457,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
641{ 457{
642 int r; 458 int r;
643 459
644 vcpu->arch.mmio_sign_extend = 1;
645 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); 460 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
461 vcpu->arch.mmio_sign_extend = 1;
646 462
647 return r; 463 return r;
648} 464}
@@ -680,13 +496,6 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
680 } 496 }
681 } 497 }
682 498
683 if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
684 bytes, &run->mmio.data)) {
685 kvmppc_complete_mmio_load(vcpu, run);
686 vcpu->mmio_needed = 0;
687 return EMULATE_DONE;
688 }
689
690 return EMULATE_DO_MMIO; 499 return EMULATE_DO_MMIO;
691} 500}
692 501
@@ -722,6 +531,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
722 vcpu->arch.hcall_needed = 0; 531 vcpu->arch.hcall_needed = 0;
723 } 532 }
724 533
534 kvmppc_core_deliver_interrupts(vcpu);
535
725 r = kvmppc_vcpu_run(run, vcpu); 536 r = kvmppc_vcpu_run(run, vcpu);
726 537
727 if (vcpu->sigset_active) 538 if (vcpu->sigset_active)
@@ -732,14 +543,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
732 543
733int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 544int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
734{ 545{
735 if (irq->irq == KVM_INTERRUPT_UNSET) { 546 if (irq->irq == KVM_INTERRUPT_UNSET)
736 kvmppc_core_dequeue_external(vcpu, irq); 547 kvmppc_core_dequeue_external(vcpu, irq);
737 return 0; 548 else
738 } 549 kvmppc_core_queue_external(vcpu, irq);
739
740 kvmppc_core_queue_external(vcpu, irq);
741 550
742 kvm_vcpu_kick(vcpu); 551 if (waitqueue_active(&vcpu->wq)) {
552 wake_up_interruptible(&vcpu->wq);
553 vcpu->stat.halt_wakeup++;
554 } else if (vcpu->cpu != -1) {
555 smp_send_reschedule(vcpu->cpu);
556 }
743 557
744 return 0; 558 return 0;
745} 559}
@@ -757,37 +571,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
757 r = 0; 571 r = 0;
758 vcpu->arch.osi_enabled = true; 572 vcpu->arch.osi_enabled = true;
759 break; 573 break;
760 case KVM_CAP_PPC_PAPR:
761 r = 0;
762 vcpu->arch.papr_enabled = true;
763 break;
764#ifdef CONFIG_BOOKE
765 case KVM_CAP_PPC_BOOKE_WATCHDOG:
766 r = 0;
767 vcpu->arch.watchdog_enabled = true;
768 break;
769#endif
770#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
771 case KVM_CAP_SW_TLB: {
772 struct kvm_config_tlb cfg;
773 void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
774
775 r = -EFAULT;
776 if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
777 break;
778
779 r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
780 break;
781 }
782#endif
783 default: 574 default:
784 r = -EINVAL; 575 r = -EINVAL;
785 break; 576 break;
786 } 577 }
787 578
788 if (!r)
789 r = kvmppc_sanity_check(vcpu);
790
791 return r; 579 return r;
792} 580}
793 581
@@ -829,31 +617,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
829 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 617 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
830 break; 618 break;
831 } 619 }
832
833 case KVM_SET_ONE_REG:
834 case KVM_GET_ONE_REG:
835 {
836 struct kvm_one_reg reg;
837 r = -EFAULT;
838 if (copy_from_user(&reg, argp, sizeof(reg)))
839 goto out;
840 if (ioctl == KVM_SET_ONE_REG)
841 r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
842 else
843 r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
844 break;
845 }
846
847#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
848 case KVM_DIRTY_TLB: {
849 struct kvm_dirty_tlb dirty;
850 r = -EFAULT;
851 if (copy_from_user(&dirty, argp, sizeof(dirty)))
852 goto out;
853 r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
854 break;
855 }
856#endif
857 default: 620 default:
858 r = -EINVAL; 621 r = -EINVAL;
859 } 622 }
@@ -862,23 +625,11 @@ out:
862 return r; 625 return r;
863} 626}
864 627
865int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
866{
867 return VM_FAULT_SIGBUS;
868}
869
870static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) 628static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
871{ 629{
872 u32 inst_nop = 0x60000000;
873#ifdef CONFIG_KVM_BOOKE_HV
874 u32 inst_sc1 = 0x44000022;
875 pvinfo->hcall[0] = inst_sc1;
876 pvinfo->hcall[1] = inst_nop;
877 pvinfo->hcall[2] = inst_nop;
878 pvinfo->hcall[3] = inst_nop;
879#else
880 u32 inst_lis = 0x3c000000; 630 u32 inst_lis = 0x3c000000;
881 u32 inst_ori = 0x60000000; 631 u32 inst_ori = 0x60000000;
632 u32 inst_nop = 0x60000000;
882 u32 inst_sc = 0x44000002; 633 u32 inst_sc = 0x44000002;
883 u32 inst_imm_mask = 0xffff; 634 u32 inst_imm_mask = 0xffff;
884 635
@@ -895,9 +646,6 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
895 pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); 646 pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
896 pvinfo->hcall[2] = inst_sc; 647 pvinfo->hcall[2] = inst_sc;
897 pvinfo->hcall[3] = inst_nop; 648 pvinfo->hcall[3] = inst_nop;
898#endif
899
900 pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
901 649
902 return 0; 650 return 0;
903} 651}
@@ -920,7 +668,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
920 668
921 break; 669 break;
922 } 670 }
923#ifdef CONFIG_PPC_BOOK3S_64 671#ifdef CONFIG_KVM_BOOK3S_64_HV
924 case KVM_CREATE_SPAPR_TCE: { 672 case KVM_CREATE_SPAPR_TCE: {
925 struct kvm_create_spapr_tce create_tce; 673 struct kvm_create_spapr_tce create_tce;
926 struct kvm *kvm = filp->private_data; 674 struct kvm *kvm = filp->private_data;
@@ -931,9 +679,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
931 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); 679 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
932 goto out; 680 goto out;
933 } 681 }
934#endif /* CONFIG_PPC_BOOK3S_64 */
935 682
936#ifdef CONFIG_KVM_BOOK3S_64_HV
937 case KVM_ALLOCATE_RMA: { 683 case KVM_ALLOCATE_RMA: {
938 struct kvm *kvm = filp->private_data; 684 struct kvm *kvm = filp->private_data;
939 struct kvm_allocate_rma rma; 685 struct kvm_allocate_rma rma;
@@ -943,48 +689,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
943 r = -EFAULT; 689 r = -EFAULT;
944 break; 690 break;
945 } 691 }
946
947 case KVM_PPC_ALLOCATE_HTAB: {
948 struct kvm *kvm = filp->private_data;
949 u32 htab_order;
950
951 r = -EFAULT;
952 if (get_user(htab_order, (u32 __user *)argp))
953 break;
954 r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
955 if (r)
956 break;
957 r = -EFAULT;
958 if (put_user(htab_order, (u32 __user *)argp))
959 break;
960 r = 0;
961 break;
962 }
963
964 case KVM_PPC_GET_HTAB_FD: {
965 struct kvm *kvm = filp->private_data;
966 struct kvm_get_htab_fd ghf;
967
968 r = -EFAULT;
969 if (copy_from_user(&ghf, argp, sizeof(ghf)))
970 break;
971 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
972 break;
973 }
974#endif /* CONFIG_KVM_BOOK3S_64_HV */ 692#endif /* CONFIG_KVM_BOOK3S_64_HV */
975 693
976#ifdef CONFIG_PPC_BOOK3S_64
977 case KVM_PPC_GET_SMMU_INFO: {
978 struct kvm *kvm = filp->private_data;
979 struct kvm_ppc_smmu_info info;
980
981 memset(&info, 0, sizeof(info));
982 r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
983 if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
984 r = -EFAULT;
985 break;
986 }
987#endif /* CONFIG_PPC_BOOK3S_64 */
988 default: 694 default:
989 r = -ENOTTY; 695 r = -ENOTTY;
990 } 696 }
@@ -993,40 +699,6 @@ out:
993 return r; 699 return r;
994} 700}
995 701
996static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
997static unsigned long nr_lpids;
998
999long kvmppc_alloc_lpid(void)
1000{
1001 long lpid;
1002
1003 do {
1004 lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
1005 if (lpid >= nr_lpids) {
1006 pr_err("%s: No LPIDs free\n", __func__);
1007 return -ENOMEM;
1008 }
1009 } while (test_and_set_bit(lpid, lpid_inuse));
1010
1011 return lpid;
1012}
1013
1014void kvmppc_claim_lpid(long lpid)
1015{
1016 set_bit(lpid, lpid_inuse);
1017}
1018
1019void kvmppc_free_lpid(long lpid)
1020{
1021 clear_bit(lpid, lpid_inuse);
1022}
1023
1024void kvmppc_init_lpid(unsigned long nr_lpids_param)
1025{
1026 nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
1027 memset(lpid_inuse, 0, sizeof(lpid_inuse));
1028}
1029
1030int kvm_arch_init(void *opaque) 702int kvm_arch_init(void *opaque)
1031{ 703{
1032 return 0; 704 return 0;
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index bf191e72b2d..8167d42a776 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -93,12 +93,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
93 case SIGNAL_EXITS: 93 case SIGNAL_EXITS:
94 vcpu->stat.signal_exits++; 94 vcpu->stat.signal_exits++;
95 break; 95 break;
96 case DBELL_EXITS:
97 vcpu->stat.dbell_exits++;
98 break;
99 case GDBELL_EXITS:
100 vcpu->stat.gdbell_exits++;
101 break;
102 } 96 }
103} 97}
104 98
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index e326489a542..b135d3d397d 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,126 +31,6 @@ TRACE_EVENT(kvm_ppc_instr,
31 __entry->inst, __entry->pc, __entry->emulate) 31 __entry->inst, __entry->pc, __entry->emulate)
32); 32);
33 33
34#ifdef CONFIG_PPC_BOOK3S
35#define kvm_trace_symbol_exit \
36 {0x100, "SYSTEM_RESET"}, \
37 {0x200, "MACHINE_CHECK"}, \
38 {0x300, "DATA_STORAGE"}, \
39 {0x380, "DATA_SEGMENT"}, \
40 {0x400, "INST_STORAGE"}, \
41 {0x480, "INST_SEGMENT"}, \
42 {0x500, "EXTERNAL"}, \
43 {0x501, "EXTERNAL_LEVEL"}, \
44 {0x502, "EXTERNAL_HV"}, \
45 {0x600, "ALIGNMENT"}, \
46 {0x700, "PROGRAM"}, \
47 {0x800, "FP_UNAVAIL"}, \
48 {0x900, "DECREMENTER"}, \
49 {0x980, "HV_DECREMENTER"}, \
50 {0xc00, "SYSCALL"}, \
51 {0xd00, "TRACE"}, \
52 {0xe00, "H_DATA_STORAGE"}, \
53 {0xe20, "H_INST_STORAGE"}, \
54 {0xe40, "H_EMUL_ASSIST"}, \
55 {0xf00, "PERFMON"}, \
56 {0xf20, "ALTIVEC"}, \
57 {0xf40, "VSX"}
58#else
59#define kvm_trace_symbol_exit \
60 {0, "CRITICAL"}, \
61 {1, "MACHINE_CHECK"}, \
62 {2, "DATA_STORAGE"}, \
63 {3, "INST_STORAGE"}, \
64 {4, "EXTERNAL"}, \
65 {5, "ALIGNMENT"}, \
66 {6, "PROGRAM"}, \
67 {7, "FP_UNAVAIL"}, \
68 {8, "SYSCALL"}, \
69 {9, "AP_UNAVAIL"}, \
70 {10, "DECREMENTER"}, \
71 {11, "FIT"}, \
72 {12, "WATCHDOG"}, \
73 {13, "DTLB_MISS"}, \
74 {14, "ITLB_MISS"}, \
75 {15, "DEBUG"}, \
76 {32, "SPE_UNAVAIL"}, \
77 {33, "SPE_FP_DATA"}, \
78 {34, "SPE_FP_ROUND"}, \
79 {35, "PERFORMANCE_MONITOR"}, \
80 {36, "DOORBELL"}, \
81 {37, "DOORBELL_CRITICAL"}, \
82 {38, "GUEST_DBELL"}, \
83 {39, "GUEST_DBELL_CRIT"}, \
84 {40, "HV_SYSCALL"}, \
85 {41, "HV_PRIV"}
86#endif
87
88TRACE_EVENT(kvm_exit,
89 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
90 TP_ARGS(exit_nr, vcpu),
91
92 TP_STRUCT__entry(
93 __field( unsigned int, exit_nr )
94 __field( unsigned long, pc )
95 __field( unsigned long, msr )
96 __field( unsigned long, dar )
97#ifdef CONFIG_KVM_BOOK3S_PR
98 __field( unsigned long, srr1 )
99#endif
100 __field( unsigned long, last_inst )
101 ),
102
103 TP_fast_assign(
104#ifdef CONFIG_KVM_BOOK3S_PR
105 struct kvmppc_book3s_shadow_vcpu *svcpu;
106#endif
107 __entry->exit_nr = exit_nr;
108 __entry->pc = kvmppc_get_pc(vcpu);
109 __entry->dar = kvmppc_get_fault_dar(vcpu);
110 __entry->msr = vcpu->arch.shared->msr;
111#ifdef CONFIG_KVM_BOOK3S_PR
112 svcpu = svcpu_get(vcpu);
113 __entry->srr1 = svcpu->shadow_srr1;
114 svcpu_put(svcpu);
115#endif
116 __entry->last_inst = vcpu->arch.last_inst;
117 ),
118
119 TP_printk("exit=%s"
120 " | pc=0x%lx"
121 " | msr=0x%lx"
122 " | dar=0x%lx"
123#ifdef CONFIG_KVM_BOOK3S_PR
124 " | srr1=0x%lx"
125#endif
126 " | last_inst=0x%lx"
127 ,
128 __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
129 __entry->pc,
130 __entry->msr,
131 __entry->dar,
132#ifdef CONFIG_KVM_BOOK3S_PR
133 __entry->srr1,
134#endif
135 __entry->last_inst
136 )
137);
138
139TRACE_EVENT(kvm_unmap_hva,
140 TP_PROTO(unsigned long hva),
141 TP_ARGS(hva),
142
143 TP_STRUCT__entry(
144 __field( unsigned long, hva )
145 ),
146
147 TP_fast_assign(
148 __entry->hva = hva;
149 ),
150
151 TP_printk("unmap hva 0x%lx\n", __entry->hva)
152);
153
154TRACE_EVENT(kvm_stlb_inval, 34TRACE_EVENT(kvm_stlb_inval,
155 TP_PROTO(unsigned int stlb_index), 35 TP_PROTO(unsigned int stlb_index),
156 TP_ARGS(stlb_index), 36 TP_ARGS(stlb_index),
@@ -218,31 +98,38 @@ TRACE_EVENT(kvm_gtlb_write,
218 __entry->word1, __entry->word2) 98 __entry->word1, __entry->word2)
219); 99);
220 100
221TRACE_EVENT(kvm_check_requests, 101
222 TP_PROTO(struct kvm_vcpu *vcpu), 102/*************************************************************************
223 TP_ARGS(vcpu), 103 * Book3S trace points *
104 *************************************************************************/
105
106#ifdef CONFIG_KVM_BOOK3S_PR
107
108TRACE_EVENT(kvm_book3s_exit,
109 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
110 TP_ARGS(exit_nr, vcpu),
224 111
225 TP_STRUCT__entry( 112 TP_STRUCT__entry(
226 __field( __u32, cpu_nr ) 113 __field( unsigned int, exit_nr )
227 __field( __u32, requests ) 114 __field( unsigned long, pc )
115 __field( unsigned long, msr )
116 __field( unsigned long, dar )
117 __field( unsigned long, srr1 )
228 ), 118 ),
229 119
230 TP_fast_assign( 120 TP_fast_assign(
231 __entry->cpu_nr = vcpu->vcpu_id; 121 __entry->exit_nr = exit_nr;
232 __entry->requests = vcpu->requests; 122 __entry->pc = kvmppc_get_pc(vcpu);
123 __entry->dar = kvmppc_get_fault_dar(vcpu);
124 __entry->msr = vcpu->arch.shared->msr;
125 __entry->srr1 = to_svcpu(vcpu)->shadow_srr1;
233 ), 126 ),
234 127
235 TP_printk("vcpu=%x requests=%x", 128 TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
236 __entry->cpu_nr, __entry->requests) 129 __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
130 __entry->srr1)
237); 131);
238 132
239
240/*************************************************************************
241 * Book3S trace points *
242 *************************************************************************/
243
244#ifdef CONFIG_KVM_BOOK3S_PR
245
246TRACE_EVENT(kvm_book3s_reenter, 133TRACE_EVENT(kvm_book3s_reenter,
247 TP_PROTO(int r, struct kvm_vcpu *vcpu), 134 TP_PROTO(int r, struct kvm_vcpu *vcpu),
248 TP_ARGS(r, vcpu), 135 TP_ARGS(r, vcpu),
@@ -299,7 +186,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
299 TP_ARGS(pte), 186 TP_ARGS(pte),
300 187
301 TP_STRUCT__entry( 188 TP_STRUCT__entry(
302 __field( u64, host_vpn ) 189 __field( u64, host_va )
303 __field( u64, pfn ) 190 __field( u64, pfn )
304 __field( ulong, eaddr ) 191 __field( ulong, eaddr )
305 __field( u64, vpage ) 192 __field( u64, vpage )
@@ -308,7 +195,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
308 ), 195 ),
309 196
310 TP_fast_assign( 197 TP_fast_assign(
311 __entry->host_vpn = pte->host_vpn; 198 __entry->host_va = pte->host_va;
312 __entry->pfn = pte->pfn; 199 __entry->pfn = pte->pfn;
313 __entry->eaddr = pte->pte.eaddr; 200 __entry->eaddr = pte->pte.eaddr;
314 __entry->vpage = pte->pte.vpage; 201 __entry->vpage = pte->pte.vpage;
@@ -318,8 +205,8 @@ TRACE_EVENT(kvm_book3s_mmu_map,
318 (pte->pte.may_execute ? 0x1 : 0); 205 (pte->pte.may_execute ? 0x1 : 0);
319 ), 206 ),
320 207
321 TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 208 TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
322 __entry->host_vpn, __entry->pfn, __entry->eaddr, 209 __entry->host_va, __entry->pfn, __entry->eaddr,
323 __entry->vpage, __entry->raddr, __entry->flags) 210 __entry->vpage, __entry->raddr, __entry->flags)
324); 211);
325 212
@@ -328,7 +215,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
328 TP_ARGS(pte), 215 TP_ARGS(pte),
329 216
330 TP_STRUCT__entry( 217 TP_STRUCT__entry(
331 __field( u64, host_vpn ) 218 __field( u64, host_va )
332 __field( u64, pfn ) 219 __field( u64, pfn )
333 __field( ulong, eaddr ) 220 __field( ulong, eaddr )
334 __field( u64, vpage ) 221 __field( u64, vpage )
@@ -337,7 +224,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
337 ), 224 ),
338 225
339 TP_fast_assign( 226 TP_fast_assign(
340 __entry->host_vpn = pte->host_vpn; 227 __entry->host_va = pte->host_va;
341 __entry->pfn = pte->pfn; 228 __entry->pfn = pte->pfn;
342 __entry->eaddr = pte->pte.eaddr; 229 __entry->eaddr = pte->pte.eaddr;
343 __entry->vpage = pte->pte.vpage; 230 __entry->vpage = pte->pte.vpage;
@@ -348,7 +235,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
348 ), 235 ),
349 236
350 TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 237 TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
351 __entry->host_vpn, __entry->pfn, __entry->eaddr, 238 __entry->host_va, __entry->pfn, __entry->eaddr,
352 __entry->vpage, __entry->raddr, __entry->flags) 239 __entry->vpage, __entry->raddr, __entry->flags)
353); 240);
354 241
@@ -450,101 +337,6 @@ TRACE_EVENT(kvm_book3s_slbmte,
450 337
451#endif /* CONFIG_PPC_BOOK3S */ 338#endif /* CONFIG_PPC_BOOK3S */
452 339
453
454/*************************************************************************
455 * Book3E trace points *
456 *************************************************************************/
457
458#ifdef CONFIG_BOOKE
459
/*
 * kvm_booke206_stlb_write - logged on a shadow TLB (stlb) entry write
 * on Book3E (e500mc-style) MMUs.  Captures the raw mas0/mas8/mas1/
 * mas2/mas7_3 values being installed.
 * NOTE(review): "MAS" naming presumably refers to the Book3E MMU
 * assist registers — confirm against the MMU code that emits this.
 */
TRACE_EVENT(kvm_booke206_stlb_write,
	TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
	TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),

	TP_STRUCT__entry(
		__field(	__u32,	mas0	)
		__field(	__u32,	mas8	)
		__field(	__u32,	mas1	)
		__field(	__u64,	mas2	)
		__field(	__u64,	mas7_3	)
	),

	TP_fast_assign(
		__entry->mas0		= mas0;
		__entry->mas8		= mas8;
		__entry->mas1		= mas1;
		__entry->mas2		= mas2;
		__entry->mas7_3		= mas7_3;
	),

	TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
		__entry->mas0, __entry->mas8, __entry->mas1,
		__entry->mas2, __entry->mas7_3)
);
484
/*
 * kvm_booke206_gtlb_write - logged on a guest TLB (gtlb) entry write.
 * Same MAS-value capture as kvm_booke206_stlb_write, minus mas8
 * (which carries hypervisor-side state and is absent for guest
 * writes — NOTE(review): inferred from the field set; confirm).
 */
TRACE_EVENT(kvm_booke206_gtlb_write,
	TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
	TP_ARGS(mas0, mas1, mas2, mas7_3),

	TP_STRUCT__entry(
		__field(	__u32,	mas0	)
		__field(	__u32,	mas1	)
		__field(	__u64,	mas2	)
		__field(	__u64,	mas7_3	)
	),

	TP_fast_assign(
		__entry->mas0		= mas0;
		__entry->mas1		= mas1;
		__entry->mas2		= mas2;
		__entry->mas7_3		= mas7_3;
	),

	TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
		__entry->mas0, __entry->mas1,
		__entry->mas2, __entry->mas7_3)
);
507
/*
 * kvm_booke206_ref_release - logged when a page reference held for a
 * guest TLB mapping is dropped.  Records the pfn and its flags.
 */
TRACE_EVENT(kvm_booke206_ref_release,
	TP_PROTO(__u64 pfn, __u32 flags),
	TP_ARGS(pfn, flags),

	TP_STRUCT__entry(
		__field(	__u64,	pfn	)
		__field(	__u32,	flags	)
	),

	TP_fast_assign(
		__entry->pfn		= pfn;
		__entry->flags		= flags;
	),

	TP_printk("pfn=%llx flags=%x",
		__entry->pfn, __entry->flags)
);
525
/*
 * kvm_booke_queue_irqprio - logged when an interrupt priority is
 * queued for a BookE vcpu.  Records the vcpu id, the priority being
 * queued, and the resulting pending-exceptions bitmap so the trace
 * shows which exceptions were already outstanding.
 */
TRACE_EVENT(kvm_booke_queue_irqprio,
	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
	TP_ARGS(vcpu, priority),

	TP_STRUCT__entry(
		__field(	__u32,		cpu_nr		)
		__field(	__u32,		priority	)
		__field(	unsigned long,	pending		)
	),

	TP_fast_assign(
		__entry->cpu_nr		= vcpu->vcpu_id;
		__entry->priority	= priority;
		/* snapshot taken at queue time, after this priority is raised */
		__entry->pending	= vcpu->arch.pending_exceptions;
	),

	TP_printk("vcpu=%x prio=%x pending=%lx",
		__entry->cpu_nr, __entry->priority, __entry->pending)
);
545
546#endif
547
548#endif /* _TRACE_KVM_H */ 340#endif /* _TRACE_KVM_H */
549 341
550/* This part must be outside protection */ 342/* This part must be outside protection */