path: root/arch/powerpc/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>  2011-07-24 12:07:03 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-07-24 12:07:03 -0400
commit     5fabc487c96819dd12ddb9414835d170fd9cd6d5 (patch)
tree       01532d492e5074b0d3add29bf92ebf9a9d161e9e /arch/powerpc/kvm
parent     c61264f98c1a974ee6f545f61a4ab33b141d6bda (diff)
parent     3f68b0318bbbd61bf08478ab99a149f0d9e5156e (diff)
Merge branch 'kvm-updates/3.1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/3.1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (143 commits)
  KVM: IOMMU: Disable device assignment without interrupt remapping
  KVM: MMU: trace mmio page fault
  KVM: MMU: mmio page fault support
  KVM: MMU: reorganize struct kvm_shadow_walk_iterator
  KVM: MMU: lockless walking shadow page table
  KVM: MMU: do not need atomicly to set/clear spte
  KVM: MMU: introduce the rules to modify shadow page table
  KVM: MMU: abstract some functions to handle fault pfn
  KVM: MMU: filter out the mmio pfn from the fault pfn
  KVM: MMU: remove bypass_guest_pf
  KVM: MMU: split kvm_mmu_free_page
  KVM: MMU: count used shadow pages on prepareing path
  KVM: MMU: rename 'pt_write' to 'emulate'
  KVM: MMU: cleanup for FNAME(fetch)
  KVM: MMU: optimize to handle dirty bit
  KVM: MMU: cache mmio info on page fault path
  KVM: x86: introduce vcpu_mmio_gva_to_gpa to cleanup the code
  KVM: MMU: do not update slot bitmap if spte is nonpresent
  KVM: MMU: fix walking shadow page table
  KVM guest: KVM Steal time registration
  ...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c               |    4
-rw-r--r--  arch/powerpc/kvm/Kconfig                 |   34
-rw-r--r--  arch/powerpc/kvm/Makefile                |   27
-rw-r--r--  arch/powerpc/kvm/book3s.c                | 1007
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c         |   54
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c      |  180
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c      |   73
-rw-r--r--  arch/powerpc/kvm/book3s_exports.c        |    9
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c             | 1269
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c     |  155
-rw-r--r--  arch/powerpc/kvm/book3s_hv_interrupts.S  |  166
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c      |  370
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 1345
-rw-r--r--  arch/powerpc/kvm/book3s_interrupts.S     |   21
-rw-r--r--  arch/powerpc/kvm/book3s_mmu_hpte.c       |   71
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c             | 1029
-rw-r--r--  arch/powerpc/kvm/book3s_rmhandlers.S     |  102
-rw-r--r--  arch/powerpc/kvm/book3s_segment.S        |  117
-rw-r--r--  arch/powerpc/kvm/booke.c                 |  132
-rw-r--r--  arch/powerpc/kvm/booke.h                 |   23
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S      |   66
-rw-r--r--  arch/powerpc/kvm/e500.c                  |    7
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c          |    4
-rw-r--r--  arch/powerpc/kvm/e500_tlb.c              |  800
-rw-r--r--  arch/powerpc/kvm/e500_tlb.h              |   13
-rw-r--r--  arch/powerpc/kvm/powerpc.c               |   78
-rw-r--r--  arch/powerpc/kvm/timing.c                |    9
-rw-r--r--  arch/powerpc/kvm/trace.h                 |    4
28 files changed, 5689 insertions, 1480 deletions
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5f3cff83e089..33aa715dab28 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -387,8 +387,10 @@ static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
387 } 387 }
388} 388}
389 389
390void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) 390void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
391{ 391{
392 int usermode = vcpu->arch.shared->msr & MSR_PR;
393
392 vcpu->arch.shadow_pid = !usermode; 394 vcpu->arch.shadow_pid = !usermode;
393} 395}
394 396
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 105b6918b23e..78133deb4b64 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
20 bool 20 bool
21 select PREEMPT_NOTIFIERS 21 select PREEMPT_NOTIFIERS
22 select ANON_INODES 22 select ANON_INODES
23 select KVM_MMIO
24 23
25config KVM_BOOK3S_HANDLER 24config KVM_BOOK3S_HANDLER
26 bool 25 bool
@@ -28,16 +27,22 @@ config KVM_BOOK3S_HANDLER
28config KVM_BOOK3S_32_HANDLER 27config KVM_BOOK3S_32_HANDLER
29 bool 28 bool
30 select KVM_BOOK3S_HANDLER 29 select KVM_BOOK3S_HANDLER
30 select KVM_MMIO
31 31
32config KVM_BOOK3S_64_HANDLER 32config KVM_BOOK3S_64_HANDLER
33 bool 33 bool
34 select KVM_BOOK3S_HANDLER 34 select KVM_BOOK3S_HANDLER
35 35
36config KVM_BOOK3S_PR
37 bool
38 select KVM_MMIO
39
36config KVM_BOOK3S_32 40config KVM_BOOK3S_32
37 tristate "KVM support for PowerPC book3s_32 processors" 41 tristate "KVM support for PowerPC book3s_32 processors"
38 depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT 42 depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT
39 select KVM 43 select KVM
40 select KVM_BOOK3S_32_HANDLER 44 select KVM_BOOK3S_32_HANDLER
45 select KVM_BOOK3S_PR
41 ---help--- 46 ---help---
42 Support running unmodified book3s_32 guest kernels 47 Support running unmodified book3s_32 guest kernels
43 in virtual machines on book3s_32 host processors. 48 in virtual machines on book3s_32 host processors.
@@ -50,8 +55,8 @@ config KVM_BOOK3S_32
50config KVM_BOOK3S_64 55config KVM_BOOK3S_64
51 tristate "KVM support for PowerPC book3s_64 processors" 56 tristate "KVM support for PowerPC book3s_64 processors"
52 depends on EXPERIMENTAL && PPC_BOOK3S_64 57 depends on EXPERIMENTAL && PPC_BOOK3S_64
53 select KVM
54 select KVM_BOOK3S_64_HANDLER 58 select KVM_BOOK3S_64_HANDLER
59 select KVM
55 ---help--- 60 ---help---
56 Support running unmodified book3s_64 and book3s_32 guest kernels 61 Support running unmodified book3s_64 and book3s_32 guest kernels
57 in virtual machines on book3s_64 host processors. 62 in virtual machines on book3s_64 host processors.
@@ -61,10 +66,34 @@ config KVM_BOOK3S_64
61 66
62 If unsure, say N. 67 If unsure, say N.
63 68
69config KVM_BOOK3S_64_HV
70 bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
71 depends on KVM_BOOK3S_64
72 ---help---
73 Support running unmodified book3s_64 guest kernels in
74 virtual machines on POWER7 and PPC970 processors that have
75 hypervisor mode available to the host.
76
77 If you say Y here, KVM will use the hardware virtualization
78 facilities of POWER7 (and later) processors, meaning that
79 guest operating systems will run at full hardware speed
80 using supervisor and user modes. However, this also means
81 that KVM is not usable under PowerVM (pHyp), is only usable
82 on POWER7 (or later) processors and PPC970-family processors,
83 and cannot emulate a different processor from the host processor.
84
85 If unsure, say N.
86
87config KVM_BOOK3S_64_PR
88 def_bool y
89 depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
90 select KVM_BOOK3S_PR
91
64config KVM_440 92config KVM_440
65 bool "KVM support for PowerPC 440 processors" 93 bool "KVM support for PowerPC 440 processors"
66 depends on EXPERIMENTAL && 44x 94 depends on EXPERIMENTAL && 44x
67 select KVM 95 select KVM
96 select KVM_MMIO
68 ---help--- 97 ---help---
69 Support running unmodified 440 guest kernels in virtual machines on 98 Support running unmodified 440 guest kernels in virtual machines on
70 440 host processors. 99 440 host processors.
@@ -89,6 +118,7 @@ config KVM_E500
89 bool "KVM support for PowerPC E500 processors" 118 bool "KVM support for PowerPC E500 processors"
90 depends on EXPERIMENTAL && E500 119 depends on EXPERIMENTAL && E500
91 select KVM 120 select KVM
121 select KVM_MMIO
92 ---help--- 122 ---help---
93 Support running unmodified E500 guest kernels in virtual machines on 123 Support running unmodified E500 guest kernels in virtual machines on
94 E500 host processors. 124 E500 host processors.
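
Note on the new KVM_BOOK3S_64_HV option above: it only makes sense when the host kernel itself owns hypervisor state (MSR_HV) on POWER7- or PPC970-class hardware, which is why the help text rules out PowerVM (pHyp); the PR flavour keeps working everywhere else. A minimal sketch of that runtime distinction, modelled on kvmppc_mmu_hv_init() in book3s_64_mmu_hv.c later in this diff — the helper name hv_mode_usable() is invented for illustration, while CPU_FTR_HVMODE and CPU_FTR_ARCH_206 are the feature bits the new code actually tests:

    /* Illustrative only; see kvmppc_mmu_hv_init() later in this diff. */
    #include <linux/types.h>	/* bool */
    #include <asm/cputable.h>	/* cpu_has_feature(), CPU_FTR_* */

    static bool hv_mode_usable(void)	/* hypothetical helper */
    {
        /*
         * HV KVM needs the host to run with hypervisor state; when the
         * kernel boots under pHyp this feature bit is cleared, so only
         * the PR flavour can be used there.
         */
        if (!cpu_has_feature(CPU_FTR_HVMODE))
            return false;

        /*
         * CPU_FTR_ARCH_206 separates POWER7 (real LPID register,
         * 10-bit LPIDs) from PPC970 (6-bit LPIDs); both work, they
         * just reserve LPIDs differently in kvmppc_mmu_hv_init().
         */
        return true;
    }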
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4d6863823f69..08428e2c188d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -38,24 +38,42 @@ kvm-e500-objs := \
38 e500_emulate.o 38 e500_emulate.o
39kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) 39kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
40 40
41kvm-book3s_64-objs := \ 41kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
42 $(common-objs-y) \ 42 ../../../virt/kvm/coalesced_mmio.o \
43 fpu.o \ 43 fpu.o \
44 book3s_paired_singles.o \ 44 book3s_paired_singles.o \
45 book3s.o \ 45 book3s_pr.o \
46 book3s_emulate.o \ 46 book3s_emulate.o \
47 book3s_interrupts.o \ 47 book3s_interrupts.o \
48 book3s_mmu_hpte.o \ 48 book3s_mmu_hpte.o \
49 book3s_64_mmu_host.o \ 49 book3s_64_mmu_host.o \
50 book3s_64_mmu.o \ 50 book3s_64_mmu.o \
51 book3s_32_mmu.o 51 book3s_32_mmu.o
52kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) 52
53kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
54 book3s_hv.o \
55 book3s_hv_interrupts.o \
56 book3s_64_mmu_hv.o
57kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
58 book3s_hv_rm_mmu.o \
59 book3s_64_vio_hv.o \
60 book3s_hv_builtin.o
61
62kvm-book3s_64-module-objs := \
63 ../../../virt/kvm/kvm_main.o \
64 powerpc.o \
65 emulate.o \
66 book3s.o \
67 $(kvm-book3s_64-objs-y)
68
69kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
53 70
54kvm-book3s_32-objs := \ 71kvm-book3s_32-objs := \
55 $(common-objs-y) \ 72 $(common-objs-y) \
56 fpu.o \ 73 fpu.o \
57 book3s_paired_singles.o \ 74 book3s_paired_singles.o \
58 book3s.o \ 75 book3s.o \
76 book3s_pr.o \
59 book3s_emulate.o \ 77 book3s_emulate.o \
60 book3s_interrupts.o \ 78 book3s_interrupts.o \
61 book3s_mmu_hpte.o \ 79 book3s_mmu_hpte.o \
@@ -70,3 +88,4 @@ obj-$(CONFIG_KVM_E500) += kvm.o
70obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 88obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
71obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o 89obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
72 90
91obj-y += $(kvm-book3s_64-builtin-objs-y)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 0f95b5cce033..f68a34d16035 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -17,7 +17,6 @@
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include <linux/err.h> 18#include <linux/err.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include "trace.h"
21 20
22#include <asm/reg.h> 21#include <asm/reg.h>
23#include <asm/cputable.h> 22#include <asm/cputable.h>
@@ -28,25 +27,17 @@
28#include <asm/kvm_ppc.h> 27#include <asm/kvm_ppc.h>
29#include <asm/kvm_book3s.h> 28#include <asm/kvm_book3s.h>
30#include <asm/mmu_context.h> 29#include <asm/mmu_context.h>
30#include <asm/page.h>
31#include <linux/gfp.h> 31#include <linux/gfp.h>
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
34#include <linux/highmem.h> 34#include <linux/highmem.h>
35 35
36#include "trace.h"
37
36#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 38#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
37 39
38/* #define EXIT_DEBUG */ 40/* #define EXIT_DEBUG */
39/* #define DEBUG_EXT */
40
41static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
42 ulong msr);
43
44/* Some compatibility defines */
45#ifdef CONFIG_PPC_BOOK3S_32
46#define MSR_USER32 MSR_USER
47#define MSR_USER64 MSR_USER
48#define HW_PAGE_SIZE PAGE_SIZE
49#endif
50 41
51struct kvm_stats_debugfs_item debugfs_entries[] = { 42struct kvm_stats_debugfs_item debugfs_entries[] = {
52 { "exits", VCPU_STAT(sum_exits) }, 43 { "exits", VCPU_STAT(sum_exits) },
@@ -77,100 +68,11 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
77{ 68{
78} 69}
79 70
80void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
81{
82#ifdef CONFIG_PPC_BOOK3S_64
83 memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
84 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
85 sizeof(get_paca()->shadow_vcpu));
86 to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
87#endif
88
89#ifdef CONFIG_PPC_BOOK3S_32
90 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
91#endif
92}
93
94void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
95{
96#ifdef CONFIG_PPC_BOOK3S_64
97 memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
98 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
99 sizeof(get_paca()->shadow_vcpu));
100 to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
101#endif
102
103 kvmppc_giveup_ext(vcpu, MSR_FP);
104 kvmppc_giveup_ext(vcpu, MSR_VEC);
105 kvmppc_giveup_ext(vcpu, MSR_VSX);
106}
107
108static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
109{
110 ulong smsr = vcpu->arch.shared->msr;
111
112 /* Guest MSR values */
113 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
114 /* Process MSR values */
115 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
116 /* External providers the guest reserved */
117 smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
118 /* 64-bit Process MSR values */
119#ifdef CONFIG_PPC_BOOK3S_64
120 smsr |= MSR_ISF | MSR_HV;
121#endif
122 vcpu->arch.shadow_msr = smsr;
123}
124
125void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
126{
127 ulong old_msr = vcpu->arch.shared->msr;
128
129#ifdef EXIT_DEBUG
130 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
131#endif
132
133 msr &= to_book3s(vcpu)->msr_mask;
134 vcpu->arch.shared->msr = msr;
135 kvmppc_recalc_shadow_msr(vcpu);
136
137 if (msr & MSR_POW) {
138 if (!vcpu->arch.pending_exceptions) {
139 kvm_vcpu_block(vcpu);
140 vcpu->stat.halt_wakeup++;
141
142 /* Unset POW bit after we woke up */
143 msr &= ~MSR_POW;
144 vcpu->arch.shared->msr = msr;
145 }
146 }
147
148 if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
149 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
150 kvmppc_mmu_flush_segments(vcpu);
151 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
152
153 /* Preload magic page segment when in kernel mode */
154 if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
155 struct kvm_vcpu_arch *a = &vcpu->arch;
156
157 if (msr & MSR_DR)
158 kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
159 else
160 kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
161 }
162 }
163
164 /* Preload FPU if it's enabled */
165 if (vcpu->arch.shared->msr & MSR_FP)
166 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
167}
168
169void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) 71void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
170{ 72{
171 vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); 73 vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
172 vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; 74 vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
173 kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); 75 kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
174 vcpu->arch.mmu.reset_msr(vcpu); 76 vcpu->arch.mmu.reset_msr(vcpu);
175} 77}
176 78
@@ -204,11 +106,13 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
204static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, 106static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
205 unsigned int vec) 107 unsigned int vec)
206{ 108{
109 unsigned long old_pending = vcpu->arch.pending_exceptions;
110
207 clear_bit(kvmppc_book3s_vec2irqprio(vec), 111 clear_bit(kvmppc_book3s_vec2irqprio(vec),
208 &vcpu->arch.pending_exceptions); 112 &vcpu->arch.pending_exceptions);
209 113
210 if (!vcpu->arch.pending_exceptions) 114 kvmppc_update_int_pending(vcpu, vcpu->arch.pending_exceptions,
211 vcpu->arch.shared->int_pending = 0; 115 old_pending);
212} 116}
213 117
214void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) 118void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
@@ -225,8 +129,8 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
225 129
226void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) 130void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
227{ 131{
228 to_book3s(vcpu)->prog_flags = flags; 132 /* might as well deliver this straight away */
229 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); 133 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
230} 134}
231 135
232void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 136void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
@@ -266,21 +170,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
266{ 170{
267 int deliver = 1; 171 int deliver = 1;
268 int vec = 0; 172 int vec = 0;
269 ulong flags = 0ULL; 173 bool crit = kvmppc_critical_section(vcpu);
270 ulong crit_raw = vcpu->arch.shared->critical;
271 ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
272 bool crit;
273
274 /* Truncate crit indicators in 32 bit mode */
275 if (!(vcpu->arch.shared->msr & MSR_SF)) {
276 crit_raw &= 0xffffffff;
277 crit_r1 &= 0xffffffff;
278 }
279
280 /* Critical section when crit == r1 */
281 crit = (crit_raw == crit_r1);
282 /* ... and we're in supervisor mode */
283 crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
284 174
285 switch (priority) { 175 switch (priority) {
286 case BOOK3S_IRQPRIO_DECREMENTER: 176 case BOOK3S_IRQPRIO_DECREMENTER:
@@ -315,7 +205,6 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
315 break; 205 break;
316 case BOOK3S_IRQPRIO_PROGRAM: 206 case BOOK3S_IRQPRIO_PROGRAM:
317 vec = BOOK3S_INTERRUPT_PROGRAM; 207 vec = BOOK3S_INTERRUPT_PROGRAM;
318 flags = to_book3s(vcpu)->prog_flags;
319 break; 208 break;
320 case BOOK3S_IRQPRIO_VSX: 209 case BOOK3S_IRQPRIO_VSX:
321 vec = BOOK3S_INTERRUPT_VSX; 210 vec = BOOK3S_INTERRUPT_VSX;
@@ -346,7 +235,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
346#endif 235#endif
347 236
348 if (deliver) 237 if (deliver)
349 kvmppc_inject_interrupt(vcpu, vec, flags); 238 kvmppc_inject_interrupt(vcpu, vec, 0);
350 239
351 return deliver; 240 return deliver;
352} 241}
@@ -392,64 +281,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
392 } 281 }
393 282
394 /* Tell the guest about our interrupt status */ 283 /* Tell the guest about our interrupt status */
395 if (*pending) 284 kvmppc_update_int_pending(vcpu, *pending, old_pending);
396 vcpu->arch.shared->int_pending = 1;
397 else if (old_pending)
398 vcpu->arch.shared->int_pending = 0;
399}
400
401void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
402{
403 u32 host_pvr;
404
405 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
406 vcpu->arch.pvr = pvr;
407#ifdef CONFIG_PPC_BOOK3S_64
408 if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
409 kvmppc_mmu_book3s_64_init(vcpu);
410 to_book3s(vcpu)->hior = 0xfff00000;
411 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
412 } else
413#endif
414 {
415 kvmppc_mmu_book3s_32_init(vcpu);
416 to_book3s(vcpu)->hior = 0;
417 to_book3s(vcpu)->msr_mask = 0xffffffffULL;
418 }
419
420 /* If we are in hypervisor level on 970, we can tell the CPU to
421 * treat DCBZ as 32 bytes store */
422 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
423 if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
424 !strcmp(cur_cpu_spec->platform, "ppc970"))
425 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
426
427 /* Cell performs badly if MSR_FEx are set. So let's hope nobody
428 really needs them in a VM on Cell and force disable them. */
429 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
430 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
431
432#ifdef CONFIG_PPC_BOOK3S_32
433 /* 32 bit Book3S always has 32 byte dcbz */
434 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
435#endif
436
437 /* On some CPUs we can execute paired single operations natively */
438 asm ( "mfpvr %0" : "=r"(host_pvr));
439 switch (host_pvr) {
440 case 0x00080200: /* lonestar 2.0 */
441 case 0x00088202: /* lonestar 2.2 */
442 case 0x70000100: /* gekko 1.0 */
443 case 0x00080100: /* gekko 2.0 */
444 case 0x00083203: /* gekko 2.3a */
445 case 0x00083213: /* gekko 2.3b */
446 case 0x00083204: /* gekko 2.4 */
447 case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
448 case 0x00087200: /* broadway */
449 vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
450 /* Enable HID2.PSE - in case we need it later */
451 mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
452 }
453} 285}
454 286
455pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) 287pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -471,44 +303,6 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
471 return gfn_to_pfn(vcpu->kvm, gfn); 303 return gfn_to_pfn(vcpu->kvm, gfn);
472} 304}
473 305
474/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
475 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
476 * emulate 32 bytes dcbz length.
477 *
478 * The Book3s_64 inventors also realized this case and implemented a special bit
479 * in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
480 *
481 * My approach here is to patch the dcbz instruction on executing pages.
482 */
483static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
484{
485 struct page *hpage;
486 u64 hpage_offset;
487 u32 *page;
488 int i;
489
490 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
491 if (is_error_page(hpage)) {
492 kvm_release_page_clean(hpage);
493 return;
494 }
495
496 hpage_offset = pte->raddr & ~PAGE_MASK;
497 hpage_offset &= ~0xFFFULL;
498 hpage_offset /= 4;
499
500 get_page(hpage);
501 page = kmap_atomic(hpage, KM_USER0);
502
503 /* patch dcbz into reserved instruction, so we trap */
504 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
505 if ((page[i] & 0xff0007ff) == INS_DCBZ)
506 page[i] &= 0xfffffff7;
507
508 kunmap_atomic(page, KM_USER0);
509 put_page(hpage);
510}
511
512static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 306static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
513 struct kvmppc_pte *pte) 307 struct kvmppc_pte *pte)
514{ 308{
@@ -606,519 +400,6 @@ mmio:
606 return EMULATE_DO_MMIO; 400 return EMULATE_DO_MMIO;
607} 401}
608 402
609static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
610{
611 ulong mp_pa = vcpu->arch.magic_page_pa;
612
613 if (unlikely(mp_pa) &&
614 unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
615 return 1;
616 }
617
618 return kvm_is_visible_gfn(vcpu->kvm, gfn);
619}
620
621int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
622 ulong eaddr, int vec)
623{
624 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
625 int r = RESUME_GUEST;
626 int relocated;
627 int page_found = 0;
628 struct kvmppc_pte pte;
629 bool is_mmio = false;
630 bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
631 bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
632 u64 vsid;
633
634 relocated = data ? dr : ir;
635
636 /* Resolve real address if translation turned on */
637 if (relocated) {
638 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
639 } else {
640 pte.may_execute = true;
641 pte.may_read = true;
642 pte.may_write = true;
643 pte.raddr = eaddr & KVM_PAM;
644 pte.eaddr = eaddr;
645 pte.vpage = eaddr >> 12;
646 }
647
648 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
649 case 0:
650 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
651 break;
652 case MSR_DR:
653 case MSR_IR:
654 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
655
656 if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
657 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
658 else
659 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
660 pte.vpage |= vsid;
661
662 if (vsid == -1)
663 page_found = -EINVAL;
664 break;
665 }
666
667 if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
668 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
669 /*
670 * If we do the dcbz hack, we have to NX on every execution,
671 * so we can patch the executing code. This renders our guest
672 * NX-less.
673 */
674 pte.may_execute = !data;
675 }
676
677 if (page_found == -ENOENT) {
678 /* Page not found in guest PTE entries */
679 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
680 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
681 vcpu->arch.shared->msr |=
682 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
683 kvmppc_book3s_queue_irqprio(vcpu, vec);
684 } else if (page_found == -EPERM) {
685 /* Storage protection */
686 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
687 vcpu->arch.shared->dsisr =
688 to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
689 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
690 vcpu->arch.shared->msr |=
691 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
692 kvmppc_book3s_queue_irqprio(vcpu, vec);
693 } else if (page_found == -EINVAL) {
694 /* Page not found in guest SLB */
695 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
696 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
697 } else if (!is_mmio &&
698 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
699 /* The guest's PTE is not mapped yet. Map on the host */
700 kvmppc_mmu_map_page(vcpu, &pte);
701 if (data)
702 vcpu->stat.sp_storage++;
703 else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
704 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
705 kvmppc_patch_dcbz(vcpu, &pte);
706 } else {
707 /* MMIO */
708 vcpu->stat.mmio_exits++;
709 vcpu->arch.paddr_accessed = pte.raddr;
710 r = kvmppc_emulate_mmio(run, vcpu);
711 if ( r == RESUME_HOST_NV )
712 r = RESUME_HOST;
713 }
714
715 return r;
716}
717
718static inline int get_fpr_index(int i)
719{
720#ifdef CONFIG_VSX
721 i *= 2;
722#endif
723 return i;
724}
725
726/* Give up external provider (FPU, Altivec, VSX) */
727void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
728{
729 struct thread_struct *t = &current->thread;
730 u64 *vcpu_fpr = vcpu->arch.fpr;
731#ifdef CONFIG_VSX
732 u64 *vcpu_vsx = vcpu->arch.vsr;
733#endif
734 u64 *thread_fpr = (u64*)t->fpr;
735 int i;
736
737 if (!(vcpu->arch.guest_owned_ext & msr))
738 return;
739
740#ifdef DEBUG_EXT
741 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
742#endif
743
744 switch (msr) {
745 case MSR_FP:
746 giveup_fpu(current);
747 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
748 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
749
750 vcpu->arch.fpscr = t->fpscr.val;
751 break;
752 case MSR_VEC:
753#ifdef CONFIG_ALTIVEC
754 giveup_altivec(current);
755 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
756 vcpu->arch.vscr = t->vscr;
757#endif
758 break;
759 case MSR_VSX:
760#ifdef CONFIG_VSX
761 __giveup_vsx(current);
762 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
763 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
764#endif
765 break;
766 default:
767 BUG();
768 }
769
770 vcpu->arch.guest_owned_ext &= ~msr;
771 current->thread.regs->msr &= ~msr;
772 kvmppc_recalc_shadow_msr(vcpu);
773}
774
775static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
776{
777 ulong srr0 = kvmppc_get_pc(vcpu);
778 u32 last_inst = kvmppc_get_last_inst(vcpu);
779 int ret;
780
781 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
782 if (ret == -ENOENT) {
783 ulong msr = vcpu->arch.shared->msr;
784
785 msr = kvmppc_set_field(msr, 33, 33, 1);
786 msr = kvmppc_set_field(msr, 34, 36, 0);
787 vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
788 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
789 return EMULATE_AGAIN;
790 }
791
792 return EMULATE_DONE;
793}
794
795static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
796{
797
798 /* Need to do paired single emulation? */
799 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
800 return EMULATE_DONE;
801
802 /* Read out the instruction */
803 if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
804 /* Need to emulate */
805 return EMULATE_FAIL;
806
807 return EMULATE_AGAIN;
808}
809
810/* Handle external providers (FPU, Altivec, VSX) */
811static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
812 ulong msr)
813{
814 struct thread_struct *t = &current->thread;
815 u64 *vcpu_fpr = vcpu->arch.fpr;
816#ifdef CONFIG_VSX
817 u64 *vcpu_vsx = vcpu->arch.vsr;
818#endif
819 u64 *thread_fpr = (u64*)t->fpr;
820 int i;
821
822 /* When we have paired singles, we emulate in software */
823 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
824 return RESUME_GUEST;
825
826 if (!(vcpu->arch.shared->msr & msr)) {
827 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
828 return RESUME_GUEST;
829 }
830
831 /* We already own the ext */
832 if (vcpu->arch.guest_owned_ext & msr) {
833 return RESUME_GUEST;
834 }
835
836#ifdef DEBUG_EXT
837 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
838#endif
839
840 current->thread.regs->msr |= msr;
841
842 switch (msr) {
843 case MSR_FP:
844 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
845 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
846
847 t->fpscr.val = vcpu->arch.fpscr;
848 t->fpexc_mode = 0;
849 kvmppc_load_up_fpu();
850 break;
851 case MSR_VEC:
852#ifdef CONFIG_ALTIVEC
853 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
854 t->vscr = vcpu->arch.vscr;
855 t->vrsave = -1;
856 kvmppc_load_up_altivec();
857#endif
858 break;
859 case MSR_VSX:
860#ifdef CONFIG_VSX
861 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
862 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
863 kvmppc_load_up_vsx();
864#endif
865 break;
866 default:
867 BUG();
868 }
869
870 vcpu->arch.guest_owned_ext |= msr;
871
872 kvmppc_recalc_shadow_msr(vcpu);
873
874 return RESUME_GUEST;
875}
876
877int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
878 unsigned int exit_nr)
879{
880 int r = RESUME_HOST;
881
882 vcpu->stat.sum_exits++;
883
884 run->exit_reason = KVM_EXIT_UNKNOWN;
885 run->ready_for_interrupt_injection = 1;
886
887 trace_kvm_book3s_exit(exit_nr, vcpu);
888 kvm_resched(vcpu);
889 switch (exit_nr) {
890 case BOOK3S_INTERRUPT_INST_STORAGE:
891 vcpu->stat.pf_instruc++;
892
893#ifdef CONFIG_PPC_BOOK3S_32
894 /* We set segments as unused segments when invalidating them. So
895 * treat the respective fault as segment fault. */
896 if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
897 == SR_INVALID) {
898 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
899 r = RESUME_GUEST;
900 break;
901 }
902#endif
903
904 /* only care about PTEG not found errors, but leave NX alone */
905 if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
906 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
907 vcpu->stat.sp_instruc++;
908 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
909 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
910 /*
911 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
912 * so we can't use the NX bit inside the guest. Let's cross our fingers,
913 * that no guest that needs the dcbz hack does NX.
914 */
915 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
916 r = RESUME_GUEST;
917 } else {
918 vcpu->arch.shared->msr |=
919 to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
920 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
921 r = RESUME_GUEST;
922 }
923 break;
924 case BOOK3S_INTERRUPT_DATA_STORAGE:
925 {
926 ulong dar = kvmppc_get_fault_dar(vcpu);
927 vcpu->stat.pf_storage++;
928
929#ifdef CONFIG_PPC_BOOK3S_32
930 /* We set segments as unused segments when invalidating them. So
931 * treat the respective fault as segment fault. */
932 if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
933 kvmppc_mmu_map_segment(vcpu, dar);
934 r = RESUME_GUEST;
935 break;
936 }
937#endif
938
939 /* The only case we need to handle is missing shadow PTEs */
940 if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
941 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
942 } else {
943 vcpu->arch.shared->dar = dar;
944 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
945 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
946 r = RESUME_GUEST;
947 }
948 break;
949 }
950 case BOOK3S_INTERRUPT_DATA_SEGMENT:
951 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
952 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
953 kvmppc_book3s_queue_irqprio(vcpu,
954 BOOK3S_INTERRUPT_DATA_SEGMENT);
955 }
956 r = RESUME_GUEST;
957 break;
958 case BOOK3S_INTERRUPT_INST_SEGMENT:
959 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
960 kvmppc_book3s_queue_irqprio(vcpu,
961 BOOK3S_INTERRUPT_INST_SEGMENT);
962 }
963 r = RESUME_GUEST;
964 break;
965 /* We're good on these - the host merely wanted to get our attention */
966 case BOOK3S_INTERRUPT_DECREMENTER:
967 vcpu->stat.dec_exits++;
968 r = RESUME_GUEST;
969 break;
970 case BOOK3S_INTERRUPT_EXTERNAL:
971 vcpu->stat.ext_intr_exits++;
972 r = RESUME_GUEST;
973 break;
974 case BOOK3S_INTERRUPT_PERFMON:
975 r = RESUME_GUEST;
976 break;
977 case BOOK3S_INTERRUPT_PROGRAM:
978 {
979 enum emulation_result er;
980 ulong flags;
981
982program_interrupt:
983 flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
984
985 if (vcpu->arch.shared->msr & MSR_PR) {
986#ifdef EXIT_DEBUG
987 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
988#endif
989 if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
990 (INS_DCBZ & 0xfffffff7)) {
991 kvmppc_core_queue_program(vcpu, flags);
992 r = RESUME_GUEST;
993 break;
994 }
995 }
996
997 vcpu->stat.emulated_inst_exits++;
998 er = kvmppc_emulate_instruction(run, vcpu);
999 switch (er) {
1000 case EMULATE_DONE:
1001 r = RESUME_GUEST_NV;
1002 break;
1003 case EMULATE_AGAIN:
1004 r = RESUME_GUEST;
1005 break;
1006 case EMULATE_FAIL:
1007 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
1008 __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
1009 kvmppc_core_queue_program(vcpu, flags);
1010 r = RESUME_GUEST;
1011 break;
1012 case EMULATE_DO_MMIO:
1013 run->exit_reason = KVM_EXIT_MMIO;
1014 r = RESUME_HOST_NV;
1015 break;
1016 default:
1017 BUG();
1018 }
1019 break;
1020 }
1021 case BOOK3S_INTERRUPT_SYSCALL:
1022 if (vcpu->arch.osi_enabled &&
1023 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
1024 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
1025 /* MOL hypercalls */
1026 u64 *gprs = run->osi.gprs;
1027 int i;
1028
1029 run->exit_reason = KVM_EXIT_OSI;
1030 for (i = 0; i < 32; i++)
1031 gprs[i] = kvmppc_get_gpr(vcpu, i);
1032 vcpu->arch.osi_needed = 1;
1033 r = RESUME_HOST_NV;
1034 } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
1035 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
1036 /* KVM PV hypercalls */
1037 kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
1038 r = RESUME_GUEST;
1039 } else {
1040 /* Guest syscalls */
1041 vcpu->stat.syscall_exits++;
1042 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1043 r = RESUME_GUEST;
1044 }
1045 break;
1046 case BOOK3S_INTERRUPT_FP_UNAVAIL:
1047 case BOOK3S_INTERRUPT_ALTIVEC:
1048 case BOOK3S_INTERRUPT_VSX:
1049 {
1050 int ext_msr = 0;
1051
1052 switch (exit_nr) {
1053 case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
1054 case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
1055 case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
1056 }
1057
1058 switch (kvmppc_check_ext(vcpu, exit_nr)) {
1059 case EMULATE_DONE:
1060 /* everything ok - let's enable the ext */
1061 r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
1062 break;
1063 case EMULATE_FAIL:
1064 /* we need to emulate this instruction */
1065 goto program_interrupt;
1066 break;
1067 default:
1068 /* nothing to worry about - go again */
1069 break;
1070 }
1071 break;
1072 }
1073 case BOOK3S_INTERRUPT_ALIGNMENT:
1074 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
1075 vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
1076 kvmppc_get_last_inst(vcpu));
1077 vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
1078 kvmppc_get_last_inst(vcpu));
1079 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1080 }
1081 r = RESUME_GUEST;
1082 break;
1083 case BOOK3S_INTERRUPT_MACHINE_CHECK:
1084 case BOOK3S_INTERRUPT_TRACE:
1085 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1086 r = RESUME_GUEST;
1087 break;
1088 default:
1089 /* Ugh - bork here! What did we get? */
1090 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
1091 exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
1092 r = RESUME_HOST;
1093 BUG();
1094 break;
1095 }
1096
1097
1098 if (!(r & RESUME_HOST)) {
1099 /* To avoid clobbering exit_reason, only check for signals if
1100 * we aren't already exiting to userspace for some other
1101 * reason. */
1102 if (signal_pending(current)) {
1103#ifdef EXIT_DEBUG
1104 printk(KERN_EMERG "KVM: Going back to host\n");
1105#endif
1106 vcpu->stat.signal_exits++;
1107 run->exit_reason = KVM_EXIT_INTR;
1108 r = -EINTR;
1109 } else {
1110 /* In case an interrupt came in that was triggered
1111 * from userspace (like DEC), we need to check what
1112 * to inject now! */
1113 kvmppc_core_deliver_interrupts(vcpu);
1114 }
1115 }
1116
1117 trace_kvm_book3s_reenter(r, vcpu);
1118
1119 return r;
1120}
1121
1122int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 403int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1123{ 404{
1124 return 0; 405 return 0;
@@ -1179,69 +460,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1179 return 0; 460 return 0;
1180} 461}
1181 462
1182int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1183 struct kvm_sregs *sregs)
1184{
1185 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
1186 int i;
1187
1188 sregs->pvr = vcpu->arch.pvr;
1189
1190 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
1191 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
1192 for (i = 0; i < 64; i++) {
1193 sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
1194 sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
1195 }
1196 } else {
1197 for (i = 0; i < 16; i++)
1198 sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
1199
1200 for (i = 0; i < 8; i++) {
1201 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
1202 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
1203 }
1204 }
1205
1206 return 0;
1207}
1208
1209int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1210 struct kvm_sregs *sregs)
1211{
1212 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
1213 int i;
1214
1215 kvmppc_set_pvr(vcpu, sregs->pvr);
1216
1217 vcpu3s->sdr1 = sregs->u.s.sdr1;
1218 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
1219 for (i = 0; i < 64; i++) {
1220 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
1221 sregs->u.s.ppc64.slb[i].slbe);
1222 }
1223 } else {
1224 for (i = 0; i < 16; i++) {
1225 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
1226 }
1227 for (i = 0; i < 8; i++) {
1228 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
1229 (u32)sregs->u.s.ppc32.ibat[i]);
1230 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
1231 (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
1232 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
1233 (u32)sregs->u.s.ppc32.dbat[i]);
1234 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
1235 (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
1236 }
1237 }
1238
1239 /* Flush the MMU after messing with the segments */
1240 kvmppc_mmu_pte_flush(vcpu, 0, 0);
1241
1242 return 0;
1243}
1244
1245int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 463int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1246{ 464{
1247 return -ENOTSUPP; 465 return -ENOTSUPP;
@@ -1296,202 +514,3 @@ out:
1296 mutex_unlock(&kvm->slots_lock); 514 mutex_unlock(&kvm->slots_lock);
1297 return r; 515 return r;
1298} 516}
1299
1300int kvmppc_core_check_processor_compat(void)
1301{
1302 return 0;
1303}
1304
1305struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1306{
1307 struct kvmppc_vcpu_book3s *vcpu_book3s;
1308 struct kvm_vcpu *vcpu;
1309 int err = -ENOMEM;
1310 unsigned long p;
1311
1312 vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
1313 if (!vcpu_book3s)
1314 goto out;
1315
1316 vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
1317 kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
1318 if (!vcpu_book3s->shadow_vcpu)
1319 goto free_vcpu;
1320
1321 vcpu = &vcpu_book3s->vcpu;
1322 err = kvm_vcpu_init(vcpu, kvm, id);
1323 if (err)
1324 goto free_shadow_vcpu;
1325
1326 p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
1327 /* the real shared page fills the last 4k of our page */
1328 vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
1329 if (!p)
1330 goto uninit_vcpu;
1331
1332 vcpu->arch.host_retip = kvm_return_point;
1333 vcpu->arch.host_msr = mfmsr();
1334#ifdef CONFIG_PPC_BOOK3S_64
1335 /* default to book3s_64 (970fx) */
1336 vcpu->arch.pvr = 0x3C0301;
1337#else
1338 /* default to book3s_32 (750) */
1339 vcpu->arch.pvr = 0x84202;
1340#endif
1341 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
1342 vcpu_book3s->slb_nr = 64;
1343
1344 /* remember where some real-mode handlers are */
1345 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
1346 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
1347 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
1348#ifdef CONFIG_PPC_BOOK3S_64
1349 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
1350#else
1351 vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
1352#endif
1353
1354 vcpu->arch.shadow_msr = MSR_USER64;
1355
1356 err = kvmppc_mmu_init(vcpu);
1357 if (err < 0)
1358 goto uninit_vcpu;
1359
1360 return vcpu;
1361
1362uninit_vcpu:
1363 kvm_vcpu_uninit(vcpu);
1364free_shadow_vcpu:
1365 kfree(vcpu_book3s->shadow_vcpu);
1366free_vcpu:
1367 vfree(vcpu_book3s);
1368out:
1369 return ERR_PTR(err);
1370}
1371
1372void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
1373{
1374 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
1375
1376 free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
1377 kvm_vcpu_uninit(vcpu);
1378 kfree(vcpu_book3s->shadow_vcpu);
1379 vfree(vcpu_book3s);
1380}
1381
1382extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
1383int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1384{
1385 int ret;
1386 double fpr[32][TS_FPRWIDTH];
1387 unsigned int fpscr;
1388 int fpexc_mode;
1389#ifdef CONFIG_ALTIVEC
1390 vector128 vr[32];
1391 vector128 vscr;
1392 unsigned long uninitialized_var(vrsave);
1393 int used_vr;
1394#endif
1395#ifdef CONFIG_VSX
1396 int used_vsr;
1397#endif
1398 ulong ext_msr;
1399
1400 /* No need to go into the guest when all we do is going out */
1401 if (signal_pending(current)) {
1402 kvm_run->exit_reason = KVM_EXIT_INTR;
1403 return -EINTR;
1404 }
1405
1406 /* Save FPU state in stack */
1407 if (current->thread.regs->msr & MSR_FP)
1408 giveup_fpu(current);
1409 memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
1410 fpscr = current->thread.fpscr.val;
1411 fpexc_mode = current->thread.fpexc_mode;
1412
1413#ifdef CONFIG_ALTIVEC
1414 /* Save Altivec state in stack */
1415 used_vr = current->thread.used_vr;
1416 if (used_vr) {
1417 if (current->thread.regs->msr & MSR_VEC)
1418 giveup_altivec(current);
1419 memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
1420 vscr = current->thread.vscr;
1421 vrsave = current->thread.vrsave;
1422 }
1423#endif
1424
1425#ifdef CONFIG_VSX
1426 /* Save VSX state in stack */
1427 used_vsr = current->thread.used_vsr;
1428 if (used_vsr && (current->thread.regs->msr & MSR_VSX))
1429 __giveup_vsx(current);
1430#endif
1431
1432 /* Remember the MSR with disabled extensions */
1433 ext_msr = current->thread.regs->msr;
1434
1435 /* XXX we get called with irq disabled - change that! */
1436 local_irq_enable();
1437
1438 /* Preload FPU if it's enabled */
1439 if (vcpu->arch.shared->msr & MSR_FP)
1440 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
1441
1442 ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
1443
1444 local_irq_disable();
1445
1446 current->thread.regs->msr = ext_msr;
1447
1448 /* Make sure we save the guest FPU/Altivec/VSX state */
1449 kvmppc_giveup_ext(vcpu, MSR_FP);
1450 kvmppc_giveup_ext(vcpu, MSR_VEC);
1451 kvmppc_giveup_ext(vcpu, MSR_VSX);
1452
1453 /* Restore FPU state from stack */
1454 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
1455 current->thread.fpscr.val = fpscr;
1456 current->thread.fpexc_mode = fpexc_mode;
1457
1458#ifdef CONFIG_ALTIVEC
1459 /* Restore Altivec state from stack */
1460 if (used_vr && current->thread.used_vr) {
1461 memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
1462 current->thread.vscr = vscr;
1463 current->thread.vrsave = vrsave;
1464 }
1465 current->thread.used_vr = used_vr;
1466#endif
1467
1468#ifdef CONFIG_VSX
1469 current->thread.used_vsr = used_vsr;
1470#endif
1471
1472 return ret;
1473}
1474
1475static int kvmppc_book3s_init(void)
1476{
1477 int r;
1478
1479 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
1480 THIS_MODULE);
1481
1482 if (r)
1483 return r;
1484
1485 r = kvmppc_mmu_hpte_sysinit();
1486
1487 return r;
1488}
1489
1490static void kvmppc_book3s_exit(void)
1491{
1492 kvmppc_mmu_hpte_sysexit();
1493 kvm_exit();
1494}
1495
1496module_init(kvmppc_book3s_init);
1497module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index d7889ef3211e..c6d3e194b6b4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -41,36 +41,36 @@ static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
41} 41}
42 42
43static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( 43static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
44 struct kvmppc_vcpu_book3s *vcpu_book3s, 44 struct kvm_vcpu *vcpu,
45 gva_t eaddr) 45 gva_t eaddr)
46{ 46{
47 int i; 47 int i;
48 u64 esid = GET_ESID(eaddr); 48 u64 esid = GET_ESID(eaddr);
49 u64 esid_1t = GET_ESID_1T(eaddr); 49 u64 esid_1t = GET_ESID_1T(eaddr);
50 50
51 for (i = 0; i < vcpu_book3s->slb_nr; i++) { 51 for (i = 0; i < vcpu->arch.slb_nr; i++) {
52 u64 cmp_esid = esid; 52 u64 cmp_esid = esid;
53 53
54 if (!vcpu_book3s->slb[i].valid) 54 if (!vcpu->arch.slb[i].valid)
55 continue; 55 continue;
56 56
57 if (vcpu_book3s->slb[i].tb) 57 if (vcpu->arch.slb[i].tb)
58 cmp_esid = esid_1t; 58 cmp_esid = esid_1t;
59 59
60 if (vcpu_book3s->slb[i].esid == cmp_esid) 60 if (vcpu->arch.slb[i].esid == cmp_esid)
61 return &vcpu_book3s->slb[i]; 61 return &vcpu->arch.slb[i];
62 } 62 }
63 63
64 dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n", 64 dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n",
65 eaddr, esid, esid_1t); 65 eaddr, esid, esid_1t);
66 for (i = 0; i < vcpu_book3s->slb_nr; i++) { 66 for (i = 0; i < vcpu->arch.slb_nr; i++) {
67 if (vcpu_book3s->slb[i].vsid) 67 if (vcpu->arch.slb[i].vsid)
68 dprintk(" %d: %c%c%c %llx %llx\n", i, 68 dprintk(" %d: %c%c%c %llx %llx\n", i,
69 vcpu_book3s->slb[i].valid ? 'v' : ' ', 69 vcpu->arch.slb[i].valid ? 'v' : ' ',
70 vcpu_book3s->slb[i].large ? 'l' : ' ', 70 vcpu->arch.slb[i].large ? 'l' : ' ',
71 vcpu_book3s->slb[i].tb ? 't' : ' ', 71 vcpu->arch.slb[i].tb ? 't' : ' ',
72 vcpu_book3s->slb[i].esid, 72 vcpu->arch.slb[i].esid,
73 vcpu_book3s->slb[i].vsid); 73 vcpu->arch.slb[i].vsid);
74 } 74 }
75 75
76 return NULL; 76 return NULL;
@@ -81,7 +81,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
81{ 81{
82 struct kvmppc_slb *slb; 82 struct kvmppc_slb *slb;
83 83
84 slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), eaddr); 84 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
85 if (!slb) 85 if (!slb)
86 return 0; 86 return 0;
87 87
@@ -180,7 +180,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
180 return 0; 180 return 0;
181 } 181 }
182 182
183 slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr); 183 slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
184 if (!slbe) 184 if (!slbe)
185 goto no_seg_found; 185 goto no_seg_found;
186 186
@@ -320,10 +320,10 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
320 esid_1t = GET_ESID_1T(rb); 320 esid_1t = GET_ESID_1T(rb);
321 slb_nr = rb & 0xfff; 321 slb_nr = rb & 0xfff;
322 322
323 if (slb_nr > vcpu_book3s->slb_nr) 323 if (slb_nr > vcpu->arch.slb_nr)
324 return; 324 return;
325 325
326 slbe = &vcpu_book3s->slb[slb_nr]; 326 slbe = &vcpu->arch.slb[slb_nr];
327 327
328 slbe->large = (rs & SLB_VSID_L) ? 1 : 0; 328 slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
329 slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; 329 slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
@@ -344,38 +344,35 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
344 344
345static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) 345static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
346{ 346{
347 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
348 struct kvmppc_slb *slbe; 347 struct kvmppc_slb *slbe;
349 348
350 if (slb_nr > vcpu_book3s->slb_nr) 349 if (slb_nr > vcpu->arch.slb_nr)
351 return 0; 350 return 0;
352 351
353 slbe = &vcpu_book3s->slb[slb_nr]; 352 slbe = &vcpu->arch.slb[slb_nr];
354 353
355 return slbe->orige; 354 return slbe->orige;
356} 355}
357 356
358static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr) 357static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
359{ 358{
360 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
361 struct kvmppc_slb *slbe; 359 struct kvmppc_slb *slbe;
362 360
363 if (slb_nr > vcpu_book3s->slb_nr) 361 if (slb_nr > vcpu->arch.slb_nr)
364 return 0; 362 return 0;
365 363
366 slbe = &vcpu_book3s->slb[slb_nr]; 364 slbe = &vcpu->arch.slb[slb_nr];
367 365
368 return slbe->origv; 366 return slbe->origv;
369} 367}
370 368
371static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) 369static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
372{ 370{
373 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
374 struct kvmppc_slb *slbe; 371 struct kvmppc_slb *slbe;
375 372
376 dprintk("KVM MMU: slbie(0x%llx)\n", ea); 373 dprintk("KVM MMU: slbie(0x%llx)\n", ea);
377 374
378 slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, ea); 375 slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
379 376
380 if (!slbe) 377 if (!slbe)
381 return; 378 return;
@@ -389,13 +386,12 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
389 386
390static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) 387static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
391{ 388{
392 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
393 int i; 389 int i;
394 390
395 dprintk("KVM MMU: slbia()\n"); 391 dprintk("KVM MMU: slbia()\n");
396 392
397 for (i = 1; i < vcpu_book3s->slb_nr; i++) 393 for (i = 1; i < vcpu->arch.slb_nr; i++)
398 vcpu_book3s->slb[i].valid = false; 394 vcpu->arch.slb[i].valid = false;
399 395
400 if (vcpu->arch.shared->msr & MSR_IR) { 396 if (vcpu->arch.shared->msr & MSR_IR) {
401 kvmppc_mmu_flush_segments(vcpu); 397 kvmppc_mmu_flush_segments(vcpu);
@@ -464,7 +460,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
464 ulong mp_ea = vcpu->arch.magic_page_ea; 460 ulong mp_ea = vcpu->arch.magic_page_ea;
465 461
466 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 462 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
467 slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); 463 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
468 if (slb) 464 if (slb)
469 gvsid = slb->vsid; 465 gvsid = slb->vsid;
470 } 466 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
new file mode 100644
index 000000000000..bc3a2ea94217
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -0,0 +1,180 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 */
17
18#include <linux/types.h>
19#include <linux/string.h>
20#include <linux/kvm.h>
21#include <linux/kvm_host.h>
22#include <linux/highmem.h>
23#include <linux/gfp.h>
24#include <linux/slab.h>
25#include <linux/hugetlb.h>
26
27#include <asm/tlbflush.h>
28#include <asm/kvm_ppc.h>
29#include <asm/kvm_book3s.h>
30#include <asm/mmu-hash64.h>
31#include <asm/hvcall.h>
32#include <asm/synch.h>
33#include <asm/ppc-opcode.h>
34#include <asm/cputable.h>
35
36/* For now use fixed-size 16MB page table */
37#define HPT_ORDER 24
38#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
39#define HPT_HASH_MASK (HPT_NPTEG - 1)
40
41/* Pages in the VRMA are 16MB pages */
42#define VRMA_PAGE_ORDER 24
43#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
44
45/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
46#define MAX_LPID_970 63
47#define NR_LPIDS (LPID_RSVD + 1)
48unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
49
50long kvmppc_alloc_hpt(struct kvm *kvm)
51{
52 unsigned long hpt;
53 unsigned long lpid;
54
55 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
56 HPT_ORDER - PAGE_SHIFT);
57 if (!hpt) {
58 pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
59 return -ENOMEM;
60 }
61 kvm->arch.hpt_virt = hpt;
62
63 do {
64 lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
65 if (lpid >= NR_LPIDS) {
66 pr_err("kvm_alloc_hpt: No LPIDs free\n");
67 free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
68 return -ENOMEM;
69 }
70 } while (test_and_set_bit(lpid, lpid_inuse));
71
72 kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
73 kvm->arch.lpid = lpid;
74
75 pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
76 return 0;
77}
78
79void kvmppc_free_hpt(struct kvm *kvm)
80{
81 clear_bit(kvm->arch.lpid, lpid_inuse);
82 free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
83}
84
85void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
86{
87 unsigned long i;
88 unsigned long npages = kvm->arch.ram_npages;
89 unsigned long pfn;
90 unsigned long *hpte;
91 unsigned long hash;
92 struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
93
94 if (!pginfo)
95 return;
96
97 /* VRMA can't be > 1TB */
98 if (npages > 1ul << (40 - kvm->arch.ram_porder))
99 npages = 1ul << (40 - kvm->arch.ram_porder);
100 /* Can't use more than 1 HPTE per HPTEG */
101 if (npages > HPT_NPTEG)
102 npages = HPT_NPTEG;
103
104 for (i = 0; i < npages; ++i) {
105 pfn = pginfo[i].pfn;
106 if (!pfn)
107 break;
108 /* can't use hpt_hash since va > 64 bits */
109 hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
110 /*
111 * We assume that the hash table is empty and no
112 * vcpus are using it at this stage. Since we create
113 * at most one HPTE per HPTEG, we just assume entry 7
114 * is available and use it.
115 */
116 hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7));
117 hpte += 7 * 2;
118 /* HPTE low word - RPN, protection, etc. */
119 hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
120 HPTE_R_M | PP_RWXX;
121 wmb();
122 hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
123 (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
124 HPTE_V_LARGE | HPTE_V_VALID;
125 }
126}
127
128int kvmppc_mmu_hv_init(void)
129{
130 unsigned long host_lpid, rsvd_lpid;
131
132 if (!cpu_has_feature(CPU_FTR_HVMODE))
133 return -EINVAL;
134
135 memset(lpid_inuse, 0, sizeof(lpid_inuse));
136
137 if (cpu_has_feature(CPU_FTR_ARCH_206)) {
138 host_lpid = mfspr(SPRN_LPID); /* POWER7 */
139 rsvd_lpid = LPID_RSVD;
140 } else {
141 host_lpid = 0; /* PPC970 */
142 rsvd_lpid = MAX_LPID_970;
143 }
144
145 set_bit(host_lpid, lpid_inuse);
146 /* rsvd_lpid is reserved for use in partition switching */
147 set_bit(rsvd_lpid, lpid_inuse);
148
149 return 0;
150}
151
152void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
153{
154}
155
156static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
157{
158 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
159}
160
161static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
162 struct kvmppc_pte *gpte, bool data)
163{
164 return -ENOENT;
165}
166
167void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
168{
169 struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
170
171 if (cpu_has_feature(CPU_FTR_ARCH_206))
172 vcpu->arch.slb_nr = 32; /* POWER7 */
173 else
174 vcpu->arch.slb_nr = 64;
175
176 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
177 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
178
179 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
180}
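
The geometry in the file above is easy to lose in the shift arithmetic: HPT_ORDER 24 means a fixed 16 MB hash table, a PTEG is 128 bytes (eight 16-byte HPTEs), so HPT_NPTEG = 1 << (24 - 7) = 131072 groups, and kvmppc_map_vrma() installs at most one bolted HPTE, slot 7, per group. A stand-alone sketch of the same address computation, assuming a 64-bit build (the constants are copied from the file above; everything else is illustrative, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define HPT_ORDER     24                        /* 16 MB hash table */
    #define HPT_NPTEG     (1ul << (HPT_ORDER - 7))  /* 128 bytes per PTEG */
    #define HPT_HASH_MASK (HPT_NPTEG - 1)
    #define VRMA_VSID     0x1ffffffUL

    int main(void)
    {
        unsigned long i = 5;    /* example: the sixth 16 MB VRMA page */

        /* Same hash kvmppc_map_vrma() uses (the va is too wide for hpt_hash). */
        unsigned long hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;

        /* Byte offset of the PTEG, then of HPTE slot 7 (two u64s per HPTE). */
        unsigned long pteg  = hash << 7;
        unsigned long slot7 = pteg + 7 * 2 * sizeof(uint64_t);

        printf("groups=%lu hash=%#lx pteg@%#lx slot7@%#lx\n",
               (unsigned long)HPT_NPTEG, hash, pteg, slot7);
        return 0;
    }

The wmb() between writing hpte[1] and hpte[0] in the real code matters because the valid bit lives in hpte[0]; the sketch leaves ordering out entirely.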
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
new file mode 100644
index 000000000000..ea0f8c537c28
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -0,0 +1,73 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
17 */
18
19#include <linux/types.h>
20#include <linux/string.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/highmem.h>
24#include <linux/gfp.h>
25#include <linux/slab.h>
26#include <linux/hugetlb.h>
27#include <linux/list.h>
28
29#include <asm/tlbflush.h>
30#include <asm/kvm_ppc.h>
31#include <asm/kvm_book3s.h>
32#include <asm/mmu-hash64.h>
33#include <asm/hvcall.h>
34#include <asm/synch.h>
35#include <asm/ppc-opcode.h>
36#include <asm/kvm_host.h>
37#include <asm/udbg.h>
38
39#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
40
41long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
42 unsigned long ioba, unsigned long tce)
43{
44 struct kvm *kvm = vcpu->kvm;
45 struct kvmppc_spapr_tce_table *stt;
46
47 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
48 /* liobn, ioba, tce); */
49
50 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
51 if (stt->liobn == liobn) {
52 unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
53 struct page *page;
54 u64 *tbl;
55
56 /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
57 /* liobn, stt, stt->window_size); */
58 if (ioba >= stt->window_size)
59 return H_PARAMETER;
60
61 page = stt->pages[idx / TCES_PER_PAGE];
62 tbl = (u64 *)page_address(page);
63
64 /* FIXME: Need to validate the TCE itself */
65 /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
66 tbl[idx % TCES_PER_PAGE] = tce;
67 return H_SUCCESS;
68 }
69 }
70
71 /* Didn't find the liobn, punt it to userspace */
72 return H_TOO_HARD;
73}
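/*
 * Worked example (hypothetical helper, assuming SPAPR_TCE_SHIFT is 12 and
 * 4kB host pages, so TCES_PER_PAGE == 512): an ioba of 0x203000 selects
 * TCE index 0x203, i.e. backing page 1, slot 3, exactly as the body of
 * kvmppc_h_put_tce() computes it.
 */
static inline u64 *h_put_tce_slot(struct kvmppc_spapr_tce_table *stt,
				  unsigned long ioba)
{
	unsigned long idx = ioba >> SPAPR_TCE_SHIFT;	/* 0x203000 >> 12 = 0x203 */
	u64 *tbl = (u64 *)page_address(stt->pages[idx / TCES_PER_PAGE]);

	return &tbl[idx % TCES_PER_PAGE];		/* 0x203 % 512 = 3 */
}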
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 1dd5a1ddfd0d..88c8f26add02 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -20,8 +20,11 @@
20#include <linux/module.h> 20#include <linux/module.h>
21#include <asm/kvm_book3s.h> 21#include <asm/kvm_book3s.h>
22 22
23EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter); 23#ifdef CONFIG_KVM_BOOK3S_64_HV
24EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem); 24EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
25#else
26EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
27EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
25EXPORT_SYMBOL_GPL(kvmppc_rmcall); 28EXPORT_SYMBOL_GPL(kvmppc_rmcall);
26EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); 29EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
27#ifdef CONFIG_ALTIVEC 30#ifdef CONFIG_ALTIVEC
@@ -30,3 +33,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
30#ifdef CONFIG_VSX 33#ifdef CONFIG_VSX
31EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx); 34EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
32#endif 35#endif
36#endif
37
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
new file mode 100644
index 000000000000..cc0d7f1b19ab
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -0,0 +1,1269 @@
1/*
2 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
3 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
4 *
5 * Authors:
6 * Paul Mackerras <paulus@au1.ibm.com>
7 * Alexander Graf <agraf@suse.de>
8 * Kevin Wolf <mail@kevin-wolf.de>
9 *
10 * Description: KVM functions specific to running on Book 3S
11 * processors in hypervisor mode (specifically POWER7 and later).
12 *
13 * This file is derived from arch/powerpc/kvm/book3s.c,
14 * by Alexander Graf <agraf@suse.de>.
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License, version 2, as
18 * published by the Free Software Foundation.
19 */
20
21#include <linux/kvm_host.h>
22#include <linux/err.h>
23#include <linux/slab.h>
24#include <linux/preempt.h>
25#include <linux/sched.h>
26#include <linux/delay.h>
27#include <linux/fs.h>
28#include <linux/anon_inodes.h>
29#include <linux/cpumask.h>
30#include <linux/spinlock.h>
31#include <linux/page-flags.h>
32
33#include <asm/reg.h>
34#include <asm/cputable.h>
35#include <asm/cacheflush.h>
36#include <asm/tlbflush.h>
37#include <asm/uaccess.h>
38#include <asm/io.h>
39#include <asm/kvm_ppc.h>
40#include <asm/kvm_book3s.h>
41#include <asm/mmu_context.h>
42#include <asm/lppaca.h>
43#include <asm/processor.h>
44#include <asm/cputhreads.h>
45#include <asm/page.h>
46#include <linux/gfp.h>
47#include <linux/sched.h>
48#include <linux/vmalloc.h>
49#include <linux/highmem.h>
50
51/*
52 * For now, limit memory to 64GB and require it to be large pages.
53 * This value is chosen because it makes the ram_pginfo array be
54 * 64kB in size, which is about as large as we want to be trying
55 * to allocate with kmalloc.
56 */
57#define MAX_MEM_ORDER 36
58
59#define LARGE_PAGE_ORDER 24 /* 16MB pages */
60
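/*
 * Illustrative arithmetic for the comment above: the ram_pginfo array has
 * 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER) = 1ul << 12 = 4096 entries, so
 * the quoted 64kB figure assumes sizeof(struct kvmppc_pginfo) == 16.
 */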
61/* #define EXIT_DEBUG */
62/* #define EXIT_DEBUG_SIMPLE */
63/* #define EXIT_DEBUG_INT */
64
65void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
66{
67 local_paca->kvm_hstate.kvm_vcpu = vcpu;
68 local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
69}
70
71void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
72{
73}
74
75static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
76static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
77
78void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
79{
80 u64 now;
81 unsigned long dec_nsec;
82
83 now = get_tb();
84 if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
85 kvmppc_core_queue_dec(vcpu);
86 if (vcpu->arch.pending_exceptions)
87 return;
88 if (vcpu->arch.dec_expires != ~(u64)0) {
89 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
90 tb_ticks_per_sec;
91 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
92 HRTIMER_MODE_REL);
93 }
94
95 kvmppc_vcpu_blocked(vcpu);
96
97 kvm_vcpu_block(vcpu);
98 vcpu->stat.halt_wakeup++;
99
100 if (vcpu->arch.dec_expires != ~(u64)0)
101 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
102
103 kvmppc_vcpu_unblocked(vcpu);
104}
105
106void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
107{
108 vcpu->arch.shregs.msr = msr;
109}
110
111void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
112{
113 vcpu->arch.pvr = pvr;
114}
115
116void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
117{
118 int r;
119
120 pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
121 pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
122 vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
123 for (r = 0; r < 16; ++r)
124 pr_err("r%2d = %.16lx r%d = %.16lx\n",
125 r, kvmppc_get_gpr(vcpu, r),
126 r+16, kvmppc_get_gpr(vcpu, r+16));
127 pr_err("ctr = %.16lx lr = %.16lx\n",
128 vcpu->arch.ctr, vcpu->arch.lr);
129 pr_err("srr0 = %.16llx srr1 = %.16llx\n",
130 vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
131 pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
132 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
133 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
134 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
135 pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
136 vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
137 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
138 pr_err("fault dar = %.16lx dsisr = %.8x\n",
139 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
140 pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
141 for (r = 0; r < vcpu->arch.slb_max; ++r)
142 pr_err(" ESID = %.16llx VSID = %.16llx\n",
143 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
144 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
145 vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
146 vcpu->arch.last_inst);
147}
148
149struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
150{
151 int r;
152 struct kvm_vcpu *v, *ret = NULL;
153
154 mutex_lock(&kvm->lock);
155 kvm_for_each_vcpu(r, v, kvm) {
156 if (v->vcpu_id == id) {
157 ret = v;
158 break;
159 }
160 }
161 mutex_unlock(&kvm->lock);
162 return ret;
163}
164
165static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
166{
167 vpa->shared_proc = 1;
168 vpa->yield_count = 1;
169}
170
171static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
172 unsigned long flags,
173 unsigned long vcpuid, unsigned long vpa)
174{
175 struct kvm *kvm = vcpu->kvm;
176 unsigned long pg_index, ra, len;
177 unsigned long pg_offset;
178 void *va;
179 struct kvm_vcpu *tvcpu;
180
181 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
182 if (!tvcpu)
183 return H_PARAMETER;
184
185 flags >>= 63 - 18;
186 flags &= 7;
187 if (flags == 0 || flags == 4)
188 return H_PARAMETER;
189 if (flags < 4) {
190 if (vpa & 0x7f)
191 return H_PARAMETER;
192 /* registering new area; convert logical addr to real */
193 pg_index = vpa >> kvm->arch.ram_porder;
194 pg_offset = vpa & (kvm->arch.ram_psize - 1);
195 if (pg_index >= kvm->arch.ram_npages)
196 return H_PARAMETER;
197 if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
198 return H_PARAMETER;
199 ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
200 ra |= pg_offset;
201 va = __va(ra);
202 if (flags <= 1)
203 len = *(unsigned short *)(va + 4);
204 else
205 len = *(unsigned int *)(va + 4);
206 if (pg_offset + len > kvm->arch.ram_psize)
207 return H_PARAMETER;
208 switch (flags) {
209 case 1: /* register VPA */
210 if (len < 640)
211 return H_PARAMETER;
212 tvcpu->arch.vpa = va;
213 init_vpa(vcpu, va);
214 break;
215 case 2: /* register DTL */
216 if (len < 48)
217 return H_PARAMETER;
218 if (!tvcpu->arch.vpa)
219 return H_RESOURCE;
220 len -= len % 48;
221 tvcpu->arch.dtl = va;
222 tvcpu->arch.dtl_end = va + len;
223 break;
224 case 3: /* register SLB shadow buffer */
225 if (len < 8)
226 return H_PARAMETER;
227 if (!tvcpu->arch.vpa)
228 return H_RESOURCE;
229 tvcpu->arch.slb_shadow = va;
230 len = (len - 16) / 16;
231 tvcpu->arch.slb_shadow = va;
232 break;
233 }
234 } else {
235 switch (flags) {
236 case 5: /* unregister VPA */
237 if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
238 return H_RESOURCE;
239 tvcpu->arch.vpa = NULL;
240 break;
241 case 6: /* unregister DTL */
242 tvcpu->arch.dtl = NULL;
243 break;
244 case 7: /* unregister SLB shadow buffer */
245 tvcpu->arch.slb_shadow = NULL;
246 break;
247 }
248 }
249 return H_SUCCESS;
250}
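/*
 * Hypothetical helper showing the flags decode used at the top of
 * do_h_register_vpa(): a 3-bit subfunction sits in bits 16:18 (IBM
 * numbering) of the flags doubleword, giving 1 = register VPA,
 * 2 = register DTL, 3 = register SLB shadow, 5/6/7 = the corresponding
 * unregister calls, with 0 and 4 rejected.
 */
static inline unsigned long h_vpa_subfunc(unsigned long flags)
{
	return (flags >> (63 - 18)) & 7;
}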
251
252int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
253{
254 unsigned long req = kvmppc_get_gpr(vcpu, 3);
255 unsigned long target, ret = H_SUCCESS;
256 struct kvm_vcpu *tvcpu;
257
258 switch (req) {
259 case H_CEDE:
260 vcpu->arch.shregs.msr |= MSR_EE;
261 vcpu->arch.ceded = 1;
262 smp_mb();
263 if (!vcpu->arch.prodded)
264 kvmppc_vcpu_block(vcpu);
265 else
266 vcpu->arch.prodded = 0;
267 smp_mb();
268 vcpu->arch.ceded = 0;
269 break;
270 case H_PROD:
271 target = kvmppc_get_gpr(vcpu, 4);
272 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
273 if (!tvcpu) {
274 ret = H_PARAMETER;
275 break;
276 }
277 tvcpu->arch.prodded = 1;
278 smp_mb();
279 if (vcpu->arch.ceded) {
280 if (waitqueue_active(&vcpu->wq)) {
281 wake_up_interruptible(&vcpu->wq);
282 vcpu->stat.halt_wakeup++;
283 }
284 }
285 break;
286 case H_CONFER:
287 break;
288 case H_REGISTER_VPA:
289 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
290 kvmppc_get_gpr(vcpu, 5),
291 kvmppc_get_gpr(vcpu, 6));
292 break;
293 default:
294 return RESUME_HOST;
295 }
296 kvmppc_set_gpr(vcpu, 3, ret);
297 vcpu->arch.hcall_needed = 0;
298 return RESUME_GUEST;
299}
300
301static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
302 struct task_struct *tsk)
303{
304 int r = RESUME_HOST;
305
306 vcpu->stat.sum_exits++;
307
308 run->exit_reason = KVM_EXIT_UNKNOWN;
309 run->ready_for_interrupt_injection = 1;
310 switch (vcpu->arch.trap) {
311 /* We're good on these - the host merely wanted to get our attention */
312 case BOOK3S_INTERRUPT_HV_DECREMENTER:
313 vcpu->stat.dec_exits++;
314 r = RESUME_GUEST;
315 break;
316 case BOOK3S_INTERRUPT_EXTERNAL:
317 vcpu->stat.ext_intr_exits++;
318 r = RESUME_GUEST;
319 break;
320 case BOOK3S_INTERRUPT_PERFMON:
321 r = RESUME_GUEST;
322 break;
323 case BOOK3S_INTERRUPT_PROGRAM:
324 {
325 ulong flags;
326 /*
327 * Normally program interrupts are delivered directly
328 * to the guest by the hardware, but we can get here
329 * as a result of a hypervisor emulation interrupt
330 * (e40) getting turned into a 700 by BML RTAS.
331 */
332 flags = vcpu->arch.shregs.msr & 0x1f0000ull;
333 kvmppc_core_queue_program(vcpu, flags);
334 r = RESUME_GUEST;
335 break;
336 }
337 case BOOK3S_INTERRUPT_SYSCALL:
338 {
339 /* hcall - punt to userspace */
340 int i;
341
342 if (vcpu->arch.shregs.msr & MSR_PR) {
343 /* sc 1 from userspace - reflect to guest syscall */
344 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
345 r = RESUME_GUEST;
346 break;
347 }
348 run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
349 for (i = 0; i < 9; ++i)
350 run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
351 run->exit_reason = KVM_EXIT_PAPR_HCALL;
352 vcpu->arch.hcall_needed = 1;
353 r = RESUME_HOST;
354 break;
355 }
356 /*
357 * We get these next two if the guest does a bad real-mode access,
358 * as we have enabled VRMA (virtualized real mode area) mode in the
359 * LPCR. We just generate an appropriate DSI/ISI to the guest.
360 */
361 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
362 vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
363 vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
364 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
365 r = RESUME_GUEST;
366 break;
367 case BOOK3S_INTERRUPT_H_INST_STORAGE:
368 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
369 0x08000000);
370 r = RESUME_GUEST;
371 break;
372 /*
373 * This occurs if the guest executes an illegal instruction.
374 * We just generate a program interrupt to the guest, since
375 * we don't emulate any guest instructions at this stage.
376 */
377 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
378 kvmppc_core_queue_program(vcpu, 0x80000);
379 r = RESUME_GUEST;
380 break;
381 default:
382 kvmppc_dump_regs(vcpu);
383 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
384 vcpu->arch.trap, kvmppc_get_pc(vcpu),
385 vcpu->arch.shregs.msr);
386 r = RESUME_HOST;
387 BUG();
388 break;
389 }
390
391
392 if (!(r & RESUME_HOST)) {
393 /* To avoid clobbering exit_reason, only check for signals if
394 * we aren't already exiting to userspace for some other
395 * reason. */
396 if (signal_pending(tsk)) {
397 vcpu->stat.signal_exits++;
398 run->exit_reason = KVM_EXIT_INTR;
399 r = -EINTR;
400 } else {
401 kvmppc_core_deliver_interrupts(vcpu);
402 }
403 }
404
405 return r;
406}
407
408int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
409 struct kvm_sregs *sregs)
410{
411 int i;
412
413	memset(sregs, 0, sizeof(struct kvm_sregs));
414
415	sregs->pvr = vcpu->arch.pvr;
416 for (i = 0; i < vcpu->arch.slb_max; i++) {
417 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
418 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
419 }
420
421 return 0;
422}
423
424int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
425 struct kvm_sregs *sregs)
426{
427 int i, j;
428
429 kvmppc_set_pvr(vcpu, sregs->pvr);
430
431 j = 0;
432 for (i = 0; i < vcpu->arch.slb_nr; i++) {
433 if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
434 vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
435 vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
436 ++j;
437 }
438 }
439 vcpu->arch.slb_max = j;
440
441 return 0;
442}
443
444int kvmppc_core_check_processor_compat(void)
445{
446 if (cpu_has_feature(CPU_FTR_HVMODE))
447 return 0;
448 return -EIO;
449}
450
451struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
452{
453 struct kvm_vcpu *vcpu;
454 int err = -EINVAL;
455 int core;
456 struct kvmppc_vcore *vcore;
457
458 core = id / threads_per_core;
459 if (core >= KVM_MAX_VCORES)
460 goto out;
461
462 err = -ENOMEM;
463 vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
464 if (!vcpu)
465 goto out;
466
467 err = kvm_vcpu_init(vcpu, kvm, id);
468 if (err)
469 goto free_vcpu;
470
471 vcpu->arch.shared = &vcpu->arch.shregs;
472 vcpu->arch.last_cpu = -1;
473 vcpu->arch.mmcr[0] = MMCR0_FC;
474 vcpu->arch.ctrl = CTRL_RUNLATCH;
475 /* default to host PVR, since we can't spoof it */
476 vcpu->arch.pvr = mfspr(SPRN_PVR);
477 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
478
479 kvmppc_mmu_book3s_hv_init(vcpu);
480
481 /*
482 * Some vcpus may start out in stopped state. If we initialize
483 * them to busy-in-host state they will stop other vcpus in the
484 * vcore from running. Instead we initialize them to blocked
485 * state, effectively considering them to be stopped until we
486 * see the first run ioctl for them.
487 */
488 vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
489
490 init_waitqueue_head(&vcpu->arch.cpu_run);
491
492 mutex_lock(&kvm->lock);
493 vcore = kvm->arch.vcores[core];
494 if (!vcore) {
495 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
496 if (vcore) {
497 INIT_LIST_HEAD(&vcore->runnable_threads);
498 spin_lock_init(&vcore->lock);
499 }
500 kvm->arch.vcores[core] = vcore;
501 }
502 mutex_unlock(&kvm->lock);
503
504 if (!vcore)
505 goto free_vcpu;
506
507 spin_lock(&vcore->lock);
508 ++vcore->num_threads;
509 ++vcore->n_blocked;
510 spin_unlock(&vcore->lock);
511 vcpu->arch.vcore = vcore;
512
513 return vcpu;
514
515free_vcpu:
516 kfree(vcpu);
517out:
518 return ERR_PTR(err);
519}
520
521void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
522{
523 kvm_vcpu_uninit(vcpu);
524 kfree(vcpu);
525}
526
527static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
528{
529 struct kvmppc_vcore *vc = vcpu->arch.vcore;
530
531 spin_lock(&vc->lock);
532 vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
533 ++vc->n_blocked;
534 if (vc->n_runnable > 0 &&
535 vc->n_runnable + vc->n_blocked == vc->num_threads) {
536 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
537 arch.run_list);
538 wake_up(&vcpu->arch.cpu_run);
539 }
540 spin_unlock(&vc->lock);
541}
542
543static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
544{
545 struct kvmppc_vcore *vc = vcpu->arch.vcore;
546
547 spin_lock(&vc->lock);
548 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
549 --vc->n_blocked;
550 spin_unlock(&vc->lock);
551}
552
553extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
554extern void xics_wake_cpu(int cpu);
555
556static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
557 struct kvm_vcpu *vcpu)
558{
559 struct kvm_vcpu *v;
560
561 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
562 return;
563 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
564 --vc->n_runnable;
565 /* decrement the physical thread id of each following vcpu */
566 v = vcpu;
567 list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
568 --v->arch.ptid;
569 list_del(&vcpu->arch.run_list);
570}
571
572static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
573{
574 int cpu;
575 struct paca_struct *tpaca;
576 struct kvmppc_vcore *vc = vcpu->arch.vcore;
577
578 cpu = vc->pcpu + vcpu->arch.ptid;
579 tpaca = &paca[cpu];
580 tpaca->kvm_hstate.kvm_vcpu = vcpu;
581 tpaca->kvm_hstate.kvm_vcore = vc;
582 smp_wmb();
583#ifdef CONFIG_PPC_ICP_NATIVE
584 if (vcpu->arch.ptid) {
585 tpaca->cpu_start = 0x80;
586 tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
587 wmb();
588 xics_wake_cpu(cpu);
589 ++vc->n_woken;
590 }
591#endif
592}
593
594static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
595{
596 int i;
597
598 HMT_low();
599 i = 0;
600 while (vc->nap_count < vc->n_woken) {
601 if (++i >= 1000000) {
602 pr_err("kvmppc_wait_for_nap timeout %d %d\n",
603 vc->nap_count, vc->n_woken);
604 break;
605 }
606 cpu_relax();
607 }
608 HMT_medium();
609}
610
611/*
612 * Check that we are on thread 0 and that any other threads in
613 * this core are off-line.
614 */
615static int on_primary_thread(void)
616{
617 int cpu = smp_processor_id();
618 int thr = cpu_thread_in_core(cpu);
619
620 if (thr)
621 return 0;
622 while (++thr < threads_per_core)
623 if (cpu_online(cpu + thr))
624 return 0;
625 return 1;
626}
627
628/*
629 * Run a set of guest threads on a physical core.
630 * Called with vc->lock held.
631 */
632static int kvmppc_run_core(struct kvmppc_vcore *vc)
633{
634 struct kvm_vcpu *vcpu, *vnext;
635 long ret;
636 u64 now;
637
638 /* don't start if any threads have a signal pending */
639 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
640 if (signal_pending(vcpu->arch.run_task))
641 return 0;
642
643 /*
644 * Make sure we are running on thread 0, and that
645 * secondary threads are offline.
646 * XXX we should also block attempts to bring any
647 * secondary threads online.
648 */
649 if (threads_per_core > 1 && !on_primary_thread()) {
650 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
651 vcpu->arch.ret = -EBUSY;
652 goto out;
653 }
654
655 vc->n_woken = 0;
656 vc->nap_count = 0;
657 vc->entry_exit_count = 0;
658 vc->vcore_running = 1;
659 vc->in_guest = 0;
660 vc->pcpu = smp_processor_id();
661 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
662 kvmppc_start_thread(vcpu);
663 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
664 arch.run_list);
665
666 spin_unlock(&vc->lock);
667
668 preempt_disable();
669 kvm_guest_enter();
670 __kvmppc_vcore_entry(NULL, vcpu);
671
672 /* wait for secondary threads to finish writing their state to memory */
673 spin_lock(&vc->lock);
674 if (vc->nap_count < vc->n_woken)
675 kvmppc_wait_for_nap(vc);
676 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
677 vc->vcore_running = 2;
678 spin_unlock(&vc->lock);
679
680 /* make sure updates to secondary vcpu structs are visible now */
681 smp_mb();
682 kvm_guest_exit();
683
684 preempt_enable();
685 kvm_resched(vcpu);
686
687 now = get_tb();
688 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
689 /* cancel pending dec exception if dec is positive */
690 if (now < vcpu->arch.dec_expires &&
691 kvmppc_core_pending_dec(vcpu))
692 kvmppc_core_dequeue_dec(vcpu);
693 if (!vcpu->arch.trap) {
694 if (signal_pending(vcpu->arch.run_task)) {
695 vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
696 vcpu->arch.ret = -EINTR;
697 }
698 continue; /* didn't get to run */
699 }
700 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
701 vcpu->arch.run_task);
702 vcpu->arch.ret = ret;
703 vcpu->arch.trap = 0;
704 }
705
706 spin_lock(&vc->lock);
707 out:
708 vc->vcore_running = 0;
709 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
710 arch.run_list) {
711 if (vcpu->arch.ret != RESUME_GUEST) {
712 kvmppc_remove_runnable(vc, vcpu);
713 wake_up(&vcpu->arch.cpu_run);
714 }
715 }
716
717 return 1;
718}
719
720static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
721{
722 int ptid;
723 int wait_state;
724 struct kvmppc_vcore *vc;
725 DEFINE_WAIT(wait);
726
727 /* No need to go into the guest when all we do is going out */
728 if (signal_pending(current)) {
729 kvm_run->exit_reason = KVM_EXIT_INTR;
730 return -EINTR;
731 }
732
733 /* On PPC970, check that we have an RMA region */
734 if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
735 return -EPERM;
736
737 kvm_run->exit_reason = 0;
738 vcpu->arch.ret = RESUME_GUEST;
739 vcpu->arch.trap = 0;
740
741 flush_fp_to_thread(current);
742 flush_altivec_to_thread(current);
743 flush_vsx_to_thread(current);
744
745 /*
746 * Synchronize with other threads in this virtual core
747 */
748 vc = vcpu->arch.vcore;
749 spin_lock(&vc->lock);
750 /* This happens the first time this is called for a vcpu */
751 if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
752 --vc->n_blocked;
753 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
754 ptid = vc->n_runnable;
755 vcpu->arch.run_task = current;
756 vcpu->arch.kvm_run = kvm_run;
757 vcpu->arch.ptid = ptid;
758 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
759 ++vc->n_runnable;
760
761 wait_state = TASK_INTERRUPTIBLE;
762 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
763 if (signal_pending(current)) {
764 if (!vc->vcore_running) {
765 kvm_run->exit_reason = KVM_EXIT_INTR;
766 vcpu->arch.ret = -EINTR;
767 break;
768 }
769 /* have to wait for vcore to stop executing guest */
770 wait_state = TASK_UNINTERRUPTIBLE;
771 smp_send_reschedule(vc->pcpu);
772 }
773
774 if (!vc->vcore_running &&
775 vc->n_runnable + vc->n_blocked == vc->num_threads) {
776 /* we can run now */
777 if (kvmppc_run_core(vc))
778 continue;
779 }
780
781 if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
782 kvmppc_start_thread(vcpu);
783
784 /* wait for other threads to come in, or wait for vcore */
785 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
786 spin_unlock(&vc->lock);
787 schedule();
788 finish_wait(&vcpu->arch.cpu_run, &wait);
789 spin_lock(&vc->lock);
790 }
791
792 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
793 kvmppc_remove_runnable(vc, vcpu);
794 spin_unlock(&vc->lock);
795
796 return vcpu->arch.ret;
797}
798
799int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
800{
801 int r;
802
803 do {
804 r = kvmppc_run_vcpu(run, vcpu);
805
806 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
807 !(vcpu->arch.shregs.msr & MSR_PR)) {
808 r = kvmppc_pseries_do_hcall(vcpu);
809 kvmppc_core_deliver_interrupts(vcpu);
810 }
811 } while (r == RESUME_GUEST);
812 return r;
813}
814
815static long kvmppc_stt_npages(unsigned long window_size)
816{
817 return ALIGN((window_size >> SPAPR_TCE_SHIFT)
818 * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
819}
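/*
 * Worked example, assuming SPAPR_TCE_SHIFT is 12 and 4kB pages: a 256MB
 * DMA window has 256MB >> 12 = 65536 TCEs, i.e. 65536 * 8 = 512kB of
 * table, so kvmppc_stt_npages() returns 128.
 */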
820
821static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
822{
823 struct kvm *kvm = stt->kvm;
824 int i;
825
826 mutex_lock(&kvm->lock);
827 list_del(&stt->list);
828 for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
829 __free_page(stt->pages[i]);
830 kfree(stt);
831 mutex_unlock(&kvm->lock);
832
833 kvm_put_kvm(kvm);
834}
835
836static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
837{
838 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
839 struct page *page;
840
841 if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
842 return VM_FAULT_SIGBUS;
843
844 page = stt->pages[vmf->pgoff];
845 get_page(page);
846 vmf->page = page;
847 return 0;
848}
849
850static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
851 .fault = kvm_spapr_tce_fault,
852};
853
854static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
855{
856 vma->vm_ops = &kvm_spapr_tce_vm_ops;
857 return 0;
858}
859
860static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
861{
862 struct kvmppc_spapr_tce_table *stt = filp->private_data;
863
864 release_spapr_tce_table(stt);
865 return 0;
866}
867
868static struct file_operations kvm_spapr_tce_fops = {
869 .mmap = kvm_spapr_tce_mmap,
870 .release = kvm_spapr_tce_release,
871};
872
873long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
874 struct kvm_create_spapr_tce *args)
875{
876 struct kvmppc_spapr_tce_table *stt = NULL;
877 long npages;
878 int ret = -ENOMEM;
879 int i;
880
881 /* Check this LIOBN hasn't been previously allocated */
882 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
883 if (stt->liobn == args->liobn)
884 return -EBUSY;
885 }
886
887 npages = kvmppc_stt_npages(args->window_size);
888
889	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
890 GFP_KERNEL);
891 if (!stt)
892 goto fail;
893
894 stt->liobn = args->liobn;
895 stt->window_size = args->window_size;
896 stt->kvm = kvm;
897
898 for (i = 0; i < npages; i++) {
899 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
900 if (!stt->pages[i])
901 goto fail;
902 }
903
904 kvm_get_kvm(kvm);
905
906 mutex_lock(&kvm->lock);
907 list_add(&stt->list, &kvm->arch.spapr_tce_tables);
908
909 mutex_unlock(&kvm->lock);
910
911 return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
912 stt, O_RDWR);
913
914fail:
915 if (stt) {
916 for (i = 0; i < npages; i++)
917 if (stt->pages[i])
918 __free_page(stt->pages[i]);
919
920 kfree(stt);
921 }
922 return ret;
923}
924
925/* Work out RMLS (real mode limit selector) field value for a given RMA size.
926 Assumes POWER7 or PPC970. */
927static inline int lpcr_rmls(unsigned long rma_size)
928{
929 switch (rma_size) {
930 case 32ul << 20: /* 32 MB */
931 if (cpu_has_feature(CPU_FTR_ARCH_206))
932 return 8; /* only supported on POWER7 */
933 return -1;
934 case 64ul << 20: /* 64 MB */
935 return 3;
936 case 128ul << 20: /* 128 MB */
937 return 7;
938 case 256ul << 20: /* 256 MB */
939 return 4;
940 case 1ul << 30: /* 1 GB */
941 return 2;
942 case 16ul << 30: /* 16 GB */
943 return 1;
944 case 256ul << 30: /* 256 GB */
945 return 0;
946 default:
947 return -1;
948 }
949}
950
951static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
952{
953 struct kvmppc_rma_info *ri = vma->vm_file->private_data;
954 struct page *page;
955
956 if (vmf->pgoff >= ri->npages)
957 return VM_FAULT_SIGBUS;
958
959 page = pfn_to_page(ri->base_pfn + vmf->pgoff);
960 get_page(page);
961 vmf->page = page;
962 return 0;
963}
964
965static const struct vm_operations_struct kvm_rma_vm_ops = {
966 .fault = kvm_rma_fault,
967};
968
969static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
970{
971 vma->vm_flags |= VM_RESERVED;
972 vma->vm_ops = &kvm_rma_vm_ops;
973 return 0;
974}
975
976static int kvm_rma_release(struct inode *inode, struct file *filp)
977{
978 struct kvmppc_rma_info *ri = filp->private_data;
979
980 kvm_release_rma(ri);
981 return 0;
982}
983
984static struct file_operations kvm_rma_fops = {
985 .mmap = kvm_rma_mmap,
986 .release = kvm_rma_release,
987};
988
989long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
990{
991 struct kvmppc_rma_info *ri;
992 long fd;
993
994 ri = kvm_alloc_rma();
995 if (!ri)
996 return -ENOMEM;
997
998 fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
999 if (fd < 0)
1000 kvm_release_rma(ri);
1001
1002 ret->rma_size = ri->npages << PAGE_SHIFT;
1003 return fd;
1004}
1005
1006static struct page *hva_to_page(unsigned long addr)
1007{
1008 struct page *page[1];
1009 int npages;
1010
1011 might_sleep();
1012
1013 npages = get_user_pages_fast(addr, 1, 1, page);
1014
1015 if (unlikely(npages != 1))
1016		return NULL;
1017
1018 return page[0];
1019}
1020
1021int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1022 struct kvm_userspace_memory_region *mem)
1023{
1024 unsigned long psize, porder;
1025 unsigned long i, npages, totalpages;
1026 unsigned long pg_ix;
1027 struct kvmppc_pginfo *pginfo;
1028 unsigned long hva;
1029 struct kvmppc_rma_info *ri = NULL;
1030 struct page *page;
1031
1032 /* For now, only allow 16MB pages */
1033 porder = LARGE_PAGE_ORDER;
1034 psize = 1ul << porder;
1035 if ((mem->memory_size & (psize - 1)) ||
1036 (mem->guest_phys_addr & (psize - 1))) {
1037 pr_err("bad memory_size=%llx @ %llx\n",
1038 mem->memory_size, mem->guest_phys_addr);
1039 return -EINVAL;
1040 }
1041
1042 npages = mem->memory_size >> porder;
1043 totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
1044
1045 /* More memory than we have space to track? */
1046 if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
1047 return -EINVAL;
1048
1049 /* Do we already have an RMA registered? */
1050 if (mem->guest_phys_addr == 0 && kvm->arch.rma)
1051 return -EINVAL;
1052
1053 if (totalpages > kvm->arch.ram_npages)
1054 kvm->arch.ram_npages = totalpages;
1055
1056 /* Is this one of our preallocated RMAs? */
1057 if (mem->guest_phys_addr == 0) {
1058 struct vm_area_struct *vma;
1059
1060 down_read(&current->mm->mmap_sem);
1061 vma = find_vma(current->mm, mem->userspace_addr);
1062 if (vma && vma->vm_file &&
1063 vma->vm_file->f_op == &kvm_rma_fops &&
1064 mem->userspace_addr == vma->vm_start)
1065 ri = vma->vm_file->private_data;
1066 up_read(&current->mm->mmap_sem);
1067 if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
1068 pr_err("CPU requires an RMO\n");
1069 return -EINVAL;
1070 }
1071 }
1072
1073 if (ri) {
1074 unsigned long rma_size;
1075 unsigned long lpcr;
1076 long rmls;
1077
1078 rma_size = ri->npages << PAGE_SHIFT;
1079 if (rma_size > mem->memory_size)
1080 rma_size = mem->memory_size;
1081 rmls = lpcr_rmls(rma_size);
1082 if (rmls < 0) {
1083 pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
1084 return -EINVAL;
1085 }
1086 atomic_inc(&ri->use_count);
1087 kvm->arch.rma = ri;
1088 kvm->arch.n_rma_pages = rma_size >> porder;
1089
1090 /* Update LPCR and RMOR */
1091 lpcr = kvm->arch.lpcr;
1092 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1093 /* PPC970; insert RMLS value (split field) in HID4 */
1094 lpcr &= ~((1ul << HID4_RMLS0_SH) |
1095 (3ul << HID4_RMLS2_SH));
1096 lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
1097 ((rmls & 3) << HID4_RMLS2_SH);
1098 /* RMOR is also in HID4 */
1099 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
1100 << HID4_RMOR_SH;
1101 } else {
1102 /* POWER7 */
1103 lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
1104 lpcr |= rmls << LPCR_RMLS_SH;
1105 kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
1106 }
1107 kvm->arch.lpcr = lpcr;
1108 pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
1109 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
1110 }
1111
1112 pg_ix = mem->guest_phys_addr >> porder;
1113 pginfo = kvm->arch.ram_pginfo + pg_ix;
1114 for (i = 0; i < npages; ++i, ++pg_ix) {
1115 if (ri && pg_ix < kvm->arch.n_rma_pages) {
1116 pginfo[i].pfn = ri->base_pfn +
1117 (pg_ix << (porder - PAGE_SHIFT));
1118 continue;
1119 }
1120 hva = mem->userspace_addr + (i << porder);
1121 page = hva_to_page(hva);
1122 if (!page) {
1123 pr_err("oops, no pfn for hva %lx\n", hva);
1124 goto err;
1125 }
1126 /* Check it's a 16MB page */
1127 if (!PageHead(page) ||
1128 compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
1129 pr_err("page at %lx isn't 16MB (o=%d)\n",
1130 hva, compound_order(page));
1131 goto err;
1132 }
1133 pginfo[i].pfn = page_to_pfn(page);
1134 }
1135
1136 return 0;
1137
1138 err:
1139 return -EINVAL;
1140}
1141
1142void kvmppc_core_commit_memory_region(struct kvm *kvm,
1143 struct kvm_userspace_memory_region *mem)
1144{
1145 if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
1146 !kvm->arch.rma)
1147 kvmppc_map_vrma(kvm, mem);
1148}
1149
1150int kvmppc_core_init_vm(struct kvm *kvm)
1151{
1152 long r;
1153 unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
1154 long err = -ENOMEM;
1155 unsigned long lpcr;
1156
1157 /* Allocate hashed page table */
1158 r = kvmppc_alloc_hpt(kvm);
1159 if (r)
1160 return r;
1161
1162 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1163
1164 kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
1165 GFP_KERNEL);
1166 if (!kvm->arch.ram_pginfo) {
1167 pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
1168 npages * sizeof(struct kvmppc_pginfo));
1169 goto out_free;
1170 }
1171
1172 kvm->arch.ram_npages = 0;
1173 kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
1174 kvm->arch.ram_porder = LARGE_PAGE_ORDER;
1175 kvm->arch.rma = NULL;
1176 kvm->arch.n_rma_pages = 0;
1177
1178 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
1179
1180 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1181 /* PPC970; HID4 is effectively the LPCR */
1182 unsigned long lpid = kvm->arch.lpid;
1183 kvm->arch.host_lpid = 0;
1184 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
1185 lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
1186 lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
1187 ((lpid & 0xf) << HID4_LPID5_SH);
1188 } else {
1189 /* POWER7; init LPCR for virtual RMA mode */
1190 kvm->arch.host_lpid = mfspr(SPRN_LPID);
1191 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
1192 lpcr &= LPCR_PECE | LPCR_LPES;
1193 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
1194 LPCR_VPM0 | LPCR_VRMA_L;
1195 }
1196 kvm->arch.lpcr = lpcr;
1197
1198 return 0;
1199
1200 out_free:
1201 kvmppc_free_hpt(kvm);
1202 return err;
1203}
1204
1205void kvmppc_core_destroy_vm(struct kvm *kvm)
1206{
1207 struct kvmppc_pginfo *pginfo;
1208 unsigned long i;
1209
1210 if (kvm->arch.ram_pginfo) {
1211 pginfo = kvm->arch.ram_pginfo;
1212 kvm->arch.ram_pginfo = NULL;
1213 for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
1214 if (pginfo[i].pfn)
1215 put_page(pfn_to_page(pginfo[i].pfn));
1216 kfree(pginfo);
1217 }
1218 if (kvm->arch.rma) {
1219 kvm_release_rma(kvm->arch.rma);
1220 kvm->arch.rma = NULL;
1221 }
1222
1223 kvmppc_free_hpt(kvm);
1224 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
1225}
1226
1227/* These are stubs for now */
1228void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
1229{
1230}
1231
1232/* We don't need to emulate any privileged instructions or dcbz */
1233int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
1234 unsigned int inst, int *advance)
1235{
1236 return EMULATE_FAIL;
1237}
1238
1239int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
1240{
1241 return EMULATE_FAIL;
1242}
1243
1244int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
1245{
1246 return EMULATE_FAIL;
1247}
1248
1249static int kvmppc_book3s_hv_init(void)
1250{
1251 int r;
1252
1253 r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1254
1255 if (r)
1256 return r;
1257
1258 r = kvmppc_mmu_hv_init();
1259
1260 return r;
1261}
1262
1263static void kvmppc_book3s_hv_exit(void)
1264{
1265 kvm_exit();
1266}
1267
1268module_init(kvmppc_book3s_hv_init);
1269module_exit(kvmppc_book3s_hv_exit);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
new file mode 100644
index 000000000000..d43120355eec
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -0,0 +1,155 @@
1/*
2 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/kvm_host.h>
10#include <linux/preempt.h>
11#include <linux/sched.h>
12#include <linux/spinlock.h>
13#include <linux/bootmem.h>
14#include <linux/init.h>
15
16#include <asm/cputable.h>
17#include <asm/kvm_ppc.h>
18#include <asm/kvm_book3s.h>
19
20/*
21 * This maintains a list of RMAs (real mode areas) for KVM guests to use.
22 * Each RMA has to be physically contiguous and of a size that the
23 * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
24 * and other larger sizes. Since we are unlikely to be able to allocate that
25 * much physically contiguous memory after the system is up and running,
26 * we preallocate a set of RMAs in early boot for KVM to use.
27 */
28static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
29static unsigned long kvm_rma_count;
30
31static int __init early_parse_rma_size(char *p)
32{
33 if (!p)
34 return 1;
35
36 kvm_rma_size = memparse(p, &p);
37
38 return 0;
39}
40early_param("kvm_rma_size", early_parse_rma_size);
41
42static int __init early_parse_rma_count(char *p)
43{
44 if (!p)
45 return 1;
46
47 kvm_rma_count = simple_strtoul(p, NULL, 0);
48
49 return 0;
50}
51early_param("kvm_rma_count", early_parse_rma_count);
52
53static struct kvmppc_rma_info *rma_info;
54static LIST_HEAD(free_rmas);
55static DEFINE_SPINLOCK(rma_lock);
56
57/* Work out RMLS (real mode limit selector) field value for a given RMA size.
58 Assumes POWER7 or PPC970. */
59static inline int lpcr_rmls(unsigned long rma_size)
60{
61 switch (rma_size) {
62 case 32ul << 20: /* 32 MB */
63 if (cpu_has_feature(CPU_FTR_ARCH_206))
64 return 8; /* only supported on POWER7 */
65 return -1;
66 case 64ul << 20: /* 64 MB */
67 return 3;
68 case 128ul << 20: /* 128 MB */
69 return 7;
70 case 256ul << 20: /* 256 MB */
71 return 4;
72 case 1ul << 30: /* 1 GB */
73 return 2;
74 case 16ul << 30: /* 16 GB */
75 return 1;
76 case 256ul << 30: /* 256 GB */
77 return 0;
78 default:
79 return -1;
80 }
81}
82
83/*
84 * Called at boot time while the bootmem allocator is active,
85 * to allocate contiguous physical memory for the real memory
86 * areas for guests.
87 */
88void kvm_rma_init(void)
89{
90 unsigned long i;
91 unsigned long j, npages;
92 void *rma;
93 struct page *pg;
94
95 /* Only do this on PPC970 in HV mode */
96 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
97 !cpu_has_feature(CPU_FTR_ARCH_201))
98 return;
99
100 if (!kvm_rma_size || !kvm_rma_count)
101 return;
102
103 /* Check that the requested size is one supported in hardware */
104 if (lpcr_rmls(kvm_rma_size) < 0) {
105 pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
106 return;
107 }
108
109 npages = kvm_rma_size >> PAGE_SHIFT;
110 rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info));
111 for (i = 0; i < kvm_rma_count; ++i) {
112 rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size);
113 pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma,
114 kvm_rma_size >> 20);
115 rma_info[i].base_virt = rma;
116 rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT;
117 rma_info[i].npages = npages;
118 list_add_tail(&rma_info[i].list, &free_rmas);
119 atomic_set(&rma_info[i].use_count, 0);
120
121 pg = pfn_to_page(rma_info[i].base_pfn);
122 for (j = 0; j < npages; ++j) {
123 atomic_inc(&pg->_count);
124 ++pg;
125 }
126 }
127}
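/*
 * Usage sketch for the two early_param hooks above (values are examples
 * only): booting a PPC970 hypervisor-mode host with
 *
 *	kvm_rma_size=128M kvm_rma_count=4
 *
 * preallocates four contiguous 128MB regions (512MB total) from bootmem,
 * which guests later claim through kvm_alloc_rma().
 */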
128
129struct kvmppc_rma_info *kvm_alloc_rma(void)
130{
131 struct kvmppc_rma_info *ri;
132
133 ri = NULL;
134 spin_lock(&rma_lock);
135 if (!list_empty(&free_rmas)) {
136 ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list);
137 list_del(&ri->list);
138 atomic_inc(&ri->use_count);
139 }
140 spin_unlock(&rma_lock);
141 return ri;
142}
143EXPORT_SYMBOL_GPL(kvm_alloc_rma);
144
145void kvm_release_rma(struct kvmppc_rma_info *ri)
146{
147 if (atomic_dec_and_test(&ri->use_count)) {
148 spin_lock(&rma_lock);
149 list_add_tail(&ri->list, &free_rmas);
150 spin_unlock(&rma_lock);
151
152 }
153}
154EXPORT_SYMBOL_GPL(kvm_release_rma);
155
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
new file mode 100644
index 000000000000..3f7b674dd4bf
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -0,0 +1,166 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 *
17 * Derived from book3s_interrupts.S, which is:
18 * Copyright SUSE Linux Products GmbH 2009
19 *
20 * Authors: Alexander Graf <agraf@suse.de>
21 */
22
23#include <asm/ppc_asm.h>
24#include <asm/kvm_asm.h>
25#include <asm/reg.h>
26#include <asm/page.h>
27#include <asm/asm-offsets.h>
28#include <asm/exception-64s.h>
29#include <asm/ppc-opcode.h>
30
31/*****************************************************************************
32 * *
33 * Guest entry / exit code that is in kernel module memory (vmalloc) *
34 * *
35 ****************************************************************************/
36
37/* Registers:
38 * r4: vcpu pointer
39 */
40_GLOBAL(__kvmppc_vcore_entry)
41
42 /* Write correct stack frame */
43 mflr r0
44 std r0,PPC_LR_STKOFF(r1)
45
46 /* Save host state to the stack */
47 stdu r1, -SWITCH_FRAME_SIZE(r1)
48
49 /* Save non-volatile registers (r14 - r31) */
50 SAVE_NVGPRS(r1)
51
52 /* Save host DSCR */
53BEGIN_FTR_SECTION
54 mfspr r3, SPRN_DSCR
55 std r3, HSTATE_DSCR(r13)
56END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
57
58 /* Save host DABR */
59 mfspr r3, SPRN_DABR
60 std r3, HSTATE_DABR(r13)
61
62 /* Hard-disable interrupts */
63 mfmsr r10
64 std r10, HSTATE_HOST_MSR(r13)
65	rldicl	r10,r10,48,1	/* rotate MSR_EE (bit 48) up to bit 0 and mask it off */
66	rotldi	r10,r10,16	/* rotate back: only MSR_EE has been cleared */
67 mtmsrd r10,1
68
69 /* Save host PMU registers and load guest PMU registers */
70 /* R4 is live here (vcpu pointer) but not r3 or r5 */
71 li r3, 1
72 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
73 mfspr r7, SPRN_MMCR0 /* save MMCR0 */
74 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
75 isync
76 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
77 lbz r5, LPPACA_PMCINUSE(r3)
78 cmpwi r5, 0
79 beq 31f /* skip if not */
80 mfspr r5, SPRN_MMCR1
81 mfspr r6, SPRN_MMCRA
82 std r7, HSTATE_MMCR(r13)
83 std r5, HSTATE_MMCR + 8(r13)
84 std r6, HSTATE_MMCR + 16(r13)
85 mfspr r3, SPRN_PMC1
86 mfspr r5, SPRN_PMC2
87 mfspr r6, SPRN_PMC3
88 mfspr r7, SPRN_PMC4
89 mfspr r8, SPRN_PMC5
90 mfspr r9, SPRN_PMC6
91BEGIN_FTR_SECTION
92 mfspr r10, SPRN_PMC7
93 mfspr r11, SPRN_PMC8
94END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
95 stw r3, HSTATE_PMC(r13)
96 stw r5, HSTATE_PMC + 4(r13)
97 stw r6, HSTATE_PMC + 8(r13)
98 stw r7, HSTATE_PMC + 12(r13)
99 stw r8, HSTATE_PMC + 16(r13)
100 stw r9, HSTATE_PMC + 20(r13)
101BEGIN_FTR_SECTION
102 stw r10, HSTATE_PMC + 24(r13)
103 stw r11, HSTATE_PMC + 28(r13)
104END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
10531:
106
107 /*
108 * Put whatever is in the decrementer into the
109 * hypervisor decrementer.
110 */
111 mfspr r8,SPRN_DEC
112 mftb r7
113 mtspr SPRN_HDEC,r8
114 extsw r8,r8
115 add r8,r8,r7
116 std r8,HSTATE_DECEXP(r13)
117
118 /*
119 * On PPC970, if the guest vcpu has an external interrupt pending,
120 * send ourselves an IPI so as to interrupt the guest once it
121 * enables interrupts. (It must have interrupts disabled,
122 * otherwise we would already have delivered the interrupt.)
123 */
124BEGIN_FTR_SECTION
125 ld r0, VCPU_PENDING_EXC(r4)
126 li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
127 oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
128 and. r0, r0, r7
129 beq 32f
130 mr r31, r4
131 lhz r3, PACAPACAINDEX(r13)
132 bl smp_send_reschedule
133 nop
134 mr r4, r31
13532:
136END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
137
138 /* Jump to partition switch code */
139 bl .kvmppc_hv_entry_trampoline
140 nop
141
142/*
143 * We return here in virtual mode after the guest exits
144 * with something that we can't handle in real mode.
145 * Interrupts are enabled again at this point.
146 */
147
148.global kvmppc_handler_highmem
149kvmppc_handler_highmem:
150
151 /*
152 * Register usage at this point:
153 *
154 * R1 = host R1
155 * R2 = host R2
156 * R12 = exit handler id
157 * R13 = PACA
158 */
159
160 /* Restore non-volatile host registers (r14 - r31) */
161 REST_NVGPRS(r1)
162
163 addi r1, r1, SWITCH_FRAME_SIZE
164 ld r0, PPC_LR_STKOFF(r1)
165 mtlr r0
166 blr
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
new file mode 100644
index 000000000000..fcfe6b055558
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -0,0 +1,370 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
7 */
8
9#include <linux/types.h>
10#include <linux/string.h>
11#include <linux/kvm.h>
12#include <linux/kvm_host.h>
13#include <linux/hugetlb.h>
14
15#include <asm/tlbflush.h>
16#include <asm/kvm_ppc.h>
17#include <asm/kvm_book3s.h>
18#include <asm/mmu-hash64.h>
19#include <asm/hvcall.h>
20#include <asm/synch.h>
21#include <asm/ppc-opcode.h>
22
23/* For now use fixed-size 16MB page table */
24#define HPT_ORDER 24
25#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
26#define HPT_HASH_MASK (HPT_NPTEG - 1)
27
28#define HPTE_V_HVLOCK 0x40UL
29
30static inline long lock_hpte(unsigned long *hpte, unsigned long bits)
31{
32 unsigned long tmp, old;
33
34 asm volatile(" ldarx %0,0,%2\n"
35 " and. %1,%0,%3\n"
36 " bne 2f\n"
37 " ori %0,%0,%4\n"
38 " stdcx. %0,0,%2\n"
39 " beq+ 2f\n"
40 " li %1,%3\n"
41 "2: isync"
42 : "=&r" (tmp), "=&r" (old)
43 : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
44 : "cc", "memory");
45 return old == 0;
46}
47
48long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
49 long pte_index, unsigned long pteh, unsigned long ptel)
50{
51 unsigned long porder;
52 struct kvm *kvm = vcpu->kvm;
53 unsigned long i, lpn, pa;
54 unsigned long *hpte;
55
56 /* only handle 4k, 64k and 16M pages for now */
57 porder = 12;
58 if (pteh & HPTE_V_LARGE) {
59 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
60 (ptel & 0xf000) == 0x1000) {
61 /* 64k page */
62 porder = 16;
63 } else if ((ptel & 0xff000) == 0) {
64 /* 16M page */
65 porder = 24;
66 /* lowest AVA bit must be 0 for 16M pages */
67 if (pteh & 0x80)
68 return H_PARAMETER;
69 } else
70 return H_PARAMETER;
71 }
72 lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
73 if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
74 return H_PARAMETER;
75 pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
76 if (!pa)
77 return H_PARAMETER;
78 /* Check WIMG */
79 if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
80 (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
81 return H_PARAMETER;
82 pteh &= ~0x60UL;
83 ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
84 ptel |= pa;
85 if (pte_index >= (HPT_NPTEG << 3))
86 return H_PARAMETER;
87 if (likely((flags & H_EXACT) == 0)) {
88 pte_index &= ~7UL;
89 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
90 for (i = 0; ; ++i) {
91 if (i == 8)
92 return H_PTEG_FULL;
93 if ((*hpte & HPTE_V_VALID) == 0 &&
94 lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
95 break;
96 hpte += 2;
97 }
98 } else {
99 i = 0;
100 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
101 if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
102 return H_PTEG_FULL;
103 }
104 hpte[1] = ptel;
105 eieio();
106 hpte[0] = pteh;
107 asm volatile("ptesync" : : : "memory");
108 atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt);
109 vcpu->arch.gpr[4] = pte_index + i;
110 return H_SUCCESS;
111}
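/*
 * Hypothetical helper capturing the pte_index arithmetic used throughout
 * this file: each HPTE is 16 bytes (two unsigned longs), so pte_index << 4
 * is the byte offset into the HPT, and clearing the low 3 bits of
 * pte_index names the 8-entry HPTEG searched when H_EXACT is not set.
 */
static inline unsigned long *hpt_entry(struct kvm *kvm, unsigned long pte_index)
{
	return (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
}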
112
113static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
114 unsigned long pte_index)
115{
116 unsigned long rb, va_low;
117
118 rb = (v & ~0x7fUL) << 16; /* AVA field */
119 va_low = pte_index >> 3;
120 if (v & HPTE_V_SECONDARY)
121 va_low = ~va_low;
122 /* xor vsid from AVA */
123 if (!(v & HPTE_V_1TB_SEG))
124 va_low ^= v >> 12;
125 else
126 va_low ^= v >> 24;
127 va_low &= 0x7ff;
128 if (v & HPTE_V_LARGE) {
129 rb |= 1; /* L field */
130 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
131 (r & 0xff000)) {
132 /* non-16MB large page, must be 64k */
133 /* (masks depend on page size) */
134 rb |= 0x1000; /* page encoding in LP field */
135 rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
136 rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
137 }
138 } else {
139 /* 4kB page */
140 rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
141 }
142 rb |= (v >> 54) & 0x300; /* B field */
143 return rb;
144}
145
146#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
147
148static inline int try_lock_tlbie(unsigned int *lock)
149{
150 unsigned int tmp, old;
151 unsigned int token = LOCK_TOKEN;
152
153 asm volatile("1:lwarx %1,0,%2\n"
154 " cmpwi cr0,%1,0\n"
155 " bne 2f\n"
156 " stwcx. %3,0,%2\n"
157 " bne- 1b\n"
158 " isync\n"
159 "2:"
160 : "=&r" (tmp), "=&r" (old)
161 : "r" (lock), "r" (token)
162 : "cc", "memory");
163 return old == 0;
164}
165
166long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
167 unsigned long pte_index, unsigned long avpn,
168 unsigned long va)
169{
170 struct kvm *kvm = vcpu->kvm;
171 unsigned long *hpte;
172 unsigned long v, r, rb;
173
174 if (pte_index >= (HPT_NPTEG << 3))
175 return H_PARAMETER;
176 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
177 while (!lock_hpte(hpte, HPTE_V_HVLOCK))
178 cpu_relax();
179 if ((hpte[0] & HPTE_V_VALID) == 0 ||
180 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
181 ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
182 hpte[0] &= ~HPTE_V_HVLOCK;
183 return H_NOT_FOUND;
184 }
185 if (atomic_read(&kvm->online_vcpus) == 1)
186 flags |= H_LOCAL;
187 vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
188 vcpu->arch.gpr[5] = r = hpte[1];
189 rb = compute_tlbie_rb(v, r, pte_index);
190 hpte[0] = 0;
191 if (!(flags & H_LOCAL)) {
192		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
193 cpu_relax();
194 asm volatile("ptesync" : : : "memory");
195 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
196 : : "r" (rb), "r" (kvm->arch.lpid));
197 asm volatile("ptesync" : : : "memory");
198 kvm->arch.tlbie_lock = 0;
199 } else {
200 asm volatile("ptesync" : : : "memory");
201 asm volatile("tlbiel %0" : : "r" (rb));
202 asm volatile("ptesync" : : : "memory");
203 }
204 return H_SUCCESS;
205}
206
207long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
208{
209 struct kvm *kvm = vcpu->kvm;
210 unsigned long *args = &vcpu->arch.gpr[4];
211 unsigned long *hp, tlbrb[4];
212 long int i, found;
213 long int n_inval = 0;
214 unsigned long flags, req, pte_index;
215 long int local = 0;
216 long int ret = H_SUCCESS;
217
218 if (atomic_read(&kvm->online_vcpus) == 1)
219 local = 1;
220 for (i = 0; i < 4; ++i) {
221 pte_index = args[i * 2];
222 flags = pte_index >> 56;
223 pte_index &= ((1ul << 56) - 1);
224 req = flags >> 6;
225 flags &= 3;
226 if (req == 3)
227 break;
228 if (req != 1 || flags == 3 ||
229 pte_index >= (HPT_NPTEG << 3)) {
230 /* parameter error */
231 args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
232 ret = H_PARAMETER;
233 break;
234 }
235 hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
236 while (!lock_hpte(hp, HPTE_V_HVLOCK))
237 cpu_relax();
238 found = 0;
239 if (hp[0] & HPTE_V_VALID) {
240 switch (flags & 3) {
241 case 0: /* absolute */
242 found = 1;
243 break;
244 case 1: /* andcond */
245 if (!(hp[0] & args[i * 2 + 1]))
246 found = 1;
247 break;
248 case 2: /* AVPN */
249 if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
250 found = 1;
251 break;
252 }
253 }
254 if (!found) {
255 hp[0] &= ~HPTE_V_HVLOCK;
256 args[i * 2] = ((0x90 | flags) << 56) + pte_index;
257 continue;
258 }
259 /* insert R and C bits from PTE */
260 flags |= (hp[1] >> 5) & 0x0c;
261 args[i * 2] = ((0x80 | flags) << 56) + pte_index;
262 tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
263 hp[0] = 0;
264 }
265 if (n_inval == 0)
266 return ret;
267
268 if (!local) {
269		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
270 cpu_relax();
271 asm volatile("ptesync" : : : "memory");
272 for (i = 0; i < n_inval; ++i)
273 asm volatile(PPC_TLBIE(%1,%0)
274 : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
275 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
276 kvm->arch.tlbie_lock = 0;
277 } else {
278 asm volatile("ptesync" : : : "memory");
279 for (i = 0; i < n_inval; ++i)
280 asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
281 asm volatile("ptesync" : : : "memory");
282 }
283 return ret;
284}
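/*
 * Illustrative decode of the per-slot status that kvmppc_h_bulk_remove()
 * writes back into the top byte of each args[i * 2] word: 0x80|flags on
 * success (with the HPTE's R and C bits folded into flags), 0x90|flags
 * when no matching HPTE was found, 0xa0|flags for a malformed request.
 * A caller could check it with, for example:
 *
 *	status = (args[i * 2] >> 56) & 0xf0;
 */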
285
286long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
287 unsigned long pte_index, unsigned long avpn,
288 unsigned long va)
289{
290 struct kvm *kvm = vcpu->kvm;
291 unsigned long *hpte;
292 unsigned long v, r, rb;
293
294 if (pte_index >= (HPT_NPTEG << 3))
295 return H_PARAMETER;
296 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
297 while (!lock_hpte(hpte, HPTE_V_HVLOCK))
298 cpu_relax();
299 if ((hpte[0] & HPTE_V_VALID) == 0 ||
300 ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
301 hpte[0] &= ~HPTE_V_HVLOCK;
302 return H_NOT_FOUND;
303 }
304 if (atomic_read(&kvm->online_vcpus) == 1)
305 flags |= H_LOCAL;
306 v = hpte[0];
307 r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
308 HPTE_R_KEY_HI | HPTE_R_KEY_LO);
309 r |= (flags << 55) & HPTE_R_PP0;
310 r |= (flags << 48) & HPTE_R_KEY_HI;
311 r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
312 rb = compute_tlbie_rb(v, r, pte_index);
313 hpte[0] = v & ~HPTE_V_VALID;
314 if (!(flags & H_LOCAL)) {
315		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
316 cpu_relax();
317 asm volatile("ptesync" : : : "memory");
318 asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
319 : : "r" (rb), "r" (kvm->arch.lpid));
320 asm volatile("ptesync" : : : "memory");
321 kvm->arch.tlbie_lock = 0;
322 } else {
323 asm volatile("ptesync" : : : "memory");
324 asm volatile("tlbiel %0" : : "r" (rb));
325 asm volatile("ptesync" : : : "memory");
326 }
327 hpte[1] = r;
328 eieio();
329 hpte[0] = v & ~HPTE_V_HVLOCK;
330 asm volatile("ptesync" : : : "memory");
331 return H_SUCCESS;
332}
333
334static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
335{
336 long int i;
337 unsigned long offset, rpn;
338
339 offset = realaddr & (kvm->arch.ram_psize - 1);
340 rpn = (realaddr - offset) >> PAGE_SHIFT;
341 for (i = 0; i < kvm->arch.ram_npages; ++i)
342 if (rpn == kvm->arch.ram_pginfo[i].pfn)
343 return (i << PAGE_SHIFT) + offset;
344 return HPTE_R_RPN; /* all 1s in the RPN field */
345}
346
347long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
348 unsigned long pte_index)
349{
350 struct kvm *kvm = vcpu->kvm;
351 unsigned long *hpte, r;
352 int i, n = 1;
353
354 if (pte_index >= (HPT_NPTEG << 3))
355 return H_PARAMETER;
356 if (flags & H_READ_4) {
357 pte_index &= ~3;
358 n = 4;
359 }
360 for (i = 0; i < n; ++i, ++pte_index) {
361 hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
362 r = hpte[1];
363 if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID))
364 r = reverse_xlate(kvm, r & HPTE_R_RPN) |
365 (r & ~HPTE_R_RPN);
366 vcpu->arch.gpr[4 + i * 2] = hpte[0];
367 vcpu->arch.gpr[5 + i * 2] = r;
368 }
369 return H_SUCCESS;
370}
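The real-mode handlers above (the bulk-remove loop, kvmppc_h_protect and kvmppc_h_read) serialize access to each hashed page table entry by setting a software lock bit, HPTE_V_HVLOCK, in the first doubleword of the entry and spinning until they manage to set it; the unlock paths simply clear the bit when storing the entry back. A minimal user-space sketch of that bit-lock pattern, using GCC atomic builtins and a bit position assumed only for this example, might look like this:

/*
 * Sketch only, not kernel code.  HVLOCK_BIT is an assumption made for
 * this example; the kernel reserves a real software bit (HPTE_V_HVLOCK)
 * in the HPTE for the same purpose.
 */
#include <stdbool.h>
#include <stdint.h>

#define HVLOCK_BIT	(1ULL << 62)		/* assumed-free software bit */

static bool try_lock_entry(uint64_t *v)
{
	uint64_t old = __atomic_load_n(v, __ATOMIC_RELAXED);

	if (old & HVLOCK_BIT)
		return false;			/* already held */
	return __atomic_compare_exchange_n(v, &old, old | HVLOCK_BIT,
					   false, __ATOMIC_ACQUIRE,
					   __ATOMIC_RELAXED);
}

static void lock_entry(uint64_t *v)
{
	while (!try_lock_entry(v))
		;				/* the kernel loop uses cpu_relax() */
}

static void unlock_entry(uint64_t *v)
{
	__atomic_and_fetch(v, ~HVLOCK_BIT, __ATOMIC_RELEASE);
}

The in-kernel lock_hpte() operates on the live hash table with the MMU off; the sketch only illustrates the acquire/release semantics of keeping the lock bit inside the data word itself, which is why the handlers above can drop the lock with a plain "&= ~HPTE_V_HVLOCK".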
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
new file mode 100644
index 000000000000..6dd33581a228
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -0,0 +1,1345 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
12 *
13 * Derived from book3s_rmhandlers.S and other files, which are:
14 *
15 * Copyright SUSE Linux Products GmbH 2009
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20#include <asm/ppc_asm.h>
21#include <asm/kvm_asm.h>
22#include <asm/reg.h>
23#include <asm/page.h>
24#include <asm/asm-offsets.h>
25#include <asm/exception-64s.h>
26
27/*****************************************************************************
28 * *
29 * Real Mode handlers that need to be in the linear mapping *
30 * *
31 ****************************************************************************/
32
33 .globl kvmppc_skip_interrupt
34kvmppc_skip_interrupt:
35 mfspr r13,SPRN_SRR0
36 addi r13,r13,4
37 mtspr SPRN_SRR0,r13
38 GET_SCRATCH0(r13)
39 rfid
40 b .
41
42 .globl kvmppc_skip_Hinterrupt
43kvmppc_skip_Hinterrupt:
44 mfspr r13,SPRN_HSRR0
45 addi r13,r13,4
46 mtspr SPRN_HSRR0,r13
47 GET_SCRATCH0(r13)
48 hrfid
49 b .
50
51/*
52 * Call kvmppc_handler_trampoline_enter in real mode.
53 * Must be called with interrupts hard-disabled.
54 *
55 * Input Registers:
56 *
57 * LR = return address to continue at after eventually re-enabling MMU
58 */
59_GLOBAL(kvmppc_hv_entry_trampoline)
60 mfmsr r10
61 LOAD_REG_ADDR(r5, kvmppc_hv_entry)
62 li r0,MSR_RI
63 andc r0,r10,r0
64 li r6,MSR_IR | MSR_DR
65 andc r6,r10,r6
66 mtmsrd r0,1 /* clear RI in MSR */
67 mtsrr0 r5
68 mtsrr1 r6
69 RFI
70
71#define ULONG_SIZE 8
72#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
73
74/******************************************************************************
75 * *
76 * Entry code *
77 * *
78 *****************************************************************************/
79
80#define XICS_XIRR 4
81#define XICS_QIRR 0xc
82
83/*
84 * We come in here when wakened from nap mode on a secondary hw thread.
85 * Relocation is off and most register values are lost.
86 * r13 points to the PACA.
87 */
88 .globl kvm_start_guest
89kvm_start_guest:
90 ld r1,PACAEMERGSP(r13)
91 subi r1,r1,STACK_FRAME_OVERHEAD
92
93 /* get vcpu pointer */
94 ld r4, HSTATE_KVM_VCPU(r13)
95
96 /* We got here with an IPI; clear it */
97 ld r5, HSTATE_XICS_PHYS(r13)
98 li r0, 0xff
99 li r6, XICS_QIRR
100 li r7, XICS_XIRR
101 lwzcix r8, r5, r7 /* ack the interrupt */
102 sync
103 stbcix r0, r5, r6 /* clear it */
104 stwcix r8, r5, r7 /* EOI it */
105
106.global kvmppc_hv_entry
107kvmppc_hv_entry:
108
109 /* Required state:
110 *
111 * R4 = vcpu pointer
112 * MSR = ~IR|DR
113 * R13 = PACA
114 * R1 = host R1
115 * all other volatile GPRS = free
116 */
117 mflr r0
118 std r0, HSTATE_VMHANDLER(r13)
119
120 ld r14, VCPU_GPR(r14)(r4)
121 ld r15, VCPU_GPR(r15)(r4)
122 ld r16, VCPU_GPR(r16)(r4)
123 ld r17, VCPU_GPR(r17)(r4)
124 ld r18, VCPU_GPR(r18)(r4)
125 ld r19, VCPU_GPR(r19)(r4)
126 ld r20, VCPU_GPR(r20)(r4)
127 ld r21, VCPU_GPR(r21)(r4)
128 ld r22, VCPU_GPR(r22)(r4)
129 ld r23, VCPU_GPR(r23)(r4)
130 ld r24, VCPU_GPR(r24)(r4)
131 ld r25, VCPU_GPR(r25)(r4)
132 ld r26, VCPU_GPR(r26)(r4)
133 ld r27, VCPU_GPR(r27)(r4)
134 ld r28, VCPU_GPR(r28)(r4)
135 ld r29, VCPU_GPR(r29)(r4)
136 ld r30, VCPU_GPR(r30)(r4)
137 ld r31, VCPU_GPR(r31)(r4)
138
139 /* Load guest PMU registers */
140 /* R4 is live here (vcpu pointer) */
141 li r3, 1
142 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
143 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
144 isync
145 lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
146 lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
147 lwz r6, VCPU_PMC + 8(r4)
148 lwz r7, VCPU_PMC + 12(r4)
149 lwz r8, VCPU_PMC + 16(r4)
150 lwz r9, VCPU_PMC + 20(r4)
151BEGIN_FTR_SECTION
152 lwz r10, VCPU_PMC + 24(r4)
153 lwz r11, VCPU_PMC + 28(r4)
154END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
155 mtspr SPRN_PMC1, r3
156 mtspr SPRN_PMC2, r5
157 mtspr SPRN_PMC3, r6
158 mtspr SPRN_PMC4, r7
159 mtspr SPRN_PMC5, r8
160 mtspr SPRN_PMC6, r9
161BEGIN_FTR_SECTION
162 mtspr SPRN_PMC7, r10
163 mtspr SPRN_PMC8, r11
164END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
165 ld r3, VCPU_MMCR(r4)
166 ld r5, VCPU_MMCR + 8(r4)
167 ld r6, VCPU_MMCR + 16(r4)
168 mtspr SPRN_MMCR1, r5
169 mtspr SPRN_MMCRA, r6
170 mtspr SPRN_MMCR0, r3
171 isync
172
173 /* Load up FP, VMX and VSX registers */
174 bl kvmppc_load_fp
175
176BEGIN_FTR_SECTION
177 /* Switch DSCR to guest value */
178 ld r5, VCPU_DSCR(r4)
179 mtspr SPRN_DSCR, r5
180END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
181
182 /*
183 * Set the decrementer to the guest decrementer.
184 */
185 ld r8,VCPU_DEC_EXPIRES(r4)
186 mftb r7
187 subf r3,r7,r8
188 mtspr SPRN_DEC,r3
189 stw r3,VCPU_DEC(r4)
190
191 ld r5, VCPU_SPRG0(r4)
192 ld r6, VCPU_SPRG1(r4)
193 ld r7, VCPU_SPRG2(r4)
194 ld r8, VCPU_SPRG3(r4)
195 mtspr SPRN_SPRG0, r5
196 mtspr SPRN_SPRG1, r6
197 mtspr SPRN_SPRG2, r7
198 mtspr SPRN_SPRG3, r8
199
200 /* Save R1 in the PACA */
201 std r1, HSTATE_HOST_R1(r13)
202
203 /* Increment yield count if they have a VPA */
204 ld r3, VCPU_VPA(r4)
205 cmpdi r3, 0
206 beq 25f
207 lwz r5, LPPACA_YIELDCOUNT(r3)
208 addi r5, r5, 1
209 stw r5, LPPACA_YIELDCOUNT(r3)
21025:
211 /* Load up DAR and DSISR */
212 ld r5, VCPU_DAR(r4)
213 lwz r6, VCPU_DSISR(r4)
214 mtspr SPRN_DAR, r5
215 mtspr SPRN_DSISR, r6
216
217 /* Set partition DABR */
218 li r5,3
219 ld r6,VCPU_DABR(r4)
220 mtspr SPRN_DABRX,r5
221 mtspr SPRN_DABR,r6
222
223BEGIN_FTR_SECTION
224 /* Restore AMR and UAMOR, set AMOR to all 1s */
225 ld r5,VCPU_AMR(r4)
226 ld r6,VCPU_UAMOR(r4)
227 li r7,-1
228 mtspr SPRN_AMR,r5
229 mtspr SPRN_UAMOR,r6
230 mtspr SPRN_AMOR,r7
231END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
232
233 /* Clear out SLB */
234 li r6,0
235 slbmte r6,r6
236 slbia
237 ptesync
238
239BEGIN_FTR_SECTION
240 b 30f
241END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
242 /*
243 * POWER7 host -> guest partition switch code.
244 * We don't have to lock against concurrent tlbies,
245 * but we do have to coordinate across hardware threads.
246 */
247 /* Increment entry count iff exit count is zero. */
248 ld r5,HSTATE_KVM_VCORE(r13)
249 addi r9,r5,VCORE_ENTRY_EXIT
25021: lwarx r3,0,r9
251 cmpwi r3,0x100 /* any threads starting to exit? */
252 bge secondary_too_late /* if so we're too late to the party */
253 addi r3,r3,1
254 stwcx. r3,0,r9
255 bne 21b
256
257 /* Primary thread switches to guest partition. */
258 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
259 lwz r6,VCPU_PTID(r4)
260 cmpwi r6,0
261 bne 20f
262 ld r6,KVM_SDR1(r9)
263 lwz r7,KVM_LPID(r9)
264 li r0,LPID_RSVD /* switch to reserved LPID */
265 mtspr SPRN_LPID,r0
266 ptesync
267 mtspr SPRN_SDR1,r6 /* switch to partition page table */
268 mtspr SPRN_LPID,r7
269 isync
270 li r0,1
271 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
272 b 10f
273
274 /* Secondary threads wait for primary to have done partition switch */
27520: lbz r0,VCORE_IN_GUEST(r5)
276 cmpwi r0,0
277 beq 20b
278
279 /* Set LPCR. Set the MER bit if there is a pending external irq. */
28010: ld r8,KVM_LPCR(r9)
281 ld r0,VCPU_PENDING_EXC(r4)
282 li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
283 oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
284 and. r0,r0,r7
285 beq 11f
286 ori r8,r8,LPCR_MER
28711: mtspr SPRN_LPCR,r8
288 ld r8,KVM_RMOR(r9)
289 mtspr SPRN_RMOR,r8
290 isync
291
292 /* Check if HDEC expires soon */
293 mfspr r3,SPRN_HDEC
294 cmpwi r3,10
295 li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
296 mr r9,r4
297 blt hdec_soon
298
299 /*
300 * Invalidate the TLB if we could possibly have stale TLB
301 * entries for this partition on this core due to the use
302 * of tlbiel.
303 * XXX maybe only need this on primary thread?
304 */
305 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
306 lwz r5,VCPU_VCPUID(r4)
307 lhz r6,PACAPACAINDEX(r13)
308 rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
309 lhz r8,VCPU_LAST_CPU(r4)
310 sldi r7,r6,1 /* see if this is the same vcpu */
311 add r7,r7,r9 /* as last ran on this pcpu */
312 lhz r0,KVM_LAST_VCPU(r7)
313 cmpw r6,r8 /* on the same cpu core as last time? */
314 bne 3f
315 cmpw r0,r5 /* same vcpu as this core last ran? */
316 beq 1f
3173: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
318 sth r5,KVM_LAST_VCPU(r7)
319 li r6,128
320 mtctr r6
321 li r7,0x800 /* IS field = 0b10 */
322 ptesync
3232: tlbiel r7
324 addi r7,r7,0x1000
325 bdnz 2b
326 ptesync
3271:
328
329 /* Save purr/spurr */
330 mfspr r5,SPRN_PURR
331 mfspr r6,SPRN_SPURR
332 std r5,HSTATE_PURR(r13)
333 std r6,HSTATE_SPURR(r13)
334 ld r7,VCPU_PURR(r4)
335 ld r8,VCPU_SPURR(r4)
336 mtspr SPRN_PURR,r7
337 mtspr SPRN_SPURR,r8
338 b 31f
339
340 /*
341 * PPC970 host -> guest partition switch code.
342 * We have to lock against concurrent tlbies,
343 * using native_tlbie_lock to lock against host tlbies
344 * and kvm->arch.tlbie_lock to lock against guest tlbies.
345 * We also have to invalidate the TLB since its
346 * entries aren't tagged with the LPID.
347 */
34830: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
349
350 /* first take native_tlbie_lock */
351 .section ".toc","aw"
352toc_tlbie_lock:
353 .tc native_tlbie_lock[TC],native_tlbie_lock
354 .previous
355 ld r3,toc_tlbie_lock@toc(2)
356 lwz r8,PACA_LOCK_TOKEN(r13)
35724: lwarx r0,0,r3
358 cmpwi r0,0
359 bne 24b
360 stwcx. r8,0,r3
361 bne 24b
362 isync
363
364 ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */
365 li r0,0x18f
366 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
367 or r0,r7,r0
368 ptesync
369 sync
370 mtspr SPRN_HID4,r0 /* switch to reserved LPID */
371 isync
372 li r0,0
373 stw r0,0(r3) /* drop native_tlbie_lock */
374
375 /* invalidate the whole TLB */
376 li r0,256
377 mtctr r0
378 li r6,0
37925: tlbiel r6
380 addi r6,r6,0x1000
381 bdnz 25b
382 ptesync
383
384 /* Take the guest's tlbie_lock */
385 addi r3,r9,KVM_TLBIE_LOCK
38624: lwarx r0,0,r3
387 cmpwi r0,0
388 bne 24b
389 stwcx. r8,0,r3
390 bne 24b
391 isync
392 ld r6,KVM_SDR1(r9)
393 mtspr SPRN_SDR1,r6 /* switch to partition page table */
394
395 /* Set up HID4 with the guest's LPID etc. */
396 sync
397 mtspr SPRN_HID4,r7
398 isync
399
400 /* drop the guest's tlbie_lock */
401 li r0,0
402 stw r0,0(r3)
403
404 /* Check if HDEC expires soon */
405 mfspr r3,SPRN_HDEC
406 cmpwi r3,10
407 li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
408 mr r9,r4
409 blt hdec_soon
410
411 /* Enable HDEC interrupts */
412 mfspr r0,SPRN_HID0
413 li r3,1
414 rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
415 sync
416 mtspr SPRN_HID0,r0
417 mfspr r0,SPRN_HID0
418 mfspr r0,SPRN_HID0
419 mfspr r0,SPRN_HID0
420 mfspr r0,SPRN_HID0
421 mfspr r0,SPRN_HID0
422 mfspr r0,SPRN_HID0
423
424 /* Load up guest SLB entries */
42531: lwz r5,VCPU_SLB_MAX(r4)
426 cmpwi r5,0
427 beq 9f
428 mtctr r5
429 addi r6,r4,VCPU_SLB
4301: ld r8,VCPU_SLB_E(r6)
431 ld r9,VCPU_SLB_V(r6)
432 slbmte r9,r8
433 addi r6,r6,VCPU_SLB_SIZE
434 bdnz 1b
4359:
436
437 /* Restore state of CTRL run bit; assume 1 on entry */
438 lwz r5,VCPU_CTRL(r4)
439 andi. r5,r5,1
440 bne 4f
441 mfspr r6,SPRN_CTRLF
442 clrrdi r6,r6,1
443 mtspr SPRN_CTRLT,r6
4444:
445 ld r6, VCPU_CTR(r4)
446 lwz r7, VCPU_XER(r4)
447
448 mtctr r6
449 mtxer r7
450
451 /* Move SRR0 and SRR1 into the respective regs */
452 ld r6, VCPU_SRR0(r4)
453 ld r7, VCPU_SRR1(r4)
454 mtspr SPRN_SRR0, r6
455 mtspr SPRN_SRR1, r7
456
457 ld r10, VCPU_PC(r4)
458
 459 ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
460 rldicl r11, r11, 63 - MSR_HV_LG, 1
461 rotldi r11, r11, 1 + MSR_HV_LG
462 ori r11, r11, MSR_ME
463
464fast_guest_return:
465 mtspr SPRN_HSRR0,r10
466 mtspr SPRN_HSRR1,r11
467
468 /* Activate guest mode, so faults get handled by KVM */
469 li r9, KVM_GUEST_MODE_GUEST
470 stb r9, HSTATE_IN_GUEST(r13)
471
472 /* Enter guest */
473
474 ld r5, VCPU_LR(r4)
475 lwz r6, VCPU_CR(r4)
476 mtlr r5
477 mtcr r6
478
479 ld r0, VCPU_GPR(r0)(r4)
480 ld r1, VCPU_GPR(r1)(r4)
481 ld r2, VCPU_GPR(r2)(r4)
482 ld r3, VCPU_GPR(r3)(r4)
483 ld r5, VCPU_GPR(r5)(r4)
484 ld r6, VCPU_GPR(r6)(r4)
485 ld r7, VCPU_GPR(r7)(r4)
486 ld r8, VCPU_GPR(r8)(r4)
487 ld r9, VCPU_GPR(r9)(r4)
488 ld r10, VCPU_GPR(r10)(r4)
489 ld r11, VCPU_GPR(r11)(r4)
490 ld r12, VCPU_GPR(r12)(r4)
491 ld r13, VCPU_GPR(r13)(r4)
492
493 ld r4, VCPU_GPR(r4)(r4)
494
495 hrfid
496 b .
497
498/******************************************************************************
499 * *
500 * Exit code *
501 * *
502 *****************************************************************************/
503
504/*
505 * We come here from the first-level interrupt handlers.
506 */
507 .globl kvmppc_interrupt
508kvmppc_interrupt:
509 /*
510 * Register contents:
511 * R12 = interrupt vector
512 * R13 = PACA
513 * guest CR, R12 saved in shadow VCPU SCRATCH1/0
514 * guest R13 saved in SPRN_SCRATCH0
515 */
516 /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
517 std r9, HSTATE_HOST_R2(r13)
518 ld r9, HSTATE_KVM_VCPU(r13)
519
520 /* Save registers */
521
522 std r0, VCPU_GPR(r0)(r9)
523 std r1, VCPU_GPR(r1)(r9)
524 std r2, VCPU_GPR(r2)(r9)
525 std r3, VCPU_GPR(r3)(r9)
526 std r4, VCPU_GPR(r4)(r9)
527 std r5, VCPU_GPR(r5)(r9)
528 std r6, VCPU_GPR(r6)(r9)
529 std r7, VCPU_GPR(r7)(r9)
530 std r8, VCPU_GPR(r8)(r9)
531 ld r0, HSTATE_HOST_R2(r13)
532 std r0, VCPU_GPR(r9)(r9)
533 std r10, VCPU_GPR(r10)(r9)
534 std r11, VCPU_GPR(r11)(r9)
535 ld r3, HSTATE_SCRATCH0(r13)
536 lwz r4, HSTATE_SCRATCH1(r13)
537 std r3, VCPU_GPR(r12)(r9)
538 stw r4, VCPU_CR(r9)
539
540 /* Restore R1/R2 so we can handle faults */
541 ld r1, HSTATE_HOST_R1(r13)
542 ld r2, PACATOC(r13)
543
544 mfspr r10, SPRN_SRR0
545 mfspr r11, SPRN_SRR1
546 std r10, VCPU_SRR0(r9)
547 std r11, VCPU_SRR1(r9)
548 andi. r0, r12, 2 /* need to read HSRR0/1? */
549 beq 1f
550 mfspr r10, SPRN_HSRR0
551 mfspr r11, SPRN_HSRR1
552 clrrdi r12, r12, 2
5531: std r10, VCPU_PC(r9)
554 std r11, VCPU_MSR(r9)
555
556 GET_SCRATCH0(r3)
557 mflr r4
558 std r3, VCPU_GPR(r13)(r9)
559 std r4, VCPU_LR(r9)
560
561 /* Unset guest mode */
562 li r0, KVM_GUEST_MODE_NONE
563 stb r0, HSTATE_IN_GUEST(r13)
564
565 stw r12,VCPU_TRAP(r9)
566
567 /* See if this is a leftover HDEC interrupt */
568 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
569 bne 2f
570 mfspr r3,SPRN_HDEC
571 cmpwi r3,0
572 bge ignore_hdec
5732:
574 /* See if this is something we can handle in real mode */
575 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
576 beq hcall_try_real_mode
577hcall_real_cont:
578
579 /* Check for mediated interrupts (could be done earlier really ...) */
580BEGIN_FTR_SECTION
581 cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
582 bne+ 1f
583 ld r5,VCPU_KVM(r9)
584 ld r5,KVM_LPCR(r5)
585 andi. r0,r11,MSR_EE
586 beq 1f
587 andi. r0,r5,LPCR_MER
588 bne bounce_ext_interrupt
5891:
590END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
591
592 /* Save DEC */
593 mfspr r5,SPRN_DEC
594 mftb r6
595 extsw r5,r5
596 add r5,r5,r6
597 std r5,VCPU_DEC_EXPIRES(r9)
598
599 /* Save HEIR (HV emulation assist reg) in last_inst
600 if this is an HEI (HV emulation interrupt, e40) */
601 li r3,-1
602BEGIN_FTR_SECTION
603 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
604 bne 11f
605 mfspr r3,SPRN_HEIR
606END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
60711: stw r3,VCPU_LAST_INST(r9)
608
609 /* Save more register state */
610 mfxer r5
611 mfdar r6
612 mfdsisr r7
613 mfctr r8
614
615 stw r5, VCPU_XER(r9)
616 std r6, VCPU_DAR(r9)
617 stw r7, VCPU_DSISR(r9)
618 std r8, VCPU_CTR(r9)
619 /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */
620BEGIN_FTR_SECTION
621 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
622 beq 6f
623END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
6247: std r6, VCPU_FAULT_DAR(r9)
625 stw r7, VCPU_FAULT_DSISR(r9)
626
627 /* Save guest CTRL register, set runlatch to 1 */
628 mfspr r6,SPRN_CTRLF
629 stw r6,VCPU_CTRL(r9)
630 andi. r0,r6,1
631 bne 4f
632 ori r6,r6,1
633 mtspr SPRN_CTRLT,r6
6344:
635 /* Read the guest SLB and save it away */
636 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
637 mtctr r0
638 li r6,0
639 addi r7,r9,VCPU_SLB
640 li r5,0
6411: slbmfee r8,r6
642 andis. r0,r8,SLB_ESID_V@h
643 beq 2f
644 add r8,r8,r6 /* put index in */
645 slbmfev r3,r6
646 std r8,VCPU_SLB_E(r7)
647 std r3,VCPU_SLB_V(r7)
648 addi r7,r7,VCPU_SLB_SIZE
649 addi r5,r5,1
6502: addi r6,r6,1
651 bdnz 1b
652 stw r5,VCPU_SLB_MAX(r9)
653
654 /*
655 * Save the guest PURR/SPURR
656 */
657BEGIN_FTR_SECTION
658 mfspr r5,SPRN_PURR
659 mfspr r6,SPRN_SPURR
660 ld r7,VCPU_PURR(r9)
661 ld r8,VCPU_SPURR(r9)
662 std r5,VCPU_PURR(r9)
663 std r6,VCPU_SPURR(r9)
664 subf r5,r7,r5
665 subf r6,r8,r6
666
667 /*
668 * Restore host PURR/SPURR and add guest times
669 * so that the time in the guest gets accounted.
670 */
671 ld r3,HSTATE_PURR(r13)
672 ld r4,HSTATE_SPURR(r13)
673 add r3,r3,r5
674 add r4,r4,r6
675 mtspr SPRN_PURR,r3
676 mtspr SPRN_SPURR,r4
677END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
678
679 /* Clear out SLB */
680 li r5,0
681 slbmte r5,r5
682 slbia
683 ptesync
684
685hdec_soon:
686BEGIN_FTR_SECTION
687 b 32f
688END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
689 /*
690 * POWER7 guest -> host partition switch code.
691 * We don't have to lock against tlbies but we do
692 * have to coordinate the hardware threads.
693 */
694 /* Increment the threads-exiting-guest count in the 0xff00
695 bits of vcore->entry_exit_count */
696 lwsync
697 ld r5,HSTATE_KVM_VCORE(r13)
698 addi r6,r5,VCORE_ENTRY_EXIT
69941: lwarx r3,0,r6
700 addi r0,r3,0x100
701 stwcx. r0,0,r6
702 bne 41b
703
704 /*
705 * At this point we have an interrupt that we have to pass
706 * up to the kernel or qemu; we can't handle it in real mode.
707 * Thus we have to do a partition switch, so we have to
708 * collect the other threads, if we are the first thread
709 * to take an interrupt. To do this, we set the HDEC to 0,
710 * which causes an HDEC interrupt in all threads within 2ns
711 * because the HDEC register is shared between all 4 threads.
712 * However, we don't need to bother if this is an HDEC
713 * interrupt, since the other threads will already be on their
714 * way here in that case.
715 */
716 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
717 beq 40f
718 cmpwi r3,0x100 /* Are we the first here? */
719 bge 40f
720 cmpwi r3,1
721 ble 40f
722 li r0,0
723 mtspr SPRN_HDEC,r0
72440:
725
726 /* Secondary threads wait for primary to do partition switch */
727 ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
728 ld r5,HSTATE_KVM_VCORE(r13)
729 lwz r3,VCPU_PTID(r9)
730 cmpwi r3,0
731 beq 15f
732 HMT_LOW
73313: lbz r3,VCORE_IN_GUEST(r5)
734 cmpwi r3,0
735 bne 13b
736 HMT_MEDIUM
737 b 16f
738
739 /* Primary thread waits for all the secondaries to exit guest */
74015: lwz r3,VCORE_ENTRY_EXIT(r5)
741 srwi r0,r3,8
742 clrldi r3,r3,56
743 cmpw r3,r0
744 bne 15b
745 isync
746
747 /* Primary thread switches back to host partition */
748 ld r6,KVM_HOST_SDR1(r4)
749 lwz r7,KVM_HOST_LPID(r4)
750 li r8,LPID_RSVD /* switch to reserved LPID */
751 mtspr SPRN_LPID,r8
752 ptesync
753 mtspr SPRN_SDR1,r6 /* switch to partition page table */
754 mtspr SPRN_LPID,r7
755 isync
756 li r0,0
757 stb r0,VCORE_IN_GUEST(r5)
758 lis r8,0x7fff /* MAX_INT@h */
759 mtspr SPRN_HDEC,r8
760
76116: ld r8,KVM_HOST_LPCR(r4)
762 mtspr SPRN_LPCR,r8
763 isync
764 b 33f
765
766 /*
767 * PPC970 guest -> host partition switch code.
768 * We have to lock against concurrent tlbies, and
769 * we have to flush the whole TLB.
770 */
77132: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
772
773 /* Take the guest's tlbie_lock */
774 lwz r8,PACA_LOCK_TOKEN(r13)
775 addi r3,r4,KVM_TLBIE_LOCK
77624: lwarx r0,0,r3
777 cmpwi r0,0
778 bne 24b
779 stwcx. r8,0,r3
780 bne 24b
781 isync
782
783 ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */
784 li r0,0x18f
785 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
786 or r0,r7,r0
787 ptesync
788 sync
789 mtspr SPRN_HID4,r0 /* switch to reserved LPID */
790 isync
791 li r0,0
792 stw r0,0(r3) /* drop guest tlbie_lock */
793
794 /* invalidate the whole TLB */
795 li r0,256
796 mtctr r0
797 li r6,0
79825: tlbiel r6
799 addi r6,r6,0x1000
800 bdnz 25b
801 ptesync
802
803 /* take native_tlbie_lock */
804 ld r3,toc_tlbie_lock@toc(2)
80524: lwarx r0,0,r3
806 cmpwi r0,0
807 bne 24b
808 stwcx. r8,0,r3
809 bne 24b
810 isync
811
812 ld r6,KVM_HOST_SDR1(r4)
813 mtspr SPRN_SDR1,r6 /* switch to host page table */
814
815 /* Set up host HID4 value */
816 sync
817 mtspr SPRN_HID4,r7
818 isync
819 li r0,0
820 stw r0,0(r3) /* drop native_tlbie_lock */
821
822 lis r8,0x7fff /* MAX_INT@h */
823 mtspr SPRN_HDEC,r8
824
825 /* Disable HDEC interrupts */
826 mfspr r0,SPRN_HID0
827 li r3,0
828 rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
829 sync
830 mtspr SPRN_HID0,r0
831 mfspr r0,SPRN_HID0
832 mfspr r0,SPRN_HID0
833 mfspr r0,SPRN_HID0
834 mfspr r0,SPRN_HID0
835 mfspr r0,SPRN_HID0
836 mfspr r0,SPRN_HID0
837
838 /* load host SLB entries */
83933: ld r8,PACA_SLBSHADOWPTR(r13)
840
841 .rept SLB_NUM_BOLTED
842 ld r5,SLBSHADOW_SAVEAREA(r8)
843 ld r6,SLBSHADOW_SAVEAREA+8(r8)
844 andis. r7,r5,SLB_ESID_V@h
845 beq 1f
846 slbmte r6,r5
8471: addi r8,r8,16
848 .endr
849
850 /* Save and reset AMR and UAMOR before turning on the MMU */
851BEGIN_FTR_SECTION
852 mfspr r5,SPRN_AMR
853 mfspr r6,SPRN_UAMOR
854 std r5,VCPU_AMR(r9)
855 std r6,VCPU_UAMOR(r9)
856 li r6,0
857 mtspr SPRN_AMR,r6
858END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
859
860 /* Restore host DABR and DABRX */
861 ld r5,HSTATE_DABR(r13)
862 li r6,7
863 mtspr SPRN_DABR,r5
864 mtspr SPRN_DABRX,r6
865
866 /* Switch DSCR back to host value */
867BEGIN_FTR_SECTION
868 mfspr r8, SPRN_DSCR
869 ld r7, HSTATE_DSCR(r13)
870 std r8, VCPU_DSCR(r7)
871 mtspr SPRN_DSCR, r7
872END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
873
874 /* Save non-volatile GPRs */
875 std r14, VCPU_GPR(r14)(r9)
876 std r15, VCPU_GPR(r15)(r9)
877 std r16, VCPU_GPR(r16)(r9)
878 std r17, VCPU_GPR(r17)(r9)
879 std r18, VCPU_GPR(r18)(r9)
880 std r19, VCPU_GPR(r19)(r9)
881 std r20, VCPU_GPR(r20)(r9)
882 std r21, VCPU_GPR(r21)(r9)
883 std r22, VCPU_GPR(r22)(r9)
884 std r23, VCPU_GPR(r23)(r9)
885 std r24, VCPU_GPR(r24)(r9)
886 std r25, VCPU_GPR(r25)(r9)
887 std r26, VCPU_GPR(r26)(r9)
888 std r27, VCPU_GPR(r27)(r9)
889 std r28, VCPU_GPR(r28)(r9)
890 std r29, VCPU_GPR(r29)(r9)
891 std r30, VCPU_GPR(r30)(r9)
892 std r31, VCPU_GPR(r31)(r9)
893
894 /* Save SPRGs */
895 mfspr r3, SPRN_SPRG0
896 mfspr r4, SPRN_SPRG1
897 mfspr r5, SPRN_SPRG2
898 mfspr r6, SPRN_SPRG3
899 std r3, VCPU_SPRG0(r9)
900 std r4, VCPU_SPRG1(r9)
901 std r5, VCPU_SPRG2(r9)
902 std r6, VCPU_SPRG3(r9)
903
904 /* Increment yield count if they have a VPA */
905 ld r8, VCPU_VPA(r9) /* do they have a VPA? */
906 cmpdi r8, 0
907 beq 25f
908 lwz r3, LPPACA_YIELDCOUNT(r8)
909 addi r3, r3, 1
910 stw r3, LPPACA_YIELDCOUNT(r8)
91125:
912 /* Save PMU registers if requested */
913 /* r8 and cr0.eq are live here */
914 li r3, 1
915 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
916 mfspr r4, SPRN_MMCR0 /* save MMCR0 */
917 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
918 isync
919 beq 21f /* if no VPA, save PMU stuff anyway */
920 lbz r7, LPPACA_PMCINUSE(r8)
921 cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
922 bne 21f
923 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
924 b 22f
92521: mfspr r5, SPRN_MMCR1
926 mfspr r6, SPRN_MMCRA
927 std r4, VCPU_MMCR(r9)
928 std r5, VCPU_MMCR + 8(r9)
929 std r6, VCPU_MMCR + 16(r9)
930 mfspr r3, SPRN_PMC1
931 mfspr r4, SPRN_PMC2
932 mfspr r5, SPRN_PMC3
933 mfspr r6, SPRN_PMC4
934 mfspr r7, SPRN_PMC5
935 mfspr r8, SPRN_PMC6
936BEGIN_FTR_SECTION
937 mfspr r10, SPRN_PMC7
938 mfspr r11, SPRN_PMC8
939END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
940 stw r3, VCPU_PMC(r9)
941 stw r4, VCPU_PMC + 4(r9)
942 stw r5, VCPU_PMC + 8(r9)
943 stw r6, VCPU_PMC + 12(r9)
944 stw r7, VCPU_PMC + 16(r9)
945 stw r8, VCPU_PMC + 20(r9)
946BEGIN_FTR_SECTION
947 stw r10, VCPU_PMC + 24(r9)
948 stw r11, VCPU_PMC + 28(r9)
949END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
95022:
951 /* save FP state */
952 mr r3, r9
953 bl .kvmppc_save_fp
954
955 /* Secondary threads go off to take a nap on POWER7 */
956BEGIN_FTR_SECTION
957 lwz r0,VCPU_PTID(r3)
958 cmpwi r0,0
959 bne secondary_nap
960END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
961
962 /*
963 * Reload DEC. HDEC interrupts were disabled when
964 * we reloaded the host's LPCR value.
965 */
966 ld r3, HSTATE_DECEXP(r13)
967 mftb r4
968 subf r4, r4, r3
969 mtspr SPRN_DEC, r4
970
971 /* Reload the host's PMU registers */
972 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
973 lbz r4, LPPACA_PMCINUSE(r3)
974 cmpwi r4, 0
975 beq 23f /* skip if not */
976 lwz r3, HSTATE_PMC(r13)
977 lwz r4, HSTATE_PMC + 4(r13)
978 lwz r5, HSTATE_PMC + 8(r13)
979 lwz r6, HSTATE_PMC + 12(r13)
980 lwz r8, HSTATE_PMC + 16(r13)
981 lwz r9, HSTATE_PMC + 20(r13)
982BEGIN_FTR_SECTION
983 lwz r10, HSTATE_PMC + 24(r13)
984 lwz r11, HSTATE_PMC + 28(r13)
985END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
986 mtspr SPRN_PMC1, r3
987 mtspr SPRN_PMC2, r4
988 mtspr SPRN_PMC3, r5
989 mtspr SPRN_PMC4, r6
990 mtspr SPRN_PMC5, r8
991 mtspr SPRN_PMC6, r9
992BEGIN_FTR_SECTION
993 mtspr SPRN_PMC7, r10
994 mtspr SPRN_PMC8, r11
995END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
996 ld r3, HSTATE_MMCR(r13)
997 ld r4, HSTATE_MMCR + 8(r13)
998 ld r5, HSTATE_MMCR + 16(r13)
999 mtspr SPRN_MMCR1, r4
1000 mtspr SPRN_MMCRA, r5
1001 mtspr SPRN_MMCR0, r3
1002 isync
100323:
1004 /*
1005 * For external and machine check interrupts, we need
1006 * to call the Linux handler to process the interrupt.
1007 * We do that by jumping to the interrupt vector address
1008 * which we have in r12. The [h]rfid at the end of the
1009 * handler will return to the book3s_hv_interrupts.S code.
1010 * For other interrupts we do the rfid to get back
1011 * to the book3s_interrupts.S code here.
1012 */
1013 ld r8, HSTATE_VMHANDLER(r13)
1014 ld r7, HSTATE_HOST_MSR(r13)
1015
1016 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1017 beq 11f
1018 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1019
1020 /* RFI into the highmem handler, or branch to interrupt handler */
102112: mfmsr r6
1022 mtctr r12
1023 li r0, MSR_RI
1024 andc r6, r6, r0
1025 mtmsrd r6, 1 /* Clear RI in MSR */
1026 mtsrr0 r8
1027 mtsrr1 r7
1028 beqctr
1029 RFI
1030
103111:
1032BEGIN_FTR_SECTION
1033 b 12b
1034END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1035 mtspr SPRN_HSRR0, r8
1036 mtspr SPRN_HSRR1, r7
1037 ba 0x500
1038
10396: mfspr r6,SPRN_HDAR
1040 mfspr r7,SPRN_HDSISR
1041 b 7b
1042
1043/*
1044 * Try to handle an hcall in real mode.
1045 * Returns to the guest if we handle it, or continues on up to
1046 * the kernel if we can't (i.e. if we don't have a handler for
1047 * it, or if the handler returns H_TOO_HARD).
1048 */
1049 .globl hcall_try_real_mode
1050hcall_try_real_mode:
1051 ld r3,VCPU_GPR(r3)(r9)
1052 andi. r0,r11,MSR_PR
1053 bne hcall_real_cont
1054 clrrdi r3,r3,2
1055 cmpldi r3,hcall_real_table_end - hcall_real_table
1056 bge hcall_real_cont
1057 LOAD_REG_ADDR(r4, hcall_real_table)
1058 lwzx r3,r3,r4
1059 cmpwi r3,0
1060 beq hcall_real_cont
1061 add r3,r3,r4
1062 mtctr r3
1063 mr r3,r9 /* get vcpu pointer */
1064 ld r4,VCPU_GPR(r4)(r9)
1065 bctrl
1066 cmpdi r3,H_TOO_HARD
1067 beq hcall_real_fallback
1068 ld r4,HSTATE_KVM_VCPU(r13)
1069 std r3,VCPU_GPR(r3)(r4)
1070 ld r10,VCPU_PC(r4)
1071 ld r11,VCPU_MSR(r4)
1072 b fast_guest_return
1073
1074 /* We've attempted a real mode hcall, but it's punted it back
1075 * to userspace. We need to restore some clobbered volatiles
1076 * before resuming the pass-it-to-qemu path */
1077hcall_real_fallback:
1078 li r12,BOOK3S_INTERRUPT_SYSCALL
1079 ld r9, HSTATE_KVM_VCPU(r13)
1080 ld r11, VCPU_MSR(r9)
1081
1082 b hcall_real_cont
1083
1084 .globl hcall_real_table
1085hcall_real_table:
1086 .long 0 /* 0 - unused */
1087 .long .kvmppc_h_remove - hcall_real_table
1088 .long .kvmppc_h_enter - hcall_real_table
1089 .long .kvmppc_h_read - hcall_real_table
1090 .long 0 /* 0x10 - H_CLEAR_MOD */
1091 .long 0 /* 0x14 - H_CLEAR_REF */
1092 .long .kvmppc_h_protect - hcall_real_table
1093 .long 0 /* 0x1c - H_GET_TCE */
1094 .long .kvmppc_h_put_tce - hcall_real_table
1095 .long 0 /* 0x24 - H_SET_SPRG0 */
1096 .long .kvmppc_h_set_dabr - hcall_real_table
1097 .long 0 /* 0x2c */
1098 .long 0 /* 0x30 */
1099 .long 0 /* 0x34 */
1100 .long 0 /* 0x38 */
1101 .long 0 /* 0x3c */
1102 .long 0 /* 0x40 */
1103 .long 0 /* 0x44 */
1104 .long 0 /* 0x48 */
1105 .long 0 /* 0x4c */
1106 .long 0 /* 0x50 */
1107 .long 0 /* 0x54 */
1108 .long 0 /* 0x58 */
1109 .long 0 /* 0x5c */
1110 .long 0 /* 0x60 */
1111 .long 0 /* 0x64 */
1112 .long 0 /* 0x68 */
1113 .long 0 /* 0x6c */
1114 .long 0 /* 0x70 */
1115 .long 0 /* 0x74 */
1116 .long 0 /* 0x78 */
1117 .long 0 /* 0x7c */
1118 .long 0 /* 0x80 */
1119 .long 0 /* 0x84 */
1120 .long 0 /* 0x88 */
1121 .long 0 /* 0x8c */
1122 .long 0 /* 0x90 */
1123 .long 0 /* 0x94 */
1124 .long 0 /* 0x98 */
1125 .long 0 /* 0x9c */
1126 .long 0 /* 0xa0 */
1127 .long 0 /* 0xa4 */
1128 .long 0 /* 0xa8 */
1129 .long 0 /* 0xac */
1130 .long 0 /* 0xb0 */
1131 .long 0 /* 0xb4 */
1132 .long 0 /* 0xb8 */
1133 .long 0 /* 0xbc */
1134 .long 0 /* 0xc0 */
1135 .long 0 /* 0xc4 */
1136 .long 0 /* 0xc8 */
1137 .long 0 /* 0xcc */
1138 .long 0 /* 0xd0 */
1139 .long 0 /* 0xd4 */
1140 .long 0 /* 0xd8 */
1141 .long 0 /* 0xdc */
1142 .long 0 /* 0xe0 */
1143 .long 0 /* 0xe4 */
1144 .long 0 /* 0xe8 */
1145 .long 0 /* 0xec */
1146 .long 0 /* 0xf0 */
1147 .long 0 /* 0xf4 */
1148 .long 0 /* 0xf8 */
1149 .long 0 /* 0xfc */
1150 .long 0 /* 0x100 */
1151 .long 0 /* 0x104 */
1152 .long 0 /* 0x108 */
1153 .long 0 /* 0x10c */
1154 .long 0 /* 0x110 */
1155 .long 0 /* 0x114 */
1156 .long 0 /* 0x118 */
1157 .long 0 /* 0x11c */
1158 .long 0 /* 0x120 */
1159 .long .kvmppc_h_bulk_remove - hcall_real_table
1160hcall_real_table_end:
1161
1162ignore_hdec:
1163 mr r4,r9
1164 b fast_guest_return
1165
1166bounce_ext_interrupt:
1167 mr r4,r9
1168 mtspr SPRN_SRR0,r10
1169 mtspr SPRN_SRR1,r11
1170 li r10,BOOK3S_INTERRUPT_EXTERNAL
1171 LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME);
1172 b fast_guest_return
1173
1174_GLOBAL(kvmppc_h_set_dabr)
1175 std r4,VCPU_DABR(r3)
1176 mtspr SPRN_DABR,r4
1177 li r3,0
1178 blr
1179
1180secondary_too_late:
1181 ld r5,HSTATE_KVM_VCORE(r13)
1182 HMT_LOW
118313: lbz r3,VCORE_IN_GUEST(r5)
1184 cmpwi r3,0
1185 bne 13b
1186 HMT_MEDIUM
1187 ld r11,PACA_SLBSHADOWPTR(r13)
1188
1189 .rept SLB_NUM_BOLTED
1190 ld r5,SLBSHADOW_SAVEAREA(r11)
1191 ld r6,SLBSHADOW_SAVEAREA+8(r11)
1192 andis. r7,r5,SLB_ESID_V@h
1193 beq 1f
1194 slbmte r6,r5
11951: addi r11,r11,16
1196 .endr
1197 b 50f
1198
1199secondary_nap:
1200 /* Clear any pending IPI */
120150: ld r5, HSTATE_XICS_PHYS(r13)
1202 li r0, 0xff
1203 li r6, XICS_QIRR
1204 stbcix r0, r5, r6
1205
1206 /* increment the nap count and then go to nap mode */
1207 ld r4, HSTATE_KVM_VCORE(r13)
1208 addi r4, r4, VCORE_NAP_COUNT
1209 lwsync /* make previous updates visible */
121051: lwarx r3, 0, r4
1211 addi r3, r3, 1
1212 stwcx. r3, 0, r4
1213 bne 51b
1214 isync
1215
1216 mfspr r4, SPRN_LPCR
1217 li r0, LPCR_PECE
1218 andc r4, r4, r0
1219 ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */
1220 mtspr SPRN_LPCR, r4
1221 li r0, 0
1222 std r0, HSTATE_SCRATCH0(r13)
1223 ptesync
1224 ld r0, HSTATE_SCRATCH0(r13)
12251: cmpd r0, r0
1226 bne 1b
1227 nap
1228 b .
1229
1230/*
1231 * Save away FP, VMX and VSX registers.
1232 * r3 = vcpu pointer
1233 */
1234_GLOBAL(kvmppc_save_fp)
1235 mfmsr r9
1236 ori r8,r9,MSR_FP
1237#ifdef CONFIG_ALTIVEC
1238BEGIN_FTR_SECTION
1239 oris r8,r8,MSR_VEC@h
1240END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1241#endif
1242#ifdef CONFIG_VSX
1243BEGIN_FTR_SECTION
1244 oris r8,r8,MSR_VSX@h
1245END_FTR_SECTION_IFSET(CPU_FTR_VSX)
1246#endif
1247 mtmsrd r8
1248 isync
1249#ifdef CONFIG_VSX
1250BEGIN_FTR_SECTION
1251 reg = 0
1252 .rept 32
1253 li r6,reg*16+VCPU_VSRS
1254 stxvd2x reg,r6,r3
1255 reg = reg + 1
1256 .endr
1257FTR_SECTION_ELSE
1258#endif
1259 reg = 0
1260 .rept 32
1261 stfd reg,reg*8+VCPU_FPRS(r3)
1262 reg = reg + 1
1263 .endr
1264#ifdef CONFIG_VSX
1265ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
1266#endif
1267 mffs fr0
1268 stfd fr0,VCPU_FPSCR(r3)
1269
1270#ifdef CONFIG_ALTIVEC
1271BEGIN_FTR_SECTION
1272 reg = 0
1273 .rept 32
1274 li r6,reg*16+VCPU_VRS
1275 stvx reg,r6,r3
1276 reg = reg + 1
1277 .endr
1278 mfvscr vr0
1279 li r6,VCPU_VSCR
1280 stvx vr0,r6,r3
1281END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1282#endif
1283 mfspr r6,SPRN_VRSAVE
1284 stw r6,VCPU_VRSAVE(r3)
1285 mtmsrd r9
1286 isync
1287 blr
1288
1289/*
1290 * Load up FP, VMX and VSX registers
1291 * r4 = vcpu pointer
1292 */
1293 .globl kvmppc_load_fp
1294kvmppc_load_fp:
1295 mfmsr r9
1296 ori r8,r9,MSR_FP
1297#ifdef CONFIG_ALTIVEC
1298BEGIN_FTR_SECTION
1299 oris r8,r8,MSR_VEC@h
1300END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1301#endif
1302#ifdef CONFIG_VSX
1303BEGIN_FTR_SECTION
1304 oris r8,r8,MSR_VSX@h
1305END_FTR_SECTION_IFSET(CPU_FTR_VSX)
1306#endif
1307 mtmsrd r8
1308 isync
1309 lfd fr0,VCPU_FPSCR(r4)
1310 MTFSF_L(fr0)
1311#ifdef CONFIG_VSX
1312BEGIN_FTR_SECTION
1313 reg = 0
1314 .rept 32
1315 li r7,reg*16+VCPU_VSRS
1316 lxvd2x reg,r7,r4
1317 reg = reg + 1
1318 .endr
1319FTR_SECTION_ELSE
1320#endif
1321 reg = 0
1322 .rept 32
1323 lfd reg,reg*8+VCPU_FPRS(r4)
1324 reg = reg + 1
1325 .endr
1326#ifdef CONFIG_VSX
1327ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
1328#endif
1329
1330#ifdef CONFIG_ALTIVEC
1331BEGIN_FTR_SECTION
1332 li r7,VCPU_VSCR
1333 lvx vr0,r7,r4
1334 mtvscr vr0
1335 reg = 0
1336 .rept 32
1337 li r7,reg*16+VCPU_VRS
1338 lvx reg,r7,r4
1339 reg = reg + 1
1340 .endr
1341END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1342#endif
1343 lwz r7,VCPU_VRSAVE(r4)
1344 mtspr SPRN_VRSAVE,r7
1345 blr
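The hcall_try_real_mode path above turns the hypercall number in r3 into a byte offset into hcall_real_table (PAPR hcall numbers are multiples of 4, matching the 4-byte .long entries), loads the handler's offset, and branches to it through CTR; a zero entry means the call cannot be handled in real mode and falls through to the normal exit path. A rough C analogue of that dispatch, with hypothetical handler names and a plain function-pointer table standing in for the offset table, might be:

/* Sketch only, not kernel code; names and values are illustrative. */
#include <stddef.h>

struct vcpu;				/* stand-in for struct kvm_vcpu */
typedef long (*hcall_fn)(struct vcpu *vcpu);

#define H_TOO_HARD_RC	(-1L)		/* placeholder: punt to the full handler */

static long h_remove(struct vcpu *vcpu) { (void)vcpu; return 0; }  /* 0x04 */
static long h_enter(struct vcpu *vcpu)  { (void)vcpu; return 0; }  /* 0x08 */

static const hcall_fn hcall_table[] = {
	NULL,		/* 0x00 - unused */
	h_remove,	/* 0x04 - H_REMOVE */
	h_enter,	/* 0x08 - H_ENTER */
};

static long try_real_mode_hcall(struct vcpu *vcpu, unsigned long nr)
{
	unsigned long idx = nr >> 2;	/* hcall numbers step by 4 */

	if (idx >= sizeof(hcall_table) / sizeof(hcall_table[0]) ||
	    !hcall_table[idx])
		return H_TOO_HARD_RC;	/* fall back to the slow path */
	return hcall_table[idx](vcpu);
}

Storing the real handlers as 32-bit offsets from the start of hcall_real_table keeps the assembly table compact and lets the dispatch compute the target from the table's runtime address with a single add; the function-pointer table here is just the nearest portable equivalent.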
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 2f0bc928b08a..c54b0e30cf3f 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -29,8 +29,7 @@
29#define ULONG_SIZE 8 29#define ULONG_SIZE 8
30#define FUNC(name) GLUE(.,name) 30#define FUNC(name) GLUE(.,name)
31 31
32#define GET_SHADOW_VCPU(reg) \ 32#define GET_SHADOW_VCPU_R13
33 addi reg, r13, PACA_KVM_SVCPU
34 33
35#define DISABLE_INTERRUPTS \ 34#define DISABLE_INTERRUPTS \
36 mfmsr r0; \ 35 mfmsr r0; \
@@ -43,8 +42,8 @@
43#define ULONG_SIZE 4 42#define ULONG_SIZE 4
44#define FUNC(name) name 43#define FUNC(name) name
45 44
46#define GET_SHADOW_VCPU(reg) \ 45#define GET_SHADOW_VCPU_R13 \
47 lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) 46 lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2)
48 47
49#define DISABLE_INTERRUPTS \ 48#define DISABLE_INTERRUPTS \
50 mfmsr r0; \ 49 mfmsr r0; \
@@ -85,7 +84,7 @@
85 * r3: kvm_run pointer 84 * r3: kvm_run pointer
86 * r4: vcpu pointer 85 * r4: vcpu pointer
87 */ 86 */
88_GLOBAL(__kvmppc_vcpu_entry) 87_GLOBAL(__kvmppc_vcpu_run)
89 88
90kvm_start_entry: 89kvm_start_entry:
91 /* Write correct stack frame */ 90 /* Write correct stack frame */
@@ -107,17 +106,11 @@ kvm_start_entry:
107 /* Load non-volatile guest state from the vcpu */ 106 /* Load non-volatile guest state from the vcpu */
108 VCPU_LOAD_NVGPRS(r4) 107 VCPU_LOAD_NVGPRS(r4)
109 108
110 GET_SHADOW_VCPU(r5) 109kvm_start_lightweight:
111
112 /* Save R1/R2 in the PACA */
113 PPC_STL r1, SVCPU_HOST_R1(r5)
114 PPC_STL r2, SVCPU_HOST_R2(r5)
115 110
116 /* XXX swap in/out on load? */ 111 GET_SHADOW_VCPU_R13
117 PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4) 112 PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
118 PPC_STL r3, SVCPU_VMHANDLER(r5) 113 PPC_STL r3, HSTATE_VMHANDLER(r13)
119
120kvm_start_lightweight:
121 114
122 PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ 115 PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
123 116
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 79751d8dd131..41cb0017e757 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -21,7 +21,6 @@
21#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
22#include <linux/hash.h> 22#include <linux/hash.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include "trace.h"
25 24
26#include <asm/kvm_ppc.h> 25#include <asm/kvm_ppc.h>
27#include <asm/kvm_book3s.h> 26#include <asm/kvm_book3s.h>
@@ -29,6 +28,8 @@
29#include <asm/mmu_context.h> 28#include <asm/mmu_context.h>
30#include <asm/hw_irq.h> 29#include <asm/hw_irq.h>
31 30
31#include "trace.h"
32
32#define PTE_SIZE 12 33#define PTE_SIZE 12
33 34
34static struct kmem_cache *hpte_cache; 35static struct kmem_cache *hpte_cache;
@@ -58,30 +59,31 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
58void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 59void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
59{ 60{
60 u64 index; 61 u64 index;
62 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
61 63
62 trace_kvm_book3s_mmu_map(pte); 64 trace_kvm_book3s_mmu_map(pte);
63 65
64 spin_lock(&vcpu->arch.mmu_lock); 66 spin_lock(&vcpu3s->mmu_lock);
65 67
66 /* Add to ePTE list */ 68 /* Add to ePTE list */
67 index = kvmppc_mmu_hash_pte(pte->pte.eaddr); 69 index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
68 hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); 70 hlist_add_head_rcu(&pte->list_pte, &vcpu3s->hpte_hash_pte[index]);
69 71
70 /* Add to ePTE_long list */ 72 /* Add to ePTE_long list */
71 index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr); 73 index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr);
72 hlist_add_head_rcu(&pte->list_pte_long, 74 hlist_add_head_rcu(&pte->list_pte_long,
73 &vcpu->arch.hpte_hash_pte_long[index]); 75 &vcpu3s->hpte_hash_pte_long[index]);
74 76
75 /* Add to vPTE list */ 77 /* Add to vPTE list */
76 index = kvmppc_mmu_hash_vpte(pte->pte.vpage); 78 index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
77 hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); 79 hlist_add_head_rcu(&pte->list_vpte, &vcpu3s->hpte_hash_vpte[index]);
78 80
79 /* Add to vPTE_long list */ 81 /* Add to vPTE_long list */
80 index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); 82 index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage);
81 hlist_add_head_rcu(&pte->list_vpte_long, 83 hlist_add_head_rcu(&pte->list_vpte_long,
82 &vcpu->arch.hpte_hash_vpte_long[index]); 84 &vcpu3s->hpte_hash_vpte_long[index]);
83 85
84 spin_unlock(&vcpu->arch.mmu_lock); 86 spin_unlock(&vcpu3s->mmu_lock);
85} 87}
86 88
87static void free_pte_rcu(struct rcu_head *head) 89static void free_pte_rcu(struct rcu_head *head)
@@ -92,16 +94,18 @@ static void free_pte_rcu(struct rcu_head *head)
92 94
93static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 95static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
94{ 96{
97 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
98
95 trace_kvm_book3s_mmu_invalidate(pte); 99 trace_kvm_book3s_mmu_invalidate(pte);
96 100
97 /* Different for 32 and 64 bit */ 101 /* Different for 32 and 64 bit */
98 kvmppc_mmu_invalidate_pte(vcpu, pte); 102 kvmppc_mmu_invalidate_pte(vcpu, pte);
99 103
100 spin_lock(&vcpu->arch.mmu_lock); 104 spin_lock(&vcpu3s->mmu_lock);
101 105
102 /* pte already invalidated in between? */ 106 /* pte already invalidated in between? */
103 if (hlist_unhashed(&pte->list_pte)) { 107 if (hlist_unhashed(&pte->list_pte)) {
104 spin_unlock(&vcpu->arch.mmu_lock); 108 spin_unlock(&vcpu3s->mmu_lock);
105 return; 109 return;
106 } 110 }
107 111
@@ -115,14 +119,15 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
115 else 119 else
116 kvm_release_pfn_clean(pte->pfn); 120 kvm_release_pfn_clean(pte->pfn);
117 121
118 spin_unlock(&vcpu->arch.mmu_lock); 122 spin_unlock(&vcpu3s->mmu_lock);
119 123
120 vcpu->arch.hpte_cache_count--; 124 vcpu3s->hpte_cache_count--;
121 call_rcu(&pte->rcu_head, free_pte_rcu); 125 call_rcu(&pte->rcu_head, free_pte_rcu);
122} 126}
123 127
124static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) 128static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
125{ 129{
130 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
126 struct hpte_cache *pte; 131 struct hpte_cache *pte;
127 struct hlist_node *node; 132 struct hlist_node *node;
128 int i; 133 int i;
@@ -130,7 +135,7 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
130 rcu_read_lock(); 135 rcu_read_lock();
131 136
132 for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { 137 for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
133 struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; 138 struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
134 139
135 hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) 140 hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
136 invalidate_pte(vcpu, pte); 141 invalidate_pte(vcpu, pte);
@@ -141,12 +146,13 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
141 146
142static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) 147static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
143{ 148{
149 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
144 struct hlist_head *list; 150 struct hlist_head *list;
145 struct hlist_node *node; 151 struct hlist_node *node;
146 struct hpte_cache *pte; 152 struct hpte_cache *pte;
147 153
148 /* Find the list of entries in the map */ 154 /* Find the list of entries in the map */
149 list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; 155 list = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
150 156
151 rcu_read_lock(); 157 rcu_read_lock();
152 158
@@ -160,12 +166,13 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
160 166
161static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) 167static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
162{ 168{
169 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
163 struct hlist_head *list; 170 struct hlist_head *list;
164 struct hlist_node *node; 171 struct hlist_node *node;
165 struct hpte_cache *pte; 172 struct hpte_cache *pte;
166 173
167 /* Find the list of entries in the map */ 174 /* Find the list of entries in the map */
168 list = &vcpu->arch.hpte_hash_pte_long[ 175 list = &vcpu3s->hpte_hash_pte_long[
169 kvmppc_mmu_hash_pte_long(guest_ea)]; 176 kvmppc_mmu_hash_pte_long(guest_ea)];
170 177
171 rcu_read_lock(); 178 rcu_read_lock();
@@ -203,12 +210,13 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
203/* Flush with mask 0xfffffffff */ 210/* Flush with mask 0xfffffffff */
204static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) 211static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
205{ 212{
213 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
206 struct hlist_head *list; 214 struct hlist_head *list;
207 struct hlist_node *node; 215 struct hlist_node *node;
208 struct hpte_cache *pte; 216 struct hpte_cache *pte;
209 u64 vp_mask = 0xfffffffffULL; 217 u64 vp_mask = 0xfffffffffULL;
210 218
211 list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; 219 list = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
212 220
213 rcu_read_lock(); 221 rcu_read_lock();
214 222
@@ -223,12 +231,13 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
223/* Flush with mask 0xffffff000 */ 231/* Flush with mask 0xffffff000 */
224static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) 232static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
225{ 233{
234 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
226 struct hlist_head *list; 235 struct hlist_head *list;
227 struct hlist_node *node; 236 struct hlist_node *node;
228 struct hpte_cache *pte; 237 struct hpte_cache *pte;
229 u64 vp_mask = 0xffffff000ULL; 238 u64 vp_mask = 0xffffff000ULL;
230 239
231 list = &vcpu->arch.hpte_hash_vpte_long[ 240 list = &vcpu3s->hpte_hash_vpte_long[
232 kvmppc_mmu_hash_vpte_long(guest_vp)]; 241 kvmppc_mmu_hash_vpte_long(guest_vp)];
233 242
234 rcu_read_lock(); 243 rcu_read_lock();
@@ -261,6 +270,7 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
261 270
262void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 271void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
263{ 272{
273 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
264 struct hlist_node *node; 274 struct hlist_node *node;
265 struct hpte_cache *pte; 275 struct hpte_cache *pte;
266 int i; 276 int i;
@@ -270,7 +280,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
270 rcu_read_lock(); 280 rcu_read_lock();
271 281
272 for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { 282 for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
273 struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; 283 struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
274 284
275 hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) 285 hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
276 if ((pte->pte.raddr >= pa_start) && 286 if ((pte->pte.raddr >= pa_start) &&
@@ -283,12 +293,13 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
283 293
284struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) 294struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
285{ 295{
296 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
286 struct hpte_cache *pte; 297 struct hpte_cache *pte;
287 298
288 pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); 299 pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
289 vcpu->arch.hpte_cache_count++; 300 vcpu3s->hpte_cache_count++;
290 301
291 if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM) 302 if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
292 kvmppc_mmu_pte_flush_all(vcpu); 303 kvmppc_mmu_pte_flush_all(vcpu);
293 304
294 return pte; 305 return pte;
@@ -309,17 +320,19 @@ static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len)
309 320
310int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) 321int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
311{ 322{
323 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
324
312 /* init hpte lookup hashes */ 325 /* init hpte lookup hashes */
313 kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, 326 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte,
314 ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); 327 ARRAY_SIZE(vcpu3s->hpte_hash_pte));
315 kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long, 328 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte_long,
316 ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long)); 329 ARRAY_SIZE(vcpu3s->hpte_hash_pte_long));
317 kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, 330 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte,
318 ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); 331 ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
319 kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, 332 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
320 ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); 333 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
321 334
322 spin_lock_init(&vcpu->arch.mmu_lock); 335 spin_lock_init(&vcpu3s->mmu_lock);
323 336
324 return 0; 337 return 0;
325} 338}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
new file mode 100644
index 000000000000..0c0d3f274437
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -0,0 +1,1029 @@
1/*
2 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
3 *
4 * Authors:
5 * Alexander Graf <agraf@suse.de>
6 * Kevin Wolf <mail@kevin-wolf.de>
7 * Paul Mackerras <paulus@samba.org>
8 *
9 * Description:
10 * Functions relating to running KVM on Book 3S processors where
11 * we don't have access to hypervisor mode, and we run the guest
12 * in problem state (user mode).
13 *
14 * This file is derived from arch/powerpc/kvm/44x.c,
15 * by Hollis Blanchard <hollisb@us.ibm.com>.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License, version 2, as
19 * published by the Free Software Foundation.
20 */
21
22#include <linux/kvm_host.h>
23#include <linux/err.h>
24#include <linux/slab.h>
25
26#include <asm/reg.h>
27#include <asm/cputable.h>
28#include <asm/cacheflush.h>
29#include <asm/tlbflush.h>
30#include <asm/uaccess.h>
31#include <asm/io.h>
32#include <asm/kvm_ppc.h>
33#include <asm/kvm_book3s.h>
34#include <asm/mmu_context.h>
35#include <linux/gfp.h>
36#include <linux/sched.h>
37#include <linux/vmalloc.h>
38#include <linux/highmem.h>
39
40#include "trace.h"
41
42/* #define EXIT_DEBUG */
43/* #define DEBUG_EXT */
44
45static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
46 ulong msr);
47
48/* Some compatibility defines */
49#ifdef CONFIG_PPC_BOOK3S_32
50#define MSR_USER32 MSR_USER
51#define MSR_USER64 MSR_USER
52#define HW_PAGE_SIZE PAGE_SIZE
53#endif
54
55void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
56{
57#ifdef CONFIG_PPC_BOOK3S_64
58 memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
59 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
60 sizeof(get_paca()->shadow_vcpu));
61 to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
62#endif
63
64#ifdef CONFIG_PPC_BOOK3S_32
65 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
66#endif
67}
68
69void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
70{
71#ifdef CONFIG_PPC_BOOK3S_64
72 memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
73 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
74 sizeof(get_paca()->shadow_vcpu));
75 to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
76#endif
77
78 kvmppc_giveup_ext(vcpu, MSR_FP);
79 kvmppc_giveup_ext(vcpu, MSR_VEC);
80 kvmppc_giveup_ext(vcpu, MSR_VSX);
81}
82
83static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
84{
85 ulong smsr = vcpu->arch.shared->msr;
86
87 /* Guest MSR values */
88 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
89 /* Process MSR values */
90 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
 91 /* External provider bits (FPU/Altivec/VSX) that the guest owns */
92 smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
93 /* 64-bit Process MSR values */
94#ifdef CONFIG_PPC_BOOK3S_64
95 smsr |= MSR_ISF | MSR_HV;
96#endif
97 vcpu->arch.shadow_msr = smsr;
98}
99
100void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
101{
102 ulong old_msr = vcpu->arch.shared->msr;
103
104#ifdef EXIT_DEBUG
105 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
106#endif
107
108 msr &= to_book3s(vcpu)->msr_mask;
109 vcpu->arch.shared->msr = msr;
110 kvmppc_recalc_shadow_msr(vcpu);
111
112 if (msr & MSR_POW) {
113 if (!vcpu->arch.pending_exceptions) {
114 kvm_vcpu_block(vcpu);
115 vcpu->stat.halt_wakeup++;
116
117 /* Unset POW bit after we woke up */
118 msr &= ~MSR_POW;
119 vcpu->arch.shared->msr = msr;
120 }
121 }
122
123 if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
124 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
125 kvmppc_mmu_flush_segments(vcpu);
126 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
127
128 /* Preload magic page segment when in kernel mode */
129 if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
130 struct kvm_vcpu_arch *a = &vcpu->arch;
131
132 if (msr & MSR_DR)
133 kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
134 else
135 kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
136 }
137 }
138
139 /* Preload FPU if it's enabled */
140 if (vcpu->arch.shared->msr & MSR_FP)
141 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
142}
143
144void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
145{
146 u32 host_pvr;
147
148 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
149 vcpu->arch.pvr = pvr;
150#ifdef CONFIG_PPC_BOOK3S_64
151 if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
152 kvmppc_mmu_book3s_64_init(vcpu);
153 to_book3s(vcpu)->hior = 0xfff00000;
154 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
155 } else
156#endif
157 {
158 kvmppc_mmu_book3s_32_init(vcpu);
159 to_book3s(vcpu)->hior = 0;
160 to_book3s(vcpu)->msr_mask = 0xffffffffULL;
161 }
162
163 /* If we are in hypervisor level on 970, we can tell the CPU to
164 * treat DCBZ as 32 bytes store */
165 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
166 if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
167 !strcmp(cur_cpu_spec->platform, "ppc970"))
168 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
169
170 /* Cell performs badly if MSR_FEx are set. So let's hope nobody
171 really needs them in a VM on Cell and force disable them. */
172 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
173 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
174
175#ifdef CONFIG_PPC_BOOK3S_32
176 /* 32 bit Book3S always has 32 byte dcbz */
177 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
178#endif
179
180 /* On some CPUs we can execute paired single operations natively */
181 asm ( "mfpvr %0" : "=r"(host_pvr));
182 switch (host_pvr) {
183 case 0x00080200: /* lonestar 2.0 */
184 case 0x00088202: /* lonestar 2.2 */
185 case 0x70000100: /* gekko 1.0 */
186 case 0x00080100: /* gekko 2.0 */
187 case 0x00083203: /* gekko 2.3a */
188 case 0x00083213: /* gekko 2.3b */
189 case 0x00083204: /* gekko 2.4 */
190 case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
191 case 0x00087200: /* broadway */
192 vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
193 /* Enable HID2.PSE - in case we need it later */
194 mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
195 }
196}
197
198/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
199 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
200 * emulate 32 bytes dcbz length.
201 *
202 * The Book3s_64 inventors also realized this case and implemented a special bit
 203 * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
204 *
205 * My approach here is to patch the dcbz instruction on executing pages.
206 */
207static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
208{
209 struct page *hpage;
210 u64 hpage_offset;
211 u32 *page;
212 int i;
213
214 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
215 if (is_error_page(hpage)) {
216 kvm_release_page_clean(hpage);
217 return;
218 }
219
220 hpage_offset = pte->raddr & ~PAGE_MASK;
221 hpage_offset &= ~0xFFFULL;
222 hpage_offset /= 4;
223
224 get_page(hpage);
225 page = kmap_atomic(hpage, KM_USER0);
226
227 /* patch dcbz into reserved instruction, so we trap */
228 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
229 if ((page[i] & 0xff0007ff) == INS_DCBZ)
230 page[i] &= 0xfffffff7;
231
232 kunmap_atomic(page, KM_USER0);
233 put_page(hpage);
234}
235
236static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
237{
238 ulong mp_pa = vcpu->arch.magic_page_pa;
239
240 if (unlikely(mp_pa) &&
241 unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
242 return 1;
243 }
244
245 return kvm_is_visible_gfn(vcpu->kvm, gfn);
246}
247
248int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
249 ulong eaddr, int vec)
250{
251 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
252 int r = RESUME_GUEST;
253 int relocated;
254 int page_found = 0;
255 struct kvmppc_pte pte;
256 bool is_mmio = false;
257 bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
258 bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
259 u64 vsid;
260
261 relocated = data ? dr : ir;
262
263 /* Resolve real address if translation turned on */
264 if (relocated) {
265 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
266 } else {
267 pte.may_execute = true;
268 pte.may_read = true;
269 pte.may_write = true;
270 pte.raddr = eaddr & KVM_PAM;
271 pte.eaddr = eaddr;
272 pte.vpage = eaddr >> 12;
273 }
274
275 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
276 case 0:
277 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
278 break;
279 case MSR_DR:
280 case MSR_IR:
281 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
282
283 if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
284 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
285 else
286 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
287 pte.vpage |= vsid;
288
289 if (vsid == -1)
290 page_found = -EINVAL;
291 break;
292 }
293
294 if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
295 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
296 /*
297 * If we do the dcbz hack, we have to mark every page non-executable
298 * so that we can patch the code before it runs. This renders our
299 * guest NX-less.
300 */
301 pte.may_execute = !data;
302 }
303
304 if (page_found == -ENOENT) {
305 /* Page not found in guest PTE entries */
306 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
307 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
308 vcpu->arch.shared->msr |=
309 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
310 kvmppc_book3s_queue_irqprio(vcpu, vec);
311 } else if (page_found == -EPERM) {
312 /* Storage protection */
313 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
314 vcpu->arch.shared->dsisr =
315 to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
316 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
317 vcpu->arch.shared->msr |=
318 (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
319 kvmppc_book3s_queue_irqprio(vcpu, vec);
320 } else if (page_found == -EINVAL) {
321 /* Page not found in guest SLB */
322 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
323 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
324 } else if (!is_mmio &&
325 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
326 /* The guest's PTE is not mapped yet. Map on the host */
327 kvmppc_mmu_map_page(vcpu, &pte);
328 if (data)
329 vcpu->stat.sp_storage++;
330 else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
331 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
332 kvmppc_patch_dcbz(vcpu, &pte);
333 } else {
334 /* MMIO */
335 vcpu->stat.mmio_exits++;
336 vcpu->arch.paddr_accessed = pte.raddr;
337 r = kvmppc_emulate_mmio(run, vcpu);
338 if ( r == RESUME_HOST_NV )
339 r = RESUME_HOST;
340 }
341
342 return r;
343}
344
345static inline int get_fpr_index(int i)
346{
347#ifdef CONFIG_VSX
348 i *= 2;
349#endif
350 return i;
351}
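/*
 * Note added for clarity: with CONFIG_VSX the thread's FP save area holds
 * two doublewords per register, the FPR in the first slot and the VSX low
 * half in the second.  Doubling the index here therefore makes
 * thread_fpr[get_fpr_index(i)] address the FPR, while the MSR_VSX cases in
 * kvmppc_giveup_ext()/kvmppc_handle_ext() use get_fpr_index(i) + 1 for the
 * companion VSX doubleword.
 */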
352
353/* Give up external provider (FPU, Altivec, VSX) */
354void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
355{
356 struct thread_struct *t = &current->thread;
357 u64 *vcpu_fpr = vcpu->arch.fpr;
358#ifdef CONFIG_VSX
359 u64 *vcpu_vsx = vcpu->arch.vsr;
360#endif
361 u64 *thread_fpr = (u64*)t->fpr;
362 int i;
363
364 if (!(vcpu->arch.guest_owned_ext & msr))
365 return;
366
367#ifdef DEBUG_EXT
368 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
369#endif
370
371 switch (msr) {
372 case MSR_FP:
373 giveup_fpu(current);
374 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
375 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
376
377 vcpu->arch.fpscr = t->fpscr.val;
378 break;
379 case MSR_VEC:
380#ifdef CONFIG_ALTIVEC
381 giveup_altivec(current);
382 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
383 vcpu->arch.vscr = t->vscr;
384#endif
385 break;
386 case MSR_VSX:
387#ifdef CONFIG_VSX
388 __giveup_vsx(current);
389 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
390 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
391#endif
392 break;
393 default:
394 BUG();
395 }
396
397 vcpu->arch.guest_owned_ext &= ~msr;
398 current->thread.regs->msr &= ~msr;
399 kvmppc_recalc_shadow_msr(vcpu);
400}
401
402static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
403{
404 ulong srr0 = kvmppc_get_pc(vcpu);
405 u32 last_inst = kvmppc_get_last_inst(vcpu);
406 int ret;
407
408 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
409 if (ret == -ENOENT) {
410 ulong msr = vcpu->arch.shared->msr;
411
412 msr = kvmppc_set_field(msr, 33, 33, 1);
413 msr = kvmppc_set_field(msr, 34, 36, 0);
414 vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
415 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
416 return EMULATE_AGAIN;
417 }
418
419 return EMULATE_DONE;
420}
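/*
 * Illustrative note, based on an assumption about kvmppc_set_field(): the
 * field positions above appear to use the PowerPC convention of numbering
 * a 64-bit value from the most significant bit, i.e. bit b corresponds to
 * 1ULL << (63 - b).  Under that assumption, setting bit 33 sets
 * 0x40000000, the same "translation not found" flag tested against
 * shadow_srr1 elsewhere in this file, while clearing bits 34-36 and
 * 42-47 zeroes the remaining storage-interrupt status bits before the
 * ISI is queued.
 */
#if 0	/* example only, never built */
#define EX_PPC_BIT(b)	(1ULL << (63 - (b)))	/* assumed numbering */
/* EX_PPC_BIT(33) == 0x0000000040000000ULL */
#endif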
421
422static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
423{
424
425 /* Need to do paired single emulation? */
426 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
427 return EMULATE_DONE;
428
429 /* Read out the instruction */
430 if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
431 /* Need to emulate */
432 return EMULATE_FAIL;
433
434 return EMULATE_AGAIN;
435}
436
437/* Handle external providers (FPU, Altivec, VSX) */
438static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
439 ulong msr)
440{
441 struct thread_struct *t = &current->thread;
442 u64 *vcpu_fpr = vcpu->arch.fpr;
443#ifdef CONFIG_VSX
444 u64 *vcpu_vsx = vcpu->arch.vsr;
445#endif
446 u64 *thread_fpr = (u64*)t->fpr;
447 int i;
448
449 /* When we have paired singles, we emulate in software */
450 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
451 return RESUME_GUEST;
452
453 if (!(vcpu->arch.shared->msr & msr)) {
454 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
455 return RESUME_GUEST;
456 }
457
458 /* We already own the ext */
459 if (vcpu->arch.guest_owned_ext & msr) {
460 return RESUME_GUEST;
461 }
462
463#ifdef DEBUG_EXT
464 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
465#endif
466
467 current->thread.regs->msr |= msr;
468
469 switch (msr) {
470 case MSR_FP:
471 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
472 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
473
474 t->fpscr.val = vcpu->arch.fpscr;
475 t->fpexc_mode = 0;
476 kvmppc_load_up_fpu();
477 break;
478 case MSR_VEC:
479#ifdef CONFIG_ALTIVEC
480 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
481 t->vscr = vcpu->arch.vscr;
482 t->vrsave = -1;
483 kvmppc_load_up_altivec();
484#endif
485 break;
486 case MSR_VSX:
487#ifdef CONFIG_VSX
488 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
489 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
490 kvmppc_load_up_vsx();
491#endif
492 break;
493 default:
494 BUG();
495 }
496
497 vcpu->arch.guest_owned_ext |= msr;
498
499 kvmppc_recalc_shadow_msr(vcpu);
500
501 return RESUME_GUEST;
502}
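/*
 * Note added for clarity: guest_owned_ext is the lazy-switching state for
 * these facilities.  A bit set here means the host registers currently
 * hold guest FP/Altivec/VSX values, so kvmppc_giveup_ext() has to copy
 * them back into the vcpu before anything else may use the facility,
 * which is why kvmppc_vcpu_run() calls it for all three MSR bits on the
 * way back to the host.
 */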
503
504int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
505 unsigned int exit_nr)
506{
507 int r = RESUME_HOST;
508
509 vcpu->stat.sum_exits++;
510
511 run->exit_reason = KVM_EXIT_UNKNOWN;
512 run->ready_for_interrupt_injection = 1;
513
514 trace_kvm_book3s_exit(exit_nr, vcpu);
515 kvm_resched(vcpu);
516 switch (exit_nr) {
517 case BOOK3S_INTERRUPT_INST_STORAGE:
518 vcpu->stat.pf_instruc++;
519
520#ifdef CONFIG_PPC_BOOK3S_32
521 /* We mark segments as unused when invalidating them, so treat
522 * the respective fault as a segment fault. */
523 if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
524 == SR_INVALID) {
525 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
526 r = RESUME_GUEST;
527 break;
528 }
529#endif
530
531 /* only care about PTEG not found errors, but leave NX alone */
532 if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
533 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
534 vcpu->stat.sp_instruc++;
535 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
536 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
537 /*
538 * XXX If we do the dcbz hack we use the NX bit to flush and patch the page,
539 * so we can't use the NX bit inside the guest. Let's cross our fingers
540 * that no guest that needs the dcbz hack also uses NX.
541 */
542 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
543 r = RESUME_GUEST;
544 } else {
545 vcpu->arch.shared->msr |=
546 to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
547 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
548 r = RESUME_GUEST;
549 }
550 break;
551 case BOOK3S_INTERRUPT_DATA_STORAGE:
552 {
553 ulong dar = kvmppc_get_fault_dar(vcpu);
554 vcpu->stat.pf_storage++;
555
556#ifdef CONFIG_PPC_BOOK3S_32
557 /* We mark segments as unused when invalidating them, so treat
558 * the respective fault as a segment fault. */
559 if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
560 kvmppc_mmu_map_segment(vcpu, dar);
561 r = RESUME_GUEST;
562 break;
563 }
564#endif
565
566 /* The only case we need to handle is missing shadow PTEs */
567 if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
568 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
569 } else {
570 vcpu->arch.shared->dar = dar;
571 vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
572 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
573 r = RESUME_GUEST;
574 }
575 break;
576 }
577 case BOOK3S_INTERRUPT_DATA_SEGMENT:
578 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
579 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
580 kvmppc_book3s_queue_irqprio(vcpu,
581 BOOK3S_INTERRUPT_DATA_SEGMENT);
582 }
583 r = RESUME_GUEST;
584 break;
585 case BOOK3S_INTERRUPT_INST_SEGMENT:
586 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
587 kvmppc_book3s_queue_irqprio(vcpu,
588 BOOK3S_INTERRUPT_INST_SEGMENT);
589 }
590 r = RESUME_GUEST;
591 break;
592 /* We're good on these - the host merely wanted to get our attention */
593 case BOOK3S_INTERRUPT_DECREMENTER:
594 vcpu->stat.dec_exits++;
595 r = RESUME_GUEST;
596 break;
597 case BOOK3S_INTERRUPT_EXTERNAL:
598 vcpu->stat.ext_intr_exits++;
599 r = RESUME_GUEST;
600 break;
601 case BOOK3S_INTERRUPT_PERFMON:
602 r = RESUME_GUEST;
603 break;
604 case BOOK3S_INTERRUPT_PROGRAM:
605 {
606 enum emulation_result er;
607 ulong flags;
608
609program_interrupt:
610 flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
611
612 if (vcpu->arch.shared->msr & MSR_PR) {
613#ifdef EXIT_DEBUG
614 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
615#endif
616 if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
617 (INS_DCBZ & 0xfffffff7)) {
618 kvmppc_core_queue_program(vcpu, flags);
619 r = RESUME_GUEST;
620 break;
621 }
622 }
623
624 vcpu->stat.emulated_inst_exits++;
625 er = kvmppc_emulate_instruction(run, vcpu);
626 switch (er) {
627 case EMULATE_DONE:
628 r = RESUME_GUEST_NV;
629 break;
630 case EMULATE_AGAIN:
631 r = RESUME_GUEST;
632 break;
633 case EMULATE_FAIL:
634 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
635 __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
636 kvmppc_core_queue_program(vcpu, flags);
637 r = RESUME_GUEST;
638 break;
639 case EMULATE_DO_MMIO:
640 run->exit_reason = KVM_EXIT_MMIO;
641 r = RESUME_HOST_NV;
642 break;
643 default:
644 BUG();
645 }
646 break;
647 }
648 case BOOK3S_INTERRUPT_SYSCALL:
649 if (vcpu->arch.osi_enabled &&
650 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
651 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
652 /* MOL hypercalls */
653 u64 *gprs = run->osi.gprs;
654 int i;
655
656 run->exit_reason = KVM_EXIT_OSI;
657 for (i = 0; i < 32; i++)
658 gprs[i] = kvmppc_get_gpr(vcpu, i);
659 vcpu->arch.osi_needed = 1;
660 r = RESUME_HOST_NV;
661 } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
662 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
663 /* KVM PV hypercalls */
664 kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
665 r = RESUME_GUEST;
666 } else {
667 /* Guest syscalls */
668 vcpu->stat.syscall_exits++;
669 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
670 r = RESUME_GUEST;
671 }
672 break;
673 case BOOK3S_INTERRUPT_FP_UNAVAIL:
674 case BOOK3S_INTERRUPT_ALTIVEC:
675 case BOOK3S_INTERRUPT_VSX:
676 {
677 int ext_msr = 0;
678
679 switch (exit_nr) {
680 case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
681 case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
682 case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
683 }
684
685 switch (kvmppc_check_ext(vcpu, exit_nr)) {
686 case EMULATE_DONE:
687 /* everything ok - let's enable the ext */
688 r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
689 break;
690 case EMULATE_FAIL:
691 /* we need to emulate this instruction */
692 goto program_interrupt;
693 break;
694 default:
695 /* nothing to worry about - go again */
696 break;
697 }
698 break;
699 }
700 case BOOK3S_INTERRUPT_ALIGNMENT:
701 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
702 vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
703 kvmppc_get_last_inst(vcpu));
704 vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
705 kvmppc_get_last_inst(vcpu));
706 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
707 }
708 r = RESUME_GUEST;
709 break;
710 case BOOK3S_INTERRUPT_MACHINE_CHECK:
711 case BOOK3S_INTERRUPT_TRACE:
712 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
713 r = RESUME_GUEST;
714 break;
715 default:
716 /* Ugh - bork here! What did we get? */
717 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
718 exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
719 r = RESUME_HOST;
720 BUG();
721 break;
722 }
723
724
725 if (!(r & RESUME_HOST)) {
726 /* To avoid clobbering exit_reason, only check for signals if
727 * we aren't already exiting to userspace for some other
728 * reason. */
729 if (signal_pending(current)) {
730#ifdef EXIT_DEBUG
731 printk(KERN_EMERG "KVM: Going back to host\n");
732#endif
733 vcpu->stat.signal_exits++;
734 run->exit_reason = KVM_EXIT_INTR;
735 r = -EINTR;
736 } else {
737 /* In case an interrupt came in that was triggered
738 * from userspace (like DEC), we need to check what
739 * to inject now! */
740 kvmppc_core_deliver_interrupts(vcpu);
741 }
742 }
743
744 trace_kvm_book3s_reenter(r, vcpu);
745
746 return r;
747}
748
749int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
750 struct kvm_sregs *sregs)
751{
752 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
753 int i;
754
755 sregs->pvr = vcpu->arch.pvr;
756
757 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
758 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
759 for (i = 0; i < 64; i++) {
760 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
761 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
762 }
763 } else {
764 for (i = 0; i < 16; i++)
765 sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
766
767 for (i = 0; i < 8; i++) {
768 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
769 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
770 }
771 }
772
773 return 0;
774}
775
776int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
777 struct kvm_sregs *sregs)
778{
779 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
780 int i;
781
782 kvmppc_set_pvr(vcpu, sregs->pvr);
783
784 vcpu3s->sdr1 = sregs->u.s.sdr1;
785 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
786 for (i = 0; i < 64; i++) {
787 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
788 sregs->u.s.ppc64.slb[i].slbe);
789 }
790 } else {
791 for (i = 0; i < 16; i++) {
792 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
793 }
794 for (i = 0; i < 8; i++) {
795 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
796 (u32)sregs->u.s.ppc32.ibat[i]);
797 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
798 (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
799 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
800 (u32)sregs->u.s.ppc32.dbat[i]);
801 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
802 (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
803 }
804 }
805
806 /* Flush the MMU after messing with the segments */
807 kvmppc_mmu_pte_flush(vcpu, 0, 0);
808
809 return 0;
810}
811
812int kvmppc_core_check_processor_compat(void)
813{
814 return 0;
815}
816
817struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
818{
819 struct kvmppc_vcpu_book3s *vcpu_book3s;
820 struct kvm_vcpu *vcpu;
821 int err = -ENOMEM;
822 unsigned long p;
823
824 vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
825 if (!vcpu_book3s)
826 goto out;
827
828 vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
829 kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
830 if (!vcpu_book3s->shadow_vcpu)
831 goto free_vcpu;
832
833 vcpu = &vcpu_book3s->vcpu;
834 err = kvm_vcpu_init(vcpu, kvm, id);
835 if (err)
836 goto free_shadow_vcpu;
837
838 p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
839 /* the real shared page fills the last 4k of our page */
840 vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
841 if (!p)
842 goto uninit_vcpu;
843
844 vcpu->arch.host_retip = kvm_return_point;
845 vcpu->arch.host_msr = mfmsr();
846#ifdef CONFIG_PPC_BOOK3S_64
847 /* default to book3s_64 (970fx) */
848 vcpu->arch.pvr = 0x3C0301;
849#else
850 /* default to book3s_32 (750) */
851 vcpu->arch.pvr = 0x84202;
852#endif
853 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
854 vcpu->arch.slb_nr = 64;
855
856 /* remember where some real-mode handlers are */
857 vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
858 vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
859 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
860#ifdef CONFIG_PPC_BOOK3S_64
861 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
862#else
863 vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
864#endif
865
866 vcpu->arch.shadow_msr = MSR_USER64;
867
868 err = kvmppc_mmu_init(vcpu);
869 if (err < 0)
870 goto uninit_vcpu;
871
872 return vcpu;
873
874uninit_vcpu:
875 kvm_vcpu_uninit(vcpu);
876free_shadow_vcpu:
877 kfree(vcpu_book3s->shadow_vcpu);
878free_vcpu:
879 vfree(vcpu_book3s);
880out:
881 return ERR_PTR(err);
882}
883
884void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
885{
886 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
887
888 free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
889 kvm_vcpu_uninit(vcpu);
890 kfree(vcpu_book3s->shadow_vcpu);
891 vfree(vcpu_book3s);
892}
893
894int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
895{
896 int ret;
897 double fpr[32][TS_FPRWIDTH];
898 unsigned int fpscr;
899 int fpexc_mode;
900#ifdef CONFIG_ALTIVEC
901 vector128 vr[32];
902 vector128 vscr;
903 unsigned long uninitialized_var(vrsave);
904 int used_vr;
905#endif
906#ifdef CONFIG_VSX
907 int used_vsr;
908#endif
909 ulong ext_msr;
910
911 /* No need to go into the guest when all we would do is come right back out */
912 if (signal_pending(current)) {
913 kvm_run->exit_reason = KVM_EXIT_INTR;
914 return -EINTR;
915 }
916
917 /* Save FPU state in stack */
918 if (current->thread.regs->msr & MSR_FP)
919 giveup_fpu(current);
920 memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
921 fpscr = current->thread.fpscr.val;
922 fpexc_mode = current->thread.fpexc_mode;
923
924#ifdef CONFIG_ALTIVEC
925 /* Save Altivec state in stack */
926 used_vr = current->thread.used_vr;
927 if (used_vr) {
928 if (current->thread.regs->msr & MSR_VEC)
929 giveup_altivec(current);
930 memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
931 vscr = current->thread.vscr;
932 vrsave = current->thread.vrsave;
933 }
934#endif
935
936#ifdef CONFIG_VSX
937 /* Save VSX state in stack */
938 used_vsr = current->thread.used_vsr;
939 if (used_vsr && (current->thread.regs->msr & MSR_VSX))
940 __giveup_vsx(current);
941#endif
942
943 /* Remember the MSR with disabled extensions */
944 ext_msr = current->thread.regs->msr;
945
946 /* Preload FPU if it's enabled */
947 if (vcpu->arch.shared->msr & MSR_FP)
948 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
949
950 kvm_guest_enter();
951
952 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
953
954 kvm_guest_exit();
955
956 local_irq_disable();
957
958 current->thread.regs->msr = ext_msr;
959
960 /* Make sure we save the guest FPU/Altivec/VSX state */
961 kvmppc_giveup_ext(vcpu, MSR_FP);
962 kvmppc_giveup_ext(vcpu, MSR_VEC);
963 kvmppc_giveup_ext(vcpu, MSR_VSX);
964
965 /* Restore FPU state from stack */
966 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
967 current->thread.fpscr.val = fpscr;
968 current->thread.fpexc_mode = fpexc_mode;
969
970#ifdef CONFIG_ALTIVEC
971 /* Restore Altivec state from stack */
972 if (used_vr && current->thread.used_vr) {
973 memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
974 current->thread.vscr = vscr;
975 current->thread.vrsave = vrsave;
976 }
977 current->thread.used_vr = used_vr;
978#endif
979
980#ifdef CONFIG_VSX
981 current->thread.used_vsr = used_vsr;
982#endif
983
984 return ret;
985}
986
987int kvmppc_core_prepare_memory_region(struct kvm *kvm,
988 struct kvm_userspace_memory_region *mem)
989{
990 return 0;
991}
992
993void kvmppc_core_commit_memory_region(struct kvm *kvm,
994 struct kvm_userspace_memory_region *mem)
995{
996}
997
998int kvmppc_core_init_vm(struct kvm *kvm)
999{
1000 return 0;
1001}
1002
1003void kvmppc_core_destroy_vm(struct kvm *kvm)
1004{
1005}
1006
1007static int kvmppc_book3s_init(void)
1008{
1009 int r;
1010
1011 r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
1012 THIS_MODULE);
1013
1014 if (r)
1015 return r;
1016
1017 r = kvmppc_mmu_hpte_sysinit();
1018
1019 return r;
1020}
1021
1022static void kvmppc_book3s_exit(void)
1023{
1024 kvmppc_mmu_hpte_sysexit();
1025 kvm_exit();
1026}
1027
1028module_init(kvmppc_book3s_init);
1029module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 1a1b34487e71..c1f877c4a884 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -36,41 +36,44 @@
36#if defined(CONFIG_PPC_BOOK3S_64) 36#if defined(CONFIG_PPC_BOOK3S_64)
37 37
38#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg) 38#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg)
39#define SHADOW_VCPU_OFF PACA_KVM_SVCPU
40#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) 39#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
41#define FUNC(name) GLUE(.,name) 40#define FUNC(name) GLUE(.,name)
42 41
42kvmppc_skip_interrupt:
43 /*
44 * Here all GPRs are unchanged from when the interrupt happened
45 * except for r13, which is saved in SPRG_SCRATCH0.
46 */
47 mfspr r13, SPRN_SRR0
48 addi r13, r13, 4
49 mtspr SPRN_SRR0, r13
50 GET_SCRATCH0(r13)
51 rfid
52 b .
53
54kvmppc_skip_Hinterrupt:
55 /*
56 * Here all GPRs are unchanged from when the interrupt happened
57 * except for r13, which is saved in SPRG_SCRATCH0.
58 */
59 mfspr r13, SPRN_HSRR0
60 addi r13, r13, 4
61 mtspr SPRN_HSRR0, r13
62 GET_SCRATCH0(r13)
63 hrfid
64 b .
65
43#elif defined(CONFIG_PPC_BOOK3S_32) 66#elif defined(CONFIG_PPC_BOOK3S_32)
44 67
45#define LOAD_SHADOW_VCPU(reg) \
46 mfspr reg, SPRN_SPRG_THREAD; \
47 lwz reg, THREAD_KVM_SVCPU(reg); \
48 /* PPC32 can have a NULL pointer - let's check for that */ \
49 mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \
50 mfcr r12; \
51 cmpwi reg, 0; \
52 bne 1f; \
53 mfspr reg, SPRN_SPRG_SCRATCH0; \
54 mtcr r12; \
55 mfspr r12, SPRN_SPRG_SCRATCH1; \
56 b kvmppc_resume_\intno; \
571:; \
58 mtcr r12; \
59 mfspr r12, SPRN_SPRG_SCRATCH1; \
60 tophys(reg, reg)
61
62#define SHADOW_VCPU_OFF 0
63#define MSR_NOIRQ MSR_KERNEL 68#define MSR_NOIRQ MSR_KERNEL
64#define FUNC(name) name 69#define FUNC(name) name
65 70
66#endif
67
68.macro INTERRUPT_TRAMPOLINE intno 71.macro INTERRUPT_TRAMPOLINE intno
69 72
70.global kvmppc_trampoline_\intno 73.global kvmppc_trampoline_\intno
71kvmppc_trampoline_\intno: 74kvmppc_trampoline_\intno:
72 75
73 SET_SCRATCH0(r13) /* Save r13 */ 76 mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */
74 77
75 /* 78 /*
76 * First thing to do is to find out if we're coming 79 * First thing to do is to find out if we're coming
@@ -78,19 +81,28 @@ kvmppc_trampoline_\intno:
78 * 81 *
79 * To distinguish, we check a magic byte in the PACA/current 82 * To distinguish, we check a magic byte in the PACA/current
80 */ 83 */
81 LOAD_SHADOW_VCPU(r13) 84 mfspr r13, SPRN_SPRG_THREAD
82 PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) 85 lwz r13, THREAD_KVM_SVCPU(r13)
86 /* PPC32 can have a NULL pointer - let's check for that */
87 mtspr SPRN_SPRG_SCRATCH1, r12 /* Save r12 */
83 mfcr r12 88 mfcr r12
84 stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 89 cmpwi r13, 0
85 lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) 90 bne 1f
912: mtcr r12
92 mfspr r12, SPRN_SPRG_SCRATCH1
93 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
94 b kvmppc_resume_\intno /* Get back original handler */
95
961: tophys(r13, r13)
97 stw r12, HSTATE_SCRATCH1(r13)
98 mfspr r12, SPRN_SPRG_SCRATCH1
99 stw r12, HSTATE_SCRATCH0(r13)
100 lbz r12, HSTATE_IN_GUEST(r13)
86 cmpwi r12, KVM_GUEST_MODE_NONE 101 cmpwi r12, KVM_GUEST_MODE_NONE
87 bne ..kvmppc_handler_hasmagic_\intno 102 bne ..kvmppc_handler_hasmagic_\intno
88 /* No KVM guest? Then jump back to the Linux handler! */ 103 /* No KVM guest? Then jump back to the Linux handler! */
89 lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 104 lwz r12, HSTATE_SCRATCH1(r13)
90 mtcr r12 105 b 2b
91 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
92 GET_SCRATCH0(r13) /* r13 = original r13 */
93 b kvmppc_resume_\intno /* Get back original handler */
94 106
95 /* Now we know we're handling a KVM guest */ 107 /* Now we know we're handling a KVM guest */
96..kvmppc_handler_hasmagic_\intno: 108..kvmppc_handler_hasmagic_\intno:
@@ -112,9 +124,6 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK
112INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE 124INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE
113INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE 125INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE
114INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL 126INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL
115#ifdef CONFIG_PPC_BOOK3S_64
116INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL_HV
117#endif
118INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT 127INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT
119INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM 128INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM
120INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL 129INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL
@@ -124,14 +133,6 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE
124INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON 133INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON
125INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC 134INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
126 135
127/* Those are only available on 64 bit machines */
128
129#ifdef CONFIG_PPC_BOOK3S_64
130INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT
131INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT
132INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
133#endif
134
135/* 136/*
136 * Bring us back to the faulting code, but skip the 137 * Bring us back to the faulting code, but skip the
137 * faulting instruction. 138 * faulting instruction.
@@ -143,8 +144,8 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
143 * 144 *
144 * R12 = free 145 * R12 = free
145 * R13 = Shadow VCPU (PACA) 146 * R13 = Shadow VCPU (PACA)
146 * SVCPU.SCRATCH0 = guest R12 147 * HSTATE.SCRATCH0 = guest R12
147 * SVCPU.SCRATCH1 = guest CR 148 * HSTATE.SCRATCH1 = guest CR
148 * SPRG_SCRATCH0 = guest R13 149 * SPRG_SCRATCH0 = guest R13
149 * 150 *
150 */ 151 */
@@ -156,13 +157,14 @@ kvmppc_handler_skip_ins:
156 mtsrr0 r12 157 mtsrr0 r12
157 158
158 /* Clean up all state */ 159 /* Clean up all state */
159 lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 160 lwz r12, HSTATE_SCRATCH1(r13)
160 mtcr r12 161 mtcr r12
161 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) 162 PPC_LL r12, HSTATE_SCRATCH0(r13)
162 GET_SCRATCH0(r13) 163 GET_SCRATCH0(r13)
163 164
164 /* And get back into the code */ 165 /* And get back into the code */
165 RFI 166 RFI
167#endif
166 168
167/* 169/*
168 * This trampoline brings us back to a real mode handler 170 * This trampoline brings us back to a real mode handler
@@ -251,12 +253,4 @@ define_load_up(altivec)
251define_load_up(vsx) 253define_load_up(vsx)
252#endif 254#endif
253 255
254.global kvmppc_trampoline_lowmem
255kvmppc_trampoline_lowmem:
256 PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
257
258.global kvmppc_trampoline_enter
259kvmppc_trampoline_enter:
260 PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
261
262#include "book3s_segment.S" 256#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 451264274b8c..aed32e517212 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -22,7 +22,7 @@
22#if defined(CONFIG_PPC_BOOK3S_64) 22#if defined(CONFIG_PPC_BOOK3S_64)
23 23
24#define GET_SHADOW_VCPU(reg) \ 24#define GET_SHADOW_VCPU(reg) \
25 addi reg, r13, PACA_KVM_SVCPU 25 mr reg, r13
26 26
27#elif defined(CONFIG_PPC_BOOK3S_32) 27#elif defined(CONFIG_PPC_BOOK3S_32)
28 28
@@ -71,6 +71,10 @@ kvmppc_handler_trampoline_enter:
71 /* r3 = shadow vcpu */ 71 /* r3 = shadow vcpu */
72 GET_SHADOW_VCPU(r3) 72 GET_SHADOW_VCPU(r3)
73 73
74 /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
75 PPC_STL r1, HSTATE_HOST_R1(r3)
76 PPC_STL r2, HSTATE_HOST_R2(r3)
77
74 /* Move SRR0 and SRR1 into the respective regs */ 78 /* Move SRR0 and SRR1 into the respective regs */
75 PPC_LL r9, SVCPU_PC(r3) 79 PPC_LL r9, SVCPU_PC(r3)
76 mtsrr0 r9 80 mtsrr0 r9
@@ -78,36 +82,36 @@ kvmppc_handler_trampoline_enter:
78 82
79 /* Activate guest mode, so faults get handled by KVM */ 83 /* Activate guest mode, so faults get handled by KVM */
80 li r11, KVM_GUEST_MODE_GUEST 84 li r11, KVM_GUEST_MODE_GUEST
81 stb r11, SVCPU_IN_GUEST(r3) 85 stb r11, HSTATE_IN_GUEST(r3)
82 86
83 /* Switch to guest segment. This is subarch specific. */ 87 /* Switch to guest segment. This is subarch specific. */
84 LOAD_GUEST_SEGMENTS 88 LOAD_GUEST_SEGMENTS
85 89
86 /* Enter guest */ 90 /* Enter guest */
87 91
88 PPC_LL r4, (SVCPU_CTR)(r3) 92 PPC_LL r4, SVCPU_CTR(r3)
89 PPC_LL r5, (SVCPU_LR)(r3) 93 PPC_LL r5, SVCPU_LR(r3)
90 lwz r6, (SVCPU_CR)(r3) 94 lwz r6, SVCPU_CR(r3)
91 lwz r7, (SVCPU_XER)(r3) 95 lwz r7, SVCPU_XER(r3)
92 96
93 mtctr r4 97 mtctr r4
94 mtlr r5 98 mtlr r5
95 mtcr r6 99 mtcr r6
96 mtxer r7 100 mtxer r7
97 101
98 PPC_LL r0, (SVCPU_R0)(r3) 102 PPC_LL r0, SVCPU_R0(r3)
99 PPC_LL r1, (SVCPU_R1)(r3) 103 PPC_LL r1, SVCPU_R1(r3)
100 PPC_LL r2, (SVCPU_R2)(r3) 104 PPC_LL r2, SVCPU_R2(r3)
101 PPC_LL r4, (SVCPU_R4)(r3) 105 PPC_LL r4, SVCPU_R4(r3)
102 PPC_LL r5, (SVCPU_R5)(r3) 106 PPC_LL r5, SVCPU_R5(r3)
103 PPC_LL r6, (SVCPU_R6)(r3) 107 PPC_LL r6, SVCPU_R6(r3)
104 PPC_LL r7, (SVCPU_R7)(r3) 108 PPC_LL r7, SVCPU_R7(r3)
105 PPC_LL r8, (SVCPU_R8)(r3) 109 PPC_LL r8, SVCPU_R8(r3)
106 PPC_LL r9, (SVCPU_R9)(r3) 110 PPC_LL r9, SVCPU_R9(r3)
107 PPC_LL r10, (SVCPU_R10)(r3) 111 PPC_LL r10, SVCPU_R10(r3)
108 PPC_LL r11, (SVCPU_R11)(r3) 112 PPC_LL r11, SVCPU_R11(r3)
109 PPC_LL r12, (SVCPU_R12)(r3) 113 PPC_LL r12, SVCPU_R12(r3)
110 PPC_LL r13, (SVCPU_R13)(r3) 114 PPC_LL r13, SVCPU_R13(r3)
111 115
112 PPC_LL r3, (SVCPU_R3)(r3) 116 PPC_LL r3, (SVCPU_R3)(r3)
113 117
@@ -125,56 +129,63 @@ kvmppc_handler_trampoline_enter_end:
125.global kvmppc_handler_trampoline_exit 129.global kvmppc_handler_trampoline_exit
126kvmppc_handler_trampoline_exit: 130kvmppc_handler_trampoline_exit:
127 131
132.global kvmppc_interrupt
133kvmppc_interrupt:
134
128 /* Register usage at this point: 135 /* Register usage at this point:
129 * 136 *
130 * SPRG_SCRATCH0 = guest R13 137 * SPRG_SCRATCH0 = guest R13
131 * R12 = exit handler id 138 * R12 = exit handler id
132 * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] 139 * R13 = shadow vcpu (32-bit) or PACA (64-bit)
133 * SVCPU.SCRATCH0 = guest R12 140 * HSTATE.SCRATCH0 = guest R12
134 * SVCPU.SCRATCH1 = guest CR 141 * HSTATE.SCRATCH1 = guest CR
135 * 142 *
136 */ 143 */
137 144
138 /* Save registers */ 145 /* Save registers */
139 146
140 PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13) 147 PPC_STL r0, SVCPU_R0(r13)
141 PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13) 148 PPC_STL r1, SVCPU_R1(r13)
142 PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13) 149 PPC_STL r2, SVCPU_R2(r13)
143 PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13) 150 PPC_STL r3, SVCPU_R3(r13)
144 PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13) 151 PPC_STL r4, SVCPU_R4(r13)
145 PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13) 152 PPC_STL r5, SVCPU_R5(r13)
146 PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13) 153 PPC_STL r6, SVCPU_R6(r13)
147 PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13) 154 PPC_STL r7, SVCPU_R7(r13)
148 PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13) 155 PPC_STL r8, SVCPU_R8(r13)
149 PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13) 156 PPC_STL r9, SVCPU_R9(r13)
150 PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13) 157 PPC_STL r10, SVCPU_R10(r13)
151 PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13) 158 PPC_STL r11, SVCPU_R11(r13)
152 159
153 /* Restore R1/R2 so we can handle faults */ 160 /* Restore R1/R2 so we can handle faults */
154 PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13) 161 PPC_LL r1, HSTATE_HOST_R1(r13)
155 PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) 162 PPC_LL r2, HSTATE_HOST_R2(r13)
156 163
157 /* Save guest PC and MSR */ 164 /* Save guest PC and MSR */
165#ifdef CONFIG_PPC64
166BEGIN_FTR_SECTION
158 andi. r0,r12,0x2 167 andi. r0,r12,0x2
159 beq 1f 168 beq 1f
160 mfspr r3,SPRN_HSRR0 169 mfspr r3,SPRN_HSRR0
161 mfspr r4,SPRN_HSRR1 170 mfspr r4,SPRN_HSRR1
162 andi. r12,r12,0x3ffd 171 andi. r12,r12,0x3ffd
163 b 2f 172 b 2f
173END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
174#endif
1641: mfsrr0 r3 1751: mfsrr0 r3
165 mfsrr1 r4 176 mfsrr1 r4
1662: 1772:
167 PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13) 178 PPC_STL r3, SVCPU_PC(r13)
168 PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13) 179 PPC_STL r4, SVCPU_SHADOW_SRR1(r13)
169 180
170 /* Get scratch'ed off registers */ 181 /* Get scratch'ed off registers */
171 GET_SCRATCH0(r9) 182 GET_SCRATCH0(r9)
172 PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) 183 PPC_LL r8, HSTATE_SCRATCH0(r13)
173 lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 184 lwz r7, HSTATE_SCRATCH1(r13)
174 185
175 PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13) 186 PPC_STL r9, SVCPU_R13(r13)
176 PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13) 187 PPC_STL r8, SVCPU_R12(r13)
177 stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13) 188 stw r7, SVCPU_CR(r13)
178 189
179 /* Save more register state */ 190 /* Save more register state */
180 191
@@ -184,11 +195,11 @@ kvmppc_handler_trampoline_exit:
184 mfctr r8 195 mfctr r8
185 mflr r9 196 mflr r9
186 197
187 stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13) 198 stw r5, SVCPU_XER(r13)
188 PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13) 199 PPC_STL r6, SVCPU_FAULT_DAR(r13)
189 stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13) 200 stw r7, SVCPU_FAULT_DSISR(r13)
190 PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13) 201 PPC_STL r8, SVCPU_CTR(r13)
191 PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13) 202 PPC_STL r9, SVCPU_LR(r13)
192 203
193 /* 204 /*
194 * In order for us to easily get the last instruction, 205 * In order for us to easily get the last instruction,
@@ -218,7 +229,7 @@ ld_last_inst:
218 /* Set guest mode to 'jump over instruction' so if lwz faults 229 /* Set guest mode to 'jump over instruction' so if lwz faults
219 * we'll just continue at the next IP. */ 230 * we'll just continue at the next IP. */
220 li r9, KVM_GUEST_MODE_SKIP 231 li r9, KVM_GUEST_MODE_SKIP
221 stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) 232 stb r9, HSTATE_IN_GUEST(r13)
222 233
223 /* 1) enable paging for data */ 234 /* 1) enable paging for data */
224 mfmsr r9 235 mfmsr r9
@@ -232,13 +243,13 @@ ld_last_inst:
232 sync 243 sync
233 244
234#endif 245#endif
235 stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13) 246 stw r0, SVCPU_LAST_INST(r13)
236 247
237no_ld_last_inst: 248no_ld_last_inst:
238 249
239 /* Unset guest mode */ 250 /* Unset guest mode */
240 li r9, KVM_GUEST_MODE_NONE 251 li r9, KVM_GUEST_MODE_NONE
241 stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) 252 stb r9, HSTATE_IN_GUEST(r13)
242 253
243 /* Switch back to host MMU */ 254 /* Switch back to host MMU */
244 LOAD_HOST_SEGMENTS 255 LOAD_HOST_SEGMENTS
@@ -248,7 +259,7 @@ no_ld_last_inst:
248 * R1 = host R1 259 * R1 = host R1
249 * R2 = host R2 260 * R2 = host R2
250 * R12 = exit handler id 261 * R12 = exit handler id
251 * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] 262 * R13 = shadow vcpu (32-bit) or PACA (64-bit)
252 * SVCPU.* = guest * 263 * SVCPU.* = guest *
253 * 264 *
254 */ 265 */
@@ -258,7 +269,7 @@ no_ld_last_inst:
258 ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */ 269 ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
259 mtsrr1 r7 270 mtsrr1 r7
260 /* Load highmem handler address */ 271 /* Load highmem handler address */
261 PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13) 272 PPC_LL r8, HSTATE_VMHANDLER(r13)
262 mtsrr0 r8 273 mtsrr0 r8
263 274
264 RFI 275 RFI
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8462b3a1c1c7..ee45fa01220e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -13,6 +13,7 @@
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2007 15 * Copyright IBM Corp. 2007
16 * Copyright 2010-2011 Freescale Semiconductor, Inc.
16 * 17 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 19 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
@@ -78,6 +79,60 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
78 } 79 }
79} 80}
80 81
82#ifdef CONFIG_SPE
83void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
84{
85 preempt_disable();
86 enable_kernel_spe();
87 kvmppc_save_guest_spe(vcpu);
88 vcpu->arch.shadow_msr &= ~MSR_SPE;
89 preempt_enable();
90}
91
92static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
93{
94 preempt_disable();
95 enable_kernel_spe();
96 kvmppc_load_guest_spe(vcpu);
97 vcpu->arch.shadow_msr |= MSR_SPE;
98 preempt_enable();
99}
100
101static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
102{
103 if (vcpu->arch.shared->msr & MSR_SPE) {
104 if (!(vcpu->arch.shadow_msr & MSR_SPE))
105 kvmppc_vcpu_enable_spe(vcpu);
106 } else if (vcpu->arch.shadow_msr & MSR_SPE) {
107 kvmppc_vcpu_disable_spe(vcpu);
108 }
109}
110#else
111static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
112{
113}
114#endif
115
116/*
117 * Helper function for "full" MSR writes. No need to call this if only
118 * EE/CE/ME/DE/RI are changing.
119 */
120void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
121{
122 u32 old_msr = vcpu->arch.shared->msr;
123
124 vcpu->arch.shared->msr = new_msr;
125
126 kvmppc_mmu_msr_notify(vcpu, old_msr);
127
128 if (vcpu->arch.shared->msr & MSR_WE) {
129 kvm_vcpu_block(vcpu);
130 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
131 };
132
133 kvmppc_vcpu_sync_spe(vcpu);
134}
135
81static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, 136static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
82 unsigned int priority) 137 unsigned int priority)
83{ 138{
@@ -257,6 +312,19 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
257 vcpu->arch.shared->int_pending = 0; 312 vcpu->arch.shared->int_pending = 0;
258} 313}
259 314
315int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
316{
317 int ret;
318
319 local_irq_disable();
320 kvm_guest_enter();
321 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
322 kvm_guest_exit();
323 local_irq_enable();
324
325 return ret;
326}
327
260/** 328/**
261 * kvmppc_handle_exit 329 * kvmppc_handle_exit
262 * 330 *
@@ -344,10 +412,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
344 r = RESUME_GUEST; 412 r = RESUME_GUEST;
345 break; 413 break;
346 414
347 case BOOKE_INTERRUPT_SPE_UNAVAIL: 415#ifdef CONFIG_SPE
348 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL); 416 case BOOKE_INTERRUPT_SPE_UNAVAIL: {
417 if (vcpu->arch.shared->msr & MSR_SPE)
418 kvmppc_vcpu_enable_spe(vcpu);
419 else
420 kvmppc_booke_queue_irqprio(vcpu,
421 BOOKE_IRQPRIO_SPE_UNAVAIL);
349 r = RESUME_GUEST; 422 r = RESUME_GUEST;
350 break; 423 break;
424 }
351 425
352 case BOOKE_INTERRUPT_SPE_FP_DATA: 426 case BOOKE_INTERRUPT_SPE_FP_DATA:
353 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA); 427 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
@@ -358,6 +432,28 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
358 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); 432 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
359 r = RESUME_GUEST; 433 r = RESUME_GUEST;
360 break; 434 break;
435#else
436 case BOOKE_INTERRUPT_SPE_UNAVAIL:
437 /*
438 * Guest wants SPE, but host kernel doesn't support it. Send
439 * an "unimplemented operation" program check to the guest.
440 */
441 kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
442 r = RESUME_GUEST;
443 break;
444
445 /*
446 * These really should never happen without CONFIG_SPE,
447 * as we should never enable the real MSR[SPE] in the guest.
448 */
449 case BOOKE_INTERRUPT_SPE_FP_DATA:
450 case BOOKE_INTERRUPT_SPE_FP_ROUND:
451 printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
452 __func__, exit_nr, vcpu->arch.pc);
453 run->hw.hardware_exit_reason = exit_nr;
454 r = RESUME_HOST;
455 break;
456#endif
361 457
362 case BOOKE_INTERRUPT_DATA_STORAGE: 458 case BOOKE_INTERRUPT_DATA_STORAGE:
363 kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear, 459 kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
@@ -392,6 +488,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
392 gpa_t gpaddr; 488 gpa_t gpaddr;
393 gfn_t gfn; 489 gfn_t gfn;
394 490
491#ifdef CONFIG_KVM_E500
492 if (!(vcpu->arch.shared->msr & MSR_PR) &&
493 (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
494 kvmppc_map_magic(vcpu);
495 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
496 r = RESUME_GUEST;
497
498 break;
499 }
500#endif
501
395 /* Check the guest TLB. */ 502 /* Check the guest TLB. */
396 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); 503 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
397 if (gtlb_index < 0) { 504 if (gtlb_index < 0) {
@@ -514,6 +621,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
514 621
515 vcpu->arch.pc = 0; 622 vcpu->arch.pc = 0;
516 vcpu->arch.shared->msr = 0; 623 vcpu->arch.shared->msr = 0;
624 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
517 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ 625 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
518 626
519 vcpu->arch.shadow_pid = 1; 627 vcpu->arch.shadow_pid = 1;
@@ -770,6 +878,26 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
770 return -ENOTSUPP; 878 return -ENOTSUPP;
771} 879}
772 880
881int kvmppc_core_prepare_memory_region(struct kvm *kvm,
882 struct kvm_userspace_memory_region *mem)
883{
884 return 0;
885}
886
887void kvmppc_core_commit_memory_region(struct kvm *kvm,
888 struct kvm_userspace_memory_region *mem)
889{
890}
891
892int kvmppc_core_init_vm(struct kvm *kvm)
893{
894 return 0;
895}
896
897void kvmppc_core_destroy_vm(struct kvm *kvm)
898{
899}
900
773int __init kvmppc_booke_init(void) 901int __init kvmppc_booke_init(void)
774{ 902{
775 unsigned long ivor[16]; 903 unsigned long ivor[16];
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 492bb7030358..8e1fe33d64e5 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -52,24 +52,19 @@
52 52
53extern unsigned long kvmppc_booke_handlers; 53extern unsigned long kvmppc_booke_handlers;
54 54
55/* Helper function for "full" MSR writes. No need to call this if only EE is 55void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
56 * changing. */ 56void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
57static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
58{
59 if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR))
60 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
61
62 vcpu->arch.shared->msr = new_msr;
63
64 if (vcpu->arch.shared->msr & MSR_WE) {
65 kvm_vcpu_block(vcpu);
66 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
67 };
68}
69 57
70int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 58int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
71 unsigned int inst, int *advance); 59 unsigned int inst, int *advance);
72int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); 60int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
73int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); 61int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
74 62
63/* low-level asm code to transfer guest state */
64void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
65void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
66
67/* high-level function, manages flags, host state */
68void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
69
75#endif /* __KVM_BOOKE_H__ */ 70#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index b58ccae95904..42f2fb1f66e9 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -13,6 +13,7 @@
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2007 15 * Copyright IBM Corp. 2007
16 * Copyright 2011 Freescale Semiconductor, Inc.
16 * 17 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */ 19 */
@@ -24,8 +25,6 @@
24#include <asm/page.h> 25#include <asm/page.h>
25#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
26 27
27#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS)
28
29#define VCPU_GPR(n) (VCPU_GPRS + (n * 4)) 28#define VCPU_GPR(n) (VCPU_GPRS + (n * 4))
30 29
31/* The host stack layout: */ 30/* The host stack layout: */
@@ -192,6 +191,12 @@ _GLOBAL(kvmppc_resume_host)
192 lwz r3, VCPU_HOST_PID(r4) 191 lwz r3, VCPU_HOST_PID(r4)
193 mtspr SPRN_PID, r3 192 mtspr SPRN_PID, r3
194 193
194#ifdef CONFIG_FSL_BOOKE
195 /* we cheat and know that Linux doesn't use PID1 which is always 0 */
196 lis r3, 0
197 mtspr SPRN_PID1, r3
198#endif
199
195 /* Restore host IVPR before re-enabling interrupts. We cheat and know 200 /* Restore host IVPR before re-enabling interrupts. We cheat and know
196 * that Linux IVPR is always 0xc0000000. */ 201 * that Linux IVPR is always 0xc0000000. */
197 lis r3, 0xc000 202 lis r3, 0xc000
@@ -241,6 +246,14 @@ _GLOBAL(kvmppc_resume_host)
241heavyweight_exit: 246heavyweight_exit:
242 /* Not returning to guest. */ 247 /* Not returning to guest. */
243 248
249#ifdef CONFIG_SPE
250 /* save guest SPEFSCR and load host SPEFSCR */
251 mfspr r9, SPRN_SPEFSCR
252 stw r9, VCPU_SPEFSCR(r4)
253 lwz r9, VCPU_HOST_SPEFSCR(r4)
254 mtspr SPRN_SPEFSCR, r9
255#endif
256
244 /* We already saved guest volatile register state; now save the 257 /* We already saved guest volatile register state; now save the
245 * non-volatiles. */ 258 * non-volatiles. */
246 stw r15, VCPU_GPR(r15)(r4) 259 stw r15, VCPU_GPR(r15)(r4)
@@ -342,6 +355,14 @@ _GLOBAL(__kvmppc_vcpu_run)
342 lwz r30, VCPU_GPR(r30)(r4) 355 lwz r30, VCPU_GPR(r30)(r4)
343 lwz r31, VCPU_GPR(r31)(r4) 356 lwz r31, VCPU_GPR(r31)(r4)
344 357
358#ifdef CONFIG_SPE
359 /* save host SPEFSCR and load guest SPEFSCR */
360 mfspr r3, SPRN_SPEFSCR
361 stw r3, VCPU_HOST_SPEFSCR(r4)
362 lwz r3, VCPU_SPEFSCR(r4)
363 mtspr SPRN_SPEFSCR, r3
364#endif
365
345lightweight_exit: 366lightweight_exit:
346 stw r2, HOST_R2(r1) 367 stw r2, HOST_R2(r1)
347 368
@@ -350,6 +371,11 @@ lightweight_exit:
350 lwz r3, VCPU_SHADOW_PID(r4) 371 lwz r3, VCPU_SHADOW_PID(r4)
351 mtspr SPRN_PID, r3 372 mtspr SPRN_PID, r3
352 373
374#ifdef CONFIG_FSL_BOOKE
375 lwz r3, VCPU_SHADOW_PID1(r4)
376 mtspr SPRN_PID1, r3
377#endif
378
353#ifdef CONFIG_44x 379#ifdef CONFIG_44x
354 iccci 0, 0 /* XXX hack */ 380 iccci 0, 0 /* XXX hack */
355#endif 381#endif
@@ -405,20 +431,17 @@ lightweight_exit:
405 431
406 /* Finish loading guest volatiles and jump to guest. */ 432 /* Finish loading guest volatiles and jump to guest. */
407 lwz r3, VCPU_CTR(r4) 433 lwz r3, VCPU_CTR(r4)
434 lwz r5, VCPU_CR(r4)
435 lwz r6, VCPU_PC(r4)
436 lwz r7, VCPU_SHADOW_MSR(r4)
408 mtctr r3 437 mtctr r3
409 lwz r3, VCPU_CR(r4) 438 mtcr r5
410 mtcr r3 439 mtsrr0 r6
440 mtsrr1 r7
411 lwz r5, VCPU_GPR(r5)(r4) 441 lwz r5, VCPU_GPR(r5)(r4)
412 lwz r6, VCPU_GPR(r6)(r4) 442 lwz r6, VCPU_GPR(r6)(r4)
413 lwz r7, VCPU_GPR(r7)(r4) 443 lwz r7, VCPU_GPR(r7)(r4)
414 lwz r8, VCPU_GPR(r8)(r4) 444 lwz r8, VCPU_GPR(r8)(r4)
415 lwz r3, VCPU_PC(r4)
416 mtsrr0 r3
417 lwz r3, VCPU_SHARED(r4)
418 lwz r3, (VCPU_SHARED_MSR + 4)(r3)
419 oris r3, r3, KVMPPC_MSR_MASK@h
420 ori r3, r3, KVMPPC_MSR_MASK@l
421 mtsrr1 r3
422 445
423 /* Clear any debug events which occurred since we disabled MSR[DE]. 446 /* Clear any debug events which occurred since we disabled MSR[DE].
424 * XXX This gives us a 3-instruction window in which a breakpoint 447 * XXX This gives us a 3-instruction window in which a breakpoint
@@ -430,3 +453,24 @@ lightweight_exit:
430 lwz r3, VCPU_GPR(r3)(r4) 453 lwz r3, VCPU_GPR(r3)(r4)
431 lwz r4, VCPU_GPR(r4)(r4) 454 lwz r4, VCPU_GPR(r4)(r4)
432 rfi 455 rfi
456
457#ifdef CONFIG_SPE
458_GLOBAL(kvmppc_save_guest_spe)
459 cmpi 0,r3,0
460 beqlr-
461 SAVE_32EVRS(0, r4, r3, VCPU_EVR)
462 evxor evr6, evr6, evr6
463 evmwumiaa evr6, evr6, evr6
464 li r4,VCPU_ACC
465 evstddx evr6, r4, r3 /* save acc */
466 blr
467
468_GLOBAL(kvmppc_load_guest_spe)
469 cmpi 0,r3,0
470 beqlr-
471 li r4,VCPU_ACC
472 evlddx evr6,r4,r3
473 evmra evr6,evr6 /* load acc */
474 REST_32EVRS(0, r4, r3, VCPU_EVR)
475 blr
476#endif
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 318dbc61ba44..797a7447c268 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. 2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 * 3 *
4 * Author: Yu Liu, <yu.liu@freescale.com> 4 * Author: Yu Liu, <yu.liu@freescale.com>
5 * 5 *
@@ -41,6 +41,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
41void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 41void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
42{ 42{
43 kvmppc_e500_tlb_put(vcpu); 43 kvmppc_e500_tlb_put(vcpu);
44
45#ifdef CONFIG_SPE
46 if (vcpu->arch.shadow_msr & MSR_SPE)
47 kvmppc_vcpu_disable_spe(vcpu);
48#endif
44} 49}
45 50
46int kvmppc_core_check_processor_compat(void) 51int kvmppc_core_check_processor_compat(void)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 69cd665a0caf..d48ae396f41e 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -81,8 +81,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
81 kvmppc_set_pid(vcpu, spr_val); 81 kvmppc_set_pid(vcpu, spr_val);
82 break; 82 break;
83 case SPRN_PID1: 83 case SPRN_PID1:
84 if (spr_val != 0)
85 return EMULATE_FAIL;
84 vcpu_e500->pid[1] = spr_val; break; 86 vcpu_e500->pid[1] = spr_val; break;
85 case SPRN_PID2: 87 case SPRN_PID2:
88 if (spr_val != 0)
89 return EMULATE_FAIL;
86 vcpu_e500->pid[2] = spr_val; break; 90 vcpu_e500->pid[2] = spr_val; break;
87 case SPRN_MAS0: 91 case SPRN_MAS0:
88 vcpu_e500->mas0 = spr_val; break; 92 vcpu_e500->mas0 = spr_val; break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index b18fe353397d..13c432ea2fa8 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -28,8 +28,196 @@
28 28
29#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) 29#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
30 30
31struct id {
32 unsigned long val;
33 struct id **pentry;
34};
35
36#define NUM_TIDS 256
37
38/*
39 * This table provides mappings from:
40 * (guestAS,guestTID,guestPR) --> ID of physical cpu
41 * guestAS [0..1]
42 * guestTID [0..255]
43 * guestPR [0..1]
44 * ID [1..255]
45 * Each vcpu keeps one vcpu_id_table.
46 */
47struct vcpu_id_table {
48 struct id id[2][NUM_TIDS][2];
49};
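/*
 * Worked example, added for clarity: a guest translation context with
 * AS=0, TID=34, PR=1 resolves through idt->id[0][34][1].  Its .val is the
 * shadow ID that ends up in the hardware PID register for that context,
 * and the mapping only counts as valid while the per-core
 * pcpu_sids.entry[val] still points back at this same struct id; see
 * local_sid_lookup() below.
 */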
50
51/*
52 * This table provides the reverse mapping of vcpu_id_table:
53 * ID --> address of vcpu_id_table item.
54 * Each physical core has one pcpu_id_table.
55 */
56struct pcpu_id_table {
57 struct id *entry[NUM_TIDS];
58};
59
60static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
61
62/* This variable keeps the last used shadow ID on the local core.
63 * The valid range of shadow IDs is [1..255]. */
64static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
65
31static unsigned int tlb1_entry_num; 66static unsigned int tlb1_entry_num;
32 67
68/*
69 * Allocate a free shadow id and set up a valid sid mapping in the given entry.
70 * A mapping is only valid when the vcpu_id_table and pcpu_id_table entries match.
71 *
72 * The caller must have preemption disabled, and keep it that way until
73 * it has finished with the returned shadow id (either written into the
74 * TLB or arch.shadow_pid, or discarded).
75 */
76static inline int local_sid_setup_one(struct id *entry)
77{
78 unsigned long sid;
79 int ret = -1;
80
81 sid = ++(__get_cpu_var(pcpu_last_used_sid));
82 if (sid < NUM_TIDS) {
83 __get_cpu_var(pcpu_sids).entry[sid] = entry;
84 entry->val = sid;
85 entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
86 ret = sid;
87 }
88
89 /*
90 * If sid == NUM_TIDS, we've run out of sids. We return -1, and
91 * the caller will invalidate everything and start over.
92 *
93 * sid > NUM_TIDS indicates a race, which we disable preemption to
94 * avoid.
95 */
96 WARN_ON(sid > NUM_TIDS);
97
98 return ret;
99}
100
101/*
102 * Check whether the given entry contains a valid shadow id mapping.
103 * An ID mapping is considered valid only if
104 * both vcpu and pcpu know this mapping.
105 *
106 * The caller must have preemption disabled, and keep it that way until
107 * it has finished with the returned shadow id (either written into the
108 * TLB or arch.shadow_pid, or discarded).
109 */
110static inline int local_sid_lookup(struct id *entry)
111{
112 if (entry && entry->val != 0 &&
113 __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
114 entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
115 return entry->val;
116 return -1;
117}
118
119/* Invalidate all id mappings on local core */
120static inline void local_sid_destroy_all(void)
121{
122 preempt_disable();
123 __get_cpu_var(pcpu_last_used_sid) = 0;
124 memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
125 preempt_enable();
126}
127
128static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
129{
130 vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
131 return vcpu_e500->idt;
132}
133
134static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
135{
136 kfree(vcpu_e500->idt);
137}
138
139/* Invalidate all mappings on vcpu */
140static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
141{
142 memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
143
144 /* Update shadow pid when mappings are changed */
145 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
146}
147
148/* Invalidate one ID mapping on vcpu */
149static inline void kvmppc_e500_id_table_reset_one(
150 struct kvmppc_vcpu_e500 *vcpu_e500,
151 int as, int pid, int pr)
152{
153 struct vcpu_id_table *idt = vcpu_e500->idt;
154
155 BUG_ON(as >= 2);
156 BUG_ON(pid >= NUM_TIDS);
157 BUG_ON(pr >= 2);
158
159 idt->id[as][pid][pr].val = 0;
160 idt->id[as][pid][pr].pentry = NULL;
161
162 /* Update shadow pid when mappings are changed */
163 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
164}
165
166/*
167 * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
168 * This function first looks up whether a valid mapping exists;
169 * if not, it creates a new one.
170 *
171 * The caller must have preemption disabled, and keep it that way until
172 * it has finished with the returned shadow id (either written into the
173 * TLB or arch.shadow_pid, or discarded).
174 */
175static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
176 unsigned int as, unsigned int gid,
177 unsigned int pr, int avoid_recursion)
178{
179 struct vcpu_id_table *idt = vcpu_e500->idt;
180 int sid;
181
182 BUG_ON(as >= 2);
183 BUG_ON(gid >= NUM_TIDS);
184 BUG_ON(pr >= 2);
185
186 sid = local_sid_lookup(&idt->id[as][gid][pr]);
187
188 while (sid <= 0) {
189 /* No mapping yet */
190 sid = local_sid_setup_one(&idt->id[as][gid][pr]);
191 if (sid <= 0) {
192 _tlbil_all();
193 local_sid_destroy_all();
194 }
195
196 /* Update shadow pid when mappings are changed */
197 if (!avoid_recursion)
198 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
199 }
200
201 return sid;
202}
203
204/* Map guest pid to shadow.
205 * We use PID to hold the shadow of the current guest non-zero PID,
206 * and PID1 to hold the shadow of guest PID 0,
207 * so that guest tlbes with TID=0 can be accessed at any time. */
208void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
209{
210 preempt_disable();
211 vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
212 get_cur_as(&vcpu_e500->vcpu),
213 get_cur_pid(&vcpu_e500->vcpu),
214 get_cur_pr(&vcpu_e500->vcpu), 1);
215 vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
216 get_cur_as(&vcpu_e500->vcpu), 0,
217 get_cur_pr(&vcpu_e500->vcpu), 1);
218 preempt_enable();
219}
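/*
 * Note added for clarity: guest TLB entries with TID=0 are global and
 * match any PID, so their shadow copies must stay reachable no matter
 * which guest PID is currently active.  Keeping two live shadow IDs, the
 * current non-zero guest PID in PID and guest PID 0 in PID1 (restored
 * from VCPU_SHADOW_PID1 in booke_interrupts.S), achieves that without
 * re-faulting the TID=0 entries on every guest PID switch.
 */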
220
33void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) 221void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
34{ 222{
35 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 223 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -41,25 +229,14 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
41 229
42 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 230 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
43 printk("Guest TLB%d:\n", tlbsel); 231 printk("Guest TLB%d:\n", tlbsel);
44 for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { 232 for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
45 tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; 233 tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
46 if (tlbe->mas1 & MAS1_VALID) 234 if (tlbe->mas1 & MAS1_VALID)
47 printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n", 235 printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
48 tlbsel, i, tlbe->mas1, tlbe->mas2, 236 tlbsel, i, tlbe->mas1, tlbe->mas2,
49 tlbe->mas3, tlbe->mas7); 237 tlbe->mas3, tlbe->mas7);
50 } 238 }
51 } 239 }
52
53 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
54 printk("Shadow TLB%d:\n", tlbsel);
55 for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
56 tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
57 if (tlbe->mas1 & MAS1_VALID)
58 printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n",
59 tlbsel, i, tlbe->mas1, tlbe->mas2,
60 tlbe->mas3, tlbe->mas7);
61 }
62 }
63} 240}
64 241
65static inline unsigned int tlb0_get_next_victim( 242static inline unsigned int tlb0_get_next_victim(
@@ -67,16 +244,17 @@ static inline unsigned int tlb0_get_next_victim(
67{ 244{
68 unsigned int victim; 245 unsigned int victim;
69 246
70 victim = vcpu_e500->guest_tlb_nv[0]++; 247 victim = vcpu_e500->gtlb_nv[0]++;
71 if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) 248 if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
72 vcpu_e500->guest_tlb_nv[0] = 0; 249 vcpu_e500->gtlb_nv[0] = 0;
73 250
74 return victim; 251 return victim;
75} 252}
76 253
77static inline unsigned int tlb1_max_shadow_size(void) 254static inline unsigned int tlb1_max_shadow_size(void)
78{ 255{
79 return tlb1_entry_num - tlbcam_index; 256 /* reserve one entry for magic page */
257 return tlb1_entry_num - tlbcam_index - 1;
80} 258}
81 259
82static inline int tlbe_is_writable(struct tlbe *tlbe) 260static inline int tlbe_is_writable(struct tlbe *tlbe)
@@ -112,72 +290,149 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
112/* 290/*
113 * writing shadow tlb entry to host TLB 291 * writing shadow tlb entry to host TLB
114 */ 292 */
115static inline void __write_host_tlbe(struct tlbe *stlbe) 293static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
116{ 294{
295 unsigned long flags;
296
297 local_irq_save(flags);
298 mtspr(SPRN_MAS0, mas0);
117 mtspr(SPRN_MAS1, stlbe->mas1); 299 mtspr(SPRN_MAS1, stlbe->mas1);
118 mtspr(SPRN_MAS2, stlbe->mas2); 300 mtspr(SPRN_MAS2, stlbe->mas2);
119 mtspr(SPRN_MAS3, stlbe->mas3); 301 mtspr(SPRN_MAS3, stlbe->mas3);
120 mtspr(SPRN_MAS7, stlbe->mas7); 302 mtspr(SPRN_MAS7, stlbe->mas7);
121 __asm__ __volatile__ ("tlbwe\n" : : ); 303 asm volatile("isync; tlbwe" : : : "memory");
304 local_irq_restore(flags);
122} 305}
123 306
124static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, 307static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
125 int tlbsel, int esel) 308 int tlbsel, int esel, struct tlbe *stlbe)
126{ 309{
127 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
128
129 local_irq_disable();
130 if (tlbsel == 0) { 310 if (tlbsel == 0) {
131 __write_host_tlbe(stlbe); 311 __write_host_tlbe(stlbe,
312 MAS0_TLBSEL(0) |
313 MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
132 } else { 314 } else {
133 unsigned register mas0; 315 __write_host_tlbe(stlbe,
134 316 MAS0_TLBSEL(1) |
135 mas0 = mfspr(SPRN_MAS0); 317 MAS0_ESEL(to_htlb1_esel(esel)));
136
137 mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
138 __write_host_tlbe(stlbe);
139
140 mtspr(SPRN_MAS0, mas0);
141 } 318 }
142 local_irq_enable(); 319 trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
320 stlbe->mas3, stlbe->mas7);
321}
322
323void kvmppc_map_magic(struct kvm_vcpu *vcpu)
324{
325 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
326 struct tlbe magic;
327 ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
328 unsigned int stid;
329 pfn_t pfn;
330
331 pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
332 get_page(pfn_to_page(pfn));
333
334 preempt_disable();
335 stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0);
336
337 magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
338 MAS1_TSIZE(BOOK3E_PAGESZ_4K);
339 magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
340 magic.mas3 = (pfn << PAGE_SHIFT) |
341 MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
342 magic.mas7 = pfn >> (32 - PAGE_SHIFT);
343
344 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
345 preempt_enable();
143} 346}
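
The mas3/mas7 assignments in kvmppc_map_magic() split a physical address that may be wider than 32 bits: MAS3 carries the low 32 bits (real page number plus permission bits) and MAS7 the bits above 32. A minimal stand-alone illustration, assuming 4K pages and a made-up frame number above 4 GiB:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
        uint64_t pfn = 0x123456ULL;	/* hypothetical frame above 4 GiB */
        uint32_t mas3_rpn = (uint32_t)(pfn << PAGE_SHIFT);        /* low 32 bits   */
        uint32_t mas7     = (uint32_t)(pfn >> (32 - PAGE_SHIFT)); /* bits 32 and up */

        printf("phys = 0x%09llx\n", (unsigned long long)pfn << PAGE_SHIFT);
        printf("MAS3 RPN bits = 0x%08x, MAS7 = 0x%08x\n", mas3_rpn, mas7);
        return 0;
}
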
144 347
145void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) 348void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
146{ 349{
147 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 350 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
148 int i; 351
149 unsigned register mas0; 352 /* Shadow PID may have expired on local core */
150 353 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
151 /* Load all valid TLB1 entries to reduce guest tlb miss fault */
152 local_irq_disable();
153 mas0 = mfspr(SPRN_MAS0);
154 for (i = 0; i < tlb1_max_shadow_size(); i++) {
155 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
156
157 if (get_tlb_v(stlbe)) {
158 mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
159 | MAS0_ESEL(to_htlb1_esel(i)));
160 __write_host_tlbe(stlbe);
161 }
162 }
163 mtspr(SPRN_MAS0, mas0);
164 local_irq_enable();
165} 354}
166 355
167void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) 356void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
168{ 357{
169 _tlbil_all(); 358}
359
360static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
361 int tlbsel, int esel)
362{
363 struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
364 struct vcpu_id_table *idt = vcpu_e500->idt;
365 unsigned int pr, tid, ts, pid;
366 u32 val, eaddr;
367 unsigned long flags;
368
369 ts = get_tlb_ts(gtlbe);
370 tid = get_tlb_tid(gtlbe);
371
372 preempt_disable();
373
374 /* One guest ID may be mapped to two shadow IDs */
375 for (pr = 0; pr < 2; pr++) {
376 /*
377 * The shadow PID can have a valid mapping on at most one
378 * host CPU. In the common case, it will be valid on this
379 * CPU, in which case (for TLB0) we do a local invalidation
380 * of the specific address.
381 *
382 * If the shadow PID is not valid on the current host CPU, or
383 * if we're invalidating a TLB1 entry, we invalidate the
384 * entire shadow PID.
385 */
386 if (tlbsel == 1 ||
387 (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) {
388 kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
389 continue;
390 }
391
392 /*
393 * The guest is invalidating a TLB0 entry which is in a PID
394 * that has a valid shadow mapping on this host CPU. We
395 * search host TLB0 to invalidate its shadow TLB entry,
396 * similar to __tlbil_va except that we need to look in AS1.
397 */
398 val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
399 eaddr = get_tlb_eaddr(gtlbe);
400
401 local_irq_save(flags);
402
403 mtspr(SPRN_MAS6, val);
404 asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
405 val = mfspr(SPRN_MAS1);
406 if (val & MAS1_VALID) {
407 mtspr(SPRN_MAS1, val & ~MAS1_VALID);
408 asm volatile("tlbwe");
409 }
410
411 local_irq_restore(flags);
412 }
413
414 preempt_enable();
170} 415}
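
The MAS6 value programmed before the tlbsx above selects which translation the search matches: the shadow PID goes into the SPID field and SAS=1 restricts the search to AS1. A minimal sketch of that composition, with the field positions (SPID at bit 16, SAS at bit 0) stated locally as an assumption rather than taken from kernel headers:

#include <stdio.h>
#include <stdint.h>

#define MAS6_SPID_SHIFT 16		/* assumed Book3E layout */
#define MAS6_SAS        0x00000001	/* search address space 1 */

int main(void)
{
        uint32_t pid = 0x2a;	/* hypothetical shadow pid from the lookup */
        uint32_t mas6 = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;

        printf("MAS6 = 0x%08x\n", mas6);	/* 0x002a0001 */
        return 0;
}
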
171 416
172/* Search the guest TLB for a matching entry. */ 417/* Search the guest TLB for a matching entry. */
173static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, 418static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
174 gva_t eaddr, int tlbsel, unsigned int pid, int as) 419 gva_t eaddr, int tlbsel, unsigned int pid, int as)
175{ 420{
421 int size = vcpu_e500->gtlb_size[tlbsel];
422 int set_base;
176 int i; 423 int i;
177 424
178 /* XXX Replace loop with fancy data structures. */ 425 if (tlbsel == 0) {
179 for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { 426 int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
180 struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; 427 set_base = (eaddr >> PAGE_SHIFT) & mask;
428 set_base *= KVM_E500_TLB0_WAY_NUM;
429 size = KVM_E500_TLB0_WAY_NUM;
430 } else {
431 set_base = 0;
432 }
433
434 for (i = 0; i < size; i++) {
435 struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
181 unsigned int tid; 436 unsigned int tid;
182 437
183 if (eaddr < get_tlb_eaddr(tlbe)) 438 if (eaddr < get_tlb_eaddr(tlbe))
@@ -196,66 +451,32 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
196 if (get_tlb_ts(tlbe) != as && as != -1) 451 if (get_tlb_ts(tlbe) != as && as != -1)
197 continue; 452 continue;
198 453
199 return i; 454 return set_base + i;
200 } 455 }
201 456
202 return -1; 457 return -1;
203} 458}
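
With the set-associative layout introduced above, a TLB0 lookup only has to scan one set of entries, chosen by the low bits of the effective page number. A stand-alone sketch of that arithmetic, using hypothetical geometry in place of KVM_E500_TLB0_SIZE and KVM_E500_TLB0_WAY_NUM:

#include <stdio.h>

#define PAGE_SHIFT 12
#define TLB0_SIZE  512	/* total guest TLB0 entries (assumed) */
#define TLB0_WAYS  2	/* entries per set (assumed) */

int main(void)
{
        unsigned long eaddr = 0x10003000UL;
        int sets = TLB0_SIZE / TLB0_WAYS;
        int set = (eaddr >> PAGE_SHIFT) & (sets - 1);
        int set_base = set * TLB0_WAYS;

        /* Only the TLB0_WAYS entries starting at set_base can hold eaddr. */
        printf("eaddr 0x%lx -> set %d, entries [%d..%d]\n",
               eaddr, set, set_base, set_base + TLB0_WAYS - 1);
        return 0;
}
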
204 459
205static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500, 460static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
206 int tlbsel, int esel) 461 struct tlbe *gtlbe,
207{ 462 pfn_t pfn)
208 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
209 struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
210
211 if (page) {
212 vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
213
214 if (get_tlb_v(stlbe)) {
215 if (tlbe_is_writable(stlbe))
216 kvm_release_page_dirty(page);
217 else
218 kvm_release_page_clean(page);
219 }
220 }
221}
222
223static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
224 int tlbsel, int esel)
225{ 463{
226 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; 464 priv->pfn = pfn;
465 priv->flags = E500_TLB_VALID;
227 466
228 kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); 467 if (tlbe_is_writable(gtlbe))
229 stlbe->mas1 = 0; 468 priv->flags |= E500_TLB_DIRTY;
230 trace_kvm_stlb_inval(index_of(tlbsel, esel));
231} 469}
232 470
233static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, 471static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv)
234 gva_t eaddr, gva_t eend, u32 tid)
235{ 472{
236 unsigned int pid = tid & 0xff; 473 if (priv->flags & E500_TLB_VALID) {
237 unsigned int i; 474 if (priv->flags & E500_TLB_DIRTY)
238 475 kvm_release_pfn_dirty(priv->pfn);
239 /* XXX Replace loop with fancy data structures. */ 476 else
240 for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) { 477 kvm_release_pfn_clean(priv->pfn);
241 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
242 unsigned int tid;
243
244 if (!get_tlb_v(stlbe))
245 continue;
246
247 if (eend < get_tlb_eaddr(stlbe))
248 continue;
249 478
250 if (eaddr > get_tlb_end(stlbe)) 479 priv->flags = 0;
251 continue;
252
253 tid = get_tlb_tid(stlbe);
254 if (tid && (tid != pid))
255 continue;
256
257 kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
258 write_host_tlbe(vcpu_e500, 1, i);
259 } 480 }
260} 481}
261 482
@@ -273,7 +494,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
273 tsized = (vcpu_e500->mas4 >> 7) & 0x1f; 494 tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
274 495
275 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) 496 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
276 | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); 497 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
277 vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) 498 vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
278 | MAS1_TID(vcpu_e500->pid[pidsel]) 499 | MAS1_TID(vcpu_e500->pid[pidsel])
279 | MAS1_TSIZE(tsized); 500 | MAS1_TSIZE(tsized);
@@ -286,56 +507,154 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
286 vcpu_e500->mas7 = 0; 507 vcpu_e500->mas7 = 0;
287} 508}
288 509
289static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, 510static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
290 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) 511 struct tlbe *gtlbe, int tsize,
512 struct tlbe_priv *priv,
513 u64 gvaddr, struct tlbe *stlbe)
291{ 514{
292 struct page *new_page; 515 pfn_t pfn = priv->pfn;
293 struct tlbe *stlbe; 516 unsigned int stid;
294 hpa_t hpaddr;
295
296 stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
297
298 /* Get reference to new page. */
299 new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
300 if (is_error_page(new_page)) {
301 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n",
302 (long)gfn);
303 kvm_release_page_clean(new_page);
304 return;
305 }
306 hpaddr = page_to_phys(new_page);
307
308 /* Drop reference to old page. */
309 kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
310 517
311 vcpu_e500->shadow_pages[tlbsel][esel] = new_page; 518 stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
519 get_tlb_tid(gtlbe),
520 get_cur_pr(&vcpu_e500->vcpu), 0);
312 521
313 /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ 522 /* Force TS=1 IPROT=0 for all guest mappings. */
314 stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K) 523 stlbe->mas1 = MAS1_TSIZE(tsize)
315 | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; 524 | MAS1_TID(stid) | MAS1_TS | MAS1_VALID;
316 stlbe->mas2 = (gvaddr & MAS2_EPN) 525 stlbe->mas2 = (gvaddr & MAS2_EPN)
317 | e500_shadow_mas2_attrib(gtlbe->mas2, 526 | e500_shadow_mas2_attrib(gtlbe->mas2,
318 vcpu_e500->vcpu.arch.shared->msr & MSR_PR); 527 vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
319 stlbe->mas3 = (hpaddr & MAS3_RPN) 528 stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
320 | e500_shadow_mas3_attrib(gtlbe->mas3, 529 | e500_shadow_mas3_attrib(gtlbe->mas3,
321 vcpu_e500->vcpu.arch.shared->msr & MSR_PR); 530 vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
322 stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; 531 stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
532}
323 533
324 trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, 534
325 stlbe->mas3, stlbe->mas7); 535static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
536 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
537 struct tlbe *stlbe)
538{
539 struct kvm_memory_slot *slot;
540 unsigned long pfn, hva;
541 int pfnmap = 0;
542 int tsize = BOOK3E_PAGESZ_4K;
543 struct tlbe_priv *priv;
544
545 /*
546 * Translate guest physical to true physical, acquiring
547 * a page reference if it is normal, non-reserved memory.
548 *
549 * gfn_to_memslot() must succeed because otherwise we wouldn't
550 * have gotten this far. Eventually we should just pass the slot
551 * pointer through from the first lookup.
552 */
553 slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
554 hva = gfn_to_hva_memslot(slot, gfn);
555
556 if (tlbsel == 1) {
557 struct vm_area_struct *vma;
558 down_read(&current->mm->mmap_sem);
559
560 vma = find_vma(current->mm, hva);
561 if (vma && hva >= vma->vm_start &&
562 (vma->vm_flags & VM_PFNMAP)) {
563 /*
564 * This VMA is a physically contiguous region (e.g.
565 * /dev/mem) that bypasses normal Linux page
566 * management. Find the overlap between the
567 * vma and the memslot.
568 */
569
570 unsigned long start, end;
571 unsigned long slot_start, slot_end;
572
573 pfnmap = 1;
574
575 start = vma->vm_pgoff;
576 end = start +
577 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
578
579 pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
580
581 slot_start = pfn - (gfn - slot->base_gfn);
582 slot_end = slot_start + slot->npages;
583
584 if (start < slot_start)
585 start = slot_start;
586 if (end > slot_end)
587 end = slot_end;
588
589 tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
590 MAS1_TSIZE_SHIFT;
591
592 /*
593 * e500 doesn't implement the lowest tsize bit,
594 * or 1K pages.
595 */
596 tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
597
598 /*
599 * Now find the largest tsize (up to what the guest
600 * requested) that will cover gfn, stay within the
601 * range, and for which gfn and pfn are mutually
602 * aligned.
603 */
604
605 for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
606 unsigned long gfn_start, gfn_end, tsize_pages;
607 tsize_pages = 1 << (tsize - 2);
608
609 gfn_start = gfn & ~(tsize_pages - 1);
610 gfn_end = gfn_start + tsize_pages;
611
612 if (gfn_start + pfn - gfn < start)
613 continue;
614 if (gfn_end + pfn - gfn > end)
615 continue;
616 if ((gfn & (tsize_pages - 1)) !=
617 (pfn & (tsize_pages - 1)))
618 continue;
619
620 gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
621 pfn &= ~(tsize_pages - 1);
622 break;
623 }
624 }
625
626 up_read(&current->mm->mmap_sem);
627 }
628
629 if (likely(!pfnmap)) {
630 pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
631 if (is_error_pfn(pfn)) {
632 printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
633 (long)gfn);
634 kvm_release_pfn_clean(pfn);
635 return;
636 }
637 }
638
639 /* Drop old priv and setup new one. */
640 priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
641 kvmppc_e500_priv_release(priv);
642 kvmppc_e500_priv_setup(priv, gtlbe, pfn);
643
644 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, priv, gvaddr, stlbe);
326} 645}
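
The tsize search in the VM_PFNMAP path above can be exercised in isolation. In the Book3E encoding used here a tsize t means a 1 KB << t page, so 2 = 4K, 4 = 16K, 6 = 64K and so on, and such a page spans 1 << (t - 2) 4K frames. The stand-alone worked example below uses made-up gfn/pfn values and window limits; it steps down from an assumed guest request of 256K (tsize 8) until the alignment and range checks pass:

#include <stdio.h>

int main(void)
{
        unsigned long gfn = 0x40030, pfn = 0x80010;	/* hypothetical frames */
        unsigned long start = 0x80000, end = 0x90000;	/* allowed pfn window */
        int tsize = 8;					/* guest asked for 256K */

        for (; tsize > 2; tsize -= 2) {
                unsigned long tsize_pages = 1UL << (tsize - 2);
                unsigned long gfn_start = gfn & ~(tsize_pages - 1);
                unsigned long gfn_end = gfn_start + tsize_pages;

                if (gfn_start + pfn - gfn < start)
                        continue;	/* would map below the window */
                if (gfn_end + pfn - gfn > end)
                        continue;	/* would map past the window */
                if ((gfn & (tsize_pages - 1)) != (pfn & (tsize_pages - 1)))
                        continue;	/* gfn and pfn not mutually aligned */
                break;
        }

        /* Prints "chosen tsize 6 -> 64 KB" for the values above. */
        printf("chosen tsize %d -> %lu KB\n", tsize, 1UL << tsize);
        return 0;
}
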
327 646
328/* XXX only map the one-one case, for now use TLB0 */ 647/* XXX only map the one-one case, for now use TLB0 */
329static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500, 648static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
330 int tlbsel, int esel) 649 int esel, struct tlbe *stlbe)
331{ 650{
332 struct tlbe *gtlbe; 651 struct tlbe *gtlbe;
333 652
334 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; 653 gtlbe = &vcpu_e500->gtlb_arch[0][esel];
335 654
336 kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), 655 kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
337 get_tlb_raddr(gtlbe) >> PAGE_SHIFT, 656 get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
338 gtlbe, tlbsel, esel); 657 gtlbe, 0, esel, stlbe);
339 658
340 return esel; 659 return esel;
341} 660}
@@ -344,53 +663,37 @@ static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
344 * the shadow TLB. */ 663 * the shadow TLB. */
345/* XXX for both one-one and one-to-many, for now use TLB1 */ 664/* XXX for both one-one and one-to-many, for now use TLB1 */
346static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, 665static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
347 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe) 666 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
348{ 667{
349 unsigned int victim; 668 unsigned int victim;
350 669
351 victim = vcpu_e500->guest_tlb_nv[1]++; 670 victim = vcpu_e500->gtlb_nv[1]++;
352 671
353 if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size())) 672 if (unlikely(vcpu_e500->gtlb_nv[1] >= tlb1_max_shadow_size()))
354 vcpu_e500->guest_tlb_nv[1] = 0; 673 vcpu_e500->gtlb_nv[1] = 0;
355 674
356 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim); 675 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim, stlbe);
357 676
358 return victim; 677 return victim;
359} 678}
360 679
361/* Invalidate all guest kernel mappings when entering usermode, 680void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
362 * so that when they fault back in they will get the
363 * proper permission bits. */
364void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
365{ 681{
366 if (usermode) { 682 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
367 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
368 int i;
369
370 /* XXX Replace loop with fancy data structures. */
371 for (i = 0; i < tlb1_max_shadow_size(); i++)
372 kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
373 683
374 _tlbil_all(); 684 /* Recalc shadow pid since MSR changes */
375 } 685 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
376} 686}
377 687
378static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, 688static inline int kvmppc_e500_gtlbe_invalidate(
379 int tlbsel, int esel) 689 struct kvmppc_vcpu_e500 *vcpu_e500,
690 int tlbsel, int esel)
380{ 691{
381 struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; 692 struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
382 693
383 if (unlikely(get_tlb_iprot(gtlbe))) 694 if (unlikely(get_tlb_iprot(gtlbe)))
384 return -1; 695 return -1;
385 696
386 if (tlbsel == 1) {
387 kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
388 get_tlb_end(gtlbe),
389 get_tlb_tid(gtlbe));
390 } else {
391 kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
392 }
393
394 gtlbe->mas1 = 0; 697 gtlbe->mas1 = 0;
395 698
396 return 0; 699 return 0;
@@ -401,13 +704,14 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
401 int esel; 704 int esel;
402 705
403 if (value & MMUCSR0_TLB0FI) 706 if (value & MMUCSR0_TLB0FI)
404 for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++) 707 for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++)
405 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); 708 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
406 if (value & MMUCSR0_TLB1FI) 709 if (value & MMUCSR0_TLB1FI)
407 for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++) 710 for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++)
408 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); 711 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
409 712
410 _tlbil_all(); 713 /* Invalidate all vcpu id mappings */
714 kvmppc_e500_id_table_reset_all(vcpu_e500);
411 715
412 return EMULATE_DONE; 716 return EMULATE_DONE;
413} 717}
@@ -428,7 +732,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
428 732
429 if (ia) { 733 if (ia) {
430 /* invalidate all entries */ 734 /* invalidate all entries */
431 for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++) 735 for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++)
432 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); 736 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
433 } else { 737 } else {
434 ea &= 0xfffff000; 738 ea &= 0xfffff000;
@@ -438,7 +742,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
438 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); 742 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
439 } 743 }
440 744
441 _tlbil_all(); 745 /* Invalidate all vcpu id mappings */
746 kvmppc_e500_id_table_reset_all(vcpu_e500);
442 747
443 return EMULATE_DONE; 748 return EMULATE_DONE;
444} 749}
@@ -452,9 +757,9 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
452 tlbsel = get_tlb_tlbsel(vcpu_e500); 757 tlbsel = get_tlb_tlbsel(vcpu_e500);
453 esel = get_tlb_esel(vcpu_e500, tlbsel); 758 esel = get_tlb_esel(vcpu_e500, tlbsel);
454 759
455 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; 760 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
456 vcpu_e500->mas0 &= ~MAS0_NV(~0); 761 vcpu_e500->mas0 &= ~MAS0_NV(~0);
457 vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); 762 vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
458 vcpu_e500->mas1 = gtlbe->mas1; 763 vcpu_e500->mas1 = gtlbe->mas1;
459 vcpu_e500->mas2 = gtlbe->mas2; 764 vcpu_e500->mas2 = gtlbe->mas2;
460 vcpu_e500->mas3 = gtlbe->mas3; 765 vcpu_e500->mas3 = gtlbe->mas3;
@@ -477,14 +782,14 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
477 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 782 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
478 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); 783 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
479 if (esel >= 0) { 784 if (esel >= 0) {
480 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; 785 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
481 break; 786 break;
482 } 787 }
483 } 788 }
484 789
485 if (gtlbe) { 790 if (gtlbe) {
486 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) 791 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
487 | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); 792 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
488 vcpu_e500->mas1 = gtlbe->mas1; 793 vcpu_e500->mas1 = gtlbe->mas1;
489 vcpu_e500->mas2 = gtlbe->mas2; 794 vcpu_e500->mas2 = gtlbe->mas2;
490 vcpu_e500->mas3 = gtlbe->mas3; 795 vcpu_e500->mas3 = gtlbe->mas3;
@@ -497,7 +802,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
497 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; 802 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
498 803
499 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) 804 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
500 | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); 805 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
501 vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0) 806 vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
502 | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0)) 807 | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
503 | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); 808 | (vcpu_e500->mas4 & MAS4_TSIZED(~0));
@@ -514,23 +819,16 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
514int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) 819int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
515{ 820{
516 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 821 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
517 u64 eaddr;
518 u64 raddr;
519 u32 tid;
520 struct tlbe *gtlbe; 822 struct tlbe *gtlbe;
521 int tlbsel, esel, stlbsel, sesel; 823 int tlbsel, esel;
522 824
523 tlbsel = get_tlb_tlbsel(vcpu_e500); 825 tlbsel = get_tlb_tlbsel(vcpu_e500);
524 esel = get_tlb_esel(vcpu_e500, tlbsel); 826 esel = get_tlb_esel(vcpu_e500, tlbsel);
525 827
526 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; 828 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
527 829
528 if (get_tlb_v(gtlbe) && tlbsel == 1) { 830 if (get_tlb_v(gtlbe))
529 eaddr = get_tlb_eaddr(gtlbe); 831 kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
530 tid = get_tlb_tid(gtlbe);
531 kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
532 get_tlb_end(gtlbe), tid);
533 }
534 832
535 gtlbe->mas1 = vcpu_e500->mas1; 833 gtlbe->mas1 = vcpu_e500->mas1;
536 gtlbe->mas2 = vcpu_e500->mas2; 834 gtlbe->mas2 = vcpu_e500->mas2;
@@ -542,6 +840,12 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
542 840
543 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ 841 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
544 if (tlbe_is_host_safe(vcpu, gtlbe)) { 842 if (tlbe_is_host_safe(vcpu, gtlbe)) {
843 struct tlbe stlbe;
844 int stlbsel, sesel;
845 u64 eaddr;
846 u64 raddr;
847
848 preempt_disable();
545 switch (tlbsel) { 849 switch (tlbsel) {
546 case 0: 850 case 0:
547 /* TLB0 */ 851 /* TLB0 */
@@ -549,7 +853,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
549 gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); 853 gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
550 854
551 stlbsel = 0; 855 stlbsel = 0;
552 sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel); 856 sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
553 857
554 break; 858 break;
555 859
@@ -564,13 +868,14 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
564 * are mapped on the fly. */ 868 * are mapped on the fly. */
565 stlbsel = 1; 869 stlbsel = 1;
566 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, 870 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
567 raddr >> PAGE_SHIFT, gtlbe); 871 raddr >> PAGE_SHIFT, gtlbe, &stlbe);
568 break; 872 break;
569 873
570 default: 874 default:
571 BUG(); 875 BUG();
572 } 876 }
573 write_host_tlbe(vcpu_e500, stlbsel, sesel); 877 write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
878 preempt_enable();
574 } 879 }
575 880
576 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); 881 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
@@ -610,7 +915,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
610{ 915{
611 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 916 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
612 struct tlbe *gtlbe = 917 struct tlbe *gtlbe =
613 &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)]; 918 &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
614 u64 pgmask = get_tlb_bytes(gtlbe) - 1; 919 u64 pgmask = get_tlb_bytes(gtlbe) - 1;
615 920
616 return get_tlb_raddr(gtlbe) | (eaddr & pgmask); 921 return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
@@ -618,38 +923,37 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
618 923
619void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 924void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
620{ 925{
621 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
622 int tlbsel, i;
623
624 for (tlbsel = 0; tlbsel < 2; tlbsel++)
625 for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
626 kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
627
628 /* discard all guest mapping */
629 _tlbil_all();
630} 926}
631 927
632void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, 928void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
633 unsigned int index) 929 unsigned int index)
634{ 930{
635 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 931 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
932 struct tlbe_priv *priv;
933 struct tlbe *gtlbe, stlbe;
636 int tlbsel = tlbsel_of(index); 934 int tlbsel = tlbsel_of(index);
637 int esel = esel_of(index); 935 int esel = esel_of(index);
638 int stlbsel, sesel; 936 int stlbsel, sesel;
639 937
938 gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
939
940 preempt_disable();
640 switch (tlbsel) { 941 switch (tlbsel) {
641 case 0: 942 case 0:
642 stlbsel = 0; 943 stlbsel = 0;
643 sesel = esel; 944 sesel = esel;
945 priv = &vcpu_e500->gtlb_priv[stlbsel][sesel];
946
947 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
948 priv, eaddr, &stlbe);
644 break; 949 break;
645 950
646 case 1: { 951 case 1: {
647 gfn_t gfn = gpaddr >> PAGE_SHIFT; 952 gfn_t gfn = gpaddr >> PAGE_SHIFT;
648 struct tlbe *gtlbe
649 = &vcpu_e500->guest_tlb[tlbsel][esel];
650 953
651 stlbsel = 1; 954 stlbsel = 1;
652 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe); 955 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
956 gtlbe, &stlbe);
653 break; 957 break;
654 } 958 }
655 959
@@ -657,7 +961,9 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
657 BUG(); 961 BUG();
658 break; 962 break;
659 } 963 }
660 write_host_tlbe(vcpu_e500, stlbsel, sesel); 964
965 write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
966 preempt_enable();
661} 967}
662 968
663int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, 969int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
@@ -679,8 +985,10 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
679{ 985{
680 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 986 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
681 987
682 vcpu_e500->pid[0] = vcpu->arch.shadow_pid = 988 if (vcpu->arch.pid != pid) {
683 vcpu->arch.pid = pid; 989 vcpu_e500->pid[0] = vcpu->arch.pid = pid;
990 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
991 }
684} 992}
685 993
686void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) 994void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -688,14 +996,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
688 struct tlbe *tlbe; 996 struct tlbe *tlbe;
689 997
690 /* Insert large initial mapping for guest. */ 998 /* Insert large initial mapping for guest. */
691 tlbe = &vcpu_e500->guest_tlb[1][0]; 999 tlbe = &vcpu_e500->gtlb_arch[1][0];
692 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); 1000 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
693 tlbe->mas2 = 0; 1001 tlbe->mas2 = 0;
694 tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; 1002 tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
695 tlbe->mas7 = 0; 1003 tlbe->mas7 = 0;
696 1004
697 /* 4K map for serial output. Used by kernel wrapper. */ 1005 /* 4K map for serial output. Used by kernel wrapper. */
698 tlbe = &vcpu_e500->guest_tlb[1][1]; 1006 tlbe = &vcpu_e500->gtlb_arch[1][1];
699 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); 1007 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
700 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; 1008 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
701 tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; 1009 tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
@@ -706,68 +1014,64 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
706{ 1014{
707 tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF; 1015 tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
708 1016
709 vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE; 1017 vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE;
710 vcpu_e500->guest_tlb[0] = 1018 vcpu_e500->gtlb_arch[0] =
711 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); 1019 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
712 if (vcpu_e500->guest_tlb[0] == NULL) 1020 if (vcpu_e500->gtlb_arch[0] == NULL)
713 goto err_out; 1021 goto err_out;
714 1022
715 vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE; 1023 vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE;
716 vcpu_e500->shadow_tlb[0] = 1024 vcpu_e500->gtlb_arch[1] =
717 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
718 if (vcpu_e500->shadow_tlb[0] == NULL)
719 goto err_out_guest0;
720
721 vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
722 vcpu_e500->guest_tlb[1] =
723 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); 1025 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
724 if (vcpu_e500->guest_tlb[1] == NULL) 1026 if (vcpu_e500->gtlb_arch[1] == NULL)
725 goto err_out_shadow0; 1027 goto err_out_guest0;
726 1028
727 vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num; 1029 vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *)
728 vcpu_e500->shadow_tlb[1] = 1030 kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
729 kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL); 1031 if (vcpu_e500->gtlb_priv[0] == NULL)
730 if (vcpu_e500->shadow_tlb[1] == NULL)
731 goto err_out_guest1; 1032 goto err_out_guest1;
1033 vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *)
1034 kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
732 1035
733 vcpu_e500->shadow_pages[0] = (struct page **) 1036 if (vcpu_e500->gtlb_priv[1] == NULL)
734 kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL); 1037 goto err_out_priv0;
735 if (vcpu_e500->shadow_pages[0] == NULL)
736 goto err_out_shadow1;
737 1038
738 vcpu_e500->shadow_pages[1] = (struct page **) 1039 if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
739 kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL); 1040 goto err_out_priv1;
740 if (vcpu_e500->shadow_pages[1] == NULL)
741 goto err_out_page0;
742 1041
743 /* Init TLB configuration register */ 1042 /* Init TLB configuration register */
744 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; 1043 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
745 vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0]; 1044 vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0];
746 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; 1045 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
747 vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1]; 1046 vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1];
748 1047
749 return 0; 1048 return 0;
750 1049
751err_out_page0: 1050err_out_priv1:
752 kfree(vcpu_e500->shadow_pages[0]); 1051 kfree(vcpu_e500->gtlb_priv[1]);
753err_out_shadow1: 1052err_out_priv0:
754 kfree(vcpu_e500->shadow_tlb[1]); 1053 kfree(vcpu_e500->gtlb_priv[0]);
755err_out_guest1: 1054err_out_guest1:
756 kfree(vcpu_e500->guest_tlb[1]); 1055 kfree(vcpu_e500->gtlb_arch[1]);
757err_out_shadow0:
758 kfree(vcpu_e500->shadow_tlb[0]);
759err_out_guest0: 1056err_out_guest0:
760 kfree(vcpu_e500->guest_tlb[0]); 1057 kfree(vcpu_e500->gtlb_arch[0]);
761err_out: 1058err_out:
762 return -1; 1059 return -1;
763} 1060}
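
The error paths of kvmppc_e500_tlb_init() above follow the usual allocate-then-unwind idiom: each failing allocation jumps to a label that releases everything allocated before it, in reverse order. A minimal stand-alone sketch of the same pattern (toy structure, not kernel code):

#include <stdlib.h>

struct toy {
        void *a, *b, *c;
};

static int toy_init(struct toy *t)
{
        t->a = calloc(1, 64);
        if (!t->a)
                goto err_out;
        t->b = calloc(1, 64);
        if (!t->b)
                goto err_free_a;
        t->c = calloc(1, 64);
        if (!t->c)
                goto err_free_b;
        return 0;

err_free_b:
        free(t->b);
err_free_a:
        free(t->a);
err_out:
        return -1;
}

static void toy_uninit(struct toy *t)
{
        free(t->c);
        free(t->b);
        free(t->a);
}

int main(void)
{
        struct toy t;

        if (toy_init(&t))
                return 1;
        toy_uninit(&t);
        return 0;
}
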
764 1061
765void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) 1062void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
766{ 1063{
767 kfree(vcpu_e500->shadow_pages[1]); 1064 int stlbsel, i;
768 kfree(vcpu_e500->shadow_pages[0]); 1065
769 kfree(vcpu_e500->shadow_tlb[1]); 1066 /* release all privs */
770 kfree(vcpu_e500->guest_tlb[1]); 1067 for (stlbsel = 0; stlbsel < 2; stlbsel++)
771 kfree(vcpu_e500->shadow_tlb[0]); 1068 for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
772 kfree(vcpu_e500->guest_tlb[0]); 1069 struct tlbe_priv *priv =
1070 &vcpu_e500->gtlb_priv[stlbsel][i];
1071 kvmppc_e500_priv_release(priv);
1072 }
1073
1074 kvmppc_e500_id_table_free(vcpu_e500);
1075 kfree(vcpu_e500->gtlb_arch[1]);
1076 kfree(vcpu_e500->gtlb_arch[0]);
773} 1077}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 458946b4775d..59b88e99a235 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. 2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 * 3 *
4 * Author: Yu Liu, yu.liu@freescale.com 4 * Author: Yu Liu, yu.liu@freescale.com
5 * 5 *
@@ -55,6 +55,7 @@ extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
55extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); 55extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
56extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); 56extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
57extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); 57extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
58extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
58 59
59/* TLB helper functions */ 60/* TLB helper functions */
60static inline unsigned int get_tlb_size(const struct tlbe *tlbe) 61static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
@@ -110,6 +111,16 @@ static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
110 return vcpu->arch.pid & 0xff; 111 return vcpu->arch.pid & 0xff;
111} 112}
112 113
114static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
115{
116 return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
117}
118
119static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
120{
121 return !!(vcpu->arch.shared->msr & MSR_PR);
122}
123
113static inline unsigned int get_cur_spid( 124static inline unsigned int get_cur_spid(
114 const struct kvmppc_vcpu_e500 *vcpu_e500) 125 const struct kvmppc_vcpu_e500 *vcpu_e500)
115{ 126{
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 616dd516ca1f..a107c9be0fb1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
30#include <asm/uaccess.h> 30#include <asm/uaccess.h>
31#include <asm/kvm_ppc.h> 31#include <asm/kvm_ppc.h>
32#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
33#include <asm/cputhreads.h>
33#include "timing.h" 34#include "timing.h"
34#include "../mm/mmu_decl.h" 35#include "../mm/mmu_decl.h"
35 36
@@ -38,8 +39,12 @@
38 39
39int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 40int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
40{ 41{
42#ifndef CONFIG_KVM_BOOK3S_64_HV
41 return !(v->arch.shared->msr & MSR_WE) || 43 return !(v->arch.shared->msr & MSR_WE) ||
42 !!(v->arch.pending_exceptions); 44 !!(v->arch.pending_exceptions);
45#else
46 return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
47#endif
43} 48}
44 49
45int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 50int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -73,7 +78,8 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
73 } 78 }
74 case HC_VENDOR_KVM | KVM_HC_FEATURES: 79 case HC_VENDOR_KVM | KVM_HC_FEATURES:
75 r = HC_EV_SUCCESS; 80 r = HC_EV_SUCCESS;
76#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */ 81#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500)
82 /* XXX Missing magic page on 44x */
77 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); 83 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
78#endif 84#endif
79 85
@@ -147,7 +153,7 @@ void kvm_arch_check_processor_compat(void *rtn)
147 153
148int kvm_arch_init_vm(struct kvm *kvm) 154int kvm_arch_init_vm(struct kvm *kvm)
149{ 155{
150 return 0; 156 return kvmppc_core_init_vm(kvm);
151} 157}
152 158
153void kvm_arch_destroy_vm(struct kvm *kvm) 159void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -163,6 +169,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
163 kvm->vcpus[i] = NULL; 169 kvm->vcpus[i] = NULL;
164 170
165 atomic_set(&kvm->online_vcpus, 0); 171 atomic_set(&kvm->online_vcpus, 0);
172
173 kvmppc_core_destroy_vm(kvm);
174
166 mutex_unlock(&kvm->lock); 175 mutex_unlock(&kvm->lock);
167} 176}
168 177
@@ -180,10 +189,13 @@ int kvm_dev_ioctl_check_extension(long ext)
180#else 189#else
181 case KVM_CAP_PPC_SEGSTATE: 190 case KVM_CAP_PPC_SEGSTATE:
182#endif 191#endif
183 case KVM_CAP_PPC_PAIRED_SINGLES:
184 case KVM_CAP_PPC_UNSET_IRQ: 192 case KVM_CAP_PPC_UNSET_IRQ:
185 case KVM_CAP_PPC_IRQ_LEVEL: 193 case KVM_CAP_PPC_IRQ_LEVEL:
186 case KVM_CAP_ENABLE_CAP: 194 case KVM_CAP_ENABLE_CAP:
195 r = 1;
196 break;
197#ifndef CONFIG_KVM_BOOK3S_64_HV
198 case KVM_CAP_PPC_PAIRED_SINGLES:
187 case KVM_CAP_PPC_OSI: 199 case KVM_CAP_PPC_OSI:
188 case KVM_CAP_PPC_GET_PVINFO: 200 case KVM_CAP_PPC_GET_PVINFO:
189 r = 1; 201 r = 1;
@@ -191,6 +203,21 @@ int kvm_dev_ioctl_check_extension(long ext)
191 case KVM_CAP_COALESCED_MMIO: 203 case KVM_CAP_COALESCED_MMIO:
192 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 204 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
193 break; 205 break;
206#endif
207#ifdef CONFIG_KVM_BOOK3S_64_HV
208 case KVM_CAP_SPAPR_TCE:
209 r = 1;
210 break;
211 case KVM_CAP_PPC_SMT:
212 r = threads_per_core;
213 break;
214 case KVM_CAP_PPC_RMA:
215 r = 1;
216 /* PPC970 requires an RMA */
217 if (cpu_has_feature(CPU_FTR_ARCH_201))
218 r = 2;
219 break;
220#endif
194 default: 221 default:
195 r = 0; 222 r = 0;
196 break; 223 break;
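
The new capability values exported above can be probed from userspace with the standard KVM_CHECK_EXTENSION ioctl on /dev/kvm. A minimal sketch, assuming kernel headers that already define these KVM_CAP_* values; error handling is kept to the bare minimum:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR);

        if (kvm < 0) {
                perror("open /dev/kvm");
                return 1;
        }

        /* Nonzero means supported; KVM_CAP_PPC_SMT reports threads per core. */
        printf("PPC_SMT: %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT));
        /* 1 = RMA supported, 2 = RMA required (PPC970). */
        printf("PPC_RMA: %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RMA));
        return 0;
}
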
@@ -211,7 +238,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
211 struct kvm_userspace_memory_region *mem, 238 struct kvm_userspace_memory_region *mem,
212 int user_alloc) 239 int user_alloc)
213{ 240{
214 return 0; 241 return kvmppc_core_prepare_memory_region(kvm, mem);
215} 242}
216 243
217void kvm_arch_commit_memory_region(struct kvm *kvm, 244void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -219,7 +246,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
219 struct kvm_memory_slot old, 246 struct kvm_memory_slot old,
220 int user_alloc) 247 int user_alloc)
221{ 248{
222 return; 249 kvmppc_core_commit_memory_region(kvm, mem);
223} 250}
224 251
225 252
@@ -287,6 +314,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
287 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 314 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
288 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); 315 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
289 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 316 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
317 vcpu->arch.dec_expires = ~(u64)0;
290 318
291#ifdef CONFIG_KVM_EXIT_TIMING 319#ifdef CONFIG_KVM_EXIT_TIMING
292 mutex_init(&vcpu->arch.exit_timing_lock); 320 mutex_init(&vcpu->arch.exit_timing_lock);
@@ -313,6 +341,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
313 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); 341 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
314#endif 342#endif
315 kvmppc_core_vcpu_load(vcpu, cpu); 343 kvmppc_core_vcpu_load(vcpu, cpu);
344 vcpu->cpu = smp_processor_id();
316} 345}
317 346
318void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 347void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -321,6 +350,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
321#ifdef CONFIG_BOOKE 350#ifdef CONFIG_BOOKE
322 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); 351 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
323#endif 352#endif
353 vcpu->cpu = -1;
324} 354}
325 355
326int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 356int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -492,15 +522,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
492 for (i = 0; i < 32; i++) 522 for (i = 0; i < 32; i++)
493 kvmppc_set_gpr(vcpu, i, gprs[i]); 523 kvmppc_set_gpr(vcpu, i, gprs[i]);
494 vcpu->arch.osi_needed = 0; 524 vcpu->arch.osi_needed = 0;
525 } else if (vcpu->arch.hcall_needed) {
526 int i;
527
528 kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
529 for (i = 0; i < 9; ++i)
530 kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
531 vcpu->arch.hcall_needed = 0;
495 } 532 }
496 533
497 kvmppc_core_deliver_interrupts(vcpu); 534 kvmppc_core_deliver_interrupts(vcpu);
498 535
499 local_irq_disable(); 536 r = kvmppc_vcpu_run(run, vcpu);
500 kvm_guest_enter();
501 r = __kvmppc_vcpu_run(run, vcpu);
502 kvm_guest_exit();
503 local_irq_enable();
504 537
505 if (vcpu->sigset_active) 538 if (vcpu->sigset_active)
506 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 539 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -518,6 +551,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
518 if (waitqueue_active(&vcpu->wq)) { 551 if (waitqueue_active(&vcpu->wq)) {
519 wake_up_interruptible(&vcpu->wq); 552 wake_up_interruptible(&vcpu->wq);
520 vcpu->stat.halt_wakeup++; 553 vcpu->stat.halt_wakeup++;
554 } else if (vcpu->cpu != -1) {
555 smp_send_reschedule(vcpu->cpu);
521 } 556 }
522 557
523 return 0; 558 return 0;
@@ -633,6 +668,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
633 668
634 break; 669 break;
635 } 670 }
671#ifdef CONFIG_KVM_BOOK3S_64_HV
672 case KVM_CREATE_SPAPR_TCE: {
673 struct kvm_create_spapr_tce create_tce;
674 struct kvm *kvm = filp->private_data;
675
676 r = -EFAULT;
677 if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
678 goto out;
679 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
680 goto out;
681 }
682
683 case KVM_ALLOCATE_RMA: {
684 struct kvm *kvm = filp->private_data;
685 struct kvm_allocate_rma rma;
686
687 r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
688 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
689 r = -EFAULT;
690 break;
691 }
692#endif /* CONFIG_KVM_BOOK3S_64_HV */
693
636 default: 694 default:
637 r = -ENOTTY; 695 r = -ENOTTY;
638 } 696 }
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index 319177df9587..07b6110a4bb7 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -56,15 +56,6 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
56{ 56{
57 u64 old; 57 u64 old;
58 58
59 do_div(duration, tb_ticks_per_usec);
60 if (unlikely(duration > 0xFFFFFFFF)) {
61 printk(KERN_ERR"%s - duration too big -> overflow"
62 " duration %lld type %d exit #%d\n",
63 __func__, duration, type,
64 vcpu->arch.timing_count_type[type]);
65 return;
66 }
67
68 mutex_lock(&vcpu->arch.exit_timing_lock); 59 mutex_lock(&vcpu->arch.exit_timing_lock);
69 60
70 vcpu->arch.timing_count_type[type]++; 61 vcpu->arch.timing_count_type[type]++;
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 3aca1b042b8c..b135d3d397db 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -103,7 +103,7 @@ TRACE_EVENT(kvm_gtlb_write,
103 * Book3S trace points * 103 * Book3S trace points *
104 *************************************************************************/ 104 *************************************************************************/
105 105
106#ifdef CONFIG_PPC_BOOK3S 106#ifdef CONFIG_KVM_BOOK3S_PR
107 107
108TRACE_EVENT(kvm_book3s_exit, 108TRACE_EVENT(kvm_book3s_exit,
109 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), 109 TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
@@ -252,7 +252,7 @@ TRACE_EVENT(kvm_book3s_mmu_flush,
252 ), 252 ),
253 253
254 TP_fast_assign( 254 TP_fast_assign(
255 __entry->count = vcpu->arch.hpte_cache_count; 255 __entry->count = to_book3s(vcpu)->hpte_cache_count;
256 __entry->p1 = p1; 256 __entry->p1 = p1;
257 __entry->p2 = p2; 257 __entry->p2 = p2;
258 __entry->type = type; 258 __entry->type = type;