diff options
Diffstat (limited to 'arch/powerpc/kvm/book3s.c')
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 925 |
1 files changed, 925 insertions, 0 deletions
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c new file mode 100644 index 000000000000..42037d46a416 --- /dev/null +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -0,0 +1,925 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. | ||
3 | * | ||
4 | * Authors: | ||
5 | * Alexander Graf <agraf@suse.de> | ||
6 | * Kevin Wolf <mail@kevin-wolf.de> | ||
7 | * | ||
8 | * Description: | ||
9 | * This file is derived from arch/powerpc/kvm/44x.c, | ||
10 | * by Hollis Blanchard <hollisb@us.ibm.com>. | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License, version 2, as | ||
14 | * published by the Free Software Foundation. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kvm_host.h> | ||
18 | #include <linux/err.h> | ||
19 | |||
20 | #include <asm/reg.h> | ||
21 | #include <asm/cputable.h> | ||
22 | #include <asm/cacheflush.h> | ||
23 | #include <asm/tlbflush.h> | ||
24 | #include <asm/uaccess.h> | ||
25 | #include <asm/io.h> | ||
26 | #include <asm/kvm_ppc.h> | ||
27 | #include <asm/kvm_book3s.h> | ||
28 | #include <asm/mmu_context.h> | ||
29 | #include <linux/sched.h> | ||
30 | #include <linux/vmalloc.h> | ||
31 | |||
32 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | ||
33 | |||
34 | /* #define EXIT_DEBUG */ | ||
35 | /* #define EXIT_DEBUG_SIMPLE */ | ||
36 | |||
37 | /* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0. | ||
38 | * When set, we retrigger a DEC interrupt after that if DEC <= 0. | ||
39 | * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */ | ||
40 | |||
41 | /* #define AGGRESSIVE_DEC */ | ||
42 | |||
43 | struct kvm_stats_debugfs_item debugfs_entries[] = { | ||
44 | { "exits", VCPU_STAT(sum_exits) }, | ||
45 | { "mmio", VCPU_STAT(mmio_exits) }, | ||
46 | { "sig", VCPU_STAT(signal_exits) }, | ||
47 | { "sysc", VCPU_STAT(syscall_exits) }, | ||
48 | { "inst_emu", VCPU_STAT(emulated_inst_exits) }, | ||
49 | { "dec", VCPU_STAT(dec_exits) }, | ||
50 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, | ||
51 | { "queue_intr", VCPU_STAT(queue_intr) }, | ||
52 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | ||
53 | { "pf_storage", VCPU_STAT(pf_storage) }, | ||
54 | { "sp_storage", VCPU_STAT(sp_storage) }, | ||
55 | { "pf_instruc", VCPU_STAT(pf_instruc) }, | ||
56 | { "sp_instruc", VCPU_STAT(sp_instruc) }, | ||
57 | { "ld", VCPU_STAT(ld) }, | ||
58 | { "ld_slow", VCPU_STAT(ld_slow) }, | ||
59 | { "st", VCPU_STAT(st) }, | ||
60 | { "st_slow", VCPU_STAT(st_slow) }, | ||
61 | { NULL } | ||
62 | }; | ||
63 | |||
/* Hook for restoring host debug state; intentionally empty here. */
void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
	/* nothing to do */
}
67 | |||
/* Hook for loading guest debug state; intentionally empty here. */
void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
	/* nothing to do */
}
71 | |||
72 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
73 | { | ||
74 | memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); | ||
75 | get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; | ||
76 | } | ||
77 | |||
78 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) | ||
79 | { | ||
80 | memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); | ||
81 | to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; | ||
82 | } | ||
83 | |||
#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG)
/*
 * Compute the guest's current decrementer value: the DEC value last stored
 * in the vcpu minus the timebase ticks elapsed since arch.dec_jiffies.
 * The result is truncated to 32 bits.
 */
static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
{
	u64 elapsed = mftb() - vcpu->arch.dec_jiffies;

	return vcpu->arch.dec - elapsed;
}
#endif
91 | |||
92 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) | ||
93 | { | ||
94 | ulong old_msr = vcpu->arch.msr; | ||
95 | |||
96 | #ifdef EXIT_DEBUG | ||
97 | printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); | ||
98 | #endif | ||
99 | msr &= to_book3s(vcpu)->msr_mask; | ||
100 | vcpu->arch.msr = msr; | ||
101 | vcpu->arch.shadow_msr = msr | MSR_USER32; | ||
102 | vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 | | ||
103 | MSR_USER64 | MSR_SE | MSR_BE | MSR_DE | | ||
104 | MSR_FE1); | ||
105 | |||
106 | if (msr & (MSR_WE|MSR_POW)) { | ||
107 | if (!vcpu->arch.pending_exceptions) { | ||
108 | kvm_vcpu_block(vcpu); | ||
109 | vcpu->stat.halt_wakeup++; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || | ||
114 | (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { | ||
115 | kvmppc_mmu_flush_segments(vcpu); | ||
116 | kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) | ||
121 | { | ||
122 | vcpu->arch.srr0 = vcpu->arch.pc; | ||
123 | vcpu->arch.srr1 = vcpu->arch.msr | flags; | ||
124 | vcpu->arch.pc = to_book3s(vcpu)->hior + vec; | ||
125 | vcpu->arch.mmu.reset_msr(vcpu); | ||
126 | } | ||
127 | |||
128 | void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) | ||
129 | { | ||
130 | unsigned int prio; | ||
131 | |||
132 | vcpu->stat.queue_intr++; | ||
133 | switch (vec) { | ||
134 | case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; | ||
135 | case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; | ||
136 | case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE; break; | ||
137 | case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT; break; | ||
138 | case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; | ||
139 | case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; | ||
140 | case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; | ||
141 | case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; | ||
142 | case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; | ||
143 | case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; | ||
144 | case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER; break; | ||
145 | case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL; break; | ||
146 | case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break; | ||
147 | case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break; | ||
148 | case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break; | ||
149 | default: prio = BOOK3S_IRQPRIO_MAX; break; | ||
150 | } | ||
151 | |||
152 | set_bit(prio, &vcpu->arch.pending_exceptions); | ||
153 | #ifdef EXIT_DEBUG | ||
154 | printk(KERN_INFO "Queueing interrupt %x\n", vec); | ||
155 | #endif | ||
156 | } | ||
157 | |||
158 | |||
159 | void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) | ||
160 | { | ||
161 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); | ||
162 | } | ||
163 | |||
164 | void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) | ||
165 | { | ||
166 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); | ||
167 | } | ||
168 | |||
169 | int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) | ||
170 | { | ||
171 | return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); | ||
172 | } | ||
173 | |||
174 | void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | ||
175 | struct kvm_interrupt *irq) | ||
176 | { | ||
177 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); | ||
178 | } | ||
179 | |||
180 | int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) | ||
181 | { | ||
182 | int deliver = 1; | ||
183 | int vec = 0; | ||
184 | |||
185 | switch (priority) { | ||
186 | case BOOK3S_IRQPRIO_DECREMENTER: | ||
187 | deliver = vcpu->arch.msr & MSR_EE; | ||
188 | vec = BOOK3S_INTERRUPT_DECREMENTER; | ||
189 | break; | ||
190 | case BOOK3S_IRQPRIO_EXTERNAL: | ||
191 | deliver = vcpu->arch.msr & MSR_EE; | ||
192 | vec = BOOK3S_INTERRUPT_EXTERNAL; | ||
193 | break; | ||
194 | case BOOK3S_IRQPRIO_SYSTEM_RESET: | ||
195 | vec = BOOK3S_INTERRUPT_SYSTEM_RESET; | ||
196 | break; | ||
197 | case BOOK3S_IRQPRIO_MACHINE_CHECK: | ||
198 | vec = BOOK3S_INTERRUPT_MACHINE_CHECK; | ||
199 | break; | ||
200 | case BOOK3S_IRQPRIO_DATA_STORAGE: | ||
201 | vec = BOOK3S_INTERRUPT_DATA_STORAGE; | ||
202 | break; | ||
203 | case BOOK3S_IRQPRIO_INST_STORAGE: | ||
204 | vec = BOOK3S_INTERRUPT_INST_STORAGE; | ||
205 | break; | ||
206 | case BOOK3S_IRQPRIO_DATA_SEGMENT: | ||
207 | vec = BOOK3S_INTERRUPT_DATA_SEGMENT; | ||
208 | break; | ||
209 | case BOOK3S_IRQPRIO_INST_SEGMENT: | ||
210 | vec = BOOK3S_INTERRUPT_INST_SEGMENT; | ||
211 | break; | ||
212 | case BOOK3S_IRQPRIO_ALIGNMENT: | ||
213 | vec = BOOK3S_INTERRUPT_ALIGNMENT; | ||
214 | break; | ||
215 | case BOOK3S_IRQPRIO_PROGRAM: | ||
216 | vec = BOOK3S_INTERRUPT_PROGRAM; | ||
217 | break; | ||
218 | case BOOK3S_IRQPRIO_VSX: | ||
219 | vec = BOOK3S_INTERRUPT_VSX; | ||
220 | break; | ||
221 | case BOOK3S_IRQPRIO_ALTIVEC: | ||
222 | vec = BOOK3S_INTERRUPT_ALTIVEC; | ||
223 | break; | ||
224 | case BOOK3S_IRQPRIO_FP_UNAVAIL: | ||
225 | vec = BOOK3S_INTERRUPT_FP_UNAVAIL; | ||
226 | break; | ||
227 | case BOOK3S_IRQPRIO_SYSCALL: | ||
228 | vec = BOOK3S_INTERRUPT_SYSCALL; | ||
229 | break; | ||
230 | case BOOK3S_IRQPRIO_DEBUG: | ||
231 | vec = BOOK3S_INTERRUPT_TRACE; | ||
232 | break; | ||
233 | case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR: | ||
234 | vec = BOOK3S_INTERRUPT_PERFMON; | ||
235 | break; | ||
236 | default: | ||
237 | deliver = 0; | ||
238 | printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); | ||
239 | break; | ||
240 | } | ||
241 | |||
242 | #if 0 | ||
243 | printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver); | ||
244 | #endif | ||
245 | |||
246 | if (deliver) | ||
247 | kvmppc_inject_interrupt(vcpu, vec, 0ULL); | ||
248 | |||
249 | return deliver; | ||
250 | } | ||
251 | |||
252 | void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | ||
253 | { | ||
254 | unsigned long *pending = &vcpu->arch.pending_exceptions; | ||
255 | unsigned int priority; | ||
256 | |||
257 | /* XXX be more clever here - no need to mftb() on every entry */ | ||
258 | /* Issue DEC again if it's still active */ | ||
259 | #ifdef AGGRESSIVE_DEC | ||
260 | if (vcpu->arch.msr & MSR_EE) | ||
261 | if (kvmppc_get_dec(vcpu) & 0x80000000) | ||
262 | kvmppc_core_queue_dec(vcpu); | ||
263 | #endif | ||
264 | |||
265 | #ifdef EXIT_DEBUG | ||
266 | if (vcpu->arch.pending_exceptions) | ||
267 | printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); | ||
268 | #endif | ||
269 | priority = __ffs(*pending); | ||
270 | while (priority <= (sizeof(unsigned int) * 8)) { | ||
271 | if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) { | ||
272 | clear_bit(priority, &vcpu->arch.pending_exceptions); | ||
273 | break; | ||
274 | } | ||
275 | |||
276 | priority = find_next_bit(pending, | ||
277 | BITS_PER_BYTE * sizeof(*pending), | ||
278 | priority + 1); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | ||
283 | { | ||
284 | vcpu->arch.pvr = pvr; | ||
285 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { | ||
286 | kvmppc_mmu_book3s_64_init(vcpu); | ||
287 | to_book3s(vcpu)->hior = 0xfff00000; | ||
288 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; | ||
289 | } else { | ||
290 | kvmppc_mmu_book3s_32_init(vcpu); | ||
291 | to_book3s(vcpu)->hior = 0; | ||
292 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; | ||
293 | } | ||
294 | |||
295 | /* If we are in hypervisor level on 970, we can tell the CPU to | ||
296 | * treat DCBZ as 32 bytes store */ | ||
297 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; | ||
298 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) && | ||
299 | !strcmp(cur_cpu_spec->platform, "ppc970")) | ||
300 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; | ||
301 | |||
302 | } | ||
303 | |||
304 | /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To | ||
305 | * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to | ||
306 | * emulate 32 bytes dcbz length. | ||
307 | * | ||
308 | * The Book3s_64 inventors also realized this case and implemented a special bit | ||
309 | * in the HID5 register, which is a hypervisor ressource. Thus we can't use it. | ||
310 | * | ||
311 | * My approach here is to patch the dcbz instruction on executing pages. | ||
312 | */ | ||
313 | static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) | ||
314 | { | ||
315 | bool touched = false; | ||
316 | hva_t hpage; | ||
317 | u32 *page; | ||
318 | int i; | ||
319 | |||
320 | hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); | ||
321 | if (kvm_is_error_hva(hpage)) | ||
322 | return; | ||
323 | |||
324 | hpage |= pte->raddr & ~PAGE_MASK; | ||
325 | hpage &= ~0xFFFULL; | ||
326 | |||
327 | page = vmalloc(HW_PAGE_SIZE); | ||
328 | |||
329 | if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) | ||
330 | goto out; | ||
331 | |||
332 | for (i=0; i < HW_PAGE_SIZE / 4; i++) | ||
333 | if ((page[i] & 0xff0007ff) == INS_DCBZ) { | ||
334 | page[i] &= 0xfffffff7; // reserved instruction, so we trap | ||
335 | touched = true; | ||
336 | } | ||
337 | |||
338 | if (touched) | ||
339 | copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); | ||
340 | |||
341 | out: | ||
342 | vfree(page); | ||
343 | } | ||
344 | |||
345 | static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, | ||
346 | struct kvmppc_pte *pte) | ||
347 | { | ||
348 | int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR)); | ||
349 | int r; | ||
350 | |||
351 | if (relocated) { | ||
352 | r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); | ||
353 | } else { | ||
354 | pte->eaddr = eaddr; | ||
355 | pte->raddr = eaddr & 0xffffffff; | ||
356 | pte->vpage = eaddr >> 12; | ||
357 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
358 | case 0: | ||
359 | pte->vpage |= VSID_REAL; | ||
360 | case MSR_DR: | ||
361 | pte->vpage |= VSID_REAL_DR; | ||
362 | case MSR_IR: | ||
363 | pte->vpage |= VSID_REAL_IR; | ||
364 | } | ||
365 | pte->may_read = true; | ||
366 | pte->may_write = true; | ||
367 | pte->may_execute = true; | ||
368 | r = 0; | ||
369 | } | ||
370 | |||
371 | return r; | ||
372 | } | ||
373 | |||
374 | static hva_t kvmppc_bad_hva(void) | ||
375 | { | ||
376 | return PAGE_OFFSET; | ||
377 | } | ||
378 | |||
379 | static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, | ||
380 | bool read) | ||
381 | { | ||
382 | hva_t hpage; | ||
383 | |||
384 | if (read && !pte->may_read) | ||
385 | goto err; | ||
386 | |||
387 | if (!read && !pte->may_write) | ||
388 | goto err; | ||
389 | |||
390 | hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); | ||
391 | if (kvm_is_error_hva(hpage)) | ||
392 | goto err; | ||
393 | |||
394 | return hpage | (pte->raddr & ~PAGE_MASK); | ||
395 | err: | ||
396 | return kvmppc_bad_hva(); | ||
397 | } | ||
398 | |||
399 | int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) | ||
400 | { | ||
401 | struct kvmppc_pte pte; | ||
402 | hva_t hva = eaddr; | ||
403 | |||
404 | vcpu->stat.st++; | ||
405 | |||
406 | if (kvmppc_xlate(vcpu, eaddr, false, &pte)) | ||
407 | goto err; | ||
408 | |||
409 | hva = kvmppc_pte_to_hva(vcpu, &pte, false); | ||
410 | if (kvm_is_error_hva(hva)) | ||
411 | goto err; | ||
412 | |||
413 | if (copy_to_user((void __user *)hva, ptr, size)) { | ||
414 | printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); | ||
415 | goto err; | ||
416 | } | ||
417 | |||
418 | return 0; | ||
419 | |||
420 | err: | ||
421 | return -ENOENT; | ||
422 | } | ||
423 | |||
424 | int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, | ||
425 | bool data) | ||
426 | { | ||
427 | struct kvmppc_pte pte; | ||
428 | hva_t hva = eaddr; | ||
429 | |||
430 | vcpu->stat.ld++; | ||
431 | |||
432 | if (kvmppc_xlate(vcpu, eaddr, data, &pte)) | ||
433 | goto err; | ||
434 | |||
435 | hva = kvmppc_pte_to_hva(vcpu, &pte, true); | ||
436 | if (kvm_is_error_hva(hva)) | ||
437 | goto err; | ||
438 | |||
439 | if (copy_from_user(ptr, (void __user *)hva, size)) { | ||
440 | printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); | ||
441 | goto err; | ||
442 | } | ||
443 | |||
444 | return 0; | ||
445 | |||
446 | err: | ||
447 | return -ENOENT; | ||
448 | } | ||
449 | |||
450 | static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
451 | { | ||
452 | return kvm_is_visible_gfn(vcpu->kvm, gfn); | ||
453 | } | ||
454 | |||
455 | int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
456 | ulong eaddr, int vec) | ||
457 | { | ||
458 | bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); | ||
459 | int r = RESUME_GUEST; | ||
460 | int relocated; | ||
461 | int page_found = 0; | ||
462 | struct kvmppc_pte pte; | ||
463 | bool is_mmio = false; | ||
464 | |||
465 | if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { | ||
466 | relocated = (vcpu->arch.msr & MSR_DR); | ||
467 | } else { | ||
468 | relocated = (vcpu->arch.msr & MSR_IR); | ||
469 | } | ||
470 | |||
471 | /* Resolve real address if translation turned on */ | ||
472 | if (relocated) { | ||
473 | page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); | ||
474 | } else { | ||
475 | pte.may_execute = true; | ||
476 | pte.may_read = true; | ||
477 | pte.may_write = true; | ||
478 | pte.raddr = eaddr & 0xffffffff; | ||
479 | pte.eaddr = eaddr; | ||
480 | pte.vpage = eaddr >> 12; | ||
481 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
482 | case 0: | ||
483 | pte.vpage |= VSID_REAL; | ||
484 | case MSR_DR: | ||
485 | pte.vpage |= VSID_REAL_DR; | ||
486 | case MSR_IR: | ||
487 | pte.vpage |= VSID_REAL_IR; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && | ||
492 | (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { | ||
493 | /* | ||
494 | * If we do the dcbz hack, we have to NX on every execution, | ||
495 | * so we can patch the executing code. This renders our guest | ||
496 | * NX-less. | ||
497 | */ | ||
498 | pte.may_execute = !data; | ||
499 | } | ||
500 | |||
501 | if (page_found == -ENOENT) { | ||
502 | /* Page not found in guest PTE entries */ | ||
503 | vcpu->arch.dear = vcpu->arch.fault_dear; | ||
504 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; | ||
505 | vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); | ||
506 | kvmppc_book3s_queue_irqprio(vcpu, vec); | ||
507 | } else if (page_found == -EPERM) { | ||
508 | /* Storage protection */ | ||
509 | vcpu->arch.dear = vcpu->arch.fault_dear; | ||
510 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; | ||
511 | to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; | ||
512 | vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); | ||
513 | kvmppc_book3s_queue_irqprio(vcpu, vec); | ||
514 | } else if (page_found == -EINVAL) { | ||
515 | /* Page not found in guest SLB */ | ||
516 | vcpu->arch.dear = vcpu->arch.fault_dear; | ||
517 | kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); | ||
518 | } else if (!is_mmio && | ||
519 | kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { | ||
520 | /* The guest's PTE is not mapped yet. Map on the host */ | ||
521 | kvmppc_mmu_map_page(vcpu, &pte); | ||
522 | if (data) | ||
523 | vcpu->stat.sp_storage++; | ||
524 | else if (vcpu->arch.mmu.is_dcbz32(vcpu) && | ||
525 | (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) | ||
526 | kvmppc_patch_dcbz(vcpu, &pte); | ||
527 | } else { | ||
528 | /* MMIO */ | ||
529 | vcpu->stat.mmio_exits++; | ||
530 | vcpu->arch.paddr_accessed = pte.raddr; | ||
531 | r = kvmppc_emulate_mmio(run, vcpu); | ||
532 | if ( r == RESUME_HOST_NV ) | ||
533 | r = RESUME_HOST; | ||
534 | if ( r == RESUME_GUEST_NV ) | ||
535 | r = RESUME_GUEST; | ||
536 | } | ||
537 | |||
538 | return r; | ||
539 | } | ||
540 | |||
541 | int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
542 | unsigned int exit_nr) | ||
543 | { | ||
544 | int r = RESUME_HOST; | ||
545 | |||
546 | vcpu->stat.sum_exits++; | ||
547 | |||
548 | run->exit_reason = KVM_EXIT_UNKNOWN; | ||
549 | run->ready_for_interrupt_injection = 1; | ||
550 | #ifdef EXIT_DEBUG | ||
551 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", | ||
552 | exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, | ||
553 | kvmppc_get_dec(vcpu), vcpu->arch.msr); | ||
554 | #elif defined (EXIT_DEBUG_SIMPLE) | ||
555 | if ((exit_nr != 0x900) && (exit_nr != 0x500)) | ||
556 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", | ||
557 | exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, | ||
558 | vcpu->arch.msr); | ||
559 | #endif | ||
560 | kvm_resched(vcpu); | ||
561 | switch (exit_nr) { | ||
562 | case BOOK3S_INTERRUPT_INST_STORAGE: | ||
563 | vcpu->stat.pf_instruc++; | ||
564 | /* only care about PTEG not found errors, but leave NX alone */ | ||
565 | if (vcpu->arch.shadow_msr & 0x40000000) { | ||
566 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); | ||
567 | vcpu->stat.sp_instruc++; | ||
568 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && | ||
569 | (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { | ||
570 | /* | ||
571 | * XXX If we do the dcbz hack we use the NX bit to flush&patch the page, | ||
572 | * so we can't use the NX bit inside the guest. Let's cross our fingers, | ||
573 | * that no guest that needs the dcbz hack does NX. | ||
574 | */ | ||
575 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | ||
576 | } else { | ||
577 | vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000); | ||
578 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
579 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | ||
580 | r = RESUME_GUEST; | ||
581 | } | ||
582 | break; | ||
583 | case BOOK3S_INTERRUPT_DATA_STORAGE: | ||
584 | vcpu->stat.pf_storage++; | ||
585 | /* The only case we need to handle is missing shadow PTEs */ | ||
586 | if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) { | ||
587 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr); | ||
588 | } else { | ||
589 | vcpu->arch.dear = vcpu->arch.fault_dear; | ||
590 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; | ||
591 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
592 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL); | ||
593 | r = RESUME_GUEST; | ||
594 | } | ||
595 | break; | ||
596 | case BOOK3S_INTERRUPT_DATA_SEGMENT: | ||
597 | if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) { | ||
598 | vcpu->arch.dear = vcpu->arch.fault_dear; | ||
599 | kvmppc_book3s_queue_irqprio(vcpu, | ||
600 | BOOK3S_INTERRUPT_DATA_SEGMENT); | ||
601 | } | ||
602 | r = RESUME_GUEST; | ||
603 | break; | ||
604 | case BOOK3S_INTERRUPT_INST_SEGMENT: | ||
605 | if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) { | ||
606 | kvmppc_book3s_queue_irqprio(vcpu, | ||
607 | BOOK3S_INTERRUPT_INST_SEGMENT); | ||
608 | } | ||
609 | r = RESUME_GUEST; | ||
610 | break; | ||
611 | /* We're good on these - the host merely wanted to get our attention */ | ||
612 | case BOOK3S_INTERRUPT_DECREMENTER: | ||
613 | vcpu->stat.dec_exits++; | ||
614 | r = RESUME_GUEST; | ||
615 | break; | ||
616 | case BOOK3S_INTERRUPT_EXTERNAL: | ||
617 | vcpu->stat.ext_intr_exits++; | ||
618 | r = RESUME_GUEST; | ||
619 | break; | ||
620 | case BOOK3S_INTERRUPT_PROGRAM: | ||
621 | { | ||
622 | enum emulation_result er; | ||
623 | |||
624 | if (vcpu->arch.msr & MSR_PR) { | ||
625 | #ifdef EXIT_DEBUG | ||
626 | printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst); | ||
627 | #endif | ||
628 | if ((vcpu->arch.last_inst & 0xff0007ff) != | ||
629 | (INS_DCBZ & 0xfffffff7)) { | ||
630 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
631 | r = RESUME_GUEST; | ||
632 | break; | ||
633 | } | ||
634 | } | ||
635 | |||
636 | vcpu->stat.emulated_inst_exits++; | ||
637 | er = kvmppc_emulate_instruction(run, vcpu); | ||
638 | switch (er) { | ||
639 | case EMULATE_DONE: | ||
640 | r = RESUME_GUEST; | ||
641 | break; | ||
642 | case EMULATE_FAIL: | ||
643 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", | ||
644 | __func__, vcpu->arch.pc, vcpu->arch.last_inst); | ||
645 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
646 | r = RESUME_GUEST; | ||
647 | break; | ||
648 | default: | ||
649 | BUG(); | ||
650 | } | ||
651 | break; | ||
652 | } | ||
653 | case BOOK3S_INTERRUPT_SYSCALL: | ||
654 | #ifdef EXIT_DEBUG | ||
655 | printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]); | ||
656 | #endif | ||
657 | vcpu->stat.syscall_exits++; | ||
658 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
659 | r = RESUME_GUEST; | ||
660 | break; | ||
661 | case BOOK3S_INTERRUPT_MACHINE_CHECK: | ||
662 | case BOOK3S_INTERRUPT_FP_UNAVAIL: | ||
663 | case BOOK3S_INTERRUPT_TRACE: | ||
664 | case BOOK3S_INTERRUPT_ALTIVEC: | ||
665 | case BOOK3S_INTERRUPT_VSX: | ||
666 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
667 | r = RESUME_GUEST; | ||
668 | break; | ||
669 | default: | ||
670 | /* Ugh - bork here! What did we get? */ | ||
671 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr); | ||
672 | r = RESUME_HOST; | ||
673 | BUG(); | ||
674 | break; | ||
675 | } | ||
676 | |||
677 | |||
678 | if (!(r & RESUME_HOST)) { | ||
679 | /* To avoid clobbering exit_reason, only check for signals if | ||
680 | * we aren't already exiting to userspace for some other | ||
681 | * reason. */ | ||
682 | if (signal_pending(current)) { | ||
683 | #ifdef EXIT_DEBUG | ||
684 | printk(KERN_EMERG "KVM: Going back to host\n"); | ||
685 | #endif | ||
686 | vcpu->stat.signal_exits++; | ||
687 | run->exit_reason = KVM_EXIT_INTR; | ||
688 | r = -EINTR; | ||
689 | } else { | ||
690 | /* In case an interrupt came in that was triggered | ||
691 | * from userspace (like DEC), we need to check what | ||
692 | * to inject now! */ | ||
693 | kvmppc_core_deliver_interrupts(vcpu); | ||
694 | } | ||
695 | } | ||
696 | |||
697 | #ifdef EXIT_DEBUG | ||
698 | printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r); | ||
699 | #endif | ||
700 | |||
701 | return r; | ||
702 | } | ||
703 | |||
/* Arch hook for vcpu setup; nothing to do here, always reports success. */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	const int ok = 0;

	return ok;
}
708 | |||
709 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
710 | { | ||
711 | int i; | ||
712 | |||
713 | regs->pc = vcpu->arch.pc; | ||
714 | regs->cr = vcpu->arch.cr; | ||
715 | regs->ctr = vcpu->arch.ctr; | ||
716 | regs->lr = vcpu->arch.lr; | ||
717 | regs->xer = vcpu->arch.xer; | ||
718 | regs->msr = vcpu->arch.msr; | ||
719 | regs->srr0 = vcpu->arch.srr0; | ||
720 | regs->srr1 = vcpu->arch.srr1; | ||
721 | regs->pid = vcpu->arch.pid; | ||
722 | regs->sprg0 = vcpu->arch.sprg0; | ||
723 | regs->sprg1 = vcpu->arch.sprg1; | ||
724 | regs->sprg2 = vcpu->arch.sprg2; | ||
725 | regs->sprg3 = vcpu->arch.sprg3; | ||
726 | regs->sprg5 = vcpu->arch.sprg4; | ||
727 | regs->sprg6 = vcpu->arch.sprg5; | ||
728 | regs->sprg7 = vcpu->arch.sprg6; | ||
729 | |||
730 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | ||
731 | regs->gpr[i] = vcpu->arch.gpr[i]; | ||
732 | |||
733 | return 0; | ||
734 | } | ||
735 | |||
736 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
737 | { | ||
738 | int i; | ||
739 | |||
740 | vcpu->arch.pc = regs->pc; | ||
741 | vcpu->arch.cr = regs->cr; | ||
742 | vcpu->arch.ctr = regs->ctr; | ||
743 | vcpu->arch.lr = regs->lr; | ||
744 | vcpu->arch.xer = regs->xer; | ||
745 | kvmppc_set_msr(vcpu, regs->msr); | ||
746 | vcpu->arch.srr0 = regs->srr0; | ||
747 | vcpu->arch.srr1 = regs->srr1; | ||
748 | vcpu->arch.sprg0 = regs->sprg0; | ||
749 | vcpu->arch.sprg1 = regs->sprg1; | ||
750 | vcpu->arch.sprg2 = regs->sprg2; | ||
751 | vcpu->arch.sprg3 = regs->sprg3; | ||
752 | vcpu->arch.sprg5 = regs->sprg4; | ||
753 | vcpu->arch.sprg6 = regs->sprg5; | ||
754 | vcpu->arch.sprg7 = regs->sprg6; | ||
755 | |||
756 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) | ||
757 | vcpu->arch.gpr[i] = regs->gpr[i]; | ||
758 | |||
759 | return 0; | ||
760 | } | ||
761 | |||
762 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | ||
763 | struct kvm_sregs *sregs) | ||
764 | { | ||
765 | sregs->pvr = vcpu->arch.pvr; | ||
766 | return 0; | ||
767 | } | ||
768 | |||
769 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | ||
770 | struct kvm_sregs *sregs) | ||
771 | { | ||
772 | kvmppc_set_pvr(vcpu, sregs->pvr); | ||
773 | return 0; | ||
774 | } | ||
775 | |||
776 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
777 | { | ||
778 | return -ENOTSUPP; | ||
779 | } | ||
780 | |||
781 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
782 | { | ||
783 | return -ENOTSUPP; | ||
784 | } | ||
785 | |||
/* Address translation ioctl: a no-op that leaves @tr untouched, returns 0. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	const int ok = 0;

	return ok;
}
791 | |||
792 | /* | ||
793 | * Get (and clear) the dirty memory log for a memory slot. | ||
794 | */ | ||
795 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | ||
796 | struct kvm_dirty_log *log) | ||
797 | { | ||
798 | struct kvm_memory_slot *memslot; | ||
799 | struct kvm_vcpu *vcpu; | ||
800 | ulong ga, ga_end; | ||
801 | int is_dirty = 0; | ||
802 | int r, n; | ||
803 | |||
804 | down_write(&kvm->slots_lock); | ||
805 | |||
806 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | ||
807 | if (r) | ||
808 | goto out; | ||
809 | |||
810 | /* If nothing is dirty, don't bother messing with page tables. */ | ||
811 | if (is_dirty) { | ||
812 | memslot = &kvm->memslots[log->slot]; | ||
813 | |||
814 | ga = memslot->base_gfn << PAGE_SHIFT; | ||
815 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | ||
816 | |||
817 | kvm_for_each_vcpu(n, vcpu, kvm) | ||
818 | kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); | ||
819 | |||
820 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | ||
821 | memset(memslot->dirty_bitmap, 0, n); | ||
822 | } | ||
823 | |||
824 | r = 0; | ||
825 | out: | ||
826 | up_write(&kvm->slots_lock); | ||
827 | return r; | ||
828 | } | ||
829 | |||
/* Processor compatibility check; unconditionally reports success (0). */
int kvmppc_core_check_processor_compat(void)
{
	const int compatible = 0;

	return compatible;
}
834 | |||
835 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | ||
836 | { | ||
837 | struct kvmppc_vcpu_book3s *vcpu_book3s; | ||
838 | struct kvm_vcpu *vcpu; | ||
839 | int err; | ||
840 | |||
841 | vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, | ||
842 | get_order(sizeof(struct kvmppc_vcpu_book3s))); | ||
843 | if (!vcpu_book3s) { | ||
844 | err = -ENOMEM; | ||
845 | goto out; | ||
846 | } | ||
847 | |||
848 | vcpu = &vcpu_book3s->vcpu; | ||
849 | err = kvm_vcpu_init(vcpu, kvm, id); | ||
850 | if (err) | ||
851 | goto free_vcpu; | ||
852 | |||
853 | vcpu->arch.host_retip = kvm_return_point; | ||
854 | vcpu->arch.host_msr = mfmsr(); | ||
855 | /* default to book3s_64 (970fx) */ | ||
856 | vcpu->arch.pvr = 0x3C0301; | ||
857 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); | ||
858 | vcpu_book3s->slb_nr = 64; | ||
859 | |||
860 | /* remember where some real-mode handlers are */ | ||
861 | vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; | ||
862 | vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; | ||
863 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; | ||
864 | |||
865 | vcpu->arch.shadow_msr = MSR_USER64; | ||
866 | |||
867 | err = __init_new_context(); | ||
868 | if (err < 0) | ||
869 | goto free_vcpu; | ||
870 | vcpu_book3s->context_id = err; | ||
871 | |||
872 | vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1; | ||
873 | vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS; | ||
874 | vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; | ||
875 | |||
876 | return vcpu; | ||
877 | |||
878 | free_vcpu: | ||
879 | free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); | ||
880 | out: | ||
881 | return ERR_PTR(err); | ||
882 | } | ||
883 | |||
884 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | ||
885 | { | ||
886 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | ||
887 | |||
888 | __destroy_context(vcpu_book3s->context_id); | ||
889 | kvm_vcpu_uninit(vcpu); | ||
890 | free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); | ||
891 | } | ||
892 | |||
893 | extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | ||
894 | int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | ||
895 | { | ||
896 | int ret; | ||
897 | |||
898 | /* No need to go into the guest when all we do is going out */ | ||
899 | if (signal_pending(current)) { | ||
900 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
901 | return -EINTR; | ||
902 | } | ||
903 | |||
904 | /* XXX we get called with irq disabled - change that! */ | ||
905 | local_irq_enable(); | ||
906 | |||
907 | ret = __kvmppc_vcpu_entry(kvm_run, vcpu); | ||
908 | |||
909 | local_irq_disable(); | ||
910 | |||
911 | return ret; | ||
912 | } | ||
913 | |||
914 | static int kvmppc_book3s_init(void) | ||
915 | { | ||
916 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE); | ||
917 | } | ||
918 | |||
919 | static void kvmppc_book3s_exit(void) | ||
920 | { | ||
921 | kvm_exit(); | ||
922 | } | ||
923 | |||
924 | module_init(kvmppc_book3s_init); | ||
925 | module_exit(kvmppc_book3s_exit); | ||