aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm/powerpc.c
diff options
context:
space:
mode:
author Paul Mackerras <paulus@samba.org> 2011-06-28 20:21:34 -0400
committer Avi Kivity <avi@redhat.com> 2011-07-12 06:16:54 -0400
commit de56a948b9182fbcf92cb8212f114de096c2d574 (patch)
tree 633ab73672aa2543b683686fc8fb023629c5f8f8 /arch/powerpc/kvm/powerpc.c
parent 3c42bf8a717cb636e0ed2ed77194669e2ac3ed56 (diff)
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements.

This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h.

Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other.

This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction.

With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers.

We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those.

In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct.

The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction.

This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped.

This also adds a few new exports needed by the book3s_hv code.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'arch/powerpc/kvm/powerpc.c')
-rw-r--r-- arch/powerpc/kvm/powerpc.c 22
1 files changed, 21 insertions, 1 deletions
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 026036efcde0..3a4f379ee70f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,8 +38,12 @@
38 38
39int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 39int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
40{ 40{
41#ifndef CONFIG_KVM_BOOK3S_64_HV
41 return !(v->arch.shared->msr & MSR_WE) || 42 return !(v->arch.shared->msr & MSR_WE) ||
42 !!(v->arch.pending_exceptions); 43 !!(v->arch.pending_exceptions);
44#else
45 return 1;
46#endif
43} 47}
44 48
45int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 49int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -184,10 +188,13 @@ int kvm_dev_ioctl_check_extension(long ext)
184#else 188#else
185 case KVM_CAP_PPC_SEGSTATE: 189 case KVM_CAP_PPC_SEGSTATE:
186#endif 190#endif
187 case KVM_CAP_PPC_PAIRED_SINGLES:
188 case KVM_CAP_PPC_UNSET_IRQ: 191 case KVM_CAP_PPC_UNSET_IRQ:
189 case KVM_CAP_PPC_IRQ_LEVEL: 192 case KVM_CAP_PPC_IRQ_LEVEL:
190 case KVM_CAP_ENABLE_CAP: 193 case KVM_CAP_ENABLE_CAP:
194 r = 1;
195 break;
196#ifndef CONFIG_KVM_BOOK3S_64_HV
197 case KVM_CAP_PPC_PAIRED_SINGLES:
191 case KVM_CAP_PPC_OSI: 198 case KVM_CAP_PPC_OSI:
192 case KVM_CAP_PPC_GET_PVINFO: 199 case KVM_CAP_PPC_GET_PVINFO:
193 r = 1; 200 r = 1;
@@ -195,6 +202,7 @@ int kvm_dev_ioctl_check_extension(long ext)
195 case KVM_CAP_COALESCED_MMIO: 202 case KVM_CAP_COALESCED_MMIO:
196 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 203 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
197 break; 204 break;
205#endif
198 default: 206 default:
199 r = 0; 207 r = 0;
200 break; 208 break;
@@ -291,6 +299,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
291 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 299 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
292 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); 300 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
293 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 301 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
302 vcpu->arch.dec_expires = ~(u64)0;
294 303
295#ifdef CONFIG_KVM_EXIT_TIMING 304#ifdef CONFIG_KVM_EXIT_TIMING
296 mutex_init(&vcpu->arch.exit_timing_lock); 305 mutex_init(&vcpu->arch.exit_timing_lock);
@@ -317,6 +326,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
317 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); 326 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
318#endif 327#endif
319 kvmppc_core_vcpu_load(vcpu, cpu); 328 kvmppc_core_vcpu_load(vcpu, cpu);
329 vcpu->cpu = smp_processor_id();
320} 330}
321 331
322void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 332void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -325,6 +335,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
325#ifdef CONFIG_BOOKE 335#ifdef CONFIG_BOOKE
326 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); 336 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
327#endif 337#endif
338 vcpu->cpu = -1;
328} 339}
329 340
330int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 341int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -496,6 +507,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
496 for (i = 0; i < 32; i++) 507 for (i = 0; i < 32; i++)
497 kvmppc_set_gpr(vcpu, i, gprs[i]); 508 kvmppc_set_gpr(vcpu, i, gprs[i]);
498 vcpu->arch.osi_needed = 0; 509 vcpu->arch.osi_needed = 0;
510 } else if (vcpu->arch.hcall_needed) {
511 int i;
512
513 kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
514 for (i = 0; i < 9; ++i)
515 kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
516 vcpu->arch.hcall_needed = 0;
499 } 517 }
500 518
501 kvmppc_core_deliver_interrupts(vcpu); 519 kvmppc_core_deliver_interrupts(vcpu);
@@ -518,6 +536,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
518 if (waitqueue_active(&vcpu->wq)) { 536 if (waitqueue_active(&vcpu->wq)) {
519 wake_up_interruptible(&vcpu->wq); 537 wake_up_interruptible(&vcpu->wq);
520 vcpu->stat.halt_wakeup++; 538 vcpu->stat.halt_wakeup++;
539 } else if (vcpu->cpu != -1) {
540 smp_send_reschedule(vcpu->cpu);
521 } 541 }
522 542
523 return 0; 543 return 0;