author     Paul Mackerras <paulus@samba.org>    2011-06-28 20:23:08 -0400
committer  Avi Kivity <avi@redhat.com>          2011-07-12 06:16:57 -0400
commit     371fefd6f2dc46668e00871930dde613b88d4bde (patch)
tree       35fe799343861405914d27873eb175eb04d6dce5    /arch/powerpc/kvm/book3s_hv.c
parent     54738c097163c3f01e67ccc85462b78d4d4f495f (diff)
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded.

This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4.

To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code.

When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core.

We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus.

When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy-in-host state to blocked).

It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start executing the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest.

Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
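For reference, here is a minimal userspace sketch (not part of this patch) of how a QEMU-like VMM could consume the new capability: it queries KVM_CAP_PPC_SMT via KVM_CHECK_EXTENSION and then spaces vcpu ids by the returned thread count, so a single-threaded guest gets one vcpu per vcore. The guest_cores value and the overall flow are illustrative assumptions, and error handling is omitted.

/* Hypothetical sketch; assumes <linux/kvm.h> from a kernel carrying this series. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	/* Number of vcpus per vcore (currently 4 on POWER7); <= 0 means unsupported. */
	int smt = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
	int threads_per_vcore = (smt > 0) ? smt : 1;
	int guest_cores = 2;	/* illustrative guest topology */

	for (int core = 0; core < guest_cores; core++) {
		/*
		 * For a single-threaded guest, make each vcpu id a multiple
		 * of threads_per_vcore so every vcpu lands in its own vcore.
		 */
		int id = core * threads_per_vcore;
		int vcpu_fd = ioctl(vm, KVM_CREATE_VCPU, id);
		printf("vcpu id %d -> fd %d\n", id, vcpu_fd);
	}
	return 0;
}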
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--    arch/powerpc/kvm/book3s_hv.c    316
1 file changed, 289 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6fe469eabce8..36b6d98f1197 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -39,6 +39,7 @@
 #include <asm/mmu_context.h>
 #include <asm/lppaca.h>
 #include <asm/processor.h>
+#include <asm/cputhreads.h>
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
@@ -51,12 +52,16 @@
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	local_paca->kvm_hstate.kvm_vcpu = vcpu;
+	local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 }
 
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
+
 void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	u64 now;
@@ -74,11 +79,15 @@ void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
 			      HRTIMER_MODE_REL);
 	}
 
+	kvmppc_vcpu_blocked(vcpu);
+
 	kvm_vcpu_block(vcpu);
 	vcpu->stat.halt_wakeup++;
 
 	if (vcpu->arch.dec_expires != ~(u64)0)
 		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
+	kvmppc_vcpu_unblocked(vcpu);
 }
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -429,9 +438,16 @@ int kvmppc_core_check_processor_compat(void)
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
-	int err = -ENOMEM;
+	int err = -EINVAL;
+	int core;
+	struct kvmppc_vcore *vcore;
 	unsigned long lpcr;
 
+	core = id / threads_per_core;
+	if (core >= KVM_MAX_VCORES)
+		goto out;
+
+	err = -ENOMEM;
 	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
 	if (!vcpu)
 		goto out;
@@ -454,6 +470,38 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
+	/*
+	 * Some vcpus may start out in stopped state.  If we initialize
+	 * them to busy-in-host state they will stop other vcpus in the
+	 * vcore from running.  Instead we initialize them to blocked
+	 * state, effectively considering them to be stopped until we
+	 * see the first run ioctl for them.
+	 */
+	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+
+	init_waitqueue_head(&vcpu->arch.cpu_run);
+
+	mutex_lock(&kvm->lock);
+	vcore = kvm->arch.vcores[core];
+	if (!vcore) {
+		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+		if (vcore) {
+			INIT_LIST_HEAD(&vcore->runnable_threads);
+			spin_lock_init(&vcore->lock);
+		}
+		kvm->arch.vcores[core] = vcore;
+	}
+	mutex_unlock(&kvm->lock);
+
+	if (!vcore)
+		goto free_vcpu;
+
+	spin_lock(&vcore->lock);
+	++vcore->num_threads;
+	++vcore->n_blocked;
+	spin_unlock(&vcore->lock);
+	vcpu->arch.vcore = vcore;
+
 	return vcpu;
 
 free_vcpu:
@@ -468,21 +516,121 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kfree(vcpu);
 }
 
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	spin_lock(&vc->lock);
+	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+	++vc->n_blocked;
+	if (vc->n_runnable > 0 &&
+	    vc->n_runnable + vc->n_blocked == vc->num_threads) {
+		vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+					arch.run_list);
+		wake_up(&vcpu->arch.cpu_run);
+	}
+	spin_unlock(&vc->lock);
+}
+
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	spin_lock(&vc->lock);
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	--vc->n_blocked;
+	spin_unlock(&vc->lock);
+}
+
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void xics_wake_cpu(int cpu);
 
-static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
+				   struct kvm_vcpu *vcpu)
 {
-	u64 now;
+	struct kvm_vcpu *v;
 
-	if (signal_pending(current)) {
-		run->exit_reason = KVM_EXIT_INTR;
-		return -EINTR;
+	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+		return;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	--vc->n_runnable;
+	/* decrement the physical thread id of each following vcpu */
+	v = vcpu;
+	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
+		--v->arch.ptid;
+	list_del(&vcpu->arch.run_list);
+}
+
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+{
+	int cpu;
+	struct paca_struct *tpaca;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	cpu = vc->pcpu + vcpu->arch.ptid;
+	tpaca = &paca[cpu];
+	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.kvm_vcore = vc;
+	smp_wmb();
+#ifdef CONFIG_PPC_ICP_NATIVE
+	if (vcpu->arch.ptid) {
+		tpaca->cpu_start = 0x80;
+		tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
+		wmb();
+		xics_wake_cpu(cpu);
+		++vc->n_woken;
 	}
+#endif
+}
 
-	flush_fp_to_thread(current);
-	flush_altivec_to_thread(current);
-	flush_vsx_to_thread(current);
-	preempt_disable();
+static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+{
+	int i;
+
+	HMT_low();
+	i = 0;
+	while (vc->nap_count < vc->n_woken) {
+		if (++i >= 1000000) {
+			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
+			       vc->nap_count, vc->n_woken);
+			break;
+		}
+		cpu_relax();
+	}
+	HMT_medium();
+}
+
+/*
+ * Check that we are on thread 0 and that any other threads in
+ * this core are off-line.
+ */
+static int on_primary_thread(void)
+{
+	int cpu = smp_processor_id();
+	int thr = cpu_thread_in_core(cpu);
+
+	if (thr)
+		return 0;
+	while (++thr < threads_per_core)
+		if (cpu_online(cpu + thr))
+			return 0;
+	return 1;
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held.
+ */
+static int kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+	struct kvm_vcpu *vcpu, *vnext;
+	long ret;
+	u64 now;
+
+	/* don't start if any threads have a signal pending */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		if (signal_pending(vcpu->arch.run_task))
+			return 0;
 
 	/*
 	 * Make sure we are running on thread 0, and that
@@ -490,36 +638,150 @@ static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	 * XXX we should also block attempts to bring any
 	 * secondary threads online.
 	 */
-	if (threads_per_core > 1) {
-		int cpu = smp_processor_id();
-		int thr = cpu_thread_in_core(cpu);
-
-		if (thr)
-			goto out;
-		while (++thr < threads_per_core)
-			if (cpu_online(cpu + thr))
-				goto out;
+	if (threads_per_core > 1 && !on_primary_thread()) {
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+			vcpu->arch.ret = -EBUSY;
+		goto out;
 	}
 
-	kvm_guest_enter();
+	vc->n_woken = 0;
+	vc->nap_count = 0;
+	vc->entry_exit_count = 0;
+	vc->vcore_running = 1;
+	vc->in_guest = 0;
+	vc->pcpu = smp_processor_id();
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		kvmppc_start_thread(vcpu);
+	vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+				arch.run_list);
+
+	spin_unlock(&vc->lock);
 
+	preempt_disable();
+	kvm_guest_enter();
 	__kvmppc_vcore_entry(NULL, vcpu);
 
+	/* wait for secondary threads to finish writing their state to memory */
+	spin_lock(&vc->lock);
+	if (vc->nap_count < vc->n_woken)
+		kvmppc_wait_for_nap(vc);
+	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
+	vc->vcore_running = 2;
+	spin_unlock(&vc->lock);
+
+	/* make sure updates to secondary vcpu structs are visible now */
+	smp_mb();
 	kvm_guest_exit();
 
 	preempt_enable();
 	kvm_resched(vcpu);
 
 	now = get_tb();
-	/* cancel pending dec exception if dec is positive */
-	if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
-		kvmppc_core_dequeue_dec(vcpu);
-
-	return kvmppc_handle_exit(run, vcpu, current);
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		/* cancel pending dec exception if dec is positive */
+		if (now < vcpu->arch.dec_expires &&
+		    kvmppc_core_pending_dec(vcpu))
+			kvmppc_core_dequeue_dec(vcpu);
+		if (!vcpu->arch.trap) {
+			if (signal_pending(vcpu->arch.run_task)) {
+				vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+			}
+			continue;		/* didn't get to run */
+		}
+		ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+					 vcpu->arch.run_task);
+		vcpu->arch.ret = ret;
+		vcpu->arch.trap = 0;
+	}
 
+	spin_lock(&vc->lock);
  out:
-	preempt_enable();
-	return -EBUSY;
+	vc->vcore_running = 0;
+	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+				 arch.run_list) {
+		if (vcpu->arch.ret != RESUME_GUEST) {
+			kvmppc_remove_runnable(vc, vcpu);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+
+	return 1;
+}
+
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	int ptid;
+	int wait_state;
+	struct kvmppc_vcore *vc;
+	DEFINE_WAIT(wait);
+
+	/* No need to go into the guest when all we do is going out */
+	if (signal_pending(current)) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+	kvm_run->exit_reason = 0;
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+
+	flush_fp_to_thread(current);
+	flush_altivec_to_thread(current);
+	flush_vsx_to_thread(current);
+
+	/*
+	 * Synchronize with other threads in this virtual core
+	 */
+	vc = vcpu->arch.vcore;
+	spin_lock(&vc->lock);
+	/* This happens the first time this is called for a vcpu */
+	if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
+		--vc->n_blocked;
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+	ptid = vc->n_runnable;
+	vcpu->arch.run_task = current;
+	vcpu->arch.kvm_run = kvm_run;
+	vcpu->arch.ptid = ptid;
+	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+	++vc->n_runnable;
+
+	wait_state = TASK_INTERRUPTIBLE;
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		if (signal_pending(current)) {
+			if (!vc->vcore_running) {
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+				break;
+			}
+			/* have to wait for vcore to stop executing guest */
+			wait_state = TASK_UNINTERRUPTIBLE;
+			smp_send_reschedule(vc->pcpu);
+		}
+
+		if (!vc->vcore_running &&
+		    vc->n_runnable + vc->n_blocked == vc->num_threads) {
+			/* we can run now */
+			if (kvmppc_run_core(vc))
+				continue;
+		}
+
+		if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
+			kvmppc_start_thread(vcpu);
+
+		/* wait for other threads to come in, or wait for vcore */
+		prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+		spin_unlock(&vc->lock);
+		schedule();
+		finish_wait(&vcpu->arch.cpu_run, &wait);
+		spin_lock(&vc->lock);
+	}
+
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+		kvmppc_remove_runnable(vc, vcpu);
+	spin_unlock(&vc->lock);
+
+	return vcpu->arch.ret;
 }
 
 int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)