path: root/arch/powerpc/kvm
author		Paul Mackerras <paulus@samba.org>	2011-06-28 20:23:08 -0400
committer	Avi Kivity <avi@redhat.com>	2011-07-12 06:16:57 -0400
commit		371fefd6f2dc46668e00871930dde613b88d4bde (patch)
tree		35fe799343861405914d27873eb175eb04d6dce5 /arch/powerpc/kvm
parent		54738c097163c3f01e67ccc85462b78d4d4f495f (diff)
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded.

This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4.

To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code.

When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core.

We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus.

When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy-in-host state to blocked).

It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest.

Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
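For illustration only, a minimal userspace sketch of the vcpu-numbering rule described above. It is not qemu's code and not part of this patch; the helper name and the kvm_fd/vm_fd descriptors are assumed to already exist. It queries KVM_CAP_PPC_SMT and spaces vcpu ids out so that each vcpu lands in its own vcore, i.e. the guest runs single-threaded.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper, not part of this patch or of qemu. */
static int create_single_threaded_vcpus(int kvm_fd, int vm_fd, int nr_vcpus)
{
	int stride, i;

	/* KVM_CAP_PPC_SMT returns the number of vcpu ids per vcore (4 on POWER7) */
	stride = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
	if (stride <= 0)
		stride = 1;	/* capability absent: ids can be dense */

	for (i = 0; i < nr_vcpus; i++) {
		/* ids that are multiples of the stride put each vcpu in its own vcore */
		int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, i * stride);

		if (vcpu_fd < 0)
			return -1;
	}
	return 0;
}

Dense ids (0, 1, 2, 3) would instead pack four vcpus into one vcore so that they share one physical core in SMT4 mode.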
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c		| 316
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S	| 168
-rw-r--r--	arch/powerpc/kvm/powerpc.c		|   4
3 files changed, 455 insertions, 33 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6fe469eabce8..36b6d98f1197 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -39,6 +39,7 @@
 #include <asm/mmu_context.h>
 #include <asm/lppaca.h>
 #include <asm/processor.h>
+#include <asm/cputhreads.h>
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
@@ -51,12 +52,16 @@
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	local_paca->kvm_hstate.kvm_vcpu = vcpu;
+	local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 }
 
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
+
 void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	u64 now;
@@ -74,11 +79,15 @@ void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
 				      HRTIMER_MODE_REL);
 	}
 
+	kvmppc_vcpu_blocked(vcpu);
+
 	kvm_vcpu_block(vcpu);
 	vcpu->stat.halt_wakeup++;
 
 	if (vcpu->arch.dec_expires != ~(u64)0)
 		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
+	kvmppc_vcpu_unblocked(vcpu);
 }
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -429,9 +438,16 @@ int kvmppc_core_check_processor_compat(void)
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
-	int err = -ENOMEM;
+	int err = -EINVAL;
+	int core;
+	struct kvmppc_vcore *vcore;
 	unsigned long lpcr;
 
+	core = id / threads_per_core;
+	if (core >= KVM_MAX_VCORES)
+		goto out;
+
+	err = -ENOMEM;
 	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
 	if (!vcpu)
 		goto out;
@@ -454,6 +470,38 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
+	/*
+	 * Some vcpus may start out in stopped state.  If we initialize
+	 * them to busy-in-host state they will stop other vcpus in the
+	 * vcore from running.  Instead we initialize them to blocked
+	 * state, effectively considering them to be stopped until we
+	 * see the first run ioctl for them.
+	 */
+	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+
+	init_waitqueue_head(&vcpu->arch.cpu_run);
+
+	mutex_lock(&kvm->lock);
+	vcore = kvm->arch.vcores[core];
+	if (!vcore) {
+		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+		if (vcore) {
+			INIT_LIST_HEAD(&vcore->runnable_threads);
+			spin_lock_init(&vcore->lock);
+		}
+		kvm->arch.vcores[core] = vcore;
+	}
+	mutex_unlock(&kvm->lock);
+
+	if (!vcore)
+		goto free_vcpu;
+
+	spin_lock(&vcore->lock);
+	++vcore->num_threads;
+	++vcore->n_blocked;
+	spin_unlock(&vcore->lock);
+	vcpu->arch.vcore = vcore;
+
 	return vcpu;
 
 free_vcpu:
@@ -468,21 +516,121 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kfree(vcpu);
 }
 
+static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	spin_lock(&vc->lock);
+	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+	++vc->n_blocked;
+	if (vc->n_runnable > 0 &&
+	    vc->n_runnable + vc->n_blocked == vc->num_threads) {
+		vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+					arch.run_list);
+		wake_up(&vcpu->arch.cpu_run);
+	}
+	spin_unlock(&vc->lock);
+}
+
+static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	spin_lock(&vc->lock);
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	--vc->n_blocked;
+	spin_unlock(&vc->lock);
+}
+
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void xics_wake_cpu(int cpu);
 
-static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
+				   struct kvm_vcpu *vcpu)
 {
-	u64 now;
+	struct kvm_vcpu *v;
 
-	if (signal_pending(current)) {
-		run->exit_reason = KVM_EXIT_INTR;
-		return -EINTR;
+	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+		return;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	--vc->n_runnable;
+	/* decrement the physical thread id of each following vcpu */
+	v = vcpu;
+	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
+		--v->arch.ptid;
+	list_del(&vcpu->arch.run_list);
+}
+
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+{
+	int cpu;
+	struct paca_struct *tpaca;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	cpu = vc->pcpu + vcpu->arch.ptid;
+	tpaca = &paca[cpu];
+	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.kvm_vcore = vc;
+	smp_wmb();
+#ifdef CONFIG_PPC_ICP_NATIVE
+	if (vcpu->arch.ptid) {
+		tpaca->cpu_start = 0x80;
+		tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
+		wmb();
+		xics_wake_cpu(cpu);
+		++vc->n_woken;
 	}
+#endif
+}
 
-	flush_fp_to_thread(current);
-	flush_altivec_to_thread(current);
-	flush_vsx_to_thread(current);
-	preempt_disable();
+static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+{
+	int i;
+
+	HMT_low();
+	i = 0;
+	while (vc->nap_count < vc->n_woken) {
+		if (++i >= 1000000) {
+			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
+			       vc->nap_count, vc->n_woken);
+			break;
+		}
+		cpu_relax();
+	}
+	HMT_medium();
+}
+
+/*
+ * Check that we are on thread 0 and that any other threads in
+ * this core are off-line.
+ */
+static int on_primary_thread(void)
+{
+	int cpu = smp_processor_id();
+	int thr = cpu_thread_in_core(cpu);
+
+	if (thr)
+		return 0;
+	while (++thr < threads_per_core)
+		if (cpu_online(cpu + thr))
+			return 0;
+	return 1;
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held.
+ */
+static int kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+	struct kvm_vcpu *vcpu, *vnext;
+	long ret;
+	u64 now;
+
+	/* don't start if any threads have a signal pending */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		if (signal_pending(vcpu->arch.run_task))
+			return 0;
 
 	/*
 	 * Make sure we are running on thread 0, and that
@@ -490,36 +638,150 @@ static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	 * XXX we should also block attempts to bring any
 	 * secondary threads online.
 	 */
-	if (threads_per_core > 1) {
-		int cpu = smp_processor_id();
-		int thr = cpu_thread_in_core(cpu);
-
-		if (thr)
-			goto out;
-		while (++thr < threads_per_core)
-			if (cpu_online(cpu + thr))
-				goto out;
+	if (threads_per_core > 1 && !on_primary_thread()) {
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+			vcpu->arch.ret = -EBUSY;
+		goto out;
 	}
 
-	kvm_guest_enter();
+	vc->n_woken = 0;
+	vc->nap_count = 0;
+	vc->entry_exit_count = 0;
+	vc->vcore_running = 1;
+	vc->in_guest = 0;
+	vc->pcpu = smp_processor_id();
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		kvmppc_start_thread(vcpu);
+	vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
+				arch.run_list);
+
+	spin_unlock(&vc->lock);
 
+	preempt_disable();
+	kvm_guest_enter();
 	__kvmppc_vcore_entry(NULL, vcpu);
 
+	/* wait for secondary threads to finish writing their state to memory */
+	spin_lock(&vc->lock);
+	if (vc->nap_count < vc->n_woken)
+		kvmppc_wait_for_nap(vc);
+	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
+	vc->vcore_running = 2;
+	spin_unlock(&vc->lock);
+
+	/* make sure updates to secondary vcpu structs are visible now */
+	smp_mb();
 	kvm_guest_exit();
 
 	preempt_enable();
 	kvm_resched(vcpu);
 
 	now = get_tb();
-	/* cancel pending dec exception if dec is positive */
-	if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
-		kvmppc_core_dequeue_dec(vcpu);
-
-	return kvmppc_handle_exit(run, vcpu, current);
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		/* cancel pending dec exception if dec is positive */
+		if (now < vcpu->arch.dec_expires &&
+		    kvmppc_core_pending_dec(vcpu))
+			kvmppc_core_dequeue_dec(vcpu);
+		if (!vcpu->arch.trap) {
+			if (signal_pending(vcpu->arch.run_task)) {
+				vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+			}
+			continue;		/* didn't get to run */
+		}
+		ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+					 vcpu->arch.run_task);
+		vcpu->arch.ret = ret;
+		vcpu->arch.trap = 0;
+	}
 
+	spin_lock(&vc->lock);
  out:
-	preempt_enable();
-	return -EBUSY;
+	vc->vcore_running = 0;
+	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+				 arch.run_list) {
+		if (vcpu->arch.ret != RESUME_GUEST) {
+			kvmppc_remove_runnable(vc, vcpu);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+
+	return 1;
+}
+
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	int ptid;
+	int wait_state;
+	struct kvmppc_vcore *vc;
+	DEFINE_WAIT(wait);
+
+	/* No need to go into the guest when all we do is going out */
+	if (signal_pending(current)) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+	kvm_run->exit_reason = 0;
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+
+	flush_fp_to_thread(current);
+	flush_altivec_to_thread(current);
+	flush_vsx_to_thread(current);
+
+	/*
+	 * Synchronize with other threads in this virtual core
+	 */
+	vc = vcpu->arch.vcore;
+	spin_lock(&vc->lock);
+	/* This happens the first time this is called for a vcpu */
+	if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
+		--vc->n_blocked;
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+	ptid = vc->n_runnable;
+	vcpu->arch.run_task = current;
+	vcpu->arch.kvm_run = kvm_run;
+	vcpu->arch.ptid = ptid;
+	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+	++vc->n_runnable;
+
+	wait_state = TASK_INTERRUPTIBLE;
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		if (signal_pending(current)) {
+			if (!vc->vcore_running) {
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+				break;
+			}
+			/* have to wait for vcore to stop executing guest */
+			wait_state = TASK_UNINTERRUPTIBLE;
+			smp_send_reschedule(vc->pcpu);
+		}
+
+		if (!vc->vcore_running &&
+		    vc->n_runnable + vc->n_blocked == vc->num_threads) {
+			/* we can run now */
+			if (kvmppc_run_core(vc))
+				continue;
+		}
+
+		if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
+			kvmppc_start_thread(vcpu);
+
+		/* wait for other threads to come in, or wait for vcore */
+		prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+		spin_unlock(&vc->lock);
+		schedule();
+		finish_wait(&vcpu->arch.cpu_run, &wait);
+		spin_lock(&vc->lock);
+	}
+
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+		kvmppc_remove_runnable(vc, vcpu);
+	spin_unlock(&vc->lock);
+
+	return vcpu->arch.ret;
 }
 
 int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e6adaadcdff2..c9bf177b7cf2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -30,8 +30,6 @@
  *                                                                           *
  ****************************************************************************/
 
-#define SHADOW_VCPU_OFF	PACA_KVM_SVCPU
-
 	.globl	kvmppc_skip_interrupt
 kvmppc_skip_interrupt:
 	mfspr	r13,SPRN_SRR0
@@ -79,6 +77,32 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
  *                                                                           *
  *****************************************************************************/
 
+#define XICS_XIRR		4
+#define XICS_QIRR		0xc
+
+/*
+ * We come in here when wakened from nap mode on a secondary hw thread.
+ * Relocation is off and most register values are lost.
+ * r13 points to the PACA.
+ */
+	.globl	kvm_start_guest
+kvm_start_guest:
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_OVERHEAD
+
+	/* get vcpu pointer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+
+	/* We got here with an IPI; clear it */
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r0, 0xff
+	li	r6, XICS_QIRR
+	li	r7, XICS_XIRR
+	lwzcix	r8, r5, r7		/* ack the interrupt */
+	sync
+	stbcix	r0, r5, r6		/* clear it */
+	stwcix	r8, r5, r7		/* EOI it */
+
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -200,7 +224,20 @@ kvmppc_hv_entry:
 	slbia
 	ptesync
 
-	/* Switch to guest partition. */
+	/* Increment entry count iff exit count is zero. */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	addi	r9,r5,VCORE_ENTRY_EXIT
+21:	lwarx	r3,0,r9
+	cmpwi	r3,0x100		/* any threads starting to exit? */
+	bge	secondary_too_late	/* if so we're too late to the party */
+	addi	r3,r3,1
+	stwcx.	r3,0,r9
+	bne	21b
+
+	/* Primary thread switches to guest partition. */
+	lwz	r6,VCPU_PTID(r4)
+	cmpwi	r6,0
+	bne	20f
 	ld	r9,VCPU_KVM(r4)	/* pointer to struct kvm */
 	ld	r6,KVM_SDR1(r9)
 	lwz	r7,KVM_LPID(r9)
@@ -210,7 +247,15 @@ kvmppc_hv_entry:
 	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
 	mtspr	SPRN_LPID,r7
 	isync
-	ld	r8,VCPU_LPCR(r4)
+	li	r0,1
+	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
+	b	10f
+
+	/* Secondary threads wait for primary to have done partition switch */
+20:	lbz	r0,VCORE_IN_GUEST(r5)
+	cmpwi	r0,0
+	beq	20b
+10:	ld	r8,VCPU_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
 
@@ -225,10 +270,12 @@ kvmppc_hv_entry:
 	 * Invalidate the TLB if we could possibly have stale TLB
 	 * entries for this partition on this core due to the use
 	 * of tlbiel.
+	 * XXX maybe only need this on primary thread?
 	 */
 	ld	r9,VCPU_KVM(r4)	/* pointer to struct kvm */
 	lwz	r5,VCPU_VCPUID(r4)
 	lhz	r6,PACAPACAINDEX(r13)
+	rldimi	r6,r5,0,62		/* XXX map as if threads 1:1 p:v */
 	lhz	r8,VCPU_LAST_CPU(r4)
 	sldi	r7,r6,1			/* see if this is the same vcpu */
 	add	r7,r7,r9		/* as last ran on this pcpu */
@@ -512,8 +559,60 @@ hcall_real_cont:
 	ptesync
 
 hdec_soon:
-	/* Switch back to host partition */
+	/* Increment the threads-exiting-guest count in the 0xff00
+	   bits of vcore->entry_exit_count */
+	lwsync
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	addi	r6,r5,VCORE_ENTRY_EXIT
+41:	lwarx	r3,0,r6
+	addi	r0,r3,0x100
+	stwcx.	r0,0,r6
+	bne	41b
+
+	/*
+	 * At this point we have an interrupt that we have to pass
+	 * up to the kernel or qemu; we can't handle it in real mode.
+	 * Thus we have to do a partition switch, so we have to
+	 * collect the other threads, if we are the first thread
+	 * to take an interrupt.  To do this, we set the HDEC to 0,
+	 * which causes an HDEC interrupt in all threads within 2ns
+	 * because the HDEC register is shared between all 4 threads.
+	 * However, we don't need to bother if this is an HDEC
+	 * interrupt, since the other threads will already be on their
+	 * way here in that case.
+	 */
+	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+	beq	40f
+	cmpwi	r3,0x100	/* Are we the first here? */
+	bge	40f
+	cmpwi	r3,1
+	ble	40f
+	li	r0,0
+	mtspr	SPRN_HDEC,r0
+40:
+
+	/* Secondary threads wait for primary to do partition switch */
 	ld	r4,VCPU_KVM(r9)	/* pointer to struct kvm */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	lwz	r3,VCPU_PTID(r9)
+	cmpwi	r3,0
+	beq	15f
+	HMT_LOW
+13:	lbz	r3,VCORE_IN_GUEST(r5)
+	cmpwi	r3,0
+	bne	13b
+	HMT_MEDIUM
+	b	16f
+
+	/* Primary thread waits for all the secondaries to exit guest */
+15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
+	srwi	r0,r3,8
+	clrldi	r3,r3,56
+	cmpw	r3,r0
+	bne	15b
+	isync
+
+	/* Primary thread switches back to host partition */
 	ld	r6,KVM_HOST_SDR1(r4)
 	lwz	r7,KVM_HOST_LPID(r4)
 	li	r8,LPID_RSVD		/* switch to reserved LPID */
@@ -522,10 +621,12 @@ hdec_soon:
 	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
 	mtspr	SPRN_LPID,r7
 	isync
+	li	r0,0
+	stb	r0,VCORE_IN_GUEST(r5)
 	lis	r8,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
 
-	ld	r8,KVM_HOST_LPCR(r4)
+16:	ld	r8,KVM_HOST_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
 
@@ -634,6 +735,11 @@ hdec_soon:
 	mr	r3, r9
 	bl	.kvmppc_save_fp
 
+	/* Secondary threads go off to take a nap */
+	lwz	r0,VCPU_PTID(r3)
+	cmpwi	r0,0
+	bne	secondary_nap
+
 	/*
 	 * Reload DEC.  HDEC interrupts were disabled when
 	 * we reloaded the host's LPCR value.
@@ -840,6 +946,56 @@ _GLOBAL(kvmppc_h_set_dabr)
 	li	r3,0
 	blr
 
+secondary_too_late:
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	HMT_LOW
+13:	lbz	r3,VCORE_IN_GUEST(r5)
+	cmpwi	r3,0
+	bne	13b
+	HMT_MEDIUM
+	ld	r11,PACA_SLBSHADOWPTR(r13)
+
+	.rept	SLB_NUM_BOLTED
+	ld	r5,SLBSHADOW_SAVEAREA(r11)
+	ld	r6,SLBSHADOW_SAVEAREA+8(r11)
+	andis.	r7,r5,SLB_ESID_V@h
+	beq	1f
+	slbmte	r6,r5
+1:	addi	r11,r11,16
+	.endr
+	b	50f
+
+secondary_nap:
+	/* Clear any pending IPI */
+50:	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r0, 0xff
+	li	r6, XICS_QIRR
+	stbcix	r0, r5, r6
+
+	/* increment the nap count and then go to nap mode */
+	ld	r4, HSTATE_KVM_VCORE(r13)
+	addi	r4, r4, VCORE_NAP_COUNT
+	lwsync	/* make previous updates visible */
+51:	lwarx	r3, 0, r4
+	addi	r3, r3, 1
+	stwcx.	r3, 0, r4
+	bne	51b
+	isync
+
+	mfspr	r4, SPRN_LPCR
+	li	r0, LPCR_PECE
+	andc	r4, r4, r0
+	ori	r4, r4, LPCR_PECE0	/* exit nap on interrupt */
+	mtspr	SPRN_LPCR, r4
+	li	r0, 0
+	std	r0, HSTATE_SCRATCH0(r13)
+	ptesync
+	ld	r0, HSTATE_SCRATCH0(r13)
+1:	cmpd	r0, r0
+	bne	1b
+	nap
+	b	.
+
 /*
  * Save away FP, VMX and VSX registers.
  * r3 = vcpu pointer
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c78ceb9d5605..4c549664c987 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/tlbflush.h>
+#include <asm/cputhreads.h>
 #include "timing.h"
 #include "../mm/mmu_decl.h"
 
@@ -207,6 +208,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SPAPR_TCE:
 		r = 1;
 		break;
+	case KVM_CAP_PPC_SMT:
+		r = threads_per_core;
+		break;
 #endif
 	default:
 		r = 0;