author     Paul Mackerras <paulus@samba.org>    2011-07-23 03:42:46 -0400
committer  Avi Kivity <avi@redhat.com>          2011-09-25 12:52:30 -0400
commit     19ccb76a1938ab364a412253daec64613acbf3df (patch)
tree       42a3e3307355202fe0db48e2530bb42e43d9a035 /arch/powerpc
parent     02143947603fe90237a0423d34dd8943de229f78 (diff)
KVM: PPC: Implement H_CEDE hcall for book3s_hv in real-mode code
With a KVM guest operating in SMT4 mode (i.e. 4 hardware threads per core), whenever a CPU goes idle, we have to pull all the other hardware threads in the core out of the guest, because the H_CEDE hcall is handled in the kernel. This is inefficient.

This adds code to book3s_hv_rmhandlers.S to handle the H_CEDE hcall in real mode. When a guest vcpu does an H_CEDE hcall, we now only exit to the kernel if all the other vcpus in the same core are also idle. Otherwise we mark this vcpu as napping, save state that could be lost in nap mode (mainly GPRs and FPRs), and execute the nap instruction. When the thread wakes up, because of a decrementer or external interrupt, we come back in at kvm_start_guest (from the system reset interrupt vector), find the `napping' flag set in the paca, and go to the resume path.

This has some other ramifications. First, when starting a core, we now start all the threads, both those that are immediately runnable and those that are idle. This is so that we don't have to pull all the threads out of the guest when an idle thread gets a decrementer interrupt and wants to start running. In fact the idle threads will all start with the H_CEDE hcall returning; being idle they will just do another H_CEDE immediately and go to nap mode.

This required some changes to kvmppc_run_core() and kvmppc_run_vcpu(). These functions have been restructured to make them simpler and clearer. We introduce a level of indirection in the wait queue that gets woken when external and decrementer interrupts get generated for a vcpu, so that we can have the 4 vcpus in a vcore using the same wait queue. We need this because the 4 vcpus are being handled by one thread.

Secondly, when we need to exit from the guest to the kernel, we now have to generate an IPI for any napping threads, because an HDEC interrupt doesn't wake up a napping thread.

Thirdly, we now need to be able to handle virtual external interrupts and decrementer interrupts becoming pending while a thread is napping, and deliver those interrupts to the guest when the thread wakes. This is done in kvmppc_cede_reentry, just before fast_guest_return.

Finally, since we are not using the generic kvm_vcpu_block for book3s_hv, and hence not calling kvm_arch_vcpu_runnable, we can remove the #ifdef from kvm_arch_vcpu_runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
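For orientation before reading the diff: the restructured run loop in book3s_hv.c puts the whole virtual core to sleep only when every runnable vcpu in it has ceded; otherwise it re-enters the guest. A simplified sketch of that decision, condensed from the kvmppc_run_vcpu() hunk further down (illustrative only, not a verbatim quote of the patch):

	n_ceded = 0;
	list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
		n_ceded += v->arch.ceded;
	if (n_ceded == vc->n_runnable)
		kvmppc_vcore_blocked(vc);	/* all idle: sleep on the shared vcore wait queue */
	else
		kvmppc_run_core(vc);		/* at least one vcpu is runnable: enter the guest */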
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h  |   1
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h        |  19
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c          |   6
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c               | 335
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S    | 297
-rw-r--r--  arch/powerpc/kvm/powerpc.c                 |  21
6 files changed, 483 insertions, 196 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index af73469530e6..1f2f5b6156bd 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -76,6 +76,7 @@ struct kvmppc_host_state {
76 ulong scratch1; 76 ulong scratch1;
77 u8 in_guest; 77 u8 in_guest;
78 u8 restore_hid5; 78 u8 restore_hid5;
79 u8 napping;
79 80
80#ifdef CONFIG_KVM_BOOK3S_64_HV 81#ifdef CONFIG_KVM_BOOK3S_64_HV
81 struct kvm_vcpu *kvm_vcpu; 82 struct kvm_vcpu *kvm_vcpu;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index dec3054f6ad4..bf8af5d5d5dc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -198,21 +198,29 @@ struct kvm_arch {
198 */ 198 */
199struct kvmppc_vcore { 199struct kvmppc_vcore {
200 int n_runnable; 200 int n_runnable;
201 int n_blocked; 201 int n_busy;
202 int num_threads; 202 int num_threads;
203 int entry_exit_count; 203 int entry_exit_count;
204 int n_woken; 204 int n_woken;
205 int nap_count; 205 int nap_count;
206 int napping_threads;
206 u16 pcpu; 207 u16 pcpu;
207 u8 vcore_running; 208 u8 vcore_state;
208 u8 in_guest; 209 u8 in_guest;
209 struct list_head runnable_threads; 210 struct list_head runnable_threads;
210 spinlock_t lock; 211 spinlock_t lock;
212 wait_queue_head_t wq;
211}; 213};
212 214
213#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) 215#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
214#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8) 216#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8)
215 217
218/* Values for vcore_state */
219#define VCORE_INACTIVE 0
220#define VCORE_RUNNING 1
221#define VCORE_EXITING 2
222#define VCORE_SLEEPING 3
223
216struct kvmppc_pte { 224struct kvmppc_pte {
217 ulong eaddr; 225 ulong eaddr;
218 u64 vpage; 226 u64 vpage;
@@ -403,11 +411,13 @@ struct kvm_vcpu_arch {
403 struct dtl *dtl; 411 struct dtl *dtl;
404 struct dtl *dtl_end; 412 struct dtl *dtl_end;
405 413
414 wait_queue_head_t *wqp;
406 struct kvmppc_vcore *vcore; 415 struct kvmppc_vcore *vcore;
407 int ret; 416 int ret;
408 int trap; 417 int trap;
409 int state; 418 int state;
410 int ptid; 419 int ptid;
420 bool timer_running;
411 wait_queue_head_t cpu_run; 421 wait_queue_head_t cpu_run;
412 422
413 struct kvm_vcpu_arch_shared *shared; 423 struct kvm_vcpu_arch_shared *shared;
@@ -423,8 +433,9 @@ struct kvm_vcpu_arch {
423#endif 433#endif
424}; 434};
425 435
426#define KVMPPC_VCPU_BUSY_IN_HOST 0 436/* Values for vcpu->arch.state */
427#define KVMPPC_VCPU_BLOCKED 1 437#define KVMPPC_VCPU_STOPPED 0
438#define KVMPPC_VCPU_BUSY_IN_HOST 1
428#define KVMPPC_VCPU_RUNNABLE 2 439#define KVMPPC_VCPU_RUNNABLE 2
429 440
430#endif /* __POWERPC_KVM_HOST_H__ */ 441#endif /* __POWERPC_KVM_HOST_H__ */
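The wqp pointer added above provides the wait-queue indirection mentioned in the commit message: book3s_hv points every vcpu in a vcore at the vcore's shared wait queue, while other configurations keep using the per-vcpu queue. A sketch of the resulting wakeup path, mirroring the powerpc.c changes at the end of this patch (illustrative only):

	/* Interrupt delivery wakes whichever queue vcpu->arch.wqp points at:
	 * &vcpu->wq by default, or the shared &vcore->wq once the HV run
	 * path has redirected it. */
	if (waitqueue_active(vcpu->arch.wqp)) {
		wake_up_interruptible(vcpu->arch.wqp);
		vcpu->stat.halt_wakeup++;
	} else if (vcpu->cpu != -1) {
		smp_send_reschedule(vcpu->cpu);	/* kick a thread currently in the guest */
	}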
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e069c766695d..69f7ffe7f674 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -44,6 +44,7 @@
44#include <asm/compat.h> 44#include <asm/compat.h>
45#include <asm/mmu.h> 45#include <asm/mmu.h>
46#include <asm/hvcall.h> 46#include <asm/hvcall.h>
47#include <asm/xics.h>
47#endif 48#endif
48#ifdef CONFIG_PPC_ISERIES 49#ifdef CONFIG_PPC_ISERIES
49#include <asm/iseries/alpaca.h> 50#include <asm/iseries/alpaca.h>
@@ -460,6 +461,8 @@ int main(void)
460 DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); 461 DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
461 DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); 462 DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
462 DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); 463 DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
464 DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
465 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
463 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); 466 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
464 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); 467 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
465 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); 468 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
@@ -475,6 +478,7 @@ int main(void)
475 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); 478 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
476 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); 479 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
477 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); 480 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
481 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
478 DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - 482 DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
479 offsetof(struct kvmppc_vcpu_book3s, vcpu)); 483 offsetof(struct kvmppc_vcpu_book3s, vcpu));
480 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); 484 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
@@ -532,6 +536,7 @@ int main(void)
532 HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); 536 HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
533 HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); 537 HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
534 HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); 538 HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
539 HSTATE_FIELD(HSTATE_NAPPING, napping);
535 540
536#ifdef CONFIG_KVM_BOOK3S_64_HV 541#ifdef CONFIG_KVM_BOOK3S_64_HV
537 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); 542 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
@@ -544,6 +549,7 @@ int main(void)
544 HSTATE_FIELD(HSTATE_DSCR, host_dscr); 549 HSTATE_FIELD(HSTATE_DSCR, host_dscr);
545 HSTATE_FIELD(HSTATE_DABR, dabr); 550 HSTATE_FIELD(HSTATE_DABR, dabr);
546 HSTATE_FIELD(HSTATE_DECEXP, dec_expires); 551 HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
552 DEFINE(IPI_PRIORITY, IPI_PRIORITY);
547#endif /* CONFIG_KVM_BOOK3S_64_HV */ 553#endif /* CONFIG_KVM_BOOK3S_64_HV */
548 554
549#else /* CONFIG_PPC_BOOK3S */ 555#else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bf66ec731e8f..4644c7986d80 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -62,6 +62,8 @@
62/* #define EXIT_DEBUG_SIMPLE */ 62/* #define EXIT_DEBUG_SIMPLE */
63/* #define EXIT_DEBUG_INT */ 63/* #define EXIT_DEBUG_INT */
64 64
65static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
66
65void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 67void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
66{ 68{
67 local_paca->kvm_hstate.kvm_vcpu = vcpu; 69 local_paca->kvm_hstate.kvm_vcpu = vcpu;
@@ -72,40 +74,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
72{ 74{
73} 75}
74 76
75static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
76static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
77
78void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
79{
80 u64 now;
81 unsigned long dec_nsec;
82
83 now = get_tb();
84 if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
85 kvmppc_core_queue_dec(vcpu);
86 if (vcpu->arch.pending_exceptions)
87 return;
88 if (vcpu->arch.dec_expires != ~(u64)0) {
89 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
90 tb_ticks_per_sec;
91 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
92 HRTIMER_MODE_REL);
93 }
94
95 kvmppc_vcpu_blocked(vcpu);
96
97 kvm_vcpu_block(vcpu);
98 vcpu->stat.halt_wakeup++;
99
100 if (vcpu->arch.dec_expires != ~(u64)0)
101 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
102
103 kvmppc_vcpu_unblocked(vcpu);
104}
105
106void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 77void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
107{ 78{
108 vcpu->arch.shregs.msr = msr; 79 vcpu->arch.shregs.msr = msr;
80 kvmppc_end_cede(vcpu);
109} 81}
110 82
111void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 83void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
@@ -257,15 +229,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
257 229
258 switch (req) { 230 switch (req) {
259 case H_CEDE: 231 case H_CEDE:
260 vcpu->arch.shregs.msr |= MSR_EE;
261 vcpu->arch.ceded = 1;
262 smp_mb();
263 if (!vcpu->arch.prodded)
264 kvmppc_vcpu_block(vcpu);
265 else
266 vcpu->arch.prodded = 0;
267 smp_mb();
268 vcpu->arch.ceded = 0;
269 break; 232 break;
270 case H_PROD: 233 case H_PROD:
271 target = kvmppc_get_gpr(vcpu, 4); 234 target = kvmppc_get_gpr(vcpu, 4);
@@ -388,20 +351,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
388 break; 351 break;
389 } 352 }
390 353
391
392 if (!(r & RESUME_HOST)) {
393 /* To avoid clobbering exit_reason, only check for signals if
394 * we aren't already exiting to userspace for some other
395 * reason. */
396 if (signal_pending(tsk)) {
397 vcpu->stat.signal_exits++;
398 run->exit_reason = KVM_EXIT_INTR;
399 r = -EINTR;
400 } else {
401 kvmppc_core_deliver_interrupts(vcpu);
402 }
403 }
404
405 return r; 354 return r;
406} 355}
407 356
@@ -479,13 +428,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
479 kvmppc_mmu_book3s_hv_init(vcpu); 428 kvmppc_mmu_book3s_hv_init(vcpu);
480 429
481 /* 430 /*
482 * Some vcpus may start out in stopped state. If we initialize 431 * We consider the vcpu stopped until we see the first run ioctl for it.
483 * them to busy-in-host state they will stop other vcpus in the
484 * vcore from running. Instead we initialize them to blocked
485 * state, effectively considering them to be stopped until we
486 * see the first run ioctl for them.
487 */ 432 */
488 vcpu->arch.state = KVMPPC_VCPU_BLOCKED; 433 vcpu->arch.state = KVMPPC_VCPU_STOPPED;
489 434
490 init_waitqueue_head(&vcpu->arch.cpu_run); 435 init_waitqueue_head(&vcpu->arch.cpu_run);
491 436
@@ -496,6 +441,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
496 if (vcore) { 441 if (vcore) {
497 INIT_LIST_HEAD(&vcore->runnable_threads); 442 INIT_LIST_HEAD(&vcore->runnable_threads);
498 spin_lock_init(&vcore->lock); 443 spin_lock_init(&vcore->lock);
444 init_waitqueue_head(&vcore->wq);
499 } 445 }
500 kvm->arch.vcores[core] = vcore; 446 kvm->arch.vcores[core] = vcore;
501 } 447 }
@@ -506,7 +452,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
506 452
507 spin_lock(&vcore->lock); 453 spin_lock(&vcore->lock);
508 ++vcore->num_threads; 454 ++vcore->num_threads;
509 ++vcore->n_blocked;
510 spin_unlock(&vcore->lock); 455 spin_unlock(&vcore->lock);
511 vcpu->arch.vcore = vcore; 456 vcpu->arch.vcore = vcore;
512 457
@@ -527,30 +472,31 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
527 kfree(vcpu); 472 kfree(vcpu);
528} 473}
529 474
530static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu) 475static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
531{ 476{
532 struct kvmppc_vcore *vc = vcpu->arch.vcore; 477 unsigned long dec_nsec, now;
533 478
534 spin_lock(&vc->lock); 479 now = get_tb();
535 vcpu->arch.state = KVMPPC_VCPU_BLOCKED; 480 if (now > vcpu->arch.dec_expires) {
536 ++vc->n_blocked; 481 /* decrementer has already gone negative */
537 if (vc->n_runnable > 0 && 482 kvmppc_core_queue_dec(vcpu);
538 vc->n_runnable + vc->n_blocked == vc->num_threads) { 483 kvmppc_core_deliver_interrupts(vcpu);
539 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, 484 return;
540 arch.run_list);
541 wake_up(&vcpu->arch.cpu_run);
542 } 485 }
543 spin_unlock(&vc->lock); 486 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
487 / tb_ticks_per_sec;
488 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
489 HRTIMER_MODE_REL);
490 vcpu->arch.timer_running = 1;
544} 491}
545 492
546static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu) 493static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
547{ 494{
548 struct kvmppc_vcore *vc = vcpu->arch.vcore; 495 vcpu->arch.ceded = 0;
549 496 if (vcpu->arch.timer_running) {
550 spin_lock(&vc->lock); 497 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
551 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 498 vcpu->arch.timer_running = 0;
552 --vc->n_blocked; 499 }
553 spin_unlock(&vc->lock);
554} 500}
555 501
556extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 502extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -565,6 +511,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
565 return; 511 return;
566 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 512 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
567 --vc->n_runnable; 513 --vc->n_runnable;
514 ++vc->n_busy;
568 /* decrement the physical thread id of each following vcpu */ 515 /* decrement the physical thread id of each following vcpu */
569 v = vcpu; 516 v = vcpu;
570 list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) 517 list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
@@ -578,15 +525,20 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
578 struct paca_struct *tpaca; 525 struct paca_struct *tpaca;
579 struct kvmppc_vcore *vc = vcpu->arch.vcore; 526 struct kvmppc_vcore *vc = vcpu->arch.vcore;
580 527
528 if (vcpu->arch.timer_running) {
529 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
530 vcpu->arch.timer_running = 0;
531 }
581 cpu = vc->pcpu + vcpu->arch.ptid; 532 cpu = vc->pcpu + vcpu->arch.ptid;
582 tpaca = &paca[cpu]; 533 tpaca = &paca[cpu];
583 tpaca->kvm_hstate.kvm_vcpu = vcpu; 534 tpaca->kvm_hstate.kvm_vcpu = vcpu;
584 tpaca->kvm_hstate.kvm_vcore = vc; 535 tpaca->kvm_hstate.kvm_vcore = vc;
536 tpaca->kvm_hstate.napping = 0;
537 vcpu->cpu = vc->pcpu;
585 smp_wmb(); 538 smp_wmb();
586#ifdef CONFIG_PPC_ICP_NATIVE 539#ifdef CONFIG_PPC_ICP_NATIVE
587 if (vcpu->arch.ptid) { 540 if (vcpu->arch.ptid) {
588 tpaca->cpu_start = 0x80; 541 tpaca->cpu_start = 0x80;
589 tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
590 wmb(); 542 wmb();
591 xics_wake_cpu(cpu); 543 xics_wake_cpu(cpu);
592 ++vc->n_woken; 544 ++vc->n_woken;
@@ -634,9 +586,10 @@ static int on_primary_thread(void)
634 */ 586 */
635static int kvmppc_run_core(struct kvmppc_vcore *vc) 587static int kvmppc_run_core(struct kvmppc_vcore *vc)
636{ 588{
637 struct kvm_vcpu *vcpu, *vnext; 589 struct kvm_vcpu *vcpu, *vcpu0, *vnext;
638 long ret; 590 long ret;
639 u64 now; 591 u64 now;
592 int ptid;
640 593
641 /* don't start if any threads have a signal pending */ 594 /* don't start if any threads have a signal pending */
642 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 595 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -655,29 +608,50 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
655 goto out; 608 goto out;
656 } 609 }
657 610
611 /*
612 * Assign physical thread IDs, first to non-ceded vcpus
613 * and then to ceded ones.
614 */
615 ptid = 0;
616 vcpu0 = NULL;
617 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
618 if (!vcpu->arch.ceded) {
619 if (!ptid)
620 vcpu0 = vcpu;
621 vcpu->arch.ptid = ptid++;
622 }
623 }
624 if (!vcpu0)
625 return 0; /* nothing to run */
626 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
627 if (vcpu->arch.ceded)
628 vcpu->arch.ptid = ptid++;
629
658 vc->n_woken = 0; 630 vc->n_woken = 0;
659 vc->nap_count = 0; 631 vc->nap_count = 0;
660 vc->entry_exit_count = 0; 632 vc->entry_exit_count = 0;
661 vc->vcore_running = 1; 633 vc->vcore_state = VCORE_RUNNING;
662 vc->in_guest = 0; 634 vc->in_guest = 0;
663 vc->pcpu = smp_processor_id(); 635 vc->pcpu = smp_processor_id();
636 vc->napping_threads = 0;
664 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 637 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
665 kvmppc_start_thread(vcpu); 638 kvmppc_start_thread(vcpu);
666 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
667 arch.run_list);
668 639
640 preempt_disable();
669 spin_unlock(&vc->lock); 641 spin_unlock(&vc->lock);
670 642
671 preempt_disable();
672 kvm_guest_enter(); 643 kvm_guest_enter();
673 __kvmppc_vcore_entry(NULL, vcpu); 644 __kvmppc_vcore_entry(NULL, vcpu0);
674 645
675 /* wait for secondary threads to finish writing their state to memory */
676 spin_lock(&vc->lock); 646 spin_lock(&vc->lock);
647 /* disable sending of IPIs on virtual external irqs */
648 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
649 vcpu->cpu = -1;
650 /* wait for secondary threads to finish writing their state to memory */
677 if (vc->nap_count < vc->n_woken) 651 if (vc->nap_count < vc->n_woken)
678 kvmppc_wait_for_nap(vc); 652 kvmppc_wait_for_nap(vc);
679 /* prevent other vcpu threads from doing kvmppc_start_thread() now */ 653 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
680 vc->vcore_running = 2; 654 vc->vcore_state = VCORE_EXITING;
681 spin_unlock(&vc->lock); 655 spin_unlock(&vc->lock);
682 656
683 /* make sure updates to secondary vcpu structs are visible now */ 657 /* make sure updates to secondary vcpu structs are visible now */
@@ -693,22 +667,26 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
693 if (now < vcpu->arch.dec_expires && 667 if (now < vcpu->arch.dec_expires &&
694 kvmppc_core_pending_dec(vcpu)) 668 kvmppc_core_pending_dec(vcpu))
695 kvmppc_core_dequeue_dec(vcpu); 669 kvmppc_core_dequeue_dec(vcpu);
696 if (!vcpu->arch.trap) { 670
697 if (signal_pending(vcpu->arch.run_task)) { 671 ret = RESUME_GUEST;
698 vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR; 672 if (vcpu->arch.trap)
699 vcpu->arch.ret = -EINTR; 673 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
700 } 674 vcpu->arch.run_task);
701 continue; /* didn't get to run */ 675
702 }
703 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
704 vcpu->arch.run_task);
705 vcpu->arch.ret = ret; 676 vcpu->arch.ret = ret;
706 vcpu->arch.trap = 0; 677 vcpu->arch.trap = 0;
678
679 if (vcpu->arch.ceded) {
680 if (ret != RESUME_GUEST)
681 kvmppc_end_cede(vcpu);
682 else
683 kvmppc_set_timer(vcpu);
684 }
707 } 685 }
708 686
709 spin_lock(&vc->lock); 687 spin_lock(&vc->lock);
710 out: 688 out:
711 vc->vcore_running = 0; 689 vc->vcore_state = VCORE_INACTIVE;
712 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 690 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
713 arch.run_list) { 691 arch.run_list) {
714 if (vcpu->arch.ret != RESUME_GUEST) { 692 if (vcpu->arch.ret != RESUME_GUEST) {
@@ -720,82 +698,130 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
720 return 1; 698 return 1;
721} 699}
722 700
723static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 701/*
702 * Wait for some other vcpu thread to execute us, and
703 * wake us up when we need to handle something in the host.
704 */
705static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
724{ 706{
725 int ptid;
726 int wait_state;
727 struct kvmppc_vcore *vc;
728 DEFINE_WAIT(wait); 707 DEFINE_WAIT(wait);
729 708
730 /* No need to go into the guest when all we do is going out */ 709 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
731 if (signal_pending(current)) { 710 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
732 kvm_run->exit_reason = KVM_EXIT_INTR; 711 schedule();
733 return -EINTR; 712 finish_wait(&vcpu->arch.cpu_run, &wait);
713}
714
715/*
716 * All the vcpus in this vcore are idle, so wait for a decrementer
717 * or external interrupt to one of the vcpus. vc->lock is held.
718 */
719static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
720{
721 DEFINE_WAIT(wait);
722 struct kvm_vcpu *v;
723 int all_idle = 1;
724
725 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
726 vc->vcore_state = VCORE_SLEEPING;
727 spin_unlock(&vc->lock);
728 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
729 if (!v->arch.ceded || v->arch.pending_exceptions) {
730 all_idle = 0;
731 break;
732 }
734 } 733 }
734 if (all_idle)
735 schedule();
736 finish_wait(&vc->wq, &wait);
737 spin_lock(&vc->lock);
738 vc->vcore_state = VCORE_INACTIVE;
739}
735 740
736 /* On PPC970, check that we have an RMA region */ 741static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
737 if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) 742{
738 return -EPERM; 743 int n_ceded;
744 int prev_state;
745 struct kvmppc_vcore *vc;
746 struct kvm_vcpu *v, *vn;
739 747
740 kvm_run->exit_reason = 0; 748 kvm_run->exit_reason = 0;
741 vcpu->arch.ret = RESUME_GUEST; 749 vcpu->arch.ret = RESUME_GUEST;
742 vcpu->arch.trap = 0; 750 vcpu->arch.trap = 0;
743 751
744 flush_fp_to_thread(current);
745 flush_altivec_to_thread(current);
746 flush_vsx_to_thread(current);
747
748 /* 752 /*
749 * Synchronize with other threads in this virtual core 753 * Synchronize with other threads in this virtual core
750 */ 754 */
751 vc = vcpu->arch.vcore; 755 vc = vcpu->arch.vcore;
752 spin_lock(&vc->lock); 756 spin_lock(&vc->lock);
753 /* This happens the first time this is called for a vcpu */ 757 vcpu->arch.ceded = 0;
754 if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
755 --vc->n_blocked;
756 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
757 ptid = vc->n_runnable;
758 vcpu->arch.run_task = current; 758 vcpu->arch.run_task = current;
759 vcpu->arch.kvm_run = kvm_run; 759 vcpu->arch.kvm_run = kvm_run;
760 vcpu->arch.ptid = ptid; 760 prev_state = vcpu->arch.state;
761 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
761 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); 762 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
762 ++vc->n_runnable; 763 ++vc->n_runnable;
763 764
764 wait_state = TASK_INTERRUPTIBLE; 765 /*
765 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 766 * This happens the first time this is called for a vcpu.
766 if (signal_pending(current)) { 767 * If the vcore is already running, we may be able to start
767 if (!vc->vcore_running) { 768 * this thread straight away and have it join in.
768 kvm_run->exit_reason = KVM_EXIT_INTR; 769 */
769 vcpu->arch.ret = -EINTR; 770 if (prev_state == KVMPPC_VCPU_STOPPED) {
770 break; 771 if (vc->vcore_state == VCORE_RUNNING &&
771 } 772 VCORE_EXIT_COUNT(vc) == 0) {
772 /* have to wait for vcore to stop executing guest */ 773 vcpu->arch.ptid = vc->n_runnable - 1;
773 wait_state = TASK_UNINTERRUPTIBLE; 774 kvmppc_start_thread(vcpu);
774 smp_send_reschedule(vc->pcpu);
775 } 775 }
776 776
777 if (!vc->vcore_running && 777 } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
778 vc->n_runnable + vc->n_blocked == vc->num_threads) { 778 --vc->n_busy;
779 /* we can run now */
780 if (kvmppc_run_core(vc))
781 continue;
782 }
783 779
784 if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0) 780 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
785 kvmppc_start_thread(vcpu); 781 !signal_pending(current)) {
782 if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
783 spin_unlock(&vc->lock);
784 kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
785 spin_lock(&vc->lock);
786 continue;
787 }
788 n_ceded = 0;
789 list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
790 n_ceded += v->arch.ceded;
791 if (n_ceded == vc->n_runnable)
792 kvmppc_vcore_blocked(vc);
793 else
794 kvmppc_run_core(vc);
795
796 list_for_each_entry_safe(v, vn, &vc->runnable_threads,
797 arch.run_list) {
798 kvmppc_core_deliver_interrupts(v);
799 if (signal_pending(v->arch.run_task)) {
800 kvmppc_remove_runnable(vc, v);
801 v->stat.signal_exits++;
802 v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
803 v->arch.ret = -EINTR;
804 wake_up(&v->arch.cpu_run);
805 }
806 }
807 }
786 808
787 /* wait for other threads to come in, or wait for vcore */ 809 if (signal_pending(current)) {
788 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); 810 if (vc->vcore_state == VCORE_RUNNING ||
789 spin_unlock(&vc->lock); 811 vc->vcore_state == VCORE_EXITING) {
790 schedule(); 812 spin_unlock(&vc->lock);
791 finish_wait(&vcpu->arch.cpu_run, &wait); 813 kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
792 spin_lock(&vc->lock); 814 spin_lock(&vc->lock);
815 }
816 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
817 kvmppc_remove_runnable(vc, vcpu);
818 vcpu->stat.signal_exits++;
819 kvm_run->exit_reason = KVM_EXIT_INTR;
820 vcpu->arch.ret = -EINTR;
821 }
793 } 822 }
794 823
795 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
796 kvmppc_remove_runnable(vc, vcpu);
797 spin_unlock(&vc->lock); 824 spin_unlock(&vc->lock);
798
799 return vcpu->arch.ret; 825 return vcpu->arch.ret;
800} 826}
801 827
@@ -808,6 +834,21 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
808 return -EINVAL; 834 return -EINVAL;
809 } 835 }
810 836
837 /* No need to go into the guest when all we'll do is come back out */
838 if (signal_pending(current)) {
839 run->exit_reason = KVM_EXIT_INTR;
840 return -EINTR;
841 }
842
843 /* On PPC970, check that we have an RMA region */
844 if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
845 return -EPERM;
846
847 flush_fp_to_thread(current);
848 flush_altivec_to_thread(current);
849 flush_vsx_to_thread(current);
850 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
851
811 do { 852 do {
812 r = kvmppc_run_vcpu(run, vcpu); 853 r = kvmppc_run_vcpu(run, vcpu);
813 854
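One detail in the kvmppc_set_timer() added above deserves a note: the guest decrementer expiry is kept in timebase ticks and converted to nanoseconds for the host hrtimer. As a worked example, assuming a 512 MHz timebase (i.e. tb_ticks_per_sec = 512000000; an assumption for illustration, not something stated in the patch):

	/* dec_nsec = (dec_expires - now) * NSEC_PER_SEC / tb_ticks_per_sec
	 *          = 5120000 * 1000000000 / 512000000
	 *          = 10000000 ns, i.e. a 10 ms hrtimer for a decrementer
	 *            that is due 5,120,000 timebase ticks from now.
	 */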
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bc6ade933089..f422231d9235 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -52,7 +52,7 @@ kvmppc_skip_Hinterrupt:
52 b . 52 b .
53 53
54/* 54/*
55 * Call kvmppc_handler_trampoline_enter in real mode. 55 * Call kvmppc_hv_entry in real mode.
56 * Must be called with interrupts hard-disabled. 56 * Must be called with interrupts hard-disabled.
57 * 57 *
58 * Input Registers: 58 * Input Registers:
@@ -92,6 +92,12 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
92kvm_start_guest: 92kvm_start_guest:
93 ld r1,PACAEMERGSP(r13) 93 ld r1,PACAEMERGSP(r13)
94 subi r1,r1,STACK_FRAME_OVERHEAD 94 subi r1,r1,STACK_FRAME_OVERHEAD
95 ld r2,PACATOC(r13)
96
97 /* were we napping due to cede? */
98 lbz r0,HSTATE_NAPPING(r13)
99 cmpwi r0,0
100 bne kvm_end_cede
95 101
96 /* get vcpu pointer */ 102 /* get vcpu pointer */
97 ld r4, HSTATE_KVM_VCPU(r13) 103 ld r4, HSTATE_KVM_VCPU(r13)
@@ -279,15 +285,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
279 cmpwi r0,0 285 cmpwi r0,0
280 beq 20b 286 beq 20b
281 287
282 /* Set LPCR. Set the MER bit if there is a pending external irq. */ 288 /* Set LPCR and RMOR. */
28310: ld r8,KVM_LPCR(r9) 28910: ld r8,KVM_LPCR(r9)
284 ld r0,VCPU_PENDING_EXC(r4) 290 mtspr SPRN_LPCR,r8
285 li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
286 oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
287 and. r0,r0,r7
288 beq 11f
289 ori r8,r8,LPCR_MER
29011: mtspr SPRN_LPCR,r8
291 ld r8,KVM_RMOR(r9) 291 ld r8,KVM_RMOR(r9)
292 mtspr SPRN_RMOR,r8 292 mtspr SPRN_RMOR,r8
293 isync 293 isync
@@ -451,19 +451,50 @@ toc_tlbie_lock:
451 mtctr r6 451 mtctr r6
452 mtxer r7 452 mtxer r7
453 453
454 /* Move SRR0 and SRR1 into the respective regs */ 454kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
455 ld r6, VCPU_SRR0(r4) 455 ld r6, VCPU_SRR0(r4)
456 ld r7, VCPU_SRR1(r4) 456 ld r7, VCPU_SRR1(r4)
457 mtspr SPRN_SRR0, r6
458 mtspr SPRN_SRR1, r7
459
460 ld r10, VCPU_PC(r4) 457 ld r10, VCPU_PC(r4)
458 ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
461 459
462 ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */
463 rldicl r11, r11, 63 - MSR_HV_LG, 1 460 rldicl r11, r11, 63 - MSR_HV_LG, 1
464 rotldi r11, r11, 1 + MSR_HV_LG 461 rotldi r11, r11, 1 + MSR_HV_LG
465 ori r11, r11, MSR_ME 462 ori r11, r11, MSR_ME
466 463
464 /* Check if we can deliver an external or decrementer interrupt now */
465 ld r0,VCPU_PENDING_EXC(r4)
466 li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
467 oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
468 and r0,r0,r8
469 cmpdi cr1,r0,0
470 andi. r0,r11,MSR_EE
471 beq cr1,11f
472BEGIN_FTR_SECTION
473 mfspr r8,SPRN_LPCR
474 ori r8,r8,LPCR_MER
475 mtspr SPRN_LPCR,r8
476 isync
477END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
478 beq 5f
479 li r0,BOOK3S_INTERRUPT_EXTERNAL
48012: mr r6,r10
481 mr r10,r0
482 mr r7,r11
483 li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
484 rotldi r11,r11,63
485 b 5f
48611: beq 5f
487 mfspr r0,SPRN_DEC
488 cmpwi r0,0
489 li r0,BOOK3S_INTERRUPT_DECREMENTER
490 blt 12b
491
492 /* Move SRR0 and SRR1 into the respective regs */
4935: mtspr SPRN_SRR0, r6
494 mtspr SPRN_SRR1, r7
495 li r0,0
496 stb r0,VCPU_CEDED(r4) /* cancel cede */
497
467fast_guest_return: 498fast_guest_return:
468 mtspr SPRN_HSRR0,r10 499 mtspr SPRN_HSRR0,r10
469 mtspr SPRN_HSRR1,r11 500 mtspr SPRN_HSRR1,r11
@@ -577,21 +608,20 @@ kvmppc_interrupt:
577 /* See if this is something we can handle in real mode */ 608 /* See if this is something we can handle in real mode */
578 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL 609 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
579 beq hcall_try_real_mode 610 beq hcall_try_real_mode
580hcall_real_cont:
581 611
582 /* Check for mediated interrupts (could be done earlier really ...) */ 612 /* Check for mediated interrupts (could be done earlier really ...) */
583BEGIN_FTR_SECTION 613BEGIN_FTR_SECTION
584 cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL 614 cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
585 bne+ 1f 615 bne+ 1f
586 ld r5,VCPU_KVM(r9)
587 ld r5,KVM_LPCR(r5)
588 andi. r0,r11,MSR_EE 616 andi. r0,r11,MSR_EE
589 beq 1f 617 beq 1f
618 mfspr r5,SPRN_LPCR
590 andi. r0,r5,LPCR_MER 619 andi. r0,r5,LPCR_MER
591 bne bounce_ext_interrupt 620 bne bounce_ext_interrupt
5921: 6211:
593END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 622END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
594 623
624hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
595 /* Save DEC */ 625 /* Save DEC */
596 mfspr r5,SPRN_DEC 626 mfspr r5,SPRN_DEC
597 mftb r6 627 mftb r6
@@ -685,7 +715,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
685 slbia 715 slbia
686 ptesync 716 ptesync
687 717
688hdec_soon: 718hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */
689BEGIN_FTR_SECTION 719BEGIN_FTR_SECTION
690 b 32f 720 b 32f
691END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 721END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
@@ -703,6 +733,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
703 addi r0,r3,0x100 733 addi r0,r3,0x100
704 stwcx. r0,0,r6 734 stwcx. r0,0,r6
705 bne 41b 735 bne 41b
736 lwsync
706 737
707 /* 738 /*
708 * At this point we have an interrupt that we have to pass 739 * At this point we have an interrupt that we have to pass
@@ -716,18 +747,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
716 * interrupt, since the other threads will already be on their 747 * interrupt, since the other threads will already be on their
717 * way here in that case. 748 * way here in that case.
718 */ 749 */
750 cmpwi r3,0x100 /* Are we the first here? */
751 bge 43f
752 cmpwi r3,1 /* Are any other threads in the guest? */
753 ble 43f
719 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 754 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
720 beq 40f 755 beq 40f
721 cmpwi r3,0x100 /* Are we the first here? */
722 bge 40f
723 cmpwi r3,1
724 ble 40f
725 li r0,0 756 li r0,0
726 mtspr SPRN_HDEC,r0 757 mtspr SPRN_HDEC,r0
72740: 75840:
759 /*
760 * Send an IPI to any napping threads, since an HDEC interrupt
761 * doesn't wake CPUs up from nap.
762 */
763 lwz r3,VCORE_NAPPING_THREADS(r5)
764 lwz r4,VCPU_PTID(r9)
765 li r0,1
766 sldi r0,r0,r4
767 andc. r3,r3,r0 /* no sense IPI'ing ourselves */
768 beq 43f
769 mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
770 subf r6,r4,r13
77142: andi. r0,r3,1
772 beq 44f
773 ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
774 li r0,IPI_PRIORITY
775 li r7,XICS_QIRR
776 stbcix r0,r7,r8 /* trigger the IPI */
77744: srdi. r3,r3,1
778 addi r6,r6,PACA_SIZE
779 bne 42b
728 780
729 /* Secondary threads wait for primary to do partition switch */ 781 /* Secondary threads wait for primary to do partition switch */
730 ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ 78243: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
731 ld r5,HSTATE_KVM_VCORE(r13) 783 ld r5,HSTATE_KVM_VCORE(r13)
732 lwz r3,VCPU_PTID(r9) 784 lwz r3,VCPU_PTID(r9)
733 cmpwi r3,0 785 cmpwi r3,0
@@ -1080,7 +1132,6 @@ hcall_try_real_mode:
1080hcall_real_fallback: 1132hcall_real_fallback:
1081 li r12,BOOK3S_INTERRUPT_SYSCALL 1133 li r12,BOOK3S_INTERRUPT_SYSCALL
1082 ld r9, HSTATE_KVM_VCPU(r13) 1134 ld r9, HSTATE_KVM_VCPU(r13)
1083 ld r11, VCPU_MSR(r9)
1084 1135
1085 b hcall_real_cont 1136 b hcall_real_cont
1086 1137
@@ -1142,7 +1193,7 @@ hcall_real_table:
1142 .long 0 /* 0xd4 */ 1193 .long 0 /* 0xd4 */
1143 .long 0 /* 0xd8 */ 1194 .long 0 /* 0xd8 */
1144 .long 0 /* 0xdc */ 1195 .long 0 /* 0xdc */
1145 .long 0 /* 0xe0 */ 1196 .long .kvmppc_h_cede - hcall_real_table
1146 .long 0 /* 0xe4 */ 1197 .long 0 /* 0xe4 */
1147 .long 0 /* 0xe8 */ 1198 .long 0 /* 0xe8 */
1148 .long 0 /* 0xec */ 1199 .long 0 /* 0xec */
@@ -1171,7 +1222,8 @@ bounce_ext_interrupt:
1171 mtspr SPRN_SRR0,r10 1222 mtspr SPRN_SRR0,r10
1172 mtspr SPRN_SRR1,r11 1223 mtspr SPRN_SRR1,r11
1173 li r10,BOOK3S_INTERRUPT_EXTERNAL 1224 li r10,BOOK3S_INTERRUPT_EXTERNAL
1174 LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME); 1225 li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1226 rotldi r11,r11,63
1175 b fast_guest_return 1227 b fast_guest_return
1176 1228
1177_GLOBAL(kvmppc_h_set_dabr) 1229_GLOBAL(kvmppc_h_set_dabr)
@@ -1180,6 +1232,178 @@ _GLOBAL(kvmppc_h_set_dabr)
1180 li r3,0 1232 li r3,0
1181 blr 1233 blr
1182 1234
1235_GLOBAL(kvmppc_h_cede)
1236 ori r11,r11,MSR_EE
1237 std r11,VCPU_MSR(r3)
1238 li r0,1
1239 stb r0,VCPU_CEDED(r3)
1240 sync /* order setting ceded vs. testing prodded */
1241 lbz r5,VCPU_PRODDED(r3)
1242 cmpwi r5,0
1243 bne 1f
1244 li r0,0 /* set trap to 0 to say hcall is handled */
1245 stw r0,VCPU_TRAP(r3)
1246 li r0,H_SUCCESS
1247 std r0,VCPU_GPR(r3)(r3)
1248BEGIN_FTR_SECTION
1249 b 2f /* just send it up to host on 970 */
1250END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1251
1252 /*
1253 * Set our bit in the bitmask of napping threads unless all the
1254 * other threads are already napping, in which case we send this
1255 * up to the host.
1256 */
1257 ld r5,HSTATE_KVM_VCORE(r13)
1258 lwz r6,VCPU_PTID(r3)
1259 lwz r8,VCORE_ENTRY_EXIT(r5)
1260 clrldi r8,r8,56
1261 li r0,1
1262 sld r0,r0,r6
1263 addi r6,r5,VCORE_NAPPING_THREADS
126431: lwarx r4,0,r6
1265 or r4,r4,r0
1266 popcntw r7,r4
1267 cmpw r7,r8
1268 bge 2f
1269 stwcx. r4,0,r6
1270 bne 31b
1271 li r0,1
1272 stb r0,HSTATE_NAPPING(r13)
1273 /* order napping_threads update vs testing entry_exit_count */
1274 lwsync
1275 mr r4,r3
1276 lwz r7,VCORE_ENTRY_EXIT(r5)
1277 cmpwi r7,0x100
1278 bge 33f /* another thread already exiting */
1279
1280/*
1281 * Although not specifically required by the architecture, POWER7
1282 * preserves the following registers in nap mode, even if an SMT mode
1283 * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
1284 * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
1285 */
1286 /* Save non-volatile GPRs */
1287 std r14, VCPU_GPR(r14)(r3)
1288 std r15, VCPU_GPR(r15)(r3)
1289 std r16, VCPU_GPR(r16)(r3)
1290 std r17, VCPU_GPR(r17)(r3)
1291 std r18, VCPU_GPR(r18)(r3)
1292 std r19, VCPU_GPR(r19)(r3)
1293 std r20, VCPU_GPR(r20)(r3)
1294 std r21, VCPU_GPR(r21)(r3)
1295 std r22, VCPU_GPR(r22)(r3)
1296 std r23, VCPU_GPR(r23)(r3)
1297 std r24, VCPU_GPR(r24)(r3)
1298 std r25, VCPU_GPR(r25)(r3)
1299 std r26, VCPU_GPR(r26)(r3)
1300 std r27, VCPU_GPR(r27)(r3)
1301 std r28, VCPU_GPR(r28)(r3)
1302 std r29, VCPU_GPR(r29)(r3)
1303 std r30, VCPU_GPR(r30)(r3)
1304 std r31, VCPU_GPR(r31)(r3)
1305
1306 /* save FP state */
1307 bl .kvmppc_save_fp
1308
1309 /*
1310 * Take a nap until a decrementer or external interrupt occurs,
1311 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
1312 */
1313 li r0,0x80
1314 stb r0,PACAPROCSTART(r13)
1315 mfspr r5,SPRN_LPCR
1316 ori r5,r5,LPCR_PECE0 | LPCR_PECE1
1317 mtspr SPRN_LPCR,r5
1318 isync
1319 li r0, 0
1320 std r0, HSTATE_SCRATCH0(r13)
1321 ptesync
1322 ld r0, HSTATE_SCRATCH0(r13)
13231: cmpd r0, r0
1324 bne 1b
1325 nap
1326 b .
1327
1328kvm_end_cede:
1329 /* Woken by external or decrementer interrupt */
1330 ld r1, HSTATE_HOST_R1(r13)
1331 ld r2, PACATOC(r13)
1332
1333 /* If we're a secondary thread and we got here by an IPI, ack it */
1334 ld r4,HSTATE_KVM_VCPU(r13)
1335 lwz r3,VCPU_PTID(r4)
1336 cmpwi r3,0
1337 beq 27f
1338 mfspr r3,SPRN_SRR1
1339 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
1340 cmpwi r3,4 /* was it an external interrupt? */
1341 bne 27f
1342 ld r5, HSTATE_XICS_PHYS(r13)
1343 li r0,0xff
1344 li r6,XICS_QIRR
1345 li r7,XICS_XIRR
1346 lwzcix r8,r5,r7 /* ack the interrupt */
1347 sync
1348 stbcix r0,r5,r6 /* clear it */
1349 stwcix r8,r5,r7 /* EOI it */
135027:
1351 /* load up FP state */
1352 bl kvmppc_load_fp
1353
1354 /* Load NV GPRS */
1355 ld r14, VCPU_GPR(r14)(r4)
1356 ld r15, VCPU_GPR(r15)(r4)
1357 ld r16, VCPU_GPR(r16)(r4)
1358 ld r17, VCPU_GPR(r17)(r4)
1359 ld r18, VCPU_GPR(r18)(r4)
1360 ld r19, VCPU_GPR(r19)(r4)
1361 ld r20, VCPU_GPR(r20)(r4)
1362 ld r21, VCPU_GPR(r21)(r4)
1363 ld r22, VCPU_GPR(r22)(r4)
1364 ld r23, VCPU_GPR(r23)(r4)
1365 ld r24, VCPU_GPR(r24)(r4)
1366 ld r25, VCPU_GPR(r25)(r4)
1367 ld r26, VCPU_GPR(r26)(r4)
1368 ld r27, VCPU_GPR(r27)(r4)
1369 ld r28, VCPU_GPR(r28)(r4)
1370 ld r29, VCPU_GPR(r29)(r4)
1371 ld r30, VCPU_GPR(r30)(r4)
1372 ld r31, VCPU_GPR(r31)(r4)
1373
1374 /* clear our bit in vcore->napping_threads */
137533: ld r5,HSTATE_KVM_VCORE(r13)
1376 lwz r3,VCPU_PTID(r4)
1377 li r0,1
1378 sld r0,r0,r3
1379 addi r6,r5,VCORE_NAPPING_THREADS
138032: lwarx r7,0,r6
1381 andc r7,r7,r0
1382 stwcx. r7,0,r6
1383 bne 32b
1384 li r0,0
1385 stb r0,HSTATE_NAPPING(r13)
1386
1387 /* see if any other thread is already exiting */
1388 lwz r0,VCORE_ENTRY_EXIT(r5)
1389 cmpwi r0,0x100
1390 blt kvmppc_cede_reentry /* if not go back to guest */
1391
1392 /* some threads are exiting, so go to the guest exit path */
1393 b hcall_real_fallback
1394
1395 /* cede when already previously prodded case */
13961: li r0,0
1397 stb r0,VCPU_PRODDED(r3)
1398 sync /* order testing prodded vs. clearing ceded */
1399 stb r0,VCPU_CEDED(r3)
1400 li r3,H_SUCCESS
1401 blr
1402
1403 /* we've ceded but we want to give control to the host */
14042: li r3,H_TOO_HARD
1405 blr
1406
1183secondary_too_late: 1407secondary_too_late:
1184 ld r5,HSTATE_KVM_VCORE(r13) 1408 ld r5,HSTATE_KVM_VCORE(r13)
1185 HMT_LOW 1409 HMT_LOW
@@ -1197,14 +1421,20 @@ secondary_too_late:
1197 slbmte r6,r5 1421 slbmte r6,r5
11981: addi r11,r11,16 14221: addi r11,r11,16
1199 .endr 1423 .endr
1200 b 50f
1201 1424
1202secondary_nap: 1425secondary_nap:
1203 /* Clear any pending IPI */ 1426 /* Clear any pending IPI - assume we're a secondary thread */
120450: ld r5, HSTATE_XICS_PHYS(r13) 1427 ld r5, HSTATE_XICS_PHYS(r13)
1428 li r7, XICS_XIRR
1429 lwzcix r3, r5, r7 /* ack any pending interrupt */
1430 rlwinm. r0, r3, 0, 0xffffff /* any pending? */
1431 beq 37f
1432 sync
1205 li r0, 0xff 1433 li r0, 0xff
1206 li r6, XICS_QIRR 1434 li r6, XICS_QIRR
1207 stbcix r0, r5, r6 1435 stbcix r0, r5, r6 /* clear the IPI */
1436 stwcix r3, r5, r7 /* EOI it */
143737: sync
1208 1438
1209 /* increment the nap count and then go to nap mode */ 1439 /* increment the nap count and then go to nap mode */
1210 ld r4, HSTATE_KVM_VCORE(r13) 1440 ld r4, HSTATE_KVM_VCORE(r13)
@@ -1214,13 +1444,12 @@ secondary_nap:
1214 addi r3, r3, 1 1444 addi r3, r3, 1
1215 stwcx. r3, 0, r4 1445 stwcx. r3, 0, r4
1216 bne 51b 1446 bne 51b
1217 isync
1218 1447
1448 li r3, LPCR_PECE0
1219 mfspr r4, SPRN_LPCR 1449 mfspr r4, SPRN_LPCR
1220 li r0, LPCR_PECE 1450 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
1221 andc r4, r4, r0
1222 ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */
1223 mtspr SPRN_LPCR, r4 1451 mtspr SPRN_LPCR, r4
1452 isync
1224 li r0, 0 1453 li r0, 0
1225 std r0, HSTATE_SCRATCH0(r13) 1454 std r0, HSTATE_SCRATCH0(r13)
1226 ptesync 1455 ptesync
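The subtlest piece of the real-mode code above is the lwarx/stwcx. loop in kvmppc_h_cede, which sets this vcpu's bit in vcore->napping_threads but bails out to the host if that would leave no thread awake. A rough C-level model of that protocol, written with C11 atomics purely for illustration (the function and its parameters are hypothetical, not part of the patch):

	#include <stdatomic.h>
	#include <stdbool.h>

	/* Set our ptid's bit in the napping bitmap unless every thread that
	 * entered the guest would then be napping; in that case the cede is
	 * handed up to the host (the assembly returns H_TOO_HARD). */
	static bool try_mark_napping(_Atomic unsigned *napping_threads,
				     int ptid, int entry_count)
	{
		unsigned old = atomic_load(napping_threads);
		unsigned mask;

		do {
			mask = old | (1u << ptid);
			if (__builtin_popcount(mask) >= entry_count)
				return false;	/* we would be the last thread awake */
		} while (!atomic_compare_exchange_weak(napping_threads, &old, mask));
		return true;			/* bit set: safe to execute nap */
	}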
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a8000ce562b0..0d843c6ba315 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,12 +39,8 @@
39 39
40int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 40int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
41{ 41{
42#ifndef CONFIG_KVM_BOOK3S_64_HV
43 return !(v->arch.shared->msr & MSR_WE) || 42 return !(v->arch.shared->msr & MSR_WE) ||
44 !!(v->arch.pending_exceptions); 43 !!(v->arch.pending_exceptions);
45#else
46 return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
47#endif
48} 44}
49 45
50int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 46int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -285,6 +281,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
285{ 281{
286 struct kvm_vcpu *vcpu; 282 struct kvm_vcpu *vcpu;
287 vcpu = kvmppc_core_vcpu_create(kvm, id); 283 vcpu = kvmppc_core_vcpu_create(kvm, id);
284 vcpu->arch.wqp = &vcpu->wq;
288 if (!IS_ERR(vcpu)) 285 if (!IS_ERR(vcpu))
289 kvmppc_create_vcpu_debugfs(vcpu, id); 286 kvmppc_create_vcpu_debugfs(vcpu, id);
290 return vcpu; 287 return vcpu;
@@ -316,8 +313,8 @@ static void kvmppc_decrementer_func(unsigned long data)
316 313
317 kvmppc_core_queue_dec(vcpu); 314 kvmppc_core_queue_dec(vcpu);
318 315
319 if (waitqueue_active(&vcpu->wq)) { 316 if (waitqueue_active(vcpu->arch.wqp)) {
320 wake_up_interruptible(&vcpu->wq); 317 wake_up_interruptible(vcpu->arch.wqp);
321 vcpu->stat.halt_wakeup++; 318 vcpu->stat.halt_wakeup++;
322 } 319 }
323} 320}
@@ -570,13 +567,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
570 567
571int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 568int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
572{ 569{
573 if (irq->irq == KVM_INTERRUPT_UNSET) 570 if (irq->irq == KVM_INTERRUPT_UNSET) {
574 kvmppc_core_dequeue_external(vcpu, irq); 571 kvmppc_core_dequeue_external(vcpu, irq);
575 else 572 return 0;
576 kvmppc_core_queue_external(vcpu, irq); 573 }
574
575 kvmppc_core_queue_external(vcpu, irq);
577 576
578 if (waitqueue_active(&vcpu->wq)) { 577 if (waitqueue_active(vcpu->arch.wqp)) {
579 wake_up_interruptible(&vcpu->wq); 578 wake_up_interruptible(vcpu->arch.wqp);
580 vcpu->stat.halt_wakeup++; 579 vcpu->stat.halt_wakeup++;
581 } else if (vcpu->cpu != -1) { 580 } else if (vcpu->cpu != -1) {
582 smp_send_reschedule(vcpu->cpu); 581 smp_send_reschedule(vcpu->cpu);