author     Paul Mackerras <paulus@samba.org>  2011-06-28 20:23:08 -0400
committer  Avi Kivity <avi@redhat.com>        2011-07-12 06:16:57 -0400
commit     371fefd6f2dc46668e00871930dde613b88d4bde (patch)
tree       35fe799343861405914d27873eb175eb04d6dce5
parent     54738c097163c3f01e67ccc85462b78d4d4f495f (diff)
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded.

This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4.

To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code.

When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core.

We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus.

When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy-in-host state to blocked).

It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start executing the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time it is ready to actually enter the guest.

Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
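As a rough illustration of what this means for userspace (a minimal sketch, not part of the patch: the /dev/kvm setup is generic, the guest size is made up, and error handling is omitted), a qemu-like program could query KVM_CAP_PPC_SMT and space its vcpu ids by the threads-per-vcore value to get a single-threaded guest:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

	/* Number of vcpus per vcore; <= 0 means the capability is absent. */
	int smt = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
	if (smt <= 0)
		smt = 1;		/* no SMT packing: one vcpu per vcore */

	int nr_guest_cpus = 4;		/* illustrative guest size */
	for (int i = 0; i < nr_guest_cpus; i++) {
		/*
		 * For a single-threaded guest, make every vcpu id a multiple
		 * of the threads-per-vcore value so that each vcpu ends up in
		 * its own vcore (vcore id = vcpu id / smt).
		 */
		int vcpu_id = i * smt;
		int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, vcpu_id);
		printf("vcpu id %d -> vcore %d (fd %d)\n",
		       vcpu_id, vcpu_id / smt, vcpu_fd);
	}
	return 0;
}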
-rw-r--r--  Documentation/virtual/kvm/api.txt           13
-rw-r--r--  arch/powerpc/include/asm/kvm.h               1
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h    2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h         46
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h          13
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c            6
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S        31
-rw-r--r--  arch/powerpc/kernel/idle_power7.S            2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c               316
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S    168
-rw-r--r--  arch/powerpc/kvm/powerpc.c                   4
-rw-r--r--  arch/powerpc/sysdev/xics/icp-native.c        9
-rw-r--r--  include/linux/kvm.h                          1
13 files changed, 567 insertions, 45 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a1d344d5ff4c..681871311d3e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -180,6 +180,19 @@ KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
180If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 180If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
181cpus max. 181cpus max.
182 182
183On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
184threads in one or more virtual CPU cores. (This is because the
185hardware requires all the hardware threads in a CPU core to be in the
186same partition.) The KVM_CAP_PPC_SMT capability indicates the number
187of vcpus per virtual core (vcore). The vcore id is obtained by
188dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
189given vcore will always be in the same physical core as each other
190(though that might be a different physical core from time to time).
191Userspace can control the threading (SMT) mode of the guest by its
192allocation of vcpu ids. For example, if userspace wants
193single-threaded guest vcpus, it should make all vcpu ids be a multiple
194of the number of vcpus per vcore.
195
1834.8 KVM_GET_DIRTY_LOG (vm ioctl) 1964.8 KVM_GET_DIRTY_LOG (vm ioctl)
184 197
185Capability: basic 198Capability: basic
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index c3ec990daf45..471bb3d85e0b 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -24,6 +24,7 @@
24 24
25/* Select powerpc specific features in <linux/kvm.h> */ 25/* Select powerpc specific features in <linux/kvm.h> */
26#define __KVM_HAVE_SPAPR_TCE 26#define __KVM_HAVE_SPAPR_TCE
27#define __KVM_HAVE_PPC_SMT
27 28
28struct kvm_regs { 29struct kvm_regs {
29 __u64 pc; 30 __u64 pc;
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index b7b039532fbc..9cfd5436782d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -78,6 +78,8 @@ struct kvmppc_host_state {
78 78
79#ifdef CONFIG_KVM_BOOK3S_64_HV 79#ifdef CONFIG_KVM_BOOK3S_64_HV
80 struct kvm_vcpu *kvm_vcpu; 80 struct kvm_vcpu *kvm_vcpu;
81 struct kvmppc_vcore *kvm_vcore;
82 unsigned long xics_phys;
81 u64 dabr; 83 u64 dabr;
82 u64 host_mmcr[3]; 84 u64 host_mmcr[3];
83 u32 host_pmc[6]; 85 u32 host_pmc[6];
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 5616e39a7fa4..0d6d569e19c7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -25,10 +25,14 @@
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/kvm_types.h> 27#include <linux/kvm_types.h>
28#include <linux/threads.h>
29#include <linux/spinlock.h>
28#include <linux/kvm_para.h> 30#include <linux/kvm_para.h>
29#include <asm/kvm_asm.h> 31#include <asm/kvm_asm.h>
32#include <asm/processor.h>
30 33
31#define KVM_MAX_VCPUS 1 34#define KVM_MAX_VCPUS NR_CPUS
35#define KVM_MAX_VCORES NR_CPUS
32#define KVM_MEMORY_SLOTS 32 36#define KVM_MEMORY_SLOTS 32
33/* memory slots that does not exposed to userspace */ 37/* memory slots that does not exposed to userspace */
34#define KVM_PRIVATE_MEM_SLOTS 4 38#define KVM_PRIVATE_MEM_SLOTS 4
@@ -167,9 +171,34 @@ struct kvm_arch {
167 int tlbie_lock; 171 int tlbie_lock;
168 struct list_head spapr_tce_tables; 172 struct list_head spapr_tce_tables;
169 unsigned short last_vcpu[NR_CPUS]; 173 unsigned short last_vcpu[NR_CPUS];
174 struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
170#endif /* CONFIG_KVM_BOOK3S_64_HV */ 175#endif /* CONFIG_KVM_BOOK3S_64_HV */
171}; 176};
172 177
178/*
179 * Struct for a virtual core.
180 * Note: entry_exit_count combines an entry count in the bottom 8 bits
181 * and an exit count in the next 8 bits. This is so that we can
182 * atomically increment the entry count iff the exit count is 0
183 * without taking the lock.
184 */
185struct kvmppc_vcore {
186 int n_runnable;
187 int n_blocked;
188 int num_threads;
189 int entry_exit_count;
190 int n_woken;
191 int nap_count;
192 u16 pcpu;
193 u8 vcore_running;
194 u8 in_guest;
195 struct list_head runnable_threads;
196 spinlock_t lock;
197};
198
199#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
200#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8)
201
173struct kvmppc_pte { 202struct kvmppc_pte {
174 ulong eaddr; 203 ulong eaddr;
175 u64 vpage; 204 u64 vpage;
@@ -365,14 +394,29 @@ struct kvm_vcpu_arch {
365 struct slb_shadow *slb_shadow; 394 struct slb_shadow *slb_shadow;
366 struct dtl *dtl; 395 struct dtl *dtl;
367 struct dtl *dtl_end; 396 struct dtl *dtl_end;
397
398 struct kvmppc_vcore *vcore;
399 int ret;
368 int trap; 400 int trap;
401 int state;
402 int ptid;
403 wait_queue_head_t cpu_run;
404
369 struct kvm_vcpu_arch_shared *shared; 405 struct kvm_vcpu_arch_shared *shared;
370 unsigned long magic_page_pa; /* phys addr to map the magic page to */ 406 unsigned long magic_page_pa; /* phys addr to map the magic page to */
371 unsigned long magic_page_ea; /* effect. addr to map the magic page to */ 407 unsigned long magic_page_ea; /* effect. addr to map the magic page to */
372 408
373#ifdef CONFIG_KVM_BOOK3S_64_HV 409#ifdef CONFIG_KVM_BOOK3S_64_HV
374 struct kvm_vcpu_arch_shared shregs; 410 struct kvm_vcpu_arch_shared shregs;
411
412 struct list_head run_list;
413 struct task_struct *run_task;
414 struct kvm_run *kvm_run;
375#endif 415#endif
376}; 416};
377 417
418#define KVMPPC_VCPU_BUSY_IN_HOST 0
419#define KVMPPC_VCPU_BLOCKED 1
420#define KVMPPC_VCPU_RUNNABLE 2
421
378#endif /* __POWERPC_KVM_HOST_H__ */ 422#endif /* __POWERPC_KVM_HOST_H__ */
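The entry_exit_count packing above is easiest to see in plain C. Below is an illustrative host-level sketch (not the patch's code: the real check is a lwarx/stwcx. loop in real mode in book3s_hv_rmhandlers.S, and these helper names are invented) of the "increment the entry count only if the exit count is still zero" rule:

#include <stdatomic.h>
#include <stdbool.h>

/* Same packing as the kvmppc_vcore comment: entry count in bits 0-7,
 * exit count in bits 8-15. */
#define VCORE_ENTRY_COUNT(x)	((x) & 0xff)
#define VCORE_EXIT_COUNT(x)	((x) >> 8)

/* A thread may join a running vcore only while nobody has begun to exit. */
static bool try_enter_guest(atomic_int *entry_exit_count)
{
	int old = atomic_load(entry_exit_count);

	do {
		if (VCORE_EXIT_COUNT(old) != 0)
			return false;	/* too late: an exit is in progress */
	} while (!atomic_compare_exchange_weak(entry_exit_count, &old,
					       old + 1));
	return true;			/* entry count bumped, safe to enter */
}

/* A thread leaving the guest bumps the exit count (the 0xff00 bits). */
static void note_guest_exit(atomic_int *entry_exit_count)
{
	atomic_fetch_add(entry_exit_count, 0x100);
}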
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 99f6fcf4cf88..6ef734428634 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -33,6 +33,9 @@
33#else 33#else
34#include <asm/kvm_booke.h> 34#include <asm/kvm_booke.h>
35#endif 35#endif
36#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
37#include <asm/paca.h>
38#endif
36 39
37enum emulation_result { 40enum emulation_result {
38 EMULATE_DONE, /* no further processing */ 41 EMULATE_DONE, /* no further processing */
@@ -169,4 +172,14 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
169 172
170void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); 173void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
171 174
175#ifdef CONFIG_KVM_BOOK3S_64_HV
176static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
177{
178 paca[cpu].kvm_hstate.xics_phys = addr;
179}
180#else
181static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
182{}
183#endif
184
172#endif /* __POWERPC_KVM_PPC_H__ */ 185#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c70d106bf1a4..d0f2387fd792 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -471,6 +471,10 @@ int main(void)
471 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); 471 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
472 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 472 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
473 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); 473 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
474 DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
475 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
476 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
477 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
474 DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - 478 DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
475 offsetof(struct kvmppc_vcpu_book3s, vcpu)); 479 offsetof(struct kvmppc_vcpu_book3s, vcpu));
476 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); 480 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
@@ -530,6 +534,8 @@ int main(void)
530 534
531#ifdef CONFIG_KVM_BOOK3S_64_HV 535#ifdef CONFIG_KVM_BOOK3S_64_HV
532 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); 536 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
537 HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
538 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
533 HSTATE_FIELD(HSTATE_MMCR, host_mmcr); 539 HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
534 HSTATE_FIELD(HSTATE_PMC, host_pmc); 540 HSTATE_FIELD(HSTATE_PMC, host_pmc);
535 HSTATE_FIELD(HSTATE_PURR, host_purr); 541 HSTATE_FIELD(HSTATE_PURR, host_purr);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 163c041cec24..5bc06fdfa6c0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -49,19 +49,32 @@ BEGIN_FTR_SECTION
49 * state loss at this time. 49 * state loss at this time.
50 */ 50 */
51 mfspr r13,SPRN_SRR1 51 mfspr r13,SPRN_SRR1
52 rlwinm r13,r13,47-31,30,31 52 rlwinm. r13,r13,47-31,30,31
53 cmpwi cr0,r13,1 53 beq 9f
54 bne 1f 54
55 b .power7_wakeup_noloss 55 /* waking up from powersave (nap) state */
561: cmpwi cr0,r13,2 56 cmpwi cr1,r13,2
57 bne 1f
58 b .power7_wakeup_loss
59 /* Total loss of HV state is fatal, we could try to use the 57 /* Total loss of HV state is fatal, we could try to use the
60 * PIR to locate a PACA, then use an emergency stack etc... 58 * PIR to locate a PACA, then use an emergency stack etc...
61 * but for now, let's just stay stuck here 59 * but for now, let's just stay stuck here
62 */ 60 */
631: cmpwi cr0,r13,3 61 bgt cr1,.
64 beq . 62 GET_PACA(r13)
63
64#ifdef CONFIG_KVM_BOOK3S_64_HV
65 lbz r0,PACAPROCSTART(r13)
66 cmpwi r0,0x80
67 bne 1f
68 li r0,0
69 stb r0,PACAPROCSTART(r13)
70 b kvm_start_guest
711:
72#endif
73
74 beq cr1,2f
75 b .power7_wakeup_noloss
762: b .power7_wakeup_loss
779:
65END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) 78END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206)
66#endif /* CONFIG_PPC_P7_NAP */ 79#endif /* CONFIG_PPC_P7_NAP */
67 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, 80 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index f8f0bc7f1d4f..3a70845a51c7 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -73,7 +73,6 @@ _GLOBAL(power7_idle)
73 b . 73 b .
74 74
75_GLOBAL(power7_wakeup_loss) 75_GLOBAL(power7_wakeup_loss)
76 GET_PACA(r13)
77 ld r1,PACAR1(r13) 76 ld r1,PACAR1(r13)
78 REST_NVGPRS(r1) 77 REST_NVGPRS(r1)
79 REST_GPR(2, r1) 78 REST_GPR(2, r1)
@@ -87,7 +86,6 @@ _GLOBAL(power7_wakeup_loss)
87 rfid 86 rfid
88 87
89_GLOBAL(power7_wakeup_noloss) 88_GLOBAL(power7_wakeup_noloss)
90 GET_PACA(r13)
91 ld r1,PACAR1(r13) 89 ld r1,PACAR1(r13)
92 ld r4,_MSR(r1) 90 ld r4,_MSR(r1)
93 ld r5,_NIP(r1) 91 ld r5,_NIP(r1)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6fe469eabce8..36b6d98f1197 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -39,6 +39,7 @@
39#include <asm/mmu_context.h> 39#include <asm/mmu_context.h>
40#include <asm/lppaca.h> 40#include <asm/lppaca.h>
41#include <asm/processor.h> 41#include <asm/processor.h>
42#include <asm/cputhreads.h>
42#include <linux/gfp.h> 43#include <linux/gfp.h>
43#include <linux/sched.h> 44#include <linux/sched.h>
44#include <linux/vmalloc.h> 45#include <linux/vmalloc.h>
@@ -51,12 +52,16 @@
51void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 52void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
52{ 53{
53 local_paca->kvm_hstate.kvm_vcpu = vcpu; 54 local_paca->kvm_hstate.kvm_vcpu = vcpu;
55 local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
54} 56}
55 57
56void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 58void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
57{ 59{
58} 60}
59 61
62static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
63static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
64
60void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) 65void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
61{ 66{
62 u64 now; 67 u64 now;
@@ -74,11 +79,15 @@ void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
74 HRTIMER_MODE_REL); 79 HRTIMER_MODE_REL);
75 } 80 }
76 81
82 kvmppc_vcpu_blocked(vcpu);
83
77 kvm_vcpu_block(vcpu); 84 kvm_vcpu_block(vcpu);
78 vcpu->stat.halt_wakeup++; 85 vcpu->stat.halt_wakeup++;
79 86
80 if (vcpu->arch.dec_expires != ~(u64)0) 87 if (vcpu->arch.dec_expires != ~(u64)0)
81 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 88 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
89
90 kvmppc_vcpu_unblocked(vcpu);
82} 91}
83 92
84void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 93void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -429,9 +438,16 @@ int kvmppc_core_check_processor_compat(void)
429struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 438struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
430{ 439{
431 struct kvm_vcpu *vcpu; 440 struct kvm_vcpu *vcpu;
432 int err = -ENOMEM; 441 int err = -EINVAL;
442 int core;
443 struct kvmppc_vcore *vcore;
433 unsigned long lpcr; 444 unsigned long lpcr;
434 445
446 core = id / threads_per_core;
447 if (core >= KVM_MAX_VCORES)
448 goto out;
449
450 err = -ENOMEM;
435 vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); 451 vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
436 if (!vcpu) 452 if (!vcpu)
437 goto out; 453 goto out;
@@ -454,6 +470,38 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
454 470
455 kvmppc_mmu_book3s_hv_init(vcpu); 471 kvmppc_mmu_book3s_hv_init(vcpu);
456 472
473 /*
474 * Some vcpus may start out in stopped state. If we initialize
475 * them to busy-in-host state they will stop other vcpus in the
476 * vcore from running. Instead we initialize them to blocked
477 * state, effectively considering them to be stopped until we
478 * see the first run ioctl for them.
479 */
480 vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
481
482 init_waitqueue_head(&vcpu->arch.cpu_run);
483
484 mutex_lock(&kvm->lock);
485 vcore = kvm->arch.vcores[core];
486 if (!vcore) {
487 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
488 if (vcore) {
489 INIT_LIST_HEAD(&vcore->runnable_threads);
490 spin_lock_init(&vcore->lock);
491 }
492 kvm->arch.vcores[core] = vcore;
493 }
494 mutex_unlock(&kvm->lock);
495
496 if (!vcore)
497 goto free_vcpu;
498
499 spin_lock(&vcore->lock);
500 ++vcore->num_threads;
501 ++vcore->n_blocked;
502 spin_unlock(&vcore->lock);
503 vcpu->arch.vcore = vcore;
504
457 return vcpu; 505 return vcpu;
458 506
459free_vcpu: 507free_vcpu:
@@ -468,21 +516,121 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
468 kfree(vcpu); 516 kfree(vcpu);
469} 517}
470 518
519static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
520{
521 struct kvmppc_vcore *vc = vcpu->arch.vcore;
522
523 spin_lock(&vc->lock);
524 vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
525 ++vc->n_blocked;
526 if (vc->n_runnable > 0 &&
527 vc->n_runnable + vc->n_blocked == vc->num_threads) {
528 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
529 arch.run_list);
530 wake_up(&vcpu->arch.cpu_run);
531 }
532 spin_unlock(&vc->lock);
533}
534
535static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
536{
537 struct kvmppc_vcore *vc = vcpu->arch.vcore;
538
539 spin_lock(&vc->lock);
540 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
541 --vc->n_blocked;
542 spin_unlock(&vc->lock);
543}
544
471extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 545extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
546extern void xics_wake_cpu(int cpu);
472 547
473static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu) 548static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
549 struct kvm_vcpu *vcpu)
474{ 550{
475 u64 now; 551 struct kvm_vcpu *v;
476 552
477 if (signal_pending(current)) { 553 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
478 run->exit_reason = KVM_EXIT_INTR; 554 return;
479 return -EINTR; 555 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
556 --vc->n_runnable;
557 /* decrement the physical thread id of each following vcpu */
558 v = vcpu;
559 list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
560 --v->arch.ptid;
561 list_del(&vcpu->arch.run_list);
562}
563
564static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
565{
566 int cpu;
567 struct paca_struct *tpaca;
568 struct kvmppc_vcore *vc = vcpu->arch.vcore;
569
570 cpu = vc->pcpu + vcpu->arch.ptid;
571 tpaca = &paca[cpu];
572 tpaca->kvm_hstate.kvm_vcpu = vcpu;
573 tpaca->kvm_hstate.kvm_vcore = vc;
574 smp_wmb();
575#ifdef CONFIG_PPC_ICP_NATIVE
576 if (vcpu->arch.ptid) {
577 tpaca->cpu_start = 0x80;
578 tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
579 wmb();
580 xics_wake_cpu(cpu);
581 ++vc->n_woken;
480 } 582 }
583#endif
584}
481 585
482 flush_fp_to_thread(current); 586static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
483 flush_altivec_to_thread(current); 587{
484 flush_vsx_to_thread(current); 588 int i;
485 preempt_disable(); 589
590 HMT_low();
591 i = 0;
592 while (vc->nap_count < vc->n_woken) {
593 if (++i >= 1000000) {
594 pr_err("kvmppc_wait_for_nap timeout %d %d\n",
595 vc->nap_count, vc->n_woken);
596 break;
597 }
598 cpu_relax();
599 }
600 HMT_medium();
601}
602
603/*
604 * Check that we are on thread 0 and that any other threads in
605 * this core are off-line.
606 */
607static int on_primary_thread(void)
608{
609 int cpu = smp_processor_id();
610 int thr = cpu_thread_in_core(cpu);
611
612 if (thr)
613 return 0;
614 while (++thr < threads_per_core)
615 if (cpu_online(cpu + thr))
616 return 0;
617 return 1;
618}
619
620/*
621 * Run a set of guest threads on a physical core.
622 * Called with vc->lock held.
623 */
624static int kvmppc_run_core(struct kvmppc_vcore *vc)
625{
626 struct kvm_vcpu *vcpu, *vnext;
627 long ret;
628 u64 now;
629
630 /* don't start if any threads have a signal pending */
631 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
632 if (signal_pending(vcpu->arch.run_task))
633 return 0;
486 634
487 /* 635 /*
488 * Make sure we are running on thread 0, and that 636 * Make sure we are running on thread 0, and that
@@ -490,36 +638,150 @@ static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
490 * XXX we should also block attempts to bring any 638 * XXX we should also block attempts to bring any
491 * secondary threads online. 639 * secondary threads online.
492 */ 640 */
493 if (threads_per_core > 1) { 641 if (threads_per_core > 1 && !on_primary_thread()) {
494 int cpu = smp_processor_id(); 642 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
495 int thr = cpu_thread_in_core(cpu); 643 vcpu->arch.ret = -EBUSY;
496 644 goto out;
497 if (thr)
498 goto out;
499 while (++thr < threads_per_core)
500 if (cpu_online(cpu + thr))
501 goto out;
502 } 645 }
503 646
504 kvm_guest_enter(); 647 vc->n_woken = 0;
648 vc->nap_count = 0;
649 vc->entry_exit_count = 0;
650 vc->vcore_running = 1;
651 vc->in_guest = 0;
652 vc->pcpu = smp_processor_id();
653 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
654 kvmppc_start_thread(vcpu);
655 vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
656 arch.run_list);
657
658 spin_unlock(&vc->lock);
505 659
660 preempt_disable();
661 kvm_guest_enter();
506 __kvmppc_vcore_entry(NULL, vcpu); 662 __kvmppc_vcore_entry(NULL, vcpu);
507 663
664 /* wait for secondary threads to finish writing their state to memory */
665 spin_lock(&vc->lock);
666 if (vc->nap_count < vc->n_woken)
667 kvmppc_wait_for_nap(vc);
668 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
669 vc->vcore_running = 2;
670 spin_unlock(&vc->lock);
671
672 /* make sure updates to secondary vcpu structs are visible now */
673 smp_mb();
508 kvm_guest_exit(); 674 kvm_guest_exit();
509 675
510 preempt_enable(); 676 preempt_enable();
511 kvm_resched(vcpu); 677 kvm_resched(vcpu);
512 678
513 now = get_tb(); 679 now = get_tb();
514 /* cancel pending dec exception if dec is positive */ 680 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
515 if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) 681 /* cancel pending dec exception if dec is positive */
516 kvmppc_core_dequeue_dec(vcpu); 682 if (now < vcpu->arch.dec_expires &&
517 683 kvmppc_core_pending_dec(vcpu))
518 return kvmppc_handle_exit(run, vcpu, current); 684 kvmppc_core_dequeue_dec(vcpu);
685 if (!vcpu->arch.trap) {
686 if (signal_pending(vcpu->arch.run_task)) {
687 vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
688 vcpu->arch.ret = -EINTR;
689 }
690 continue; /* didn't get to run */
691 }
692 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
693 vcpu->arch.run_task);
694 vcpu->arch.ret = ret;
695 vcpu->arch.trap = 0;
696 }
519 697
698 spin_lock(&vc->lock);
520 out: 699 out:
521 preempt_enable(); 700 vc->vcore_running = 0;
522 return -EBUSY; 701 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
702 arch.run_list) {
703 if (vcpu->arch.ret != RESUME_GUEST) {
704 kvmppc_remove_runnable(vc, vcpu);
705 wake_up(&vcpu->arch.cpu_run);
706 }
707 }
708
709 return 1;
710}
711
712static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
713{
714 int ptid;
715 int wait_state;
716 struct kvmppc_vcore *vc;
717 DEFINE_WAIT(wait);
718
719 /* No need to go into the guest when all we do is going out */
720 if (signal_pending(current)) {
721 kvm_run->exit_reason = KVM_EXIT_INTR;
722 return -EINTR;
723 }
724
725 kvm_run->exit_reason = 0;
726 vcpu->arch.ret = RESUME_GUEST;
727 vcpu->arch.trap = 0;
728
729 flush_fp_to_thread(current);
730 flush_altivec_to_thread(current);
731 flush_vsx_to_thread(current);
732
733 /*
734 * Synchronize with other threads in this virtual core
735 */
736 vc = vcpu->arch.vcore;
737 spin_lock(&vc->lock);
738 /* This happens the first time this is called for a vcpu */
739 if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
740 --vc->n_blocked;
741 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
742 ptid = vc->n_runnable;
743 vcpu->arch.run_task = current;
744 vcpu->arch.kvm_run = kvm_run;
745 vcpu->arch.ptid = ptid;
746 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
747 ++vc->n_runnable;
748
749 wait_state = TASK_INTERRUPTIBLE;
750 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
751 if (signal_pending(current)) {
752 if (!vc->vcore_running) {
753 kvm_run->exit_reason = KVM_EXIT_INTR;
754 vcpu->arch.ret = -EINTR;
755 break;
756 }
757 /* have to wait for vcore to stop executing guest */
758 wait_state = TASK_UNINTERRUPTIBLE;
759 smp_send_reschedule(vc->pcpu);
760 }
761
762 if (!vc->vcore_running &&
763 vc->n_runnable + vc->n_blocked == vc->num_threads) {
764 /* we can run now */
765 if (kvmppc_run_core(vc))
766 continue;
767 }
768
769 if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
770 kvmppc_start_thread(vcpu);
771
772 /* wait for other threads to come in, or wait for vcore */
773 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
774 spin_unlock(&vc->lock);
775 schedule();
776 finish_wait(&vcpu->arch.cpu_run, &wait);
777 spin_lock(&vc->lock);
778 }
779
780 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
781 kvmppc_remove_runnable(vc, vcpu);
782 spin_unlock(&vc->lock);
783
784 return vcpu->arch.ret;
523} 785}
524 786
525int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) 787int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
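To restate the scheduling policy in kvmppc_run_vcpu above in plain C (a hypothetical helper; only the field names come from the patch), the vcore is kicked off only once every one of its threads is accounted for as runnable or blocked:

#include <stdbool.h>

/* Fields mirror the kvmppc_vcore struct added in kvm_host.h above. */
struct vcore_view {
	int n_runnable;		/* threads ready to enter the guest */
	int n_blocked;		/* threads idle (blocked) */
	int num_threads;	/* vcpus assigned to this vcore */
	int vcore_running;	/* 0 = idle, 1 = in guest, 2 = exiting */
};

/*
 * A vcore may start only when all of its threads are runnable or blocked;
 * a thread that is busy in the host holds the whole vcore back, which is
 * the policy described in the commit message.
 */
static bool vcore_can_run(const struct vcore_view *vc)
{
	return !vc->vcore_running &&
	       vc->n_runnable + vc->n_blocked == vc->num_threads;
}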
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e6adaadcdff2..c9bf177b7cf2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -30,8 +30,6 @@
30 * * 30 * *
31 ****************************************************************************/ 31 ****************************************************************************/
32 32
33#define SHADOW_VCPU_OFF PACA_KVM_SVCPU
34
35 .globl kvmppc_skip_interrupt 33 .globl kvmppc_skip_interrupt
36kvmppc_skip_interrupt: 34kvmppc_skip_interrupt:
37 mfspr r13,SPRN_SRR0 35 mfspr r13,SPRN_SRR0
@@ -79,6 +77,32 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
79 * * 77 * *
80 *****************************************************************************/ 78 *****************************************************************************/
81 79
80#define XICS_XIRR 4
81#define XICS_QIRR 0xc
82
83/*
84 * We come in here when wakened from nap mode on a secondary hw thread.
85 * Relocation is off and most register values are lost.
86 * r13 points to the PACA.
87 */
88 .globl kvm_start_guest
89kvm_start_guest:
90 ld r1,PACAEMERGSP(r13)
91 subi r1,r1,STACK_FRAME_OVERHEAD
92
93 /* get vcpu pointer */
94 ld r4, HSTATE_KVM_VCPU(r13)
95
96 /* We got here with an IPI; clear it */
97 ld r5, HSTATE_XICS_PHYS(r13)
98 li r0, 0xff
99 li r6, XICS_QIRR
100 li r7, XICS_XIRR
101 lwzcix r8, r5, r7 /* ack the interrupt */
102 sync
103 stbcix r0, r5, r6 /* clear it */
104 stwcix r8, r5, r7 /* EOI it */
105
82.global kvmppc_hv_entry 106.global kvmppc_hv_entry
83kvmppc_hv_entry: 107kvmppc_hv_entry:
84 108
@@ -200,7 +224,20 @@ kvmppc_hv_entry:
200 slbia 224 slbia
201 ptesync 225 ptesync
202 226
203 /* Switch to guest partition. */ 227 /* Increment entry count iff exit count is zero. */
228 ld r5,HSTATE_KVM_VCORE(r13)
229 addi r9,r5,VCORE_ENTRY_EXIT
23021: lwarx r3,0,r9
231 cmpwi r3,0x100 /* any threads starting to exit? */
232 bge secondary_too_late /* if so we're too late to the party */
233 addi r3,r3,1
234 stwcx. r3,0,r9
235 bne 21b
236
237 /* Primary thread switches to guest partition. */
238 lwz r6,VCPU_PTID(r4)
239 cmpwi r6,0
240 bne 20f
204 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ 241 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
205 ld r6,KVM_SDR1(r9) 242 ld r6,KVM_SDR1(r9)
206 lwz r7,KVM_LPID(r9) 243 lwz r7,KVM_LPID(r9)
@@ -210,7 +247,15 @@ kvmppc_hv_entry:
210 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 247 mtspr SPRN_SDR1,r6 /* switch to partition page table */
211 mtspr SPRN_LPID,r7 248 mtspr SPRN_LPID,r7
212 isync 249 isync
213 ld r8,VCPU_LPCR(r4) 250 li r0,1
251 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
252 b 10f
253
254 /* Secondary threads wait for primary to have done partition switch */
25520: lbz r0,VCORE_IN_GUEST(r5)
256 cmpwi r0,0
257 beq 20b
25810: ld r8,VCPU_LPCR(r4)
214 mtspr SPRN_LPCR,r8 259 mtspr SPRN_LPCR,r8
215 isync 260 isync
216 261
@@ -225,10 +270,12 @@ kvmppc_hv_entry:
225 * Invalidate the TLB if we could possibly have stale TLB 270 * Invalidate the TLB if we could possibly have stale TLB
226 * entries for this partition on this core due to the use 271 * entries for this partition on this core due to the use
227 * of tlbiel. 272 * of tlbiel.
273 * XXX maybe only need this on primary thread?
228 */ 274 */
229 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ 275 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
230 lwz r5,VCPU_VCPUID(r4) 276 lwz r5,VCPU_VCPUID(r4)
231 lhz r6,PACAPACAINDEX(r13) 277 lhz r6,PACAPACAINDEX(r13)
278 rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
232 lhz r8,VCPU_LAST_CPU(r4) 279 lhz r8,VCPU_LAST_CPU(r4)
233 sldi r7,r6,1 /* see if this is the same vcpu */ 280 sldi r7,r6,1 /* see if this is the same vcpu */
234 add r7,r7,r9 /* as last ran on this pcpu */ 281 add r7,r7,r9 /* as last ran on this pcpu */
@@ -512,8 +559,60 @@ hcall_real_cont:
512 ptesync 559 ptesync
513 560
514hdec_soon: 561hdec_soon:
515 /* Switch back to host partition */ 562 /* Increment the threads-exiting-guest count in the 0xff00
563 bits of vcore->entry_exit_count */
564 lwsync
565 ld r5,HSTATE_KVM_VCORE(r13)
566 addi r6,r5,VCORE_ENTRY_EXIT
56741: lwarx r3,0,r6
568 addi r0,r3,0x100
569 stwcx. r0,0,r6
570 bne 41b
571
572 /*
573 * At this point we have an interrupt that we have to pass
574 * up to the kernel or qemu; we can't handle it in real mode.
575 * Thus we have to do a partition switch, so we have to
576 * collect the other threads, if we are the first thread
577 * to take an interrupt. To do this, we set the HDEC to 0,
578 * which causes an HDEC interrupt in all threads within 2ns
579 * because the HDEC register is shared between all 4 threads.
580 * However, we don't need to bother if this is an HDEC
581 * interrupt, since the other threads will already be on their
582 * way here in that case.
583 */
584 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
585 beq 40f
586 cmpwi r3,0x100 /* Are we the first here? */
587 bge 40f
588 cmpwi r3,1
589 ble 40f
590 li r0,0
591 mtspr SPRN_HDEC,r0
59240:
593
594 /* Secondary threads wait for primary to do partition switch */
516 ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ 595 ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
596 ld r5,HSTATE_KVM_VCORE(r13)
597 lwz r3,VCPU_PTID(r9)
598 cmpwi r3,0
599 beq 15f
600 HMT_LOW
60113: lbz r3,VCORE_IN_GUEST(r5)
602 cmpwi r3,0
603 bne 13b
604 HMT_MEDIUM
605 b 16f
606
607 /* Primary thread waits for all the secondaries to exit guest */
60815: lwz r3,VCORE_ENTRY_EXIT(r5)
609 srwi r0,r3,8
610 clrldi r3,r3,56
611 cmpw r3,r0
612 bne 15b
613 isync
614
615 /* Primary thread switches back to host partition */
517 ld r6,KVM_HOST_SDR1(r4) 616 ld r6,KVM_HOST_SDR1(r4)
518 lwz r7,KVM_HOST_LPID(r4) 617 lwz r7,KVM_HOST_LPID(r4)
519 li r8,LPID_RSVD /* switch to reserved LPID */ 618 li r8,LPID_RSVD /* switch to reserved LPID */
@@ -522,10 +621,12 @@ hdec_soon:
522 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 621 mtspr SPRN_SDR1,r6 /* switch to partition page table */
523 mtspr SPRN_LPID,r7 622 mtspr SPRN_LPID,r7
524 isync 623 isync
624 li r0,0
625 stb r0,VCORE_IN_GUEST(r5)
525 lis r8,0x7fff /* MAX_INT@h */ 626 lis r8,0x7fff /* MAX_INT@h */
526 mtspr SPRN_HDEC,r8 627 mtspr SPRN_HDEC,r8
527 628
528 ld r8,KVM_HOST_LPCR(r4) 62916: ld r8,KVM_HOST_LPCR(r4)
529 mtspr SPRN_LPCR,r8 630 mtspr SPRN_LPCR,r8
530 isync 631 isync
531 632
@@ -634,6 +735,11 @@ hdec_soon:
634 mr r3, r9 735 mr r3, r9
635 bl .kvmppc_save_fp 736 bl .kvmppc_save_fp
636 737
738 /* Secondary threads go off to take a nap */
739 lwz r0,VCPU_PTID(r3)
740 cmpwi r0,0
741 bne secondary_nap
742
637 /* 743 /*
638 * Reload DEC. HDEC interrupts were disabled when 744 * Reload DEC. HDEC interrupts were disabled when
639 * we reloaded the host's LPCR value. 745 * we reloaded the host's LPCR value.
@@ -840,6 +946,56 @@ _GLOBAL(kvmppc_h_set_dabr)
840 li r3,0 946 li r3,0
841 blr 947 blr
842 948
949secondary_too_late:
950 ld r5,HSTATE_KVM_VCORE(r13)
951 HMT_LOW
95213: lbz r3,VCORE_IN_GUEST(r5)
953 cmpwi r3,0
954 bne 13b
955 HMT_MEDIUM
956 ld r11,PACA_SLBSHADOWPTR(r13)
957
958 .rept SLB_NUM_BOLTED
959 ld r5,SLBSHADOW_SAVEAREA(r11)
960 ld r6,SLBSHADOW_SAVEAREA+8(r11)
961 andis. r7,r5,SLB_ESID_V@h
962 beq 1f
963 slbmte r6,r5
9641: addi r11,r11,16
965 .endr
966 b 50f
967
968secondary_nap:
969 /* Clear any pending IPI */
97050: ld r5, HSTATE_XICS_PHYS(r13)
971 li r0, 0xff
972 li r6, XICS_QIRR
973 stbcix r0, r5, r6
974
975 /* increment the nap count and then go to nap mode */
976 ld r4, HSTATE_KVM_VCORE(r13)
977 addi r4, r4, VCORE_NAP_COUNT
978 lwsync /* make previous updates visible */
97951: lwarx r3, 0, r4
980 addi r3, r3, 1
981 stwcx. r3, 0, r4
982 bne 51b
983 isync
984
985 mfspr r4, SPRN_LPCR
986 li r0, LPCR_PECE
987 andc r4, r4, r0
988 ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */
989 mtspr SPRN_LPCR, r4
990 li r0, 0
991 std r0, HSTATE_SCRATCH0(r13)
992 ptesync
993 ld r0, HSTATE_SCRATCH0(r13)
9941: cmpd r0, r0
995 bne 1b
996 nap
997 b .
998
843/* 999/*
844 * Save away FP, VMX and VSX registers. 1000 * Save away FP, VMX and VSX registers.
845 * r3 = vcpu pointer 1001 * r3 = vcpu pointer
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c78ceb9d5605..4c549664c987 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
30#include <asm/uaccess.h> 30#include <asm/uaccess.h>
31#include <asm/kvm_ppc.h> 31#include <asm/kvm_ppc.h>
32#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
33#include <asm/cputhreads.h>
33#include "timing.h" 34#include "timing.h"
34#include "../mm/mmu_decl.h" 35#include "../mm/mmu_decl.h"
35 36
@@ -207,6 +208,9 @@ int kvm_dev_ioctl_check_extension(long ext)
207 case KVM_CAP_SPAPR_TCE: 208 case KVM_CAP_SPAPR_TCE:
208 r = 1; 209 r = 1;
209 break; 210 break;
211 case KVM_CAP_PPC_SMT:
212 r = threads_per_core;
213 break;
210#endif 214#endif
211 default: 215 default:
212 r = 0; 216 r = 0;
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 1f15ad436140..ba382b59b926 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -17,6 +17,7 @@
17#include <linux/cpu.h> 17#include <linux/cpu.h>
18#include <linux/of.h> 18#include <linux/of.h>
19#include <linux/spinlock.h> 19#include <linux/spinlock.h>
20#include <linux/module.h>
20 21
21#include <asm/prom.h> 22#include <asm/prom.h>
22#include <asm/io.h> 23#include <asm/io.h>
@@ -24,6 +25,7 @@
24#include <asm/irq.h> 25#include <asm/irq.h>
25#include <asm/errno.h> 26#include <asm/errno.h>
26#include <asm/xics.h> 27#include <asm/xics.h>
28#include <asm/kvm_ppc.h>
27 29
28struct icp_ipl { 30struct icp_ipl {
29 union { 31 union {
@@ -139,6 +141,12 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
139 icp_native_set_qirr(cpu, IPI_PRIORITY); 141 icp_native_set_qirr(cpu, IPI_PRIORITY);
140} 142}
141 143
144void xics_wake_cpu(int cpu)
145{
146 icp_native_set_qirr(cpu, IPI_PRIORITY);
147}
148EXPORT_SYMBOL_GPL(xics_wake_cpu);
149
142static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) 150static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
143{ 151{
144 int cpu = smp_processor_id(); 152 int cpu = smp_processor_id();
@@ -185,6 +193,7 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
185 } 193 }
186 194
187 icp_native_regs[cpu] = ioremap(addr, size); 195 icp_native_regs[cpu] = ioremap(addr, size);
196 kvmppc_set_xics_phys(cpu, addr);
188 if (!icp_native_regs[cpu]) { 197 if (!icp_native_regs[cpu]) {
189 pr_warning("icp_native: Failed ioremap for CPU %d, " 198 pr_warning("icp_native: Failed ioremap for CPU %d, "
190 "interrupt server #0x%x, addr %#lx\n", 199 "interrupt server #0x%x, addr %#lx\n",
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 61f56502732e..e2a378d97160 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -551,6 +551,7 @@ struct kvm_ppc_pvinfo {
551#define KVM_CAP_GET_TSC_KHZ 61 551#define KVM_CAP_GET_TSC_KHZ 61
552#define KVM_CAP_PPC_BOOKE_SREGS 62 552#define KVM_CAP_PPC_BOOKE_SREGS 62
553#define KVM_CAP_SPAPR_TCE 63 553#define KVM_CAP_SPAPR_TCE 63
554#define KVM_CAP_PPC_SMT 64
554 555
555#ifdef KVM_CAP_IRQ_ROUTING 556#ifdef KVM_CAP_IRQ_ROUTING
556 557