-rw-r--r--  arch/arm/include/asm/kvm_host.h | 12
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 12
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 46
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 37
-rw-r--r--  arch/powerpc/include/asm/hmi.h | 2
-rw-r--r--  arch/powerpc/include/asm/io.h | 29
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 10
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 37
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 87
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 123
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 28
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 1
-rw-r--r--  arch/powerpc/include/asm/opal.h | 1
-rw-r--r--  arch/powerpc/include/asm/paca.h | 12
-rw-r--r--  arch/powerpc/include/asm/pnv-pci.h | 3
-rw-r--r--  arch/powerpc/kernel/Makefile | 2
-rw-r--r--  arch/powerpc/kvm/Kconfig | 3
-rw-r--r--  arch/powerpc/kvm/Makefile | 20
-rw-r--r--  arch/powerpc/kvm/book3s.c | 7
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 401
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 156
-rw-r--r--  arch/powerpc/kvm/book3s_hv_hmi.c (renamed from arch/powerpc/kernel/hmi.c) | 0
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 120
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 183
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 57
-rw-r--r--  arch/powerpc/kvm/book3s_xics.h | 2
-rw-r--r--  arch/powerpc/kvm/e500_mmu.c | 73
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 51
-rw-r--r--  arch/powerpc/kvm/trace_hv.h | 22
-rw-r--r--  arch/powerpc/mm/hash_native_64.c | 42
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 55
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 1
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 24
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 134
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 72
-rw-r--r--  virt/kvm/kvm_main.c | 4
36 files changed, 1366 insertions, 503 deletions
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index de338d93d11b..6ad21f04a922 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -183,15 +183,15 @@ struct kvm_vcpu_arch {
183}; 183};
184 184
185struct kvm_vm_stat { 185struct kvm_vm_stat {
186 u32 remote_tlb_flush; 186 ulong remote_tlb_flush;
187}; 187};
188 188
189struct kvm_vcpu_stat { 189struct kvm_vcpu_stat {
190 u32 halt_successful_poll; 190 u64 halt_successful_poll;
191 u32 halt_attempted_poll; 191 u64 halt_attempted_poll;
192 u32 halt_poll_invalid; 192 u64 halt_poll_invalid;
193 u32 halt_wakeup; 193 u64 halt_wakeup;
194 u32 hvc_exit_stat; 194 u64 hvc_exit_stat;
195 u64 wfe_exit_stat; 195 u64 wfe_exit_stat;
196 u64 wfi_exit_stat; 196 u64 wfi_exit_stat;
197 u64 mmio_exit_user; 197 u64 mmio_exit_user;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3eda975837d0..bd94e6766759 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -290,15 +290,15 @@ struct kvm_vcpu_arch {
290#endif 290#endif
291 291
292struct kvm_vm_stat { 292struct kvm_vm_stat {
293 u32 remote_tlb_flush; 293 ulong remote_tlb_flush;
294}; 294};
295 295
296struct kvm_vcpu_stat { 296struct kvm_vcpu_stat {
297 u32 halt_successful_poll; 297 u64 halt_successful_poll;
298 u32 halt_attempted_poll; 298 u64 halt_attempted_poll;
299 u32 halt_poll_invalid; 299 u64 halt_poll_invalid;
300 u32 halt_wakeup; 300 u64 halt_wakeup;
301 u32 hvc_exit_stat; 301 u64 hvc_exit_stat;
302 u64 wfe_exit_stat; 302 u64 wfe_exit_stat;
303 u64 wfi_exit_stat; 303 u64 wfi_exit_stat;
304 u64 mmio_exit_user; 304 u64 mmio_exit_user;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index b54bcadd8aec..5f488dc8a7d5 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -110,32 +110,32 @@
110extern atomic_t kvm_mips_instance; 110extern atomic_t kvm_mips_instance;
111 111
112struct kvm_vm_stat { 112struct kvm_vm_stat {
113 u32 remote_tlb_flush; 113 ulong remote_tlb_flush;
114}; 114};
115 115
116struct kvm_vcpu_stat { 116struct kvm_vcpu_stat {
117 u32 wait_exits; 117 u64 wait_exits;
118 u32 cache_exits; 118 u64 cache_exits;
119 u32 signal_exits; 119 u64 signal_exits;
120 u32 int_exits; 120 u64 int_exits;
121 u32 cop_unusable_exits; 121 u64 cop_unusable_exits;
122 u32 tlbmod_exits; 122 u64 tlbmod_exits;
123 u32 tlbmiss_ld_exits; 123 u64 tlbmiss_ld_exits;
124 u32 tlbmiss_st_exits; 124 u64 tlbmiss_st_exits;
125 u32 addrerr_st_exits; 125 u64 addrerr_st_exits;
126 u32 addrerr_ld_exits; 126 u64 addrerr_ld_exits;
127 u32 syscall_exits; 127 u64 syscall_exits;
128 u32 resvd_inst_exits; 128 u64 resvd_inst_exits;
129 u32 break_inst_exits; 129 u64 break_inst_exits;
130 u32 trap_inst_exits; 130 u64 trap_inst_exits;
131 u32 msa_fpe_exits; 131 u64 msa_fpe_exits;
132 u32 fpe_exits; 132 u64 fpe_exits;
133 u32 msa_disabled_exits; 133 u64 msa_disabled_exits;
134 u32 flush_dcache_exits; 134 u64 flush_dcache_exits;
135 u32 halt_successful_poll; 135 u64 halt_successful_poll;
136 u32 halt_attempted_poll; 136 u64 halt_attempted_poll;
137 u32 halt_poll_invalid; 137 u64 halt_poll_invalid;
138 u32 halt_wakeup; 138 u64 halt_wakeup;
139}; 139};
140 140
141struct kvm_arch_memory_slot { 141struct kvm_arch_memory_slot {
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 287a656ceb57..e407af2b7333 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -245,6 +245,43 @@ static inline int segment_shift(int ssize)
245} 245}
246 246
247/* 247/*
248 * This array is indexed by the LP field of the HPTE second dword.
249 * Since this field may contain some RPN bits, some entries are
250 * replicated so that we get the same value irrespective of RPN.
251 * The top 4 bits are the page size index (MMU_PAGE_*) for the
252 * actual page size, the bottom 4 bits are the base page size.
253 */
254extern u8 hpte_page_sizes[1 << LP_BITS];
255
256static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
257 bool is_base_size)
258{
259 unsigned int i, lp;
260
261 if (!(h & HPTE_V_LARGE))
262 return 1ul << 12;
263
264 /* Look at the 8 bit LP value */
265 lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
266 i = hpte_page_sizes[lp];
267 if (!i)
268 return 0;
269 if (!is_base_size)
270 i >>= 4;
271 return 1ul << mmu_psize_defs[i & 0xf].shift;
272}
273
274static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
275{
276 return __hpte_page_size(h, l, 0);
277}
278
279static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
280{
281 return __hpte_page_size(h, l, 1);
282}
283
284/*
248 * The current system page and segment sizes 285 * The current system page and segment sizes
249 */ 286 */
250extern int mmu_kernel_ssize; 287extern int mmu_kernel_ssize;
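For reference, the comment above describes how each hpte_page_sizes entry packs two MMU_PAGE_* indexes into one byte. The helper below is only an illustrative sketch, not part of the patch (decode_hpte_size_entry is a hypothetical name), showing how such an entry decomposes:

static inline void decode_hpte_size_entry(u8 entry, unsigned int *base_psize,
					  unsigned int *actual_psize)
{
	*base_psize   = entry & 0xf;	/* MMU_PAGE_* index of the base page size */
	*actual_psize = entry >> 4;	/* MMU_PAGE_* index of the actual page size */
}

/*
 * e.g. a 64K base / 16M actual mapping would be stored as
 * (MMU_PAGE_16M << 4) | MMU_PAGE_64K, which is what __hpte_page_size() above expects.
 */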
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 88b4901ac4ee..85b7a1a21e22 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -21,7 +21,7 @@
21#ifndef __ASM_PPC64_HMI_H__ 21#ifndef __ASM_PPC64_HMI_H__
22#define __ASM_PPC64_HMI_H__ 22#define __ASM_PPC64_HMI_H__
23 23
24#ifdef CONFIG_PPC_BOOK3S_64 24#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
25 25
26#define CORE_TB_RESYNC_REQ_BIT 63 26#define CORE_TB_RESYNC_REQ_BIT 63
27#define MAX_SUBCORE_PER_CORE 4 27#define MAX_SUBCORE_PER_CORE 4
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 2fd1690b79d2..f6fda8482f60 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
241#endif 241#endif
242#endif /* __powerpc64__ */ 242#endif /* __powerpc64__ */
243 243
244
245/*
246 * Simple Cache inhibited accessors
247 * Unlike the DEF_MMIO_* macros, these don't include any h/w memory
248 * barriers, callers need to manage memory barriers on their own.
249 * These can only be used in hypervisor real mode.
250 */
251
252static inline u32 _lwzcix(unsigned long addr)
253{
254 u32 ret;
255
256 __asm__ __volatile__("lwzcix %0,0, %1"
257 : "=r" (ret) : "r" (addr) : "memory");
258 return ret;
259}
260
261static inline void _stbcix(u64 addr, u8 val)
262{
263 __asm__ __volatile__("stbcix %0,0,%1"
264 : : "r" (val), "r" (addr) : "memory");
265}
266
267static inline void _stwcix(u64 addr, u32 val)
268{
269 __asm__ __volatile__("stwcix %0,0,%1"
270 : : "r" (val), "r" (addr) : "memory");
271}
272
244/* 273/*
245 * Low level IO stream instructions are defined out of line for now 274 * Low level IO stream instructions are defined out of line for now
246 */ 275 */
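Because these accessors deliberately omit the barriers that the DEF_MMIO_* variants insert, a real-mode caller has to order the accesses itself. A minimal, hypothetical sketch (xics_phys and the register offset are illustrative and not taken from this patch):

static void example_rm_mmio_store(unsigned long xics_phys, u32 xirr)
{
	_stwcix(xics_phys + 4, xirr);			/* cache-inhibited store, no implicit barrier */
	__asm__ __volatile__("eieio" : : : "memory");	/* caller-supplied ordering */
}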
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 5bca220bbb60..05cabed3d1bd 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -105,6 +105,15 @@
105#define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60 105#define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60
106#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80 106#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
107 107
108/* book3s_hv */
109
110/*
111 * Special trap used to indicate to host that this is a
112 * passthrough interrupt that could not be handled
113 * completely in the guest.
114 */
115#define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555
116
108#define BOOK3S_IRQPRIO_SYSTEM_RESET 0 117#define BOOK3S_IRQPRIO_SYSTEM_RESET 0
109#define BOOK3S_IRQPRIO_DATA_SEGMENT 1 118#define BOOK3S_IRQPRIO_DATA_SEGMENT 1
110#define BOOK3S_IRQPRIO_INST_SEGMENT 2 119#define BOOK3S_IRQPRIO_INST_SEGMENT 2
@@ -136,6 +145,7 @@
136#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ 145#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
137#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ 146#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
138#define RESUME_FLAG_ARCH1 (1<<2) 147#define RESUME_FLAG_ARCH1 (1<<2)
148#define RESUME_FLAG_ARCH2 (1<<3)
139 149
140#define RESUME_GUEST 0 150#define RESUME_GUEST 0
141#define RESUME_GUEST_NV RESUME_FLAG_NV 151#define RESUME_GUEST_NV RESUME_FLAG_NV
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 8f39796c9da8..cef2b892245c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -69,6 +69,42 @@ struct hpte_cache {
69 int pagesize; 69 int pagesize;
70}; 70};
71 71
72/*
73 * Struct for a virtual core.
74 * Note: entry_exit_map combines a bitmap of threads that have entered
75 * in the bottom 8 bits and a bitmap of threads that have exited in the
76 * next 8 bits. This is so that we can atomically set the entry bit
77 * iff the exit map is 0 without taking a lock.
78 */
79struct kvmppc_vcore {
80 int n_runnable;
81 int num_threads;
82 int entry_exit_map;
83 int napping_threads;
84 int first_vcpuid;
85 u16 pcpu;
86 u16 last_cpu;
87 u8 vcore_state;
88 u8 in_guest;
89 struct kvmppc_vcore *master_vcore;
90 struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
91 struct list_head preempt_list;
92 spinlock_t lock;
93 struct swait_queue_head wq;
94 spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
95 u64 stolen_tb;
96 u64 preempt_tb;
97 struct kvm_vcpu *runner;
98 struct kvm *kvm;
99 u64 tb_offset; /* guest timebase - host timebase */
100 ulong lpcr;
101 u32 arch_compat;
102 ulong pcr;
103 ulong dpdes; /* doorbell state (POWER8) */
104 ulong conferring_threads;
105 unsigned int halt_poll_ns;
106};
107
72struct kvmppc_vcpu_book3s { 108struct kvmppc_vcpu_book3s {
73 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 109 struct kvmppc_sid_map sid_map[SID_MAP_NUM];
74 struct { 110 struct {
@@ -191,6 +227,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
191 struct kvm_vcpu *vcpu); 227 struct kvm_vcpu *vcpu);
192extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, 228extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
193 struct kvmppc_book3s_shadow_vcpu *svcpu); 229 struct kvmppc_book3s_shadow_vcpu *svcpu);
230extern int kvm_irq_bypass;
194 231
195static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) 232static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
196{ 233{
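The entry_exit_map layout described in the comment above is what allows lock-free entry claims. A rough sketch of that idea (illustrative only; the patch itself keeps the existing entry/exit logic and merely relocates the structure):

static bool try_claim_vcore_entry(struct kvmppc_vcore *vc, int thread)
{
	int old, new;

	do {
		old = vc->entry_exit_map;
		if (old >> 8)			/* some thread has already exited */
			return false;
		new = old | (1 << thread);	/* set our entry bit */
	} while (cmpxchg(&vc->entry_exit_map, old, new) != old);
	return true;
}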
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 88d17b4ea9c8..4ffd5a1e788d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -20,6 +20,8 @@
20#ifndef __ASM_KVM_BOOK3S_64_H__ 20#ifndef __ASM_KVM_BOOK3S_64_H__
21#define __ASM_KVM_BOOK3S_64_H__ 21#define __ASM_KVM_BOOK3S_64_H__
22 22
23#include <asm/book3s/64/mmu-hash.h>
24
23#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 25#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
24static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) 26static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
25{ 27{
@@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
97 hpte[0] = cpu_to_be64(hpte_v); 99 hpte[0] = cpu_to_be64(hpte_v);
98} 100}
99 101
100static inline int __hpte_actual_psize(unsigned int lp, int psize)
101{
102 int i, shift;
103 unsigned int mask;
104
105 /* start from 1 ignoring MMU_PAGE_4K */
106 for (i = 1; i < MMU_PAGE_COUNT; i++) {
107
108 /* invalid penc */
109 if (mmu_psize_defs[psize].penc[i] == -1)
110 continue;
111 /*
112 * encoding bits per actual page size
113 * PTE LP actual page size
114 * rrrr rrrz >=8KB
115 * rrrr rrzz >=16KB
116 * rrrr rzzz >=32KB
117 * rrrr zzzz >=64KB
118 * .......
119 */
120 shift = mmu_psize_defs[i].shift - LP_SHIFT;
121 if (shift > LP_BITS)
122 shift = LP_BITS;
123 mask = (1 << shift) - 1;
124 if ((lp & mask) == mmu_psize_defs[psize].penc[i])
125 return i;
126 }
127 return -1;
128}
129
130static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, 102static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
131 unsigned long pte_index) 103 unsigned long pte_index)
132{ 104{
133 int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; 105 int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
134 unsigned int penc; 106 unsigned int penc;
135 unsigned long rb = 0, va_low, sllp; 107 unsigned long rb = 0, va_low, sllp;
136 unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); 108 unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
137 109
138 if (v & HPTE_V_LARGE) { 110 if (v & HPTE_V_LARGE) {
139 for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { 111 i = hpte_page_sizes[lp];
140 112 b_psize = i & 0xf;
141 /* valid entries have a shift value */ 113 a_psize = i >> 4;
142 if (!mmu_psize_defs[b_psize].shift)
143 continue;
144
145 a_psize = __hpte_actual_psize(lp, b_psize);
146 if (a_psize != -1)
147 break;
148 }
149 } 114 }
115
150 /* 116 /*
151 * Ignore the top 14 bits of va 117 * Ignore the top 14 bits of va
152 * v have top two bits covering segment size, hence move 118 * v have top two bits covering segment size, hence move
@@ -215,45 +181,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
215 return rb; 181 return rb;
216} 182}
217 183
218static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
219 bool is_base_size)
220{
221
222 int size, a_psize;
223 /* Look at the 8 bit LP value */
224 unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
225
226 /* only handle 4k, 64k and 16M pages for now */
227 if (!(h & HPTE_V_LARGE))
228 return 1ul << 12;
229 else {
230 for (size = 0; size < MMU_PAGE_COUNT; size++) {
231 /* valid entries have a shift value */
232 if (!mmu_psize_defs[size].shift)
233 continue;
234
235 a_psize = __hpte_actual_psize(lp, size);
236 if (a_psize != -1) {
237 if (is_base_size)
238 return 1ul << mmu_psize_defs[size].shift;
239 return 1ul << mmu_psize_defs[a_psize].shift;
240 }
241 }
242
243 }
244 return 0;
245}
246
247static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
248{
249 return __hpte_page_size(h, l, 0);
250}
251
252static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
253{
254 return __hpte_page_size(h, l, 1);
255}
256
257static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) 184static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
258{ 185{
259 return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; 186 return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ec35af34a3fb..ed30d2ea21b7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -43,6 +43,8 @@
43#include <asm/cputhreads.h> 43#include <asm/cputhreads.h>
44#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES) 44#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
45 45
46#define __KVM_HAVE_ARCH_INTC_INITIALIZED
47
46#ifdef CONFIG_KVM_MMIO 48#ifdef CONFIG_KVM_MMIO
47#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 49#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
48#endif 50#endif
@@ -95,42 +97,49 @@ struct kvmppc_vcpu_book3s;
95struct kvmppc_book3s_shadow_vcpu; 97struct kvmppc_book3s_shadow_vcpu;
96 98
97struct kvm_vm_stat { 99struct kvm_vm_stat {
98 u32 remote_tlb_flush; 100 ulong remote_tlb_flush;
99}; 101};
100 102
101struct kvm_vcpu_stat { 103struct kvm_vcpu_stat {
102 u32 sum_exits; 104 u64 sum_exits;
103 u32 mmio_exits; 105 u64 mmio_exits;
104 u32 signal_exits; 106 u64 signal_exits;
105 u32 light_exits; 107 u64 light_exits;
106 /* Account for special types of light exits: */ 108 /* Account for special types of light exits: */
107 u32 itlb_real_miss_exits; 109 u64 itlb_real_miss_exits;
108 u32 itlb_virt_miss_exits; 110 u64 itlb_virt_miss_exits;
109 u32 dtlb_real_miss_exits; 111 u64 dtlb_real_miss_exits;
110 u32 dtlb_virt_miss_exits; 112 u64 dtlb_virt_miss_exits;
111 u32 syscall_exits; 113 u64 syscall_exits;
112 u32 isi_exits; 114 u64 isi_exits;
113 u32 dsi_exits; 115 u64 dsi_exits;
114 u32 emulated_inst_exits; 116 u64 emulated_inst_exits;
115 u32 dec_exits; 117 u64 dec_exits;
116 u32 ext_intr_exits; 118 u64 ext_intr_exits;
117 u32 halt_successful_poll; 119 u64 halt_poll_success_ns;
118 u32 halt_attempted_poll; 120 u64 halt_poll_fail_ns;
119 u32 halt_poll_invalid; 121 u64 halt_wait_ns;
120 u32 halt_wakeup; 122 u64 halt_successful_poll;
121 u32 dbell_exits; 123 u64 halt_attempted_poll;
122 u32 gdbell_exits; 124 u64 halt_successful_wait;
123 u32 ld; 125 u64 halt_poll_invalid;
124 u32 st; 126 u64 halt_wakeup;
127 u64 dbell_exits;
128 u64 gdbell_exits;
129 u64 ld;
130 u64 st;
125#ifdef CONFIG_PPC_BOOK3S 131#ifdef CONFIG_PPC_BOOK3S
126 u32 pf_storage; 132 u64 pf_storage;
127 u32 pf_instruc; 133 u64 pf_instruc;
128 u32 sp_storage; 134 u64 sp_storage;
129 u32 sp_instruc; 135 u64 sp_instruc;
130 u32 queue_intr; 136 u64 queue_intr;
131 u32 ld_slow; 137 u64 ld_slow;
132 u32 st_slow; 138 u64 st_slow;
133#endif 139#endif
140 u64 pthru_all;
141 u64 pthru_host;
142 u64 pthru_bad_aff;
134}; 143};
135 144
136enum kvm_exit_types { 145enum kvm_exit_types {
@@ -197,6 +206,8 @@ struct kvmppc_spapr_tce_table {
197struct kvmppc_xics; 206struct kvmppc_xics;
198struct kvmppc_icp; 207struct kvmppc_icp;
199 208
209struct kvmppc_passthru_irqmap;
210
200/* 211/*
201 * The reverse mapping array has one entry for each HPTE, 212 * The reverse mapping array has one entry for each HPTE,
202 * which stores the guest's view of the second word of the HPTE 213 * which stores the guest's view of the second word of the HPTE
@@ -267,6 +278,7 @@ struct kvm_arch {
267#endif 278#endif
268#ifdef CONFIG_KVM_XICS 279#ifdef CONFIG_KVM_XICS
269 struct kvmppc_xics *xics; 280 struct kvmppc_xics *xics;
281 struct kvmppc_passthru_irqmap *pimap;
270#endif 282#endif
271 struct kvmppc_ops *kvm_ops; 283 struct kvmppc_ops *kvm_ops;
272#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 284#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -275,41 +287,6 @@ struct kvm_arch {
275#endif 287#endif
276}; 288};
277 289
278/*
279 * Struct for a virtual core.
280 * Note: entry_exit_map combines a bitmap of threads that have entered
281 * in the bottom 8 bits and a bitmap of threads that have exited in the
282 * next 8 bits. This is so that we can atomically set the entry bit
283 * iff the exit map is 0 without taking a lock.
284 */
285struct kvmppc_vcore {
286 int n_runnable;
287 int num_threads;
288 int entry_exit_map;
289 int napping_threads;
290 int first_vcpuid;
291 u16 pcpu;
292 u16 last_cpu;
293 u8 vcore_state;
294 u8 in_guest;
295 struct kvmppc_vcore *master_vcore;
296 struct list_head runnable_threads;
297 struct list_head preempt_list;
298 spinlock_t lock;
299 struct swait_queue_head wq;
300 spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
301 u64 stolen_tb;
302 u64 preempt_tb;
303 struct kvm_vcpu *runner;
304 struct kvm *kvm;
305 u64 tb_offset; /* guest timebase - host timebase */
306 ulong lpcr;
307 u32 arch_compat;
308 ulong pcr;
309 ulong dpdes; /* doorbell state (POWER8) */
310 ulong conferring_threads;
311};
312
313#define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff) 290#define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff)
314#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) 291#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
315#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) 292#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
@@ -329,6 +306,7 @@ struct kvmppc_vcore {
329#define VCORE_SLEEPING 3 306#define VCORE_SLEEPING 3
330#define VCORE_RUNNING 4 307#define VCORE_RUNNING 4
331#define VCORE_EXITING 5 308#define VCORE_EXITING 5
309#define VCORE_POLLING 6
332 310
333/* 311/*
334 * Struct used to manage memory for a virtual processor area 312 * Struct used to manage memory for a virtual processor area
@@ -397,6 +375,20 @@ struct kvmhv_tb_accumulator {
397 u64 tb_max; /* max time */ 375 u64 tb_max; /* max time */
398}; 376};
399 377
378#ifdef CONFIG_PPC_BOOK3S_64
379struct kvmppc_irq_map {
380 u32 r_hwirq;
381 u32 v_hwirq;
382 struct irq_desc *desc;
383};
384
385#define KVMPPC_PIRQ_MAPPED 1024
386struct kvmppc_passthru_irqmap {
387 int n_mapped;
388 struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
389};
390#endif
391
400# ifdef CONFIG_PPC_FSL_BOOK3E 392# ifdef CONFIG_PPC_FSL_BOOK3E
401#define KVMPPC_BOOKE_IAC_NUM 2 393#define KVMPPC_BOOKE_IAC_NUM 2
402#define KVMPPC_BOOKE_DAC_NUM 2 394#define KVMPPC_BOOKE_DAC_NUM 2
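The passthrough map added above is a flat, fixed-size array that real-mode code can search without taking locks. A sketch of how an entry might be looked up by host hardware IRQ number (illustrative; the actual real-mode search is in book3s_hv_rm_xics.c):

static struct kvmppc_irq_map *find_irq_map(struct kvmppc_passthru_irqmap *pimap,
					   u32 r_hwirq)
{
	int i;

	for (i = 0; i < pimap->n_mapped; i++)
		if (pimap->mapped[i].r_hwirq == r_hwirq)
			return &pimap->mapped[i];
	return NULL;				/* no passthrough mapping for this IRQ */
}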
@@ -668,7 +660,6 @@ struct kvm_vcpu_arch {
668 long pgfault_index; 660 long pgfault_index;
669 unsigned long pgfault_hpte[2]; 661 unsigned long pgfault_hpte[2];
670 662
671 struct list_head run_list;
672 struct task_struct *run_task; 663 struct task_struct *run_task;
673 struct kvm_run *kvm_run; 664 struct kvm_run *kvm_run;
674 665
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2544edabe7f3..f6e49640dbe1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -287,6 +287,10 @@ struct kvmppc_ops {
287 long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, 287 long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
288 unsigned long arg); 288 unsigned long arg);
289 int (*hcall_implemented)(unsigned long hcall); 289 int (*hcall_implemented)(unsigned long hcall);
290 int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
291 struct irq_bypass_producer *);
292 void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
293 struct irq_bypass_producer *);
290}; 294};
291 295
292extern struct kvmppc_ops *kvmppc_hv_ops; 296extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -453,8 +457,19 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
453{ 457{
454 return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; 458 return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
455} 459}
460
461static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
462 struct kvm *kvm)
463{
464 if (kvm && kvm_irq_bypass)
465 return kvm->arch.pimap;
466 return NULL;
467}
468
456extern void kvmppc_alloc_host_rm_ops(void); 469extern void kvmppc_alloc_host_rm_ops(void);
457extern void kvmppc_free_host_rm_ops(void); 470extern void kvmppc_free_host_rm_ops(void);
471extern void kvmppc_free_pimap(struct kvm *kvm);
472extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
458extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); 473extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
459extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); 474extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
460extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); 475extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -464,10 +479,23 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
464extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, 479extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
465 struct kvm_vcpu *vcpu, u32 cpu); 480 struct kvm_vcpu *vcpu, u32 cpu);
466extern void kvmppc_xics_ipi_action(void); 481extern void kvmppc_xics_ipi_action(void);
482extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
483 unsigned long host_irq);
484extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
485 unsigned long host_irq);
486extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
487 struct kvmppc_irq_map *irq_map,
488 struct kvmppc_passthru_irqmap *pimap);
467extern int h_ipi_redirect; 489extern int h_ipi_redirect;
468#else 490#else
491static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
492 struct kvm *kvm)
493 { return NULL; }
469static inline void kvmppc_alloc_host_rm_ops(void) {}; 494static inline void kvmppc_alloc_host_rm_ops(void) {};
470static inline void kvmppc_free_host_rm_ops(void) {}; 495static inline void kvmppc_free_host_rm_ops(void) {};
496static inline void kvmppc_free_pimap(struct kvm *kvm) {};
497static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
498 { return 0; }
471static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) 499static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
472 { return 0; } 500 { return 0; }
473static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } 501static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e2fb408f8398..b78e8d3377f6 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -271,6 +271,7 @@ static inline bool early_radix_enabled(void)
271#define MMU_PAGE_16G 13 271#define MMU_PAGE_16G 13
272#define MMU_PAGE_64G 14 272#define MMU_PAGE_64G 14
273 273
274/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
274#define MMU_PAGE_COUNT 15 275#define MMU_PAGE_COUNT 15
275 276
276#ifdef CONFIG_PPC_BOOK3S_64 277#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index ee05bd203630..e958b7096f19 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -67,6 +67,7 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
67int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func, 67int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
68 uint64_t offset, uint32_t data); 68 uint64_t offset, uint32_t data);
69int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority); 69int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
70int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
70int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority); 71int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
71int64_t opal_register_exception_handler(uint64_t opal_exception, 72int64_t opal_register_exception_handler(uint64_t opal_exception,
72 uint64_t handler_address, 73 uint64_t handler_address,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 148303e7771f..6a6792bb39fb 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -183,11 +183,6 @@ struct paca_struct {
183 */ 183 */
184 u16 in_mce; 184 u16 in_mce;
185 u8 hmi_event_available; /* HMI event is available */ 185 u8 hmi_event_available; /* HMI event is available */
186 /*
187 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
188 * more details
189 */
190 struct sibling_subcore_state *sibling_subcore_state;
191#endif 186#endif
192 187
193 /* Stuff for accurate time accounting */ 188 /* Stuff for accurate time accounting */
@@ -202,6 +197,13 @@ struct paca_struct {
202 struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 197 struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
203#endif 198#endif
204 struct kvmppc_host_state kvm_hstate; 199 struct kvmppc_host_state kvm_hstate;
200#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
201 /*
202 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
203 * more details
204 */
205 struct sibling_subcore_state *sibling_subcore_state;
206#endif
205#endif 207#endif
206}; 208};
207 209
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 0cbd8134ce81..1b46b52d3212 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -12,6 +12,7 @@
12 12
13#include <linux/pci.h> 13#include <linux/pci.h>
14#include <linux/pci_hotplug.h> 14#include <linux/pci_hotplug.h>
15#include <linux/irq.h>
15#include <misc/cxl-base.h> 16#include <misc/cxl-base.h>
16#include <asm/opal-api.h> 17#include <asm/opal-api.h>
17 18
@@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
33void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); 34void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
34int pnv_cxl_get_irq_count(struct pci_dev *dev); 35int pnv_cxl_get_irq_count(struct pci_dev *dev);
35struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); 36struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
37int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq);
38bool is_pnv_opal_msi(struct irq_chip *chip);
36 39
37#ifdef CONFIG_CXL_BASE 40#ifdef CONFIG_CXL_BASE
38int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, 41int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b2027a5cf508..fe4c075bcf50 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32) += vdso32/
41obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 41obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
42obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o 42obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
43obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o 43obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
44obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o 44obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
45obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o 45obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
46obj-$(CONFIG_PPC64) += vdso64/ 46obj-$(CONFIG_PPC64) += vdso64/
47obj-$(CONFIG_ALTIVEC) += vecemu.o 47obj-$(CONFIG_ALTIVEC) += vecemu.o
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c2024ac9d4e8..029be26b5a17 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,6 +22,9 @@ config KVM
22 select ANON_INODES 22 select ANON_INODES
23 select HAVE_KVM_EVENTFD 23 select HAVE_KVM_EVENTFD
24 select SRCU 24 select SRCU
25 select KVM_VFIO
26 select IRQ_BYPASS_MANAGER
27 select HAVE_KVM_IRQ_BYPASS
25 28
26config KVM_BOOK3S_HANDLER 29config KVM_BOOK3S_HANDLER
27 bool 30 bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 1f9e5529e692..7dd89b79d038 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -7,16 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm 7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
8KVM := ../../../virt/kvm 8KVM := ../../../virt/kvm
9 9
10common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ 10common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
11 $(KVM)/eventfd.o
12common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o 11common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
12common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
13 13
14CFLAGS_e500_mmu.o := -I. 14CFLAGS_e500_mmu.o := -I.
15CFLAGS_e500_mmu_host.o := -I. 15CFLAGS_e500_mmu_host.o := -I.
16CFLAGS_emulate.o := -I. 16CFLAGS_emulate.o := -I.
17CFLAGS_emulate_loadstore.o := -I. 17CFLAGS_emulate_loadstore.o := -I.
18 18
19common-objs-y += powerpc.o emulate.o emulate_loadstore.o 19common-objs-y += powerpc.o emulate_loadstore.o
20obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o 20obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
21obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o 21obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
22 22
@@ -24,6 +24,7 @@ AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
24 24
25kvm-e500-objs := \ 25kvm-e500-objs := \
26 $(common-objs-y) \ 26 $(common-objs-y) \
27 emulate.o \
27 booke.o \ 28 booke.o \
28 booke_emulate.o \ 29 booke_emulate.o \
29 booke_interrupts.o \ 30 booke_interrupts.o \
@@ -35,6 +36,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
35 36
36kvm-e500mc-objs := \ 37kvm-e500mc-objs := \
37 $(common-objs-y) \ 38 $(common-objs-y) \
39 emulate.o \
38 booke.o \ 40 booke.o \
39 booke_emulate.o \ 41 booke_emulate.o \
40 bookehv_interrupts.o \ 42 bookehv_interrupts.o \
@@ -61,9 +63,6 @@ kvm-pr-y := \
61 book3s_32_mmu.o 63 book3s_32_mmu.o
62 64
63ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 65ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
64kvm-book3s_64-module-objs := \
65 $(KVM)/coalesced_mmio.o
66
67kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 66kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
68 book3s_rmhandlers.o 67 book3s_rmhandlers.o
69endif 68endif
@@ -78,6 +77,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
78 77
79ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 78ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
80kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 79kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
80 book3s_hv_hmi.o \
81 book3s_hv_rmhandlers.o \ 81 book3s_hv_rmhandlers.o \
82 book3s_hv_rm_mmu.o \ 82 book3s_hv_rm_mmu.o \
83 book3s_hv_ras.o \ 83 book3s_hv_ras.o \
@@ -88,11 +88,8 @@ endif
88kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ 88kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
89 book3s_xics.o 89 book3s_xics.o
90 90
91kvm-book3s_64-module-objs += \ 91kvm-book3s_64-module-objs := \
92 $(KVM)/kvm_main.o \ 92 $(common-objs-y) \
93 $(KVM)/eventfd.o \
94 powerpc.o \
95 emulate_loadstore.o \
96 book3s.o \ 93 book3s.o \
97 book3s_64_vio.o \ 94 book3s_64_vio.o \
98 book3s_rtas.o \ 95 book3s_rtas.o \
@@ -102,6 +99,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
102 99
103kvm-book3s_32-objs := \ 100kvm-book3s_32-objs := \
104 $(common-objs-y) \ 101 $(common-objs-y) \
102 emulate.o \
105 fpu.o \ 103 fpu.o \
106 book3s_paired_singles.o \ 104 book3s_paired_singles.o \
107 book3s.o \ 105 book3s.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 47018fcbf7d6..ba231a1d43d4 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
52 { "dec", VCPU_STAT(dec_exits) }, 52 { "dec", VCPU_STAT(dec_exits) },
53 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 53 { "ext_intr", VCPU_STAT(ext_intr_exits) },
54 { "queue_intr", VCPU_STAT(queue_intr) }, 54 { "queue_intr", VCPU_STAT(queue_intr) },
55 { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) },
56 { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) },
57 { "halt_wait_ns", VCPU_STAT(halt_wait_ns) },
55 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, 58 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
56 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, 59 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
60 { "halt_successful_wait", VCPU_STAT(halt_successful_wait) },
57 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, 61 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
58 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 62 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
59 { "pf_storage", VCPU_STAT(pf_storage) }, 63 { "pf_storage", VCPU_STAT(pf_storage) },
@@ -64,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
64 { "ld_slow", VCPU_STAT(ld_slow) }, 68 { "ld_slow", VCPU_STAT(ld_slow) },
65 { "st", VCPU_STAT(st) }, 69 { "st", VCPU_STAT(st) },
66 { "st_slow", VCPU_STAT(st_slow) }, 70 { "st_slow", VCPU_STAT(st_slow) },
71 { "pthru_all", VCPU_STAT(pthru_all) },
72 { "pthru_host", VCPU_STAT(pthru_host) },
73 { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
67 { NULL } 74 { NULL }
68}; 75};
69 76
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2fd5580c8f6e..9b3bba643b43 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -53,11 +53,15 @@
53#include <asm/smp.h> 53#include <asm/smp.h>
54#include <asm/dbell.h> 54#include <asm/dbell.h>
55#include <asm/hmi.h> 55#include <asm/hmi.h>
56#include <asm/pnv-pci.h>
56#include <linux/gfp.h> 57#include <linux/gfp.h>
57#include <linux/vmalloc.h> 58#include <linux/vmalloc.h>
58#include <linux/highmem.h> 59#include <linux/highmem.h>
59#include <linux/hugetlb.h> 60#include <linux/hugetlb.h>
61#include <linux/kvm_irqfd.h>
62#include <linux/irqbypass.h>
60#include <linux/module.h> 63#include <linux/module.h>
64#include <linux/compiler.h>
61 65
62#include "book3s.h" 66#include "book3s.h"
63 67
@@ -70,6 +74,8 @@
70 74
71/* Used to indicate that a guest page fault needs to be handled */ 75/* Used to indicate that a guest page fault needs to be handled */
72#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) 76#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
77/* Used to indicate that a guest passthrough interrupt needs to be handled */
78#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2)
73 79
74/* Used as a "null" value for timebase values */ 80/* Used as a "null" value for timebase values */
75#define TB_NIL (~(u64)0) 81#define TB_NIL (~(u64)0)
@@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = {
89 .get = param_get_int, 95 .get = param_get_int,
90}; 96};
91 97
98module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
99 S_IRUGO | S_IWUSR);
100MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
101
92module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 102module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
93 S_IRUGO | S_IWUSR); 103 S_IRUGO | S_IWUSR);
94MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); 104MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
95#endif 105#endif
96 106
107/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
108static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
109module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
110MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
111
112/* Factor by which the vcore halt poll interval is grown, default is to double
113 */
114static unsigned int halt_poll_ns_grow = 2;
115module_param(halt_poll_ns_grow, int, S_IRUGO);
116MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
117
118/* Factor by which the vcore halt poll interval is shrunk, default is to reset
119 */
120static unsigned int halt_poll_ns_shrink;
121module_param(halt_poll_ns_shrink, int, S_IRUGO);
122MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
123
97static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 124static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
98static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 125static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
99 126
127static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
128 int *ip)
129{
130 int i = *ip;
131 struct kvm_vcpu *vcpu;
132
133 while (++i < MAX_SMT_THREADS) {
134 vcpu = READ_ONCE(vc->runnable_threads[i]);
135 if (vcpu) {
136 *ip = i;
137 return vcpu;
138 }
139 }
140 return NULL;
141}
142
143/* Used to traverse the list of runnable threads for a given vcore */
144#define for_each_runnable_thread(i, vcpu, vc) \
145 for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
146
100static bool kvmppc_ipi_thread(int cpu) 147static bool kvmppc_ipi_thread(int cpu)
101{ 148{
102 /* On POWER8 for IPIs to threads in the same core, use msgsnd */ 149 /* On POWER8 for IPIs to threads in the same core, use msgsnd */
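for_each_runnable_thread() replaces the old run_list traversal with a walk of the runnable_threads[] array, skipping NULL slots via next_runnable_thread(). A small illustrative use (assumes vc->lock is held, as the converted call sites later in this patch do):

static int count_ceded_threads(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu;
	int i, n_ceded = 0;

	for_each_runnable_thread(i, vcpu, vc)	/* the macro starts i at -1 */
		n_ceded += vcpu->arch.ceded;
	return n_ceded;
}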
@@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
991 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 1038 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
992 r = RESUME_GUEST; 1039 r = RESUME_GUEST;
993 break; 1040 break;
1041 case BOOK3S_INTERRUPT_HV_RM_HARD:
1042 r = RESUME_PASSTHROUGH;
1043 break;
994 default: 1044 default:
995 kvmppc_dump_regs(vcpu); 1045 kvmppc_dump_regs(vcpu);
996 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", 1046 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
@@ -1493,7 +1543,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
1493 if (vcore == NULL) 1543 if (vcore == NULL)
1494 return NULL; 1544 return NULL;
1495 1545
1496 INIT_LIST_HEAD(&vcore->runnable_threads);
1497 spin_lock_init(&vcore->lock); 1546 spin_lock_init(&vcore->lock);
1498 spin_lock_init(&vcore->stoltb_lock); 1547 spin_lock_init(&vcore->stoltb_lock);
1499 init_swait_queue_head(&vcore->wq); 1548 init_swait_queue_head(&vcore->wq);
@@ -1802,7 +1851,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
1802 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 1851 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
1803 spin_unlock_irq(&vcpu->arch.tbacct_lock); 1852 spin_unlock_irq(&vcpu->arch.tbacct_lock);
1804 --vc->n_runnable; 1853 --vc->n_runnable;
1805 list_del(&vcpu->arch.run_list); 1854 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
1806} 1855}
1807 1856
1808static int kvmppc_grab_hwthread(int cpu) 1857static int kvmppc_grab_hwthread(int cpu)
@@ -2209,10 +2258,10 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
2209 2258
2210static void prepare_threads(struct kvmppc_vcore *vc) 2259static void prepare_threads(struct kvmppc_vcore *vc)
2211{ 2260{
2212 struct kvm_vcpu *vcpu, *vnext; 2261 int i;
2262 struct kvm_vcpu *vcpu;
2213 2263
2214 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 2264 for_each_runnable_thread(i, vcpu, vc) {
2215 arch.run_list) {
2216 if (signal_pending(vcpu->arch.run_task)) 2265 if (signal_pending(vcpu->arch.run_task))
2217 vcpu->arch.ret = -EINTR; 2266 vcpu->arch.ret = -EINTR;
2218 else if (vcpu->arch.vpa.update_pending || 2267 else if (vcpu->arch.vpa.update_pending ||
@@ -2259,15 +2308,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
2259 2308
2260static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) 2309static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2261{ 2310{
2262 int still_running = 0; 2311 int still_running = 0, i;
2263 u64 now; 2312 u64 now;
2264 long ret; 2313 long ret;
2265 struct kvm_vcpu *vcpu, *vnext; 2314 struct kvm_vcpu *vcpu;
2266 2315
2267 spin_lock(&vc->lock); 2316 spin_lock(&vc->lock);
2268 now = get_tb(); 2317 now = get_tb();
2269 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 2318 for_each_runnable_thread(i, vcpu, vc) {
2270 arch.run_list) {
2271 /* cancel pending dec exception if dec is positive */ 2319 /* cancel pending dec exception if dec is positive */
2272 if (now < vcpu->arch.dec_expires && 2320 if (now < vcpu->arch.dec_expires &&
2273 kvmppc_core_pending_dec(vcpu)) 2321 kvmppc_core_pending_dec(vcpu))
@@ -2307,8 +2355,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2307 } 2355 }
2308 if (vc->n_runnable > 0 && vc->runner == NULL) { 2356 if (vc->n_runnable > 0 && vc->runner == NULL) {
2309 /* make sure there's a candidate runner awake */ 2357 /* make sure there's a candidate runner awake */
2310 vcpu = list_first_entry(&vc->runnable_threads, 2358 i = -1;
2311 struct kvm_vcpu, arch.run_list); 2359 vcpu = next_runnable_thread(vc, &i);
2312 wake_up(&vcpu->arch.cpu_run); 2360 wake_up(&vcpu->arch.cpu_run);
2313 } 2361 }
2314 } 2362 }
@@ -2361,7 +2409,7 @@ static inline void kvmppc_set_host_core(int cpu)
2361 */ 2409 */
2362static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) 2410static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2363{ 2411{
2364 struct kvm_vcpu *vcpu, *vnext; 2412 struct kvm_vcpu *vcpu;
2365 int i; 2413 int i;
2366 int srcu_idx; 2414 int srcu_idx;
2367 struct core_info core_info; 2415 struct core_info core_info;
@@ -2397,8 +2445,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2397 */ 2445 */
2398 if ((threads_per_core > 1) && 2446 if ((threads_per_core > 1) &&
2399 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { 2447 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
2400 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 2448 for_each_runnable_thread(i, vcpu, vc) {
2401 arch.run_list) {
2402 vcpu->arch.ret = -EBUSY; 2449 vcpu->arch.ret = -EBUSY;
2403 kvmppc_remove_runnable(vc, vcpu); 2450 kvmppc_remove_runnable(vc, vcpu);
2404 wake_up(&vcpu->arch.cpu_run); 2451 wake_up(&vcpu->arch.cpu_run);
@@ -2477,8 +2524,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2477 active |= 1 << thr; 2524 active |= 1 << thr;
2478 list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { 2525 list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
2479 pvc->pcpu = pcpu + thr; 2526 pvc->pcpu = pcpu + thr;
2480 list_for_each_entry(vcpu, &pvc->runnable_threads, 2527 for_each_runnable_thread(i, vcpu, pvc) {
2481 arch.run_list) {
2482 kvmppc_start_thread(vcpu, pvc); 2528 kvmppc_start_thread(vcpu, pvc);
2483 kvmppc_create_dtl_entry(vcpu, pvc); 2529 kvmppc_create_dtl_entry(vcpu, pvc);
2484 trace_kvm_guest_enter(vcpu); 2530 trace_kvm_guest_enter(vcpu);
@@ -2604,34 +2650,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
2604 finish_wait(&vcpu->arch.cpu_run, &wait); 2650 finish_wait(&vcpu->arch.cpu_run, &wait);
2605} 2651}
2606 2652
2653static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
2654{
2655 /* 10us base */
2656 if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
2657 vc->halt_poll_ns = 10000;
2658 else
2659 vc->halt_poll_ns *= halt_poll_ns_grow;
2660
2661 if (vc->halt_poll_ns > halt_poll_max_ns)
2662 vc->halt_poll_ns = halt_poll_max_ns;
2663}
2664
2665static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
2666{
2667 if (halt_poll_ns_shrink == 0)
2668 vc->halt_poll_ns = 0;
2669 else
2670 vc->halt_poll_ns /= halt_poll_ns_shrink;
2671}
2672
2673/* Check to see if any of the runnable vcpus on the vcore have pending
2674 * exceptions or are no longer ceded
2675 */
2676static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
2677{
2678 struct kvm_vcpu *vcpu;
2679 int i;
2680
2681 for_each_runnable_thread(i, vcpu, vc) {
2682 if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
2683 return 1;
2684 }
2685
2686 return 0;
2687}
2688
2607/* 2689/*
2608 * All the vcpus in this vcore are idle, so wait for a decrementer 2690 * All the vcpus in this vcore are idle, so wait for a decrementer
2609 * or external interrupt to one of the vcpus. vc->lock is held. 2691 * or external interrupt to one of the vcpus. vc->lock is held.
2610 */ 2692 */
2611static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) 2693static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
2612{ 2694{
2613 struct kvm_vcpu *vcpu; 2695 ktime_t cur, start_poll, start_wait;
2614 int do_sleep = 1; 2696 int do_sleep = 1;
2697 u64 block_ns;
2615 DECLARE_SWAITQUEUE(wait); 2698 DECLARE_SWAITQUEUE(wait);
2616 2699
2617 prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); 2700 /* Poll for pending exceptions and ceded state */
2701 cur = start_poll = ktime_get();
2702 if (vc->halt_poll_ns) {
2703 ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
2704 ++vc->runner->stat.halt_attempted_poll;
2618 2705
2619 /* 2706 vc->vcore_state = VCORE_POLLING;
2620 * Check one last time for pending exceptions and ceded state after 2707 spin_unlock(&vc->lock);
2621 * we put ourselves on the wait queue 2708
2622 */ 2709 do {
2623 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 2710 if (kvmppc_vcore_check_block(vc)) {
2624 if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { 2711 do_sleep = 0;
2625 do_sleep = 0; 2712 break;
2626 break; 2713 }
2714 cur = ktime_get();
2715 } while (single_task_running() && ktime_before(cur, stop));
2716
2717 spin_lock(&vc->lock);
2718 vc->vcore_state = VCORE_INACTIVE;
2719
2720 if (!do_sleep) {
2721 ++vc->runner->stat.halt_successful_poll;
2722 goto out;
2627 } 2723 }
2628 } 2724 }
2629 2725
2630 if (!do_sleep) { 2726 prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
2727
2728 if (kvmppc_vcore_check_block(vc)) {
2631 finish_swait(&vc->wq, &wait); 2729 finish_swait(&vc->wq, &wait);
2632 return; 2730 do_sleep = 0;
2731 /* If we polled, count this as a successful poll */
2732 if (vc->halt_poll_ns)
2733 ++vc->runner->stat.halt_successful_poll;
2734 goto out;
2633 } 2735 }
2634 2736
2737 start_wait = ktime_get();
2738
2635 vc->vcore_state = VCORE_SLEEPING; 2739 vc->vcore_state = VCORE_SLEEPING;
2636 trace_kvmppc_vcore_blocked(vc, 0); 2740 trace_kvmppc_vcore_blocked(vc, 0);
2637 spin_unlock(&vc->lock); 2741 spin_unlock(&vc->lock);
@@ -2640,13 +2744,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
2640 spin_lock(&vc->lock); 2744 spin_lock(&vc->lock);
2641 vc->vcore_state = VCORE_INACTIVE; 2745 vc->vcore_state = VCORE_INACTIVE;
2642 trace_kvmppc_vcore_blocked(vc, 1); 2746 trace_kvmppc_vcore_blocked(vc, 1);
2747 ++vc->runner->stat.halt_successful_wait;
2748
2749 cur = ktime_get();
2750
2751out:
2752 block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
2753
2754 /* Attribute wait time */
2755 if (do_sleep) {
2756 vc->runner->stat.halt_wait_ns +=
2757 ktime_to_ns(cur) - ktime_to_ns(start_wait);
2758 /* Attribute failed poll time */
2759 if (vc->halt_poll_ns)
2760 vc->runner->stat.halt_poll_fail_ns +=
2761 ktime_to_ns(start_wait) -
2762 ktime_to_ns(start_poll);
2763 } else {
2764 /* Attribute successful poll time */
2765 if (vc->halt_poll_ns)
2766 vc->runner->stat.halt_poll_success_ns +=
2767 ktime_to_ns(cur) -
2768 ktime_to_ns(start_poll);
2769 }
2770
2771 /* Adjust poll time */
2772 if (halt_poll_max_ns) {
2773 if (block_ns <= vc->halt_poll_ns)
2774 ;
2775 /* We slept and blocked for longer than the max halt time */
2776 else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
2777 shrink_halt_poll_ns(vc);
2778 /* We slept and our poll time is too small */
2779 else if (vc->halt_poll_ns < halt_poll_max_ns &&
2780 block_ns < halt_poll_max_ns)
2781 grow_halt_poll_ns(vc);
2782 } else
2783 vc->halt_poll_ns = 0;
2784
2785 trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
2643} 2786}
2644 2787
2645static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 2788static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2646{ 2789{
2647 int n_ceded; 2790 int n_ceded, i;
2648 struct kvmppc_vcore *vc; 2791 struct kvmppc_vcore *vc;
2649 struct kvm_vcpu *v, *vn; 2792 struct kvm_vcpu *v;
2650 2793
2651 trace_kvmppc_run_vcpu_enter(vcpu); 2794 trace_kvmppc_run_vcpu_enter(vcpu);
2652 2795
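To make the adjustment policy above concrete, with the default module parameters (halt_poll_ns_grow = 2, halt_poll_ns_shrink = 0): a vcore starts with halt_poll_ns = 0, and the first time it blocks for less than halt_poll_max_ns, grow_halt_poll_ns() sets the interval to 10000 ns. Later blocks that run longer than the current interval but still finish below the max double it (20000, 40000, ...) until it is capped at halt_poll_max_ns; blocks shorter than the current interval leave it unchanged. If a vcore with polling enabled then blocks for longer than halt_poll_max_ns, shrink_halt_poll_ns() runs, and with the default shrink factor of 0 the interval is reset to 0 rather than divided.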
@@ -2666,7 +2809,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2666 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); 2809 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
2667 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; 2810 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
2668 vcpu->arch.busy_preempt = TB_NIL; 2811 vcpu->arch.busy_preempt = TB_NIL;
2669 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); 2812 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
2670 ++vc->n_runnable; 2813 ++vc->n_runnable;
2671 2814
2672 /* 2815 /*
@@ -2706,8 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2706 kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE); 2849 kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
2707 continue; 2850 continue;
2708 } 2851 }
2709 list_for_each_entry_safe(v, vn, &vc->runnable_threads, 2852 for_each_runnable_thread(i, v, vc) {
2710 arch.run_list) {
2711 kvmppc_core_prepare_to_enter(v); 2853 kvmppc_core_prepare_to_enter(v);
2712 if (signal_pending(v->arch.run_task)) { 2854 if (signal_pending(v->arch.run_task)) {
2713 kvmppc_remove_runnable(vc, v); 2855 kvmppc_remove_runnable(vc, v);
@@ -2720,7 +2862,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2720 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) 2862 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
2721 break; 2863 break;
2722 n_ceded = 0; 2864 n_ceded = 0;
2723 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { 2865 for_each_runnable_thread(i, v, vc) {
2724 if (!v->arch.pending_exceptions) 2866 if (!v->arch.pending_exceptions)
2725 n_ceded += v->arch.ceded; 2867 n_ceded += v->arch.ceded;
2726 else 2868 else
@@ -2759,8 +2901,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2759 2901
2760 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) { 2902 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
2761 /* Wake up some vcpu to run the core */ 2903 /* Wake up some vcpu to run the core */
2762 v = list_first_entry(&vc->runnable_threads, 2904 i = -1;
2763 struct kvm_vcpu, arch.run_list); 2905 v = next_runnable_thread(vc, &i);
2764 wake_up(&v->arch.cpu_run); 2906 wake_up(&v->arch.cpu_run);
2765 } 2907 }
2766 2908
@@ -2818,7 +2960,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
2818 r = kvmppc_book3s_hv_page_fault(run, vcpu, 2960 r = kvmppc_book3s_hv_page_fault(run, vcpu,
2819 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); 2961 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
2820 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 2962 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2821 } 2963 } else if (r == RESUME_PASSTHROUGH)
2964 r = kvmppc_xics_rm_complete(vcpu, 0);
2822 } while (is_kvmppc_resume_guest(r)); 2965 } while (is_kvmppc_resume_guest(r));
2823 2966
2824 out: 2967 out:
@@ -3247,6 +3390,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
3247 kvmppc_free_vcores(kvm); 3390 kvmppc_free_vcores(kvm);
3248 3391
3249 kvmppc_free_hpt(kvm); 3392 kvmppc_free_hpt(kvm);
3393
3394 kvmppc_free_pimap(kvm);
3250} 3395}
3251 3396
3252/* We don't need to emulate any privileged instructions or dcbz */ 3397/* We don't need to emulate any privileged instructions or dcbz */
@@ -3282,6 +3427,184 @@ static int kvmppc_core_check_processor_compat_hv(void)
3282 return 0; 3427 return 0;
3283} 3428}
3284 3429
3430#ifdef CONFIG_KVM_XICS
3431
3432void kvmppc_free_pimap(struct kvm *kvm)
3433{
3434 kfree(kvm->arch.pimap);
3435}
3436
3437static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
3438{
3439 return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
3440}
3441
3442static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3443{
3444 struct irq_desc *desc;
3445 struct kvmppc_irq_map *irq_map;
3446 struct kvmppc_passthru_irqmap *pimap;
3447 struct irq_chip *chip;
3448 int i;
3449
3450 if (!kvm_irq_bypass)
3451 return 1;
3452
3453 desc = irq_to_desc(host_irq);
3454 if (!desc)
3455 return -EIO;
3456
3457 mutex_lock(&kvm->lock);
3458
3459 pimap = kvm->arch.pimap;
3460 if (pimap == NULL) {
3461 /* First call, allocate structure to hold IRQ map */
3462 pimap = kvmppc_alloc_pimap();
3463 if (pimap == NULL) {
3464 mutex_unlock(&kvm->lock);
3465 return -ENOMEM;
3466 }
3467 kvm->arch.pimap = pimap;
3468 }
3469
3470 /*
3471 * For now, we only support interrupts for which the EOI operation
3472 * is an OPAL call followed by a write to XIRR, since that's
3473 * what our real-mode EOI code does.
3474 */
3475 chip = irq_data_get_irq_chip(&desc->irq_data);
3476 if (!chip || !is_pnv_opal_msi(chip)) {
3477 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
3478 host_irq, guest_gsi);
3479 mutex_unlock(&kvm->lock);
3480 return -ENOENT;
3481 }
3482
3483 /*
3484 * See if we already have an entry for this guest IRQ number.
3485 * If it's mapped to a hardware IRQ number, that's an error,
3486 * otherwise re-use this entry.
3487 */
3488 for (i = 0; i < pimap->n_mapped; i++) {
3489 if (guest_gsi == pimap->mapped[i].v_hwirq) {
3490 if (pimap->mapped[i].r_hwirq) {
3491 mutex_unlock(&kvm->lock);
3492 return -EINVAL;
3493 }
3494 break;
3495 }
3496 }
3497
3498 if (i == KVMPPC_PIRQ_MAPPED) {
3499 mutex_unlock(&kvm->lock);
3500 return -EAGAIN; /* table is full */
3501 }
3502
3503 irq_map = &pimap->mapped[i];
3504
3505 irq_map->v_hwirq = guest_gsi;
3506 irq_map->desc = desc;
3507
3508 /*
3509 * Order the above two stores before the next to serialize with
3510 * the KVM real mode handler.
3511 */
3512 smp_wmb();
3513 irq_map->r_hwirq = desc->irq_data.hwirq;
3514
3515 if (i == pimap->n_mapped)
3516 pimap->n_mapped++;
3517
3518 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
3519
3520 mutex_unlock(&kvm->lock);
3521
3522 return 0;
3523}
3524
3525static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3526{
3527 struct irq_desc *desc;
3528 struct kvmppc_passthru_irqmap *pimap;
3529 int i;
3530
3531 if (!kvm_irq_bypass)
3532 return 0;
3533
3534 desc = irq_to_desc(host_irq);
3535 if (!desc)
3536 return -EIO;
3537
3538 mutex_lock(&kvm->lock);
3539
3540 if (kvm->arch.pimap == NULL) {
3541 mutex_unlock(&kvm->lock);
3542 return 0;
3543 }
3544 pimap = kvm->arch.pimap;
3545
3546 for (i = 0; i < pimap->n_mapped; i++) {
3547 if (guest_gsi == pimap->mapped[i].v_hwirq)
3548 break;
3549 }
3550
3551 if (i == pimap->n_mapped) {
3552 mutex_unlock(&kvm->lock);
3553 return -ENODEV;
3554 }
3555
3556 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
3557
3558 /* invalidate the entry */
3559 pimap->mapped[i].r_hwirq = 0;
3560
3561 /*
3562 * We don't free this structure even when the count goes to
3563 * zero. The structure is freed when we destroy the VM.
3564 */
3565
3566 mutex_unlock(&kvm->lock);
3567 return 0;
3568}
3569
3570static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
3571 struct irq_bypass_producer *prod)
3572{
3573 int ret = 0;
3574 struct kvm_kernel_irqfd *irqfd =
3575 container_of(cons, struct kvm_kernel_irqfd, consumer);
3576
3577 irqfd->producer = prod;
3578
3579 ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
3580 if (ret)
3581 pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
3582 prod->irq, irqfd->gsi, ret);
3583
3584 return ret;
3585}
3586
3587static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
3588 struct irq_bypass_producer *prod)
3589{
3590 int ret;
3591 struct kvm_kernel_irqfd *irqfd =
3592 container_of(cons, struct kvm_kernel_irqfd, consumer);
3593
3594 irqfd->producer = NULL;
3595
3596 /*
3597	 * When the producer for a consumer is unregistered, we change back
3598	 * to the default external interrupt handling mode - KVM real mode
3599	 * will switch back to the host.
3600 */
3601 ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
3602 if (ret)
3603 pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
3604 prod->irq, irqfd->gsi, ret);
3605}
3606#endif
3607
3285static long kvm_arch_vm_ioctl_hv(struct file *filp, 3608static long kvm_arch_vm_ioctl_hv(struct file *filp,
3286 unsigned int ioctl, unsigned long arg) 3609 unsigned int ioctl, unsigned long arg)
3287{ 3610{
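
kvmppc_set_passthru_irq() above fills in v_hwirq and desc, issues smp_wmb(), and only then stores r_hwirq, so the real-mode lookup (get_irqmap() in book3s_hv_builtin.c, further down in this diff) can scan the table without a lock. Below is a minimal user-space model of that publish/lookup contract, using C11 release/acquire in place of the kernel barriers; all names are stand-ins, not kernel API.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

struct irq_map {
    uint32_t v_hwirq;            /* guest IRQ number (payload)      */
    void *desc;                  /* host irq descriptor (payload)   */
    _Atomic uint32_t r_hwirq;    /* publish key, 0 means "unmapped" */
};

/* Writer: fill in the payload first, then publish the key with release
 * semantics (the kernel code uses smp_wmb() before the final store). */
static void publish_map(struct irq_map *m, uint32_t gsi, void *desc,
                        uint32_t hwirq)
{
    m->v_hwirq = gsi;
    m->desc = desc;
    atomic_store_explicit(&m->r_hwirq, hwirq, memory_order_release);
}

/* Reader: a matching key read with acquire semantics (smp_rmb() in the
 * kernel version) guarantees the payload stores above are visible. */
static struct irq_map *lookup_map(struct irq_map *maps, int n, uint32_t xisr)
{
    for (int i = 0; i < n; i++) {
        if (atomic_load_explicit(&maps[i].r_hwirq,
                                 memory_order_acquire) == xisr)
            return &maps[i];
    }
    return NULL;
}

int main(void)
{
    static struct irq_map maps[2];   /* zero-initialized: both unmapped */
    int dummy_desc;

    publish_map(&maps[0], 42, &dummy_desc, 0x123);
    return lookup_map(maps, 2, 0x123) ? 0 : 1;
}
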
@@ -3400,6 +3723,10 @@ static struct kvmppc_ops kvm_ops_hv = {
3400 .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, 3723 .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
3401 .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, 3724 .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
3402 .hcall_implemented = kvmppc_hcall_impl_hv, 3725 .hcall_implemented = kvmppc_hcall_impl_hv,
3726#ifdef CONFIG_KVM_XICS
3727 .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
3728 .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
3729#endif
3403}; 3730};
3404 3731
3405static int kvm_init_subcore_bitmap(void) 3732static int kvm_init_subcore_bitmap(void)
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 5f0380db3eab..0c84d6bc8356 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -25,6 +25,7 @@
25#include <asm/xics.h> 25#include <asm/xics.h>
26#include <asm/dbell.h> 26#include <asm/dbell.h>
27#include <asm/cputhreads.h> 27#include <asm/cputhreads.h>
28#include <asm/io.h>
28 29
29#define KVM_CMA_CHUNK_ORDER 18 30#define KVM_CMA_CHUNK_ORDER 18
30 31
@@ -286,3 +287,158 @@ void kvmhv_commence_exit(int trap)
286 287
287struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; 288struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
288EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv); 289EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
290
291#ifdef CONFIG_KVM_XICS
292static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
293 u32 xisr)
294{
295 int i;
296
297 /*
298 * We access the mapped array here without a lock. That
299 * is safe because we never reduce the number of entries
300 * in the array and we never change the v_hwirq field of
301 * an entry once it is set.
302 *
303 * We have also carefully ordered the stores in the writer
304 * and the loads here in the reader, so that if we find a matching
305 * hwirq here, the associated GSI and irq_desc fields are valid.
306 */
307 for (i = 0; i < pimap->n_mapped; i++) {
308 if (xisr == pimap->mapped[i].r_hwirq) {
309 /*
310 * Order subsequent reads in the caller to serialize
311 * with the writer.
312 */
313 smp_rmb();
314 return &pimap->mapped[i];
315 }
316 }
317 return NULL;
318}
319
320/*
321 * If we have an interrupt that's not an IPI, check if we have a
322 * passthrough adapter and if so, check if this external interrupt
323 * is for the adapter.
324 * We will attempt to deliver the IRQ directly to the target VCPU's
325 * ICP, the virtual ICP (based on affinity - the xive value in ICS).
326 *
327 * If the delivery fails or if this is not for a passthrough adapter,
328	 * return to the host to handle this interrupt. We have already
329	 * saved a copy of the XIRR in the PACA; it will be picked up by
330	 * the host ICP driver.
331 */
332static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
333{
334 struct kvmppc_passthru_irqmap *pimap;
335 struct kvmppc_irq_map *irq_map;
336 struct kvm_vcpu *vcpu;
337
338 vcpu = local_paca->kvm_hstate.kvm_vcpu;
339 if (!vcpu)
340 return 1;
341 pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
342 if (!pimap)
343 return 1;
344 irq_map = get_irqmap(pimap, xisr);
345 if (!irq_map)
346 return 1;
347
348 /* We're handling this interrupt, generic code doesn't need to */
349 local_paca->kvm_hstate.saved_xirr = 0;
350
351 return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap);
352}
353
354#else
355static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
356{
357 return 1;
358}
359#endif
360
361/*
362 * Determine what sort of external interrupt is pending (if any).
363 * Returns:
364 * 0 if no interrupt is pending
365 * 1 if an interrupt is pending that needs to be handled by the host
366	 *	2 if a passthrough interrupt needs completion in the host
367 * -1 if there was a guest wakeup IPI (which has now been cleared)
368	 *	-2 if a PCI passthrough external interrupt was handled
369 */
370
371long kvmppc_read_intr(void)
372{
373 unsigned long xics_phys;
374 u32 h_xirr;
375 __be32 xirr;
376 u32 xisr;
377 u8 host_ipi;
378
379 /* see if a host IPI is pending */
380 host_ipi = local_paca->kvm_hstate.host_ipi;
381 if (host_ipi)
382 return 1;
383
384 /* Now read the interrupt from the ICP */
385 xics_phys = local_paca->kvm_hstate.xics_phys;
386 if (unlikely(!xics_phys))
387 return 1;
388
389 /*
390 * Save XIRR for later. Since we get control in reverse endian
391 * on LE systems, save it byte reversed and fetch it back in
392 * host endian. Note that xirr is the value read from the
393 * XIRR register, while h_xirr is the host endian version.
394 */
395 xirr = _lwzcix(xics_phys + XICS_XIRR);
396 h_xirr = be32_to_cpu(xirr);
397 local_paca->kvm_hstate.saved_xirr = h_xirr;
398 xisr = h_xirr & 0xffffff;
399 /*
400	 * Ensure that the store and load complete, to guarantee that all
401	 * side effects of loading from XIRR have completed
402 */
403 smp_mb();
404
405 /* if nothing pending in the ICP */
406 if (!xisr)
407 return 0;
408
409 /* We found something in the ICP...
410 *
411 * If it is an IPI, clear the MFRR and EOI it.
412 */
413 if (xisr == XICS_IPI) {
414 _stbcix(xics_phys + XICS_MFRR, 0xff);
415 _stwcix(xics_phys + XICS_XIRR, xirr);
416 /*
417 * Need to ensure side effects of above stores
418 * complete before proceeding.
419 */
420 smp_mb();
421
422 /*
423 * We need to re-check host IPI now in case it got set in the
424 * meantime. If it's clear, we bounce the interrupt to the
425 * guest
426 */
427 host_ipi = local_paca->kvm_hstate.host_ipi;
428 if (unlikely(host_ipi != 0)) {
429 /* We raced with the host,
430 * we need to resend that IPI, bummer
431 */
432 _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
433 /* Let side effects complete */
434 smp_mb();
435 return 1;
436 }
437
438 /* OK, it's an IPI for us */
439 local_paca->kvm_hstate.saved_xirr = 0;
440 return -1;
441 }
442
443 return kvmppc_check_passthru(xisr, xirr);
444}
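
kvmppc_read_intr() now distinguishes five outcomes, and the assembly caller in book3s_hv_rmhandlers.S (later in this diff) branches on the sign of the result and on the value 2. Purely as an illustration of that contract, here is a small standalone table in C; the enum and function names are invented for the sketch, not taken from the kernel.

#include <stdio.h>

/* Return-code contract of kvmppc_read_intr(), as documented in the
 * comment above; the dispatcher below is only an illustration. */
enum read_intr_result {
    EXT_IRQ_NONE        =  0,  /* nothing pending                       */
    EXT_IRQ_HOST        =  1,  /* host must handle it                   */
    EXT_IRQ_HOST_RM     =  2,  /* passthrough, host completes handling  */
    EXT_IRQ_WAKEUP_IPI  = -1,  /* guest wakeup IPI, already cleared     */
    EXT_IRQ_PASSTHRU    = -2,  /* delivered straight to the guest's ICP */
};

static const char *describe(long rc)
{
    switch (rc) {
    case EXT_IRQ_NONE:       return "return to guest, nothing pending";
    case EXT_IRQ_HOST:       return "exit to host";
    case EXT_IRQ_HOST_RM:    return "exit to host, complete passthrough EOI";
    case EXT_IRQ_WAKEUP_IPI: return "return to guest";
    case EXT_IRQ_PASSTHRU:   return "return to guest, IRQ already delivered";
    default:                 return "unexpected";
    }
}

int main(void)
{
    for (long rc = -2; rc <= 2; rc++)
        printf("%ld: %s\n", rc, describe(rc));
    return 0;
}
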
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
index e3f738eb1cac..e3f738eb1cac 100644
--- a/arch/powerpc/kernel/hmi.c
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 980d8a6f7284..82ff5de8b1e7 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -10,6 +10,7 @@
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/kvm_host.h> 11#include <linux/kvm_host.h>
12#include <linux/err.h> 12#include <linux/err.h>
13#include <linux/kernel_stat.h>
13 14
14#include <asm/kvm_book3s.h> 15#include <asm/kvm_book3s.h>
15#include <asm/kvm_ppc.h> 16#include <asm/kvm_ppc.h>
@@ -18,7 +19,10 @@
18#include <asm/debug.h> 19#include <asm/debug.h>
19#include <asm/synch.h> 20#include <asm/synch.h>
20#include <asm/cputhreads.h> 21#include <asm/cputhreads.h>
22#include <asm/pgtable.h>
21#include <asm/ppc-opcode.h> 23#include <asm/ppc-opcode.h>
24#include <asm/pnv-pci.h>
25#include <asm/opal.h>
22 26
23#include "book3s_xics.h" 27#include "book3s_xics.h"
24 28
@@ -26,9 +30,12 @@
26 30
27int h_ipi_redirect = 1; 31int h_ipi_redirect = 1;
28EXPORT_SYMBOL(h_ipi_redirect); 32EXPORT_SYMBOL(h_ipi_redirect);
33int kvm_irq_bypass = 1;
34EXPORT_SYMBOL(kvm_irq_bypass);
29 35
30static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 36static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
31 u32 new_irq); 37 u32 new_irq);
38static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu);
32 39
33/* -- ICS routines -- */ 40/* -- ICS routines -- */
34static void ics_rm_check_resend(struct kvmppc_xics *xics, 41static void ics_rm_check_resend(struct kvmppc_xics *xics,
@@ -708,10 +715,123 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
708 icp->rm_action |= XICS_RM_NOTIFY_EOI; 715 icp->rm_action |= XICS_RM_NOTIFY_EOI;
709 icp->rm_eoied_irq = irq; 716 icp->rm_eoied_irq = irq;
710 } 717 }
718
719 if (state->host_irq) {
720 ++vcpu->stat.pthru_all;
721 if (state->intr_cpu != -1) {
722 int pcpu = raw_smp_processor_id();
723
724 pcpu = cpu_first_thread_sibling(pcpu);
725 ++vcpu->stat.pthru_host;
726 if (state->intr_cpu != pcpu) {
727 ++vcpu->stat.pthru_bad_aff;
728 xics_opal_rm_set_server(state->host_irq, pcpu);
729 }
730 state->intr_cpu = -1;
731 }
732 }
711 bail: 733 bail:
712 return check_too_hard(xics, icp); 734 return check_too_hard(xics, icp);
713} 735}
714 736
737unsigned long eoi_rc;
738
739static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
740{
741 unsigned long xics_phys;
742 int64_t rc;
743
744 rc = pnv_opal_pci_msi_eoi(c, hwirq);
745
746 if (rc)
747 eoi_rc = rc;
748
749 iosync();
750
751 /* EOI it */
752 xics_phys = local_paca->kvm_hstate.xics_phys;
753 _stwcix(xics_phys + XICS_XIRR, xirr);
754}
755
756static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
757{
758 unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
759
760 return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
761}
762
763/*
764 * Increment a per-CPU 32-bit unsigned integer variable.
765 * Safe to call in real-mode. Handles vmalloc'ed addresses
766 *
767 * ToDo: Make this work for any integral type
768 */
769
770static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
771{
772 unsigned long l;
773 unsigned int *raddr;
774 int cpu = smp_processor_id();
775
776 raddr = per_cpu_ptr(addr, cpu);
777 l = (unsigned long)raddr;
778
779 if (REGION_ID(l) == VMALLOC_REGION_ID) {
780 l = vmalloc_to_phys(raddr);
781 raddr = (unsigned int *)l;
782 }
783 ++*raddr;
784}
785
786/*
787 * We don't try to update the flags in the irq_desc 'istate' field in
788 * here as would happen in the normal IRQ handling path for several reasons:
789 * - state flags represent internal IRQ state and are not expected to be
790 * updated outside the IRQ subsystem
791 * - more importantly, these are useful for edge triggered interrupts,
792 * IRQ probing, etc., but we are only handling MSI/MSIx interrupts here
793 * and these states shouldn't apply to us.
794 *
795 * However, we do update irq_stats - we somewhat duplicate the code in
796 * kstat_incr_irqs_this_cpu() for this since this function is defined
797 * in irq/internal.h which we don't want to include here.
798 * The only difference is that desc->kstat_irqs is an allocated per CPU
799 * variable and could have been vmalloc'ed, so we can't directly
800 * call __this_cpu_inc() on it. The kstat structure is a static
801 * per CPU variable and it should be accessible by real-mode KVM.
802 *
803 */
804static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
805{
806 this_cpu_inc_rm(desc->kstat_irqs);
807 __this_cpu_inc(kstat.irqs_sum);
808}
809
810long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
811 u32 xirr,
812 struct kvmppc_irq_map *irq_map,
813 struct kvmppc_passthru_irqmap *pimap)
814{
815 struct kvmppc_xics *xics;
816 struct kvmppc_icp *icp;
817 u32 irq;
818
819 irq = irq_map->v_hwirq;
820 xics = vcpu->kvm->arch.xics;
821 icp = vcpu->arch.icp;
822
823 kvmppc_rm_handle_irq_desc(irq_map->desc);
824 icp_rm_deliver_irq(xics, icp, irq);
825
826 /* EOI the interrupt */
827 icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr);
828
829 if (check_too_hard(xics, icp) == H_TOO_HARD)
830 return 2;
831 else
832 return -2;
833}
834
715/* --- Non-real mode XICS-related built-in routines --- */ 835/* --- Non-real mode XICS-related built-in routines --- */
716 836
717/** 837/**
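
The comment above this_cpu_inc_rm() explains why real-mode code cannot simply __this_cpu_inc() a vmalloc'ed per-CPU counter: the address must first be translated to something directly accessible. Below is a toy user-space model of that control flow; addr_is_vmalloc() and linear_alias() are stand-ins for the kernel's REGION_ID() check and vmalloc_to_phys(), and the "alias" is just a shadow array so the sketch actually runs.

#include <stdio.h>

#define NCOUNTERS 4

static unsigned int vmalloc_counters[NCOUNTERS];   /* "vmalloc" addresses */
static unsigned int linear_counters[NCOUNTERS];    /* directly mapped     */

static int addr_is_vmalloc(const unsigned int *p)
{
    return p >= vmalloc_counters && p < vmalloc_counters + NCOUNTERS;
}

static unsigned int *linear_alias(unsigned int *p)
{
    return linear_counters + (p - vmalloc_counters);
}

/* Model of this_cpu_inc_rm(): translate before dereferencing if the
 * pointer lives in the "vmalloc" region, then do a plain increment. */
static void counter_inc_rm(unsigned int *addr)
{
    unsigned int *raddr = addr;

    if (addr_is_vmalloc(raddr))
        raddr = linear_alias(raddr);
    ++*raddr;
}

int main(void)
{
    counter_inc_rm(&vmalloc_counters[2]);
    printf("linear_counters[2] = %u\n", linear_counters[2]);
    return 0;
}
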
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 975655573844..7cc924b5eea2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -221,6 +221,13 @@ kvmppc_primary_no_guest:
221 li r3, 0 /* Don't wake on privileged (OS) doorbell */ 221 li r3, 0 /* Don't wake on privileged (OS) doorbell */
222 b kvm_do_nap 222 b kvm_do_nap
223 223
224/*
225 * kvm_novcpu_wakeup
226 * Entered from kvm_start_guest if kvm_hstate.napping is set
227 * to NAPPING_NOVCPU
228 * r2 = kernel TOC
229 * r13 = paca
230 */
224kvm_novcpu_wakeup: 231kvm_novcpu_wakeup:
225 ld r1, HSTATE_HOST_R1(r13) 232 ld r1, HSTATE_HOST_R1(r13)
226 ld r5, HSTATE_KVM_VCORE(r13) 233 ld r5, HSTATE_KVM_VCORE(r13)
@@ -230,6 +237,13 @@ kvm_novcpu_wakeup:
230 /* check the wake reason */ 237 /* check the wake reason */
231 bl kvmppc_check_wake_reason 238 bl kvmppc_check_wake_reason
232 239
240 /*
241 * Restore volatile registers since we could have called
242 * a C routine in kvmppc_check_wake_reason.
243 * r5 = VCORE
244 */
245 ld r5, HSTATE_KVM_VCORE(r13)
246
233 /* see if any other thread is already exiting */ 247 /* see if any other thread is already exiting */
234 lwz r0, VCORE_ENTRY_EXIT(r5) 248 lwz r0, VCORE_ENTRY_EXIT(r5)
235 cmpwi r0, 0x100 249 cmpwi r0, 0x100
@@ -322,6 +336,11 @@ kvm_start_guest:
322 336
323 /* Check the wake reason in SRR1 to see why we got here */ 337 /* Check the wake reason in SRR1 to see why we got here */
324 bl kvmppc_check_wake_reason 338 bl kvmppc_check_wake_reason
339 /*
340 * kvmppc_check_wake_reason could invoke a C routine, but we
341 * have no volatile registers to restore when we return.
342 */
343
325 cmpdi r3, 0 344 cmpdi r3, 0
326 bge kvm_no_guest 345 bge kvm_no_guest
327 346
@@ -881,6 +900,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
881 cmpwi r3, 512 /* 1 microsecond */ 900 cmpwi r3, 512 /* 1 microsecond */
882 blt hdec_soon 901 blt hdec_soon
883 902
903deliver_guest_interrupt:
884 ld r6, VCPU_CTR(r4) 904 ld r6, VCPU_CTR(r4)
885 ld r7, VCPU_XER(r4) 905 ld r7, VCPU_XER(r4)
886 906
@@ -895,7 +915,6 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
895 mtspr SPRN_SRR0, r6 915 mtspr SPRN_SRR0, r6
896 mtspr SPRN_SRR1, r7 916 mtspr SPRN_SRR1, r7
897 917
898deliver_guest_interrupt:
899 /* r11 = vcpu->arch.msr & ~MSR_HV */ 918 /* r11 = vcpu->arch.msr & ~MSR_HV */
900 rldicl r11, r11, 63 - MSR_HV_LG, 1 919 rldicl r11, r11, 63 - MSR_HV_LG, 1
901 rotldi r11, r11, 1 + MSR_HV_LG 920 rotldi r11, r11, 1 + MSR_HV_LG
@@ -1155,10 +1174,54 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1155 * set, we know the host wants us out so let's do it now 1174 * set, we know the host wants us out so let's do it now
1156 */ 1175 */
1157 bl kvmppc_read_intr 1176 bl kvmppc_read_intr
1177
1178 /*
1179 * Restore the active volatile registers after returning from
1180 * a C function.
1181 */
1182 ld r9, HSTATE_KVM_VCPU(r13)
1183 li r12, BOOK3S_INTERRUPT_EXTERNAL
1184
1185 /*
1186 * kvmppc_read_intr return codes:
1187 *
1188 * Exit to host (r3 > 0)
1189 * 1 An interrupt is pending that needs to be handled by the host
1190 * Exit guest and return to host by branching to guest_exit_cont
1191 *
1192 * 2 Passthrough that needs completion in the host
1193 * Exit guest and return to host by branching to guest_exit_cont
1194 * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
1195 * to indicate to the host to complete handling the interrupt
1196 *
1197	 * Before returning to the guest, we check if any CPU is heading out
1198	 * to the host and if so, we head out also. If no CPUs are heading
1199	 * out, we handle the return values <= 0 below.
1200 *
1201 * Return to guest (r3 <= 0)
1202 * 0 No external interrupt is pending
1203 * -1 A guest wakeup IPI (which has now been cleared)
1204 * In either case, we return to guest to deliver any pending
1205 * guest interrupts.
1206 *
1207 * -2 A PCI passthrough external interrupt was handled
1208 * (interrupt was delivered directly to guest)
1209 * Return to guest to deliver any pending guest interrupts.
1210 */
1211
1212 cmpdi r3, 1
1213 ble 1f
1214
1215 /* Return code = 2 */
1216 li r12, BOOK3S_INTERRUPT_HV_RM_HARD
1217 stw r12, VCPU_TRAP(r9)
1218 b guest_exit_cont
1219
12201: /* Return code <= 1 */
1158 cmpdi r3, 0 1221 cmpdi r3, 0
1159 bgt guest_exit_cont 1222 bgt guest_exit_cont
1160 1223
1161 /* Check if any CPU is heading out to the host, if so head out too */ 1224 /* Return code <= 0 */
11624: ld r5, HSTATE_KVM_VCORE(r13) 12254: ld r5, HSTATE_KVM_VCORE(r13)
1163 lwz r0, VCORE_ENTRY_EXIT(r5) 1226 lwz r0, VCORE_ENTRY_EXIT(r5)
1164 cmpwi r0, 0x100 1227 cmpwi r0, 0x100
@@ -2213,10 +2276,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
2213 ld r29, VCPU_GPR(R29)(r4) 2276 ld r29, VCPU_GPR(R29)(r4)
2214 ld r30, VCPU_GPR(R30)(r4) 2277 ld r30, VCPU_GPR(R30)(r4)
2215 ld r31, VCPU_GPR(R31)(r4) 2278 ld r31, VCPU_GPR(R31)(r4)
2216 2279
2217 /* Check the wake reason in SRR1 to see why we got here */ 2280 /* Check the wake reason in SRR1 to see why we got here */
2218 bl kvmppc_check_wake_reason 2281 bl kvmppc_check_wake_reason
2219 2282
2283 /*
2284 * Restore volatile registers since we could have called a
2285 * C routine in kvmppc_check_wake_reason
2286 * r4 = VCPU
2287 * r3 tells us whether we need to return to host or not
2288 * WARNING: it gets checked further down:
2289 * should not modify r3 until this check is done.
2290 */
2291 ld r4, HSTATE_KVM_VCPU(r13)
2292
2220 /* clear our bit in vcore->napping_threads */ 2293 /* clear our bit in vcore->napping_threads */
222134: ld r5,HSTATE_KVM_VCORE(r13) 229434: ld r5,HSTATE_KVM_VCORE(r13)
2222 lbz r7,HSTATE_PTID(r13) 2295 lbz r7,HSTATE_PTID(r13)
@@ -2230,7 +2303,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
2230 li r0,0 2303 li r0,0
2231 stb r0,HSTATE_NAPPING(r13) 2304 stb r0,HSTATE_NAPPING(r13)
2232 2305
2233 /* See if the wake reason means we need to exit */ 2306 /* See if the wake reason saved in r3 means we need to exit */
2234 stw r12, VCPU_TRAP(r4) 2307 stw r12, VCPU_TRAP(r4)
2235 mr r9, r4 2308 mr r9, r4
2236 cmpdi r3, 0 2309 cmpdi r3, 0
@@ -2297,10 +2370,14 @@ machine_check_realmode:
2297 * 0 if nothing needs to be done 2370 * 0 if nothing needs to be done
2298 * 1 if something happened that needs to be handled by the host 2371 * 1 if something happened that needs to be handled by the host
2299 * -1 if there was a guest wakeup (IPI or msgsnd) 2372 * -1 if there was a guest wakeup (IPI or msgsnd)
2373 * -2 if we handled a PCI passthrough interrupt (returned by
2374 * kvmppc_read_intr only)
2300 * 2375 *
2301 * Also sets r12 to the interrupt vector for any interrupt that needs 2376 * Also sets r12 to the interrupt vector for any interrupt that needs
2302 * to be handled now by the host (0x500 for external interrupt), or zero. 2377 * to be handled now by the host (0x500 for external interrupt), or zero.
2303 * Modifies r0, r6, r7, r8. 2378 * Modifies all volatile registers (since it may call a C function).
2379 * This routine calls kvmppc_read_intr, a C function, if an external
2380 * interrupt is pending.
2304 */ 2381 */
2305kvmppc_check_wake_reason: 2382kvmppc_check_wake_reason:
2306 mfspr r6, SPRN_SRR1 2383 mfspr r6, SPRN_SRR1
@@ -2310,8 +2387,7 @@ FTR_SECTION_ELSE
2310 rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */ 2387 rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */
2311ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) 2388ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
2312 cmpwi r6, 8 /* was it an external interrupt? */ 2389 cmpwi r6, 8 /* was it an external interrupt? */
2313 li r12, BOOK3S_INTERRUPT_EXTERNAL 2390 beq 7f /* if so, see what it was */
2314 beq kvmppc_read_intr /* if so, see what it was */
2315 li r3, 0 2391 li r3, 0
2316 li r12, 0 2392 li r12, 0
2317 cmpwi r6, 6 /* was it the decrementer? */ 2393 cmpwi r6, 6 /* was it the decrementer? */
@@ -2350,83 +2426,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2350 li r3, 1 2426 li r3, 1
2351 blr 2427 blr
2352 2428
2353/* 2429 /* external interrupt - create a stack frame so we can call C */
2354 * Determine what sort of external interrupt is pending (if any). 24307: mflr r0
2355 * Returns: 2431 std r0, PPC_LR_STKOFF(r1)
2356 * 0 if no interrupt is pending 2432 stdu r1, -PPC_MIN_STKFRM(r1)
2357 * 1 if an interrupt is pending that needs to be handled by the host 2433 bl kvmppc_read_intr
2358 * -1 if there was a guest wakeup IPI (which has now been cleared) 2434 nop
2359 * Modifies r0, r6, r7, r8, returns value in r3. 2435 li r12, BOOK3S_INTERRUPT_EXTERNAL
2360 */ 2436 cmpdi r3, 1
2361kvmppc_read_intr: 2437 ble 1f
2362 /* see if a host IPI is pending */
2363 li r3, 1
2364 lbz r0, HSTATE_HOST_IPI(r13)
2365 cmpwi r0, 0
2366 bne 1f
2367 2438
2368 /* Now read the interrupt from the ICP */
2369 ld r6, HSTATE_XICS_PHYS(r13)
2370 li r7, XICS_XIRR
2371 cmpdi r6, 0
2372 beq- 1f
2373 lwzcix r0, r6, r7
2374 /* 2439 /*
2375 * Save XIRR for later. Since we get in in reverse endian on LE 2440 * Return code of 2 means PCI passthrough interrupt, but
2376 * systems, save it byte reversed and fetch it back in host endian. 2441 * we need to return back to host to complete handling the
2377 */ 2442 * interrupt. Trap reason is expected in r12 by guest
2378 li r3, HSTATE_SAVED_XIRR 2443 * exit code.
2379 STWX_BE r0, r3, r13
2380#ifdef __LITTLE_ENDIAN__
2381 lwz r3, HSTATE_SAVED_XIRR(r13)
2382#else
2383 mr r3, r0
2384#endif
2385 rlwinm. r3, r3, 0, 0xffffff
2386 sync
2387 beq 1f /* if nothing pending in the ICP */
2388
2389 /* We found something in the ICP...
2390 *
2391 * If it's not an IPI, stash it in the PACA and return to
2392 * the host, we don't (yet) handle directing real external
2393 * interrupts directly to the guest
2394 */ 2444 */
2395 cmpwi r3, XICS_IPI /* if there is, is it an IPI? */ 2445 li r12, BOOK3S_INTERRUPT_HV_RM_HARD
2396 bne 42f 24461:
2397 2447 ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
2398 /* It's an IPI, clear the MFRR and EOI it */ 2448 addi r1, r1, PPC_MIN_STKFRM
2399 li r3, 0xff 2449 mtlr r0
2400 li r8, XICS_MFRR 2450 blr
2401 stbcix r3, r6, r8 /* clear the IPI */
2402 stwcix r0, r6, r7 /* EOI it */
2403 sync
2404
2405 /* We need to re-check host IPI now in case it got set in the
2406 * meantime. If it's clear, we bounce the interrupt to the
2407 * guest
2408 */
2409 lbz r0, HSTATE_HOST_IPI(r13)
2410 cmpwi r0, 0
2411 bne- 43f
2412
2413 /* OK, it's an IPI for us */
2414 li r12, 0
2415 li r3, -1
24161: blr
2417
241842: /* It's not an IPI and it's for the host. We saved a copy of XIRR in
2419 * the PACA earlier, it will be picked up by the host ICP driver
2420 */
2421 li r3, 1
2422 b 1b
2423
242443: /* We raced with the host, we need to resend that IPI, bummer */
2425 li r0, IPI_PRIORITY
2426 stbcix r0, r6, r8 /* set the IPI */
2427 sync
2428 li r3, 1
2429 b 1b
2430 2451
2431/* 2452/*
2432 * Save away FP, VMX and VSX registers. 2453 * Save away FP, VMX and VSX registers.
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 05aa11399a78..3bdc639157c1 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
99 return 0; 99 return 0;
100 } 100 }
101 101
102 /* Record which CPU this arrived on for passed-through interrupts */
103 if (state->host_irq)
104 state->intr_cpu = raw_smp_processor_id();
105
102 /* Attempt delivery */ 106 /* Attempt delivery */
103 icp_deliver_irq(xics, NULL, irq); 107 icp_deliver_irq(xics, NULL, irq);
104 108
@@ -812,7 +816,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
812 return H_SUCCESS; 816 return H_SUCCESS;
813} 817}
814 818
815static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) 819int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
816{ 820{
817 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 821 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
818 struct kvmppc_icp *icp = vcpu->arch.icp; 822 struct kvmppc_icp *icp = vcpu->arch.icp;
@@ -841,6 +845,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
841 845
842 return H_SUCCESS; 846 return H_SUCCESS;
843} 847}
848EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
844 849
845int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) 850int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
846{ 851{
@@ -892,6 +897,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
892 897
893/* -- Initialisation code etc. -- */ 898/* -- Initialisation code etc. -- */
894 899
900static void xics_debugfs_irqmap(struct seq_file *m,
901 struct kvmppc_passthru_irqmap *pimap)
902{
903 int i;
904
905 if (!pimap)
906 return;
907 seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
908 pimap->n_mapped);
909 for (i = 0; i < pimap->n_mapped; i++) {
910 seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
911 pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
912 }
913}
914
895static int xics_debug_show(struct seq_file *m, void *private) 915static int xics_debug_show(struct seq_file *m, void *private)
896{ 916{
897 struct kvmppc_xics *xics = m->private; 917 struct kvmppc_xics *xics = m->private;
@@ -913,6 +933,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
913 t_check_resend = 0; 933 t_check_resend = 0;
914 t_reject = 0; 934 t_reject = 0;
915 935
936 xics_debugfs_irqmap(m, kvm->arch.pimap);
937
916 seq_printf(m, "=========\nICP state\n=========\n"); 938 seq_printf(m, "=========\nICP state\n=========\n");
917 939
918 kvm_for_each_vcpu(i, vcpu, kvm) { 940 kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1252,6 +1274,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1252{ 1274{
1253 struct kvmppc_xics *xics = kvm->arch.xics; 1275 struct kvmppc_xics *xics = kvm->arch.xics;
1254 1276
1277 if (!xics)
1278 return -ENODEV;
1255 return ics_deliver_irq(xics, irq, level); 1279 return ics_deliver_irq(xics, irq, level);
1256} 1280}
1257 1281
@@ -1418,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
1418{ 1442{
1419 return pin; 1443 return pin;
1420} 1444}
1445
1446void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
1447 unsigned long host_irq)
1448{
1449 struct kvmppc_xics *xics = kvm->arch.xics;
1450 struct kvmppc_ics *ics;
1451 u16 idx;
1452
1453 ics = kvmppc_xics_find_ics(xics, irq, &idx);
1454 if (!ics)
1455 return;
1456
1457 ics->irq_state[idx].host_irq = host_irq;
1458 ics->irq_state[idx].intr_cpu = -1;
1459}
1460EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
1461
1462void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
1463 unsigned long host_irq)
1464{
1465 struct kvmppc_xics *xics = kvm->arch.xics;
1466 struct kvmppc_ics *ics;
1467 u16 idx;
1468
1469 ics = kvmppc_xics_find_ics(xics, irq, &idx);
1470 if (!ics)
1471 return;
1472
1473 ics->irq_state[idx].host_irq = 0;
1474}
1475EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);
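
Taken together with the real-mode EOI hunk earlier in this diff, the intr_cpu field added here implements a small record/consume handshake: delivery notes which CPU the passed-through interrupt arrived on, and the guest's EOI retargets the hardware IRQ if the vCPU actually ran on a different core. A hedged sketch of that flow follows; cpu_first_core_thread() and retarget_hw_irq() are stand-ins for cpu_first_thread_sibling() and xics_opal_rm_set_server(), and the thread-masking is only an example.

#include <stdio.h>

struct irq_state_model { int host_irq; int intr_cpu; };

/* Stand-in: assume 8 hardware threads per core, so mask the thread bits. */
static int cpu_first_core_thread(int cpu) { return cpu & ~7; }

static void retarget_hw_irq(int hw_irq, int cpu)
{
    printf("retarget irq %d to cpu %d\n", hw_irq, cpu);
}

/* Delivery side (ics_deliver_irq in the patch): remember the arrival CPU. */
static void on_delivery(struct irq_state_model *s, int delivering_cpu)
{
    if (s->host_irq)
        s->intr_cpu = delivering_cpu;
}

/* Guest EOI side (kvmppc_rm_h_eoi in the patch): fix up bad affinity. */
static void on_guest_eoi(struct irq_state_model *s, int running_cpu)
{
    int pcpu = cpu_first_core_thread(running_cpu);

    if (s->host_irq && s->intr_cpu != -1) {
        if (s->intr_cpu != pcpu)
            retarget_hw_irq(s->host_irq, pcpu);
        s->intr_cpu = -1;
    }
}

int main(void)
{
    struct irq_state_model s = { .host_irq = 17, .intr_cpu = -1 };

    on_delivery(&s, 24);     /* interrupt arrived on CPU 24      */
    on_guest_eoi(&s, 9);     /* vCPU ran on the core holding CPU 9 */
    return 0;
}
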
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index a46b954055c4..2a50320b55ca 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -42,6 +42,8 @@ struct ics_irq_state {
42 u8 lsi; /* level-sensitive interrupt */ 42 u8 lsi; /* level-sensitive interrupt */
43 u8 asserted; /* Only for LSI */ 43 u8 asserted; /* Only for LSI */
44 u8 exists; 44 u8 exists;
45 int intr_cpu;
46 u32 host_irq;
45}; 47};
46 48
47/* Atomic ICP state, updated with a single compare & swap */ 49/* Atomic ICP state, updated with a single compare & swap */
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 29911a07bcdb..ddbf8f0284c0 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -743,7 +743,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
743 char *virt; 743 char *virt;
744 struct page **pages; 744 struct page **pages;
745 struct tlbe_priv *privs[2] = {}; 745 struct tlbe_priv *privs[2] = {};
746 u64 *g2h_bitmap = NULL; 746 u64 *g2h_bitmap;
747 size_t array_len; 747 size_t array_len;
748 u32 sets; 748 u32 sets;
749 int num_pages, ret, i; 749 int num_pages, ret, i;
@@ -779,41 +779,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
779 779
780 num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - 780 num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
781 cfg->array / PAGE_SIZE; 781 cfg->array / PAGE_SIZE;
782 pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); 782 pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
783 if (!pages) 783 if (!pages)
784 return -ENOMEM; 784 return -ENOMEM;
785 785
786 ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); 786 ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
787 if (ret < 0) 787 if (ret < 0)
788 goto err_pages; 788 goto free_pages;
789 789
790 if (ret != num_pages) { 790 if (ret != num_pages) {
791 num_pages = ret; 791 num_pages = ret;
792 ret = -EFAULT; 792 ret = -EFAULT;
793 goto err_put_page; 793 goto put_pages;
794 } 794 }
795 795
796 virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); 796 virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
797 if (!virt) { 797 if (!virt) {
798 ret = -ENOMEM; 798 ret = -ENOMEM;
799 goto err_put_page; 799 goto put_pages;
800 } 800 }
801 801
802 privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], 802 privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL);
803 GFP_KERNEL); 803 if (!privs[0]) {
804 privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], 804 ret = -ENOMEM;
805 GFP_KERNEL); 805 goto put_pages;
806 }
806 807
807 if (!privs[0] || !privs[1]) { 808 privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL);
809 if (!privs[1]) {
808 ret = -ENOMEM; 810 ret = -ENOMEM;
809 goto err_privs; 811 goto free_privs_first;
810 } 812 }
811 813
812 g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], 814 g2h_bitmap = kcalloc(params.tlb_sizes[1],
813 GFP_KERNEL); 815 sizeof(*g2h_bitmap),
816 GFP_KERNEL);
814 if (!g2h_bitmap) { 817 if (!g2h_bitmap) {
815 ret = -ENOMEM; 818 ret = -ENOMEM;
816 goto err_privs; 819 goto free_privs_second;
817 } 820 }
818 821
819 free_gtlb(vcpu_e500); 822 free_gtlb(vcpu_e500);
@@ -845,16 +848,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
845 848
846 kvmppc_recalc_tlb1map_range(vcpu_e500); 849 kvmppc_recalc_tlb1map_range(vcpu_e500);
847 return 0; 850 return 0;
848 851 free_privs_second:
849err_privs:
850 kfree(privs[0]);
851 kfree(privs[1]); 852 kfree(privs[1]);
852 853 free_privs_first:
853err_put_page: 854 kfree(privs[0]);
855 put_pages:
854 for (i = 0; i < num_pages; i++) 856 for (i = 0; i < num_pages; i++)
855 put_page(pages[i]); 857 put_page(pages[i]);
856 858 free_pages:
857err_pages:
858 kfree(pages); 859 kfree(pages);
859 return ret; 860 return ret;
860} 861}
@@ -904,11 +905,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
904int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) 905int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
905{ 906{
906 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; 907 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
907 int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
908 int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
909 908
910 if (e500_mmu_host_init(vcpu_e500)) 909 if (e500_mmu_host_init(vcpu_e500))
911 goto err; 910 goto free_vcpu;
912 911
913 vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; 912 vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
914 vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; 913 vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
@@ -920,37 +919,39 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
920 vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; 919 vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
921 vcpu_e500->gtlb_params[1].sets = 1; 920 vcpu_e500->gtlb_params[1].sets = 1;
922 921
923 vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); 922 vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE +
923 KVM_E500_TLB1_SIZE,
924 sizeof(*vcpu_e500->gtlb_arch),
925 GFP_KERNEL);
924 if (!vcpu_e500->gtlb_arch) 926 if (!vcpu_e500->gtlb_arch)
925 return -ENOMEM; 927 return -ENOMEM;
926 928
927 vcpu_e500->gtlb_offset[0] = 0; 929 vcpu_e500->gtlb_offset[0] = 0;
928 vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; 930 vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
929 931
930 vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * 932 vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries,
931 vcpu_e500->gtlb_params[0].entries, 933 sizeof(struct tlbe_ref),
932 GFP_KERNEL); 934 GFP_KERNEL);
933 if (!vcpu_e500->gtlb_priv[0]) 935 if (!vcpu_e500->gtlb_priv[0])
934 goto err; 936 goto free_vcpu;
935 937
936 vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * 938 vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries,
937 vcpu_e500->gtlb_params[1].entries, 939 sizeof(struct tlbe_ref),
938 GFP_KERNEL); 940 GFP_KERNEL);
939 if (!vcpu_e500->gtlb_priv[1]) 941 if (!vcpu_e500->gtlb_priv[1])
940 goto err; 942 goto free_vcpu;
941 943
942 vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * 944 vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries,
943 vcpu_e500->gtlb_params[1].entries, 945 sizeof(*vcpu_e500->g2h_tlb1_map),
944 GFP_KERNEL); 946 GFP_KERNEL);
945 if (!vcpu_e500->g2h_tlb1_map) 947 if (!vcpu_e500->g2h_tlb1_map)
946 goto err; 948 goto free_vcpu;
947 949
948 vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params); 950 vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
949 951
950 kvmppc_recalc_tlb1map_range(vcpu_e500); 952 kvmppc_recalc_tlb1map_range(vcpu_e500);
951 return 0; 953 return 0;
952 954 free_vcpu:
953err:
954 free_gtlb(vcpu_e500); 955 free_gtlb(vcpu_e500);
955 return -1; 956 return -1;
956} 957}
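
The e500_mmu.c hunks above switch to overflow-checked allocators (kmalloc_array()/kcalloc()) and to descriptive error labels that unwind one resource per label in reverse order. Here is a stripped-down user-space model of the same pattern, with calloc() in place of kcalloc(); the structure and sizes are made up for the sketch.

#include <stdlib.h>

struct tlb_cfg { double *privs0, *privs1; unsigned long *g2h; };

static int cfg_init(struct tlb_cfg *c, size_t n0, size_t n1)
{
    /* calloc() checks the n*size multiplication for overflow, like kcalloc(). */
    c->privs0 = calloc(n0, sizeof(*c->privs0));
    if (!c->privs0)
        goto fail;
    c->privs1 = calloc(n1, sizeof(*c->privs1));
    if (!c->privs1)
        goto free_privs0;
    c->g2h = calloc(n1, sizeof(*c->g2h));
    if (!c->g2h)
        goto free_privs1;
    return 0;

free_privs1:
    free(c->privs1);
free_privs0:
    free(c->privs0);
fail:
    return -1;
}

int main(void)
{
    struct tlb_cfg c;

    if (cfg_init(&c, 256, 64))
        return 1;
    free(c.g2h);
    free(c.privs1);
    free(c.privs0);
    return 0;
}
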
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6ce40dd6fe51..0b7d66422650 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -27,6 +27,8 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/file.h> 28#include <linux/file.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/irqbypass.h>
31#include <linux/kvm_irqfd.h>
30#include <asm/cputable.h> 32#include <asm/cputable.h>
31#include <asm/uaccess.h> 33#include <asm/uaccess.h>
32#include <asm/kvm_ppc.h> 34#include <asm/kvm_ppc.h>
@@ -739,6 +741,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
739#endif 741#endif
740} 742}
741 743
744/*
745 * irq_bypass_add_producer and irq_bypass_del_producer are only
746 * useful if the architecture supports PCI passthrough.
747 * irq_bypass_stop and irq_bypass_start are not needed and so
748 * kvm_ops are not defined for them.
749 */
750bool kvm_arch_has_irq_bypass(void)
751{
752 return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
753 (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
754}
755
756int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
757 struct irq_bypass_producer *prod)
758{
759 struct kvm_kernel_irqfd *irqfd =
760 container_of(cons, struct kvm_kernel_irqfd, consumer);
761 struct kvm *kvm = irqfd->kvm;
762
763 if (kvm->arch.kvm_ops->irq_bypass_add_producer)
764 return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
765
766 return 0;
767}
768
769void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
770 struct irq_bypass_producer *prod)
771{
772 struct kvm_kernel_irqfd *irqfd =
773 container_of(cons, struct kvm_kernel_irqfd, consumer);
774 struct kvm *kvm = irqfd->kvm;
775
776 if (kvm->arch.kvm_ops->irq_bypass_del_producer)
777 kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
778}
779
742static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 780static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
743 struct kvm_run *run) 781 struct kvm_run *run)
744{ 782{
@@ -1167,6 +1205,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
1167 return r; 1205 return r;
1168} 1206}
1169 1207
1208bool kvm_arch_intc_initialized(struct kvm *kvm)
1209{
1210#ifdef CONFIG_KVM_MPIC
1211 if (kvm->arch.mpic)
1212 return true;
1213#endif
1214#ifdef CONFIG_KVM_XICS
1215 if (kvm->arch.xics)
1216 return true;
1217#endif
1218 return false;
1219}
1220
1170int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 1221int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1171 struct kvm_mp_state *mp_state) 1222 struct kvm_mp_state *mp_state)
1172{ 1223{
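
The new arch hooks above recover the surrounding irqfd from the embedded consumer with container_of() before dispatching to the per-VM kvm_ops. Below is a self-contained model of just that recovery step; the structure only mimics the layout and is not the kernel's kvm_kernel_irqfd.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct consumer { int token; };

struct irqfd_model {
    int gsi;
    void *kvm;
    struct consumer consumer;   /* embedded, handed to the bypass core */
};

/* Model of kvm_arch_irq_bypass_add_producer(): the core passes only the
 * embedded consumer; container_of() recovers the enclosing irqfd so the
 * per-arch op can be called with the VM pointer and guest GSI. */
static int add_producer(struct consumer *cons, int producer_irq)
{
    struct irqfd_model *irqfd =
        container_of(cons, struct irqfd_model, consumer);

    printf("map host irq %d to guest gsi %d\n", producer_irq, irqfd->gsi);
    return 0;
}

int main(void)
{
    struct irqfd_model fd = { .gsi = 42, .kvm = NULL };

    return add_producer(&fd.consumer, 7);
}
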
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 33d9daff5783..fb21990c0fb4 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
432 __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) 432 __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
433); 433);
434 434
435TRACE_EVENT(kvmppc_vcore_wakeup,
436 TP_PROTO(int do_sleep, __u64 ns),
437
438 TP_ARGS(do_sleep, ns),
439
440 TP_STRUCT__entry(
441 __field(__u64, ns)
442 __field(int, waited)
443 __field(pid_t, tgid)
444 ),
445
446 TP_fast_assign(
447 __entry->ns = ns;
448 __entry->waited = do_sleep;
449 __entry->tgid = current->tgid;
450 ),
451
452 TP_printk("%s time %lld ns, tgid=%d",
453 __entry->waited ? "wait" : "poll",
454 __entry->ns, __entry->tgid)
455);
456
435TRACE_EVENT(kvmppc_run_vcpu_enter, 457TRACE_EVENT(kvmppc_run_vcpu_enter,
436 TP_PROTO(struct kvm_vcpu *vcpu), 458 TP_PROTO(struct kvm_vcpu *vcpu),
437 459
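
The kvmppc_vcore_wakeup event records whether the vcore actually slept or only polled, and for how long. As a user-space analogue of what TP_fast_assign/TP_printk produce for this event (the struct and function below are illustrative only, not the tracing API):

#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>

struct vcore_wakeup_rec {
    uint64_t ns;       /* time spent waiting or polling */
    int waited;        /* 1 = slept, 0 = polled         */
    pid_t tgid;
};

/* Render the record the same way the event's TP_printk format does. */
static void print_vcore_wakeup(const struct vcore_wakeup_rec *r)
{
    printf("%s time %llu ns, tgid=%d\n",
           r->waited ? "wait" : "poll",
           (unsigned long long)r->ns, r->tgid);
}

int main(void)
{
    struct vcore_wakeup_rec r = { .ns = 12345, .waited = 1, .tgid = 100 };

    print_vcore_wakeup(&r);
    return 0;
}
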
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 0e4e9654bd2c..83ddc0e171b0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid,
493} 493}
494#endif 494#endif
495 495
496static inline int __hpte_actual_psize(unsigned int lp, int psize)
497{
498 int i, shift;
499 unsigned int mask;
500
501 /* start from 1 ignoring MMU_PAGE_4K */
502 for (i = 1; i < MMU_PAGE_COUNT; i++) {
503
504 /* invalid penc */
505 if (mmu_psize_defs[psize].penc[i] == -1)
506 continue;
507 /*
508 * encoding bits per actual page size
509 * PTE LP actual page size
510 * rrrr rrrz >=8KB
511 * rrrr rrzz >=16KB
512 * rrrr rzzz >=32KB
513 * rrrr zzzz >=64KB
514 * .......
515 */
516 shift = mmu_psize_defs[i].shift - LP_SHIFT;
517 if (shift > LP_BITS)
518 shift = LP_BITS;
519 mask = (1 << shift) - 1;
520 if ((lp & mask) == mmu_psize_defs[psize].penc[i])
521 return i;
522 }
523 return -1;
524}
525
526static void hpte_decode(struct hash_pte *hpte, unsigned long slot, 496static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
527 int *psize, int *apsize, int *ssize, unsigned long *vpn) 497 int *psize, int *apsize, int *ssize, unsigned long *vpn)
528{ 498{
@@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
538 size = MMU_PAGE_4K; 508 size = MMU_PAGE_4K;
539 a_size = MMU_PAGE_4K; 509 a_size = MMU_PAGE_4K;
540 } else { 510 } else {
541 for (size = 0; size < MMU_PAGE_COUNT; size++) { 511 size = hpte_page_sizes[lp] & 0xf;
542 512 a_size = hpte_page_sizes[lp] >> 4;
543 /* valid entries have a shift value */
544 if (!mmu_psize_defs[size].shift)
545 continue;
546
547 a_size = __hpte_actual_psize(lp, size);
548 if (a_size != -1)
549 break;
550 }
551 } 513 }
552 /* This works for all page sizes, and for 256M and 1T segments */ 514 /* This works for all page sizes, and for 256M and 1T segments */
553 if (cpu_has_feature(CPU_FTR_ARCH_300)) 515 if (cpu_has_feature(CPU_FTR_ARCH_300))
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0821556e16f4..ef3ae891a3db 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -93,6 +93,9 @@ static unsigned long _SDR1;
93struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 93struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
94EXPORT_SYMBOL_GPL(mmu_psize_defs); 94EXPORT_SYMBOL_GPL(mmu_psize_defs);
95 95
96u8 hpte_page_sizes[1 << LP_BITS];
97EXPORT_SYMBOL_GPL(hpte_page_sizes);
98
96struct hash_pte *htab_address; 99struct hash_pte *htab_address;
97unsigned long htab_size_bytes; 100unsigned long htab_size_bytes;
98unsigned long htab_hash_mask; 101unsigned long htab_hash_mask;
@@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void)
564#endif /* CONFIG_HUGETLB_PAGE */ 567#endif /* CONFIG_HUGETLB_PAGE */
565} 568}
566 569
570/*
571 * Fill in the hpte_page_sizes[] array.
572 * We go through the mmu_psize_defs[] array looking for all the
573 * supported base/actual page size combinations. Each combination
574 * has a unique pagesize encoding (penc) value in the low bits of
575 * the LP field of the HPTE. For actual page sizes less than 1MB,
576 * some of the upper LP bits are used for RPN bits, meaning that
577 * we need to fill in several entries in hpte_page_sizes[].
578 *
579 * In diagrammatic form, with r = RPN bits and z = page size bits:
580 * PTE LP actual page size
581 * rrrr rrrz >=8KB
582 * rrrr rrzz >=16KB
583 * rrrr rzzz >=32KB
584 * rrrr zzzz >=64KB
585 * ...
586 *
587 * The zzzz bits are implementation-specific but are chosen so that
588 * no encoding for a larger page size uses the same value in its
589 * low-order N bits as the encoding for the 2^(12+N) byte page size
590 * (if it exists).
591 */
592static void init_hpte_page_sizes(void)
593{
594 long int ap, bp;
595 long int shift, penc;
596
597 for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
598 if (!mmu_psize_defs[bp].shift)
599 continue; /* not a supported page size */
600 for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
601 penc = mmu_psize_defs[bp].penc[ap];
602 if (penc == -1)
603 continue;
604 shift = mmu_psize_defs[ap].shift - LP_SHIFT;
605 if (shift <= 0)
606 continue; /* should never happen */
607 /*
608 * For page sizes less than 1MB, this loop
609 * replicates the entry for all possible values
610 * of the rrrr bits.
611 */
612 while (penc < (1 << LP_BITS)) {
613 hpte_page_sizes[penc] = (ap << 4) | bp;
614 penc += 1 << shift;
615 }
616 }
617 }
618}
619
567static void __init htab_init_page_sizes(void) 620static void __init htab_init_page_sizes(void)
568{ 621{
622 init_hpte_page_sizes();
623
569 if (!debug_pagealloc_enabled()) { 624 if (!debug_pagealloc_enabled()) {
570 /* 625 /*
571 * Pick a size for the linear mapping. Currently, we only 626 * Pick a size for the linear mapping. Currently, we only
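
init_hpte_page_sizes() packs the actual page size index into the high nibble and the base page size index into the low nibble of each hpte_page_sizes[] entry, which is exactly what the new hpte_decode() lookup in hash_native_64.c unpacks (size = x & 0xf, a_size = x >> 4). A tiny standalone illustration of that encoding, with arbitrary example indices:

#include <stdio.h>
#include <stdint.h>

/* Pack base (bp) and actual (ap) page size indices into one byte, as the
 * hpte_page_sizes[] table does. */
static uint8_t pack_sizes(unsigned int ap, unsigned int bp)
{
    return (uint8_t)((ap << 4) | bp);
}

/* Recover both indices with one lookup, mirroring hpte_decode(). */
static void unpack_sizes(uint8_t v, unsigned int *ap, unsigned int *bp)
{
    *bp = v & 0xf;
    *ap = v >> 4;
}

int main(void)
{
    unsigned int ap, bp;
    uint8_t v = pack_sizes(5, 2);   /* e.g. actual = index 5, base = index 2 */

    unpack_sizes(v, &ap, &bp);
    printf("packed 0x%02x -> base %u, actual %u\n", v, bp, ap);
    return 0;
}
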
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3d29d40eb0e9..44d2d842cee7 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -208,6 +208,7 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
208OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); 208OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
209OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); 209OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
210OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); 210OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
211OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE);
211OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); 212OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
212OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); 213OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
213OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); 214OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index fd9444f9fb0c..9ce48ae55062 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2710,15 +2710,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
2710} 2710}
2711 2711
2712#ifdef CONFIG_PCI_MSI 2712#ifdef CONFIG_PCI_MSI
2713static void pnv_ioda2_msi_eoi(struct irq_data *d) 2713int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
2714{ 2714{
2715 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
2716 struct irq_chip *chip = irq_data_get_irq_chip(d);
2717 struct pnv_phb *phb = container_of(chip, struct pnv_phb, 2715 struct pnv_phb *phb = container_of(chip, struct pnv_phb,
2718 ioda.irq_chip); 2716 ioda.irq_chip);
2717
2718 return opal_pci_msi_eoi(phb->opal_id, hw_irq);
2719}
2720
2721static void pnv_ioda2_msi_eoi(struct irq_data *d)
2722{
2719 int64_t rc; 2723 int64_t rc;
2724 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
2725 struct irq_chip *chip = irq_data_get_irq_chip(d);
2720 2726
2721 rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); 2727 rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
2722 WARN_ON_ONCE(rc); 2728 WARN_ON_ONCE(rc);
2723 2729
2724 icp_native_eoi(d); 2730 icp_native_eoi(d);
@@ -2748,6 +2754,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
2748 irq_set_chip(virq, &phb->ioda.irq_chip); 2754 irq_set_chip(virq, &phb->ioda.irq_chip);
2749} 2755}
2750 2756
2757/*
2758 * Returns true iff chip is something that we could call
2759 * pnv_opal_pci_msi_eoi for.
2760 */
2761bool is_pnv_opal_msi(struct irq_chip *chip)
2762{
2763 return chip->irq_eoi == pnv_ioda2_msi_eoi;
2764}
2765EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
2766
2751static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 2767static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
2752 unsigned int hwirq, unsigned int virq, 2768 unsigned int hwirq, unsigned int virq,
2753 unsigned int is_64, struct msi_msg *msg) 2769 unsigned int is_64, struct msi_msg *msg)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 876173ca815f..a41faf34b034 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -245,72 +245,72 @@ struct sie_page {
245} __packed; 245} __packed;
246 246
247struct kvm_vcpu_stat { 247struct kvm_vcpu_stat {
248 u32 exit_userspace; 248 u64 exit_userspace;
249 u32 exit_null; 249 u64 exit_null;
250 u32 exit_external_request; 250 u64 exit_external_request;
251 u32 exit_external_interrupt; 251 u64 exit_external_interrupt;
252 u32 exit_stop_request; 252 u64 exit_stop_request;
253 u32 exit_validity; 253 u64 exit_validity;
254 u32 exit_instruction; 254 u64 exit_instruction;
255 u32 exit_pei; 255 u64 exit_pei;
256 u32 halt_successful_poll; 256 u64 halt_successful_poll;
257 u32 halt_attempted_poll; 257 u64 halt_attempted_poll;
258 u32 halt_poll_invalid; 258 u64 halt_poll_invalid;
259 u32 halt_wakeup; 259 u64 halt_wakeup;
260 u32 instruction_lctl; 260 u64 instruction_lctl;
261 u32 instruction_lctlg; 261 u64 instruction_lctlg;
262 u32 instruction_stctl; 262 u64 instruction_stctl;
263 u32 instruction_stctg; 263 u64 instruction_stctg;
264 u32 exit_program_interruption; 264 u64 exit_program_interruption;
265 u32 exit_instr_and_program; 265 u64 exit_instr_and_program;
266 u32 exit_operation_exception; 266 u64 exit_operation_exception;
267 u32 deliver_external_call; 267 u64 deliver_external_call;
268 u32 deliver_emergency_signal; 268 u64 deliver_emergency_signal;
269 u32 deliver_service_signal; 269 u64 deliver_service_signal;
270 u32 deliver_virtio_interrupt; 270 u64 deliver_virtio_interrupt;
271 u32 deliver_stop_signal; 271 u64 deliver_stop_signal;
272 u32 deliver_prefix_signal; 272 u64 deliver_prefix_signal;
273 u32 deliver_restart_signal; 273 u64 deliver_restart_signal;
274 u32 deliver_program_int; 274 u64 deliver_program_int;
275 u32 deliver_io_int; 275 u64 deliver_io_int;
276 u32 exit_wait_state; 276 u64 exit_wait_state;
277 u32 instruction_pfmf; 277 u64 instruction_pfmf;
278 u32 instruction_stidp; 278 u64 instruction_stidp;
279 u32 instruction_spx; 279 u64 instruction_spx;
280 u32 instruction_stpx; 280 u64 instruction_stpx;
281 u32 instruction_stap; 281 u64 instruction_stap;
282 u32 instruction_storage_key; 282 u64 instruction_storage_key;
283 u32 instruction_ipte_interlock; 283 u64 instruction_ipte_interlock;
284 u32 instruction_stsch; 284 u64 instruction_stsch;
285 u32 instruction_chsc; 285 u64 instruction_chsc;
286 u32 instruction_stsi; 286 u64 instruction_stsi;
287 u32 instruction_stfl; 287 u64 instruction_stfl;
288 u32 instruction_tprot; 288 u64 instruction_tprot;
289 u32 instruction_sie; 289 u64 instruction_sie;
290 u32 instruction_essa; 290 u64 instruction_essa;
291 u32 instruction_sthyi; 291 u64 instruction_sthyi;
292 u32 instruction_sigp_sense; 292 u64 instruction_sigp_sense;
293 u32 instruction_sigp_sense_running; 293 u64 instruction_sigp_sense_running;
294 u32 instruction_sigp_external_call; 294 u64 instruction_sigp_external_call;
295 u32 instruction_sigp_emergency; 295 u64 instruction_sigp_emergency;
296 u32 instruction_sigp_cond_emergency; 296 u64 instruction_sigp_cond_emergency;
297 u32 instruction_sigp_start; 297 u64 instruction_sigp_start;
298 u32 instruction_sigp_stop; 298 u64 instruction_sigp_stop;
299 u32 instruction_sigp_stop_store_status; 299 u64 instruction_sigp_stop_store_status;
300 u32 instruction_sigp_store_status; 300 u64 instruction_sigp_store_status;
301 u32 instruction_sigp_store_adtl_status; 301 u64 instruction_sigp_store_adtl_status;
302 u32 instruction_sigp_arch; 302 u64 instruction_sigp_arch;
303 u32 instruction_sigp_prefix; 303 u64 instruction_sigp_prefix;
304 u32 instruction_sigp_restart; 304 u64 instruction_sigp_restart;
305 u32 instruction_sigp_init_cpu_reset; 305 u64 instruction_sigp_init_cpu_reset;
306 u32 instruction_sigp_cpu_reset; 306 u64 instruction_sigp_cpu_reset;
307 u32 instruction_sigp_unknown; 307 u64 instruction_sigp_unknown;
308 u32 diagnose_10; 308 u64 diagnose_10;
309 u32 diagnose_44; 309 u64 diagnose_44;
310 u32 diagnose_9c; 310 u64 diagnose_9c;
311 u32 diagnose_258; 311 u64 diagnose_258;
312 u32 diagnose_308; 312 u64 diagnose_308;
313 u32 diagnose_500; 313 u64 diagnose_500;
314}; 314};
315 315
316#define PGM_OPERATION 0x01 316#define PGM_OPERATION 0x01
@@ -577,7 +577,7 @@ struct kvm_vcpu_arch {
577}; 577};
578 578
579struct kvm_vm_stat { 579struct kvm_vm_stat {
580 u32 remote_tlb_flush; 580 ulong remote_tlb_flush;
581}; 581};
582 582
583struct kvm_arch_memory_slot { 583struct kvm_arch_memory_slot {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4c738c206be3..cd82bf74e7a5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -792,45 +792,45 @@ struct kvm_arch {
792}; 792};
793 793
794struct kvm_vm_stat { 794struct kvm_vm_stat {
795 u32 mmu_shadow_zapped; 795 ulong mmu_shadow_zapped;
796 u32 mmu_pte_write; 796 ulong mmu_pte_write;
797 u32 mmu_pte_updated; 797 ulong mmu_pte_updated;
798 u32 mmu_pde_zapped; 798 ulong mmu_pde_zapped;
799 u32 mmu_flooded; 799 ulong mmu_flooded;
800 u32 mmu_recycled; 800 ulong mmu_recycled;
801 u32 mmu_cache_miss; 801 ulong mmu_cache_miss;
802 u32 mmu_unsync; 802 ulong mmu_unsync;
803 u32 remote_tlb_flush; 803 ulong remote_tlb_flush;
804 u32 lpages; 804 ulong lpages;
805}; 805};
806 806
807struct kvm_vcpu_stat { 807struct kvm_vcpu_stat {
808 u32 pf_fixed; 808 u64 pf_fixed;
809 u32 pf_guest; 809 u64 pf_guest;
810 u32 tlb_flush; 810 u64 tlb_flush;
811 u32 invlpg; 811 u64 invlpg;
812 812
813 u32 exits; 813 u64 exits;
814 u32 io_exits; 814 u64 io_exits;
815 u32 mmio_exits; 815 u64 mmio_exits;
816 u32 signal_exits; 816 u64 signal_exits;
817 u32 irq_window_exits; 817 u64 irq_window_exits;
818 u32 nmi_window_exits; 818 u64 nmi_window_exits;
819 u32 halt_exits; 819 u64 halt_exits;
820 u32 halt_successful_poll; 820 u64 halt_successful_poll;
821 u32 halt_attempted_poll; 821 u64 halt_attempted_poll;
822 u32 halt_poll_invalid; 822 u64 halt_poll_invalid;
823 u32 halt_wakeup; 823 u64 halt_wakeup;
824 u32 request_irq_exits; 824 u64 request_irq_exits;
825 u32 irq_exits; 825 u64 irq_exits;
826 u32 host_state_reload; 826 u64 host_state_reload;
827 u32 efer_reload; 827 u64 efer_reload;
828 u32 fpu_reload; 828 u64 fpu_reload;
829 u32 insn_emulation; 829 u64 insn_emulation;
830 u32 insn_emulation_fail; 830 u64 insn_emulation_fail;
831 u32 hypercalls; 831 u64 hypercalls;
832 u32 irq_injections; 832 u64 irq_injections;
833 u32 nmi_injections; 833 u64 nmi_injections;
834}; 834};
835 835
836struct x86_instruction_info; 836struct x86_instruction_info;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b3fa12ce1166..a00f8e4045cf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3619,7 +3619,7 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
3619{ 3619{
3620 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; 3620 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
3621 3621
3622 *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset); 3622 *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
3623 3623
3624 return 0; 3624 return 0;
3625} 3625}
@@ -3649,7 +3649,7 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
3649 *val = 0; 3649 *val = 0;
3650 3650
3651 kvm_for_each_vcpu(i, vcpu, stat_data->kvm) 3651 kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
3652 *val += *(u32 *)((void *)vcpu + stat_data->offset); 3652 *val += *(u64 *)((void *)vcpu + stat_data->offset);
3653 3653
3654 return 0; 3654 return 0;
3655} 3655}
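
The kvm_main.c accessors read each statistic through a byte offset into the VM or vCPU structure, so widening the fields to ulong/u64 in the per-arch headers only works if the cast here is widened to match. A minimal user-space model of that offset-based read (the structure is invented for the sketch):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct vcpu_model {
    uint64_t halt_wakeup;
    uint64_t halt_successful_poll;
};

/* Read a 64-bit statistic located at a known byte offset, the way the
 * debugfs accessors above do; the cast width must match the field width. */
static uint64_t read_stat(const void *vcpu, size_t offset)
{
    return *(const uint64_t *)((const char *)vcpu + offset);
}

int main(void)
{
    struct vcpu_model v = { .halt_wakeup = 3, .halt_successful_poll = 7 };

    printf("halt_wakeup=%llu\n",
           (unsigned long long)read_stat(&v, offsetof(struct vcpu_model,
                                                      halt_wakeup)));
    return 0;
}
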