author     Marcelo Tosatti <mtosatti@redhat.com>   2012-12-09 15:44:10 -0500
committer  Marcelo Tosatti <mtosatti@redhat.com>   2012-12-09 15:44:10 -0500
commit     d2ff4fc557a4c5248b2d99b0d48e47a246d994b2 (patch)
tree       dcb1af3d5cbb0aa9279baeb4d4760ef43d78b5e6
parent     8f536b7697a0d40ef6b5fd04cf2c04953d5ca06f (diff)
parent     352df1deb2e3c40e65ff94c8d7c62d9144446b1c (diff)
Merge branch 'for-upstream' of https://github.com/agraf/linux-2.6 into queue
* 'for-upstream' of https://github.com/agraf/linux-2.6: (28 commits)
  KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
  KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
  KVM: PPC: bookehv: Add guest computation mode for irq delivery
  KVM: PPC: Make EPCR a valid field for booke64 and bookehv
  KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
  KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
  KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
  KVM: PPC: e500: Add emulation helper for getting instruction ea
  KVM: PPC: bookehv64: Add support for interrupt handling
  KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
  KVM: PPC: booke: Fix get_tb() compile error on 64-bit
  KVM: PPC: e500: Silence bogus GCC warning in tlb code
  KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7 without panicking
  KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations
  MAINTAINERS: Add git tree link for PPC KVM
  KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S
  KVM: PPC: Book3S PR: Fix VSX handling
  KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers
  KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages
  KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT
  ...
-rw-r--r--  Documentation/virtual/kvm/api.txt            |  55
-rw-r--r--  MAINTAINERS                                   |   1
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h         |  10
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h      |  31
-rw-r--r--  arch/powerpc/include/asm/kvm_booke_hv_asm.h   |  25
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h           |  13
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h            |  23
-rw-r--r--  arch/powerpc/include/asm/mmu-book3e.h         |   2
-rw-r--r--  arch/powerpc/include/asm/mmu-hash64.h         |  10
-rw-r--r--  arch/powerpc/include/asm/reg.h                |   1
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h           |  27
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c             |   4
-rw-r--r--  arch/powerpc/kvm/Kconfig                      |   1
-rw-r--r--  arch/powerpc/kvm/Makefile                     |   5
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c           | 407
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c             |  16
-rw-r--r--  arch/powerpc/kvm/book3s_exports.c             |   3
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c                  |  32
-rw-r--r--  arch/powerpc/kvm/book3s_hv_ras.c              | 144
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c           | 139
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S       | 131
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c                  | 114
-rw-r--r--  arch/powerpc/kvm/book3s_rmhandlers.S          |   3
-rw-r--r--  arch/powerpc/kvm/booke.c                      |  36
-rw-r--r--  arch/powerpc/kvm/booke.h                      |   1
-rw-r--r--  arch/powerpc/kvm/booke_emulate.c              |  14
-rw-r--r--  arch/powerpc/kvm/bookehv_interrupts.S         | 145
-rw-r--r--  arch/powerpc/kvm/e500.h                       |   8
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c               |  14
-rw-r--r--  arch/powerpc/kvm/e500_tlb.c                   |  38
-rw-r--r--  arch/powerpc/kvm/powerpc.c                    |  34
-rw-r--r--  include/linux/kvm_host.h                      |  12
-rw-r--r--  include/uapi/linux/kvm.h                      |   3
-rw-r--r--  virt/kvm/eventfd.c                            |   6
34 files changed, 1279 insertions(+), 229 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6671fdc0afb1..a4df5535996b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1773,6 +1773,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_VPA_ADDR | 64
   PPC   | KVM_REG_PPC_VPA_SLB | 128
   PPC   | KVM_REG_PPC_VPA_DTL | 128
+  PPC   | KVM_REG_PPC_EPCR | 32

 4.69 KVM_GET_ONE_REG

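The new EPCR entry above is reached through the existing ONE_REG calls rather than a new ioctl. A minimal userspace sketch (not part of the patch), assuming vcpu_fd is an open KVM vcpu file descriptor and the installed headers carry the KVM_REG_PPC_EPCR definition added by this series:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch: set the guest EPCR via KVM_SET_ONE_REG; error handling omitted. */
    static int set_guest_epcr(int vcpu_fd, uint32_t epcr)
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_EPCR,     /* 32-bit register, per the table above */
                    .addr = (uintptr_t)&epcr,     /* userspace address of the value */
            };

            return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
    }

Reading the register back works the same way with KVM_GET_ONE_REG and the same kvm_one_reg layout.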
@@ -2071,6 +2072,60 @@ KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm

 Note that the vcpu ioctl is asynchronous to vcpu execution.

+4.78 KVM_PPC_GET_HTAB_FD
+
+Capability: KVM_CAP_PPC_HTAB_FD
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to struct kvm_get_htab_fd (in)
+Returns: file descriptor number (>= 0) on success, -1 on error
+
+This returns a file descriptor that can be used either to read out the
+entries in the guest's hashed page table (HPT), or to write entries to
+initialize the HPT.  The returned fd can only be written to if the
+KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
+can only be read if that bit is clear.  The argument struct looks like
+this:
+
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+	__u64	flags;
+	__u64	start_index;
+	__u64	reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY	((__u64)0x1)
+#define KVM_GET_HTAB_WRITE		((__u64)0x2)
+
+The `start_index' field gives the index in the HPT of the entry at
+which to start reading.  It is ignored when writing.
+
+Reads on the fd will initially supply information about all
+"interesting" HPT entries.  Interesting entries are those with the
+bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise
+all entries.  When the end of the HPT is reached, the read() will
+return.  If read() is called again on the fd, it will start again from
+the beginning of the HPT, but will only return HPT entries that have
+changed since they were last read.
+
+Data read or written is structured as a header (8 bytes) followed by a
+series of valid HPT entries (16 bytes) each.  The header indicates how
+many valid HPT entries there are and how many invalid entries follow
+the valid entries.  The invalid entries are not represented explicitly
+in the stream.  The header format is:
+
+struct kvm_get_htab_header {
+	__u32	index;
+	__u16	n_valid;
+	__u16	n_invalid;
+};
+
+Writes to the fd create HPT entries starting at the index given in the
+header; first `n_valid' valid entries with contents from the data
+written, then `n_invalid' invalid entries, invalidating any previously
+valid entries found.
+

 5. The kvm_run structure
 ------------------------
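To make the read side of the interface concrete, a minimal userspace sketch (not part of the patch) that dumps the guest HPT through the fd; vm_fd is assumed to be a KVM VM file descriptor, and buffer sizing and error handling are simplified:

    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch: read out the guest HPT via KVM_PPC_GET_HTAB_FD. */
    static int dump_hpt(int vm_fd)
    {
            struct kvm_get_htab_fd ghf = { .flags = 0, .start_index = 0 };
            uint64_t buf[8192];
            ssize_t nb;
            int fd;

            fd = ioctl(vm_fd, KVM_PPC_GET_HTAB_FD, &ghf);
            if (fd < 0)
                    return -1;

            /* The first pass returns all "interesting" entries; a read()
             * that returns 0 means there is nothing (new) to report. */
            while ((nb = read(fd, buf, sizeof(buf))) > 0) {
                    char *p = (char *)buf;

                    while (p + sizeof(struct kvm_get_htab_header) <= (char *)buf + nb) {
                            struct kvm_get_htab_header *hdr = (void *)p;
                            uint64_t *hpte = (uint64_t *)(hdr + 1);
                            int i;

                            /* n_valid HPTEs (two 64-bit words each) follow the header */
                            for (i = 0; i < hdr->n_valid; ++i)
                                    printf("index %u: v=%llx r=%llx\n",
                                           hdr->index + i,
                                           (unsigned long long)hpte[2 * i],
                                           (unsigned long long)hpte[2 * i + 1]);
                            /* n_invalid entries follow implicitly and carry no data */
                            p += sizeof(*hdr) + hdr->n_valid * 16;
                    }
            }
            close(fd);
            return 0;
    }

The write direction uses the same header-plus-HPTEs framing, so a restore tool can replay a stream captured this way by opening the fd with KVM_GET_HTAB_WRITE set.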
diff --git a/MAINTAINERS b/MAINTAINERS
index 4376c528f75b..703446720a26 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4253,6 +4253,7 @@ KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
4253M: Alexander Graf <agraf@suse.de> 4253M: Alexander Graf <agraf@suse.de>
4254L: kvm-ppc@vger.kernel.org 4254L: kvm-ppc@vger.kernel.org
4255W: http://kvm.qumranet.com 4255W: http://kvm.qumranet.com
4256T: git git://github.com/agraf/linux-2.6.git
4256S: Supported 4257S: Supported
4257F: arch/powerpc/include/asm/kvm* 4258F: arch/powerpc/include/asm/kvm*
4258F: arch/powerpc/kvm/ 4259F: arch/powerpc/kvm/
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 36fcf4190461..5a56e1c5f851 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -81,6 +81,8 @@ struct kvmppc_vcpu_book3s {
81 u64 sdr1; 81 u64 sdr1;
82 u64 hior; 82 u64 hior;
83 u64 msr_mask; 83 u64 msr_mask;
84 u64 purr_offset;
85 u64 spurr_offset;
84#ifdef CONFIG_PPC_BOOK3S_32 86#ifdef CONFIG_PPC_BOOK3S_32
85 u32 vsid_pool[VSID_POOL_SIZE]; 87 u32 vsid_pool[VSID_POOL_SIZE];
86 u32 vsid_next; 88 u32 vsid_next;
@@ -157,8 +159,12 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
157extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); 159extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
158extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 160extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
159 long pte_index, unsigned long pteh, unsigned long ptel); 161 long pte_index, unsigned long pteh, unsigned long ptel);
160extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 162extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
161 long pte_index, unsigned long pteh, unsigned long ptel); 163 long pte_index, unsigned long pteh, unsigned long ptel,
164 pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
165extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
166 unsigned long pte_index, unsigned long avpn,
167 unsigned long *hpret);
162extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, 168extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
163 struct kvm_memory_slot *memslot, unsigned long *map); 169 struct kvm_memory_slot *memslot, unsigned long *map);
164 170
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 1472a5b4e4e3..38bec1dc9928 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -50,6 +50,15 @@ extern int kvm_hpt_order; /* order of preallocated HPTs */
50#define HPTE_V_HVLOCK 0x40UL 50#define HPTE_V_HVLOCK 0x40UL
51#define HPTE_V_ABSENT 0x20UL 51#define HPTE_V_ABSENT 0x20UL
52 52
53/*
54 * We use this bit in the guest_rpte field of the revmap entry
55 * to indicate a modified HPTE.
56 */
57#define HPTE_GR_MODIFIED (1ul << 62)
58
59/* These bits are reserved in the guest view of the HPTE */
60#define HPTE_GR_RESERVED HPTE_GR_MODIFIED
61
53static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) 62static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
54{ 63{
55 unsigned long tmp, old; 64 unsigned long tmp, old;
@@ -237,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
237 return !(memslot->base_gfn & mask) && !(memslot->npages & mask); 246 return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
238} 247}
239 248
249/*
250 * This works for 4k, 64k and 16M pages on POWER7,
251 * and 4k and 16M pages on PPC970.
252 */
253static inline unsigned long slb_pgsize_encoding(unsigned long psize)
254{
255 unsigned long senc = 0;
256
257 if (psize > 0x1000) {
258 senc = SLB_VSID_L;
259 if (psize == 0x10000)
260 senc |= SLB_VSID_LP_01;
261 }
262 return senc;
263}
264
265static inline int is_vrma_hpte(unsigned long hpte_v)
266{
267 return (hpte_v & ~0xffffffUL) ==
268 (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
269}
270
240#endif /* __ASM_KVM_BOOK3S_64_H__ */ 271#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index a37a12a9a7d7..3a79f5325712 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -17,6 +17,7 @@
17 * there are no exceptions for which we fall through directly to 17 * there are no exceptions for which we fall through directly to
18 * the normal host handler. 18 * the normal host handler.
19 * 19 *
20 * 32-bit host
20 * Expected inputs (normal exceptions): 21 * Expected inputs (normal exceptions):
21 * SCRATCH0 = saved r10 22 * SCRATCH0 = saved r10
22 * r10 = thread struct 23 * r10 = thread struct
@@ -33,6 +34,30 @@
33 * *(r8 + GPR9) = saved r9 34 * *(r8 + GPR9) = saved r9
34 * *(r8 + GPR10) = saved r10 (r10 not yet clobbered) 35 * *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
35 * *(r8 + GPR11) = saved r11 36 * *(r8 + GPR11) = saved r11
37 *
38 * 64-bit host
39 * Expected inputs (GEN/GDBELL/DBG/MC exception types):
40 * r10 = saved CR
41 * r13 = PACA_POINTER
42 * *(r13 + PACA_EX##type + EX_R10) = saved r10
43 * *(r13 + PACA_EX##type + EX_R11) = saved r11
44 * SPRN_SPRG_##type##_SCRATCH = saved r13
45 *
46 * Expected inputs (CRIT exception type):
47 * r10 = saved CR
48 * r13 = PACA_POINTER
49 * *(r13 + PACA_EX##type + EX_R10) = saved r10
50 * *(r13 + PACA_EX##type + EX_R11) = saved r11
51 * *(r13 + PACA_EX##type + EX_R13) = saved r13
52 *
53 * Expected inputs (TLB exception type):
54 * r10 = saved CR
55 * r13 = PACA_POINTER
56 * *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10
57 * *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11
58 * SPRN_SPRG_GEN_SCRATCH = saved r13
59 *
60 * Only the bolted version of TLB miss exception handlers is supported now.
36 */ 61 */
37.macro DO_KVM intno srr1 62.macro DO_KVM intno srr1
38#ifdef CONFIG_KVM_BOOKE_HV 63#ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3093896015f0..ca9bf459db6a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -246,10 +246,12 @@ struct kvm_arch {
246 int using_mmu_notifiers; 246 int using_mmu_notifiers;
247 u32 hpt_order; 247 u32 hpt_order;
248 atomic_t vcpus_running; 248 atomic_t vcpus_running;
249 u32 online_vcores;
249 unsigned long hpt_npte; 250 unsigned long hpt_npte;
250 unsigned long hpt_mask; 251 unsigned long hpt_mask;
252 atomic_t hpte_mod_interest;
251 spinlock_t slot_phys_lock; 253 spinlock_t slot_phys_lock;
252 unsigned short last_vcpu[NR_CPUS]; 254 cpumask_t need_tlb_flush;
253 struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; 255 struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
254 struct kvmppc_linear_info *hpt_li; 256 struct kvmppc_linear_info *hpt_li;
255#endif /* CONFIG_KVM_BOOK3S_64_HV */ 257#endif /* CONFIG_KVM_BOOK3S_64_HV */
@@ -274,6 +276,7 @@ struct kvmppc_vcore {
274 int nap_count; 276 int nap_count;
275 int napping_threads; 277 int napping_threads;
276 u16 pcpu; 278 u16 pcpu;
279 u16 last_cpu;
277 u8 vcore_state; 280 u8 vcore_state;
278 u8 in_guest; 281 u8 in_guest;
279 struct list_head runnable_threads; 282 struct list_head runnable_threads;
@@ -403,13 +406,18 @@ struct kvm_vcpu_arch {
403 u32 host_mas4; 406 u32 host_mas4;
404 u32 host_mas6; 407 u32 host_mas6;
405 u32 shadow_epcr; 408 u32 shadow_epcr;
406 u32 epcr;
407 u32 shadow_msrp; 409 u32 shadow_msrp;
408 u32 eplc; 410 u32 eplc;
409 u32 epsc; 411 u32 epsc;
410 u32 oldpir; 412 u32 oldpir;
411#endif 413#endif
412 414
415#if defined(CONFIG_BOOKE)
416#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
417 u32 epcr;
418#endif
419#endif
420
413#ifdef CONFIG_PPC_BOOK3S 421#ifdef CONFIG_PPC_BOOK3S
414 /* For Gekko paired singles */ 422 /* For Gekko paired singles */
415 u32 qpr[32]; 423 u32 qpr[32];
@@ -522,7 +530,6 @@ struct kvm_vcpu_arch {
522 u64 dec_jiffies; 530 u64 dec_jiffies;
523 u64 dec_expires; 531 u64 dec_expires;
524 unsigned long pending_exceptions; 532 unsigned long pending_exceptions;
525 u16 last_cpu;
526 u8 ceded; 533 u8 ceded;
527 u8 prodded; 534 u8 prodded;
528 u32 last_inst; 535 u32 last_inst;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 609cca3e9426..572aa7530619 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void);
164 164
165extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); 165extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
166 166
167extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
168
167/* 169/*
168 * Cuts out inst bits with ordering according to spec. 170 * Cuts out inst bits with ordering according to spec.
169 * That means the leftmost bit is zero. All given bits are included. 171 * That means the leftmost bit is zero. All given bits are included.
@@ -293,4 +295,25 @@ static inline void kvmppc_lazy_ee_enable(void)
293#endif 295#endif
294} 296}
295 297
298static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
299{
300 ulong ea;
301 ulong msr_64bit = 0;
302
303 ea = kvmppc_get_gpr(vcpu, rb);
304 if (ra)
305 ea += kvmppc_get_gpr(vcpu, ra);
306
307#if defined(CONFIG_PPC_BOOK3E_64)
308 msr_64bit = MSR_CM;
309#elif defined(CONFIG_PPC_BOOK3S_64)
310 msr_64bit = MSR_SF;
311#endif
312
313 if (!(vcpu->arch.shared->msr & msr_64bit))
314 ea = (uint32_t)ea;
315
316 return ea;
317}
318
296#endif /* __POWERPC_KVM_PPC_H__ */ 319#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index eeabcdbc30f7..99d43e0c1e4a 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -59,7 +59,7 @@
59#define MAS1_TSIZE_SHIFT 7 59#define MAS1_TSIZE_SHIFT 7
60#define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) 60#define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
61 61
62#define MAS2_EPN 0xFFFFF000 62#define MAS2_EPN (~0xFFFUL)
63#define MAS2_X0 0x00000040 63#define MAS2_X0 0x00000040
64#define MAS2_X1 0x00000020 64#define MAS2_X1 0x00000020
65#define MAS2_W 0x00000010 65#define MAS2_W 0x00000010
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 9673f73eb8db..2fdb47a19efd 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -121,6 +121,16 @@ extern char initial_stab[];
121#define PP_RXRX 3 /* Supervisor read, User read */ 121#define PP_RXRX 3 /* Supervisor read, User read */
122#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */ 122#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
123 123
124/* Fields for tlbiel instruction in architecture 2.06 */
125#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */
126#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */
127#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */
128#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */
129#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */
130#define TLBIEL_INVAL_SET_SHIFT 12
131
132#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
133
124#ifndef __ASSEMBLY__ 134#ifndef __ASSEMBLY__
125 135
126struct hash_pte { 136struct hash_pte {
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d24c14163966..97d37278ea2d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -518,6 +518,7 @@
518#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ 518#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
519#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ 519#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
520#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ 520#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
521#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
521#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ 522#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
522#define SRR1_PROGTRAP 0x00020000 /* Trap */ 523#define SRR1_PROGTRAP 0x00020000 /* Trap */
523#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ 524#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b89ae4db45ce..2fba8a66fb10 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params {
331 __u32 reserved[8]; 331 __u32 reserved[8];
332}; 332};
333 333
334/* For KVM_PPC_GET_HTAB_FD */
335struct kvm_get_htab_fd {
336 __u64 flags;
337 __u64 start_index;
338 __u64 reserved[2];
339};
340
341/* Values for kvm_get_htab_fd.flags */
342#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
343#define KVM_GET_HTAB_WRITE ((__u64)0x2)
344
345/*
346 * Data read on the file descriptor is formatted as a series of
347 * records, each consisting of a header followed by a series of
348 * `n_valid' HPTEs (16 bytes each), which are all valid. Following
349 * those valid HPTEs there are `n_invalid' invalid HPTEs, which
350 * are not represented explicitly in the stream. The same format
351 * is used for writing.
352 */
353struct kvm_get_htab_header {
354 __u32 index;
355 __u16 n_valid;
356 __u16 n_invalid;
357};
358
334#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) 359#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
335#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) 360#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
336#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) 361#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@@ -386,4 +411,6 @@ struct kvm_book3e_206_tlb_params {
386#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83) 411#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
387#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) 412#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
388 413
414#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
415
389#endif /* __LINUX_KVM_POWERPC_H */ 416#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7523539cfe9f..4e23ba2f3ca7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -441,8 +441,7 @@ int main(void)
441 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); 441 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
442 DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); 442 DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
443 DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); 443 DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
444 DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter)); 444 DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
445 DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
446 DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); 445 DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
447 DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); 446 DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
448 DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); 447 DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
@@ -470,7 +469,6 @@ int main(void)
470 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); 469 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
471 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); 470 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
472 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); 471 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
473 DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
474 DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); 472 DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
475 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); 473 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
476 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 474 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 71f0cd9edf33..4730c953f435 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
20 bool 20 bool
21 select PREEMPT_NOTIFIERS 21 select PREEMPT_NOTIFIERS
22 select ANON_INODES 22 select ANON_INODES
23 select HAVE_KVM_EVENTFD
23 24
24config KVM_BOOK3S_HANDLER 25config KVM_BOOK3S_HANDLER
25 bool 26 bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index c2a08636e6d4..1e473d46322c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
6 6
7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm 7ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
8 8
9common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) 9common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
10 eventfd.o)
10 11
11CFLAGS_44x_tlb.o := -I. 12CFLAGS_44x_tlb.o := -I.
12CFLAGS_e500_tlb.o := -I. 13CFLAGS_e500_tlb.o := -I.
@@ -72,10 +73,12 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
72 book3s_hv_rmhandlers.o \ 73 book3s_hv_rmhandlers.o \
73 book3s_hv_rm_mmu.o \ 74 book3s_hv_rm_mmu.o \
74 book3s_64_vio_hv.o \ 75 book3s_64_vio_hv.o \
76 book3s_hv_ras.o \
75 book3s_hv_builtin.o 77 book3s_hv_builtin.o
76 78
77kvm-book3s_64-module-objs := \ 79kvm-book3s_64-module-objs := \
78 ../../../virt/kvm/kvm_main.o \ 80 ../../../virt/kvm/kvm_main.o \
81 ../../../virt/kvm/eventfd.o \
79 powerpc.o \ 82 powerpc.o \
80 emulate.o \ 83 emulate.o \
81 book3s.o \ 84 book3s.o \
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2a89a36e7263..8cc18abd6dde 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -25,6 +25,8 @@
25#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
26#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
27#include <linux/srcu.h> 27#include <linux/srcu.h>
28#include <linux/anon_inodes.h>
29#include <linux/file.h>
28 30
29#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
30#include <asm/kvm_ppc.h> 32#include <asm/kvm_ppc.h>
@@ -41,6 +43,11 @@
41/* Power architecture requires HPT is at least 256kB */ 43/* Power architecture requires HPT is at least 256kB */
42#define PPC_MIN_HPT_ORDER 18 44#define PPC_MIN_HPT_ORDER 18
43 45
46static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
47 long pte_index, unsigned long pteh,
48 unsigned long ptel, unsigned long *pte_idx_ret);
49static void kvmppc_rmap_reset(struct kvm *kvm);
50
44long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) 51long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
45{ 52{
46 unsigned long hpt; 53 unsigned long hpt;
@@ -138,10 +145,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
138 /* Set the entire HPT to 0, i.e. invalid HPTEs */ 145 /* Set the entire HPT to 0, i.e. invalid HPTEs */
139 memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); 146 memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
140 /* 147 /*
141 * Set the whole last_vcpu array to an invalid vcpu number. 148 * Reset all the reverse-mapping chains for all memslots
142 * This ensures that each vcpu will flush its TLB on next entry.
143 */ 149 */
144 memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu)); 150 kvmppc_rmap_reset(kvm);
151 /* Ensure that each vcpu will flush its TLB on next entry. */
152 cpumask_setall(&kvm->arch.need_tlb_flush);
145 *htab_orderp = order; 153 *htab_orderp = order;
146 err = 0; 154 err = 0;
147 } else { 155 } else {
@@ -185,6 +193,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
185 unsigned long addr, hash; 193 unsigned long addr, hash;
186 unsigned long psize; 194 unsigned long psize;
187 unsigned long hp0, hp1; 195 unsigned long hp0, hp1;
196 unsigned long idx_ret;
188 long ret; 197 long ret;
189 struct kvm *kvm = vcpu->kvm; 198 struct kvm *kvm = vcpu->kvm;
190 199
@@ -216,7 +225,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
216 hash = (hash << 3) + 7; 225 hash = (hash << 3) + 7;
217 hp_v = hp0 | ((addr >> 16) & ~0x7fUL); 226 hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
218 hp_r = hp1 | addr; 227 hp_r = hp1 | addr;
219 ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); 228 ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
229 &idx_ret);
220 if (ret != H_SUCCESS) { 230 if (ret != H_SUCCESS) {
221 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", 231 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
222 addr, ret); 232 addr, ret);
@@ -354,15 +364,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
354 return err; 364 return err;
355} 365}
356 366
357/* 367long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
358 * We come here on a H_ENTER call from the guest when we are not 368 long pte_index, unsigned long pteh,
359 * using mmu notifiers and we don't have the requested page pinned 369 unsigned long ptel, unsigned long *pte_idx_ret)
360 * already.
361 */
362long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
363 long pte_index, unsigned long pteh, unsigned long ptel)
364{ 370{
365 struct kvm *kvm = vcpu->kvm;
366 unsigned long psize, gpa, gfn; 371 unsigned long psize, gpa, gfn;
367 struct kvm_memory_slot *memslot; 372 struct kvm_memory_slot *memslot;
368 long ret; 373 long ret;
@@ -390,8 +395,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
390 do_insert: 395 do_insert:
391 /* Protect linux PTE lookup from page table destruction */ 396 /* Protect linux PTE lookup from page table destruction */
392 rcu_read_lock_sched(); /* this disables preemption too */ 397 rcu_read_lock_sched(); /* this disables preemption too */
393 vcpu->arch.pgdir = current->mm->pgd; 398 ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
394 ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); 399 current->mm->pgd, false, pte_idx_ret);
395 rcu_read_unlock_sched(); 400 rcu_read_unlock_sched();
396 if (ret == H_TOO_HARD) { 401 if (ret == H_TOO_HARD) {
397 /* this can't happen */ 402 /* this can't happen */
@@ -402,6 +407,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
402 407
403} 408}
404 409
410/*
411 * We come here on a H_ENTER call from the guest when we are not
412 * using mmu notifiers and we don't have the requested page pinned
413 * already.
414 */
415long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
416 long pte_index, unsigned long pteh,
417 unsigned long ptel)
418{
419 return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
420 pteh, ptel, &vcpu->arch.gpr[4]);
421}
422
405static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, 423static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
406 gva_t eaddr) 424 gva_t eaddr)
407{ 425{
@@ -756,6 +774,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
756 goto out_put; 774 goto out_put;
757} 775}
758 776
777static void kvmppc_rmap_reset(struct kvm *kvm)
778{
779 struct kvm_memslots *slots;
780 struct kvm_memory_slot *memslot;
781 int srcu_idx;
782
783 srcu_idx = srcu_read_lock(&kvm->srcu);
784 slots = kvm->memslots;
785 kvm_for_each_memslot(memslot, slots) {
786 /*
787 * This assumes it is acceptable to lose reference and
788 * change bits across a reset.
789 */
790 memset(memslot->arch.rmap, 0,
791 memslot->npages * sizeof(*memslot->arch.rmap));
792 }
793 srcu_read_unlock(&kvm->srcu, srcu_idx);
794}
795
759static int kvm_handle_hva_range(struct kvm *kvm, 796static int kvm_handle_hva_range(struct kvm *kvm,
760 unsigned long start, 797 unsigned long start,
761 unsigned long end, 798 unsigned long end,
@@ -1131,6 +1168,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
1131 put_page(page); 1168 put_page(page);
1132} 1169}
1133 1170
1171/*
1172 * Functions for reading and writing the hash table via reads and
1173 * writes on a file descriptor.
1174 *
1175 * Reads return the guest view of the hash table, which has to be
1176 * pieced together from the real hash table and the guest_rpte
1177 * values in the revmap array.
1178 *
1179 * On writes, each HPTE written is considered in turn, and if it
1180 * is valid, it is written to the HPT as if an H_ENTER with the
1181 * exact flag set was done. When the invalid count is non-zero
1182 * in the header written to the stream, the kernel will make
1183 * sure that that many HPTEs are invalid, and invalidate them
1184 * if not.
1185 */
1186
1187struct kvm_htab_ctx {
1188 unsigned long index;
1189 unsigned long flags;
1190 struct kvm *kvm;
1191 int first_pass;
1192};
1193
1194#define HPTE_SIZE (2 * sizeof(unsigned long))
1195
1196static long record_hpte(unsigned long flags, unsigned long *hptp,
1197 unsigned long *hpte, struct revmap_entry *revp,
1198 int want_valid, int first_pass)
1199{
1200 unsigned long v, r;
1201 int ok = 1;
1202 int valid, dirty;
1203
1204 /* Unmodified entries are uninteresting except on the first pass */
1205 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
1206 if (!first_pass && !dirty)
1207 return 0;
1208
1209 valid = 0;
1210 if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
1211 valid = 1;
1212 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
1213 !(hptp[0] & HPTE_V_BOLTED))
1214 valid = 0;
1215 }
1216 if (valid != want_valid)
1217 return 0;
1218
1219 v = r = 0;
1220 if (valid || dirty) {
1221 /* lock the HPTE so it's stable and read it */
1222 preempt_disable();
1223 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
1224 cpu_relax();
1225 v = hptp[0];
1226 if (v & HPTE_V_ABSENT) {
1227 v &= ~HPTE_V_ABSENT;
1228 v |= HPTE_V_VALID;
1229 }
1230 /* re-evaluate valid and dirty from synchronized HPTE value */
1231 valid = !!(v & HPTE_V_VALID);
1232 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
1233 valid = 0;
1234 r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
1235 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
1236 /* only clear modified if this is the right sort of entry */
1237 if (valid == want_valid && dirty) {
1238 r &= ~HPTE_GR_MODIFIED;
1239 revp->guest_rpte = r;
1240 }
1241 asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
1242 hptp[0] &= ~HPTE_V_HVLOCK;
1243 preempt_enable();
1244 if (!(valid == want_valid && (first_pass || dirty)))
1245 ok = 0;
1246 }
1247 hpte[0] = v;
1248 hpte[1] = r;
1249 return ok;
1250}
1251
1252static ssize_t kvm_htab_read(struct file *file, char __user *buf,
1253 size_t count, loff_t *ppos)
1254{
1255 struct kvm_htab_ctx *ctx = file->private_data;
1256 struct kvm *kvm = ctx->kvm;
1257 struct kvm_get_htab_header hdr;
1258 unsigned long *hptp;
1259 struct revmap_entry *revp;
1260 unsigned long i, nb, nw;
1261 unsigned long __user *lbuf;
1262 struct kvm_get_htab_header __user *hptr;
1263 unsigned long flags;
1264 int first_pass;
1265 unsigned long hpte[2];
1266
1267 if (!access_ok(VERIFY_WRITE, buf, count))
1268 return -EFAULT;
1269
1270 first_pass = ctx->first_pass;
1271 flags = ctx->flags;
1272
1273 i = ctx->index;
1274 hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
1275 revp = kvm->arch.revmap + i;
1276 lbuf = (unsigned long __user *)buf;
1277
1278 nb = 0;
1279 while (nb + sizeof(hdr) + HPTE_SIZE < count) {
1280 /* Initialize header */
1281 hptr = (struct kvm_get_htab_header __user *)buf;
1282 hdr.n_valid = 0;
1283 hdr.n_invalid = 0;
1284 nw = nb;
1285 nb += sizeof(hdr);
1286 lbuf = (unsigned long __user *)(buf + sizeof(hdr));
1287
1288 /* Skip uninteresting entries, i.e. clean on not-first pass */
1289 if (!first_pass) {
1290 while (i < kvm->arch.hpt_npte &&
1291 !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
1292 ++i;
1293 hptp += 2;
1294 ++revp;
1295 }
1296 }
1297 hdr.index = i;
1298
1299 /* Grab a series of valid entries */
1300 while (i < kvm->arch.hpt_npte &&
1301 hdr.n_valid < 0xffff &&
1302 nb + HPTE_SIZE < count &&
1303 record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
1304 /* valid entry, write it out */
1305 ++hdr.n_valid;
1306 if (__put_user(hpte[0], lbuf) ||
1307 __put_user(hpte[1], lbuf + 1))
1308 return -EFAULT;
1309 nb += HPTE_SIZE;
1310 lbuf += 2;
1311 ++i;
1312 hptp += 2;
1313 ++revp;
1314 }
1315 /* Now skip invalid entries while we can */
1316 while (i < kvm->arch.hpt_npte &&
1317 hdr.n_invalid < 0xffff &&
1318 record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
1319 /* found an invalid entry */
1320 ++hdr.n_invalid;
1321 ++i;
1322 hptp += 2;
1323 ++revp;
1324 }
1325
1326 if (hdr.n_valid || hdr.n_invalid) {
1327 /* write back the header */
1328 if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
1329 return -EFAULT;
1330 nw = nb;
1331 buf = (char __user *)lbuf;
1332 } else {
1333 nb = nw;
1334 }
1335
1336 /* Check if we've wrapped around the hash table */
1337 if (i >= kvm->arch.hpt_npte) {
1338 i = 0;
1339 ctx->first_pass = 0;
1340 break;
1341 }
1342 }
1343
1344 ctx->index = i;
1345
1346 return nb;
1347}
1348
1349static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1350 size_t count, loff_t *ppos)
1351{
1352 struct kvm_htab_ctx *ctx = file->private_data;
1353 struct kvm *kvm = ctx->kvm;
1354 struct kvm_get_htab_header hdr;
1355 unsigned long i, j;
1356 unsigned long v, r;
1357 unsigned long __user *lbuf;
1358 unsigned long *hptp;
1359 unsigned long tmp[2];
1360 ssize_t nb;
1361 long int err, ret;
1362 int rma_setup;
1363
1364 if (!access_ok(VERIFY_READ, buf, count))
1365 return -EFAULT;
1366
1367 /* lock out vcpus from running while we're doing this */
1368 mutex_lock(&kvm->lock);
1369 rma_setup = kvm->arch.rma_setup_done;
1370 if (rma_setup) {
1371 kvm->arch.rma_setup_done = 0; /* temporarily */
1372 /* order rma_setup_done vs. vcpus_running */
1373 smp_mb();
1374 if (atomic_read(&kvm->arch.vcpus_running)) {
1375 kvm->arch.rma_setup_done = 1;
1376 mutex_unlock(&kvm->lock);
1377 return -EBUSY;
1378 }
1379 }
1380
1381 err = 0;
1382 for (nb = 0; nb + sizeof(hdr) <= count; ) {
1383 err = -EFAULT;
1384 if (__copy_from_user(&hdr, buf, sizeof(hdr)))
1385 break;
1386
1387 err = 0;
1388 if (nb + hdr.n_valid * HPTE_SIZE > count)
1389 break;
1390
1391 nb += sizeof(hdr);
1392 buf += sizeof(hdr);
1393
1394 err = -EINVAL;
1395 i = hdr.index;
1396 if (i >= kvm->arch.hpt_npte ||
1397 i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
1398 break;
1399
1400 hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
1401 lbuf = (unsigned long __user *)buf;
1402 for (j = 0; j < hdr.n_valid; ++j) {
1403 err = -EFAULT;
1404 if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
1405 goto out;
1406 err = -EINVAL;
1407 if (!(v & HPTE_V_VALID))
1408 goto out;
1409 lbuf += 2;
1410 nb += HPTE_SIZE;
1411
1412 if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
1413 kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
1414 err = -EIO;
1415 ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
1416 tmp);
1417 if (ret != H_SUCCESS) {
1418 pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
1419 "r=%lx\n", ret, i, v, r);
1420 goto out;
1421 }
1422 if (!rma_setup && is_vrma_hpte(v)) {
1423 unsigned long psize = hpte_page_size(v, r);
1424 unsigned long senc = slb_pgsize_encoding(psize);
1425 unsigned long lpcr;
1426
1427 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1428 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1429 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
1430 lpcr |= senc << (LPCR_VRMASD_SH - 4);
1431 kvm->arch.lpcr = lpcr;
1432 rma_setup = 1;
1433 }
1434 ++i;
1435 hptp += 2;
1436 }
1437
1438 for (j = 0; j < hdr.n_invalid; ++j) {
1439 if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
1440 kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
1441 ++i;
1442 hptp += 2;
1443 }
1444 err = 0;
1445 }
1446
1447 out:
1448 /* Order HPTE updates vs. rma_setup_done */
1449 smp_wmb();
1450 kvm->arch.rma_setup_done = rma_setup;
1451 mutex_unlock(&kvm->lock);
1452
1453 if (err)
1454 return err;
1455 return nb;
1456}
1457
1458static int kvm_htab_release(struct inode *inode, struct file *filp)
1459{
1460 struct kvm_htab_ctx *ctx = filp->private_data;
1461
1462 filp->private_data = NULL;
1463 if (!(ctx->flags & KVM_GET_HTAB_WRITE))
1464 atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
1465 kvm_put_kvm(ctx->kvm);
1466 kfree(ctx);
1467 return 0;
1468}
1469
1470static struct file_operations kvm_htab_fops = {
1471 .read = kvm_htab_read,
1472 .write = kvm_htab_write,
1473 .llseek = default_llseek,
1474 .release = kvm_htab_release,
1475};
1476
1477int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
1478{
1479 int ret;
1480 struct kvm_htab_ctx *ctx;
1481 int rwflag;
1482
1483 /* reject flags we don't recognize */
1484 if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
1485 return -EINVAL;
1486 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1487 if (!ctx)
1488 return -ENOMEM;
1489 kvm_get_kvm(kvm);
1490 ctx->kvm = kvm;
1491 ctx->index = ghf->start_index;
1492 ctx->flags = ghf->flags;
1493 ctx->first_pass = 1;
1494
1495 rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
1496 ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
1497 if (ret < 0) {
1498 kvm_put_kvm(kvm);
1499 return ret;
1500 }
1501
1502 if (rwflag == O_RDONLY) {
1503 mutex_lock(&kvm->slots_lock);
1504 atomic_inc(&kvm->arch.hpte_mod_interest);
1505 /* make sure kvmppc_do_h_enter etc. see the increment */
1506 synchronize_srcu_expedited(&kvm->srcu);
1507 mutex_unlock(&kvm->slots_lock);
1508 }
1509
1510 return ret;
1511}
1512
1134void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 1513void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
1135{ 1514{
1136 struct kvmppc_mmu *mmu = &vcpu->arch.mmu; 1515 struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index b9a989dc76cc..d31a716f7f2b 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -22,6 +22,7 @@
22#include <asm/kvm_book3s.h> 22#include <asm/kvm_book3s.h>
23#include <asm/reg.h> 23#include <asm/reg.h>
24#include <asm/switch_to.h> 24#include <asm/switch_to.h>
25#include <asm/time.h>
25 26
26#define OP_19_XOP_RFID 18 27#define OP_19_XOP_RFID 18
27#define OP_19_XOP_RFI 50 28#define OP_19_XOP_RFI 50
@@ -395,6 +396,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
395 (mfmsr() & MSR_HV)) 396 (mfmsr() & MSR_HV))
396 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 397 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
397 break; 398 break;
399 case SPRN_PURR:
400 to_book3s(vcpu)->purr_offset = spr_val - get_tb();
401 break;
402 case SPRN_SPURR:
403 to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
404 break;
398 case SPRN_GQR0: 405 case SPRN_GQR0:
399 case SPRN_GQR1: 406 case SPRN_GQR1:
400 case SPRN_GQR2: 407 case SPRN_GQR2:
@@ -412,6 +419,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
412 case SPRN_CTRLF: 419 case SPRN_CTRLF:
413 case SPRN_CTRLT: 420 case SPRN_CTRLT:
414 case SPRN_L2CR: 421 case SPRN_L2CR:
422 case SPRN_DSCR:
415 case SPRN_MMCR0_GEKKO: 423 case SPRN_MMCR0_GEKKO:
416 case SPRN_MMCR1_GEKKO: 424 case SPRN_MMCR1_GEKKO:
417 case SPRN_PMC1_GEKKO: 425 case SPRN_PMC1_GEKKO:
@@ -483,9 +491,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
483 *spr_val = to_book3s(vcpu)->hid[5]; 491 *spr_val = to_book3s(vcpu)->hid[5];
484 break; 492 break;
485 case SPRN_CFAR: 493 case SPRN_CFAR:
486 case SPRN_PURR: 494 case SPRN_DSCR:
487 *spr_val = 0; 495 *spr_val = 0;
488 break; 496 break;
497 case SPRN_PURR:
498 *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
499 break;
500 case SPRN_SPURR:
501 *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
502 break;
489 case SPRN_GQR0: 503 case SPRN_GQR0:
490 case SPRN_GQR1: 504 case SPRN_GQR1:
491 case SPRN_GQR2: 505 case SPRN_GQR2:
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index a150817d6d4c..7057a02f0906 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -28,8 +28,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
28#ifdef CONFIG_ALTIVEC 28#ifdef CONFIG_ALTIVEC
29EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); 29EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
30#endif 30#endif
31#ifdef CONFIG_VSX
32EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
33#endif
34#endif 31#endif
35 32
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 843eb754a1d5..71d0c90b62bf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -545,6 +545,17 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
545 case BOOK3S_INTERRUPT_PERFMON: 545 case BOOK3S_INTERRUPT_PERFMON:
546 r = RESUME_GUEST; 546 r = RESUME_GUEST;
547 break; 547 break;
548 case BOOK3S_INTERRUPT_MACHINE_CHECK:
549 /*
550 * Deliver a machine check interrupt to the guest.
551 * We have to do this, even if the host has handled the
552 * machine check, because machine checks use SRR0/1 and
553 * the interrupt might have trashed guest state in them.
554 */
555 kvmppc_book3s_queue_irqprio(vcpu,
556 BOOK3S_INTERRUPT_MACHINE_CHECK);
557 r = RESUME_GUEST;
558 break;
548 case BOOK3S_INTERRUPT_PROGRAM: 559 case BOOK3S_INTERRUPT_PROGRAM:
549 { 560 {
550 ulong flags; 561 ulong flags;
@@ -853,7 +864,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
853 goto free_vcpu; 864 goto free_vcpu;
854 865
855 vcpu->arch.shared = &vcpu->arch.shregs; 866 vcpu->arch.shared = &vcpu->arch.shregs;
856 vcpu->arch.last_cpu = -1;
857 vcpu->arch.mmcr[0] = MMCR0_FC; 867 vcpu->arch.mmcr[0] = MMCR0_FC;
858 vcpu->arch.ctrl = CTRL_RUNLATCH; 868 vcpu->arch.ctrl = CTRL_RUNLATCH;
859 /* default to host PVR, since we can't spoof it */ 869 /* default to host PVR, since we can't spoof it */
@@ -880,6 +890,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
880 vcore->preempt_tb = TB_NIL; 890 vcore->preempt_tb = TB_NIL;
881 } 891 }
882 kvm->arch.vcores[core] = vcore; 892 kvm->arch.vcores[core] = vcore;
893 kvm->arch.online_vcores++;
883 } 894 }
884 mutex_unlock(&kvm->lock); 895 mutex_unlock(&kvm->lock);
885 896
@@ -1563,18 +1574,6 @@ out:
1563 return r; 1574 return r;
1564} 1575}
1565 1576
1566static unsigned long slb_pgsize_encoding(unsigned long psize)
1567{
1568 unsigned long senc = 0;
1569
1570 if (psize > 0x1000) {
1571 senc = SLB_VSID_L;
1572 if (psize == 0x10000)
1573 senc |= SLB_VSID_LP_01;
1574 }
1575 return senc;
1576}
1577
1578static void unpin_slot(struct kvm_memory_slot *memslot) 1577static void unpin_slot(struct kvm_memory_slot *memslot)
1579{ 1578{
1580 unsigned long *physp; 1579 unsigned long *physp;
@@ -1814,6 +1813,13 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1814 return -ENOMEM; 1813 return -ENOMEM;
1815 kvm->arch.lpid = lpid; 1814 kvm->arch.lpid = lpid;
1816 1815
1816 /*
1817 * Since we don't flush the TLB when tearing down a VM,
1818 * and this lpid might have previously been used,
1819 * make sure we flush on each core before running the new VM.
1820 */
1821 cpumask_setall(&kvm->arch.need_tlb_flush);
1822
1817 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1823 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1818 1824
1819 kvm->arch.rma = NULL; 1825 kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
new file mode 100644
index 000000000000..35f3cf0269b3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -0,0 +1,144 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
7 */
8
9#include <linux/types.h>
10#include <linux/string.h>
11#include <linux/kvm.h>
12#include <linux/kvm_host.h>
13#include <linux/kernel.h>
14#include <asm/opal.h>
15
16/* SRR1 bits for machine check on POWER7 */
17#define SRR1_MC_LDSTERR (1ul << (63-42))
18#define SRR1_MC_IFETCH_SH (63-45)
19#define SRR1_MC_IFETCH_MASK 0x7
20#define SRR1_MC_IFETCH_SLBPAR 2 /* SLB parity error */
21#define SRR1_MC_IFETCH_SLBMULTI 3 /* SLB multi-hit */
22#define SRR1_MC_IFETCH_SLBPARMULTI 4 /* SLB parity + multi-hit */
23#define SRR1_MC_IFETCH_TLBMULTI 5 /* I-TLB multi-hit */
24
25/* DSISR bits for machine check on POWER7 */
26#define DSISR_MC_DERAT_MULTI 0x800 /* D-ERAT multi-hit */
27#define DSISR_MC_TLB_MULTI 0x400 /* D-TLB multi-hit */
28#define DSISR_MC_SLB_PARITY 0x100 /* SLB parity error */
29#define DSISR_MC_SLB_MULTI 0x080 /* SLB multi-hit */
30#define DSISR_MC_SLB_PARMULTI 0x040 /* SLB parity + multi-hit */
31
32/* POWER7 SLB flush and reload */
33static void reload_slb(struct kvm_vcpu *vcpu)
34{
35 struct slb_shadow *slb;
36 unsigned long i, n;
37
38 /* First clear out SLB */
39 asm volatile("slbmte %0,%0; slbia" : : "r" (0));
40
41 /* Do they have an SLB shadow buffer registered? */
42 slb = vcpu->arch.slb_shadow.pinned_addr;
43 if (!slb)
44 return;
45
46 /* Sanity check */
47 n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
48 if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
49 return;
50
51 /* Load up the SLB from that */
52 for (i = 0; i < n; ++i) {
53 unsigned long rb = slb->save_area[i].esid;
54 unsigned long rs = slb->save_area[i].vsid;
55
56 rb = (rb & ~0xFFFul) | i; /* insert entry number */
57 asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
58 }
59}
60
61/* POWER7 TLB flush */
62static void flush_tlb_power7(struct kvm_vcpu *vcpu)
63{
64 unsigned long i, rb;
65
66 rb = TLBIEL_INVAL_SET_LPID;
67 for (i = 0; i < POWER7_TLB_SETS; ++i) {
68 asm volatile("tlbiel %0" : : "r" (rb));
69 rb += 1 << TLBIEL_INVAL_SET_SHIFT;
70 }
71}
72
73/*
74 * On POWER7, see if we can handle a machine check that occurred inside
75 * the guest in real mode, without switching to the host partition.
76 *
77 * Returns: 0 => exit guest, 1 => deliver machine check to guest
78 */
79static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
80{
81 unsigned long srr1 = vcpu->arch.shregs.msr;
82 struct opal_machine_check_event *opal_evt;
83 long handled = 1;
84
85 if (srr1 & SRR1_MC_LDSTERR) {
86 /* error on load/store */
87 unsigned long dsisr = vcpu->arch.shregs.dsisr;
88
89 if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
90 DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
91 /* flush and reload SLB; flushes D-ERAT too */
92 reload_slb(vcpu);
93 dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
94 DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
95 }
96 if (dsisr & DSISR_MC_TLB_MULTI) {
97 flush_tlb_power7(vcpu);
98 dsisr &= ~DSISR_MC_TLB_MULTI;
99 }
100 /* Any other errors we don't understand? */
101 if (dsisr & 0xffffffffUL)
102 handled = 0;
103 }
104
105 switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
106 case 0:
107 break;
108 case SRR1_MC_IFETCH_SLBPAR:
109 case SRR1_MC_IFETCH_SLBMULTI:
110 case SRR1_MC_IFETCH_SLBPARMULTI:
111 reload_slb(vcpu);
112 break;
113 case SRR1_MC_IFETCH_TLBMULTI:
114 flush_tlb_power7(vcpu);
115 break;
116 default:
117 handled = 0;
118 }
119
120 /*
121 * See if OPAL has already handled the condition.
122 * We assume that if the condition is recovered then OPAL
123 * will have generated an error log event that we will pick
124 * up and log later.
125 */
126 opal_evt = local_paca->opal_mc_evt;
127 if (opal_evt->version == OpalMCE_V1 &&
128 (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
129 opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
130 handled = 1;
131
132 if (handled)
133 opal_evt->in_use = 0;
134
135 return handled;
136}
137
138long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
139{
140 if (cpu_has_feature(CPU_FTR_ARCH_206))
141 return kvmppc_realmode_mc_power7(vcpu);
142
143 return 0;
144}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5e06e3153888..19c93bae1aea 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x)
35 return __va(addr); 35 return __va(addr);
36} 36}
37 37
38/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
39static int global_invalidates(struct kvm *kvm, unsigned long flags)
40{
41 int global;
42
43 /*
44 * If there is only one vcore, and it's currently running,
45 * we can use tlbiel as long as we mark all other physical
46 * cores as potentially having stale TLB entries for this lpid.
47 * If we're not using MMU notifiers, we never take pages away
48 * from the guest, so we can use tlbiel if requested.
49 * Otherwise, don't use tlbiel.
50 */
51 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
52 global = 0;
53 else if (kvm->arch.using_mmu_notifiers)
54 global = 1;
55 else
56 global = !(flags & H_LOCAL);
57
58 if (!global) {
59 /* any other core might now have stale TLB entries... */
60 smp_wmb();
61 cpumask_setall(&kvm->arch.need_tlb_flush);
62 cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
63 &kvm->arch.need_tlb_flush);
64 }
65
66 return global;
67}
68
38/* 69/*
39 * Add this HPTE into the chain for the real page. 70 * Add this HPTE into the chain for the real page.
40 * Must be called with the chain locked; it unlocks the chain. 71 * Must be called with the chain locked; it unlocks the chain.
@@ -59,13 +90,24 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
59 head->back = pte_index; 90 head->back = pte_index;
60 } else { 91 } else {
61 rev->forw = rev->back = pte_index; 92 rev->forw = rev->back = pte_index;
62 i = pte_index; 93 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
94 pte_index | KVMPPC_RMAP_PRESENT;
63 } 95 }
64 smp_wmb(); 96 unlock_rmap(rmap);
65 *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
66} 97}
67EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
68 99
100/*
101 * Note modification of an HPTE; set the HPTE modified bit
102 * if anyone is interested.
103 */
104static inline void note_hpte_modification(struct kvm *kvm,
105 struct revmap_entry *rev)
106{
107 if (atomic_read(&kvm->arch.hpte_mod_interest))
108 rev->guest_rpte |= HPTE_GR_MODIFIED;
109}
110
69/* Remove this HPTE from the chain for a real page */ 111/* Remove this HPTE from the chain for a real page */
70static void remove_revmap_chain(struct kvm *kvm, long pte_index, 112static void remove_revmap_chain(struct kvm *kvm, long pte_index,
71 struct revmap_entry *rev, 113 struct revmap_entry *rev,
@@ -103,14 +145,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
103 unlock_rmap(rmap); 145 unlock_rmap(rmap);
104} 146}
105 147
106static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, 148static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
107 int writing, unsigned long *pte_sizep) 149 int writing, unsigned long *pte_sizep)
108{ 150{
109 pte_t *ptep; 151 pte_t *ptep;
110 unsigned long ps = *pte_sizep; 152 unsigned long ps = *pte_sizep;
111 unsigned int shift; 153 unsigned int shift;
112 154
113 ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift); 155 ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
114 if (!ptep) 156 if (!ptep)
115 return __pte(0); 157 return __pte(0);
116 if (shift) 158 if (shift)
@@ -130,15 +172,15 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
130 hpte[0] = hpte_v; 172 hpte[0] = hpte_v;
131} 173}
132 174
133long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 175long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
134 long pte_index, unsigned long pteh, unsigned long ptel) 176 long pte_index, unsigned long pteh, unsigned long ptel,
177 pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
135{ 178{
136 struct kvm *kvm = vcpu->kvm;
137 unsigned long i, pa, gpa, gfn, psize; 179 unsigned long i, pa, gpa, gfn, psize;
138 unsigned long slot_fn, hva; 180 unsigned long slot_fn, hva;
139 unsigned long *hpte; 181 unsigned long *hpte;
140 struct revmap_entry *rev; 182 struct revmap_entry *rev;
141 unsigned long g_ptel = ptel; 183 unsigned long g_ptel;
142 struct kvm_memory_slot *memslot; 184 struct kvm_memory_slot *memslot;
143 unsigned long *physp, pte_size; 185 unsigned long *physp, pte_size;
144 unsigned long is_io; 186 unsigned long is_io;
@@ -147,13 +189,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
147 unsigned int writing; 189 unsigned int writing;
148 unsigned long mmu_seq; 190 unsigned long mmu_seq;
149 unsigned long rcbits; 191 unsigned long rcbits;
150 bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
151 192
152 psize = hpte_page_size(pteh, ptel); 193 psize = hpte_page_size(pteh, ptel);
153 if (!psize) 194 if (!psize)
154 return H_PARAMETER; 195 return H_PARAMETER;
155 writing = hpte_is_writable(ptel); 196 writing = hpte_is_writable(ptel);
156 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 197 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
198 ptel &= ~HPTE_GR_RESERVED;
199 g_ptel = ptel;
157 200
158 /* used later to detect if we might have been invalidated */ 201 /* used later to detect if we might have been invalidated */
159 mmu_seq = kvm->mmu_notifier_seq; 202 mmu_seq = kvm->mmu_notifier_seq;
@@ -201,7 +244,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
201 244
202 /* Look up the Linux PTE for the backing page */ 245 /* Look up the Linux PTE for the backing page */
203 pte_size = psize; 246 pte_size = psize;
204 pte = lookup_linux_pte(vcpu, hva, writing, &pte_size); 247 pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
205 if (pte_present(pte)) { 248 if (pte_present(pte)) {
206 if (writing && !pte_write(pte)) 249 if (writing && !pte_write(pte))
207 /* make the actual HPTE be read-only */ 250 /* make the actual HPTE be read-only */
@@ -210,6 +253,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
210 pa = pte_pfn(pte) << PAGE_SHIFT; 253 pa = pte_pfn(pte) << PAGE_SHIFT;
211 } 254 }
212 } 255 }
256
213 if (pte_size < psize) 257 if (pte_size < psize)
214 return H_PARAMETER; 258 return H_PARAMETER;
215 if (pa && pte_size > psize) 259 if (pa && pte_size > psize)
@@ -287,8 +331,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
287 rev = &kvm->arch.revmap[pte_index]; 331 rev = &kvm->arch.revmap[pte_index];
288 if (realmode) 332 if (realmode)
289 rev = real_vmalloc_addr(rev); 333 rev = real_vmalloc_addr(rev);
290 if (rev) 334 if (rev) {
291 rev->guest_rpte = g_ptel; 335 rev->guest_rpte = g_ptel;
336 note_hpte_modification(kvm, rev);
337 }
292 338
293 /* Link HPTE into reverse-map chain */ 339 /* Link HPTE into reverse-map chain */
294 if (pteh & HPTE_V_VALID) { 340 if (pteh & HPTE_V_VALID) {
@@ -297,7 +343,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
297 lock_rmap(rmap); 343 lock_rmap(rmap);
298 /* Check for pending invalidations under the rmap chain lock */ 344 /* Check for pending invalidations under the rmap chain lock */
299 if (kvm->arch.using_mmu_notifiers && 345 if (kvm->arch.using_mmu_notifiers &&
300 mmu_notifier_retry(vcpu->kvm, mmu_seq)) { 346 mmu_notifier_retry(kvm, mmu_seq)) {
301 /* inval in progress, write a non-present HPTE */ 347 /* inval in progress, write a non-present HPTE */
302 pteh |= HPTE_V_ABSENT; 348 pteh |= HPTE_V_ABSENT;
303 pteh &= ~HPTE_V_VALID; 349 pteh &= ~HPTE_V_VALID;
@@ -318,10 +364,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
318 hpte[0] = pteh; 364 hpte[0] = pteh;
319 asm volatile("ptesync" : : : "memory"); 365 asm volatile("ptesync" : : : "memory");
320 366
321 vcpu->arch.gpr[4] = pte_index; 367 *pte_idx_ret = pte_index;
322 return H_SUCCESS; 368 return H_SUCCESS;
323} 369}
324EXPORT_SYMBOL_GPL(kvmppc_h_enter); 370EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
371
372long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
373 long pte_index, unsigned long pteh, unsigned long ptel)
374{
375 return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
376 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
377}
325 378
326#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 379#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
327 380
@@ -343,11 +396,10 @@ static inline int try_lock_tlbie(unsigned int *lock)
343 return old == 0; 396 return old == 0;
344} 397}
345 398
346long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, 399long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
347 unsigned long pte_index, unsigned long avpn, 400 unsigned long pte_index, unsigned long avpn,
348 unsigned long va) 401 unsigned long *hpret)
349{ 402{
350 struct kvm *kvm = vcpu->kvm;
351 unsigned long *hpte; 403 unsigned long *hpte;
352 unsigned long v, r, rb; 404 unsigned long v, r, rb;
353 struct revmap_entry *rev; 405 struct revmap_entry *rev;
@@ -369,7 +421,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
369 if (v & HPTE_V_VALID) { 421 if (v & HPTE_V_VALID) {
370 hpte[0] &= ~HPTE_V_VALID; 422 hpte[0] &= ~HPTE_V_VALID;
371 rb = compute_tlbie_rb(v, hpte[1], pte_index); 423 rb = compute_tlbie_rb(v, hpte[1], pte_index);
372 if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) { 424 if (global_invalidates(kvm, flags)) {
373 while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) 425 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
374 cpu_relax(); 426 cpu_relax();
375 asm volatile("ptesync" : : : "memory"); 427 asm volatile("ptesync" : : : "memory");
@@ -385,13 +437,22 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
385 /* Read PTE low word after tlbie to get final R/C values */ 437 /* Read PTE low word after tlbie to get final R/C values */
386 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); 438 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
387 } 439 }
388 r = rev->guest_rpte; 440 r = rev->guest_rpte & ~HPTE_GR_RESERVED;
441 note_hpte_modification(kvm, rev);
389 unlock_hpte(hpte, 0); 442 unlock_hpte(hpte, 0);
390 443
391 vcpu->arch.gpr[4] = v; 444 hpret[0] = v;
392 vcpu->arch.gpr[5] = r; 445 hpret[1] = r;
393 return H_SUCCESS; 446 return H_SUCCESS;
394} 447}
448EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
449
450long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
451 unsigned long pte_index, unsigned long avpn)
452{
453 return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
454 &vcpu->arch.gpr[4]);
455}
395 456
396long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) 457long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
397{ 458{
@@ -459,6 +520,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
459 520
460 args[j] = ((0x80 | flags) << 56) + pte_index; 521 args[j] = ((0x80 | flags) << 56) + pte_index;
461 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 522 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
523 note_hpte_modification(kvm, rev);
462 524
463 if (!(hp[0] & HPTE_V_VALID)) { 525 if (!(hp[0] & HPTE_V_VALID)) {
464 /* insert R and C bits from PTE */ 526 /* insert R and C bits from PTE */
@@ -534,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
534 return H_NOT_FOUND; 596 return H_NOT_FOUND;
535 } 597 }
536 598
537 if (atomic_read(&kvm->online_vcpus) == 1)
538 flags |= H_LOCAL;
539 v = hpte[0]; 599 v = hpte[0];
540 bits = (flags << 55) & HPTE_R_PP0; 600 bits = (flags << 55) & HPTE_R_PP0;
541 bits |= (flags << 48) & HPTE_R_KEY_HI; 601 bits |= (flags << 48) & HPTE_R_KEY_HI;
@@ -548,6 +608,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
548 if (rev) { 608 if (rev) {
549 r = (rev->guest_rpte & ~mask) | bits; 609 r = (rev->guest_rpte & ~mask) | bits;
550 rev->guest_rpte = r; 610 rev->guest_rpte = r;
611 note_hpte_modification(kvm, rev);
551 } 612 }
552 r = (hpte[1] & ~mask) | bits; 613 r = (hpte[1] & ~mask) | bits;
553 614
@@ -555,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
555 if (v & HPTE_V_VALID) { 616 if (v & HPTE_V_VALID) {
556 rb = compute_tlbie_rb(v, r, pte_index); 617 rb = compute_tlbie_rb(v, r, pte_index);
557 hpte[0] = v & ~HPTE_V_VALID; 618 hpte[0] = v & ~HPTE_V_VALID;
558 if (!(flags & H_LOCAL)) { 619 if (global_invalidates(kvm, flags)) {
559 while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) 620 while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
560 cpu_relax(); 621 cpu_relax();
561 asm volatile("ptesync" : : : "memory"); 622 asm volatile("ptesync" : : : "memory");
@@ -568,6 +629,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
568 asm volatile("tlbiel %0" : : "r" (rb)); 629 asm volatile("tlbiel %0" : : "r" (rb));
569 asm volatile("ptesync" : : : "memory"); 630 asm volatile("ptesync" : : : "memory");
570 } 631 }
632 /*
633 * If the host has this page as readonly but the guest
634 * wants to make it read/write, reduce the permissions.
635 * Checking the host permissions involves finding the
636 * memslot and then the Linux PTE for the page.
637 */
638 if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
639 unsigned long psize, gfn, hva;
640 struct kvm_memory_slot *memslot;
641 pgd_t *pgdir = vcpu->arch.pgdir;
642 pte_t pte;
643
644 psize = hpte_page_size(v, r);
645 gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
646 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
647 if (memslot) {
648 hva = __gfn_to_hva_memslot(memslot, gfn);
649 pte = lookup_linux_pte(pgdir, hva, 1, &psize);
650 if (pte_present(pte) && !pte_write(pte))
651 r = hpte_make_readonly(r);
652 }
653 }
571 } 654 }
572 hpte[1] = r; 655 hpte[1] = r;
573 eieio(); 656 eieio();
@@ -599,8 +682,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
599 v &= ~HPTE_V_ABSENT; 682 v &= ~HPTE_V_ABSENT;
600 v |= HPTE_V_VALID; 683 v |= HPTE_V_VALID;
601 } 684 }
602 if (v & HPTE_V_VALID) 685 if (v & HPTE_V_VALID) {
603 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); 686 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
687 r &= ~HPTE_GR_RESERVED;
688 }
604 vcpu->arch.gpr[4 + i * 2] = v; 689 vcpu->arch.gpr[4 + i * 2] = v;
605 vcpu->arch.gpr[5 + i * 2] = r; 690 vcpu->arch.gpr[5 + i * 2] = r;
606 } 691 }
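The book3s_hv_rm_mmu.c changes above split the H_ENTER and H_REMOVE hypercall bodies into kvm-level helpers that take the page-table pointer and a result slot explicitly, leaving thin vcpu wrappers for the real-mode hcall path. A rough sketch of how a virtual-mode caller (for example, code restoring a saved HPT) could reuse the new helper; the caller below is hypothetical, only kvmppc_do_h_enter and its argument order come from the patch:

/* Hypothetical virtual-mode user of the refactored helper. */
static long restore_one_hpte(struct kvm *kvm, unsigned long idx,
                             unsigned long pteh, unsigned long ptel)
{
        unsigned long pte_index_out;
        long ret;

        ret = kvmppc_do_h_enter(kvm, H_EXACT, idx, pteh, ptel,
                                kvm->mm->pgd, false /* not real mode */,
                                &pte_index_out);
        return ret == H_SUCCESS ? 0 : -EIO;
}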
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 690d1120402d..10b6c358dd77 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -27,6 +27,7 @@
27#include <asm/asm-offsets.h> 27#include <asm/asm-offsets.h>
28#include <asm/exception-64s.h> 28#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h> 29#include <asm/kvm_book3s_asm.h>
30#include <asm/mmu-hash64.h>
30 31
31/***************************************************************************** 32/*****************************************************************************
32 * * 33 * *
@@ -313,7 +314,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
313 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 314 mtspr SPRN_SDR1,r6 /* switch to partition page table */
314 mtspr SPRN_LPID,r7 315 mtspr SPRN_LPID,r7
315 isync 316 isync
317
318 /* See if we need to flush the TLB */
319 lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
320 clrldi r7,r6,64-6 /* extract bit number (6 bits) */
321 srdi r6,r6,6 /* doubleword number */
322 sldi r6,r6,3 /* address offset */
323 add r6,r6,r9
324 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
316 li r0,1 325 li r0,1
326 sld r0,r0,r7
327 ld r7,0(r6)
328 and. r7,r7,r0
329 beq 22f
33023: ldarx r7,0,r6 /* if set, clear the bit */
331 andc r7,r7,r0
332 stdcx. r7,0,r6
333 bne 23b
334 li r6,128 /* and flush the TLB */
335 mtctr r6
336 li r7,0x800 /* IS field = 0b10 */
337 ptesync
33828: tlbiel r7
339 addi r7,r7,0x1000
340 bdnz 28b
341 ptesync
342
34322: li r0,1
317 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ 344 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
318 b 10f 345 b 10f
319 346
@@ -336,36 +363,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
336 mr r9,r4 363 mr r9,r4
337 blt hdec_soon 364 blt hdec_soon
338 365
339 /*
340 * Invalidate the TLB if we could possibly have stale TLB
341 * entries for this partition on this core due to the use
342 * of tlbiel.
343 * XXX maybe only need this on primary thread?
344 */
345 ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
346 lwz r5,VCPU_VCPUID(r4)
347 lhz r6,PACAPACAINDEX(r13)
348 rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
349 lhz r8,VCPU_LAST_CPU(r4)
350 sldi r7,r6,1 /* see if this is the same vcpu */
351 add r7,r7,r9 /* as last ran on this pcpu */
352 lhz r0,KVM_LAST_VCPU(r7)
353 cmpw r6,r8 /* on the same cpu core as last time? */
354 bne 3f
355 cmpw r0,r5 /* same vcpu as this core last ran? */
356 beq 1f
3573: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
358 sth r5,KVM_LAST_VCPU(r7)
359 li r6,128
360 mtctr r6
361 li r7,0x800 /* IS field = 0b10 */
362 ptesync
3632: tlbiel r7
364 addi r7,r7,0x1000
365 bdnz 2b
366 ptesync
3671:
368
369 /* Save purr/spurr */ 366 /* Save purr/spurr */
370 mfspr r5,SPRN_PURR 367 mfspr r5,SPRN_PURR
371 mfspr r6,SPRN_SPURR 368 mfspr r6,SPRN_SPURR
@@ -682,8 +679,7 @@ BEGIN_FTR_SECTION
6821: 6791:
683END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 680END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
684 681
685nohpte_cont: 682guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
686hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
687 /* Save DEC */ 683 /* Save DEC */
688 mfspr r5,SPRN_DEC 684 mfspr r5,SPRN_DEC
689 mftb r6 685 mftb r6
@@ -704,6 +700,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
704 std r6, VCPU_FAULT_DAR(r9) 700 std r6, VCPU_FAULT_DAR(r9)
705 stw r7, VCPU_FAULT_DSISR(r9) 701 stw r7, VCPU_FAULT_DSISR(r9)
706 702
703 /* See if it is a machine check */
704 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
705 beq machine_check_realmode
706mc_cont:
707
707 /* Save guest CTRL register, set runlatch to 1 */ 708 /* Save guest CTRL register, set runlatch to 1 */
7086: mfspr r6,SPRN_CTRLF 7096: mfspr r6,SPRN_CTRLF
709 stw r6,VCPU_CTRL(r9) 710 stw r6,VCPU_CTRL(r9)
@@ -1116,38 +1117,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1116 /* 1117 /*
1117 * For external and machine check interrupts, we need 1118 * For external and machine check interrupts, we need
1118 * to call the Linux handler to process the interrupt. 1119 * to call the Linux handler to process the interrupt.
1119 * We do that by jumping to the interrupt vector address 1120 * We do that by jumping to absolute address 0x500 for
1120 * which we have in r12. The [h]rfid at the end of the 1121 * external interrupts, or the machine_check_fwnmi label
1122 * for machine checks (since firmware might have patched
1123 * the vector area at 0x200). The [h]rfid at the end of the
1121 * handler will return to the book3s_hv_interrupts.S code. 1124 * handler will return to the book3s_hv_interrupts.S code.
1122 * For other interrupts we do the rfid to get back 1125 * For other interrupts we do the rfid to get back
1123 * to the book3s_interrupts.S code here. 1126 * to the book3s_hv_interrupts.S code here.
1124 */ 1127 */
1125 ld r8, HSTATE_VMHANDLER(r13) 1128 ld r8, HSTATE_VMHANDLER(r13)
1126 ld r7, HSTATE_HOST_MSR(r13) 1129 ld r7, HSTATE_HOST_MSR(r13)
1127 1130
1131 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1128 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1132 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1133BEGIN_FTR_SECTION
1129 beq 11f 1134 beq 11f
1130 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1135END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1131 1136
1132 /* RFI into the highmem handler, or branch to interrupt handler */ 1137 /* RFI into the highmem handler, or branch to interrupt handler */
113312: mfmsr r6 1138 mfmsr r6
1134 mtctr r12
1135 li r0, MSR_RI 1139 li r0, MSR_RI
1136 andc r6, r6, r0 1140 andc r6, r6, r0
1137 mtmsrd r6, 1 /* Clear RI in MSR */ 1141 mtmsrd r6, 1 /* Clear RI in MSR */
1138 mtsrr0 r8 1142 mtsrr0 r8
1139 mtsrr1 r7 1143 mtsrr1 r7
1140 beqctr 1144 beqa 0x500 /* external interrupt (PPC970) */
1145 beq cr1, 13f /* machine check */
1141 RFI 1146 RFI
1142 1147
114311: 1148 /* On POWER7, we have external interrupts set to use HSRR0/1 */
1144BEGIN_FTR_SECTION 114911: mtspr SPRN_HSRR0, r8
1145 b 12b
1146END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1147 mtspr SPRN_HSRR0, r8
1148 mtspr SPRN_HSRR1, r7 1150 mtspr SPRN_HSRR1, r7
1149 ba 0x500 1151 ba 0x500
1150 1152
115313: b machine_check_fwnmi
1154
1151/* 1155/*
1152 * Check whether an HDSI is an HPTE not found fault or something else. 1156 * Check whether an HDSI is an HPTE not found fault or something else.
1153 * If it is an HPTE not found fault that is due to the guest accessing 1157 * If it is an HPTE not found fault that is due to the guest accessing
@@ -1180,7 +1184,7 @@ kvmppc_hdsi:
1180 cmpdi r3, 0 /* retry the instruction */ 1184 cmpdi r3, 0 /* retry the instruction */
1181 beq 6f 1185 beq 6f
1182 cmpdi r3, -1 /* handle in kernel mode */ 1186 cmpdi r3, -1 /* handle in kernel mode */
1183 beq nohpte_cont 1187 beq guest_exit_cont
1184 cmpdi r3, -2 /* MMIO emulation; need instr word */ 1188 cmpdi r3, -2 /* MMIO emulation; need instr word */
1185 beq 2f 1189 beq 2f
1186 1190
@@ -1194,6 +1198,7 @@ kvmppc_hdsi:
1194 li r10, BOOK3S_INTERRUPT_DATA_STORAGE 1198 li r10, BOOK3S_INTERRUPT_DATA_STORAGE
1195 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ 1199 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1196 rotldi r11, r11, 63 1200 rotldi r11, r11, 63
1201fast_interrupt_c_return:
11976: ld r7, VCPU_CTR(r9) 12026: ld r7, VCPU_CTR(r9)
1198 lwz r8, VCPU_XER(r9) 1203 lwz r8, VCPU_XER(r9)
1199 mtctr r7 1204 mtctr r7
@@ -1226,7 +1231,7 @@ kvmppc_hdsi:
1226 /* Unset guest mode. */ 1231 /* Unset guest mode. */
1227 li r0, KVM_GUEST_MODE_NONE 1232 li r0, KVM_GUEST_MODE_NONE
1228 stb r0, HSTATE_IN_GUEST(r13) 1233 stb r0, HSTATE_IN_GUEST(r13)
1229 b nohpte_cont 1234 b guest_exit_cont
1230 1235
1231/* 1236/*
1232 * Similarly for an HISI, reflect it to the guest as an ISI unless 1237 * Similarly for an HISI, reflect it to the guest as an ISI unless
@@ -1252,9 +1257,9 @@ kvmppc_hisi:
1252 ld r11, VCPU_MSR(r9) 1257 ld r11, VCPU_MSR(r9)
1253 li r12, BOOK3S_INTERRUPT_H_INST_STORAGE 1258 li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1254 cmpdi r3, 0 /* retry the instruction */ 1259 cmpdi r3, 0 /* retry the instruction */
1255 beq 6f 1260 beq fast_interrupt_c_return
1256 cmpdi r3, -1 /* handle in kernel mode */ 1261 cmpdi r3, -1 /* handle in kernel mode */
1257 beq nohpte_cont 1262 beq guest_exit_cont
1258 1263
1259 /* Synthesize an ISI for the guest */ 1264 /* Synthesize an ISI for the guest */
1260 mr r11, r3 1265 mr r11, r3
@@ -1263,12 +1268,7 @@ kvmppc_hisi:
1263 li r10, BOOK3S_INTERRUPT_INST_STORAGE 1268 li r10, BOOK3S_INTERRUPT_INST_STORAGE
1264 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ 1269 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1265 rotldi r11, r11, 63 1270 rotldi r11, r11, 63
12666: ld r7, VCPU_CTR(r9) 1271 b fast_interrupt_c_return
1267 lwz r8, VCPU_XER(r9)
1268 mtctr r7
1269 mtxer r8
1270 mr r4, r9
1271 b fast_guest_return
1272 1272
12733: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ 12733: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
1274 ld r5, KVM_VRMA_SLB_V(r6) 1274 ld r5, KVM_VRMA_SLB_V(r6)
@@ -1284,14 +1284,14 @@ kvmppc_hisi:
1284hcall_try_real_mode: 1284hcall_try_real_mode:
1285 ld r3,VCPU_GPR(R3)(r9) 1285 ld r3,VCPU_GPR(R3)(r9)
1286 andi. r0,r11,MSR_PR 1286 andi. r0,r11,MSR_PR
1287 bne hcall_real_cont 1287 bne guest_exit_cont
1288 clrrdi r3,r3,2 1288 clrrdi r3,r3,2
1289 cmpldi r3,hcall_real_table_end - hcall_real_table 1289 cmpldi r3,hcall_real_table_end - hcall_real_table
1290 bge hcall_real_cont 1290 bge guest_exit_cont
1291 LOAD_REG_ADDR(r4, hcall_real_table) 1291 LOAD_REG_ADDR(r4, hcall_real_table)
1292 lwzx r3,r3,r4 1292 lwzx r3,r3,r4
1293 cmpwi r3,0 1293 cmpwi r3,0
1294 beq hcall_real_cont 1294 beq guest_exit_cont
1295 add r3,r3,r4 1295 add r3,r3,r4
1296 mtctr r3 1296 mtctr r3
1297 mr r3,r9 /* get vcpu pointer */ 1297 mr r3,r9 /* get vcpu pointer */
@@ -1312,7 +1312,7 @@ hcall_real_fallback:
1312 li r12,BOOK3S_INTERRUPT_SYSCALL 1312 li r12,BOOK3S_INTERRUPT_SYSCALL
1313 ld r9, HSTATE_KVM_VCPU(r13) 1313 ld r9, HSTATE_KVM_VCPU(r13)
1314 1314
1315 b hcall_real_cont 1315 b guest_exit_cont
1316 1316
1317 .globl hcall_real_table 1317 .globl hcall_real_table
1318hcall_real_table: 1318hcall_real_table:
@@ -1571,6 +1571,21 @@ kvm_cede_exit:
1571 li r3,H_TOO_HARD 1571 li r3,H_TOO_HARD
1572 blr 1572 blr
1573 1573
1574 /* Try to handle a machine check in real mode */
1575machine_check_realmode:
1576 mr r3, r9 /* get vcpu pointer */
1577 bl .kvmppc_realmode_machine_check
1578 nop
1579 cmpdi r3, 0 /* continue exiting from guest? */
1580 ld r9, HSTATE_KVM_VCPU(r13)
1581 li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1582 beq mc_cont
1583 /* If not, deliver a machine check. SRR0/1 are already set */
1584 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
1585 li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
1586 rotldi r11, r11, 63
1587 b fast_interrupt_c_return
1588
1574secondary_too_late: 1589secondary_too_late:
1575 ld r5,HSTATE_KVM_VCORE(r13) 1590 ld r5,HSTATE_KVM_VCORE(r13)
1576 HMT_LOW 1591 HMT_LOW
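The assembly added at guest entry replaces the old last_cpu/last_vcpu bookkeeping with a per-partition need_tlb_flush cpumask: if this CPU's bit is set, it is cleared atomically and the partition TLB is flushed with 128 tlbiel operations. A hedged C rendering of the same logic (the cpumask field name is taken from this series; the helper itself is illustrative only):

static void flush_tlb_if_needed(struct kvm *kvm)
{
        int cpu = smp_processor_id();
        int i;

        /* test-and-clear this CPU's bit in kvm->arch.need_tlb_flush */
        if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.need_tlb_flush)) {
                asm volatile("ptesync" : : : "memory");
                /* IS = 0b10 (0x800), step through all 128 congruence classes */
                for (i = 0; i < 128; i++)
                        asm volatile("tlbiel %0" : : "r" (0x800 | (i << 12)));
                asm volatile("ptesync" : : : "memory");
        }
}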
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b853696b6d8e..28d38adeca73 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -81,9 +81,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
81 svcpu_put(svcpu); 81 svcpu_put(svcpu);
82#endif 82#endif
83 83
84 kvmppc_giveup_ext(vcpu, MSR_FP); 84 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
85 kvmppc_giveup_ext(vcpu, MSR_VEC);
86 kvmppc_giveup_ext(vcpu, MSR_VSX);
87 vcpu->cpu = -1; 85 vcpu->cpu = -1;
88} 86}
89 87
@@ -147,7 +145,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
147 ulong smsr = vcpu->arch.shared->msr; 145 ulong smsr = vcpu->arch.shared->msr;
148 146
149 /* Guest MSR values */ 147 /* Guest MSR values */
150 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; 148 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
151 /* Process MSR values */ 149 /* Process MSR values */
152 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; 150 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
153 /* External providers the guest reserved */ 151 /* External providers the guest reserved */
@@ -433,10 +431,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
433 431
434static inline int get_fpr_index(int i) 432static inline int get_fpr_index(int i)
435{ 433{
436#ifdef CONFIG_VSX 434 return i * TS_FPRWIDTH;
437 i *= 2;
438#endif
439 return i;
440} 435}
441 436
442/* Give up external provider (FPU, Altivec, VSX) */ 437/* Give up external provider (FPU, Altivec, VSX) */
@@ -450,41 +445,49 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
450 u64 *thread_fpr = (u64*)t->fpr; 445 u64 *thread_fpr = (u64*)t->fpr;
451 int i; 446 int i;
452 447
453 if (!(vcpu->arch.guest_owned_ext & msr)) 448 /*
449 * VSX instructions can access FP and vector registers, so if
450 * we are giving up VSX, make sure we give up FP and VMX as well.
451 */
452 if (msr & MSR_VSX)
453 msr |= MSR_FP | MSR_VEC;
454
455 msr &= vcpu->arch.guest_owned_ext;
456 if (!msr)
454 return; 457 return;
455 458
456#ifdef DEBUG_EXT 459#ifdef DEBUG_EXT
457 printk(KERN_INFO "Giving up ext 0x%lx\n", msr); 460 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
458#endif 461#endif
459 462
460 switch (msr) { 463 if (msr & MSR_FP) {
461 case MSR_FP: 464 /*
465 * Note that on CPUs with VSX, giveup_fpu stores
466 * both the traditional FP registers and the added VSX
467 * registers into thread.fpr[].
468 */
462 giveup_fpu(current); 469 giveup_fpu(current);
463 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 470 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
464 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; 471 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
465 472
466 vcpu->arch.fpscr = t->fpscr.val; 473 vcpu->arch.fpscr = t->fpscr.val;
467 break; 474
468 case MSR_VEC: 475#ifdef CONFIG_VSX
476 if (cpu_has_feature(CPU_FTR_VSX))
477 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
478 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
479#endif
480 }
481
469#ifdef CONFIG_ALTIVEC 482#ifdef CONFIG_ALTIVEC
483 if (msr & MSR_VEC) {
470 giveup_altivec(current); 484 giveup_altivec(current);
471 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); 485 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
472 vcpu->arch.vscr = t->vscr; 486 vcpu->arch.vscr = t->vscr;
473#endif
474 break;
475 case MSR_VSX:
476#ifdef CONFIG_VSX
477 __giveup_vsx(current);
478 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
479 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
480#endif
481 break;
482 default:
483 BUG();
484 } 487 }
488#endif
485 489
486 vcpu->arch.guest_owned_ext &= ~msr; 490 vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
487 current->thread.regs->msr &= ~msr;
488 kvmppc_recalc_shadow_msr(vcpu); 491 kvmppc_recalc_shadow_msr(vcpu);
489} 492}
490 493
@@ -544,47 +547,56 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
544 return RESUME_GUEST; 547 return RESUME_GUEST;
545 } 548 }
546 549
547 /* We already own the ext */ 550 if (msr == MSR_VSX) {
548 if (vcpu->arch.guest_owned_ext & msr) { 551 /* No VSX? Give an illegal instruction interrupt */
549 return RESUME_GUEST; 552#ifdef CONFIG_VSX
553 if (!cpu_has_feature(CPU_FTR_VSX))
554#endif
555 {
556 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
557 return RESUME_GUEST;
558 }
559
560 /*
561 * We have to load up all the FP and VMX registers before
562 * we can let the guest use VSX instructions.
563 */
564 msr = MSR_FP | MSR_VEC | MSR_VSX;
550 } 565 }
551 566
567 /* See if we already own all the ext(s) needed */
568 msr &= ~vcpu->arch.guest_owned_ext;
569 if (!msr)
570 return RESUME_GUEST;
571
552#ifdef DEBUG_EXT 572#ifdef DEBUG_EXT
553 printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 573 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
554#endif 574#endif
555 575
556 current->thread.regs->msr |= msr; 576 current->thread.regs->msr |= msr;
557 577
558 switch (msr) { 578 if (msr & MSR_FP) {
559 case MSR_FP:
560 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 579 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
561 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; 580 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
562 581#ifdef CONFIG_VSX
582 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
583 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
584#endif
563 t->fpscr.val = vcpu->arch.fpscr; 585 t->fpscr.val = vcpu->arch.fpscr;
564 t->fpexc_mode = 0; 586 t->fpexc_mode = 0;
565 kvmppc_load_up_fpu(); 587 kvmppc_load_up_fpu();
566 break; 588 }
567 case MSR_VEC: 589
590 if (msr & MSR_VEC) {
568#ifdef CONFIG_ALTIVEC 591#ifdef CONFIG_ALTIVEC
569 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); 592 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
570 t->vscr = vcpu->arch.vscr; 593 t->vscr = vcpu->arch.vscr;
571 t->vrsave = -1; 594 t->vrsave = -1;
572 kvmppc_load_up_altivec(); 595 kvmppc_load_up_altivec();
573#endif 596#endif
574 break;
575 case MSR_VSX:
576#ifdef CONFIG_VSX
577 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
578 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
579 kvmppc_load_up_vsx();
580#endif
581 break;
582 default:
583 BUG();
584 } 597 }
585 598
586 vcpu->arch.guest_owned_ext |= msr; 599 vcpu->arch.guest_owned_ext |= msr;
587
588 kvmppc_recalc_shadow_msr(vcpu); 600 kvmppc_recalc_shadow_msr(vcpu);
589 601
590 return RESUME_GUEST; 602 return RESUME_GUEST;
@@ -1134,7 +1146,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1134 /* Save VSX state in stack */ 1146 /* Save VSX state in stack */
1135 used_vsr = current->thread.used_vsr; 1147 used_vsr = current->thread.used_vsr;
1136 if (used_vsr && (current->thread.regs->msr & MSR_VSX)) 1148 if (used_vsr && (current->thread.regs->msr & MSR_VSX))
1137 __giveup_vsx(current); 1149 __giveup_vsx(current);
1138#endif 1150#endif
1139 1151
1140 /* Remember the MSR with disabled extensions */ 1152 /* Remember the MSR with disabled extensions */
@@ -1151,14 +1163,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1151 /* No need for kvm_guest_exit. It's done in handle_exit. 1163 /* No need for kvm_guest_exit. It's done in handle_exit.
1152 We also get here with interrupts enabled. */ 1164 We also get here with interrupts enabled. */
1153 1165
1154 current->thread.regs->msr = ext_msr;
1155
1156 /* Make sure we save the guest FPU/Altivec/VSX state */ 1166 /* Make sure we save the guest FPU/Altivec/VSX state */
1157 kvmppc_giveup_ext(vcpu, MSR_FP); 1167 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
1158 kvmppc_giveup_ext(vcpu, MSR_VEC); 1168
1159 kvmppc_giveup_ext(vcpu, MSR_VSX); 1169 current->thread.regs->msr = ext_msr;
1160 1170
1161 /* Restore FPU state from stack */ 1171 /* Restore FPU/VSX state from stack */
1162 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); 1172 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
1163 current->thread.fpscr.val = fpscr; 1173 current->thread.fpscr.val = fpscr;
1164 current->thread.fpexc_mode = fpexc_mode; 1174 current->thread.fpexc_mode = fpexc_mode;
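The book3s_pr.c changes turn FP/VEC/VSX ownership handling into mask operations, with the rule that VSX implies FP and VMX because VSX instructions can touch both register files. A minimal sketch of that widening rule as applied in kvmppc_handle_ext() and kvmppc_giveup_ext() (helper name is illustrative):

static unsigned long ext_facilities_to_load(struct kvm_vcpu *vcpu,
                                            unsigned long msr)
{
        /* VSX needs the FP and vector state loaded as well */
        if (msr & MSR_VSX)
                msr |= MSR_FP | MSR_VEC | MSR_VSX;

        /* only load what the guest does not already own */
        return msr & ~vcpu->arch.guest_owned_ext;
}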
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index b2f8258b545a..8f7633e3afb8 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -234,8 +234,5 @@ define_load_up(fpu)
234#ifdef CONFIG_ALTIVEC 234#ifdef CONFIG_ALTIVEC
235define_load_up(altivec) 235define_load_up(altivec)
236#endif 236#endif
237#ifdef CONFIG_VSX
238define_load_up(vsx)
239#endif
240 237
241#include "book3s_segment.S" 238#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3d1f35dc7862..69f114015780 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,6 +36,7 @@
36#include <asm/dbell.h> 36#include <asm/dbell.h>
37#include <asm/hw_irq.h> 37#include <asm/hw_irq.h>
38#include <asm/irq.h> 38#include <asm/irq.h>
39#include <asm/time.h>
39 40
40#include "timing.h" 41#include "timing.h"
41#include "booke.h" 42#include "booke.h"
@@ -311,6 +312,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
311 bool crit; 312 bool crit;
312 bool keep_irq = false; 313 bool keep_irq = false;
313 enum int_class int_class; 314 enum int_class int_class;
315 ulong new_msr = vcpu->arch.shared->msr;
314 316
315 /* Truncate crit indicators in 32 bit mode */ 317 /* Truncate crit indicators in 32 bit mode */
316 if (!(vcpu->arch.shared->msr & MSR_SF)) { 318 if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -406,7 +408,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
406 set_guest_esr(vcpu, vcpu->arch.queued_esr); 408 set_guest_esr(vcpu, vcpu->arch.queued_esr);
407 if (update_dear == true) 409 if (update_dear == true)
408 set_guest_dear(vcpu, vcpu->arch.queued_dear); 410 set_guest_dear(vcpu, vcpu->arch.queued_dear);
409 kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); 411
412 new_msr &= msr_mask;
413#if defined(CONFIG_64BIT)
414 if (vcpu->arch.epcr & SPRN_EPCR_ICM)
415 new_msr |= MSR_CM;
416#endif
417 kvmppc_set_msr(vcpu, new_msr);
410 418
411 if (!keep_irq) 419 if (!keep_irq)
412 clear_bit(priority, &vcpu->arch.pending_exceptions); 420 clear_bit(priority, &vcpu->arch.pending_exceptions);
@@ -1380,6 +1388,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1380 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); 1388 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
1381 break; 1389 break;
1382 } 1390 }
1391#if defined(CONFIG_64BIT)
1392 case KVM_REG_PPC_EPCR:
1393 r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
1394 break;
1395#endif
1383 default: 1396 default:
1384 break; 1397 break;
1385 } 1398 }
@@ -1407,6 +1420,15 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1407 (u64 __user *)(long)reg->addr, sizeof(u64)); 1420 (u64 __user *)(long)reg->addr, sizeof(u64));
1408 break; 1421 break;
1409 } 1422 }
1423#if defined(CONFIG_64BIT)
1424 case KVM_REG_PPC_EPCR: {
1425 u32 new_epcr;
1426 r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
1427 if (r == 0)
1428 kvmppc_set_epcr(vcpu, new_epcr);
1429 break;
1430 }
1431#endif
1410 default: 1432 default:
1411 break; 1433 break;
1412 } 1434 }
@@ -1465,6 +1487,18 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
1465{ 1487{
1466} 1488}
1467 1489
1490void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
1491{
1492#if defined(CONFIG_64BIT)
1493 vcpu->arch.epcr = new_epcr;
1494#ifdef CONFIG_KVM_BOOKE_HV
1495 vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
1496 if (vcpu->arch.epcr & SPRN_EPCR_ICM)
1497 vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
1498#endif
1499#endif
1500}
1501
1468void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) 1502void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
1469{ 1503{
1470 vcpu->arch.tcr = new_tcr; 1504 vcpu->arch.tcr = new_tcr;
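For 64-bit booke guests, interrupt delivery now derives the new MSR from EPCR[ICM], so a guest that asked for 64-bit interrupt handling receives exceptions with MSR[CM] set. A condensed sketch of the computation performed in kvmppc_booke_irqprio_deliver() (assuming the field and SPR names shown in the hunk above):

static ulong booke_irq_msr(struct kvm_vcpu *vcpu, ulong msr_mask)
{
        ulong new_msr = vcpu->arch.shared->msr & msr_mask;

#if defined(CONFIG_64BIT)
        if (vcpu->arch.epcr & SPRN_EPCR_ICM)
                new_msr |= MSR_CM;      /* take the interrupt in 64-bit mode */
#endif
        return new_msr;
}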
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index ba61974c1e20..e9b88e433f64 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -69,6 +69,7 @@ extern unsigned long kvmppc_booke_handlers;
69void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); 69void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
70void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); 70void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
71 71
72void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
72void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); 73void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
73void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); 74void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
74void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); 75void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 514790f41aba..4685b8cf2249 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -240,7 +240,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
240 case SPRN_MCSR: 240 case SPRN_MCSR:
241 vcpu->arch.mcsr &= ~spr_val; 241 vcpu->arch.mcsr &= ~spr_val;
242 break; 242 break;
243 243#if defined(CONFIG_64BIT)
244 case SPRN_EPCR:
245 kvmppc_set_epcr(vcpu, spr_val);
246#ifdef CONFIG_KVM_BOOKE_HV
247 mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
248#endif
249 break;
250#endif
244 default: 251 default:
245 emulated = EMULATE_FAIL; 252 emulated = EMULATE_FAIL;
246 } 253 }
@@ -335,6 +342,11 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
335 case SPRN_MCSR: 342 case SPRN_MCSR:
336 *spr_val = vcpu->arch.mcsr; 343 *spr_val = vcpu->arch.mcsr;
337 break; 344 break;
345#if defined(CONFIG_64BIT)
346 case SPRN_EPCR:
347 *spr_val = vcpu->arch.epcr;
348 break;
349#endif
338 350
339 default: 351 default:
340 emulated = EMULATE_FAIL; 352 emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 099fe8272b57..e8ed7d659c55 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -16,6 +16,7 @@
16 * 16 *
17 * Author: Varun Sethi <varun.sethi@freescale.com> 17 * Author: Varun Sethi <varun.sethi@freescale.com>
18 * Author: Scott Wood <scotwood@freescale.com> 18 * Author: Scott Wood <scotwood@freescale.com>
19 * Author: Mihai Caraman <mihai.caraman@freescale.com>
19 * 20 *
20 * This file is derived from arch/powerpc/kvm/booke_interrupts.S 21 * This file is derived from arch/powerpc/kvm/booke_interrupts.S
21 */ 22 */
@@ -30,31 +31,33 @@
30#include <asm/bitsperlong.h> 31#include <asm/bitsperlong.h>
31#include <asm/thread_info.h> 32#include <asm/thread_info.h>
32 33
34#ifdef CONFIG_64BIT
35#include <asm/exception-64e.h>
36#else
33#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ 37#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
34 38#endif
35#define GET_VCPU(vcpu, thread) \
36 PPC_LL vcpu, THREAD_KVM_VCPU(thread)
37 39
38#define LONGBYTES (BITS_PER_LONG / 8) 40#define LONGBYTES (BITS_PER_LONG / 8)
39 41
40#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES)) 42#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
41 43
42/* The host stack layout: */ 44/* The host stack layout: */
43#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */ 45#define HOST_R1 0 /* Implied by stwu. */
44#define HOST_CALLEE_LR (1 * LONGBYTES) 46#define HOST_CALLEE_LR PPC_LR_STKOFF
45#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */ 47#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES)
46/* 48/*
47 * r2 is special: it holds 'current', and it made nonvolatile in the 49 * r2 is special: it holds 'current', and it made nonvolatile in the
48 * kernel with the -ffixed-r2 gcc option. 50 * kernel with the -ffixed-r2 gcc option.
49 */ 51 */
50#define HOST_R2 (3 * LONGBYTES) 52#define HOST_R2 (HOST_RUN + LONGBYTES)
51#define HOST_CR (4 * LONGBYTES) 53#define HOST_CR (HOST_R2 + LONGBYTES)
52#define HOST_NV_GPRS (5 * LONGBYTES) 54#define HOST_NV_GPRS (HOST_CR + LONGBYTES)
53#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES)) 55#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
54#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n) 56#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
55#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES) 57#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
56#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */ 58#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
57#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */ 59/* LR in caller stack frame. */
60#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF)
58 61
59#define NEED_EMU 0x00000001 /* emulation -- save nv regs */ 62#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
60#define NEED_DEAR 0x00000002 /* save faulting DEAR */ 63#define NEED_DEAR 0x00000002 /* save faulting DEAR */
@@ -201,12 +204,128 @@
201 b kvmppc_resume_host 204 b kvmppc_resume_host
202.endm 205.endm
203 206
207#ifdef CONFIG_64BIT
208/* Exception types */
209#define EX_GEN 1
210#define EX_GDBELL 2
211#define EX_DBG 3
212#define EX_MC 4
213#define EX_CRIT 5
214#define EX_TLB 6
215
216/*
217 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
218 */
219.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
220 _GLOBAL(kvmppc_handler_\intno\()_\srr1)
221 mr r11, r4
222 /*
223 * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
224 */
225 PPC_LL r4, PACACURRENT(r13)
226 PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
227 stw r10, VCPU_CR(r4)
228 PPC_STL r11, VCPU_GPR(R4)(r4)
229 PPC_STL r5, VCPU_GPR(R5)(r4)
230 .if \type == EX_CRIT
231 PPC_LL r5, (\paca_ex + EX_R13)(r13)
232 .else
233 mfspr r5, \scratch
234 .endif
235 PPC_STL r6, VCPU_GPR(R6)(r4)
236 PPC_STL r8, VCPU_GPR(R8)(r4)
237 PPC_STL r9, VCPU_GPR(R9)(r4)
238 PPC_STL r5, VCPU_GPR(R13)(r4)
239 PPC_LL r6, (\paca_ex + \ex_r10)(r13)
240 PPC_LL r8, (\paca_ex + \ex_r11)(r13)
241 PPC_STL r3, VCPU_GPR(R3)(r4)
242 PPC_STL r7, VCPU_GPR(R7)(r4)
243 PPC_STL r12, VCPU_GPR(R12)(r4)
244 PPC_STL r6, VCPU_GPR(R10)(r4)
245 PPC_STL r8, VCPU_GPR(R11)(r4)
246 mfctr r5
247 PPC_STL r5, VCPU_CTR(r4)
248 mfspr r5, \srr0
249 mfspr r6, \srr1
250 kvm_handler_common \intno, \srr0, \flags
251.endm
252
253#define EX_PARAMS(type) \
254 EX_##type, \
255 SPRN_SPRG_##type##_SCRATCH, \
256 PACA_EX##type, \
257 EX_R10, \
258 EX_R11
259
260#define EX_PARAMS_TLB \
261 EX_TLB, \
262 SPRN_SPRG_GEN_SCRATCH, \
263 PACA_EXTLB, \
264 EX_TLB_R10, \
265 EX_TLB_R11
266
267kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
268 SPRN_CSRR0, SPRN_CSRR1, 0
269kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
270 SPRN_MCSRR0, SPRN_MCSRR1, 0
271kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
272 SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
273kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
274 SPRN_SRR0, SPRN_SRR1, NEED_ESR
275kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
276 SPRN_SRR0, SPRN_SRR1, 0
277kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
278 SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
279kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
280 SPRN_SRR0, SPRN_SRR1,NEED_ESR
281kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
282 SPRN_SRR0, SPRN_SRR1, 0
283kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
284 SPRN_SRR0, SPRN_SRR1, 0
285kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
286 SPRN_SRR0, SPRN_SRR1, 0
287kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
288 SPRN_SRR0, SPRN_SRR1, 0
289kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
290 SPRN_CSRR0, SPRN_CSRR1, 0
291/*
292 * Only bolted TLB miss exception handlers are supported for now
293 */
294kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
295 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
296kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
297 SPRN_SRR0, SPRN_SRR1, 0
298kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
299 SPRN_SRR0, SPRN_SRR1, 0
300kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
301 SPRN_SRR0, SPRN_SRR1, 0
302kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
303 SPRN_SRR0, SPRN_SRR1, 0
304kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
305 SPRN_SRR0, SPRN_SRR1, 0
306kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
307 SPRN_SRR0, SPRN_SRR1, 0
308kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
309 SPRN_CSRR0, SPRN_CSRR1, 0
310kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
311 SPRN_SRR0, SPRN_SRR1, NEED_EMU
312kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
313 SPRN_SRR0, SPRN_SRR1, 0
314kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
315 SPRN_GSRR0, SPRN_GSRR1, 0
316kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
317 SPRN_CSRR0, SPRN_CSRR1, 0
318kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
319 SPRN_DSRR0, SPRN_DSRR1, 0
320kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
321 SPRN_CSRR0, SPRN_CSRR1, 0
322#else
204/* 323/*
205 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h 324 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
206 */ 325 */
207.macro kvm_handler intno srr0, srr1, flags 326.macro kvm_handler intno srr0, srr1, flags
208_GLOBAL(kvmppc_handler_\intno\()_\srr1) 327_GLOBAL(kvmppc_handler_\intno\()_\srr1)
209 GET_VCPU(r11, r10) 328 PPC_LL r11, THREAD_KVM_VCPU(r10)
210 PPC_STL r3, VCPU_GPR(R3)(r11) 329 PPC_STL r3, VCPU_GPR(R3)(r11)
211 mfspr r3, SPRN_SPRG_RSCRATCH0 330 mfspr r3, SPRN_SPRG_RSCRATCH0
212 PPC_STL r4, VCPU_GPR(R4)(r11) 331 PPC_STL r4, VCPU_GPR(R4)(r11)
@@ -233,7 +352,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
233.macro kvm_lvl_handler intno scratch srr0, srr1, flags 352.macro kvm_lvl_handler intno scratch srr0, srr1, flags
234_GLOBAL(kvmppc_handler_\intno\()_\srr1) 353_GLOBAL(kvmppc_handler_\intno\()_\srr1)
235 mfspr r10, SPRN_SPRG_THREAD 354 mfspr r10, SPRN_SPRG_THREAD
236 GET_VCPU(r11, r10) 355 PPC_LL r11, THREAD_KVM_VCPU(r10)
237 PPC_STL r3, VCPU_GPR(R3)(r11) 356 PPC_STL r3, VCPU_GPR(R3)(r11)
238 mfspr r3, \scratch 357 mfspr r3, \scratch
239 PPC_STL r4, VCPU_GPR(R4)(r11) 358 PPC_STL r4, VCPU_GPR(R4)(r11)
@@ -295,7 +414,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
295 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 414 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
296kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ 415kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
297 SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0 416 SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
298 417#endif
299 418
300/* Registers: 419/* Registers:
301 * SPRG_SCRATCH0: guest r10 420 * SPRG_SCRATCH0: guest r10
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index d1622864549e..c70d37ed770a 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -129,9 +129,9 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
129 ulong value); 129 ulong value);
130int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu); 130int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
131int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu); 131int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
132int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb); 132int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
133int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb); 133int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
134int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb); 134int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
135int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500); 135int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
136void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); 136void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
137 137
@@ -154,7 +154,7 @@ get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
154 154
155static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) 155static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
156{ 156{
157 return tlbe->mas2 & 0xfffff000; 157 return tlbe->mas2 & MAS2_EPN;
158} 158}
159 159
160static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) 160static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e04b0ef55ce0..e78f353a836a 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -89,6 +89,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
89 int ra = get_ra(inst); 89 int ra = get_ra(inst);
90 int rb = get_rb(inst); 90 int rb = get_rb(inst);
91 int rt = get_rt(inst); 91 int rt = get_rt(inst);
92 gva_t ea;
92 93
93 switch (get_op(inst)) { 94 switch (get_op(inst)) {
94 case 31: 95 case 31:
@@ -113,15 +114,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
113 break; 114 break;
114 115
115 case XOP_TLBSX: 116 case XOP_TLBSX:
116 emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); 117 ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
118 emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
117 break; 119 break;
118 120
119 case XOP_TLBILX: 121 case XOP_TLBILX: {
120 emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb); 122 int type = rt & 0x3;
123 ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
124 emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
121 break; 125 break;
126 }
122 127
123 case XOP_TLBIVAX: 128 case XOP_TLBIVAX:
124 emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); 129 ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
130 emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
125 break; 131 break;
126 132
127 default: 133 default:
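The tlbsx/tlbilx/tlbivax emulation now receives a precomputed effective address from kvmppc_get_ea_indexed() instead of raw register numbers. The helper's body is not part of this hunk; one plausible shape, consistent with the 32/64-bit masking done elsewhere in this series, would be:

/* Illustrative only; the real helper is defined outside this hunk. */
static gva_t get_ea_indexed_sketch(struct kvm_vcpu *vcpu, int ra, int rb)
{
        gva_t ea = kvmppc_get_gpr(vcpu, rb);

        if (ra)
                ea += kvmppc_get_gpr(vcpu, ra);
#if defined(CONFIG_64BIT)
        if (!(vcpu->arch.shared->msr & MSR_CM))
                ea = (u32)ea;           /* 32-bit computation mode */
#endif
        return ea;
}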
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 6305ee692ef7..cf3f18012371 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -415,7 +415,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
415 struct tlbe_ref *ref) 415 struct tlbe_ref *ref)
416{ 416{
417 struct kvm_memory_slot *slot; 417 struct kvm_memory_slot *slot;
418 unsigned long pfn, hva; 418 unsigned long pfn = 0; /* silence GCC warning */
419 unsigned long hva;
419 int pfnmap = 0; 420 int pfnmap = 0;
420 int tsize = BOOK3E_PAGESZ_4K; 421 int tsize = BOOK3E_PAGESZ_4K;
421 422
@@ -688,14 +689,11 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
688 return EMULATE_DONE; 689 return EMULATE_DONE;
689} 690}
690 691
691int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) 692int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
692{ 693{
693 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 694 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
694 unsigned int ia; 695 unsigned int ia;
695 int esel, tlbsel; 696 int esel, tlbsel;
696 gva_t ea;
697
698 ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
699 697
700 ia = (ea >> 2) & 0x1; 698 ia = (ea >> 2) & 0x1;
701 699
@@ -722,7 +720,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
722} 720}
723 721
724static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, 722static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
725 int pid, int rt) 723 int pid, int type)
726{ 724{
727 struct kvm_book3e_206_tlb_entry *tlbe; 725 struct kvm_book3e_206_tlb_entry *tlbe;
728 int tid, esel; 726 int tid, esel;
@@ -731,7 +729,7 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
731 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) { 729 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
732 tlbe = get_entry(vcpu_e500, tlbsel, esel); 730 tlbe = get_entry(vcpu_e500, tlbsel, esel);
733 tid = get_tlb_tid(tlbe); 731 tid = get_tlb_tid(tlbe);
734 if (rt == 0 || tid == pid) { 732 if (type == 0 || tid == pid) {
735 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); 733 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
736 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); 734 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
737 } 735 }
@@ -739,14 +737,9 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
739} 737}
740 738
741static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, 739static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
742 int ra, int rb) 740 gva_t ea)
743{ 741{
744 int tlbsel, esel; 742 int tlbsel, esel;
745 gva_t ea;
746
747 ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
748 if (ra)
749 ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
750 743
751 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 744 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
752 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1); 745 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
@@ -758,16 +751,16 @@ static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
758 } 751 }
759} 752}
760 753
761int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb) 754int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
762{ 755{
763 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 756 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
764 int pid = get_cur_spid(vcpu); 757 int pid = get_cur_spid(vcpu);
765 758
766 if (rt == 0 || rt == 1) { 759 if (type == 0 || type == 1) {
767 tlbilx_all(vcpu_e500, 0, pid, rt); 760 tlbilx_all(vcpu_e500, 0, pid, type);
768 tlbilx_all(vcpu_e500, 1, pid, rt); 761 tlbilx_all(vcpu_e500, 1, pid, type);
769 } else if (rt == 3) { 762 } else if (type == 3) {
770 tlbilx_one(vcpu_e500, pid, ra, rb); 763 tlbilx_one(vcpu_e500, pid, ea);
771 } 764 }
772 765
773 return EMULATE_DONE; 766 return EMULATE_DONE;
@@ -792,16 +785,13 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
792 return EMULATE_DONE; 785 return EMULATE_DONE;
793} 786}
794 787
795int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) 788int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
796{ 789{
797 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 790 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
798 int as = !!get_cur_sas(vcpu); 791 int as = !!get_cur_sas(vcpu);
799 unsigned int pid = get_cur_spid(vcpu); 792 unsigned int pid = get_cur_spid(vcpu);
800 int esel, tlbsel; 793 int esel, tlbsel;
801 struct kvm_book3e_206_tlb_entry *gtlbe = NULL; 794 struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
802 gva_t ea;
803
804 ea = kvmppc_get_gpr(vcpu, rb);
805 795
806 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 796 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
807 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); 797 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -881,6 +871,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
881 871
882 gtlbe->mas1 = vcpu->arch.shared->mas1; 872 gtlbe->mas1 = vcpu->arch.shared->mas1;
883 gtlbe->mas2 = vcpu->arch.shared->mas2; 873 gtlbe->mas2 = vcpu->arch.shared->mas2;
874 if (!(vcpu->arch.shared->msr & MSR_CM))
875 gtlbe->mas2 &= 0xffffffffUL;
884 gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; 876 gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
885 877
886 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, 878 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
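The tlbwe change masks the guest-supplied MAS2 EPN to 32 bits whenever the guest runs with MSR[CM] clear, matching 32-bit addressing semantics on 64-bit-capable e500 cores. Isolated as a tiny helper (illustrative; the patch performs the masking inline):

static u64 guest_effective_mas2(struct kvm_vcpu *vcpu)
{
        u64 mas2 = vcpu->arch.shared->mas2;

        if (!(vcpu->arch.shared->msr & MSR_CM))
                mas2 &= 0xffffffffUL;   /* drop EPN bits above 32 */
        return mas2;
}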
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f9ab12aea829..70739a089560 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -314,6 +314,7 @@ int kvm_dev_ioctl_check_extension(long ext)
314 case KVM_CAP_PPC_IRQ_LEVEL: 314 case KVM_CAP_PPC_IRQ_LEVEL:
315 case KVM_CAP_ENABLE_CAP: 315 case KVM_CAP_ENABLE_CAP:
316 case KVM_CAP_ONE_REG: 316 case KVM_CAP_ONE_REG:
317 case KVM_CAP_IOEVENTFD:
317 r = 1; 318 r = 1;
318 break; 319 break;
319#ifndef CONFIG_KVM_BOOK3S_64_HV 320#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -353,6 +354,12 @@ int kvm_dev_ioctl_check_extension(long ext)
353 r = 1; 354 r = 1;
354#else 355#else
355 r = 0; 356 r = 0;
357 break;
358#endif
359#ifdef CONFIG_KVM_BOOK3S_64_HV
360 case KVM_CAP_PPC_HTAB_FD:
361 r = 1;
362 break;
356#endif 363#endif
357 break; 364 break;
358 case KVM_CAP_NR_VCPUS: 365 case KVM_CAP_NR_VCPUS:
@@ -618,6 +625,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
618 vcpu->mmio_is_write = 0; 625 vcpu->mmio_is_write = 0;
619 vcpu->arch.mmio_sign_extend = 0; 626 vcpu->arch.mmio_sign_extend = 0;
620 627
628 if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
629 bytes, &run->mmio.data)) {
630 kvmppc_complete_mmio_load(vcpu, run);
631 vcpu->mmio_needed = 0;
632 return EMULATE_DONE;
633 }
634
621 return EMULATE_DO_MMIO; 635 return EMULATE_DO_MMIO;
622} 636}
623 637
@@ -627,8 +641,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
627{ 641{
628 int r; 642 int r;
629 643
630 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
631 vcpu->arch.mmio_sign_extend = 1; 644 vcpu->arch.mmio_sign_extend = 1;
645 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
632 646
633 return r; 647 return r;
634} 648}
@@ -666,6 +680,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
666 } 680 }
667 } 681 }
668 682
683 if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
684 bytes, &run->mmio.data)) {
685 kvmppc_complete_mmio_load(vcpu, run);
686 vcpu->mmio_needed = 0;
687 return EMULATE_DONE;
688 }
689
669 return EMULATE_DO_MMIO; 690 return EMULATE_DO_MMIO;
670} 691}
671 692
@@ -939,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
939 r = 0; 960 r = 0;
940 break; 961 break;
941 } 962 }
963
964 case KVM_PPC_GET_HTAB_FD: {
965 struct kvm *kvm = filp->private_data;
966 struct kvm_get_htab_fd ghf;
967
968 r = -EFAULT;
969 if (copy_from_user(&ghf, argp, sizeof(ghf)))
970 break;
971 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
972 break;
973 }
942#endif /* CONFIG_KVM_BOOK3S_64_HV */ 974#endif /* CONFIG_KVM_BOOK3S_64_HV */
943 975
944#ifdef CONFIG_PPC_BOOK3S_64 976#ifdef CONFIG_PPC_BOOK3S_64
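With kvm_io_bus_read()/kvm_io_bus_write() now tried before falling back to EMULATE_DO_MMIO, PPC reports KVM_CAP_IOEVENTFD and in-kernel devices can claim MMIO accesses without a userspace exit. A hedged userspace sketch of registering an ioeventfd for a made-up doorbell address (error handling omitted):

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int register_doorbell(int vm_fd, __u64 doorbell_gpa)
{
        struct kvm_ioeventfd ioev = {
                .addr  = doorbell_gpa,
                .len   = 4,
                .fd    = eventfd(0, 0),
                .flags = 0,     /* MMIO bus, no datamatch */
        };

        return ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
}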
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 36c3704bfa7c..91ae127f4ac5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -901,10 +901,20 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
901#ifdef CONFIG_HAVE_KVM_EVENTFD 901#ifdef CONFIG_HAVE_KVM_EVENTFD
902 902
903void kvm_eventfd_init(struct kvm *kvm); 903void kvm_eventfd_init(struct kvm *kvm);
904int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
905
906#ifdef CONFIG_HAVE_KVM_IRQCHIP
904int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); 907int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
905void kvm_irqfd_release(struct kvm *kvm); 908void kvm_irqfd_release(struct kvm *kvm);
906void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); 909void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
907int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); 910#else
911static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
912{
913 return -EINVAL;
914}
915
916static inline void kvm_irqfd_release(struct kvm *kvm) {}
917#endif
908 918
909#else 919#else
910 920
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 494a84c37c3e..e6e5d4b13708 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_smmu_info {
634#endif 634#endif
635#define KVM_CAP_IRQFD_RESAMPLE 82 635#define KVM_CAP_IRQFD_RESAMPLE 82
636#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 636#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
637#define KVM_CAP_PPC_HTAB_FD 84
637 638
638#ifdef KVM_CAP_IRQ_ROUTING 639#ifdef KVM_CAP_IRQ_ROUTING
639 640
@@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping {
859#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) 860#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
860/* Available with KVM_CAP_RMA */ 861/* Available with KVM_CAP_RMA */
861#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) 862#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
863/* Available with KVM_CAP_PPC_HTAB_FD */
864#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd)
862 865
863/* 866/*
864 * ioctls for vcpu fds 867 * ioctls for vcpu fds
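KVM_PPC_GET_HTAB_FD returns a file descriptor that streams the guest hashed page table for migration; the modified-entry tracking added via note_hpte_modification() above feeds the same interface. A minimal userspace sketch (read direction; setting KVM_GET_HTAB_WRITE in flags is expected to select the restore direction):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int open_htab_stream(int vm_fd)
{
        struct kvm_get_htab_fd ghf = {
                .flags = 0,             /* read the HPT */
                .start_index = 0,       /* begin at the first HPTE */
        };

        return ioctl(vm_fd, KVM_PPC_GET_HTAB_FD, &ghf);
}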
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9718e98d6d2a..d7424c8c138a 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,6 +35,7 @@
35 35
36#include "iodev.h" 36#include "iodev.h"
37 37
38#ifdef __KVM_HAVE_IOAPIC
38/* 39/*
39 * -------------------------------------------------------------------- 40 * --------------------------------------------------------------------
40 * irqfd: Allows an fd to be used to inject an interrupt to the guest 41 * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -425,17 +426,21 @@ fail:
425 kfree(irqfd); 426 kfree(irqfd);
426 return ret; 427 return ret;
427} 428}
429#endif
428 430
429void 431void
430kvm_eventfd_init(struct kvm *kvm) 432kvm_eventfd_init(struct kvm *kvm)
431{ 433{
434#ifdef __KVM_HAVE_IOAPIC
432 spin_lock_init(&kvm->irqfds.lock); 435 spin_lock_init(&kvm->irqfds.lock);
433 INIT_LIST_HEAD(&kvm->irqfds.items); 436 INIT_LIST_HEAD(&kvm->irqfds.items);
434 INIT_LIST_HEAD(&kvm->irqfds.resampler_list); 437 INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
435 mutex_init(&kvm->irqfds.resampler_lock); 438 mutex_init(&kvm->irqfds.resampler_lock);
439#endif
436 INIT_LIST_HEAD(&kvm->ioeventfds); 440 INIT_LIST_HEAD(&kvm->ioeventfds);
437} 441}
438 442
443#ifdef __KVM_HAVE_IOAPIC
439/* 444/*
440 * shutdown any irqfd's that match fd+gsi 445 * shutdown any irqfd's that match fd+gsi
441 */ 446 */
@@ -555,6 +560,7 @@ static void __exit irqfd_module_exit(void)
555 560
556module_init(irqfd_module_init); 561module_init(irqfd_module_init);
557module_exit(irqfd_module_exit); 562module_exit(irqfd_module_exit);
563#endif
558 564
559/* 565/*
560 * -------------------------------------------------------------------- 566 * --------------------------------------------------------------------