author		Gleb Natapov <gleb@redhat.com>	2013-08-30 08:33:11 -0400
committer	Gleb Natapov <gleb@redhat.com>	2013-08-30 08:33:11 -0400
commit		a9f6cf965e00dd3370229417675eb0127d580f96 (patch)
tree		0fe5a9c57fdf6e8e614cdc02412876f153550be4 /arch/powerpc/kvm
parent		e5552fd252763c74ce6a6c27c7873939062b5038 (diff)
parent		bf550fc93d9855872a95e69e4002256110d89858 (diff)
Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into queue

* 'kvm-ppc-next' of git://github.com/agraf/linux-2.6:
  KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
  KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
  KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
  KVM: PPC: Book3S: Fix compile error in XICS emulation
  KVM: PPC: Book3S PR: return appropriate error when allocation fails
  arch: powerpc: kvm: add signed type cast for comparation
  powerpc/kvm: Copy the pvr value after memset
  KVM: PPC: Book3S PR: Load up SPRG3 register with guest value on guest entry
  kvm/ppc/booke: Don't call kvm_guest_enter twice
  kvm/ppc: Call trace_hardirqs_on before entry
  KVM: PPC: Book3S HV: Allow negative offsets to real-mode hcall handlers
  KVM: PPC: Book3S HV: Correct tlbie usage
  powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
  powerpc/kvm: Contiguous memory allocator based RMA allocation
  powerpc/kvm: Contiguous memory allocator based hash page table allocation
  KVM: PPC: Book3S: Ignore DABR register
  mm/cma: Move dma contiguous changes into a seperate config
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--  arch/powerpc/kvm/Kconfig                 |   1
-rw-r--r--  arch/powerpc/kvm/Makefile                |   1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c         | 150
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c      |  40
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c        |   2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c             |  38
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c     | 246
-rw-r--r--  arch/powerpc/kvm/book3s_hv_cma.c         | 240
-rw-r--r--  arch/powerpc/kvm/book3s_hv_cma.h         |  27
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c      | 139
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S  |   2
-rw-r--r--  arch/powerpc/kvm/book3s_interrupts.S     |  14
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c             |  40
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c           |   1
-rw-r--r--  arch/powerpc/kvm/booke.c                 |   6
-rw-r--r--  arch/powerpc/kvm/powerpc.c               |   2
16 files changed, 614 insertions, 335 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..ffaef2cb101a 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV
 	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
 	select MMU_NOTIFIER
+	select CMA
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
 	  virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 008cd856c5b5..6646c952c5e3 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_64_vio_hv.o \
 	book3s_hv_ras.o \
 	book3s_hv_builtin.o \
+	book3s_hv_cma.o \
 	$(kvm-book3s_64-builtin-xics-objs-y)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 739bfbadb85e..7e345e00661a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	hva_t ptegp;
 	u64 pteg[16];
 	u64 avpn = 0;
+	u64 v, r;
+	u64 v_val, v_mask;
+	u64 eaddr_mask;
 	int i;
-	u8 key = 0;
+	u8 pp, key = 0;
 	bool found = false;
-	int second = 0;
+	bool second = false;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
 	/* Magic page override */
@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		goto no_seg_found;
 
 	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+	v_val = avpn & HPTE_V_AVPN;
+
 	if (slbe->tb)
-		avpn |= SLB_VSID_B_1T;
+		v_val |= SLB_VSID_B_1T;
+	if (slbe->large)
+		v_val |= HPTE_V_LARGE;
+	v_val |= HPTE_V_VALID;
+
+	v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
+		HPTE_V_SECONDARY;
 
 do_second:
 	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
@@ -227,91 +238,74 @@ do_second:
 		key = 4;
 
 	for (i=0; i<16; i+=2) {
-		u64 v = pteg[i];
-		u64 r = pteg[i+1];
-
-		/* Valid check */
-		if (!(v & HPTE_V_VALID))
-			continue;
-		/* Hash check */
-		if ((v & HPTE_V_SECONDARY) != second)
-			continue;
-
-		/* AVPN compare */
-		if (HPTE_V_COMPARE(avpn, v)) {
-			u8 pp = (r & HPTE_R_PP) | key;
-			int eaddr_mask = 0xFFF;
-
-			gpte->eaddr = eaddr;
-			gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
-								    eaddr,
-								    data);
-			if (slbe->large)
-				eaddr_mask = 0xFFFFFF;
-			gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
-			gpte->may_execute = ((r & HPTE_R_N) ? false : true);
-			gpte->may_read = false;
-			gpte->may_write = false;
-
-			switch (pp) {
-			case 0:
-			case 1:
-			case 2:
-			case 6:
-				gpte->may_write = true;
-				/* fall through */
-			case 3:
-			case 5:
-			case 7:
-				gpte->may_read = true;
-				break;
-			}
-
-			dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
-				"-> 0x%lx\n",
-				eaddr, avpn, gpte->vpage, gpte->raddr);
+		/* Check all relevant fields of 1st dword */
+		if ((pteg[i] & v_mask) == v_val) {
 			found = true;
 			break;
 		}
 	}
 
-	/* Update PTE R and C bits, so the guest's swapper knows we used the
-	 * page */
-	if (found) {
-		u32 oldr = pteg[i+1];
+	if (!found) {
+		if (second)
+			goto no_page_found;
+		v_val |= HPTE_V_SECONDARY;
+		second = true;
+		goto do_second;
+	}
 
-		if (gpte->may_read) {
-			/* Set the accessed flag */
-			pteg[i+1] |= HPTE_R_R;
-		}
-		if (gpte->may_write) {
-			/* Set the dirty flag */
-			pteg[i+1] |= HPTE_R_C;
-		} else {
-			dprintk("KVM: Mapping read-only page!\n");
-		}
+	v = pteg[i];
+	r = pteg[i+1];
+	pp = (r & HPTE_R_PP) | key;
+	eaddr_mask = 0xFFF;
+
+	gpte->eaddr = eaddr;
+	gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+	if (slbe->large)
+		eaddr_mask = 0xFFFFFF;
+	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+	gpte->may_read = false;
+	gpte->may_write = false;
+
+	switch (pp) {
+	case 0:
+	case 1:
+	case 2:
+	case 6:
+		gpte->may_write = true;
+		/* fall through */
+	case 3:
+	case 5:
+	case 7:
+		gpte->may_read = true;
+		break;
+	}
 
-		/* Write back into the PTEG */
-		if (pteg[i+1] != oldr)
-			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+	dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
+		"-> 0x%lx\n",
+		eaddr, avpn, gpte->vpage, gpte->raddr);
 
-		if (!gpte->may_read)
-			return -EPERM;
-		return 0;
-	} else {
-		dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
-			"ptegp=0x%lx)\n",
-			eaddr, to_book3s(vcpu)->sdr1, ptegp);
-		for (i = 0; i < 16; i += 2)
-			dprintk("   %02d: 0x%llx - 0x%llx (0x%llx)\n",
-				i, pteg[i], pteg[i+1], avpn);
+	/* Update PTE R and C bits, so the guest's swapper knows we used the
+	 * page */
+	if (gpte->may_read) {
+		/* Set the accessed flag */
+		r |= HPTE_R_R;
+	}
+	if (data && gpte->may_write) {
+		/* Set the dirty flag -- XXX even if not writing */
+		r |= HPTE_R_C;
+	}
 
-		if (!second) {
-			second = HPTE_V_SECONDARY;
-			goto do_second;
-		}
-	}
+	/* Write back into the PTEG */
+	if (pteg[i+1] != r) {
+		pteg[i+1] = r;
+		copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
 	}
 
+	if (!gpte->may_read)
+		return -EPERM;
+	return 0;
+
 no_page_found:
 	return -ENOENT;
 
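The rework above replaces the separate valid, secondary-hash and AVPN checks with a single masked compare against a precomputed value/mask pair, so each PTEG entry costs one AND and one comparison. A minimal standalone sketch of that matching idea, assuming illustrative field constants rather than the kernel's real HPTE layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative first-dword fields; not the real Book3S HPTE layout. */
#define V_VALID     (1ull << 0)
#define V_SECONDARY (1ull << 1)
#define V_LARGE     (1ull << 2)
#define V_AVPN      0xffffffffff000000ull

static int find_pte(const uint64_t *pteg, uint64_t avpn, bool large, bool second)
{
	uint64_t v_val = (avpn & V_AVPN) | V_VALID;
	uint64_t v_mask = V_AVPN | V_VALID | V_LARGE | V_SECONDARY;
	int i;

	if (large)
		v_val |= V_LARGE;
	if (second)
		v_val |= V_SECONDARY;

	/* 8 HPTEs per PTEG, two dwords each; check all relevant fields at once */
	for (i = 0; i < 16; i += 2)
		if ((pteg[i] & v_mask) == v_val)
			return i;
	return -1;
}

int main(void)
{
	uint64_t pteg[16] = { 0 };

	pteg[4] = (0xabc000000ull & V_AVPN) | V_VALID;	/* plant one entry */
	printf("match at %d\n", find_pte(pteg, 0xabc000000ull, false, false));
	return 0;
}

If the primary group misses, the caller folds the secondary bit into v_val and retries, which is exactly what the do_second path above does.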
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f7c9e8ae06ee..043eec8461e7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,8 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
+#include "book3s_hv_cma.h"
+
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970	63
 
@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
 	struct revmap_entry *rev;
-	struct kvmppc_linear_info *li;
-	long order = kvm_hpt_order;
+	struct page *page = NULL;
+	long order = KVM_DEFAULT_HPT_ORDER;
 
 	if (htab_orderp) {
 		order = *htab_orderp;
@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 			order = PPC_MIN_HPT_ORDER;
 	}
 
+	kvm->arch.hpt_cma_alloc = 0;
 	/*
-	 * If the user wants a different size from default,
 	 * try first to allocate it from the kernel page allocator.
+	 * We keep the CMA reserved for failed allocation.
 	 */
-	hpt = 0;
-	if (order != kvm_hpt_order) {
-		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-				       __GFP_NOWARN, order - PAGE_SHIFT);
-		if (!hpt)
-			--order;
-	}
+	hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
+			       __GFP_NOWARN, order - PAGE_SHIFT);
 
 	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
-		li = kvm_alloc_hpt();
-		if (li) {
-			hpt = (ulong)li->base_virt;
-			kvm->arch.hpt_li = li;
-			order = kvm_hpt_order;
-		}
+		VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
+		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+		if (page) {
+			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+			kvm->arch.hpt_cma_alloc = 1;
+		} else
+			--order;
 	}
 
 	/* Lastly try successively smaller sizes from the page allocator */
@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	return 0;
 
  out_freehpt:
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
 	else
 		free_pages(hpt, order - PAGE_SHIFT);
 	return -ENOMEM;
@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm)
 {
 	kvmppc_free_lpid(kvm->arch.lpid);
 	vfree(kvm->arch.revmap);
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
+				1 << (kvm->arch.hpt_order - PAGE_SHIFT));
 	else
 		free_pages(kvm->arch.hpt_virt,
 			   kvm->arch.hpt_order - PAGE_SHIFT);
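kvmppc_alloc_hpt() now always asks the kernel page allocator first and keeps the CMA reserve for failed allocations, before falling back to successively smaller orders. A simplified, self-contained sketch of that policy, with stand-in allocators in place of __get_free_pages() and the CMA-backed kvm_alloc_hpt():

#include <stdio.h>
#include <stdlib.h>

#define MIN_ORDER	18	/* PPC_MIN_HPT_ORDER */
#define DEFAULT_ORDER	24	/* KVM_DEFAULT_HPT_ORDER */

/* stand-ins: pretend the buddy allocator fails above 1MB */
static void *buddy_alloc(long order) { return order > 20 ? NULL : malloc(1UL << order); }
static void *cma_alloc(long order)   { return malloc(1UL << order); }

static void *alloc_hpt(long *orderp)
{
	long order = *orderp;
	void *hpt;

	hpt = buddy_alloc(order);		/* page allocator first */
	if (!hpt) {
		hpt = cma_alloc(order);		/* keep CMA for failures */
		if (!hpt)
			--order;
	}
	while (!hpt && order >= MIN_ORDER) {	/* lastly, smaller sizes */
		hpt = buddy_alloc(order);
		if (!hpt)
			--order;
	}
	*orderp = order;
	return hpt;
}

int main(void)
{
	long order = DEFAULT_ORDER;
	void *hpt = alloc_hpt(&order);

	printf("allocated %p at order %ld\n", hpt, order);
	free(hpt);
	return 0;
}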
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 1f6344c4408d..360ce68c9809 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_PMC4_GEKKO:
 	case SPRN_WPAR_GEKKO:
 	case SPRN_MSSSR0:
+	case SPRN_DABR:
 		break;
 unprivileged:
 	default:
@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_PMC4_GEKKO:
 	case SPRN_WPAR_GEKKO:
 	case SPRN_MSSSR0:
+	case SPRN_DABR:
 		*spr_val = 0;
 		break;
 	default:
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 89eb4c7c527e..b0ee3bc9ca76 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 }
 
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-					struct kvm_sregs *sregs)
+				  struct kvm_sregs *sregs)
 {
 	int i;
 
-	sregs->pvr = vcpu->arch.pvr;
-
 	memset(sregs, 0, sizeof(struct kvm_sregs));
+	sregs->pvr = vcpu->arch.pvr;
 	for (i = 0; i < vcpu->arch.slb_max; i++) {
 		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
 		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-					struct kvm_sregs *sregs)
+				  struct kvm_sregs *sregs)
 {
 	int i, j;
 
@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
 	struct page *page;
+	struct kvm_rma_info *ri = vma->vm_file->private_data;
 
-	if (vmf->pgoff >= ri->npages)
+	if (vmf->pgoff >= kvm_rma_pages)
 		return VM_FAULT_SIGBUS;
 
 	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int kvm_rma_release(struct inode *inode, struct file *filp)
 {
-	struct kvmppc_linear_info *ri = filp->private_data;
+	struct kvm_rma_info *ri = filp->private_data;
 
 	kvm_release_rma(ri);
 	return 0;
@@ -1549,8 +1548,17 @@ static const struct file_operations kvm_rma_fops = {
 
 long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 {
-	struct kvmppc_linear_info *ri;
 	long fd;
+	struct kvm_rma_info *ri;
+	/*
+	 * Only do this on PPC970 in HV mode
+	 */
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_201))
+		return -EINVAL;
+
+	if (!kvm_rma_pages)
+		return -EINVAL;
 
 	ri = kvm_alloc_rma();
 	if (!ri)
@@ -1560,7 +1568,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 	if (fd < 0)
 		kvm_release_rma(ri);
 
-	ret->rma_size = ri->npages << PAGE_SHIFT;
+	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
 	return fd;
 }
 
@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvmppc_linear_info *ri = NULL;
+	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
@@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
 	} else {
 		/* Set up to use an RMO region */
-		rma_size = ri->npages;
+		rma_size = kvm_rma_pages;
 		if (rma_size > memslot->npages)
 			rma_size = memslot->npages;
 		rma_size <<= PAGE_SHIFT;
 		rmls = lpcr_rmls(rma_size);
 		err = -EINVAL;
-		if (rmls < 0) {
+		if ((long)rmls < 0) {
 			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
 			goto out_srcu;
 		}
@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 			/* POWER7 */
 			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
 			lpcr |= rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
 		}
 		kvm->arch.lpcr = lpcr;
 		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 
 		/* Initialize phys addrs of pages in RMO */
-		npages = ri->npages;
+		npages = kvm_rma_pages;
 		porder = __ilog2(npages);
 		physp = memslot->arch.slot_phys;
 		if (physp) {
@@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	/* Allocate the guest's logical partition ID */
 
 	lpid = kvmppc_alloc_lpid();
-	if (lpid < 0)
+	if ((long)lpid < 0)
 		return -ENOMEM;
 	kvm->arch.lpid = lpid;
 
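Two of the hunks above only add (long) casts: rmls and lpid are held in unsigned variables, so a bare `x < 0` test is always false and an error value of -1 would be treated as success; casting to a signed type before the comparison restores the check. A self-contained illustration of the pitfall:

#include <stdio.h>

int main(void)
{
	unsigned long rmls = (unsigned long)-1;	/* error return, e.g. from lpcr_rmls() */

	if (rmls < 0)			/* unsigned: never true */
		printf("unreachable\n");
	if ((long)rmls < 0)		/* signed view: catches the error */
		printf("caught error value %ld\n", (long)rmls);
	return 0;
}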
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5de100..8cd0daebb82d 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -13,33 +13,34 @@
 #include <linux/spinlock.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
-#define KVM_LINEAR_RMA		0
-#define KVM_LINEAR_HPT		1
-
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
-static void kvm_release_linear(struct kvmppc_linear_info *ri);
-
-int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
-EXPORT_SYMBOL_GPL(kvm_hpt_order);
-
-/*************** RMA *************/
-
+#include "book3s_hv_cma.h"
+/*
+ * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
+ * should be power of 2.
+ */
+#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
 /*
- * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
  * Each RMA has to be physically contiguous and of a size that the
  * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
  * and other larger sizes. Since we are unlikely to be allocate that
  * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot for KVM to use.
+ * we preallocate a set of RMAs in early boot using CMA.
+ * should be power of 2.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
-static unsigned long kvm_rma_count;
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
    Assumes POWER7 or PPC970. */
@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int __init early_parse_rma_size(char *p)
 {
-	if (!p)
-		return 1;
+	unsigned long kvm_rma_size;
 
+	pr_debug("%s(%s)\n", __func__, p);
+	if (!p)
+		return -EINVAL;
 	kvm_rma_size = memparse(p, &p);
-
+	/*
+	 * Check that the requested size is one supported in hardware
+	 */
+	if (lpcr_rmls(kvm_rma_size) < 0) {
+		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+		return -EINVAL;
+	}
+	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
 	return 0;
 }
 early_param("kvm_rma_size", early_parse_rma_size);
 
-static int __init early_parse_rma_count(char *p)
+struct kvm_rma_info *kvm_alloc_rma()
 {
-	if (!p)
-		return 1;
-
-	kvm_rma_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-struct kvmppc_linear_info *kvm_alloc_rma(void)
-{
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	struct page *page;
+	struct kvm_rma_info *ri;
+
+	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+	if (!ri)
+		return NULL;
+	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+	if (!page)
+		goto err_out;
+	atomic_set(&ri->use_count, 1);
+	ri->base_pfn = page_to_pfn(page);
+	return ri;
+err_out:
+	kfree(ri);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
-void kvm_release_rma(struct kvmppc_linear_info *ri)
+void kvm_release_rma(struct kvm_rma_info *ri)
 {
-	kvm_release_linear(ri);
+	if (atomic_dec_and_test(&ri->use_count)) {
+		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+		kfree(ri);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
-/*************** HPT *************/
-
-/*
- * This maintains a list of big linear HPT tables that contain the GVA->HPA
- * memory mappings. If we don't reserve those early on, we might not be able
- * to get a big (usually 16MB) linear memory region from the kernel anymore.
- */
-
-static unsigned long kvm_hpt_count;
-
-static int __init early_parse_hpt_count(char *p)
+static int __init early_parse_kvm_cma_resv(char *p)
 {
+	pr_debug("%s(%s)\n", __func__, p);
 	if (!p)
-		return 1;
-
-	kvm_hpt_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
+		return -EINVAL;
+	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
 }
-early_param("kvm_hpt_count", early_parse_hpt_count);
+early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
-struct kvmppc_linear_info *kvm_alloc_hpt(void)
+struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-	return kvm_alloc_linear(KVM_LINEAR_HPT);
+	unsigned long align_pages = HPT_ALIGN_PAGES;
+
+	/* Old CPUs require HPT aligned on a multiple of its size */
+	if (!cpu_has_feature(CPU_FTR_ARCH_206))
+		align_pages = nr_pages;
+	return kvm_alloc_cma(nr_pages, align_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
-void kvm_release_hpt(struct kvmppc_linear_info *li)
+void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 {
-	kvm_release_linear(li);
+	kvm_release_cma(page, nr_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
-/*************** generic *************/
-
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
-
-static void __init kvm_linear_init_one(ulong size, int count, int type)
-{
-	unsigned long i;
-	unsigned long j, npages;
-	void *linear;
-	struct page *pg;
-	const char *typestr;
-	struct kvmppc_linear_info *linear_info;
-
-	if (!count)
-		return;
-
-	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
-	npages = size >> PAGE_SHIFT;
-	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
-	for (i = 0; i < count; ++i) {
-		linear = alloc_bootmem_align(size, size);
-		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			 size >> 20);
-		linear_info[i].base_virt = linear;
-		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
-		linear_info[i].npages = npages;
-		linear_info[i].type = type;
-		list_add_tail(&linear_info[i].list, &free_linears);
-		atomic_set(&linear_info[i].use_count, 0);
-
-		pg = pfn_to_page(linear_info[i].base_pfn);
-		for (j = 0; j < npages; ++j) {
-			atomic_inc(&pg->_count);
-			++pg;
-		}
-	}
-}
-
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
-{
-	struct kvmppc_linear_info *ri, *ret;
-
-	ret = NULL;
-	spin_lock(&linear_lock);
-	list_for_each_entry(ri, &free_linears, list) {
-		if (ri->type != type)
-			continue;
-
-		list_del(&ri->list);
-		atomic_inc(&ri->use_count);
-		memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
-		ret = ri;
-		break;
-	}
-	spin_unlock(&linear_lock);
-	return ret;
-}
-
-static void kvm_release_linear(struct kvmppc_linear_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		spin_lock(&linear_lock);
-		list_add_tail(&ri->list, &free_linears);
-		spin_unlock(&linear_lock);
-
-	}
-}
-
-/*
- * Called at boot time while the bootmem allocator is active,
- * to allocate contiguous physical memory for the hash page
- * tables for guests.
+/**
+ * kvm_cma_reserve() - reserve area for kvm hash pagetable
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
  */
-void __init kvm_linear_init(void)
+void __init kvm_cma_reserve(void)
 {
-	/* HPT */
-	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
-
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return;
-
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
-
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return;
+	unsigned long align_size;
+	struct memblock_region *reg;
+	phys_addr_t selected_size = 0;
+	/*
+	 * We cannot use memblock_phys_mem_size() here, because
+	 * memblock_analyze() has not been called yet.
+	 */
+	for_each_memblock(memory, reg)
+		selected_size += memblock_region_memory_end_pfn(reg) -
+				 memblock_region_memory_base_pfn(reg);
+
+	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+	if (selected_size) {
+		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+			 (unsigned long)selected_size / SZ_1M);
+		/*
+		 * Old CPUs require HPT aligned on a multiple of its size. So for them
+		 * make the alignment as max size we could request.
+		 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_206))
+			align_size = __rounddown_pow_of_two(selected_size);
+		else
+			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+		kvm_cma_declare_contiguous(selected_size, align_size);
 	}
-
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
 }
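kvm_cma_reserve() sizes the boot-time reservation as kvm_cma_resv_ratio percent of system memory (5% by default, tunable with the kvm_cma_resv_ratio= kernel parameter, as the early_param above shows) and raises the alignment to at least one RMA. The arithmetic in isolation, with made-up sizes for the example:

#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 12;
	unsigned long total_pages = 1UL << 20;	/* assume 4GB of 4K pages */
	unsigned long resv_ratio = 5;		/* kvm_cma_resv_ratio, percent */
	unsigned long rma_bytes = 1UL << 27;	/* one 128MB RMA */
	unsigned long hpt_align = 1UL << 18;	/* 256K HPT alignment (ARCH_206) */

	unsigned long selected = (total_pages * resv_ratio / 100) << page_shift;
	unsigned long align = hpt_align > rma_bytes ? hpt_align : rma_bytes;

	printf("reserving %lu MiB, aligned to %lu MiB\n",
	       selected >> 20, align >> 20);
	return 0;
}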
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 000000000000..d9d3d8553d51
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,240 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+#define pr_fmt(fmt) "kvm_cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include "book3s_hv_cma.h"
+
+struct kvm_cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+};
+
+static DEFINE_MUTEX(kvm_cma_mutex);
+static struct kvm_cma kvm_cma_area;
+
+/**
+ * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
+ *			          for kvm hash pagetable
+ * @size:  Size of the reserved memory.
+ * @alignment:  Alignment for the contiguous memory area
+ *
+ * This function reserves memory for kvm cma area. It should be
+ * called by arch code when early allocator (memblock or bootmem)
+ * is still activate.
+ */
+long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
+{
+	long base_pfn;
+	phys_addr_t addr;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
+
+	if (!size)
+		return -EINVAL;
+	/*
+	 * Sanitise input arguments.
+	 * We should be pageblock aligned for CMA.
+	 */
+	alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
+	size = ALIGN(size, alignment);
+	/*
+	 * Reserve memory
+	 * Use __memblock_alloc_base() since
+	 * memblock_alloc_base() panic()s.
+	 */
+	addr = __memblock_alloc_base(size, alignment, 0);
+	if (!addr) {
+		base_pfn = -ENOMEM;
+		goto err;
+	} else
+		base_pfn = PFN_DOWN(addr);
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma->base_pfn = base_pfn;
+	cma->count    = size >> PAGE_SHIFT;
+	pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
+	return 0;
+err:
+	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+	return base_pfn;
+}
+
+/**
+ * kvm_alloc_cma() - allocate pages from contiguous area
+ * @nr_pages: Requested number of pages.
+ * @align_pages: Requested alignment in number of pages
+ *
+ * This function allocates memory buffer for hash pagetable.
+ */
+struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
+{
+	int ret;
+	struct page *page = NULL;
+	struct kvm_cma *cma = &kvm_cma_area;
+	unsigned long chunk_count, nr_chunk;
+	unsigned long mask, pfn, pageno, start = 0;
+
+
+	if (!cma || !cma->count)
+		return NULL;
+
+	pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
+		 (void *)cma, nr_pages, align_pages);
+
+	if (!nr_pages)
+		return NULL;
+	/*
+	 * align mask with chunk size. The bit tracks pages in chunk size
+	 */
+	VM_BUG_ON(!is_power_of_2(align_pages));
+	mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
+	BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
+
+	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+	mutex_lock(&kvm_cma_mutex);
+	for (;;) {
+		pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
+						    start, nr_chunk, mask);
+		if (pageno >= chunk_count)
+			break;
+
+		pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
+		ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
+		if (ret == 0) {
+			bitmap_set(cma->bitmap, pageno, nr_chunk);
+			page = pfn_to_page(pfn);
+			memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
+			break;
+		} else if (ret != -EBUSY) {
+			break;
+		}
+		pr_debug("%s(): memory range at %p is busy, retrying\n",
+			 __func__, pfn_to_page(pfn));
+		/* try again with a bit different memory target */
+		start = pageno + mask + 1;
+	}
+	mutex_unlock(&kvm_cma_mutex);
+	pr_debug("%s(): returned %p\n", __func__, page);
+	return page;
+}
+
+/**
+ * kvm_release_cma() - release allocated pages for hash pagetable
+ * @pages: Allocated pages.
+ * @nr_pages: Number of allocated pages.
+ *
+ * This function releases memory allocated by kvm_alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
+{
+	unsigned long pfn;
+	unsigned long nr_chunk;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+	nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+	mutex_lock(&kvm_cma_mutex);
+	bitmap_clear(cma->bitmap,
+		     (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+		     nr_chunk);
+	free_contig_range(pfn, nr_pages);
+	mutex_unlock(&kvm_cma_mutex);
+
+	return true;
+}
+
+static int __init kvm_cma_activate_area(unsigned long base_pfn,
+					unsigned long count)
+{
+	unsigned long pfn = base_pfn;
+	unsigned i = count >> pageblock_order;
+	struct zone *zone;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+	do {
+		unsigned j;
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			/*
+			 * alloc_contig_range requires the pfn range
+			 * specified to be in the same zone. Make this
+			 * simple by forcing the entire CMA resv range
+			 * to be in the same zone.
+			 */
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				return -EINVAL;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+	return 0;
+}
+
+static int __init kvm_cma_init_reserved_areas(void)
+{
+	int bitmap_size, ret;
+	unsigned long chunk_count;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s()\n", __func__);
+	if (!cma->count)
+		return 0;
+	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!cma->bitmap)
+		return -ENOMEM;
+
+	ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
+	if (ret)
+		goto error;
+	return 0;
+
+error:
+	kfree(cma->bitmap);
+	return ret;
+}
+core_initcall(kvm_cma_init_reserved_areas);
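The allocator above accounts for the reserved area in 256K chunks: one bitmap bit covers 1 << KVM_CMA_CHUNK_ORDER bytes (64 pages with 4K pages), which keeps the bitmap small while letting the same pool back both RMAs and hash page tables. The page-to-chunk arithmetic, reduced to a standalone example:

#include <stdio.h>

#define PAGE_SHIFT		12
#define KVM_CMA_CHUNK_ORDER	18	/* 256K, as in book3s_hv_cma.h */

int main(void)
{
	unsigned long nr_pages = 1UL << (24 - PAGE_SHIFT);	/* a 16MB HPT */
	unsigned long align_pages = 1UL << (18 - PAGE_SHIFT);	/* 256K alignment */

	unsigned long nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
	unsigned long mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;

	printf("%lu pages -> %lu bitmap bits, alignment mask 0x%lx\n",
	       nr_pages, nr_chunk, mask);
	return 0;
}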
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 000000000000..655144f75fa5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,27 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_CMA_ALLOC_H__
+#define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and Hash page allocation will be multiple of 256K.
+ */
+#define KVM_CMA_CHUNK_ORDER	18
+
+extern struct page *kvm_alloc_cma(unsigned long nr_pages,
+				  unsigned long align_pages);
+extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
+extern long kvm_cma_declare_contiguous(phys_addr_t size,
+				       phys_addr_t alignment) __init;
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fc25689a9f35..45e30d6e462b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock)
 	return old == 0;
 }
 
+/*
+ * tlbie/tlbiel is a bit different on the PPC970 compared to later
+ * processors such as POWER7; the large page bit is in the instruction
+ * not RB, and the top 16 bits and the bottom 12 bits of the VA
+ * in RB must be 0.
+ */
+static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
+			  long npages, int global, bool need_sync)
+{
+	long i;
+
+	if (global) {
+		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+			cpu_relax();
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i) {
+			unsigned long rb = rbvalues[i];
+
+			if (rb & 1)		/* large page */
+				asm volatile("tlbie %0,1" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+			else
+				asm volatile("tlbie %0,0" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+		}
+		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+		kvm->arch.tlbie_lock = 0;
+	} else {
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i) {
+			unsigned long rb = rbvalues[i];
+
+			if (rb & 1)		/* large page */
+				asm volatile("tlbiel %0,1" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+			else
+				asm volatile("tlbiel %0,0" : :
+					     "r" (rb & 0x0000fffffffff000ul));
+		}
+		asm volatile("ptesync" : : : "memory");
+	}
+}
+
+static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+		      long npages, int global, bool need_sync)
+{
+	long i;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+		/* PPC970 tlbie instruction is a bit different */
+		do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
+		return;
+	}
+	if (global) {
+		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+			cpu_relax();
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i)
+			asm volatile(PPC_TLBIE(%1,%0) : :
+				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+		kvm->arch.tlbie_lock = 0;
+	} else {
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i)
+			asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+		asm volatile("ptesync" : : : "memory");
+	}
+}
+
 long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 			unsigned long pte_index, unsigned long avpn,
 			unsigned long *hpret)
@@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	if (v & HPTE_V_VALID) {
 		hpte[0] &= ~HPTE_V_VALID;
 		rb = compute_tlbie_rb(v, hpte[1], pte_index);
-		if (global_invalidates(kvm, flags)) {
-			while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-				cpu_relax();
-			asm volatile("ptesync" : : : "memory");
-			asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-				     : : "r" (rb), "r" (kvm->arch.lpid));
-			asm volatile("ptesync" : : : "memory");
-			kvm->arch.tlbie_lock = 0;
-		} else {
-			asm volatile("ptesync" : : : "memory");
-			asm volatile("tlbiel %0" : : "r" (rb));
-			asm volatile("ptesync" : : : "memory");
-		}
+		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/* Read PTE low word after tlbie to get final R/C values */
 		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
 	}
@@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 	unsigned long *hp, *hptes[4], tlbrb[4];
 	long int i, j, k, n, found, indexes[4];
 	unsigned long flags, req, pte_index, rcbits;
-	long int local = 0;
+	int global;
 	long int ret = H_SUCCESS;
 	struct revmap_entry *rev, *revs[4];
 
-	if (atomic_read(&kvm->online_vcpus) == 1)
-		local = 1;
+	global = global_invalidates(kvm, 0);
 	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
 		n = 0;
 		for (; i < 4; ++i) {
@@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 			break;
 
 		/* Now that we've collected a batch, do the tlbies */
-		if (!local) {
-			while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
-				cpu_relax();
-			asm volatile("ptesync" : : : "memory");
-			for (k = 0; k < n; ++k)
-				asm volatile(PPC_TLBIE(%1,%0) : :
-					     "r" (tlbrb[k]),
-					     "r" (kvm->arch.lpid));
-			asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-			kvm->arch.tlbie_lock = 0;
-		} else {
-			asm volatile("ptesync" : : : "memory");
-			for (k = 0; k < n; ++k)
-				asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
-			asm volatile("ptesync" : : : "memory");
-		}
+		do_tlbies(kvm, tlbrb, n, global, true);
 
 		/* Read PTE low words after tlbie to get final R/C values */
 		for (k = 0; k < n; ++k) {
@@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (v & HPTE_V_VALID) {
 		rb = compute_tlbie_rb(v, r, pte_index);
 		hpte[0] = v & ~HPTE_V_VALID;
-		if (global_invalidates(kvm, flags)) {
-			while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
-				cpu_relax();
-			asm volatile("ptesync" : : : "memory");
-			asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-				     : : "r" (rb), "r" (kvm->arch.lpid));
-			asm volatile("ptesync" : : : "memory");
-			kvm->arch.tlbie_lock = 0;
-		} else {
-			asm volatile("ptesync" : : : "memory");
-			asm volatile("tlbiel %0" : : "r" (rb));
-			asm volatile("ptesync" : : : "memory");
-		}
+		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/*
 		 * If the host has this page as readonly but the guest
 		 * wants to make it read/write, reduce the permissions.
@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
 
 	hptep[0] &= ~HPTE_V_VALID;
 	rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
-	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-		cpu_relax();
-	asm volatile("ptesync" : : : "memory");
-	asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-		     : : "r" (rb), "r" (kvm->arch.lpid));
-	asm volatile("ptesync" : : : "memory");
-	kvm->arch.tlbie_lock = 0;
+	do_tlbies(kvm, &rb, 1, 1, true);
 }
 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
 
@@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
 	rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
 	/* modify only the second-last byte, which contains the ref bit */
 	*((char *)hptep + 14) = rbyte;
-	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-		cpu_relax();
-	asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-		     : : "r" (rb), "r" (kvm->arch.lpid));
-	asm volatile("ptesync" : : : "memory");
-	kvm->arch.tlbie_lock = 0;
+	do_tlbies(kvm, &rb, 1, 1, false);
 }
 EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b02f91e4c70d..60dce5bfab3f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1381,7 +1381,7 @@ hcall_try_real_mode:
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
 	LOAD_REG_ADDR(r4, hcall_real_table)
-	lwzx	r3,r3,r4
+	lwax	r3,r3,r4
 	cmpwi	r3,0
 	beq	guest_exit_cont
 	add	r3,r3,r4
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 48cbbf862958..17cfae5497a3 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -92,6 +92,11 @@ kvm_start_lightweight:
 	PPC_LL	r3, VCPU_HFLAGS(r4)
 	rldicl	r3, r3, 0, 63		/* r3 &= 1 */
 	stb	r3, HSTATE_RESTORE_HID5(r13)
+
+	/* Load up guest SPRG3 value, since it's user readable */
+	ld	r3, VCPU_SHARED(r4)
+	ld	r3, VCPU_SHARED_SPRG3(r3)
+	mtspr	SPRN_SPRG3, r3
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 	PPC_LL	r4, VCPU_SHADOW_MSR(r4)	/* get shadow_msr */
@@ -123,6 +128,15 @@ kvmppc_handler_highmem:
 	/* R7 = vcpu */
 	PPC_LL	r7, GPR4(r1)
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Reload kernel SPRG3 value.
+	 * No need to save guest value as usermode can't modify SPRG3.
+	 */
+	ld	r3, PACA_SPRG3(r13)
+	mtspr	SPRN_SPRG3, r3
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 	PPC_STL	r14, VCPU_GPR(R14)(r7)
 	PPC_STL	r15, VCPU_GPR(R15)(r7)
 	PPC_STL	r16, VCPU_GPR(R16)(r7)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 19498a567a81..27db1e665959 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 		 * both the traditional FP registers and the added VSX
 		 * registers into thread.fpr[].
 		 */
-		giveup_fpu(current);
+		if (current->thread.regs->msr & MSR_FP)
+			giveup_fpu(current);
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
 
@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 
 #ifdef CONFIG_ALTIVEC
 	if (msr & MSR_VEC) {
-		giveup_altivec(current);
+		if (current->thread.regs->msr & MSR_VEC)
+			giveup_altivec(current);
 		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
 		vcpu->arch.vscr = t->vscr;
 	}
@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 #endif
 
-	current->thread.regs->msr |= msr;
-
 	if (msr & MSR_FP) {
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #endif
 	}
 
+	current->thread.regs->msr |= msr;
 	vcpu->arch.guest_owned_ext |= msr;
 	kvmppc_recalc_shadow_msr(vcpu);
 
 	return RESUME_GUEST;
 }
 
+/*
+ * Kernel code using FP or VMX could have flushed guest state to
+ * the thread_struct; if so, get it back now.
+ */
+static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
+{
+	unsigned long lost_ext;
+
+	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
+	if (!lost_ext)
+		return;
+
+	if (lost_ext & MSR_FP)
+		kvmppc_load_up_fpu();
+	if (lost_ext & MSR_VEC)
+		kvmppc_load_up_altivec();
+	current->thread.regs->msr |= lost_ext;
+}
+
 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		       unsigned int exit_nr)
 {
@@ -772,7 +792,7 @@ program_interrupt:
 		}
 	case BOOK3S_INTERRUPT_SYSCALL:
 		if (vcpu->arch.papr_enabled &&
-		    (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
 		    !(vcpu->arch.shared->msr & MSR_PR)) {
 			/* SC 1 papr hypercalls */
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
@@ -890,8 +910,9 @@ program_interrupt:
 			local_irq_enable();
 			r = s;
 		} else {
-			kvmppc_lazy_ee_enable();
+			kvmppc_fix_ee_before_entry();
 		}
+		kvmppc_handle_lost_ext(vcpu);
 	}
 
 	trace_kvm_book3s_reenter(r, vcpu);
@@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_shadow_vcpu;
 
+	err = -ENOMEM;
 	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-	/* the real shared page fills the last 4k of our page */
-	vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
 	if (!p)
 		goto uninit_vcpu;
+	/* the real shared page fills the last 4k of our page */
+	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* default to book3s_64 (970fx) */
@@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (vcpu->arch.shared->msr & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 
-	kvmppc_lazy_ee_enable();
+	kvmppc_fix_ee_before_entry();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 94c1dd46b83d..a3a5cb8ee7ea 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -19,6 +19,7 @@
 #include <asm/hvcall.h>
 #include <asm/xics.h>
 #include <asm/debug.h>
+#include <asm/time.h>
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index dcc94f016007..17722d82f1d1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	kvm_guest_enter();
-
 #ifdef CONFIG_PPC_FPU
 	/* Save userspace FPU state in stack */
 	enable_kernel_fp();
@@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvmppc_load_guest_fp(vcpu);
 #endif
 
-	kvmppc_lazy_ee_enable();
+	kvmppc_fix_ee_before_entry();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
@@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			local_irq_enable();
 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
 		} else {
-			kvmppc_lazy_ee_enable();
+			kvmppc_fix_ee_before_entry();
 		}
 	}
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ae63ae4a1a5f..f55e14cd1762 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			kvm_guest_exit();
 			continue;
 		}
-
-		trace_hardirqs_on();
 #endif
 
 		kvm_guest_enter();