author     Gleb Natapov <gleb@redhat.com>    2013-08-30 08:33:11 -0400
committer  Gleb Natapov <gleb@redhat.com>    2013-08-30 08:33:11 -0400
commit     a9f6cf965e00dd3370229417675eb0127d580f96 (patch)
tree       0fe5a9c57fdf6e8e614cdc02412876f153550be4 /arch/powerpc/kvm
parent     e5552fd252763c74ce6a6c27c7873939062b5038 (diff)
parent     bf550fc93d9855872a95e69e4002256110d89858 (diff)
Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into queue
* 'kvm-ppc-next' of git://github.com/agraf/linux-2.6:
KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
KVM: PPC: Book3S: Fix compile error in XICS emulation
KVM: PPC: Book3S PR: return appropriate error when allocation fails
arch: powerpc: kvm: add signed type cast for comparation
powerpc/kvm: Copy the pvr value after memset
KVM: PPC: Book3S PR: Load up SPRG3 register with guest value on guest entry
kvm/ppc/booke: Don't call kvm_guest_enter twice
kvm/ppc: Call trace_hardirqs_on before entry
KVM: PPC: Book3S HV: Allow negative offsets to real-mode hcall handlers
KVM: PPC: Book3S HV: Correct tlbie usage
powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
powerpc/kvm: Contiguous memory allocator based RMA allocation
powerpc/kvm: Contiguous memory allocator based hash page table allocation
KVM: PPC: Book3S: Ignore DABR register
mm/cma: Move dma contiguous changes into a seperate config
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 1
-rw-r--r-- | arch/powerpc/kvm/Makefile | 1
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu.c | 150
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 40
-rw-r--r-- | arch/powerpc/kvm/book3s_emulate.c | 2
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 38
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 246
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_cma.c | 240
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_cma.h | 27
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 139
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2
-rw-r--r-- | arch/powerpc/kvm/book3s_interrupts.S | 14
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 40
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.c | 1
-rw-r--r-- | arch/powerpc/kvm/booke.c | 6
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 2
16 files changed, 614 insertions, 335 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..ffaef2cb101a 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV | |||
72 | bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" | 72 | bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" |
73 | depends on KVM_BOOK3S_64 | 73 | depends on KVM_BOOK3S_64 |
74 | select MMU_NOTIFIER | 74 | select MMU_NOTIFIER |
75 | select CMA | ||
75 | ---help--- | 76 | ---help--- |
76 | Support running unmodified book3s_64 guest kernels in | 77 | Support running unmodified book3s_64 guest kernels in |
77 | virtual machines on POWER7 and PPC970 processors that have | 78 | virtual machines on POWER7 and PPC970 processors that have |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 008cd856c5b5..6646c952c5e3 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | |||
81 | book3s_64_vio_hv.o \ | 81 | book3s_64_vio_hv.o \ |
82 | book3s_hv_ras.o \ | 82 | book3s_hv_ras.o \ |
83 | book3s_hv_builtin.o \ | 83 | book3s_hv_builtin.o \ |
84 | book3s_hv_cma.o \ | ||
84 | $(kvm-book3s_64-builtin-xics-objs-y) | 85 | $(kvm-book3s_64-builtin-xics-objs-y) |
85 | 86 | ||
86 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ | 87 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ |
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 739bfbadb85e..7e345e00661a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
182 | hva_t ptegp; | 182 | hva_t ptegp; |
183 | u64 pteg[16]; | 183 | u64 pteg[16]; |
184 | u64 avpn = 0; | 184 | u64 avpn = 0; |
185 | u64 v, r; | ||
186 | u64 v_val, v_mask; | ||
187 | u64 eaddr_mask; | ||
185 | int i; | 188 | int i; |
186 | u8 key = 0; | 189 | u8 pp, key = 0; |
187 | bool found = false; | 190 | bool found = false; |
188 | int second = 0; | 191 | bool second = false; |
189 | ulong mp_ea = vcpu->arch.magic_page_ea; | 192 | ulong mp_ea = vcpu->arch.magic_page_ea; |
190 | 193 | ||
191 | /* Magic page override */ | 194 | /* Magic page override */ |
@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
208 | goto no_seg_found; | 211 | goto no_seg_found; |
209 | 212 | ||
210 | avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); | 213 | avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); |
214 | v_val = avpn & HPTE_V_AVPN; | ||
215 | |||
211 | if (slbe->tb) | 216 | if (slbe->tb) |
212 | avpn |= SLB_VSID_B_1T; | 217 | v_val |= SLB_VSID_B_1T; |
218 | if (slbe->large) | ||
219 | v_val |= HPTE_V_LARGE; | ||
220 | v_val |= HPTE_V_VALID; | ||
221 | |||
222 | v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | | ||
223 | HPTE_V_SECONDARY; | ||
213 | 224 | ||
214 | do_second: | 225 | do_second: |
215 | ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); | 226 | ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); |
@@ -227,91 +238,74 @@ do_second: | |||
227 | key = 4; | 238 | key = 4; |
228 | 239 | ||
229 | for (i=0; i<16; i+=2) { | 240 | for (i=0; i<16; i+=2) { |
230 | u64 v = pteg[i]; | 241 | /* Check all relevant fields of 1st dword */ |
231 | u64 r = pteg[i+1]; | 242 | if ((pteg[i] & v_mask) == v_val) { |
232 | |||
233 | /* Valid check */ | ||
234 | if (!(v & HPTE_V_VALID)) | ||
235 | continue; | ||
236 | /* Hash check */ | ||
237 | if ((v & HPTE_V_SECONDARY) != second) | ||
238 | continue; | ||
239 | |||
240 | /* AVPN compare */ | ||
241 | if (HPTE_V_COMPARE(avpn, v)) { | ||
242 | u8 pp = (r & HPTE_R_PP) | key; | ||
243 | int eaddr_mask = 0xFFF; | ||
244 | |||
245 | gpte->eaddr = eaddr; | ||
246 | gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, | ||
247 | eaddr, | ||
248 | data); | ||
249 | if (slbe->large) | ||
250 | eaddr_mask = 0xFFFFFF; | ||
251 | gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask); | ||
252 | gpte->may_execute = ((r & HPTE_R_N) ? false : true); | ||
253 | gpte->may_read = false; | ||
254 | gpte->may_write = false; | ||
255 | |||
256 | switch (pp) { | ||
257 | case 0: | ||
258 | case 1: | ||
259 | case 2: | ||
260 | case 6: | ||
261 | gpte->may_write = true; | ||
262 | /* fall through */ | ||
263 | case 3: | ||
264 | case 5: | ||
265 | case 7: | ||
266 | gpte->may_read = true; | ||
267 | break; | ||
268 | } | ||
269 | |||
270 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " | ||
271 | "-> 0x%lx\n", | ||
272 | eaddr, avpn, gpte->vpage, gpte->raddr); | ||
273 | found = true; | 243 | found = true; |
274 | break; | 244 | break; |
275 | } | 245 | } |
276 | } | 246 | } |
277 | 247 | ||
278 | /* Update PTE R and C bits, so the guest's swapper knows we used the | 248 | if (!found) { |
279 | * page */ | 249 | if (second) |
280 | if (found) { | 250 | goto no_page_found; |
281 | u32 oldr = pteg[i+1]; | 251 | v_val |= HPTE_V_SECONDARY; |
252 | second = true; | ||
253 | goto do_second; | ||
254 | } | ||
282 | 255 | ||
283 | if (gpte->may_read) { | 256 | v = pteg[i]; |
284 | /* Set the accessed flag */ | 257 | r = pteg[i+1]; |
285 | pteg[i+1] |= HPTE_R_R; | 258 | pp = (r & HPTE_R_PP) | key; |
286 | } | 259 | eaddr_mask = 0xFFF; |
287 | if (gpte->may_write) { | 260 | |
288 | /* Set the dirty flag */ | 261 | gpte->eaddr = eaddr; |
289 | pteg[i+1] |= HPTE_R_C; | 262 | gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); |
290 | } else { | 263 | if (slbe->large) |
291 | dprintk("KVM: Mapping read-only page!\n"); | 264 | eaddr_mask = 0xFFFFFF; |
292 | } | 265 | gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); |
266 | gpte->may_execute = ((r & HPTE_R_N) ? false : true); | ||
267 | gpte->may_read = false; | ||
268 | gpte->may_write = false; | ||
269 | |||
270 | switch (pp) { | ||
271 | case 0: | ||
272 | case 1: | ||
273 | case 2: | ||
274 | case 6: | ||
275 | gpte->may_write = true; | ||
276 | /* fall through */ | ||
277 | case 3: | ||
278 | case 5: | ||
279 | case 7: | ||
280 | gpte->may_read = true; | ||
281 | break; | ||
282 | } | ||
293 | 283 | ||
294 | /* Write back into the PTEG */ | 284 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " |
295 | if (pteg[i+1] != oldr) | 285 | "-> 0x%lx\n", |
296 | copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); | 286 | eaddr, avpn, gpte->vpage, gpte->raddr); |
297 | 287 | ||
298 | if (!gpte->may_read) | 288 | /* Update PTE R and C bits, so the guest's swapper knows we used the |
299 | return -EPERM; | 289 | * page */ |
300 | return 0; | 290 | if (gpte->may_read) { |
301 | } else { | 291 | /* Set the accessed flag */ |
302 | dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " | 292 | r |= HPTE_R_R; |
303 | "ptegp=0x%lx)\n", | 293 | } |
304 | eaddr, to_book3s(vcpu)->sdr1, ptegp); | 294 | if (data && gpte->may_write) { |
305 | for (i = 0; i < 16; i += 2) | 295 | /* Set the dirty flag -- XXX even if not writing */ |
306 | dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n", | 296 | r |= HPTE_R_C; |
307 | i, pteg[i], pteg[i+1], avpn); | 297 | } |
308 | 298 | ||
309 | if (!second) { | 299 | /* Write back into the PTEG */ |
310 | second = HPTE_V_SECONDARY; | 300 | if (pteg[i+1] != r) { |
311 | goto do_second; | 301 | pteg[i+1] = r; |
312 | } | 302 | copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); |
313 | } | 303 | } |
314 | 304 | ||
305 | if (!gpte->may_read) | ||
306 | return -EPERM; | ||
307 | return 0; | ||
308 | |||
315 | no_page_found: | 309 | no_page_found: |
316 | return -ENOENT; | 310 | return -ENOENT; |
317 | 311 | ||
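The rework above replaces the per-entry valid/secondary/AVPN checks with a single masked compare against a precomputed first doubleword (v_val/v_mask). Below is a rough user-space sketch of that matching idea only; the HPTE_V_* constants and the 8-entry PTEG layout are stand-ins for illustration, not the real register encodings or the kernel routine.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in bit definitions; the real HPTE_V_* values live in the kernel headers. */
#define HPTE_V_AVPN      0xffffffffffffff80ULL
#define HPTE_V_LARGE     0x0000000000000004ULL
#define HPTE_V_SECONDARY 0x0000000000000002ULL
#define HPTE_V_VALID     0x0000000000000001ULL

/* Return the index of the matching PTE in a 16-dword (8-entry) PTEG, or -1. */
static int pteg_find(const uint64_t *pteg, uint64_t avpn, bool large, bool second)
{
	/* Build the expected first doubleword once, outside the loop. */
	uint64_t v_val = (avpn & HPTE_V_AVPN) | HPTE_V_VALID;
	uint64_t v_mask = HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | HPTE_V_SECONDARY;

	if (large)
		v_val |= HPTE_V_LARGE;
	if (second)
		v_val |= HPTE_V_SECONDARY;

	for (int i = 0; i < 16; i += 2) {
		/* One masked compare covers valid bit, page size, hash side and AVPN. */
		if ((pteg[i] & v_mask) == v_val)
			return i;
	}
	return -1;	/* caller flips HPTE_V_SECONDARY and retries the other hash */
}

int main(void)
{
	uint64_t pteg[16] = { 0 };

	pteg[6] = (0x123400ULL & HPTE_V_AVPN) | HPTE_V_VALID;	/* fake entry */
	printf("match at dword %d\n", pteg_find(pteg, 0x123400ULL, false, false));
	return 0;
}
```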
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f7c9e8ae06ee..043eec8461e7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,8 @@ | |||
37 | #include <asm/ppc-opcode.h> | 37 | #include <asm/ppc-opcode.h> |
38 | #include <asm/cputable.h> | 38 | #include <asm/cputable.h> |
39 | 39 | ||
40 | #include "book3s_hv_cma.h" | ||
41 | |||
40 | /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ | 42 | /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ |
41 | #define MAX_LPID_970 63 | 43 | #define MAX_LPID_970 63 |
42 | 44 | ||
@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) | |||
52 | { | 54 | { |
53 | unsigned long hpt; | 55 | unsigned long hpt; |
54 | struct revmap_entry *rev; | 56 | struct revmap_entry *rev; |
55 | struct kvmppc_linear_info *li; | 57 | struct page *page = NULL; |
56 | long order = kvm_hpt_order; | 58 | long order = KVM_DEFAULT_HPT_ORDER; |
57 | 59 | ||
58 | if (htab_orderp) { | 60 | if (htab_orderp) { |
59 | order = *htab_orderp; | 61 | order = *htab_orderp; |
@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) | |||
61 | order = PPC_MIN_HPT_ORDER; | 63 | order = PPC_MIN_HPT_ORDER; |
62 | } | 64 | } |
63 | 65 | ||
66 | kvm->arch.hpt_cma_alloc = 0; | ||
64 | /* | 67 | /* |
65 | * If the user wants a different size from default, | ||
66 | * try first to allocate it from the kernel page allocator. | 68 | * try first to allocate it from the kernel page allocator. |
69 | * We keep the CMA reserved for failed allocation. | ||
67 | */ | 70 | */ |
68 | hpt = 0; | 71 | hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | |
69 | if (order != kvm_hpt_order) { | 72 | __GFP_NOWARN, order - PAGE_SHIFT); |
70 | hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| | ||
71 | __GFP_NOWARN, order - PAGE_SHIFT); | ||
72 | if (!hpt) | ||
73 | --order; | ||
74 | } | ||
75 | 73 | ||
76 | /* Next try to allocate from the preallocated pool */ | 74 | /* Next try to allocate from the preallocated pool */ |
77 | if (!hpt) { | 75 | if (!hpt) { |
78 | li = kvm_alloc_hpt(); | 76 | VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); |
79 | if (li) { | 77 | page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); |
80 | hpt = (ulong)li->base_virt; | 78 | if (page) { |
81 | kvm->arch.hpt_li = li; | 79 | hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); |
82 | order = kvm_hpt_order; | 80 | kvm->arch.hpt_cma_alloc = 1; |
83 | } | 81 | } else |
82 | --order; | ||
84 | } | 83 | } |
85 | 84 | ||
86 | /* Lastly try successively smaller sizes from the page allocator */ | 85 | /* Lastly try successively smaller sizes from the page allocator */ |
@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) | |||
118 | return 0; | 117 | return 0; |
119 | 118 | ||
120 | out_freehpt: | 119 | out_freehpt: |
121 | if (kvm->arch.hpt_li) | 120 | if (kvm->arch.hpt_cma_alloc) |
122 | kvm_release_hpt(kvm->arch.hpt_li); | 121 | kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); |
123 | else | 122 | else |
124 | free_pages(hpt, order - PAGE_SHIFT); | 123 | free_pages(hpt, order - PAGE_SHIFT); |
125 | return -ENOMEM; | 124 | return -ENOMEM; |
@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm) | |||
165 | { | 164 | { |
166 | kvmppc_free_lpid(kvm->arch.lpid); | 165 | kvmppc_free_lpid(kvm->arch.lpid); |
167 | vfree(kvm->arch.revmap); | 166 | vfree(kvm->arch.revmap); |
168 | if (kvm->arch.hpt_li) | 167 | if (kvm->arch.hpt_cma_alloc) |
169 | kvm_release_hpt(kvm->arch.hpt_li); | 168 | kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), |
169 | 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); | ||
170 | else | 170 | else |
171 | free_pages(kvm->arch.hpt_virt, | 171 | free_pages(kvm->arch.hpt_virt, |
172 | kvm->arch.hpt_order - PAGE_SHIFT); | 172 | kvm->arch.hpt_order - PAGE_SHIFT); |
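With CMA in place, kvmppc_alloc_hpt() now tries the regular page allocator first at the requested order, keeps the CMA reservation as a fallback, and only then walks down to smaller orders. The sketch below shows that ordering in plain C with dummy back ends standing in for __get_free_pages() and kvm_alloc_hpt(); it illustrates the fallback shape under assumed order values, not the kernel code itself.

```c
#include <stdbool.h>
#include <stdio.h>

#define MIN_ORDER     18	/* stand-in for PPC_MIN_HPT_ORDER */
#define DEFAULT_ORDER 24	/* stand-in for KVM_DEFAULT_HPT_ORDER */

/* Dummy back ends; in the kernel these are __get_free_pages() and kvm_alloc_hpt(). */
static bool buddy_alloc(long order)  { return order <= 20; }  /* pretend big orders fail */
static bool cma_alloc(long order)    { return order >= 23; }  /* CMA only holds big chunks */

static long alloc_hpt(long order, bool *from_cma)
{
	*from_cma = false;

	/* 1. Try the kernel page allocator at the requested order. */
	if (buddy_alloc(order))
		return order;

	/* 2. Fall back to the preallocated CMA region. */
	if (cma_alloc(order)) {
		*from_cma = true;
		return order;
	}

	/* 3. Otherwise retry the page allocator with successively smaller orders. */
	for (--order; order >= MIN_ORDER; --order)
		if (buddy_alloc(order))
			return order;

	return -1;	/* -ENOMEM in the real code */
}

int main(void)
{
	bool cma;
	long got = alloc_hpt(DEFAULT_ORDER, &cma);

	printf("got order %ld via %s\n", got, cma ? "CMA" : "page allocator");
	return 0;
}
```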
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 1f6344c4408d..360ce68c9809 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) | |||
458 | case SPRN_PMC4_GEKKO: | 458 | case SPRN_PMC4_GEKKO: |
459 | case SPRN_WPAR_GEKKO: | 459 | case SPRN_WPAR_GEKKO: |
460 | case SPRN_MSSSR0: | 460 | case SPRN_MSSSR0: |
461 | case SPRN_DABR: | ||
461 | break; | 462 | break; |
462 | unprivileged: | 463 | unprivileged: |
463 | default: | 464 | default: |
@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
555 | case SPRN_PMC4_GEKKO: | 556 | case SPRN_PMC4_GEKKO: |
556 | case SPRN_WPAR_GEKKO: | 557 | case SPRN_WPAR_GEKKO: |
557 | case SPRN_MSSSR0: | 558 | case SPRN_MSSSR0: |
559 | case SPRN_DABR: | ||
558 | *spr_val = 0; | 560 | *spr_val = 0; |
559 | break; | 561 | break; |
560 | default: | 562 | default: |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 89eb4c7c527e..b0ee3bc9ca76 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
680 | } | 680 | } |
681 | 681 | ||
682 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 682 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
683 | struct kvm_sregs *sregs) | 683 | struct kvm_sregs *sregs) |
684 | { | 684 | { |
685 | int i; | 685 | int i; |
686 | 686 | ||
687 | sregs->pvr = vcpu->arch.pvr; | ||
688 | |||
689 | memset(sregs, 0, sizeof(struct kvm_sregs)); | 687 | memset(sregs, 0, sizeof(struct kvm_sregs)); |
688 | sregs->pvr = vcpu->arch.pvr; | ||
690 | for (i = 0; i < vcpu->arch.slb_max; i++) { | 689 | for (i = 0; i < vcpu->arch.slb_max; i++) { |
691 | sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; | 690 | sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; |
692 | sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; | 691 | sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; |
@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
696 | } | 695 | } |
697 | 696 | ||
698 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 697 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
699 | struct kvm_sregs *sregs) | 698 | struct kvm_sregs *sregs) |
700 | { | 699 | { |
701 | int i, j; | 700 | int i, j; |
702 | 701 | ||
@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size) | |||
1511 | 1510 | ||
1512 | static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1511 | static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1513 | { | 1512 | { |
1514 | struct kvmppc_linear_info *ri = vma->vm_file->private_data; | ||
1515 | struct page *page; | 1513 | struct page *page; |
1514 | struct kvm_rma_info *ri = vma->vm_file->private_data; | ||
1516 | 1515 | ||
1517 | if (vmf->pgoff >= ri->npages) | 1516 | if (vmf->pgoff >= kvm_rma_pages) |
1518 | return VM_FAULT_SIGBUS; | 1517 | return VM_FAULT_SIGBUS; |
1519 | 1518 | ||
1520 | page = pfn_to_page(ri->base_pfn + vmf->pgoff); | 1519 | page = pfn_to_page(ri->base_pfn + vmf->pgoff); |
@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) | |||
1536 | 1535 | ||
1537 | static int kvm_rma_release(struct inode *inode, struct file *filp) | 1536 | static int kvm_rma_release(struct inode *inode, struct file *filp) |
1538 | { | 1537 | { |
1539 | struct kvmppc_linear_info *ri = filp->private_data; | 1538 | struct kvm_rma_info *ri = filp->private_data; |
1540 | 1539 | ||
1541 | kvm_release_rma(ri); | 1540 | kvm_release_rma(ri); |
1542 | return 0; | 1541 | return 0; |
@@ -1549,8 +1548,17 @@ static const struct file_operations kvm_rma_fops = { | |||
1549 | 1548 | ||
1550 | long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) | 1549 | long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) |
1551 | { | 1550 | { |
1552 | struct kvmppc_linear_info *ri; | ||
1553 | long fd; | 1551 | long fd; |
1552 | struct kvm_rma_info *ri; | ||
1553 | /* | ||
1554 | * Only do this on PPC970 in HV mode | ||
1555 | */ | ||
1556 | if (!cpu_has_feature(CPU_FTR_HVMODE) || | ||
1557 | !cpu_has_feature(CPU_FTR_ARCH_201)) | ||
1558 | return -EINVAL; | ||
1559 | |||
1560 | if (!kvm_rma_pages) | ||
1561 | return -EINVAL; | ||
1554 | 1562 | ||
1555 | ri = kvm_alloc_rma(); | 1563 | ri = kvm_alloc_rma(); |
1556 | if (!ri) | 1564 | if (!ri) |
@@ -1560,7 +1568,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) | |||
1560 | if (fd < 0) | 1568 | if (fd < 0) |
1561 | kvm_release_rma(ri); | 1569 | kvm_release_rma(ri); |
1562 | 1570 | ||
1563 | ret->rma_size = ri->npages << PAGE_SHIFT; | 1571 | ret->rma_size = kvm_rma_pages << PAGE_SHIFT; |
1564 | return fd; | 1572 | return fd; |
1565 | } | 1573 | } |
1566 | 1574 | ||
@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
1725 | { | 1733 | { |
1726 | int err = 0; | 1734 | int err = 0; |
1727 | struct kvm *kvm = vcpu->kvm; | 1735 | struct kvm *kvm = vcpu->kvm; |
1728 | struct kvmppc_linear_info *ri = NULL; | 1736 | struct kvm_rma_info *ri = NULL; |
1729 | unsigned long hva; | 1737 | unsigned long hva; |
1730 | struct kvm_memory_slot *memslot; | 1738 | struct kvm_memory_slot *memslot; |
1731 | struct vm_area_struct *vma; | 1739 | struct vm_area_struct *vma; |
@@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
1803 | 1811 | ||
1804 | } else { | 1812 | } else { |
1805 | /* Set up to use an RMO region */ | 1813 | /* Set up to use an RMO region */ |
1806 | rma_size = ri->npages; | 1814 | rma_size = kvm_rma_pages; |
1807 | if (rma_size > memslot->npages) | 1815 | if (rma_size > memslot->npages) |
1808 | rma_size = memslot->npages; | 1816 | rma_size = memslot->npages; |
1809 | rma_size <<= PAGE_SHIFT; | 1817 | rma_size <<= PAGE_SHIFT; |
1810 | rmls = lpcr_rmls(rma_size); | 1818 | rmls = lpcr_rmls(rma_size); |
1811 | err = -EINVAL; | 1819 | err = -EINVAL; |
1812 | if (rmls < 0) { | 1820 | if ((long)rmls < 0) { |
1813 | pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); | 1821 | pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); |
1814 | goto out_srcu; | 1822 | goto out_srcu; |
1815 | } | 1823 | } |
@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
1831 | /* POWER7 */ | 1839 | /* POWER7 */ |
1832 | lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); | 1840 | lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); |
1833 | lpcr |= rmls << LPCR_RMLS_SH; | 1841 | lpcr |= rmls << LPCR_RMLS_SH; |
1834 | kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; | 1842 | kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; |
1835 | } | 1843 | } |
1836 | kvm->arch.lpcr = lpcr; | 1844 | kvm->arch.lpcr = lpcr; |
1837 | pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", | 1845 | pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", |
1838 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); | 1846 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); |
1839 | 1847 | ||
1840 | /* Initialize phys addrs of pages in RMO */ | 1848 | /* Initialize phys addrs of pages in RMO */ |
1841 | npages = ri->npages; | 1849 | npages = kvm_rma_pages; |
1842 | porder = __ilog2(npages); | 1850 | porder = __ilog2(npages); |
1843 | physp = memslot->arch.slot_phys; | 1851 | physp = memslot->arch.slot_phys; |
1844 | if (physp) { | 1852 | if (physp) { |
@@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
1874 | /* Allocate the guest's logical partition ID */ | 1882 | /* Allocate the guest's logical partition ID */ |
1875 | 1883 | ||
1876 | lpid = kvmppc_alloc_lpid(); | 1884 | lpid = kvmppc_alloc_lpid(); |
1877 | if (lpid < 0) | 1885 | if ((long)lpid < 0) |
1878 | return -ENOMEM; | 1886 | return -ENOMEM; |
1879 | kvm->arch.lpid = lpid; | 1887 | kvm->arch.lpid = lpid; |
1880 | 1888 | ||
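The two (long) casts added above (for the lpcr_rmls() and kvmppc_alloc_lpid() results) address a classic pitfall: the helpers report failure as a negative int, but the local variables appear to be unsigned, so a plain `< 0` test can never be true. A minimal stand-alone demonstration, with a made-up helper name:

```c
#include <stdio.h>

/* Stand-in for a helper such as lpcr_rmls() that returns -1 on failure. */
static int lookup(void) { return -1; }

int main(void)
{
	unsigned long rmls = lookup();	/* -1 becomes 0xffff...ffff */

	if (rmls < 0)			/* always false: rmls is unsigned */
		printf("unsigned compare caught the error\n");

	if ((long)rmls < 0)		/* the added cast makes the test work */
		printf("signed compare caught the error\n");

	return 0;
}
```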
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5de100..8cd0daebb82d 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -13,33 +13,34 @@ | |||
13 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/memblock.h> | ||
17 | #include <linux/sizes.h> | ||
16 | 18 | ||
17 | #include <asm/cputable.h> | 19 | #include <asm/cputable.h> |
18 | #include <asm/kvm_ppc.h> | 20 | #include <asm/kvm_ppc.h> |
19 | #include <asm/kvm_book3s.h> | 21 | #include <asm/kvm_book3s.h> |
20 | 22 | ||
21 | #define KVM_LINEAR_RMA 0 | 23 | #include "book3s_hv_cma.h" |
22 | #define KVM_LINEAR_HPT 1 | 24 | /* |
23 | 25 | * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) | |
24 | static void __init kvm_linear_init_one(ulong size, int count, int type); | 26 | * should be power of 2. |
25 | static struct kvmppc_linear_info *kvm_alloc_linear(int type); | 27 | */ |
26 | static void kvm_release_linear(struct kvmppc_linear_info *ri); | 28 | #define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */ |
27 | 29 | /* | |
28 | int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; | 30 | * By default we reserve 5% of memory for hash pagetable allocation. |
29 | EXPORT_SYMBOL_GPL(kvm_hpt_order); | 31 | */ |
30 | 32 | static unsigned long kvm_cma_resv_ratio = 5; | |
31 | /*************** RMA *************/ | ||
32 | |||
33 | /* | 33 | /* |
34 | * This maintains a list of RMAs (real mode areas) for KVM guests to use. | 34 | * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. |
35 | * Each RMA has to be physically contiguous and of a size that the | 35 | * Each RMA has to be physically contiguous and of a size that the |
36 | * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, | 36 | * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, |
37 | * and other larger sizes. Since we are unlikely to be allocate that | 37 | * and other larger sizes. Since we are unlikely to be allocate that |
38 | * much physically contiguous memory after the system is up and running, | 38 | * much physically contiguous memory after the system is up and running, |
39 | * we preallocate a set of RMAs in early boot for KVM to use. | 39 | * we preallocate a set of RMAs in early boot using CMA. |
40 | * should be power of 2. | ||
40 | */ | 41 | */ |
41 | static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ | 42 | unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ |
42 | static unsigned long kvm_rma_count; | 43 | EXPORT_SYMBOL_GPL(kvm_rma_pages); |
43 | 44 | ||
44 | /* Work out RMLS (real mode limit selector) field value for a given RMA size. | 45 | /* Work out RMLS (real mode limit selector) field value for a given RMA size. |
45 | Assumes POWER7 or PPC970. */ | 46 | Assumes POWER7 or PPC970. */ |
@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size) | |||
69 | 70 | ||
70 | static int __init early_parse_rma_size(char *p) | 71 | static int __init early_parse_rma_size(char *p) |
71 | { | 72 | { |
72 | if (!p) | 73 | unsigned long kvm_rma_size; |
73 | return 1; | ||
74 | 74 | ||
75 | pr_debug("%s(%s)\n", __func__, p); | ||
76 | if (!p) | ||
77 | return -EINVAL; | ||
75 | kvm_rma_size = memparse(p, &p); | 78 | kvm_rma_size = memparse(p, &p); |
76 | 79 | /* | |
80 | * Check that the requested size is one supported in hardware | ||
81 | */ | ||
82 | if (lpcr_rmls(kvm_rma_size) < 0) { | ||
83 | pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); | ||
84 | return -EINVAL; | ||
85 | } | ||
86 | kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; | ||
77 | return 0; | 87 | return 0; |
78 | } | 88 | } |
79 | early_param("kvm_rma_size", early_parse_rma_size); | 89 | early_param("kvm_rma_size", early_parse_rma_size); |
80 | 90 | ||
81 | static int __init early_parse_rma_count(char *p) | 91 | struct kvm_rma_info *kvm_alloc_rma() |
82 | { | 92 | { |
83 | if (!p) | 93 | struct page *page; |
84 | return 1; | 94 | struct kvm_rma_info *ri; |
85 | 95 | ||
86 | kvm_rma_count = simple_strtoul(p, NULL, 0); | 96 | ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); |
87 | 97 | if (!ri) | |
88 | return 0; | 98 | return NULL; |
89 | } | 99 | page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); |
90 | early_param("kvm_rma_count", early_parse_rma_count); | 100 | if (!page) |
91 | 101 | goto err_out; | |
92 | struct kvmppc_linear_info *kvm_alloc_rma(void) | 102 | atomic_set(&ri->use_count, 1); |
93 | { | 103 | ri->base_pfn = page_to_pfn(page); |
94 | return kvm_alloc_linear(KVM_LINEAR_RMA); | 104 | return ri; |
105 | err_out: | ||
106 | kfree(ri); | ||
107 | return NULL; | ||
95 | } | 108 | } |
96 | EXPORT_SYMBOL_GPL(kvm_alloc_rma); | 109 | EXPORT_SYMBOL_GPL(kvm_alloc_rma); |
97 | 110 | ||
98 | void kvm_release_rma(struct kvmppc_linear_info *ri) | 111 | void kvm_release_rma(struct kvm_rma_info *ri) |
99 | { | 112 | { |
100 | kvm_release_linear(ri); | 113 | if (atomic_dec_and_test(&ri->use_count)) { |
114 | kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); | ||
115 | kfree(ri); | ||
116 | } | ||
101 | } | 117 | } |
102 | EXPORT_SYMBOL_GPL(kvm_release_rma); | 118 | EXPORT_SYMBOL_GPL(kvm_release_rma); |
103 | 119 | ||
104 | /*************** HPT *************/ | 120 | static int __init early_parse_kvm_cma_resv(char *p) |
105 | |||
106 | /* | ||
107 | * This maintains a list of big linear HPT tables that contain the GVA->HPA | ||
108 | * memory mappings. If we don't reserve those early on, we might not be able | ||
109 | * to get a big (usually 16MB) linear memory region from the kernel anymore. | ||
110 | */ | ||
111 | |||
112 | static unsigned long kvm_hpt_count; | ||
113 | |||
114 | static int __init early_parse_hpt_count(char *p) | ||
115 | { | 121 | { |
122 | pr_debug("%s(%s)\n", __func__, p); | ||
116 | if (!p) | 123 | if (!p) |
117 | return 1; | 124 | return -EINVAL; |
118 | 125 | return kstrtoul(p, 0, &kvm_cma_resv_ratio); | |
119 | kvm_hpt_count = simple_strtoul(p, NULL, 0); | ||
120 | |||
121 | return 0; | ||
122 | } | 126 | } |
123 | early_param("kvm_hpt_count", early_parse_hpt_count); | 127 | early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); |
124 | 128 | ||
125 | struct kvmppc_linear_info *kvm_alloc_hpt(void) | 129 | struct page *kvm_alloc_hpt(unsigned long nr_pages) |
126 | { | 130 | { |
127 | return kvm_alloc_linear(KVM_LINEAR_HPT); | 131 | unsigned long align_pages = HPT_ALIGN_PAGES; |
132 | |||
133 | /* Old CPUs require HPT aligned on a multiple of its size */ | ||
134 | if (!cpu_has_feature(CPU_FTR_ARCH_206)) | ||
135 | align_pages = nr_pages; | ||
136 | return kvm_alloc_cma(nr_pages, align_pages); | ||
128 | } | 137 | } |
129 | EXPORT_SYMBOL_GPL(kvm_alloc_hpt); | 138 | EXPORT_SYMBOL_GPL(kvm_alloc_hpt); |
130 | 139 | ||
131 | void kvm_release_hpt(struct kvmppc_linear_info *li) | 140 | void kvm_release_hpt(struct page *page, unsigned long nr_pages) |
132 | { | 141 | { |
133 | kvm_release_linear(li); | 142 | kvm_release_cma(page, nr_pages); |
134 | } | 143 | } |
135 | EXPORT_SYMBOL_GPL(kvm_release_hpt); | 144 | EXPORT_SYMBOL_GPL(kvm_release_hpt); |
136 | 145 | ||
137 | /*************** generic *************/ | 146 | /** |
138 | 147 | * kvm_cma_reserve() - reserve area for kvm hash pagetable | |
139 | static LIST_HEAD(free_linears); | 148 | * |
140 | static DEFINE_SPINLOCK(linear_lock); | 149 | * This function reserves memory from early allocator. It should be |
141 | 150 | * called by arch specific code once the early allocator (memblock or bootmem) | |
142 | static void __init kvm_linear_init_one(ulong size, int count, int type) | 151 | * has been activated and all other subsystems have already allocated/reserved |
143 | { | 152 | * memory. |
144 | unsigned long i; | ||
145 | unsigned long j, npages; | ||
146 | void *linear; | ||
147 | struct page *pg; | ||
148 | const char *typestr; | ||
149 | struct kvmppc_linear_info *linear_info; | ||
150 | |||
151 | if (!count) | ||
152 | return; | ||
153 | |||
154 | typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; | ||
155 | |||
156 | npages = size >> PAGE_SHIFT; | ||
157 | linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); | ||
158 | for (i = 0; i < count; ++i) { | ||
159 | linear = alloc_bootmem_align(size, size); | ||
160 | pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, | ||
161 | size >> 20); | ||
162 | linear_info[i].base_virt = linear; | ||
163 | linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; | ||
164 | linear_info[i].npages = npages; | ||
165 | linear_info[i].type = type; | ||
166 | list_add_tail(&linear_info[i].list, &free_linears); | ||
167 | atomic_set(&linear_info[i].use_count, 0); | ||
168 | |||
169 | pg = pfn_to_page(linear_info[i].base_pfn); | ||
170 | for (j = 0; j < npages; ++j) { | ||
171 | atomic_inc(&pg->_count); | ||
172 | ++pg; | ||
173 | } | ||
174 | } | ||
175 | } | ||
176 | |||
177 | static struct kvmppc_linear_info *kvm_alloc_linear(int type) | ||
178 | { | ||
179 | struct kvmppc_linear_info *ri, *ret; | ||
180 | |||
181 | ret = NULL; | ||
182 | spin_lock(&linear_lock); | ||
183 | list_for_each_entry(ri, &free_linears, list) { | ||
184 | if (ri->type != type) | ||
185 | continue; | ||
186 | |||
187 | list_del(&ri->list); | ||
188 | atomic_inc(&ri->use_count); | ||
189 | memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); | ||
190 | ret = ri; | ||
191 | break; | ||
192 | } | ||
193 | spin_unlock(&linear_lock); | ||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static void kvm_release_linear(struct kvmppc_linear_info *ri) | ||
198 | { | ||
199 | if (atomic_dec_and_test(&ri->use_count)) { | ||
200 | spin_lock(&linear_lock); | ||
201 | list_add_tail(&ri->list, &free_linears); | ||
202 | spin_unlock(&linear_lock); | ||
203 | |||
204 | } | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Called at boot time while the bootmem allocator is active, | ||
209 | * to allocate contiguous physical memory for the hash page | ||
210 | * tables for guests. | ||
211 | */ | 153 | */ |
212 | void __init kvm_linear_init(void) | 154 | void __init kvm_cma_reserve(void) |
213 | { | 155 | { |
214 | /* HPT */ | 156 | unsigned long align_size; |
215 | kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT); | 157 | struct memblock_region *reg; |
216 | 158 | phys_addr_t selected_size = 0; | |
217 | /* RMA */ | 159 | /* |
218 | /* Only do this on PPC970 in HV mode */ | 160 | * We cannot use memblock_phys_mem_size() here, because |
219 | if (!cpu_has_feature(CPU_FTR_HVMODE) || | 161 | * memblock_analyze() has not been called yet. |
220 | !cpu_has_feature(CPU_FTR_ARCH_201)) | 162 | */ |
221 | return; | 163 | for_each_memblock(memory, reg) |
222 | 164 | selected_size += memblock_region_memory_end_pfn(reg) - | |
223 | if (!kvm_rma_size || !kvm_rma_count) | 165 | memblock_region_memory_base_pfn(reg); |
224 | return; | 166 | |
225 | 167 | selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; | |
226 | /* Check that the requested size is one supported in hardware */ | 168 | if (selected_size) { |
227 | if (lpcr_rmls(kvm_rma_size) < 0) { | 169 | pr_debug("%s: reserving %ld MiB for global area\n", __func__, |
228 | pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); | 170 | (unsigned long)selected_size / SZ_1M); |
229 | return; | 171 | /* |
172 | * Old CPUs require HPT aligned on a multiple of its size. So for them | ||
173 | * make the alignment as max size we could request. | ||
174 | */ | ||
175 | if (!cpu_has_feature(CPU_FTR_ARCH_206)) | ||
176 | align_size = __rounddown_pow_of_two(selected_size); | ||
177 | else | ||
178 | align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; | ||
179 | |||
180 | align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); | ||
181 | kvm_cma_declare_contiguous(selected_size, align_size); | ||
230 | } | 182 | } |
231 | |||
232 | kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); | ||
233 | } | 183 | } |
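kvm_cma_reserve() sizes the reservation as a percentage of memory (5% by default, tunable via kvm_cma_resv_ratio), then picks an alignment that is either the 256 KiB HPT alignment on newer CPUs or, on older ones, the size rounded down to a power of two, and never smaller than one RMA. The snippet below only reproduces that arithmetic for an assumed 64 GiB machine with 4 KiB pages; the numbers and helper are illustrative, not taken from a real system.

```c
#include <stdio.h>

#define PAGE_SHIFT      12
#define SZ_1M           (1UL << 20)
#define HPT_ALIGN_PAGES ((1UL << 18) >> PAGE_SHIFT)	/* 256 KiB in pages */

static unsigned long rounddown_pow_of_two(unsigned long x)
{
	unsigned long r = 1;
	while (r <= x / 2)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long mem_bytes = 64UL << 30;			/* pretend 64 GiB of RAM */
	unsigned long resv_ratio = 5;				/* kvm_cma_resv_ratio default */
	unsigned long rma_pages = (1UL << 27) >> PAGE_SHIFT;	/* 128 MiB RMA */
	int old_cpu = 0;					/* !CPU_FTR_ARCH_206, e.g. PPC970 */

	unsigned long selected = mem_bytes / 100 * resv_ratio;
	unsigned long align = old_cpu ? rounddown_pow_of_two(selected)
				      : HPT_ALIGN_PAGES << PAGE_SHIFT;

	if (align < (rma_pages << PAGE_SHIFT))
		align = rma_pages << PAGE_SHIFT;		/* at least one RMA */

	printf("reserve %lu MiB, aligned to %lu MiB\n",
	       selected / SZ_1M, align / SZ_1M);
	return 0;
}
```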
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 000000000000..d9d3d8553d51
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,240 @@ | |||
1 | /* | ||
2 | * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA | ||
3 | * for DMA mapping framework | ||
4 | * | ||
5 | * Copyright IBM Corporation, 2013 | ||
6 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as | ||
10 | * published by the Free Software Foundation; either version 2 of the | ||
11 | * License or (at your optional) any later version of the license. | ||
12 | * | ||
13 | */ | ||
14 | #define pr_fmt(fmt) "kvm_cma: " fmt | ||
15 | |||
16 | #ifdef CONFIG_CMA_DEBUG | ||
17 | #ifndef DEBUG | ||
18 | # define DEBUG | ||
19 | #endif | ||
20 | #endif | ||
21 | |||
22 | #include <linux/memblock.h> | ||
23 | #include <linux/mutex.h> | ||
24 | #include <linux/sizes.h> | ||
25 | #include <linux/slab.h> | ||
26 | |||
27 | #include "book3s_hv_cma.h" | ||
28 | |||
29 | struct kvm_cma { | ||
30 | unsigned long base_pfn; | ||
31 | unsigned long count; | ||
32 | unsigned long *bitmap; | ||
33 | }; | ||
34 | |||
35 | static DEFINE_MUTEX(kvm_cma_mutex); | ||
36 | static struct kvm_cma kvm_cma_area; | ||
37 | |||
38 | /** | ||
39 | * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling | ||
40 | * for kvm hash pagetable | ||
41 | * @size: Size of the reserved memory. | ||
42 | * @alignment: Alignment for the contiguous memory area | ||
43 | * | ||
44 | * This function reserves memory for kvm cma area. It should be | ||
45 | * called by arch code when early allocator (memblock or bootmem) | ||
46 | * is still activate. | ||
47 | */ | ||
48 | long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment) | ||
49 | { | ||
50 | long base_pfn; | ||
51 | phys_addr_t addr; | ||
52 | struct kvm_cma *cma = &kvm_cma_area; | ||
53 | |||
54 | pr_debug("%s(size %lx)\n", __func__, (unsigned long)size); | ||
55 | |||
56 | if (!size) | ||
57 | return -EINVAL; | ||
58 | /* | ||
59 | * Sanitise input arguments. | ||
60 | * We should be pageblock aligned for CMA. | ||
61 | */ | ||
62 | alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); | ||
63 | size = ALIGN(size, alignment); | ||
64 | /* | ||
65 | * Reserve memory | ||
66 | * Use __memblock_alloc_base() since | ||
67 | * memblock_alloc_base() panic()s. | ||
68 | */ | ||
69 | addr = __memblock_alloc_base(size, alignment, 0); | ||
70 | if (!addr) { | ||
71 | base_pfn = -ENOMEM; | ||
72 | goto err; | ||
73 | } else | ||
74 | base_pfn = PFN_DOWN(addr); | ||
75 | |||
76 | /* | ||
77 | * Each reserved area must be initialised later, when more kernel | ||
78 | * subsystems (like slab allocator) are available. | ||
79 | */ | ||
80 | cma->base_pfn = base_pfn; | ||
81 | cma->count = size >> PAGE_SHIFT; | ||
82 | pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); | ||
83 | return 0; | ||
84 | err: | ||
85 | pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); | ||
86 | return base_pfn; | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * kvm_alloc_cma() - allocate pages from contiguous area | ||
91 | * @nr_pages: Requested number of pages. | ||
92 | * @align_pages: Requested alignment in number of pages | ||
93 | * | ||
94 | * This function allocates memory buffer for hash pagetable. | ||
95 | */ | ||
96 | struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) | ||
97 | { | ||
98 | int ret; | ||
99 | struct page *page = NULL; | ||
100 | struct kvm_cma *cma = &kvm_cma_area; | ||
101 | unsigned long chunk_count, nr_chunk; | ||
102 | unsigned long mask, pfn, pageno, start = 0; | ||
103 | |||
104 | |||
105 | if (!cma || !cma->count) | ||
106 | return NULL; | ||
107 | |||
108 | pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, | ||
109 | (void *)cma, nr_pages, align_pages); | ||
110 | |||
111 | if (!nr_pages) | ||
112 | return NULL; | ||
113 | /* | ||
114 | * align mask with chunk size. The bit tracks pages in chunk size | ||
115 | */ | ||
116 | VM_BUG_ON(!is_power_of_2(align_pages)); | ||
117 | mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; | ||
118 | BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); | ||
119 | |||
120 | chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
121 | nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
122 | |||
123 | mutex_lock(&kvm_cma_mutex); | ||
124 | for (;;) { | ||
125 | pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, | ||
126 | start, nr_chunk, mask); | ||
127 | if (pageno >= chunk_count) | ||
128 | break; | ||
129 | |||
130 | pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); | ||
131 | ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); | ||
132 | if (ret == 0) { | ||
133 | bitmap_set(cma->bitmap, pageno, nr_chunk); | ||
134 | page = pfn_to_page(pfn); | ||
135 | memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); | ||
136 | break; | ||
137 | } else if (ret != -EBUSY) { | ||
138 | break; | ||
139 | } | ||
140 | pr_debug("%s(): memory range at %p is busy, retrying\n", | ||
141 | __func__, pfn_to_page(pfn)); | ||
142 | /* try again with a bit different memory target */ | ||
143 | start = pageno + mask + 1; | ||
144 | } | ||
145 | mutex_unlock(&kvm_cma_mutex); | ||
146 | pr_debug("%s(): returned %p\n", __func__, page); | ||
147 | return page; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * kvm_release_cma() - release allocated pages for hash pagetable | ||
152 | * @pages: Allocated pages. | ||
153 | * @nr_pages: Number of allocated pages. | ||
154 | * | ||
155 | * This function releases memory allocated by kvm_alloc_cma(). | ||
156 | * It returns false when provided pages do not belong to contiguous area and | ||
157 | * true otherwise. | ||
158 | */ | ||
159 | bool kvm_release_cma(struct page *pages, unsigned long nr_pages) | ||
160 | { | ||
161 | unsigned long pfn; | ||
162 | unsigned long nr_chunk; | ||
163 | struct kvm_cma *cma = &kvm_cma_area; | ||
164 | |||
165 | if (!cma || !pages) | ||
166 | return false; | ||
167 | |||
168 | pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages); | ||
169 | |||
170 | pfn = page_to_pfn(pages); | ||
171 | |||
172 | if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) | ||
173 | return false; | ||
174 | |||
175 | VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count); | ||
176 | nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
177 | |||
178 | mutex_lock(&kvm_cma_mutex); | ||
179 | bitmap_clear(cma->bitmap, | ||
180 | (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT), | ||
181 | nr_chunk); | ||
182 | free_contig_range(pfn, nr_pages); | ||
183 | mutex_unlock(&kvm_cma_mutex); | ||
184 | |||
185 | return true; | ||
186 | } | ||
187 | |||
188 | static int __init kvm_cma_activate_area(unsigned long base_pfn, | ||
189 | unsigned long count) | ||
190 | { | ||
191 | unsigned long pfn = base_pfn; | ||
192 | unsigned i = count >> pageblock_order; | ||
193 | struct zone *zone; | ||
194 | |||
195 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
196 | zone = page_zone(pfn_to_page(pfn)); | ||
197 | do { | ||
198 | unsigned j; | ||
199 | base_pfn = pfn; | ||
200 | for (j = pageblock_nr_pages; j; --j, pfn++) { | ||
201 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
202 | /* | ||
203 | * alloc_contig_range requires the pfn range | ||
204 | * specified to be in the same zone. Make this | ||
205 | * simple by forcing the entire CMA resv range | ||
206 | * to be in the same zone. | ||
207 | */ | ||
208 | if (page_zone(pfn_to_page(pfn)) != zone) | ||
209 | return -EINVAL; | ||
210 | } | ||
211 | init_cma_reserved_pageblock(pfn_to_page(base_pfn)); | ||
212 | } while (--i); | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int __init kvm_cma_init_reserved_areas(void) | ||
217 | { | ||
218 | int bitmap_size, ret; | ||
219 | unsigned long chunk_count; | ||
220 | struct kvm_cma *cma = &kvm_cma_area; | ||
221 | |||
222 | pr_debug("%s()\n", __func__); | ||
223 | if (!cma->count) | ||
224 | return 0; | ||
225 | chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
226 | bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long); | ||
227 | cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
228 | if (!cma->bitmap) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | ret = kvm_cma_activate_area(cma->base_pfn, cma->count); | ||
232 | if (ret) | ||
233 | goto error; | ||
234 | return 0; | ||
235 | |||
236 | error: | ||
237 | kfree(cma->bitmap); | ||
238 | return ret; | ||
239 | } | ||
240 | core_initcall(kvm_cma_init_reserved_areas); | ||
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 000000000000..655144f75fa5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA | ||
3 | * for DMA mapping framework | ||
4 | * | ||
5 | * Copyright IBM Corporation, 2013 | ||
6 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as | ||
10 | * published by the Free Software Foundation; either version 2 of the | ||
11 | * License or (at your optional) any later version of the license. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __POWERPC_KVM_CMA_ALLOC_H__ | ||
16 | #define __POWERPC_KVM_CMA_ALLOC_H__ | ||
17 | /* | ||
18 | * Both RMA and Hash page allocation will be multiple of 256K. | ||
19 | */ | ||
20 | #define KVM_CMA_CHUNK_ORDER 18 | ||
21 | |||
22 | extern struct page *kvm_alloc_cma(unsigned long nr_pages, | ||
23 | unsigned long align_pages); | ||
24 | extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); | ||
25 | extern long kvm_cma_declare_contiguous(phys_addr_t size, | ||
26 | phys_addr_t alignment) __init; | ||
27 | #endif | ||
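Both the RMA and the hash page table are carved out of this CMA region in 256 KiB chunks (KVM_CMA_CHUNK_ORDER = 18), so the bitmap in book3s_hv_cma.c tracks chunks rather than pages. A quick worked example of the page-to-chunk conversions used there, assuming 4 KiB pages and a 16 MiB HPT (both assumptions, chosen just for the arithmetic):

```c
#include <stdio.h>

#define PAGE_SHIFT          12	/* assume 4 KiB pages for the example */
#define KVM_CMA_CHUNK_ORDER 18	/* 256 KiB chunks, as in book3s_hv_cma.h */

int main(void)
{
	unsigned long hpt_pages = (16UL << 20) >> PAGE_SHIFT;	/* a 16 MiB HPT */
	unsigned long align_pages = (1UL << 18) >> PAGE_SHIFT;	/* 256 KiB alignment */

	/* Conversions mirroring kvm_alloc_cma(): pages -> 256 KiB chunks. */
	unsigned long nr_chunk = hpt_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
	unsigned long mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;

	printf("%lu pages -> %lu chunks, alignment mask 0x%lx\n",
	       hpt_pages, nr_chunk, mask);
	return 0;
}
```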
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fc25689a9f35..45e30d6e462b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock) | |||
383 | return old == 0; | 383 | return old == 0; |
384 | } | 384 | } |
385 | 385 | ||
386 | /* | ||
387 | * tlbie/tlbiel is a bit different on the PPC970 compared to later | ||
388 | * processors such as POWER7; the large page bit is in the instruction | ||
389 | * not RB, and the top 16 bits and the bottom 12 bits of the VA | ||
390 | * in RB must be 0. | ||
391 | */ | ||
392 | static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, | ||
393 | long npages, int global, bool need_sync) | ||
394 | { | ||
395 | long i; | ||
396 | |||
397 | if (global) { | ||
398 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
399 | cpu_relax(); | ||
400 | if (need_sync) | ||
401 | asm volatile("ptesync" : : : "memory"); | ||
402 | for (i = 0; i < npages; ++i) { | ||
403 | unsigned long rb = rbvalues[i]; | ||
404 | |||
405 | if (rb & 1) /* large page */ | ||
406 | asm volatile("tlbie %0,1" : : | ||
407 | "r" (rb & 0x0000fffffffff000ul)); | ||
408 | else | ||
409 | asm volatile("tlbie %0,0" : : | ||
410 | "r" (rb & 0x0000fffffffff000ul)); | ||
411 | } | ||
412 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
413 | kvm->arch.tlbie_lock = 0; | ||
414 | } else { | ||
415 | if (need_sync) | ||
416 | asm volatile("ptesync" : : : "memory"); | ||
417 | for (i = 0; i < npages; ++i) { | ||
418 | unsigned long rb = rbvalues[i]; | ||
419 | |||
420 | if (rb & 1) /* large page */ | ||
421 | asm volatile("tlbiel %0,1" : : | ||
422 | "r" (rb & 0x0000fffffffff000ul)); | ||
423 | else | ||
424 | asm volatile("tlbiel %0,0" : : | ||
425 | "r" (rb & 0x0000fffffffff000ul)); | ||
426 | } | ||
427 | asm volatile("ptesync" : : : "memory"); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, | ||
432 | long npages, int global, bool need_sync) | ||
433 | { | ||
434 | long i; | ||
435 | |||
436 | if (cpu_has_feature(CPU_FTR_ARCH_201)) { | ||
437 | /* PPC970 tlbie instruction is a bit different */ | ||
438 | do_tlbies_970(kvm, rbvalues, npages, global, need_sync); | ||
439 | return; | ||
440 | } | ||
441 | if (global) { | ||
442 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
443 | cpu_relax(); | ||
444 | if (need_sync) | ||
445 | asm volatile("ptesync" : : : "memory"); | ||
446 | for (i = 0; i < npages; ++i) | ||
447 | asm volatile(PPC_TLBIE(%1,%0) : : | ||
448 | "r" (rbvalues[i]), "r" (kvm->arch.lpid)); | ||
449 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
450 | kvm->arch.tlbie_lock = 0; | ||
451 | } else { | ||
452 | if (need_sync) | ||
453 | asm volatile("ptesync" : : : "memory"); | ||
454 | for (i = 0; i < npages; ++i) | ||
455 | asm volatile("tlbiel %0" : : "r" (rbvalues[i])); | ||
456 | asm volatile("ptesync" : : : "memory"); | ||
457 | } | ||
458 | } | ||
459 | |||
386 | long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, | 460 | long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, |
387 | unsigned long pte_index, unsigned long avpn, | 461 | unsigned long pte_index, unsigned long avpn, |
388 | unsigned long *hpret) | 462 | unsigned long *hpret) |
@@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, | |||
408 | if (v & HPTE_V_VALID) { | 482 | if (v & HPTE_V_VALID) { |
409 | hpte[0] &= ~HPTE_V_VALID; | 483 | hpte[0] &= ~HPTE_V_VALID; |
410 | rb = compute_tlbie_rb(v, hpte[1], pte_index); | 484 | rb = compute_tlbie_rb(v, hpte[1], pte_index); |
411 | if (global_invalidates(kvm, flags)) { | 485 | do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); |
412 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
413 | cpu_relax(); | ||
414 | asm volatile("ptesync" : : : "memory"); | ||
415 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
416 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
417 | asm volatile("ptesync" : : : "memory"); | ||
418 | kvm->arch.tlbie_lock = 0; | ||
419 | } else { | ||
420 | asm volatile("ptesync" : : : "memory"); | ||
421 | asm volatile("tlbiel %0" : : "r" (rb)); | ||
422 | asm volatile("ptesync" : : : "memory"); | ||
423 | } | ||
424 | /* Read PTE low word after tlbie to get final R/C values */ | 486 | /* Read PTE low word after tlbie to get final R/C values */ |
425 | remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); | 487 | remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); |
426 | } | 488 | } |
@@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
448 | unsigned long *hp, *hptes[4], tlbrb[4]; | 510 | unsigned long *hp, *hptes[4], tlbrb[4]; |
449 | long int i, j, k, n, found, indexes[4]; | 511 | long int i, j, k, n, found, indexes[4]; |
450 | unsigned long flags, req, pte_index, rcbits; | 512 | unsigned long flags, req, pte_index, rcbits; |
451 | long int local = 0; | 513 | int global; |
452 | long int ret = H_SUCCESS; | 514 | long int ret = H_SUCCESS; |
453 | struct revmap_entry *rev, *revs[4]; | 515 | struct revmap_entry *rev, *revs[4]; |
454 | 516 | ||
455 | if (atomic_read(&kvm->online_vcpus) == 1) | 517 | global = global_invalidates(kvm, 0); |
456 | local = 1; | ||
457 | for (i = 0; i < 4 && ret == H_SUCCESS; ) { | 518 | for (i = 0; i < 4 && ret == H_SUCCESS; ) { |
458 | n = 0; | 519 | n = 0; |
459 | for (; i < 4; ++i) { | 520 | for (; i < 4; ++i) { |
@@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
529 | break; | 590 | break; |
530 | 591 | ||
531 | /* Now that we've collected a batch, do the tlbies */ | 592 | /* Now that we've collected a batch, do the tlbies */ |
532 | if (!local) { | 593 | do_tlbies(kvm, tlbrb, n, global, true); |
533 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
534 | cpu_relax(); | ||
535 | asm volatile("ptesync" : : : "memory"); | ||
536 | for (k = 0; k < n; ++k) | ||
537 | asm volatile(PPC_TLBIE(%1,%0) : : | ||
538 | "r" (tlbrb[k]), | ||
539 | "r" (kvm->arch.lpid)); | ||
540 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
541 | kvm->arch.tlbie_lock = 0; | ||
542 | } else { | ||
543 | asm volatile("ptesync" : : : "memory"); | ||
544 | for (k = 0; k < n; ++k) | ||
545 | asm volatile("tlbiel %0" : : "r" (tlbrb[k])); | ||
546 | asm volatile("ptesync" : : : "memory"); | ||
547 | } | ||
548 | 594 | ||
549 | /* Read PTE low words after tlbie to get final R/C values */ | 595 | /* Read PTE low words after tlbie to get final R/C values */ |
550 | for (k = 0; k < n; ++k) { | 596 | for (k = 0; k < n; ++k) { |
@@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, | |||
603 | if (v & HPTE_V_VALID) { | 649 | if (v & HPTE_V_VALID) { |
604 | rb = compute_tlbie_rb(v, r, pte_index); | 650 | rb = compute_tlbie_rb(v, r, pte_index); |
605 | hpte[0] = v & ~HPTE_V_VALID; | 651 | hpte[0] = v & ~HPTE_V_VALID; |
606 | if (global_invalidates(kvm, flags)) { | 652 | do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); |
607 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
608 | cpu_relax(); | ||
609 | asm volatile("ptesync" : : : "memory"); | ||
610 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
611 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
612 | asm volatile("ptesync" : : : "memory"); | ||
613 | kvm->arch.tlbie_lock = 0; | ||
614 | } else { | ||
615 | asm volatile("ptesync" : : : "memory"); | ||
616 | asm volatile("tlbiel %0" : : "r" (rb)); | ||
617 | asm volatile("ptesync" : : : "memory"); | ||
618 | } | ||
619 | /* | 653 | /* |
620 | * If the host has this page as readonly but the guest | 654 | * If the host has this page as readonly but the guest |
621 | * wants to make it read/write, reduce the permissions. | 655 | * wants to make it read/write, reduce the permissions. |
@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, | |||
686 | 720 | ||
687 | hptep[0] &= ~HPTE_V_VALID; | 721 | hptep[0] &= ~HPTE_V_VALID; |
688 | rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); | 722 | rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); |
689 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | 723 | do_tlbies(kvm, &rb, 1, 1, true); |
690 | cpu_relax(); | ||
691 | asm volatile("ptesync" : : : "memory"); | ||
692 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
693 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
694 | asm volatile("ptesync" : : : "memory"); | ||
695 | kvm->arch.tlbie_lock = 0; | ||
696 | } | 724 | } |
697 | EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); | 725 | EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); |
698 | 726 | ||
@@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, | |||
706 | rbyte = (hptep[1] & ~HPTE_R_R) >> 8; | 734 | rbyte = (hptep[1] & ~HPTE_R_R) >> 8; |
707 | /* modify only the second-last byte, which contains the ref bit */ | 735 | /* modify only the second-last byte, which contains the ref bit */ |
708 | *((char *)hptep + 14) = rbyte; | 736 | *((char *)hptep + 14) = rbyte; |
709 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | 737 | do_tlbies(kvm, &rb, 1, 1, false); |
710 | cpu_relax(); | ||
711 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
712 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
713 | asm volatile("ptesync" : : : "memory"); | ||
714 | kvm->arch.tlbie_lock = 0; | ||
715 | } | 738 | } |
716 | EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); | 739 | EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); |
717 | 740 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b02f91e4c70d..60dce5bfab3f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1381,7 +1381,7 @@ hcall_try_real_mode: | |||
1381 | cmpldi r3,hcall_real_table_end - hcall_real_table | 1381 | cmpldi r3,hcall_real_table_end - hcall_real_table |
1382 | bge guest_exit_cont | 1382 | bge guest_exit_cont |
1383 | LOAD_REG_ADDR(r4, hcall_real_table) | 1383 | LOAD_REG_ADDR(r4, hcall_real_table) |
1384 | lwzx r3,r3,r4 | 1384 | lwax r3,r3,r4 |
1385 | cmpwi r3,0 | 1385 | cmpwi r3,0 |
1386 | beq guest_exit_cont | 1386 | beq guest_exit_cont |
1387 | add r3,r3,r4 | 1387 | add r3,r3,r4 |
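The one-instruction change from lwzx to lwax is about sign extension: hcall_real_table holds 32-bit offsets relative to the table, and a handler placed before the table gives a negative offset that lwzx would zero-extend into a huge positive value before the following add. A small C analogue of the difference, with made-up addresses:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t table = 0x100000;	/* pretend address of hcall_real_table */
	int32_t  off   = -0x800;	/* handler located before the table */

	/* lwzx-style: load the word as unsigned 32-bit, then add. */
	uint64_t wrong = table + (uint64_t)(uint32_t)off;
	/* lwax-style: sign-extend the 32-bit word before the add. */
	uint64_t right = table + (uint64_t)(int64_t)off;

	printf("zero-extended: 0x%llx\n", (unsigned long long)wrong);
	printf("sign-extended: 0x%llx\n", (unsigned long long)right);
	return 0;
}
```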
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 48cbbf862958..17cfae5497a3 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -92,6 +92,11 @@ kvm_start_lightweight: | |||
92 | PPC_LL r3, VCPU_HFLAGS(r4) | 92 | PPC_LL r3, VCPU_HFLAGS(r4) |
93 | rldicl r3, r3, 0, 63 /* r3 &= 1 */ | 93 | rldicl r3, r3, 0, 63 /* r3 &= 1 */ |
94 | stb r3, HSTATE_RESTORE_HID5(r13) | 94 | stb r3, HSTATE_RESTORE_HID5(r13) |
95 | |||
96 | /* Load up guest SPRG3 value, since it's user readable */ | ||
97 | ld r3, VCPU_SHARED(r4) | ||
98 | ld r3, VCPU_SHARED_SPRG3(r3) | ||
99 | mtspr SPRN_SPRG3, r3 | ||
95 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 100 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
96 | 101 | ||
97 | PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ | 102 | PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ |
@@ -123,6 +128,15 @@ kvmppc_handler_highmem: | |||
123 | /* R7 = vcpu */ | 128 | /* R7 = vcpu */ |
124 | PPC_LL r7, GPR4(r1) | 129 | PPC_LL r7, GPR4(r1) |
125 | 130 | ||
131 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
132 | /* | ||
133 | * Reload kernel SPRG3 value. | ||
134 | * No need to save guest value as usermode can't modify SPRG3. | ||
135 | */ | ||
136 | ld r3, PACA_SPRG3(r13) | ||
137 | mtspr SPRN_SPRG3, r3 | ||
138 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
139 | |||
126 | PPC_STL r14, VCPU_GPR(R14)(r7) | 140 | PPC_STL r14, VCPU_GPR(R14)(r7) |
127 | PPC_STL r15, VCPU_GPR(R15)(r7) | 141 | PPC_STL r15, VCPU_GPR(R15)(r7) |
128 | PPC_STL r16, VCPU_GPR(R16)(r7) | 142 | PPC_STL r16, VCPU_GPR(R16)(r7) |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 19498a567a81..27db1e665959 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 		 * both the traditional FP registers and the added VSX
 		 * registers into thread.fpr[].
 		 */
-		giveup_fpu(current);
+		if (current->thread.regs->msr & MSR_FP)
+			giveup_fpu(current);
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
 
@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 
 #ifdef CONFIG_ALTIVEC
 	if (msr & MSR_VEC) {
-		giveup_altivec(current);
+		if (current->thread.regs->msr & MSR_VEC)
+			giveup_altivec(current);
 		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
 		vcpu->arch.vscr = t->vscr;
 	}
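These two hunks are the first half of "Don't corrupt guest state when kernel uses VMX": if in-kernel code (an optimised memcpy, for instance) has already used the FPU or VMX, the unit was given up at that point and MSR_FP/MSR_VEC are clear in the thread's MSR, so the registers no longer hold guest state and calling giveup_fpu()/giveup_altivec() again is skipped; the copy already flushed into the thread_struct is used instead. The guard is just a bit test, sketched here in isolation with stand-in functions:

#include <stdio.h>

#define MSR_FP  (1u << 13)
#define MSR_VEC (1u << 25)

/* Stand-in for giveup_fpu(): flush live FP registers to the thread_struct. */
static void giveup_fpu_model(unsigned int *thread_msr)
{
    printf("flushing FP registers to thread_struct\n");
    *thread_msr &= ~MSR_FP;
}

static void save_guest_fp(unsigned int *thread_msr)
{
    /* The fix: only flush if the thread still owns the FPU.  If the kernel
     * already used the FPU, the guest values were flushed at that point and
     * the thread_struct copy is the one to trust. */
    if (*thread_msr & MSR_FP)
        giveup_fpu_model(thread_msr);
    printf("copying thread_struct FP state into vcpu->arch\n");
}

int main(void)
{
    unsigned int msr = MSR_FP;
    save_guest_fp(&msr);   /* thread owns FP: flush, then copy */
    save_guest_fp(&msr);   /* already flushed: just copy       */
    return 0;
}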
@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 #endif
 
-	current->thread.regs->msr |= msr;
-
 	if (msr & MSR_FP) {
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #endif
 	}
 
+	current->thread.regs->msr |= msr;
 	vcpu->arch.guest_owned_ext |= msr;
 	kvmppc_recalc_shadow_msr(vcpu);
 
 	return RESUME_GUEST;
 }
 
+/*
+ * Kernel code using FP or VMX could have flushed guest state to
+ * the thread_struct; if so, get it back now.
+ */
+static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
+{
+	unsigned long lost_ext;
+
+	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
+	if (!lost_ext)
+		return;
+
+	if (lost_ext & MSR_FP)
+		kvmppc_load_up_fpu();
+	if (lost_ext & MSR_VEC)
+		kvmppc_load_up_altivec();
+	current->thread.regs->msr |= lost_ext;
+}
+
 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		       unsigned int exit_nr)
 {
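This is the second half of the same fix: the MSR bits for the guest-owned units are now set only after the registers have actually been loaded, and the new kvmppc_handle_lost_ext() reloads whatever the kernel took away in the meantime. The interesting part is the mask arithmetic: guest_owned_ext records which units hold guest state, the thread MSR records which units are still live, and the difference is what must be reloaded. A small worked example of that expression:

#include <stdio.h>

#define MSR_FP  (1u << 13)
#define MSR_VEC (1u << 25)

int main(void)
{
    /* The guest owns both FP and VMX state... */
    unsigned int guest_owned_ext = MSR_FP | MSR_VEC;
    /* ...but kernel code used VMX in the meantime, clearing MSR_VEC. */
    unsigned int thread_msr = MSR_FP;

    /* Same expression as in kvmppc_handle_lost_ext(). */
    unsigned int lost_ext = guest_owned_ext & ~thread_msr;

    if (lost_ext & MSR_FP)
        printf("reload FP state\n");    /* not taken here */
    if (lost_ext & MSR_VEC)
        printf("reload VMX state\n");   /* taken: only VMX was lost */
    return 0;
}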
@@ -772,7 +792,7 @@ program_interrupt:
 	}
 	case BOOK3S_INTERRUPT_SYSCALL:
 		if (vcpu->arch.papr_enabled &&
-		    (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
 		    !(vcpu->arch.shared->msr & MSR_PR)) {
 			/* SC 1 papr hypercalls */
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
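Switching from kvmppc_get_last_inst() to kvmppc_get_last_sc() is the "Make instruction fetch fallback work for system calls" change: on a syscall exit SRR0 points past the sc instruction, so the fallback fetch has to read from pc - 4 rather than pc (that, at least, is the idea; the helper itself is defined in the headers, not in this diff). The magic constant 0x44000022 is simply the encoding of "sc 1", which can be checked by hand:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t inst = 0x44000022;            /* the constant tested in the diff   */

    uint32_t opcode = inst >> 26;          /* primary opcode, 17 == sc          */
    uint32_t lev    = (inst >> 5) & 0x7f;  /* LEV field: 1 selects a hypercall  */

    printf("opcode=%u lev=%u\n", opcode, lev);   /* prints "opcode=17 lev=1"    */

    /* The diff additionally requires MSR_PR to be clear, i.e. the sc 1 came
     * from the guest kernel, before treating it as a PAPR hypercall. */
    return 0;
}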
@@ -890,8 +910,9 @@ program_interrupt:
 			local_irq_enable();
 			r = s;
 		} else {
-			kvmppc_lazy_ee_enable();
+			kvmppc_fix_ee_before_entry();
 		}
+		kvmppc_handle_lost_ext(vcpu);
 	}
 
 	trace_kvm_book3s_reenter(r, vcpu);
@@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_shadow_vcpu;
 
+	err = -ENOMEM;
 	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-	/* the real shared page fills the last 4k of our page */
-	vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
 	if (!p)
 		goto uninit_vcpu;
+	/* the real shared page fills the last 4k of our page */
+	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* default to book3s_64 (970fx) */
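Two small problems in vcpu creation are fixed here: the error code is now explicitly set to -ENOMEM before the page allocation (the "return appropriate error when allocation fails" patch), and vcpu->arch.shared is derived from p only after the NULL check, instead of computing an address from a failed allocation. A hedged userspace sketch of the general pattern, with hypothetical names:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096

/* Illustrative stand-in for the allocation path in kvmppc_core_vcpu_create(). */
static int create_thing(void **shared_out)
{
    int err;
    char *p;

    err = -ENOMEM;                 /* decide the failure code before allocating */
    p = calloc(1, PAGE_SIZE);      /* models __get_free_page(GFP_KERNEL|__GFP_ZERO) */
    if (!p)
        goto out;                  /* bail out before deriving anything from p  */

    /* Only compute derived pointers once the allocation is known to be good. */
    *shared_out = p + PAGE_SIZE - 4096;
    err = 0;
out:
    return err;
}

int main(void)
{
    void *shared;
    printf("create_thing: %d\n", create_thing(&shared));
    return 0;
}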
@@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (vcpu->arch.shared->msr & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 
-	kvmppc_lazy_ee_enable();
+	kvmppc_fix_ee_before_entry();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 94c1dd46b83d..a3a5cb8ee7ea 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -19,6 +19,7 @@
 #include <asm/hvcall.h>
 #include <asm/xics.h>
 #include <asm/debug.h>
+#include <asm/time.h>
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index dcc94f016007..17722d82f1d1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	kvm_guest_enter();
-
 #ifdef CONFIG_PPC_FPU
 	/* Save userspace FPU state in stack */
 	enable_kernel_fp();
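The dropped kvm_guest_enter() is the booke half of "Don't call kvm_guest_enter twice": kvmppc_prepare_to_enter() already makes that call (its call site is visible in the powerpc.c hunk at the end of this diff), so repeating it here is redundant and would at least skew the guest-time accounting. A toy model of why such mode transitions should happen exactly once, using illustrative names only:

#include <assert.h>

static int in_guest_mode;   /* models the per-task "running a guest" state */

static void guest_enter_model(void)
{
    /* Entering twice without an exit means some caller is redundant. */
    assert(!in_guest_mode);
    in_guest_mode = 1;
}

static void guest_exit_model(void)
{
    assert(in_guest_mode);
    in_guest_mode = 0;
}

int main(void)
{
    guest_enter_model();   /* done once, in the common prepare-to-enter path */
    guest_exit_model();
    return 0;
}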
@@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvmppc_load_guest_fp(vcpu);
 #endif
 
-	kvmppc_lazy_ee_enable();
+	kvmppc_fix_ee_before_entry();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
@@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			local_irq_enable();
 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
 		} else {
-			kvmppc_lazy_ee_enable();
+			kvmppc_fix_ee_before_entry();
 		}
 	}
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ae63ae4a1a5f..f55e14cd1762 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			kvm_guest_exit();
 			continue;
 		}
-
-		trace_hardirqs_on();
 #endif
 
 		kvm_guest_enter();
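The repeated kvmppc_lazy_ee_enable() -> kvmppc_fix_ee_before_entry() rename and this removal of the explicit trace_hardirqs_on() belong to "kvm/ppc: Call trace_hardirqs_on before entry": the lockdep notification is presumably folded into the renamed helper, so the interrupt-enable bookkeeping and its tracing happen together immediately before guest entry instead of being left to each caller. A miniature model of keeping a state change and its trace in one helper (names are illustrative, not the kernel's):

#include <stdio.h>

static int soft_enabled;    /* models the paca soft-enable flag */

static void trace_hardirqs_on_model(void)
{
    printf("lockdep: hardirqs on\n");
}

/* One helper both updates the state and emits the trace, so no caller can
 * enable interrupts for guest entry while forgetting, or misordering, the
 * lockdep annotation. */
static void fix_ee_before_entry_model(void)
{
    trace_hardirqs_on_model();
    soft_enabled = 1;
}

int main(void)
{
    fix_ee_before_entry_model();   /* called right before entering the guest */
    return soft_enabled ? 0 : 1;
}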