author     Scott Wood <scottwood@freescale.com>    2011-06-14 19:34:39 -0400
committer  Avi Kivity <avi@redhat.com>             2011-07-12 06:16:36 -0400
commit     9973d54eeafcd1c3a2e89f0f59280c4c1e03e73b (patch)
tree       80a68f063e34b88d217fb669227cd1e537c884d2
parent     59c1f4e35c3db6c7ea5a04503a43bcbeb98977df (diff)
KVM: PPC: e500: Support large page mappings of PFNMAP vmas.
This allows large pages to be used on guest mappings backed by things like
/dev/mem, resulting in a significant speedup when guest memory is mapped this
way (it's useful for directly-assigned MMIO, too).

This is not a substitute for hugetlbfs integration, but is useful for
configurations where devices are directly assigned on chips without an IOMMU
-- in these cases, we need guest physical and true physical to match, and be
contiguous, so static reservation and mapping via /dev/mem is the most
straightforward way to set things up.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
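For illustration only (not part of this patch), here is a rough user-space sketch of the "static reservation and mapping via /dev/mem" setup the commit message describes. The physical base and size are made-up example values, mapping RAM through /dev/mem requires root (and no STRICT_DEVMEM restriction), and the KVM slot registration is only referenced in a comment. mmap of /dev/mem goes through remap_pfn_range(), so the resulting VMA is VM_PFNMAP -- the case the new code detects.

/*
 * Hypothetical example: back a guest memory slot with a statically
 * reserved physical range via /dev/mem.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        /* Example: 256 MiB reserved at 0x20000000 (e.g. via mem=). */
        const off_t phys_base = 0x20000000;
        const size_t size = 256 << 20;
        void *ram;

        int fd = open("/dev/mem", O_RDWR);
        if (fd < 0) {
                perror("open /dev/mem");
                return 1;
        }

        /* MAP_SHARED mapping of /dev/mem yields a VM_PFNMAP vma. */
        ram = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                   fd, phys_base);
        if (ram == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /*
         * 'ram' would then be registered as the userspace_addr of a
         * KVM_SET_USER_MEMORY_REGION slot whose guest_phys_addr matches
         * phys_base, so guest physical equals true physical.
         */
        printf("guest RAM mapped at %p\n", ram);
        munmap(ram, size);
        close(fd);
        return 0;
}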
-rw-r--r--  arch/powerpc/kvm/e500_tlb.c  103
1 file changed, 94 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 0291c3cf5055..7f808c52e64a 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -270,28 +270,113 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
         u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
 {
+        struct kvm_memory_slot *slot;
         struct tlbe *stlbe;
-        unsigned long pfn;
+        unsigned long pfn, hva;
+        int pfnmap = 0;
+        int tsize = BOOK3E_PAGESZ_4K;
 
         stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
 
         /*
          * Translate guest physical to true physical, acquiring
          * a page reference if it is normal, non-reserved memory.
+         *
+         * gfn_to_memslot() must succeed because otherwise we wouldn't
+         * have gotten this far.  Eventually we should just pass the slot
+         * pointer through from the first lookup.
          */
-        pfn = gfn_to_pfn(vcpu_e500->vcpu.kvm, gfn);
-        if (is_error_pfn(pfn)) {
-                printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
-                                (long)gfn);
-                kvm_release_pfn_clean(pfn);
-                return;
+        slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
+        hva = gfn_to_hva_memslot(slot, gfn);
+
+        if (tlbsel == 1) {
+                struct vm_area_struct *vma;
+                down_read(&current->mm->mmap_sem);
+
+                vma = find_vma(current->mm, hva);
+                if (vma && hva >= vma->vm_start &&
+                    (vma->vm_flags & VM_PFNMAP)) {
+                        /*
+                         * This VMA is a physically contiguous region (e.g.
+                         * /dev/mem) that bypasses normal Linux page
+                         * management.  Find the overlap between the
+                         * vma and the memslot.
+                         */
+
+                        unsigned long start, end;
+                        unsigned long slot_start, slot_end;
+
+                        pfnmap = 1;
+
+                        start = vma->vm_pgoff;
+                        end = start +
+                              ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
+
+                        pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
+
+                        slot_start = pfn - (gfn - slot->base_gfn);
+                        slot_end = slot_start + slot->npages;
+
+                        if (start < slot_start)
+                                start = slot_start;
+                        if (end > slot_end)
+                                end = slot_end;
+
+                        tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+                                MAS1_TSIZE_SHIFT;
+
+                        /*
+                         * e500 doesn't implement the lowest tsize bit,
+                         * or 1K pages.
+                         */
+                        tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+
+                        /*
+                         * Now find the largest tsize (up to what the guest
+                         * requested) that will cover gfn, stay within the
+                         * range, and for which gfn and pfn are mutually
+                         * aligned.
+                         */
+
+                        for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
+                                unsigned long gfn_start, gfn_end, tsize_pages;
+                                tsize_pages = 1 << (tsize - 2);
+
+                                gfn_start = gfn & ~(tsize_pages - 1);
+                                gfn_end = gfn_start + tsize_pages;
+
+                                if (gfn_start + pfn - gfn < start)
+                                        continue;
+                                if (gfn_end + pfn - gfn > end)
+                                        continue;
+                                if ((gfn & (tsize_pages - 1)) !=
+                                    (pfn & (tsize_pages - 1)))
+                                        continue;
+
+                                gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+                                pfn &= ~(tsize_pages - 1);
+                                break;
+                        }
+                }
+
+                up_read(&current->mm->mmap_sem);
+        }
+
+        if (likely(!pfnmap)) {
+                pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+                if (is_error_pfn(pfn)) {
+                        printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
+                                        (long)gfn);
+                        kvm_release_pfn_clean(pfn);
+                        return;
+                }
         }
 
         /* Drop reference to old page. */
         kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
 
-        /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-        stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
+        /* Force TS=1 IPROT=0 for all guest mappings. */
+        stlbe->mas1 = MAS1_TSIZE(tsize)
                 | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
         stlbe->mas2 = (gvaddr & MAS2_EPN)
                 | e500_shadow_mas2_attrib(gtlbe->mas2,
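For reference, here is a standalone sketch (not kernel code) of the page-size selection loop in the hunk above. It assumes the encoding this code uses: tsize_pages = 1 << (tsize - 2) 4 KiB pages, BOOK3E_PAGESZ_4K == 2, and only even tsize values on e500; start/end are the pfn bounds of the vma/memslot overlap, and the values in main() are made-up examples.

#include <stdio.h>

#define BOOK3E_PAGESZ_4K 2

static int pick_tsize(unsigned long gfn, unsigned long pfn,
                      unsigned long start, unsigned long end, int tsize)
{
        /* e500 doesn't implement the lowest tsize bit, or 1K pages. */
        tsize &= ~1;
        if (tsize < BOOK3E_PAGESZ_4K)
                tsize = BOOK3E_PAGESZ_4K;

        for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
                unsigned long tsize_pages = 1UL << (tsize - 2);
                unsigned long gfn_start = gfn & ~(tsize_pages - 1);
                unsigned long gfn_end = gfn_start + tsize_pages;

                /* The backing pfns for the whole page must stay in range... */
                if (gfn_start + pfn - gfn < start)
                        continue;
                if (gfn_end + pfn - gfn > end)
                        continue;
                /* ...and gfn and pfn must be mutually aligned. */
                if ((gfn & (tsize_pages - 1)) != (pfn & (tsize_pages - 1)))
                        continue;
                break;
        }
        return tsize;
}

int main(void)
{
        /* Guest asked for tsize 9 (512 KiB); the odd bit is dropped first. */
        int tsize = pick_tsize(0x41000, 0x81000, 0x80000, 0x90000, 9);

        printf("selected tsize %d (%lu KiB)\n", tsize, 4UL << (tsize - 2));
        return 0;
}

With these example inputs the loop settles on tsize 8, i.e. one 256 KiB shadow mapping instead of 64 separate 4 KiB entries.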