diff options
author | Scott Wood <scottwood@freescale.com> | 2011-06-14 19:34:39 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2011-07-12 06:16:36 -0400 |
commit | 9973d54eeafcd1c3a2e89f0f59280c4c1e03e73b (patch) | |
tree | 80a68f063e34b88d217fb669227cd1e537c884d2 /arch/powerpc/kvm/e500_tlb.c | |
parent | 59c1f4e35c3db6c7ea5a04503a43bcbeb98977df (diff) |
KVM: PPC: e500: Support large page mappings of PFNMAP vmas.
This allows large pages to be used on guest mappings backed by things like
/dev/mem, resulting in a significant speedup when guest memory
is mapped this way (it's useful for directly-assigned MMIO, too).
This is not a substitute for hugetlbfs integration, but is useful for
configurations where devices are directly assigned on chips without an
IOMMU -- in these cases, we need guest physical and true physical to
match, and be contiguous, so static reservation and mapping via /dev/mem
is the most straightforward way to set things up.
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'arch/powerpc/kvm/e500_tlb.c')
-rw-r--r-- | arch/powerpc/kvm/e500_tlb.c | 103 |
1 files changed, 94 insertions, 9 deletions
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 0291c3cf5055..7f808c52e64a 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c | |||
@@ -270,28 +270,113 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | |||
270 | static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | 270 | static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, |
271 | u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) | 271 | u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) |
272 | { | 272 | { |
273 | struct kvm_memory_slot *slot; | ||
273 | struct tlbe *stlbe; | 274 | struct tlbe *stlbe; |
274 | unsigned long pfn; | 275 | unsigned long pfn, hva; |
276 | int pfnmap = 0; | ||
277 | int tsize = BOOK3E_PAGESZ_4K; | ||
275 | 278 | ||
276 | stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; | 279 | stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; |
277 | 280 | ||
278 | /* | 281 | /* |
279 | * Translate guest physical to true physical, acquiring | 282 | * Translate guest physical to true physical, acquiring |
280 | * a page reference if it is normal, non-reserved memory. | 283 | * a page reference if it is normal, non-reserved memory. |
284 | * | ||
285 | * gfn_to_memslot() must succeed because otherwise we wouldn't | ||
286 | * have gotten this far. Eventually we should just pass the slot | ||
287 | * pointer through from the first lookup. | ||
281 | */ | 288 | */ |
282 | pfn = gfn_to_pfn(vcpu_e500->vcpu.kvm, gfn); | 289 | slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); |
283 | if (is_error_pfn(pfn)) { | 290 | hva = gfn_to_hva_memslot(slot, gfn); |
284 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", | 291 | |
285 | (long)gfn); | 292 | if (tlbsel == 1) { |
286 | kvm_release_pfn_clean(pfn); | 293 | struct vm_area_struct *vma; |
287 | return; | 294 | down_read(&current->mm->mmap_sem); | ||
295 | |||
296 | vma = find_vma(current->mm, hva); | ||
297 | if (vma && hva >= vma->vm_start && | ||
298 | (vma->vm_flags & VM_PFNMAP)) { | ||
299 | /* | ||
300 | * This VMA is a physically contiguous region (e.g. | ||
301 | * /dev/mem) that bypasses normal Linux page | ||
302 | * management. Find the overlap between the | ||
303 | * vma and the memslot. | ||
304 | */ | ||
305 | |||
306 | unsigned long start, end; | ||
307 | unsigned long slot_start, slot_end; | ||
308 | |||
309 | pfnmap = 1; | ||
310 | |||
311 | start = vma->vm_pgoff; | ||
312 | end = start + | ||
313 | ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); | ||
314 | |||
315 | pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); | ||
316 | |||
317 | slot_start = pfn - (gfn - slot->base_gfn); | ||
318 | slot_end = slot_start + slot->npages; | ||
319 | |||
320 | if (start < slot_start) | ||
321 | start = slot_start; | ||
322 | if (end > slot_end) | ||
323 | end = slot_end; | ||
324 | |||
325 | tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> | ||
326 | MAS1_TSIZE_SHIFT; | ||
327 | |||
328 | /* | ||
329 | * e500 doesn't implement the lowest tsize bit, | ||
330 | * or 1K pages. | ||
331 | */ | ||
332 | tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); | ||
333 | |||
334 | /* | ||
335 | * Now find the largest tsize (up to what the guest | ||
336 | * requested) that will cover gfn, stay within the | ||
337 | * range, and for which gfn and pfn are mutually | ||
338 | * aligned. | ||
339 | */ | ||
340 | |||
341 | for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { | ||
342 | unsigned long gfn_start, gfn_end, tsize_pages; | ||
343 | tsize_pages = 1 << (tsize - 2); | ||
344 | |||
345 | gfn_start = gfn & ~(tsize_pages - 1); | ||
346 | gfn_end = gfn_start + tsize_pages; | ||
347 | |||
348 | if (gfn_start + pfn - gfn < start) | ||
349 | continue; | ||
350 | if (gfn_end + pfn - gfn > end) | ||
351 | continue; | ||
352 | if ((gfn & (tsize_pages - 1)) != | ||
353 | (pfn & (tsize_pages - 1))) | ||
354 | continue; | ||
355 | |||
356 | gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); | ||
357 | pfn &= ~(tsize_pages - 1); | ||
358 | break; | ||
359 | } | ||
360 | } | ||
361 | |||
362 | up_read(&current->mm->mmap_sem); | ||
363 | } | ||
364 | |||
365 | if (likely(!pfnmap)) { | ||
366 | pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); | ||
367 | if (is_error_pfn(pfn)) { | ||
368 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", | ||
369 | (long)gfn); | ||
370 | kvm_release_pfn_clean(pfn); | ||
371 | return; | ||
372 | } | ||
288 | } | 373 | } |
289 | 374 | ||
290 | /* Drop reference to old page. */ | 375 | /* Drop reference to old page. */ |
291 | kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); | 376 | kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); |
292 | 377 | ||
293 | /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ | 378 | /* Force TS=1 IPROT=0 for all guest mappings. */ |
294 | stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K) | 379 | stlbe->mas1 = MAS1_TSIZE(tsize) |
295 | | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; | 380 | | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; |
296 | stlbe->mas2 = (gvaddr & MAS2_EPN) | 381 | stlbe->mas2 = (gvaddr & MAS2_EPN) |
297 | | e500_shadow_mas2_attrib(gtlbe->mas2, | 382 | | e500_shadow_mas2_attrib(gtlbe->mas2, |