author	Christoffer Dall <c.dall@virtualopensystems.com>	2013-01-20 18:28:12 -0500
committer	Christoffer Dall <c.dall@virtualopensystems.com>	2013-01-23 13:29:16 -0500
commit	94f8e6418d3915dbefbb5d66b63146f1df12b0c0 (patch)
tree	93f016f65c5f4aef881974d87a395688dc407808 /arch/arm/kvm
parent	4fe21e4c6def3c6a8f609893b4d5c72bc186d0d5 (diff)
KVM: ARM: Handle guest faults in KVM
Handle guest faults in KVM by mapping in the corresponding user pages in the 2nd stage page tables.

We invalidate the instruction cache by MVA whenever we map a page to the guest (no, we cannot only do it when we have an iabt, because the guest may happily read/write a page before ever hitting the icache) if the hardware uses VIPT or PIPT. In the PIPT case, we can invalidate only that physical page. In the VIPT case, all bets are off and we simply must invalidate the whole affair. Note that VIVT icaches are tagged with vmids, and we are out of the woods on that one. Alexander Graf was nice enough to remind us of this massive pain.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
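For reference, a minimal sketch of the invalidation policy described above; it mirrors coherent_icache_guest_page() from the diff below, and every helper it uses (icache_is_pipt(), icache_is_vivt_asid_tagged(), gfn_to_hva(), __cpuc_coherent_user_range(), __flush_icache_all()) is an existing ARM/KVM kernel function:

	/* Sketch only -- the real implementation is in the mmu.c hunk below. */
	static void icache_policy_sketch(struct kvm *kvm, gfn_t gfn)
	{
		if (icache_is_pipt()) {
			/* PIPT: lines are tagged by physical address, so
			 * invalidating the one page being mapped suffices. */
			unsigned long hva = gfn_to_hva(kvm, gfn);
			__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
		} else if (!icache_is_vivt_asid_tagged()) {
			/* VIPT: stale lines can alias at any virtual index,
			 * so the whole icache must be invalidated. */
			__flush_icache_all();
		}
		/* ASID/VMID-tagged VIVT: the tags already isolate the guest,
		 * so no maintenance is needed. */
	}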
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--	arch/arm/kvm/mmu.c	153
-rw-r--r--	arch/arm/kvm/trace.h	26
2 files changed, 178 insertions(+), 1 deletion(-)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 4347d68f052f..a4b7b0f900e5 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -21,9 +21,11 @@
 #include <linux/io.h>
 #include <asm/idmap.h>
 #include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
 #include <asm/mach/map.h>
 #include <trace/events/kvm.h>
 
@@ -488,9 +490,158 @@ out:
 	return ret;
 }
 
+static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+{
+	/*
+	 * If we are going to insert an instruction page and the icache is
+	 * either VIPT or PIPT, there is a potential problem where the host
+	 * (or another VM) may have used the same page as this guest, and we
+	 * read incorrect data from the icache.  If we're using a PIPT cache,
+	 * we can invalidate just that page, but if we are using a VIPT cache
+	 * we need to invalidate the entire icache - damn shame - as written
+	 * in the ARM ARM (DDI 0406C.b - Page B3-1393).
+	 *
+	 * VIVT caches are tagged using both the ASID and the VMID and don't
+	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
+	 */
+	if (icache_is_pipt()) {
+		unsigned long hva = gfn_to_hva(kvm, gfn);
+		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+	} else if (!icache_is_vivt_asid_tagged()) {
+		/* any kind of VIPT cache */
+		__flush_icache_all();
+	}
+}
+
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  gfn_t gfn, struct kvm_memory_slot *memslot,
+			  unsigned long fault_status)
+{
+	pte_t new_pte;
+	pfn_t pfn;
+	int ret;
+	bool write_fault, writable;
+	unsigned long mmu_seq;
+	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+
+	write_fault = kvm_is_write_fault(vcpu->arch.hsr);
+	if (fault_status == FSC_PERM && !write_fault) {
+		kvm_err("Unexpected L2 read permission error\n");
+		return -EFAULT;
+	}
+
+	/* At a minimum, we need second and third level page table pages */
+	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+	if (ret)
+		return ret;
+
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/*
+	 * Ensure the read of mmu_notifier_seq happens before we call
+	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
+	 * the page we just got a reference to getting unmapped before we have
+	 * a chance to grab the mmu_lock, which ensures that if the page gets
+	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
+	 * from us again properly.  This smp_rmb() interacts with the smp_wmb()
+	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
+	 */
+	smp_rmb();
+
+	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+	if (is_error_pfn(pfn))
+		return -EFAULT;
+
+	new_pte = pfn_pte(pfn, PAGE_S2);
+	coherent_icache_guest_page(vcpu->kvm, gfn);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+		goto out_unlock;
+	if (writable) {
+		pte_val(new_pte) |= L_PTE_S2_RDWR;
+		kvm_set_pfn_dirty(pfn);
+	}
+	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
+}
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu:	the VCPU pointer
+ * @run:	the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean either that
+ * the guest simply needs more memory and we must allocate an appropriate
+ * page, or that the guest tried to access I/O memory, which is emulated by
+ * user space.  The distinction is based on the IPA causing the fault and on
+ * whether this memory region has been registered as standard RAM by user
+ * space.
+ */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	return -EINVAL;
+	unsigned long hsr_ec;
+	unsigned long fault_status;
+	phys_addr_t fault_ipa;
+	struct kvm_memory_slot *memslot;
+	bool is_iabt;
+	gfn_t gfn;
+	int ret, idx;
+
+	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+	is_iabt = (hsr_ec == HSR_EC_IABT);
+	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+	trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr,
+			      vcpu->arch.hxfar, fault_ipa);
+
+	/* Check that the stage-2 fault is a translation or permission fault */
+	fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE);
+	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
+		kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
+			hsr_ec, fault_status);
+		return -EFAULT;
+	}
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	gfn = fault_ipa >> PAGE_SHIFT;
+	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+		if (is_iabt) {
+			/* Prefetch Abort on I/O address */
+			kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
+			ret = 1;
+			goto out_unlock;
+		}
+
+		if (fault_status != FSC_FAULT) {
+			kvm_err("Unsupported fault status on io memory: %#lx\n",
+				fault_status);
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+
+		kvm_pr_unimpl("I/O address abort...");
+		ret = 0;
+		goto out_unlock;
+	}
+
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!memslot->user_alloc) {
+		kvm_err("non user-alloc memslots not supported\n");
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+	if (ret == 0)
+		ret = 1;
+out_unlock:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	return ret;
 }
 
 static void handle_hva_to_gpa(struct kvm *kvm,
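A note on the ordering dance in user_mem_abort() above, since it is easy to misread: the notifier sequence count is sampled before the page is pinned and rechecked under mmu_lock before the PTE is installed, so an MMU-notifier invalidation that races with the fault simply forces the guest to refault. A condensed sketch of that pattern, using the same generic KVM helpers the hunk uses:

	/* Condensed from user_mem_abort() above -- a sketch, not new code. */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();	/* pairs with smp_wmb() in the invalidate notifiers */

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);  /* may sleep */

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;  /* raced with an unmap: drop the pfn, refault */
	/* ...install the stage-2 PTE... */
out_unlock:
	spin_unlock(&kvm->mmu_lock);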
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 022305b38c27..624b5a4e8fad 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -39,6 +39,32 @@ TRACE_EVENT(kvm_exit,
 	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_guest_fault,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
+		 unsigned long hxfar,
+		 unsigned long long ipa),
+	TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+		__field(	unsigned long,	hsr		)
+		__field(	unsigned long,	hxfar		)
+		__field(	unsigned long long,	ipa	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc	= vcpu_pc;
+		__entry->hsr		= hsr;
+		__entry->hxfar		= hxfar;
+		__entry->ipa		= ipa;
+	),
+
+	TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
+		  "ipa %#16llx, hsr %#08lx)",
+		  __entry->vcpu_pc, __entry->hxfar,
+		  __entry->ipa, __entry->hsr)
+);
+
 TRACE_EVENT(kvm_irq_line,
 	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
 	TP_ARGS(type, vcpu_idx, irq_num, level),
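Usage note: TRACE_EVENT(kvm_guest_fault, ...) causes ftrace to generate a static inline trace_kvm_guest_fault() helper, which is what the mmu.c hunk calls at the top of kvm_handle_guest_abort(). Roughly, the generated shape is as follows (a sketch of the macro expansion, not literal kernel output):

	/* Sketch of what TRACE_EVENT(kvm_guest_fault, ...) expands to. */
	static inline void trace_kvm_guest_fault(unsigned long vcpu_pc,
						 unsigned long hsr,
						 unsigned long hxfar,
						 unsigned long long ipa)
	{
		/* When the event is enabled, records the four fields and
		 * formats them with the TP_printk() string; when disabled,
		 * this compiles down to (almost) nothing. */
	}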