diff options
-rw-r--r-- | arch/arm/include/asm/kvm_asm.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/kvm_mmu.h | 12 | ||||
-rw-r--r-- | arch/arm/kvm/mmu.c | 153 | ||||
-rw-r--r-- | arch/arm/kvm/trace.h | 26 |
4 files changed, 192 insertions, 1 deletion
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index f6652f6c5d84..5e06e8177784 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h | |||
@@ -71,6 +71,8 @@ extern char __kvm_hyp_vector[]; | |||
71 | extern char __kvm_hyp_code_start[]; | 71 | extern char __kvm_hyp_code_start[]; |
72 | extern char __kvm_hyp_code_end[]; | 72 | extern char __kvm_hyp_code_end[]; |
73 | 73 | ||
74 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); | ||
75 | |||
74 | extern void __kvm_flush_vm_context(void); | 76 | extern void __kvm_flush_vm_context(void); |
75 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); | 77 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); |
76 | 78 | ||
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 499e7b0925ff..421a20b34874 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h | |||
@@ -35,4 +35,16 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); | |||
35 | phys_addr_t kvm_mmu_get_httbr(void); | 35 | phys_addr_t kvm_mmu_get_httbr(void); |
36 | int kvm_mmu_init(void); | 36 | int kvm_mmu_init(void); |
37 | void kvm_clear_hyp_idmap(void); | 37 | void kvm_clear_hyp_idmap(void); |
38 | |||
39 | static inline bool kvm_is_write_fault(unsigned long hsr) | ||
40 | { | ||
41 | unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; | ||
42 | if (hsr_ec == HSR_EC_IABT) | ||
43 | return false; | ||
44 | else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) | ||
45 | return false; | ||
46 | else | ||
47 | return true; | ||
48 | } | ||
49 | |||
38 | #endif /* __ARM_KVM_MMU_H__ */ | 50 | #endif /* __ARM_KVM_MMU_H__ */ |
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 4347d68f052f..a4b7b0f900e5 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c | |||
@@ -21,9 +21,11 @@ | |||
21 | #include <linux/io.h> | 21 | #include <linux/io.h> |
22 | #include <asm/idmap.h> | 22 | #include <asm/idmap.h> |
23 | #include <asm/pgalloc.h> | 23 | #include <asm/pgalloc.h> |
24 | #include <asm/cacheflush.h> | ||
24 | #include <asm/kvm_arm.h> | 25 | #include <asm/kvm_arm.h> |
25 | #include <asm/kvm_mmu.h> | 26 | #include <asm/kvm_mmu.h> |
26 | #include <asm/kvm_asm.h> | 27 | #include <asm/kvm_asm.h> |
28 | #include <asm/kvm_emulate.h> | ||
27 | #include <asm/mach/map.h> | 29 | #include <asm/mach/map.h> |
28 | #include <trace/events/kvm.h> | 30 | #include <trace/events/kvm.h> |
29 | 31 | ||
@@ -488,9 +490,158 @@ out: | |||
488 | return ret; | 490 | return ret; |
489 | } | 491 | } |
490 | 492 | ||
493 | static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) | ||
494 | { | ||
495 | /* | ||
496 | * If we are going to insert an instruction page and the icache is | ||
497 | * either VIPT or PIPT, there is a potential problem where the host | ||
498 | * (or another VM) may have used the same page as this guest, and we | ||
499 | * read incorrect data from the icache. If we're using a PIPT cache, | ||
500 | * we can invalidate just that page, but if we are using a VIPT cache | ||
501 | * we need to invalidate the entire icache - damn shame - as written | ||
502 | * in the ARM ARM (DDI 0406C.b - Page B3-1393). | ||
503 | * | ||
504 | * VIVT caches are tagged using both the ASID and the VMID and don't | ||
505 | * need any kind of flushing (DDI 0406C.b - Page B3-1392). | ||
506 | */ | ||
507 | if (icache_is_pipt()) { | ||
508 | unsigned long hva = gfn_to_hva(kvm, gfn); | ||
509 | __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); | ||
510 | } else if (!icache_is_vivt_asid_tagged()) { | ||
511 | /* any kind of VIPT cache */ | ||
512 | __flush_icache_all(); | ||
513 | } | ||
514 | } | ||
515 | |||
516 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | ||
517 | gfn_t gfn, struct kvm_memory_slot *memslot, | ||
518 | unsigned long fault_status) | ||
519 | { | ||
520 | pte_t new_pte; | ||
521 | pfn_t pfn; | ||
522 | int ret; | ||
523 | bool write_fault, writable; | ||
524 | unsigned long mmu_seq; | ||
525 | struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; | ||
526 | |||
527 | write_fault = kvm_is_write_fault(vcpu->arch.hsr); | ||
528 | if (fault_status == FSC_PERM && !write_fault) { | ||
529 | kvm_err("Unexpected L2 read permission error\n"); | ||
530 | return -EFAULT; | ||
531 | } | ||
532 | |||
533 | /* We need minimum second+third level pages */ | ||
534 | ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); | ||
535 | if (ret) | ||
536 | return ret; | ||
537 | |||
538 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
539 | /* | ||
540 | * Ensure the read of mmu_notifier_seq happens before we call | ||
541 | * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk | ||
542 | * the page we just got a reference to gets unmapped before we have a | ||
543 | * chance to grab the mmu_lock, which ensure that if the page gets | ||
544 | * unmapped afterwards, the call to kvm_unmap_hva will take it away | ||
545 | * from us again properly. This smp_rmb() interacts with the smp_wmb() | ||
546 | * in kvm_mmu_notifier_invalidate_<page|range_end>. | ||
547 | */ | ||
548 | smp_rmb(); | ||
549 | |||
550 | pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); | ||
551 | if (is_error_pfn(pfn)) | ||
552 | return -EFAULT; | ||
553 | |||
554 | new_pte = pfn_pte(pfn, PAGE_S2); | ||
555 | coherent_icache_guest_page(vcpu->kvm, gfn); | ||
556 | |||
557 | spin_lock(&vcpu->kvm->mmu_lock); | ||
558 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | ||
559 | goto out_unlock; | ||
560 | if (writable) { | ||
561 | pte_val(new_pte) |= L_PTE_S2_RDWR; | ||
562 | kvm_set_pfn_dirty(pfn); | ||
563 | } | ||
564 | stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); | ||
565 | |||
566 | out_unlock: | ||
567 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
568 | kvm_release_pfn_clean(pfn); | ||
569 | return 0; | ||
570 | } | ||
571 | |||
572 | /** | ||
573 | * kvm_handle_guest_abort - handles all 2nd stage aborts | ||
574 | * @vcpu: the VCPU pointer | ||
575 | * @run: the kvm_run structure | ||
576 | * | ||
577 | * Any abort that gets to the host is almost guaranteed to be caused by a | ||
578 | * missing second stage translation table entry, which can mean that either the | ||
579 | * guest simply needs more memory and we must allocate an appropriate page or it | ||
580 | * can mean that the guest tried to access I/O memory, which is emulated by user | ||
581 | * space. The distinction is based on the IPA causing the fault and whether this | ||
582 | * memory region has been registered as standard RAM by user space. | ||
583 | */ | ||
491 | int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | 584 | int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) |
492 | { | 585 | { |
493 | return -EINVAL; | 586 | unsigned long hsr_ec; |
587 | unsigned long fault_status; | ||
588 | phys_addr_t fault_ipa; | ||
589 | struct kvm_memory_slot *memslot; | ||
590 | bool is_iabt; | ||
591 | gfn_t gfn; | ||
592 | int ret, idx; | ||
593 | |||
594 | hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT; | ||
595 | is_iabt = (hsr_ec == HSR_EC_IABT); | ||
596 | fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8; | ||
597 | |||
598 | trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr, | ||
599 | vcpu->arch.hxfar, fault_ipa); | ||
600 | |||
601 | /* Check the stage-2 fault is trans. fault or write fault */ | ||
602 | fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE); | ||
603 | if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { | ||
604 | kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n", | ||
605 | hsr_ec, fault_status); | ||
606 | return -EFAULT; | ||
607 | } | ||
608 | |||
609 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
610 | |||
611 | gfn = fault_ipa >> PAGE_SHIFT; | ||
612 | if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { | ||
613 | if (is_iabt) { | ||
614 | /* Prefetch Abort on I/O address */ | ||
615 | kvm_inject_pabt(vcpu, vcpu->arch.hxfar); | ||
616 | ret = 1; | ||
617 | goto out_unlock; | ||
618 | } | ||
619 | |||
620 | if (fault_status != FSC_FAULT) { | ||
621 | kvm_err("Unsupported fault status on io memory: %#lx\n", | ||
622 | fault_status); | ||
623 | ret = -EFAULT; | ||
624 | goto out_unlock; | ||
625 | } | ||
626 | |||
627 | kvm_pr_unimpl("I/O address abort..."); | ||
628 | ret = 0; | ||
629 | goto out_unlock; | ||
630 | } | ||
631 | |||
632 | memslot = gfn_to_memslot(vcpu->kvm, gfn); | ||
633 | if (!memslot->user_alloc) { | ||
634 | kvm_err("non user-alloc memslots not supported\n"); | ||
635 | ret = -EINVAL; | ||
636 | goto out_unlock; | ||
637 | } | ||
638 | |||
639 | ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); | ||
640 | if (ret == 0) | ||
641 | ret = 1; | ||
642 | out_unlock: | ||
643 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
644 | return ret; | ||
494 | } | 645 | } |
495 | 646 | ||
496 | static void handle_hva_to_gpa(struct kvm *kvm, | 647 | static void handle_hva_to_gpa(struct kvm *kvm, |
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 022305b38c27..624b5a4e8fad 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h | |||
@@ -39,6 +39,32 @@ TRACE_EVENT(kvm_exit, | |||
39 | TP_printk("PC: 0x%08lx", __entry->vcpu_pc) | 39 | TP_printk("PC: 0x%08lx", __entry->vcpu_pc) |
40 | ); | 40 | ); |
41 | 41 | ||
42 | TRACE_EVENT(kvm_guest_fault, | ||
43 | TP_PROTO(unsigned long vcpu_pc, unsigned long hsr, | ||
44 | unsigned long hxfar, | ||
45 | unsigned long long ipa), | ||
46 | TP_ARGS(vcpu_pc, hsr, hxfar, ipa), | ||
47 | |||
48 | TP_STRUCT__entry( | ||
49 | __field( unsigned long, vcpu_pc ) | ||
50 | __field( unsigned long, hsr ) | ||
51 | __field( unsigned long, hxfar ) | ||
52 | __field( unsigned long long, ipa ) | ||
53 | ), | ||
54 | |||
55 | TP_fast_assign( | ||
56 | __entry->vcpu_pc = vcpu_pc; | ||
57 | __entry->hsr = hsr; | ||
58 | __entry->hxfar = hxfar; | ||
59 | __entry->ipa = ipa; | ||
60 | ), | ||
61 | |||
62 | TP_printk("guest fault at PC %#08lx (hxfar %#08lx, " | ||
63 | "ipa %#16llx, hsr %#08lx)", | ||
64 | __entry->vcpu_pc, __entry->hxfar, | ||
65 | __entry->ipa, __entry->hsr) | ||
66 | ); | ||
67 | |||
42 | TRACE_EVENT(kvm_irq_line, | 68 | TRACE_EVENT(kvm_irq_line, |
43 | TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), | 69 | TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), |
44 | TP_ARGS(type, vcpu_idx, irq_num, level), | 70 | TP_ARGS(type, vcpu_idx, irq_num, level), |