Diffstat (limited to 'arch/arm')
-rw-r--r--  arch/arm/include/asm/kvm_asm.h         |   1
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h     |   5
-rw-r--r--  arch/arm/include/asm/kvm_host.h        |   6
-rw-r--r--  arch/arm/include/asm/kvm_mmio.h        |   1
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h         |  21
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h  |   1
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h        |   2
-rw-r--r--  arch/arm/kvm/Kconfig                   |   2
-rw-r--r--  arch/arm/kvm/Makefile                  |   1
-rw-r--r--  arch/arm/kvm/arm.c                     |  58
-rw-r--r--  arch/arm/kvm/handle_exit.c             |   8
-rw-r--r--  arch/arm/kvm/interrupts.S              |  11
-rw-r--r--  arch/arm/kvm/mmu.c                     | 271
-rw-r--r--  arch/arm/kvm/psci.c                    |  17
-rw-r--r--  arch/arm/kvm/trace.h                   |  11
15 files changed, 382 insertions(+), 34 deletions(-)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 3a67bec72d0c..25410b2d8bc1 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -96,6 +96,7 @@ extern char __kvm_hyp_code_end[];
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 7b0152321b20..a9c80a2ea1a7 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -23,6 +23,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/kvm_arm.h>
+#include <asm/cputype.h>
 
 unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
@@ -177,9 +178,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
 }
 
-static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.cp15[c0_MPIDR];
+	return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 04b4ea0b550a..41008cd7c53f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -68,6 +68,7 @@ struct kvm_arch {
 
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
+	int max_vcpus;
 };
 
 #define KVM_NR_MEM_OBJS     40
@@ -144,6 +145,7 @@ struct kvm_vm_stat {
 };
 
 struct kvm_vcpu_stat {
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 };
 
@@ -231,6 +233,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
+
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index adcc0d7d3175..3f83db2f6cf0 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -37,6 +37,7 @@ struct kvm_exit_mmio {
 	u8		data[8];
 	u32		len;
 	bool		is_write;
+	void		*private;
 };
 
 static inline void kvm_prepare_mmio(struct kvm_run *run,
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 1bca8f8af442..37ca2a4c6f09 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -115,6 +115,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= L_PMD_S2_RDWR;
 }
 
+static inline void kvm_set_s2pte_readonly(pte_t *pte)
+{
+	pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
+}
+
+static inline bool kvm_s2pte_readonly(pte_t *pte)
+{
+	return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
+}
+
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+{
+	pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
+}
+
+static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+{
+	return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
+}
+
+
 /* Open coded p*d_addr_end that can deal with 64bit addresses */
 #define kvm_pgd_addr_end(addr, end)					\
 ({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 423a5ac09d3a..a745a2a53853 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -129,6 +129,7 @@
 #define L_PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[1] */
 #define L_PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define L_PMD_S2_RDONLY		(_AT(pmdval_t, 1) << 6)   /* HAP[1] */
 #define L_PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 
 /*
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 09ee408c1a67..0db25bc32864 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -175,6 +175,8 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
+#define KVM_DEV_ARM_VGIC_GRP_CTRL	4
+#define KVM_DEV_ARM_VGIC_CTRL_INIT	0
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 3afee5f40f4f..338ace78ed18 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -21,8 +21,10 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_ARM_HOST
+	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select SRCU
 	depends on ARM_VIRT_EXT && ARM_LPAE
 	---help---
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f7057ed045b6..443b8bea43e9 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -22,4 +22,5 @@ obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 0b0d58a905c4..07e7eb1d7ab6 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -132,6 +132,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.vmid_gen = 0;
 
+	/* The maximum number of VCPUs is limited by the host's GIC model */
+	kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
+
 	return ret;
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
@@ -218,6 +221,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 		goto out;
 	}
 
+	if (id >= kvm->arch.max_vcpus) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		err = -ENOMEM;
@@ -241,9 +249,8 @@ out:
 	return ERR_PTR(err);
 }
 
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
-	return 0;
 }
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@ -777,9 +784,39 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 }
 
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm:	kvm instance
+ * @log:	slot id and address to which we copy the log
+ *
+ * Steps 1-4 below provide general overview of dirty page logging. See
+ * kvm_get_dirty_log_protect() function description for additional details.
+ *
+ * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
+ * always flush the TLB (step 4) even if previous step failed and the dirty
+ * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
+ * does not preclude user space subsequent dirty log read. Flushing TLB ensures
+ * writes will be marked dirty for next log read.
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Copy the snapshot to the userspace.
+ *   4. Flush TLB's if needed.
+ */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
-	return -EINVAL;
+	bool is_dirty = false;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
+
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	mutex_unlock(&kvm->slots_lock);
+	return r;
 }
 
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
@@ -811,7 +848,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
 		if (vgic_present)
-			return kvm_vgic_create(kvm);
+			return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 		else
 			return -ENXIO;
 	}
@@ -1035,6 +1072,19 @@ static void check_kvm_target_cpu(void *ret)
 	*(int *)ret = kvm_target_cpu();
 }
 
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	mpidr &= MPIDR_HWID_BITMASK;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
+			return vcpu;
+	}
+	return NULL;
+}
+
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a96a8043277c..95f12b2ccdcb 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -87,11 +87,13 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
  */
 static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	trace_kvm_wfi(*vcpu_pc(vcpu));
-	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
+		trace_kvm_wfx(*vcpu_pc(vcpu), true);
 		kvm_vcpu_on_spin(vcpu);
-	else
+	} else {
+		trace_kvm_wfx(*vcpu_pc(vcpu), false);
 		kvm_vcpu_block(vcpu);
+	}
 
 	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 01dcb0e752d9..79caf79b304a 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -66,6 +66,17 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	bx	lr
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
+/**
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
+ *
+ * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
+ * parameter
+ */
+
+ENTRY(__kvm_tlb_flush_vmid)
+	b	__kvm_tlb_flush_vmid_ipa
+ENDPROC(__kvm_tlb_flush_vmid)
+
 /********************************************************************
  * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
  * domain, for all VMIDs
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 136662547ca6..3e6859bc3e11 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -45,6 +45,26 @@ static phys_addr_t hyp_idmap_vector;
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
+#define kvm_pud_huge(_x)	pud_huge(_x)
+
+#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
+#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
+
+static bool memslot_is_logging(struct kvm_memory_slot *memslot)
+{
+	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
+}
+
+/**
+ * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * @kvm:	pointer to kvm structure.
+ *
+ * Interface to HYP function to flush all VM TLB entries
+ */
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+}
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
@@ -78,6 +98,25 @@ static void kvm_flush_dcache_pud(pud_t pud)
 	__kvm_flush_dcache_pud(pud);
 }
 
+/**
+ * stage2_dissolve_pmd() - clear and flush huge PMD entry
+ * @kvm:	pointer to kvm structure.
+ * @addr:	IPA
+ * @pmd:	pmd pointer for IPA
+ *
+ * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
+ * pages in the range dirty.
+ */
+static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+{
+	if (!kvm_pmd_huge(*pmd))
+		return;
+
+	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	put_page(virt_to_page(pmd));
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 {
@@ -819,10 +858,15 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 }
 
 static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+			  phys_addr_t addr, const pte_t *new_pte,
+			  unsigned long flags)
 {
 	pmd_t *pmd;
 	pte_t *pte, old_pte;
+	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
+	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
+
+	VM_BUG_ON(logging_active && !cache);
 
 	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	pmd = stage2_get_pmd(kvm, cache, addr);
@@ -834,6 +878,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 		return 0;
 	}
 
+	/*
+	 * While dirty page logging - dissolve huge PMD, then continue on to
+	 * allocate page.
+	 */
+	if (logging_active)
+		stage2_dissolve_pmd(kvm, addr, pmd);
+
 	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
 		if (!cache)
@@ -890,7 +941,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
-		ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
+		ret = stage2_set_pte(kvm, &cache, addr, &pte,
+						KVM_S2PTE_FLAG_IS_IOMAP);
 		spin_unlock(&kvm->mmu_lock);
 		if (ret)
 			goto out;
@@ -957,6 +1009,165 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 }
 
+/**
+ * stage2_wp_ptes - write protect PMD range
+ * @pmd:	pointer to pmd entry
+ * @addr:	range start address
+ * @end:	range end address
+ */
+static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			if (!kvm_s2pte_readonly(pte))
+				kvm_set_s2pte_readonly(pte);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+/**
+ * stage2_wp_pmds - write protect PUD range
+ * @pud:	pointer to pud entry
+ * @addr:	range start address
+ * @end:	range end address
+ */
+static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = pmd_offset(pud, addr);
+
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				if (!kvm_s2pmd_readonly(pmd))
+					kvm_set_s2pmd_readonly(pmd);
+			} else {
+				stage2_wp_ptes(pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_wp_puds - write protect PGD range
+ * @pgd:	pointer to pgd entry
+ * @addr:	range start address
+ * @end:	range end address
+ *
+ * Process PUD entries, for a huge PUD we cause a panic.
+ */
+static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			/* TODO:PUD not supported, revisit later if supported */
+			BUG_ON(kvm_pud_huge(*pud));
+			stage2_wp_pmds(pud, addr, next);
+		}
+	} while (pud++, addr = next, addr != end);
+}
+
+/**
+ * stage2_wp_range() - write protect stage2 memory region range
+ * @kvm:	The KVM pointer
+ * @addr:	Start address of range
+ * @end:	End address of range
+ */
+static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+	pgd_t *pgd;
+	phys_addr_t next;
+
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	do {
+		/*
+		 * Release kvm_mmu_lock periodically if the memory region is
+		 * large. Otherwise, we may see kernel panics with
+		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
+		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
+		 * will also starve other vCPUs.
+		 */
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+			cond_resched_lock(&kvm->mmu_lock);
+
+		next = kvm_pgd_addr_end(addr, end);
+		if (pgd_present(*pgd))
+			stage2_wp_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
+ * @kvm:	The KVM pointer
+ * @slot:	The memory slot to write protect
+ *
+ * Called to start logging dirty pages after memory region
+ * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
+ * all present PMD and PTEs are write protected in the memory region.
+ * Afterwards read of dirty page log can be called.
+ *
+ * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
+ * serializing operations for VM memory regions.
+ */
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
+{
+	struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	stage2_wp_range(kvm, start, end);
+	spin_unlock(&kvm->mmu_lock);
+	kvm_flush_remote_tlbs(kvm);
+}
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
+ * @kvm:	The KVM pointer
+ * @slot:	The memory slot associated with mask
+ * @gfn_offset:	The gfn offset in memory slot
+ * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
+ *		slot to be write protected
+ *
+ * Walks bits set in mask write protects the associated pte's. Caller must
+ * acquire kvm_mmu_lock.
+ */
+static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
+	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+	stage2_wp_range(kvm, start, end);
+}
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
+ * dirty pages.
+ *
+ * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
+ * enable dirty logging for them.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+}
+
 static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
 				      unsigned long size, bool uncached)
 {
@@ -977,6 +1188,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
 	bool fault_ipa_uncached;
+	bool logging_active = memslot_is_logging(memslot);
+	unsigned long flags = 0;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -993,7 +1206,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	if (is_vm_hugetlb_page(vma)) {
+	if (is_vm_hugetlb_page(vma) && !logging_active) {
 		hugetlb = true;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
@@ -1034,12 +1247,30 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
-	if (kvm_is_device_pfn(pfn))
+	if (kvm_is_device_pfn(pfn)) {
 		mem_type = PAGE_S2_DEVICE;
+		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
+	} else if (logging_active) {
+		/*
+		 * Faults on pages in a memslot with logging enabled
+		 * should not be mapped with huge pages (it introduces churn
+		 * and performance degradation), so force a pte mapping.
+		 */
+		force_pte = true;
+		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
+
+		/*
+		 * Only actually map the page as writable if this was a write
+		 * fault.
+		 */
+		if (!write_fault)
+			writable = false;
+	}
 
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
+
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
@@ -1056,16 +1287,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
+
 		if (writable) {
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
+			mark_page_dirty(kvm, gfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
-			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
 	}
 
-
 out_unlock:
 	spin_unlock(&kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
@@ -1215,7 +1446,14 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	pte_t *pte = (pte_t *)data;
 
-	stage2_set_pte(kvm, NULL, gpa, pte, false);
+	/*
+	 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
+	 * flag clear because MMU notifiers will have unmapped a huge PMD before
+	 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
+	 * therefore stage2_set_pte() never needs to clear out a huge PMD
+	 * through this calling path.
+	 */
+	stage2_set_pte(kvm, NULL, gpa, pte, 0);
 }
 
 
@@ -1348,6 +1586,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   const struct kvm_memory_slot *old,
 				   enum kvm_mr_change change)
 {
+	/*
+	 * At this point memslot has been committed and there is an
+	 * allocated dirty_bitmap[], dirty pages will be tracked while the
+	 * memory slot is write protected.
+	 */
+	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
+		kvm_mmu_wp_memory_region(kvm, mem->slot);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1360,7 +1605,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	int ret = 0;
 
-	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
+			change != KVM_MR_FLAGS_ONLY)
 		return 0;
 
 	/*
@@ -1411,6 +1657,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
 					 vm_start - vma->vm_start;
 
+			/* IO region dirty page logging not allowed */
+			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+				return -EINVAL;
+
 			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
 						    vm_end - vm_start,
 						    writable);
@@ -1420,6 +1670,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		hva = vm_end;
 	} while (hva < reg_end);
 
+	if (change == KVM_MR_FLAGS_ONLY)
+		return ret;
+
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 58cb3248d277..02fa8eff6ae1 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -22,6 +22,7 @@
 #include <asm/cputype.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_psci.h>
+#include <asm/kvm_host.h>
 
 /*
  * This is an implementation of the Power State Coordination Interface
@@ -66,25 +67,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
 	struct kvm *kvm = source_vcpu->kvm;
-	struct kvm_vcpu *vcpu = NULL, *tmp;
+	struct kvm_vcpu *vcpu = NULL;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
 	unsigned long context_id;
-	unsigned long mpidr;
 	phys_addr_t target_pc;
-	int i;
 
-	cpu_id = *vcpu_reg(source_vcpu, 1);
+	cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
 	if (vcpu_mode_is_32bit(source_vcpu))
 		cpu_id &= ~((u32) 0);
 
-	kvm_for_each_vcpu(i, tmp, kvm) {
-		mpidr = kvm_vcpu_get_mpidr(tmp);
-		if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
-			vcpu = tmp;
-			break;
-		}
-	}
+	vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
 
 	/*
 	 * Make sure the caller requested a valid CPU and that the CPU is
@@ -155,7 +148,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 	 * then ON else OFF
 	 */
 	kvm_for_each_vcpu(i, tmp, kvm) {
-		mpidr = kvm_vcpu_get_mpidr(tmp);
+		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
 		if (((mpidr & target_affinity_mask) == target_affinity) &&
 		    !tmp->arch.pause) {
 			return PSCI_0_2_AFFINITY_LEVEL_ON;
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b6a6e7102201..881874b1a036 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -140,19 +140,22 @@ TRACE_EVENT(kvm_emulate_cp15_imp,
 		  __entry->CRm, __entry->Op2)
 );
 
-TRACE_EVENT(kvm_wfi,
-	TP_PROTO(unsigned long vcpu_pc),
-	TP_ARGS(vcpu_pc),
+TRACE_EVENT(kvm_wfx,
+	TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
+	TP_ARGS(vcpu_pc, is_wfe),
 
 	TP_STRUCT__entry(
 		__field( unsigned long, vcpu_pc )
+		__field( bool, is_wfe )
 	),
 
 	TP_fast_assign(
 		__entry->vcpu_pc = vcpu_pc;
+		__entry->is_wfe = is_wfe;
 	),
 
-	TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
+	TP_printk("guest executed wf%c at: 0x%08lx",
+		__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
 );
 
 TRACE_EVENT(kvm_unmap_hva,