Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h    |  5
-rw-r--r--  arch/arm/include/asm/kvm_host.h       |  2
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h        |  6
-rw-r--r--  arch/arm/kvm/arm.c                    | 78
-rw-r--r--  arch/arm/kvm/guest.c                  | 26
-rw-r--r--  arch/arm/kvm/mmio.c                   | 15
-rw-r--r--  arch/arm/kvm/mmu.c                    | 99
-rw-r--r--  arch/arm/kvm/psci.c                   | 18
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h  |  5
-rw-r--r--  arch/arm64/include/asm/kvm_host.h     |  3
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h      |  6
-rw-r--r--  arch/arm64/kvm/guest.c                | 26
-rw-r--r--  arch/x86/include/asm/kvm_host.h       | 10
-rw-r--r--  arch/x86/kernel/kvm.c                 |  9
-rw-r--r--  arch/x86/kernel/kvmclock.c            |  1
-rw-r--r--  arch/x86/kvm/emulate.c                | 20
-rw-r--r--  arch/x86/kvm/ioapic.h                 | 17
-rw-r--r--  arch/x86/kvm/mmu.c                    |  6
-rw-r--r--  arch/x86/kvm/vmx.c                    |  4
19 files changed, 261 insertions(+), 95 deletions(-)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b9db269c6e61..66ce17655bb9 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 53036e21756b..254e0650e48b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -150,8 +150,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index acb0d5712716..63e0ecc04901 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 
 	/*
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8a959e..2d6d91001062 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	int err;
 	struct kvm_vcpu *vcpu;
 
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+		err = -EBUSY;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		err = -ENOMEM;
@@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
@@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	int ret;
 
 	if (likely(vcpu->arch.has_run_once))
@@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.has_run_once = true;
 
 	/*
-	 * Initialize the VGIC before running a vcpu the first time on
-	 * this VM.
+	 * Map the VGIC hardware resources before running a vcpu the first
+	 * time on this VM.
 	 */
-	if (unlikely(!vgic_initialized(vcpu->kvm))) {
-		ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_ready(kvm))) {
+		ret = kvm_vgic_map_resources(kvm);
 		if (ret)
 			return ret;
 	}
 
+	/*
+	 * Enable the arch timers only if we have an in-kernel VGIC
+	 * and it has been properly initialized, since we cannot handle
+	 * interrupts from the virtual timer with a userspace gic.
+	 */
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+		kvm_timer_enable(kvm);
+
 	return 0;
 }
 
@@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			       const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	/*
+	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+	 * use the same target.
+	 */
+	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+		return -EINVAL;
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+		if (set && i >= KVM_VCPU_MAX_FEATURES)
+			return -ENOENT;
+
+		/*
+		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+		 * use the same feature set.
+		 */
+		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+		    test_bit(i, vcpu->arch.features) != set)
+			return -EINVAL;
+
+		if (set)
+			set_bit(i, vcpu->arch.features);
+	}
+
+	vcpu->arch.target = phys_target;
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+
 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 					 struct kvm_vcpu_init *init)
 {
@@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 		return ret;
 
 	/*
+	 * Ensure a rebooted VM will fault in RAM pages and detect if the
+	 * guest MMU is turned off and flush the caches as needed.
+	 */
+	if (vcpu->arch.has_run_once)
+		stage2_unmap_vm(vcpu->kvm);
+
+	vcpu_reset_hcr(vcpu);
+
+	/*
 	 * Handle the "start in power-off" case by marking the VCPU as paused.
 	 */
-	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		vcpu->arch.pause = true;
+	else
+		vcpu->arch.pause = false;
 
 	return 0;
 }
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b78769bd8..384bab67c462 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
@@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	}
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-
-	/* We can only cope with guest==host and only on A15/A7 (for now). */
-	if (init->target != kvm_target_cpu())
-		return -EINVAL;
-
-	vcpu->arch.target = init->target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (test_bit(i, (void *)init->features)) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93182e9..5d3bfc0eb3f0 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
-	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
 
-	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
-		       KVM_TRACE_MMIO_READ_UNSATISFIED,
-		       mmio.len, fault_ipa,
-		       (mmio.is_write) ? data : 0);
+	if (mmio.is_write) {
+		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
+					       mmio.len);
 
-	if (mmio.is_write)
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
+			       fault_ipa, data);
 		mmio_write_buf(mmio.data, mmio.len, data);
+	} else {
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+			       fault_ipa, 0);
+	}
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 57a403a5c22b..3756dd3e85c2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -611,6 +611,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	hva_t hva = memslot->userspace_addr;
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = PAGE_SIZE * memslot->npages;
+	hva_t reg_end = hva + size;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we should
+	 * unmap any of them.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (!(vma->vm_flags & VM_PFNMAP)) {
+			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any regular RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_unmap_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm: The KVM struct pointer for the VM.
@@ -834,6 +899,11 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_dabt_iswrite(vcpu);
 }
 
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+	return !pfn_valid(pfn);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -847,6 +917,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct vm_area_struct *vma;
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
+	bool fault_ipa_uncached;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -904,7 +975,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
-	if (kvm_is_mmio_pfn(pfn))
+	if (kvm_is_device_pfn(pfn))
 		mem_type = PAGE_S2_DEVICE;
 
 	spin_lock(&kvm->mmu_lock);
@@ -913,6 +984,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
+	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
@@ -920,7 +993,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -928,7 +1002,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
@@ -1288,11 +1363,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		hva = vm_end;
 	} while (hva < reg_end);
 
-	if (ret) {
-		spin_lock(&kvm->mmu_lock);
+	spin_lock(&kvm->mmu_lock);
+	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-		spin_unlock(&kvm->mmu_lock);
-	}
+	else
+		stage2_flush_memslot(kvm, memslot);
+	spin_unlock(&kvm->mmu_lock);
 	return ret;
 }
 
@@ -1304,6 +1380,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
+	/*
+	 * Readonly memslots are not incoherent with the caches by definition,
+	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
+	 * that the guest may consider devices and hence map as uncached.
+	 * To prevent incoherency issues in these cases, tag all readonly
+	 * regions as incoherent.
+	 */
+	if (slot->flags & KVM_MEM_READONLY)
+		slot->flags |= KVM_MEMSLOT_INCOHERENT;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf37737ee2..58cb3248d277 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/preempt.h>
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
+	int i;
+	struct kvm_vcpu *tmp;
+
+	/*
+	 * The KVM ABI specifies that a system event exit may call KVM_RUN
+	 * again and may perform shutdown/reboot at a later time than when the
+	 * actual request is made. Since we are implementing PSCI and a
+	 * caller of PSCI reboot and shutdown expects that the system shuts
+	 * down or reboots immediately, let's make sure that VCPUs are not run
+	 * after this call is handled and before the VCPUs have been
+	 * re-initialized.
+	 */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		tmp->arch.pause = true;
+		kvm_vcpu_kick(tmp);
+	}
+
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
 	vcpu->run->system_event.type = type;
 	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5674a55b5518..8127e45e2637 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2012c4ba8d67..0b7dfdb931df 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -165,8 +165,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0caf7a59f6a1..14a74f136272 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 
 	if (!icache_is_aliasing()) {	/* PIPT */
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 76794692c20b..9535bd555d1d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
 	return 0;
 }
 
@@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	return -EINVAL;
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-	int phys_target = kvm_target_cpu();
-
-	if (init->target != phys_target)
-		return -EINVAL;
-
-	vcpu->arch.target = phys_target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (init->features[i / 32] & (1 << (i % 32))) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0c4c88c008ce..d89c6b828c96 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -664,6 +664,16 @@ struct msr_data {
 	u64 data;
 };
 
+struct kvm_lapic_irq {
+	u32 vector;
+	u32 delivery_mode;
+	u32 dest_mode;
+	u32 level;
+	u32 trig_mode;
+	u32 shorthand;
+	u32 dest_id;
+};
+
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f6945bef2cd1..94f643484300 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault);
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
-	pv_info.paravirt_enabled = 1;
+
+	/*
+	 * KVM isn't paravirt in the sense of paravirt_enabled. A KVM
+	 * guest kernel works like a bare metal kernel with additional
+	 * features, and paravirt_enabled is about features that are
+	 * missing.
+	 */
+	pv_info.paravirt_enabled = 0;
 
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 0bf3467d7f30..42caaef897c8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -261,7 +261,6 @@ void __init kvmclock_init(void)
 #endif
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
-	pv_info.paravirt_enabled = 1;
 	pv_info.name = "KVM";
 
 	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 38173343153f..9715d6ea7d72 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1861,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->src.val = (unsigned long)ctxt->eflags;
+	ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
 	return em_push(ctxt);
 }
 
@@ -2130,7 +2130,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -4172,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = {
 	/* 0x80 - 0xef */
 	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
 	/* 0xf0 - 0xf1 */
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
 	/* 0xf2 - 0xff */
 	N, N, X4(N), X8(N)
 };
@@ -4801,6 +4801,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 			goto done;
 		}
 
+		/* Instruction can only be executed in protected mode */
+		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+			rc = emulate_ud(ctxt);
+			goto done;
+		}
+
 		/* Privileged instruction can be executed only in CPL=0 */
 		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
 			if (ctxt->d & PrivUD)
@@ -4810,12 +4816,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 			goto done;
 		}
 
-		/* Instruction can only be executed in protected mode */
-		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-			rc = emulate_ud(ctxt);
-			goto done;
-		}
-
 		/* Do instruction specific permission checks */
 		if (ctxt->d & CheckPerm) {
 			rc = ctxt->check_perm(ctxt);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index deac8d509f2a..3c9195535ffc 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -44,6 +44,23 @@ struct rtc_status {
 	DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
 };
 
+union kvm_ioapic_redirect_entry {
+	u64 bits;
+	struct {
+		u8 vector;
+		u8 delivery_mode:3;
+		u8 dest_mode:1;
+		u8 delivery_status:1;
+		u8 polarity:1;
+		u8 remote_irr:1;
+		u8 trig_mode:1;
+		u8 mask:1;
+		u8 reserve:7;
+		u8 reserved[4];
+		u8 dest_id;
+	} fields;
+};
+
 struct kvm_ioapic {
 	u64 base_address;
 	u32 ioregsel;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4ea0dcb0b21b..10fbed126b11 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -629,7 +629,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 	 * kvm mmu, before reclaiming the page, we should
 	 * unmap it from mmu first.
 	 */
-	WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+	WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
 
 	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
@@ -2460,7 +2460,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-			kvm_is_mmio_pfn(pfn));
+			kvm_is_reserved_pfn(pfn));
 
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
@@ -2736,7 +2736,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 	 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
 	 * here.
 	 */
-	if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
 	    level == PT_PAGE_TABLE_LEVEL &&
 	    PageTransCompound(pfn_to_page(pfn)) &&
 	    !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9bcc871f0635..feb852b04598 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2399,13 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_UNRESTRICTED_GUEST |
 		SECONDARY_EXEC_WBINVD_EXITING |
 		SECONDARY_EXEC_XSAVES;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			VMX_EPT_INVEPT_BIT;