path: root/arch
author     Linus Torvalds <torvalds@linux-foundation.org>  2014-01-23 00:40:43 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-01-23 00:40:43 -0500
commit     7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (patch)
tree       c45acf88b7976dcec117b6a3dbe31a7fe710ef33 /arch
parent     bb1281f2aae08e5ef23eb0692c8833e95579cdf2 (diff)
parent     7650b6870930055426abb32cc47d164ccdea49db (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "First round of KVM updates for 3.14; PPC parts will come next week.

  Nothing major here, just bugfixes all over the place. The most
  interesting part is the ARM guys' virtualized interrupt controller
  overhaul, which lets userspace get/set the state and thus enables
  migration of ARM VMs"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (67 commits)
  kvm: make KVM_MMU_AUDIT help text more readable
  KVM: s390: Fix memory access error detection
  KVM: nVMX: Update guest activity state field on L2 exits
  KVM: nVMX: Fix nested_run_pending on activity state HLT
  KVM: nVMX: Clean up handling of VMX-related MSRs
  KVM: nVMX: Add tracepoints for nested_vmexit and nested_vmexit_inject
  KVM: nVMX: Pass vmexit parameters to nested_vmx_vmexit
  KVM: nVMX: Leave VMX mode on clearing of feature control MSR
  KVM: VMX: Fix DR6 update on #DB exception
  KVM: SVM: Fix reading of DR6
  KVM: x86: Sync DR7 on KVM_SET_DEBUGREGS
  add support for Hyper-V reference time counter
  KVM: remove useless write to vcpu->hv_clock.tsc_timestamp
  KVM: x86: fix tsc catchup issue with tsc scaling
  KVM: x86: limit PIT timer frequency
  KVM: x86: handle invalid root_hpa everywhere
  kvm: Provide kvm_vcpu_eligible_for_directed_yield() stub
  kvm: vfio: silence GCC warning
  KVM: ARM: Remove duplicate include
  arm/arm64: KVM: relax the requirements of VMA alignment for THP
  ...
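The ARM/arm64 changes in this pull expose the architected timer state through the ONE_REG interface (KVM_REG_ARM_TIMER_CTL, KVM_REG_ARM_TIMER_CNT, KVM_REG_ARM_TIMER_CVAL in the uapi headers below), which is what lets userspace save and restore that state for migration. As a minimal, hedged sketch of how such a register might be read from userspace — "vcpu_fd" is assumed to be a vCPU file descriptor obtained via KVM_CREATE_VCPU and is not part of this commit:

/*
 * Sketch only, not code from this merge: read the guest virtual timer
 * count via KVM_GET_ONE_REG, using the KVM_REG_ARM_TIMER_CNT index
 * added to the ARM uapi headers in this series.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_guest_timer_cnt(int vcpu_fd, uint64_t *cnt)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_TIMER_CNT,     /* 64-bit virtual counter register */
		.addr = (uint64_t)(uintptr_t)cnt,  /* userspace buffer for the value */
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0) {
		perror("KVM_GET_ONE_REG(KVM_REG_ARM_TIMER_CNT)");
		return -1;
	}
	return 0;
}

Writing the state back on the destination host uses the same kvm_one_reg structure with KVM_SET_ONE_REG, which is how the "migration of ARM VMs" point in the shortlog is realized for the timer.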
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/include/asm/kvm_host.h        |   3
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h         |   1
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h        |  28
-rw-r--r--  arch/arm/kvm/arm.c                     |  49
-rw-r--r--  arch/arm/kvm/guest.c                   |  92
-rw-r--r--  arch/arm/kvm/handle_exit.c             |   2
-rw-r--r--  arch/arm/kvm/mmu.c                     |  24
-rw-r--r--  arch/arm/kvm/psci.c                    |  11
-rw-r--r--  arch/arm64/include/asm/kvm_host.h      |   7
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h       |   1
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h      |  21
-rw-r--r--  arch/arm64/kvm/Kconfig                 |  11
-rw-r--r--  arch/arm64/kvm/guest.c                 |  32
-rw-r--r--  arch/arm64/kvm/handle_exit.c           |   3
-rw-r--r--  arch/arm64/kvm/sys_regs_generic_v8.c   |   3
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c               |   2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c           |   2
-rw-r--r--  arch/s390/include/asm/sigp.h           |   2
-rw-r--r--  arch/s390/kvm/diag.c                   |   4
-rw-r--r--  arch/s390/kvm/kvm-s390.c               |  55
-rw-r--r--  arch/s390/kvm/kvm-s390.h               |  10
-rw-r--r--  arch/s390/kvm/priv.c                   |   4
-rw-r--r--  arch/s390/kvm/sigp.c                   | 120
-rw-r--r--  arch/s390/kvm/trace.h                  |   1
-rw-r--r--  arch/x86/include/asm/kvm_host.h        |   3
-rw-r--r--  arch/x86/include/asm/vmx.h             |   1
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h     |  13
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h  |   1
-rw-r--r--  arch/x86/kvm/Kconfig                   |   2
-rw-r--r--  arch/x86/kvm/i8254.c                   |  18
-rw-r--r--  arch/x86/kvm/lapic.c                   |   9
-rw-r--r--  arch/x86/kvm/mmu.c                     |  12
-rw-r--r--  arch/x86/kvm/paging_tmpl.h             |   8
-rw-r--r--  arch/x86/kvm/svm.c                     |  15
-rw-r--r--  arch/x86/kvm/vmx.c                     | 323
-rw-r--r--  arch/x86/kvm/x86.c                     | 101
-rw-r--r--  arch/x86/kvm/x86.h                     |   2
37 files changed, 711 insertions, 285 deletions
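One of the x86 items, "add support for Hyper-V reference time counter", adds the HV_X64_MSR_REFERENCE_TSC MSR and the HV_REFERENCE_TSC_PAGE layout to arch/x86/include/uapi/asm/hyperv.h in the diff below. As a rough, hedged sketch of how a guest typically consumes such a page — this follows the Hyper-V TLFS convention, not code contained in this merge, and omits memory barriers and the MSR fallback path:

/*
 * Sketch only: guest-side read of a Hyper-V reference TSC page.
 * Uses the GCC/Clang __int128 extension for the 64x64->128 multiply.
 */
#include <stdint.h>

typedef struct _HV_REFERENCE_TSC_PAGE {
	uint32_t tsc_sequence;
	uint32_t res1;
	uint64_t tsc_scale;
	int64_t  tsc_offset;
} HV_REFERENCE_TSC_PAGE;

static uint64_t hv_read_reference_time(const volatile HV_REFERENCE_TSC_PAGE *p,
				       uint64_t (*read_tsc)(void))
{
	uint32_t seq;
	uint64_t scale, tsc;
	int64_t offset;

	do {
		seq = p->tsc_sequence;
		if (seq == 0)
			return 0;  /* page not valid: fall back to HV_X64_MSR_TIME_REF_COUNT */
		scale  = p->tsc_scale;
		offset = p->tsc_offset;
		tsc    = read_tsc();
	} while (p->tsc_sequence != seq);   /* seqlock-style retry if the host updated the page */

	/* reference time = high 64 bits of (tsc * scale), plus the signed offset */
	return (uint64_t)(((unsigned __int128)tsc * scale) >> 64) + (uint64_t)offset;
}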
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a6f6db14ee4..098f7dd6d564 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
225int kvm_perf_init(void); 225int kvm_perf_init(void);
226int kvm_perf_teardown(void); 226int kvm_perf_teardown(void);
227 227
228u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
229int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
230
228#endif /* __ARM_KVM_HOST_H__ */ 231#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 77de4a41cc50..2d122adcdb22 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
140} 140}
141 141
142#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) 142#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
143#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x))
143 144
144#endif /* !__ASSEMBLY__ */ 145#endif /* !__ASSEMBLY__ */
145 146
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c498b60c0505..ef0c8785ba16 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -119,6 +119,26 @@ struct kvm_arch_memory_slot {
119#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 119#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
120#define KVM_REG_ARM_32_CRN_SHIFT 11 120#define KVM_REG_ARM_32_CRN_SHIFT 11
121 121
122#define ARM_CP15_REG_SHIFT_MASK(x,n) \
123 (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
124
125#define __ARM_CP15_REG(op1,crn,crm,op2) \
126 (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
127 ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
128 ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
129 ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
130 ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
131
132#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
133
134#define __ARM_CP15_REG64(op1,crm) \
135 (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
136#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
137
138#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
139#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
140#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
141
122/* Normal registers are mapped as coprocessor 16. */ 142/* Normal registers are mapped as coprocessor 16. */
123#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) 143#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
124#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) 144#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
@@ -143,6 +163,14 @@ struct kvm_arch_memory_slot {
143#define KVM_REG_ARM_VFP_FPINST 0x1009 163#define KVM_REG_ARM_VFP_FPINST 0x1009
144#define KVM_REG_ARM_VFP_FPINST2 0x100A 164#define KVM_REG_ARM_VFP_FPINST2 0x100A
145 165
166/* Device Control API: ARM VGIC */
167#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
168#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
169#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
170#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
171#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
172#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
173#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
146 174
147/* KVM_IRQ_LINE irq field index values */ 175/* KVM_IRQ_LINE irq field index values */
148#define KVM_ARM_IRQ_TYPE_SHIFT 24 176#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b18165ca1d38..1d8248ea5669 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -138,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
138 if (ret) 138 if (ret)
139 goto out_free_stage2_pgd; 139 goto out_free_stage2_pgd;
140 140
141 kvm_timer_init(kvm);
142
141 /* Mark the initial VMID generation invalid */ 143 /* Mark the initial VMID generation invalid */
142 kvm->arch.vmid_gen = 0; 144 kvm->arch.vmid_gen = 0;
143 145
@@ -189,6 +191,7 @@ int kvm_dev_ioctl_check_extension(long ext)
189 case KVM_CAP_IRQCHIP: 191 case KVM_CAP_IRQCHIP:
190 r = vgic_present; 192 r = vgic_present;
191 break; 193 break;
194 case KVM_CAP_DEVICE_CTRL:
192 case KVM_CAP_USER_MEMORY: 195 case KVM_CAP_USER_MEMORY:
193 case KVM_CAP_SYNC_MMU: 196 case KVM_CAP_SYNC_MMU:
194 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 197 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -340,6 +343,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
340 343
341void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 344void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
342{ 345{
346 /*
347 * The arch-generic KVM code expects the cpu field of a vcpu to be -1
348 * if the vcpu is no longer assigned to a cpu. This is used for the
349 * optimized make_all_cpus_request path.
350 */
351 vcpu->cpu = -1;
352
343 kvm_arm_set_running_vcpu(NULL); 353 kvm_arm_set_running_vcpu(NULL);
344} 354}
345 355
@@ -463,6 +473,8 @@ static void update_vttbr(struct kvm *kvm)
463 473
464static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) 474static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
465{ 475{
476 int ret;
477
466 if (likely(vcpu->arch.has_run_once)) 478 if (likely(vcpu->arch.has_run_once))
467 return 0; 479 return 0;
468 480
@@ -472,22 +484,12 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
472 * Initialize the VGIC before running a vcpu the first time on 484 * Initialize the VGIC before running a vcpu the first time on
473 * this VM. 485 * this VM.
474 */ 486 */
475 if (irqchip_in_kernel(vcpu->kvm) && 487 if (unlikely(!vgic_initialized(vcpu->kvm))) {
476 unlikely(!vgic_initialized(vcpu->kvm))) { 488 ret = kvm_vgic_init(vcpu->kvm);
477 int ret = kvm_vgic_init(vcpu->kvm);
478 if (ret) 489 if (ret)
479 return ret; 490 return ret;
480 } 491 }
481 492
482 /*
483 * Handle the "start in power-off" case by calling into the
484 * PSCI code.
485 */
486 if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
487 *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
488 kvm_psci_call(vcpu);
489 }
490
491 return 0; 493 return 0;
492} 494}
493 495
@@ -701,6 +703,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
701 return -EINVAL; 703 return -EINVAL;
702} 704}
703 705
706static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
707 struct kvm_vcpu_init *init)
708{
709 int ret;
710
711 ret = kvm_vcpu_set_target(vcpu, init);
712 if (ret)
713 return ret;
714
715 /*
716 * Handle the "start in power-off" case by marking the VCPU as paused.
717 */
718 if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
719 vcpu->arch.pause = true;
720
721 return 0;
722}
723
704long kvm_arch_vcpu_ioctl(struct file *filp, 724long kvm_arch_vcpu_ioctl(struct file *filp,
705 unsigned int ioctl, unsigned long arg) 725 unsigned int ioctl, unsigned long arg)
706{ 726{
@@ -714,8 +734,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
714 if (copy_from_user(&init, argp, sizeof(init))) 734 if (copy_from_user(&init, argp, sizeof(init)))
715 return -EFAULT; 735 return -EFAULT;
716 736
717 return kvm_vcpu_set_target(vcpu, &init); 737 return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
718
719 } 738 }
720 case KVM_SET_ONE_REG: 739 case KVM_SET_ONE_REG:
721 case KVM_GET_ONE_REG: { 740 case KVM_GET_ONE_REG: {
@@ -773,7 +792,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
773 case KVM_ARM_DEVICE_VGIC_V2: 792 case KVM_ARM_DEVICE_VGIC_V2:
774 if (!vgic_present) 793 if (!vgic_present)
775 return -ENXIO; 794 return -ENXIO;
776 return kvm_vgic_set_addr(kvm, type, dev_addr->addr); 795 return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
777 default: 796 default:
778 return -ENODEV; 797 return -ENODEV;
779 } 798 }
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 20f8d97904af..2786eae10c0d 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
109 return -EINVAL; 109 return -EINVAL;
110} 110}
111 111
112#ifndef CONFIG_KVM_ARM_TIMER
113
114#define NUM_TIMER_REGS 0
115
116static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
117{
118 return 0;
119}
120
121static bool is_timer_reg(u64 index)
122{
123 return false;
124}
125
126int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
127{
128 return 0;
129}
130
131u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
132{
133 return 0;
134}
135
136#else
137
138#define NUM_TIMER_REGS 3
139
140static bool is_timer_reg(u64 index)
141{
142 switch (index) {
143 case KVM_REG_ARM_TIMER_CTL:
144 case KVM_REG_ARM_TIMER_CNT:
145 case KVM_REG_ARM_TIMER_CVAL:
146 return true;
147 }
148 return false;
149}
150
151static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
152{
153 if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
154 return -EFAULT;
155 uindices++;
156 if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
157 return -EFAULT;
158 uindices++;
159 if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
160 return -EFAULT;
161
162 return 0;
163}
164
165#endif
166
167static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
168{
169 void __user *uaddr = (void __user *)(long)reg->addr;
170 u64 val;
171 int ret;
172
173 ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
174 if (ret != 0)
175 return ret;
176
177 return kvm_arm_timer_set_reg(vcpu, reg->id, val);
178}
179
180static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
181{
182 void __user *uaddr = (void __user *)(long)reg->addr;
183 u64 val;
184
185 val = kvm_arm_timer_get_reg(vcpu, reg->id);
186 return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
187}
188
112static unsigned long num_core_regs(void) 189static unsigned long num_core_regs(void)
113{ 190{
114 return sizeof(struct kvm_regs) / sizeof(u32); 191 return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +198,8 @@ static unsigned long num_core_regs(void)
121 */ 198 */
122unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) 199unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
123{ 200{
124 return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); 201 return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
202 + NUM_TIMER_REGS;
125} 203}
126 204
127/** 205/**
@@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
133{ 211{
134 unsigned int i; 212 unsigned int i;
135 const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; 213 const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
214 int ret;
136 215
137 for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { 216 for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
138 if (put_user(core_reg | i, uindices)) 217 if (put_user(core_reg | i, uindices))
@@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
140 uindices++; 219 uindices++;
141 } 220 }
142 221
222 ret = copy_timer_indices(vcpu, uindices);
223 if (ret)
224 return ret;
225 uindices += NUM_TIMER_REGS;
226
143 return kvm_arm_copy_coproc_indices(vcpu, uindices); 227 return kvm_arm_copy_coproc_indices(vcpu, uindices);
144} 228}
145 229
@@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
153 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) 237 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
154 return get_core_reg(vcpu, reg); 238 return get_core_reg(vcpu, reg);
155 239
240 if (is_timer_reg(reg->id))
241 return get_timer_reg(vcpu, reg);
242
156 return kvm_arm_coproc_get_reg(vcpu, reg); 243 return kvm_arm_coproc_get_reg(vcpu, reg);
157} 244}
158 245
@@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
166 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) 253 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
167 return set_core_reg(vcpu, reg); 254 return set_core_reg(vcpu, reg);
168 255
256 if (is_timer_reg(reg->id))
257 return set_timer_reg(vcpu, reg);
258
169 return kvm_arm_coproc_set_reg(vcpu, reg); 259 return kvm_arm_coproc_set_reg(vcpu, reg);
170} 260}
171 261
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a92079011a83..0de91fc6de0f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
26 26
27#include "trace.h" 27#include "trace.h"
28 28
29#include "trace.h"
30
31typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); 29typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
32 30
33static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) 31static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 580906989db1..7789857d1470 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
667 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; 667 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
668 } else { 668 } else {
669 /* 669 /*
670 * Pages belonging to VMAs not aligned to the PMD mapping 670 * Pages belonging to memslots that don't have the same
671 * granularity cannot be mapped using block descriptors even 671 * alignment for userspace and IPA cannot be mapped using
672 * if the pages belong to a THP for the process, because the 672 * block descriptors even if the pages belong to a THP for
673 * stage-2 block descriptor will cover more than a single THP 673 * the process, because the stage-2 block descriptor will
674 * and we loose atomicity for unmapping, updates, and splits 674 * cover more than a single THP and we loose atomicity for
675 * of the THP or other pages in the stage-2 block range. 675 * unmapping, updates, and splits of the THP or other pages
676 * in the stage-2 block range.
676 */ 677 */
677 if (vma->vm_start & ~PMD_MASK) 678 if ((memslot->userspace_addr & ~PMD_MASK) !=
679 ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
678 force_pte = true; 680 force_pte = true;
679 } 681 }
680 up_read(&current->mm->mmap_sem); 682 up_read(&current->mm->mmap_sem);
@@ -916,9 +918,9 @@ int kvm_mmu_init(void)
916{ 918{
917 int err; 919 int err;
918 920
919 hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); 921 hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
920 hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); 922 hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
921 hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); 923 hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
922 924
923 if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { 925 if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
924 /* 926 /*
@@ -945,7 +947,7 @@ int kvm_mmu_init(void)
945 */ 947 */
946 kvm_flush_dcache_to_poc(init_bounce_page, len); 948 kvm_flush_dcache_to_poc(init_bounce_page, len);
947 949
948 phys_base = virt_to_phys(init_bounce_page); 950 phys_base = kvm_virt_to_phys(init_bounce_page);
949 hyp_idmap_vector += phys_base - hyp_idmap_start; 951 hyp_idmap_vector += phys_base - hyp_idmap_start;
950 hyp_idmap_start = phys_base; 952 hyp_idmap_start = phys_base;
951 hyp_idmap_end = phys_base + len; 953 hyp_idmap_end = phys_base + len;
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 0881bf169fbc..448f60e8d23c 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
54 } 54 }
55 } 55 }
56 56
57 if (!vcpu) 57 /*
58 * Make sure the caller requested a valid CPU and that the CPU is
59 * turned off.
60 */
61 if (!vcpu || !vcpu->arch.pause)
58 return KVM_PSCI_RET_INVAL; 62 return KVM_PSCI_RET_INVAL;
59 63
60 target_pc = *vcpu_reg(source_vcpu, 2); 64 target_pc = *vcpu_reg(source_vcpu, 2);
61 65
62 wq = kvm_arch_vcpu_wq(vcpu);
63 if (!waitqueue_active(wq))
64 return KVM_PSCI_RET_INVAL;
65
66 kvm_reset_vcpu(vcpu); 66 kvm_reset_vcpu(vcpu);
67 67
68 /* Gracefully handle Thumb2 entry point */ 68 /* Gracefully handle Thumb2 entry point */
@@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
79 vcpu->arch.pause = false; 79 vcpu->arch.pause = false;
80 smp_mb(); /* Make sure the above is visible */ 80 smp_mb(); /* Make sure the above is visible */
81 81
82 wq = kvm_arch_vcpu_wq(vcpu);
82 wake_up_interruptible(wq); 83 wake_up_interruptible(wq);
83 84
84 return KVM_PSCI_RET_SUCCESS; 85 return KVM_PSCI_RET_SUCCESS;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5d85a02d1231..0a1d69751562 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -26,7 +26,12 @@
26#include <asm/kvm_asm.h> 26#include <asm/kvm_asm.h>
27#include <asm/kvm_mmio.h> 27#include <asm/kvm_mmio.h>
28 28
29#define KVM_MAX_VCPUS 4 29#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
30#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
31#else
32#define KVM_MAX_VCPUS 0
33#endif
34
30#define KVM_USER_MEM_SLOTS 32 35#define KVM_USER_MEM_SLOTS 32
31#define KVM_PRIVATE_MEM_SLOTS 4 36#define KVM_PRIVATE_MEM_SLOTS 4
32#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 37#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 680f74e67497..7f1f9408ff66 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
136} 136}
137 137
138#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) 138#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
139#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
139 140
140#endif /* __ASSEMBLY__ */ 141#endif /* __ASSEMBLY__ */
141#endif /* __ARM64_KVM_MMU_H__ */ 142#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5031f4263937..495ab6f84a61 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -55,8 +55,9 @@ struct kvm_regs {
55#define KVM_ARM_TARGET_AEM_V8 0 55#define KVM_ARM_TARGET_AEM_V8 0
56#define KVM_ARM_TARGET_FOUNDATION_V8 1 56#define KVM_ARM_TARGET_FOUNDATION_V8 1
57#define KVM_ARM_TARGET_CORTEX_A57 2 57#define KVM_ARM_TARGET_CORTEX_A57 2
58#define KVM_ARM_TARGET_XGENE_POTENZA 3
58 59
59#define KVM_ARM_NUM_TARGETS 3 60#define KVM_ARM_NUM_TARGETS 4
60 61
61/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 62/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
62#define KVM_ARM_DEVICE_TYPE_SHIFT 0 63#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -129,6 +130,24 @@ struct kvm_arch_memory_slot {
129#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 130#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007
130#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 131#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0
131 132
133#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
134 (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
135 KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
136
137#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
138 (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
139 ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
140 ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
141 ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
142 ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
143 ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
144
145#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
146
147#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
148#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
149#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
150
132/* KVM_IRQ_LINE irq field index values */ 151/* KVM_IRQ_LINE irq field index values */
133#define KVM_ARM_IRQ_TYPE_SHIFT 24 152#define KVM_ARM_IRQ_TYPE_SHIFT 24
134#define KVM_ARM_IRQ_TYPE_MASK 0xff 153#define KVM_ARM_IRQ_TYPE_MASK 0xff
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4480ab339a00..8ba85e9ea388 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,17 @@ config KVM_ARM_HOST
36 ---help--- 36 ---help---
37 Provides host support for ARM processors. 37 Provides host support for ARM processors.
38 38
39config KVM_ARM_MAX_VCPUS
40 int "Number maximum supported virtual CPUs per VM"
41 depends on KVM_ARM_HOST
42 default 4
43 help
44 Static number of max supported virtual CPUs per VM.
45
46 If you choose a high number, the vcpu structures will be quite
47 large, so only choose a reasonable number that you expect to
48 actually use.
49
39config KVM_ARM_VGIC 50config KVM_ARM_VGIC
40 bool 51 bool
41 depends on KVM_ARM_HOST && OF 52 depends on KVM_ARM_HOST && OF
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3f0731e53274..08745578d54d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void)
207 unsigned long implementor = read_cpuid_implementor(); 207 unsigned long implementor = read_cpuid_implementor();
208 unsigned long part_number = read_cpuid_part_number(); 208 unsigned long part_number = read_cpuid_part_number();
209 209
210 if (implementor != ARM_CPU_IMP_ARM) 210 switch (implementor) {
211 return -EINVAL; 211 case ARM_CPU_IMP_ARM:
212 switch (part_number) {
213 case ARM_CPU_PART_AEM_V8:
214 return KVM_ARM_TARGET_AEM_V8;
215 case ARM_CPU_PART_FOUNDATION:
216 return KVM_ARM_TARGET_FOUNDATION_V8;
217 case ARM_CPU_PART_CORTEX_A57:
218 return KVM_ARM_TARGET_CORTEX_A57;
219 };
220 break;
221 case ARM_CPU_IMP_APM:
222 switch (part_number) {
223 case APM_CPU_PART_POTENZA:
224 return KVM_ARM_TARGET_XGENE_POTENZA;
225 };
226 break;
227 };
212 228
213 switch (part_number) { 229 return -EINVAL;
214 case ARM_CPU_PART_AEM_V8:
215 return KVM_ARM_TARGET_AEM_V8;
216 case ARM_CPU_PART_FOUNDATION:
217 return KVM_ARM_TARGET_FOUNDATION_V8;
218 case ARM_CPU_PART_CORTEX_A57:
219 /* Currently handled by the generic backend */
220 return KVM_ARM_TARGET_CORTEX_A57;
221 default:
222 return -EINVAL;
223 }
224} 230}
225 231
226int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 232int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 42a0f1bddfe7..7bc41eab4c64 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
39 39
40static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) 40static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
41{ 41{
42 if (kvm_psci_call(vcpu))
43 return 1;
44
45 kvm_inject_undefined(vcpu); 42 kvm_inject_undefined(vcpu);
46 return 1; 43 return 1;
47} 44}
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 4268ab9356b1..8fe6f76b0edc 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void)
90 &genericv8_target_table); 90 &genericv8_target_table);
91 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, 91 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
92 &genericv8_target_table); 92 &genericv8_target_table);
93 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
94 &genericv8_target_table);
95
93 return 0; 96 return 0;
94} 97}
95late_initcall(sys_reg_genericv8_init); 98late_initcall(sys_reg_genericv8_init);
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 985bf80c622e..53f44bee9ebb 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -702,7 +702,7 @@ again:
702out: 702out:
703 srcu_read_unlock(&vcpu->kvm->srcu, idx); 703 srcu_read_unlock(&vcpu->kvm->srcu, idx);
704 if (r > 0) { 704 if (r > 0) {
705 kvm_resched(vcpu); 705 cond_resched();
706 idx = srcu_read_lock(&vcpu->kvm->srcu); 706 idx = srcu_read_lock(&vcpu->kvm->srcu);
707 goto again; 707 goto again;
708 } 708 }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b51d5db78068..3818bd95327c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1352,7 +1352,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1352 kvm_guest_exit(); 1352 kvm_guest_exit();
1353 1353
1354 preempt_enable(); 1354 preempt_enable();
1355 kvm_resched(vcpu); 1355 cond_resched();
1356 1356
1357 spin_lock(&vc->lock); 1357 spin_lock(&vc->lock);
1358 now = get_tb(); 1358 now = get_tb();
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 5a87d16d3e7c..d091aa1aaf11 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -5,6 +5,7 @@
5#define SIGP_SENSE 1 5#define SIGP_SENSE 1
6#define SIGP_EXTERNAL_CALL 2 6#define SIGP_EXTERNAL_CALL 2
7#define SIGP_EMERGENCY_SIGNAL 3 7#define SIGP_EMERGENCY_SIGNAL 3
8#define SIGP_START 4
8#define SIGP_STOP 5 9#define SIGP_STOP 5
9#define SIGP_RESTART 6 10#define SIGP_RESTART 6
10#define SIGP_STOP_AND_STORE_STATUS 9 11#define SIGP_STOP_AND_STORE_STATUS 9
@@ -12,6 +13,7 @@
12#define SIGP_SET_PREFIX 13 13#define SIGP_SET_PREFIX 13
13#define SIGP_STORE_STATUS_AT_ADDRESS 14 14#define SIGP_STORE_STATUS_AT_ADDRESS 14
14#define SIGP_SET_ARCHITECTURE 18 15#define SIGP_SET_ARCHITECTURE 18
16#define SIGP_COND_EMERGENCY_SIGNAL 19
15#define SIGP_SENSE_RUNNING 21 17#define SIGP_SENSE_RUNNING 21
16 18
17/* SIGP condition codes */ 19/* SIGP condition codes */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 78d967f180f4..8216c0e0b2e2 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
121 * - gpr 4 contains the index on the bus (optionally) 121 * - gpr 4 contains the index on the bus (optionally)
122 */ 122 */
123 ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, 123 ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
124 vcpu->run->s.regs.gprs[2], 124 vcpu->run->s.regs.gprs[2] & 0xffffffff,
125 8, &vcpu->run->s.regs.gprs[3], 125 8, &vcpu->run->s.regs.gprs[3],
126 vcpu->run->s.regs.gprs[4]); 126 vcpu->run->s.regs.gprs[4]);
127 127
@@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
137 137
138int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) 138int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
139{ 139{
140 int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; 140 int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
141 141
142 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 142 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
143 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 143 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 569494e01ec6..7635c00a1479 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -732,14 +732,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
732 732
733 if (exit_reason >= 0) { 733 if (exit_reason >= 0) {
734 rc = 0; 734 rc = 0;
735 } else if (kvm_is_ucontrol(vcpu->kvm)) {
736 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
737 vcpu->run->s390_ucontrol.trans_exc_code =
738 current->thread.gmap_addr;
739 vcpu->run->s390_ucontrol.pgm_code = 0x10;
740 rc = -EREMOTE;
735 } else { 741 } else {
736 if (kvm_is_ucontrol(vcpu->kvm)) { 742 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
737 rc = SIE_INTERCEPT_UCONTROL; 743 trace_kvm_s390_sie_fault(vcpu);
738 } else { 744 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
739 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
740 trace_kvm_s390_sie_fault(vcpu);
741 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
742 }
743 } 745 }
744 746
745 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); 747 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
@@ -833,16 +835,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
833 rc = -EINTR; 835 rc = -EINTR;
834 } 836 }
835 837
836#ifdef CONFIG_KVM_S390_UCONTROL
837 if (rc == SIE_INTERCEPT_UCONTROL) {
838 kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
839 kvm_run->s390_ucontrol.trans_exc_code =
840 current->thread.gmap_addr;
841 kvm_run->s390_ucontrol.pgm_code = 0x10;
842 rc = 0;
843 }
844#endif
845
846 if (rc == -EOPNOTSUPP) { 838 if (rc == -EOPNOTSUPP) {
847 /* intercept cannot be handled in-kernel, prepare kvm-run */ 839 /* intercept cannot be handled in-kernel, prepare kvm-run */
848 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; 840 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
@@ -885,10 +877,11 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
885 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 877 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
886 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 878 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
887 */ 879 */
888int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 880int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
889{ 881{
890 unsigned char archmode = 1; 882 unsigned char archmode = 1;
891 int prefix; 883 int prefix;
884 u64 clkcomp;
892 885
893 if (addr == KVM_S390_STORE_STATUS_NOADDR) { 886 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
894 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) 887 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
@@ -903,15 +896,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
903 } else 896 } else
904 prefix = 0; 897 prefix = 0;
905 898
906 /*
907 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
908 * copying in vcpu load/put. Lets update our copies before we save
909 * it into the save area
910 */
911 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
912 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
913 save_access_regs(vcpu->run->s.regs.acrs);
914
915 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), 899 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
916 vcpu->arch.guest_fpregs.fprs, 128, prefix)) 900 vcpu->arch.guest_fpregs.fprs, 128, prefix))
917 return -EFAULT; 901 return -EFAULT;
@@ -941,8 +925,9 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
941 &vcpu->arch.sie_block->cputm, 8, prefix)) 925 &vcpu->arch.sie_block->cputm, 8, prefix))
942 return -EFAULT; 926 return -EFAULT;
943 927
928 clkcomp = vcpu->arch.sie_block->ckc >> 8;
944 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), 929 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
945 &vcpu->arch.sie_block->ckc, 8, prefix)) 930 &clkcomp, 8, prefix))
946 return -EFAULT; 931 return -EFAULT;
947 932
948 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), 933 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
@@ -956,6 +941,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
956 return 0; 941 return 0;
957} 942}
958 943
944int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
945{
946 /*
947 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
948 * copying in vcpu load/put. Lets update our copies before we save
949 * it into the save area
950 */
951 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
952 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
953 save_access_regs(vcpu->run->s.regs.acrs);
954
955 return kvm_s390_store_status_unloaded(vcpu, addr);
956}
957
959static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 958static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
960 struct kvm_enable_cap *cap) 959 struct kvm_enable_cap *cap)
961{ 960{
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b44912a32949..095cf51b16ec 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,16 +19,11 @@
19#include <linux/kvm.h> 19#include <linux/kvm.h>
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21 21
22/* The current code can have up to 256 pages for virtio */
23#define VIRTIODESCSPACE (256ul * 4096ul)
24
25typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); 22typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
26 23
27/* declare vfacilities extern */ 24/* declare vfacilities extern */
28extern unsigned long *vfacilities; 25extern unsigned long *vfacilities;
29 26
30/* negativ values are error codes, positive values for internal conditions */
31#define SIE_INTERCEPT_UCONTROL (1<<0)
32int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); 27int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
33 28
34#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ 29#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -133,7 +128,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
133int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 128int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
134 struct kvm_s390_interrupt *s390int); 129 struct kvm_s390_interrupt *s390int);
135int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); 130int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
136int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
137struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, 131struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
138 u64 cr6, u64 schid); 132 u64 cr6, u64 schid);
139 133
@@ -150,8 +144,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
150int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); 144int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
151 145
152/* implemented in kvm-s390.c */ 146/* implemented in kvm-s390.c */
153int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, 147int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
154 unsigned long addr); 148int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
155void s390_vcpu_block(struct kvm_vcpu *vcpu); 149void s390_vcpu_block(struct kvm_vcpu *vcpu);
156void s390_vcpu_unblock(struct kvm_vcpu *vcpu); 150void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
157void exit_sie(struct kvm_vcpu *vcpu); 151void exit_sie(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d101dae62771..75beea632a10 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
197 if (addr & 3) 197 if (addr & 3)
198 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 198 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
199 cc = 0; 199 cc = 0;
200 inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); 200 inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
201 if (!inti) 201 if (!inti)
202 goto no_interrupt; 202 goto no_interrupt;
203 cc = 1; 203 cc = 1;
@@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
638 638
639static const intercept_handler_t b9_handlers[256] = { 639static const intercept_handler_t b9_handlers[256] = {
640 [0x8d] = handle_epsw, 640 [0x8d] = handle_epsw,
641 [0x9c] = handle_io_inst,
642 [0xaf] = handle_pfmf, 641 [0xaf] = handle_pfmf,
643}; 642};
644 643
@@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
731 730
732static const intercept_handler_t eb_handlers[256] = { 731static const intercept_handler_t eb_handlers[256] = {
733 [0x2f] = handle_lctlg, 732 [0x2f] = handle_lctlg,
734 [0x8a] = handle_io_inst,
735}; 733};
736 734
737int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) 735int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index bec398c57acf..87c2b3a3bd3e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * handling interprocessor communication 2 * handling interprocessor communication
3 * 3 *
4 * Copyright IBM Corp. 2008, 2009 4 * Copyright IBM Corp. 2008, 2013
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only) 7 * it under the terms of the GNU General Public License (version 2 only)
@@ -89,6 +89,37 @@ unlock:
89 return rc; 89 return rc;
90} 90}
91 91
92static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
93 u16 asn, u64 *reg)
94{
95 struct kvm_vcpu *dst_vcpu = NULL;
96 const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
97 u16 p_asn, s_asn;
98 psw_t *psw;
99 u32 flags;
100
101 if (cpu_addr < KVM_MAX_VCPUS)
102 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
103 if (!dst_vcpu)
104 return SIGP_CC_NOT_OPERATIONAL;
105 flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
106 psw = &dst_vcpu->arch.sie_block->gpsw;
107 p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */
108 s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */
109
110 /* Deliver the emergency signal? */
111 if (!(flags & CPUSTAT_STOPPED)
112 || (psw->mask & psw_int_mask) != psw_int_mask
113 || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
114 || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
115 return __sigp_emergency(vcpu, cpu_addr);
116 } else {
117 *reg &= 0xffffffff00000000UL;
118 *reg |= SIGP_STATUS_INCORRECT_STATE;
119 return SIGP_CC_STATUS_STORED;
120 }
121}
122
92static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) 123static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
93{ 124{
94 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; 125 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
@@ -130,6 +161,7 @@ unlock:
130static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) 161static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
131{ 162{
132 struct kvm_s390_interrupt_info *inti; 163 struct kvm_s390_interrupt_info *inti;
164 int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
133 165
134 inti = kzalloc(sizeof(*inti), GFP_ATOMIC); 166 inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
135 if (!inti) 167 if (!inti)
@@ -139,6 +171,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
139 spin_lock_bh(&li->lock); 171 spin_lock_bh(&li->lock);
140 if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { 172 if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
141 kfree(inti); 173 kfree(inti);
174 if ((action & ACTION_STORE_ON_STOP) != 0)
175 rc = -ESHUTDOWN;
142 goto out; 176 goto out;
143 } 177 }
144 list_add_tail(&inti->list, &li->list); 178 list_add_tail(&inti->list, &li->list);
@@ -150,7 +184,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
150out: 184out:
151 spin_unlock_bh(&li->lock); 185 spin_unlock_bh(&li->lock);
152 186
153 return SIGP_CC_ORDER_CODE_ACCEPTED; 187 return rc;
154} 188}
155 189
156static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) 190static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
@@ -174,13 +208,17 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
174unlock: 208unlock:
175 spin_unlock(&fi->lock); 209 spin_unlock(&fi->lock);
176 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); 210 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
177 return rc;
178}
179 211
180int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action) 212 if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
181{ 213 /* If the CPU has already been stopped, we still have
182 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 214 * to save the status when doing stop-and-store. This
183 return __inject_sigp_stop(li, action); 215 * has to be done after unlocking all spinlocks. */
216 struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
217 rc = kvm_s390_store_status_unloaded(dst_vcpu,
218 KVM_S390_STORE_STATUS_NOADDR);
219 }
220
221 return rc;
184} 222}
185 223
186static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) 224static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
@@ -262,6 +300,37 @@ out_fi:
262 return rc; 300 return rc;
263} 301}
264 302
303static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
304 u32 addr, u64 *reg)
305{
306 struct kvm_vcpu *dst_vcpu = NULL;
307 int flags;
308 int rc;
309
310 if (cpu_id < KVM_MAX_VCPUS)
311 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
312 if (!dst_vcpu)
313 return SIGP_CC_NOT_OPERATIONAL;
314
315 spin_lock_bh(&dst_vcpu->arch.local_int.lock);
316 flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
317 spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
318 if (!(flags & CPUSTAT_STOPPED)) {
319 *reg &= 0xffffffff00000000UL;
320 *reg |= SIGP_STATUS_INCORRECT_STATE;
321 return SIGP_CC_STATUS_STORED;
322 }
323
324 addr &= 0x7ffffe00;
325 rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
326 if (rc == -EFAULT) {
327 *reg &= 0xffffffff00000000UL;
328 *reg |= SIGP_STATUS_INVALID_PARAMETER;
329 rc = SIGP_CC_STATUS_STORED;
330 }
331 return rc;
332}
333
265static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, 334static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
266 u64 *reg) 335 u64 *reg)
267{ 336{
@@ -294,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
294 return rc; 363 return rc;
295} 364}
296 365
297static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) 366/* Test whether the destination CPU is available and not busy */
367static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
298{ 368{
299 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; 369 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
300 struct kvm_s390_local_interrupt *li; 370 struct kvm_s390_local_interrupt *li;
@@ -313,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
313 spin_lock_bh(&li->lock); 383 spin_lock_bh(&li->lock);
314 if (li->action_bits & ACTION_STOP_ON_STOP) 384 if (li->action_bits & ACTION_STOP_ON_STOP)
315 rc = SIGP_CC_BUSY; 385 rc = SIGP_CC_BUSY;
316 else
317 VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace",
318 cpu_addr);
319 spin_unlock_bh(&li->lock); 386 spin_unlock_bh(&li->lock);
320out: 387out:
321 spin_unlock(&fi->lock); 388 spin_unlock(&fi->lock);
@@ -366,6 +433,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
366 rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | 433 rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
367 ACTION_STOP_ON_STOP); 434 ACTION_STOP_ON_STOP);
368 break; 435 break;
436 case SIGP_STORE_STATUS_AT_ADDRESS:
437 rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
438 &vcpu->run->s.regs.gprs[r1]);
439 break;
369 case SIGP_SET_ARCHITECTURE: 440 case SIGP_SET_ARCHITECTURE:
370 vcpu->stat.instruction_sigp_arch++; 441 vcpu->stat.instruction_sigp_arch++;
371 rc = __sigp_set_arch(vcpu, parameter); 442 rc = __sigp_set_arch(vcpu, parameter);
@@ -375,17 +446,31 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
375 rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, 446 rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
376 &vcpu->run->s.regs.gprs[r1]); 447 &vcpu->run->s.regs.gprs[r1]);
377 break; 448 break;
449 case SIGP_COND_EMERGENCY_SIGNAL:
450 rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
451 &vcpu->run->s.regs.gprs[r1]);
452 break;
378 case SIGP_SENSE_RUNNING: 453 case SIGP_SENSE_RUNNING:
379 vcpu->stat.instruction_sigp_sense_running++; 454 vcpu->stat.instruction_sigp_sense_running++;
380 rc = __sigp_sense_running(vcpu, cpu_addr, 455 rc = __sigp_sense_running(vcpu, cpu_addr,
381 &vcpu->run->s.regs.gprs[r1]); 456 &vcpu->run->s.regs.gprs[r1]);
382 break; 457 break;
458 case SIGP_START:
459 rc = sigp_check_callable(vcpu, cpu_addr);
460 if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
461 rc = -EOPNOTSUPP; /* Handle START in user space */
462 break;
383 case SIGP_RESTART: 463 case SIGP_RESTART:
384 vcpu->stat.instruction_sigp_restart++; 464 vcpu->stat.instruction_sigp_restart++;
385 rc = __sigp_restart(vcpu, cpu_addr); 465 rc = sigp_check_callable(vcpu, cpu_addr);
386 if (rc == SIGP_CC_BUSY) 466 if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
387 break; 467 VCPU_EVENT(vcpu, 4,
388 /* user space must know about restart */ 468 "sigp restart %x to handle userspace",
469 cpu_addr);
470 /* user space must know about restart */
471 rc = -EOPNOTSUPP;
472 }
473 break;
389 default: 474 default:
390 return -EOPNOTSUPP; 475 return -EOPNOTSUPP;
391 } 476 }
@@ -393,7 +478,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
393 if (rc < 0) 478 if (rc < 0)
394 return rc; 479 return rc;
395 480
396 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 481 kvm_s390_set_psw_cc(vcpu, rc);
397 vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
398 return 0; 482 return 0;
399} 483}
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 0c991c6748ab..3db76b2daed7 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity,
175 {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \ 175 {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \
176 {SIGP_SET_ARCHITECTURE, "set architecture"}, \ 176 {SIGP_SET_ARCHITECTURE, "set architecture"}, \
177 {SIGP_SET_PREFIX, "set prefix"}, \ 177 {SIGP_SET_PREFIX, "set prefix"}, \
178 {SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"}, \
178 {SIGP_SENSE_RUNNING, "sense running"}, \ 179 {SIGP_SENSE_RUNNING, "sense running"}, \
179 {SIGP_RESTART, "restart"} 180 {SIGP_RESTART, "restart"}
180 181
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d7830855c..fdf83afbb7d9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -605,6 +605,7 @@ struct kvm_arch {
605 /* fields used by HYPER-V emulation */ 605 /* fields used by HYPER-V emulation */
606 u64 hv_guest_os_id; 606 u64 hv_guest_os_id;
607 u64 hv_hypercall; 607 u64 hv_hypercall;
608 u64 hv_tsc_page;
608 609
609 #ifdef CONFIG_KVM_MMU_AUDIT 610 #ifdef CONFIG_KVM_MMU_AUDIT
610 int audit_point; 611 int audit_point;
@@ -699,6 +700,8 @@ struct kvm_x86_ops {
699 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 700 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
700 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 701 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
701 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 702 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
703 u64 (*get_dr6)(struct kvm_vcpu *vcpu);
704 void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
702 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); 705 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
703 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 706 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
704 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 707 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 966502d4682e..2067264fb7f5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -100,6 +100,7 @@
100 100
101#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f 101#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
102#define VMX_MISC_SAVE_EFER_LMA 0x00000020 102#define VMX_MISC_SAVE_EFER_LMA 0x00000020
103#define VMX_MISC_ACTIVITY_HLT 0x00000040
103 104
104/* VMCS Encodings */ 105/* VMCS Encodings */
105enum vmcs_field { 106enum vmcs_field {
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b8f1c0176cbc..462efe746d77 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -28,6 +28,9 @@
28/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ 28/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
29#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) 29#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
30 30
31/* A partition's reference time stamp counter (TSC) page */
32#define HV_X64_MSR_REFERENCE_TSC 0x40000021
33
31/* 34/*
32 * There is a single feature flag that signifies the presence of the MSR 35 * There is a single feature flag that signifies the presence of the MSR
33 * that can be used to retrieve both the local APIC Timer frequency as 36 * that can be used to retrieve both the local APIC Timer frequency as
@@ -198,6 +201,9 @@
198#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ 201#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
199 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) 202 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
200 203
204#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
205#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
206
201#define HV_PROCESSOR_POWER_STATE_C0 0 207#define HV_PROCESSOR_POWER_STATE_C0 0
202#define HV_PROCESSOR_POWER_STATE_C1 1 208#define HV_PROCESSOR_POWER_STATE_C1 1
203#define HV_PROCESSOR_POWER_STATE_C2 2 209#define HV_PROCESSOR_POWER_STATE_C2 2
@@ -210,4 +216,11 @@
210#define HV_STATUS_INVALID_ALIGNMENT 4 216#define HV_STATUS_INVALID_ALIGNMENT 4
211#define HV_STATUS_INSUFFICIENT_BUFFERS 19 217#define HV_STATUS_INSUFFICIENT_BUFFERS 19
212 218
219typedef struct _HV_REFERENCE_TSC_PAGE {
220 __u32 tsc_sequence;
221 __u32 res1;
222 __u64 tsc_scale;
223 __s64 tsc_offset;
224} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
225
213#endif 226#endif
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 59cea185ad1d..c19fc60ff062 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -528,6 +528,7 @@
528#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e 528#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
529#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f 529#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
530#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 530#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
531#define MSR_IA32_VMX_VMFUNC 0x00000491
531 532
532/* VMX_BASIC bits and bitmasks */ 533/* VMX_BASIC bits and bitmasks */
533#define VMX_BASIC_VMCS_SIZE_SHIFT 32 534#define VMX_BASIC_VMCS_SIZE_SHIFT 32
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db2b832..287e4c85fff9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT
80 depends on KVM && TRACEPOINTS 80 depends on KVM && TRACEPOINTS
81 ---help--- 81 ---help---
82 This option adds a R/W kVM module parameter 'mmu_audit', which allows 82 This option adds a R/W kVM module parameter 'mmu_audit', which allows
83 audit KVM MMU at runtime. 83 auditing of KVM MMU events at runtime.
84 84
85config KVM_DEVICE_ASSIGNMENT 85config KVM_DEVICE_ASSIGNMENT
86 bool "KVM legacy PCI device assignment support" 86 bool "KVM legacy PCI device assignment support"
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 412a5aa0ef94..518d86471b76 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -37,6 +37,7 @@
37 37
38#include "irq.h" 38#include "irq.h"
39#include "i8254.h" 39#include "i8254.h"
40#include "x86.h"
40 41
41#ifndef CONFIG_X86_64 42#ifndef CONFIG_X86_64
42#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 43#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
349 atomic_set(&ps->pending, 0); 350 atomic_set(&ps->pending, 0);
350 ps->irq_ack = 1; 351 ps->irq_ack = 1;
351 352
353 /*
354 * Do not allow the guest to program periodic timers with small
355 * interval, since the hrtimers are not throttled by the host
356 * scheduler.
357 */
358 if (ps->is_periodic) {
359 s64 min_period = min_timer_period_us * 1000LL;
360
361 if (ps->period < min_period) {
362 pr_info_ratelimited(
363 "kvm: requested %lld ns "
364 "i8254 timer period limited to %lld ns\n",
365 ps->period, min_period);
366 ps->period = min_period;
367 }
368 }
369
352 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), 370 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
353 HRTIMER_MODE_ABS); 371 HRTIMER_MODE_ABS);
354} 372}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 775702f649ca..9736529ade08 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,9 +71,6 @@
71#define VEC_POS(v) ((v) & (32 - 1)) 71#define VEC_POS(v) ((v) & (32 - 1))
72#define REG_POS(v) (((v) >> 5) << 4) 72#define REG_POS(v) (((v) >> 5) << 4)
73 73
74static unsigned int min_timer_period_us = 500;
75module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
76
77static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) 74static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
78{ 75{
79 *((u32 *) (apic->regs + reg_off)) = val; 76 *((u32 *) (apic->regs + reg_off)) = val;
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
435 u8 val; 432 u8 val;
436 if (pv_eoi_get_user(vcpu, &val) < 0) 433 if (pv_eoi_get_user(vcpu, &val) < 0)
437 apic_debug("Can't read EOI MSR value: 0x%llx\n", 434 apic_debug("Can't read EOI MSR value: 0x%llx\n",
438 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 435 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
439 return val & 0x1; 436 return val & 0x1;
440} 437}
441 438
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
443{ 440{
444 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { 441 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
445 apic_debug("Can't set EOI MSR value: 0x%llx\n", 442 apic_debug("Can't set EOI MSR value: 0x%llx\n",
446 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 443 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
447 return; 444 return;
448 } 445 }
449 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 446 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
453{ 450{
454 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { 451 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
455 apic_debug("Can't clear EOI MSR value: 0x%llx\n", 452 apic_debug("Can't clear EOI MSR value: 0x%llx\n",
456 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 453 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
457 return; 454 return;
458 } 455 }
459 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 456 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef0f2b1..e50425d0f5f7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2659 int emulate = 0; 2659 int emulate = 0;
2660 gfn_t pseudo_gfn; 2660 gfn_t pseudo_gfn;
2661 2661
2662 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2663 return 0;
2664
2662 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2665 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2663 if (iterator.level == level) { 2666 if (iterator.level == level) {
2664 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, 2667 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
@@ -2829,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
2829 bool ret = false; 2832 bool ret = false;
2830 u64 spte = 0ull; 2833 u64 spte = 0ull;
2831 2834
2835 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2836 return false;
2837
2832 if (!page_fault_can_be_fast(error_code)) 2838 if (!page_fault_can_be_fast(error_code))
2833 return false; 2839 return false;
2834 2840
@@ -3224,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
3224 struct kvm_shadow_walk_iterator iterator; 3230 struct kvm_shadow_walk_iterator iterator;
3225 u64 spte = 0ull; 3231 u64 spte = 0ull;
3226 3232
3233 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3234 return spte;
3235
3227 walk_shadow_page_lockless_begin(vcpu); 3236 walk_shadow_page_lockless_begin(vcpu);
3228 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) 3237 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
3229 if (!is_shadow_present_pte(spte)) 3238 if (!is_shadow_present_pte(spte))
@@ -4510,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
4510 u64 spte; 4519 u64 spte;
4511 int nr_sptes = 0; 4520 int nr_sptes = 0;
4512 4521
4522 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4523 return nr_sptes;
4524
4513 walk_shadow_page_lockless_begin(vcpu); 4525 walk_shadow_page_lockless_begin(vcpu);
4514 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { 4526 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
4515 sptes[iterator.level-1] = spte; 4527 sptes[iterator.level-1] = spte;
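
The mmu.c hunks above all add the same early-return guard before walking shadow page tables, so an invalid root_hpa is never dereferenced; the paging_tmpl.h hunk below applies the same check. Here is a minimal sketch of the guard pattern, assuming VALID_PAGE() is essentially a comparison against an all-ones sentinel as in KVM's MMU code; the surrounding types and names are illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t hpa_t;

    /* Assumption: an unloaded shadow root is marked with an all-ones sentinel. */
    #define INVALID_PAGE ((hpa_t)~0ULL)
    #define VALID_PAGE(x) ((x) != INVALID_PAGE)

    struct fake_mmu { hpa_t root_hpa; };

    /* Mirrors the early-return guard the patch adds before each shadow walk. */
    static int walk_shadow(const struct fake_mmu *mmu)
    {
        if (!VALID_PAGE(mmu->root_hpa))
            return 0;            /* nothing to walk; bail out safely */
        /* ... the real code would iterate the shadow page tables here ... */
        return 1;
    }

    int main(void)
    {
        struct fake_mmu freed = { .root_hpa = INVALID_PAGE };
        struct fake_mmu live  = { .root_hpa = 0x1000 };
        printf("freed root walked: %d\n", walk_shadow(&freed));  /* 0 */
        printf("live root walked:  %d\n", walk_shadow(&live));   /* 1 */
        return 0;
    }

Returning a neutral value (0, false, an empty spte) lets callers treat an invalid root like an empty shadow hierarchy rather than crashing on it.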
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ad75d77999d0..cba218a2f08d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
569 if (FNAME(gpte_changed)(vcpu, gw, top_level)) 569 if (FNAME(gpte_changed)(vcpu, gw, top_level))
570 goto out_gpte_changed; 570 goto out_gpte_changed;
571 571
572 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
573 goto out_gpte_changed;
574
572 for (shadow_walk_init(&it, vcpu, addr); 575 for (shadow_walk_init(&it, vcpu, addr);
573 shadow_walk_okay(&it) && it.level > gw->level; 576 shadow_walk_okay(&it) && it.level > gw->level;
574 shadow_walk_next(&it)) { 577 shadow_walk_next(&it)) {
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
820 */ 823 */
821 mmu_topup_memory_caches(vcpu); 824 mmu_topup_memory_caches(vcpu);
822 825
826 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
827 WARN_ON(1);
828 return;
829 }
830
823 spin_lock(&vcpu->kvm->mmu_lock); 831 spin_lock(&vcpu->kvm->mmu_lock);
824 for_each_shadow_entry(vcpu, gva, iterator) { 832 for_each_shadow_entry(vcpu, gva, iterator) {
825 level = iterator.level; 833 level = iterator.level;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c7168a5cff1b..e81df8fce027 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1671 mark_dirty(svm->vmcb, VMCB_ASID); 1671 mark_dirty(svm->vmcb, VMCB_ASID);
1672} 1672}
1673 1673
1674static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
1675{
1676 return to_svm(vcpu)->vmcb->save.dr6;
1677}
1678
1679static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
1680{
1681 struct vcpu_svm *svm = to_svm(vcpu);
1682
1683 svm->vmcb->save.dr6 = value;
1684 mark_dirty(svm->vmcb, VMCB_DR);
1685}
1686
1674static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1687static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1675{ 1688{
1676 struct vcpu_svm *svm = to_svm(vcpu); 1689 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = {
4286 .set_idt = svm_set_idt, 4299 .set_idt = svm_set_idt,
4287 .get_gdt = svm_get_gdt, 4300 .get_gdt = svm_get_gdt,
4288 .set_gdt = svm_set_gdt, 4301 .set_gdt = svm_set_gdt,
4302 .get_dr6 = svm_get_dr6,
4303 .set_dr6 = svm_set_dr6,
4289 .set_dr7 = svm_set_dr7, 4304 .set_dr7 = svm_set_dr7,
4290 .cache_reg = svm_cache_reg, 4305 .cache_reg = svm_cache_reg,
4291 .get_rflags = svm_get_rflags, 4306 .get_rflags = svm_get_rflags,
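
The svm.c hunk adds .get_dr6/.set_dr6 callbacks to kvm_x86_ops; the x86.c changes later in the diff route DR6 accesses through them via kvm_update_dr6() and _kvm_get_dr(). A rough userspace sketch of that plumbing follows, with illustrative structures standing in for the vCPU and the VMCB save area; it is not the kernel interface itself.

    #include <stdio.h>

    #define GUESTDBG_USE_HW_BP 0x1   /* stands in for KVM_GUESTDBG_USE_HW_BP */

    struct vcpu {
        unsigned long arch_dr6;      /* stands in for vcpu->arch.dr6 */
        unsigned long hw_dr6;        /* stands in for the VMCB save area field */
        unsigned int guest_debug;
    };

    static unsigned long svm_get_dr6(struct vcpu *v) { return v->hw_dr6; }
    static void svm_set_dr6(struct vcpu *v, unsigned long val) { v->hw_dr6 = val; }

    /* Mirrors kvm_update_dr6() from the x86.c hunk: only push the cached value
     * into hardware when the host debugger is not using the guest breakpoints. */
    static void update_dr6(struct vcpu *v)
    {
        if (!(v->guest_debug & GUESTDBG_USE_HW_BP))
            svm_set_dr6(v, v->arch_dr6);
    }

    int main(void)
    {
        struct vcpu v = { .arch_dr6 = 0xffff0ff0, .guest_debug = 0 };
        update_dr6(&v);
        printf("hw dr6 = %#lx\n", svm_get_dr6(&v));
        return 0;
    }

On VMX there is no guest DR6 field in the VMCS, which is why the vmx.c hunk below leaves vmx_set_dr6() empty and has vmx_get_dr6() fall back to vcpu->arch.dr6.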
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index da7837e1349d..5c8879127cfa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,8 @@ struct vcpu_vmx {
418 u64 msr_host_kernel_gs_base; 418 u64 msr_host_kernel_gs_base;
419 u64 msr_guest_kernel_gs_base; 419 u64 msr_guest_kernel_gs_base;
420#endif 420#endif
421 u32 vm_entry_controls_shadow;
422 u32 vm_exit_controls_shadow;
421 /* 423 /*
422 * loaded_vmcs points to the VMCS currently used in this vcpu. For a 424 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
423 * non-nested (L1) guest, it always points to vmcs01. For a nested 425 * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -1056,7 +1058,9 @@ static inline bool is_exception(u32 intr_info)
1056 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); 1058 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
1057} 1059}
1058 1060
1059static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); 1061static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
1062 u32 exit_intr_info,
1063 unsigned long exit_qualification);
1060static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, 1064static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
1061 struct vmcs12 *vmcs12, 1065 struct vmcs12 *vmcs12,
1062 u32 reason, unsigned long qualification); 1066 u32 reason, unsigned long qualification);
@@ -1326,6 +1330,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
1326 vmcs_writel(field, vmcs_readl(field) | mask); 1330 vmcs_writel(field, vmcs_readl(field) | mask);
1327} 1331}
1328 1332
1333static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
1334{
1335 vmcs_write32(VM_ENTRY_CONTROLS, val);
1336 vmx->vm_entry_controls_shadow = val;
1337}
1338
1339static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
1340{
1341 if (vmx->vm_entry_controls_shadow != val)
1342 vm_entry_controls_init(vmx, val);
1343}
1344
1345static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
1346{
1347 return vmx->vm_entry_controls_shadow;
1348}
1349
1350
1351static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1352{
1353 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
1354}
1355
1356static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1357{
1358 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
1359}
1360
1361static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
1362{
1363 vmcs_write32(VM_EXIT_CONTROLS, val);
1364 vmx->vm_exit_controls_shadow = val;
1365}
1366
1367static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
1368{
1369 if (vmx->vm_exit_controls_shadow != val)
1370 vm_exit_controls_init(vmx, val);
1371}
1372
1373static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
1374{
1375 return vmx->vm_exit_controls_shadow;
1376}
1377
1378
1379static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1380{
1381 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
1382}
1383
1384static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1385{
1386 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
1387}
1388
1329static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) 1389static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
1330{ 1390{
1331 vmx->segment_cache.bitmask = 0; 1391 vmx->segment_cache.bitmask = 0;
@@ -1410,11 +1470,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1410 vmcs_write32(EXCEPTION_BITMAP, eb); 1470 vmcs_write32(EXCEPTION_BITMAP, eb);
1411} 1471}
1412 1472
1413static void clear_atomic_switch_msr_special(unsigned long entry, 1473static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1414 unsigned long exit) 1474 unsigned long entry, unsigned long exit)
1415{ 1475{
1416 vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); 1476 vm_entry_controls_clearbit(vmx, entry);
1417 vmcs_clear_bits(VM_EXIT_CONTROLS, exit); 1477 vm_exit_controls_clearbit(vmx, exit);
1418} 1478}
1419 1479
1420static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) 1480static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
@@ -1425,14 +1485,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1425 switch (msr) { 1485 switch (msr) {
1426 case MSR_EFER: 1486 case MSR_EFER:
1427 if (cpu_has_load_ia32_efer) { 1487 if (cpu_has_load_ia32_efer) {
1428 clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1488 clear_atomic_switch_msr_special(vmx,
1489 VM_ENTRY_LOAD_IA32_EFER,
1429 VM_EXIT_LOAD_IA32_EFER); 1490 VM_EXIT_LOAD_IA32_EFER);
1430 return; 1491 return;
1431 } 1492 }
1432 break; 1493 break;
1433 case MSR_CORE_PERF_GLOBAL_CTRL: 1494 case MSR_CORE_PERF_GLOBAL_CTRL:
1434 if (cpu_has_load_perf_global_ctrl) { 1495 if (cpu_has_load_perf_global_ctrl) {
1435 clear_atomic_switch_msr_special( 1496 clear_atomic_switch_msr_special(vmx,
1436 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1497 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1437 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); 1498 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1438 return; 1499 return;
@@ -1453,14 +1514,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1453 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); 1514 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
1454} 1515}
1455 1516
1456static void add_atomic_switch_msr_special(unsigned long entry, 1517static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1457 unsigned long exit, unsigned long guest_val_vmcs, 1518 unsigned long entry, unsigned long exit,
1458 unsigned long host_val_vmcs, u64 guest_val, u64 host_val) 1519 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1520 u64 guest_val, u64 host_val)
1459{ 1521{
1460 vmcs_write64(guest_val_vmcs, guest_val); 1522 vmcs_write64(guest_val_vmcs, guest_val);
1461 vmcs_write64(host_val_vmcs, host_val); 1523 vmcs_write64(host_val_vmcs, host_val);
1462 vmcs_set_bits(VM_ENTRY_CONTROLS, entry); 1524 vm_entry_controls_setbit(vmx, entry);
1463 vmcs_set_bits(VM_EXIT_CONTROLS, exit); 1525 vm_exit_controls_setbit(vmx, exit);
1464} 1526}
1465 1527
1466static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, 1528static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
@@ -1472,7 +1534,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1472 switch (msr) { 1534 switch (msr) {
1473 case MSR_EFER: 1535 case MSR_EFER:
1474 if (cpu_has_load_ia32_efer) { 1536 if (cpu_has_load_ia32_efer) {
1475 add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1537 add_atomic_switch_msr_special(vmx,
1538 VM_ENTRY_LOAD_IA32_EFER,
1476 VM_EXIT_LOAD_IA32_EFER, 1539 VM_EXIT_LOAD_IA32_EFER,
1477 GUEST_IA32_EFER, 1540 GUEST_IA32_EFER,
1478 HOST_IA32_EFER, 1541 HOST_IA32_EFER,
@@ -1482,7 +1545,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1482 break; 1545 break;
1483 case MSR_CORE_PERF_GLOBAL_CTRL: 1546 case MSR_CORE_PERF_GLOBAL_CTRL:
1484 if (cpu_has_load_perf_global_ctrl) { 1547 if (cpu_has_load_perf_global_ctrl) {
1485 add_atomic_switch_msr_special( 1548 add_atomic_switch_msr_special(vmx,
1486 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1549 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1487 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, 1550 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1488 GUEST_IA32_PERF_GLOBAL_CTRL, 1551 GUEST_IA32_PERF_GLOBAL_CTRL,
@@ -1906,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
1906 if (!(vmcs12->exception_bitmap & (1u << nr))) 1969 if (!(vmcs12->exception_bitmap & (1u << nr)))
1907 return 0; 1970 return 0;
1908 1971
1909 nested_vmx_vmexit(vcpu); 1972 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
1973 vmcs_read32(VM_EXIT_INTR_INFO),
1974 vmcs_readl(EXIT_QUALIFICATION));
1910 return 1; 1975 return 1;
1911} 1976}
1912 1977
@@ -2279,6 +2344,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2279 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); 2344 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
2280 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | 2345 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
2281 VMX_MISC_SAVE_EFER_LMA; 2346 VMX_MISC_SAVE_EFER_LMA;
2347 nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
2282 nested_vmx_misc_high = 0; 2348 nested_vmx_misc_high = 0;
2283} 2349}
2284 2350
@@ -2295,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
2295 return low | ((u64)high << 32); 2361 return low | ((u64)high << 32);
2296} 2362}
2297 2363
2298/* 2364/* Returns 0 on success, non-0 otherwise. */
2299 * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
2300 * also let it use VMX-specific MSRs.
2301 * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
2302 * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
2303 * like all other MSRs).
2304 */
2305static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 2365static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2306{ 2366{
2307 if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
2308 msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
2309 /*
2310 * According to the spec, processors which do not support VMX
2311 * should throw a #GP(0) when VMX capability MSRs are read.
2312 */
2313 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
2314 return 1;
2315 }
2316
2317 switch (msr_index) { 2367 switch (msr_index) {
2318 case MSR_IA32_FEATURE_CONTROL:
2319 if (nested_vmx_allowed(vcpu)) {
2320 *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2321 break;
2322 }
2323 return 0;
2324 case MSR_IA32_VMX_BASIC: 2368 case MSR_IA32_VMX_BASIC:
2325 /* 2369 /*
2326 * This MSR reports some information about VMX support. We 2370 * This MSR reports some information about VMX support. We
@@ -2387,34 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2387 *pdata = nested_vmx_ept_caps; 2431 *pdata = nested_vmx_ept_caps;
2388 break; 2432 break;
2389 default: 2433 default:
2390 return 0;
2391 }
2392
2393 return 1;
2394}
2395
2396static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2397{
2398 u32 msr_index = msr_info->index;
2399 u64 data = msr_info->data;
2400 bool host_initialized = msr_info->host_initiated;
2401
2402 if (!nested_vmx_allowed(vcpu))
2403 return 0;
2404
2405 if (msr_index == MSR_IA32_FEATURE_CONTROL) {
2406 if (!host_initialized &&
2407 to_vmx(vcpu)->nested.msr_ia32_feature_control
2408 & FEATURE_CONTROL_LOCKED)
2409 return 0;
2410 to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
2411 return 1; 2434 return 1;
2412 } 2435 }
2413 2436
2414 /*
2415 * No need to treat VMX capability MSRs specially: If we don't handle
2416 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
2417 */
2418 return 0; 2437 return 0;
2419} 2438}
2420 2439
@@ -2460,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2460 case MSR_IA32_SYSENTER_ESP: 2479 case MSR_IA32_SYSENTER_ESP:
2461 data = vmcs_readl(GUEST_SYSENTER_ESP); 2480 data = vmcs_readl(GUEST_SYSENTER_ESP);
2462 break; 2481 break;
2482 case MSR_IA32_FEATURE_CONTROL:
2483 if (!nested_vmx_allowed(vcpu))
2484 return 1;
2485 data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2486 break;
2487 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2488 if (!nested_vmx_allowed(vcpu))
2489 return 1;
2490 return vmx_get_vmx_msr(vcpu, msr_index, pdata);
2463 case MSR_TSC_AUX: 2491 case MSR_TSC_AUX:
2464 if (!to_vmx(vcpu)->rdtscp_enabled) 2492 if (!to_vmx(vcpu)->rdtscp_enabled)
2465 return 1; 2493 return 1;
2466 /* Otherwise falls through */ 2494 /* Otherwise falls through */
2467 default: 2495 default:
2468 if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
2469 return 0;
2470 msr = find_msr_entry(to_vmx(vcpu), msr_index); 2496 msr = find_msr_entry(to_vmx(vcpu), msr_index);
2471 if (msr) { 2497 if (msr) {
2472 data = msr->data; 2498 data = msr->data;
@@ -2479,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2479 return 0; 2505 return 0;
2480} 2506}
2481 2507
2508static void vmx_leave_nested(struct kvm_vcpu *vcpu);
2509
2482/* 2510/*
2483 * Writes msr value into into the appropriate "register". 2511 * Writes msr value into into the appropriate "register".
2484 * Returns 0 on success, non-0 otherwise. 2512 * Returns 0 on success, non-0 otherwise.
@@ -2533,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2533 case MSR_IA32_TSC_ADJUST: 2561 case MSR_IA32_TSC_ADJUST:
2534 ret = kvm_set_msr_common(vcpu, msr_info); 2562 ret = kvm_set_msr_common(vcpu, msr_info);
2535 break; 2563 break;
2564 case MSR_IA32_FEATURE_CONTROL:
2565 if (!nested_vmx_allowed(vcpu) ||
2566 (to_vmx(vcpu)->nested.msr_ia32_feature_control &
2567 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2568 return 1;
2569 vmx->nested.msr_ia32_feature_control = data;
2570 if (msr_info->host_initiated && data == 0)
2571 vmx_leave_nested(vcpu);
2572 break;
2573 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2574 return 1; /* they are read-only */
2536 case MSR_TSC_AUX: 2575 case MSR_TSC_AUX:
2537 if (!vmx->rdtscp_enabled) 2576 if (!vmx->rdtscp_enabled)
2538 return 1; 2577 return 1;
@@ -2541,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2541 return 1; 2580 return 1;
2542 /* Otherwise falls through */ 2581 /* Otherwise falls through */
2543 default: 2582 default:
2544 if (vmx_set_vmx_msr(vcpu, msr_info))
2545 break;
2546 msr = find_msr_entry(vmx, msr_index); 2583 msr = find_msr_entry(vmx, msr_index);
2547 if (msr) { 2584 if (msr) {
2548 msr->data = data; 2585 msr->data = data;
@@ -3182,14 +3219,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3182 vmx_load_host_state(to_vmx(vcpu)); 3219 vmx_load_host_state(to_vmx(vcpu));
3183 vcpu->arch.efer = efer; 3220 vcpu->arch.efer = efer;
3184 if (efer & EFER_LMA) { 3221 if (efer & EFER_LMA) {
3185 vmcs_write32(VM_ENTRY_CONTROLS, 3222 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3186 vmcs_read32(VM_ENTRY_CONTROLS) |
3187 VM_ENTRY_IA32E_MODE);
3188 msr->data = efer; 3223 msr->data = efer;
3189 } else { 3224 } else {
3190 vmcs_write32(VM_ENTRY_CONTROLS, 3225 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3191 vmcs_read32(VM_ENTRY_CONTROLS) &
3192 ~VM_ENTRY_IA32E_MODE);
3193 3226
3194 msr->data = efer & ~EFER_LME; 3227 msr->data = efer & ~EFER_LME;
3195 } 3228 }
@@ -3217,9 +3250,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
3217 3250
3218static void exit_lmode(struct kvm_vcpu *vcpu) 3251static void exit_lmode(struct kvm_vcpu *vcpu)
3219{ 3252{
3220 vmcs_write32(VM_ENTRY_CONTROLS, 3253 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3221 vmcs_read32(VM_ENTRY_CONTROLS)
3222 & ~VM_ENTRY_IA32E_MODE);
3223 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); 3254 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
3224} 3255}
3225 3256
@@ -4346,10 +4377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4346 ++vmx->nmsrs; 4377 ++vmx->nmsrs;
4347 } 4378 }
4348 4379
4349 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 4380
4381 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
4350 4382
4351 /* 22.2.1, 20.8.1 */ 4383 /* 22.2.1, 20.8.1 */
4352 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 4384 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
4353 4385
4354 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 4386 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
4355 set_cr4_guest_host_mask(vmx); 4387 set_cr4_guest_host_mask(vmx);
@@ -4588,15 +4620,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4588static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4620static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4589{ 4621{
4590 if (is_guest_mode(vcpu)) { 4622 if (is_guest_mode(vcpu)) {
4591 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4592
4593 if (to_vmx(vcpu)->nested.nested_run_pending) 4623 if (to_vmx(vcpu)->nested.nested_run_pending)
4594 return 0; 4624 return 0;
4595 if (nested_exit_on_nmi(vcpu)) { 4625 if (nested_exit_on_nmi(vcpu)) {
4596 nested_vmx_vmexit(vcpu); 4626 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
4597 vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; 4627 NMI_VECTOR | INTR_TYPE_NMI_INTR |
4598 vmcs12->vm_exit_intr_info = NMI_VECTOR | 4628 INTR_INFO_VALID_MASK, 0);
4599 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
4600 /* 4629 /*
4601 * The NMI-triggered VM exit counts as injection: 4630 * The NMI-triggered VM exit counts as injection:
4602 * clear this one and block further NMIs. 4631 * clear this one and block further NMIs.
@@ -4618,15 +4647,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4618static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 4647static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4619{ 4648{
4620 if (is_guest_mode(vcpu)) { 4649 if (is_guest_mode(vcpu)) {
4621 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4622
4623 if (to_vmx(vcpu)->nested.nested_run_pending) 4650 if (to_vmx(vcpu)->nested.nested_run_pending)
4624 return 0; 4651 return 0;
4625 if (nested_exit_on_intr(vcpu)) { 4652 if (nested_exit_on_intr(vcpu)) {
4626 nested_vmx_vmexit(vcpu); 4653 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
4627 vmcs12->vm_exit_reason = 4654 0, 0);
4628 EXIT_REASON_EXTERNAL_INTERRUPT;
4629 vmcs12->vm_exit_intr_info = 0;
4630 /* 4655 /*
4631 * fall through to normal code, but now in L1, not L2 4656 * fall through to normal code, but now in L1, not L2
4632 */ 4657 */
@@ -4812,7 +4837,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4812 dr6 = vmcs_readl(EXIT_QUALIFICATION); 4837 dr6 = vmcs_readl(EXIT_QUALIFICATION);
4813 if (!(vcpu->guest_debug & 4838 if (!(vcpu->guest_debug &
4814 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 4839 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4815 vcpu->arch.dr6 = dr6 | DR6_FIXED_1; 4840 vcpu->arch.dr6 &= ~15;
4841 vcpu->arch.dr6 |= dr6;
4816 kvm_queue_exception(vcpu, DB_VECTOR); 4842 kvm_queue_exception(vcpu, DB_VECTOR);
4817 return 1; 4843 return 1;
4818 } 4844 }
@@ -5080,14 +5106,27 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5080 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 5106 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5081 if (exit_qualification & TYPE_MOV_FROM_DR) { 5107 if (exit_qualification & TYPE_MOV_FROM_DR) {
5082 unsigned long val; 5108 unsigned long val;
5083 if (!kvm_get_dr(vcpu, dr, &val)) 5109
5084 kvm_register_write(vcpu, reg, val); 5110 if (kvm_get_dr(vcpu, dr, &val))
5111 return 1;
5112 kvm_register_write(vcpu, reg, val);
5085 } else 5113 } else
5086 kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); 5114 if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
5115 return 1;
5116
5087 skip_emulated_instruction(vcpu); 5117 skip_emulated_instruction(vcpu);
5088 return 1; 5118 return 1;
5089} 5119}
5090 5120
5121static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
5122{
5123 return vcpu->arch.dr6;
5124}
5125
5126static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5127{
5128}
5129
5091static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 5130static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5092{ 5131{
5093 vmcs_writel(GUEST_DR7, val); 5132 vmcs_writel(GUEST_DR7, val);
@@ -6460,11 +6499,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
6460 int size; 6499 int size;
6461 u8 b; 6500 u8 b;
6462 6501
6463 if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
6464 return 1;
6465
6466 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 6502 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
6467 return 0; 6503 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
6468 6504
6469 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 6505 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6470 6506
@@ -6628,6 +6664,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6628 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 6664 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6629 u32 exit_reason = vmx->exit_reason; 6665 u32 exit_reason = vmx->exit_reason;
6630 6666
6667 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
6668 vmcs_readl(EXIT_QUALIFICATION),
6669 vmx->idt_vectoring_info,
6670 intr_info,
6671 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
6672 KVM_ISA_VMX);
6673
6631 if (vmx->nested.nested_run_pending) 6674 if (vmx->nested.nested_run_pending)
6632 return 0; 6675 return 0;
6633 6676
@@ -6777,7 +6820,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6777 return handle_invalid_guest_state(vcpu); 6820 return handle_invalid_guest_state(vcpu);
6778 6821
6779 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6822 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
6780 nested_vmx_vmexit(vcpu); 6823 nested_vmx_vmexit(vcpu, exit_reason,
6824 vmcs_read32(VM_EXIT_INTR_INFO),
6825 vmcs_readl(EXIT_QUALIFICATION));
6781 return 1; 6826 return 1;
6782 } 6827 }
6783 6828
@@ -7332,8 +7377,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7332 struct vcpu_vmx *vmx = to_vmx(vcpu); 7377 struct vcpu_vmx *vmx = to_vmx(vcpu);
7333 7378
7334 free_vpid(vmx); 7379 free_vpid(vmx);
7335 free_nested(vmx);
7336 free_loaded_vmcs(vmx->loaded_vmcs); 7380 free_loaded_vmcs(vmx->loaded_vmcs);
7381 free_nested(vmx);
7337 kfree(vmx->guest_msrs); 7382 kfree(vmx->guest_msrs);
7338 kvm_vcpu_uninit(vcpu); 7383 kvm_vcpu_uninit(vcpu);
7339 kmem_cache_free(kvm_vcpu_cache, vmx); 7384 kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -7518,15 +7563,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7518static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, 7563static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
7519 struct x86_exception *fault) 7564 struct x86_exception *fault)
7520{ 7565{
7521 struct vmcs12 *vmcs12; 7566 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7522 nested_vmx_vmexit(vcpu); 7567 u32 exit_reason;
7523 vmcs12 = get_vmcs12(vcpu);
7524 7568
7525 if (fault->error_code & PFERR_RSVD_MASK) 7569 if (fault->error_code & PFERR_RSVD_MASK)
7526 vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; 7570 exit_reason = EXIT_REASON_EPT_MISCONFIG;
7527 else 7571 else
7528 vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; 7572 exit_reason = EXIT_REASON_EPT_VIOLATION;
7529 vmcs12->exit_qualification = vcpu->arch.exit_qualification; 7573 nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
7530 vmcs12->guest_physical_address = fault->address; 7574 vmcs12->guest_physical_address = fault->address;
7531} 7575}
7532 7576
@@ -7564,7 +7608,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7564 7608
7565 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 7609 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
7566 if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) 7610 if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
7567 nested_vmx_vmexit(vcpu); 7611 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
7612 vmcs_read32(VM_EXIT_INTR_INFO),
7613 vmcs_readl(EXIT_QUALIFICATION));
7568 else 7614 else
7569 kvm_inject_page_fault(vcpu, fault); 7615 kvm_inject_page_fault(vcpu, fault);
7570} 7616}
@@ -7706,6 +7752,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7706 else 7752 else
7707 vmcs_write64(APIC_ACCESS_ADDR, 7753 vmcs_write64(APIC_ACCESS_ADDR,
7708 page_to_phys(vmx->nested.apic_access_page)); 7754 page_to_phys(vmx->nested.apic_access_page));
7755 } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
7756 exec_control |=
7757 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
7758 vmcs_write64(APIC_ACCESS_ADDR,
7759 page_to_phys(vcpu->kvm->arch.apic_access_page));
7709 } 7760 }
7710 7761
7711 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 7762 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -7759,12 +7810,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7759 exit_control = vmcs_config.vmexit_ctrl; 7810 exit_control = vmcs_config.vmexit_ctrl;
7760 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7811 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7761 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 7812 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7762 vmcs_write32(VM_EXIT_CONTROLS, exit_control); 7813 vm_exit_controls_init(vmx, exit_control);
7763 7814
7764 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7815 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7765 * emulated by vmx_set_efer(), below. 7816 * emulated by vmx_set_efer(), below.
7766 */ 7817 */
7767 vmcs_write32(VM_ENTRY_CONTROLS, 7818 vm_entry_controls_init(vmx,
7768 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & 7819 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
7769 ~VM_ENTRY_IA32E_MODE) | 7820 ~VM_ENTRY_IA32E_MODE) |
7770 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); 7821 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -7882,7 +7933,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7882 return 1; 7933 return 1;
7883 } 7934 }
7884 7935
7885 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { 7936 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
7937 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
7886 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 7938 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
7887 return 1; 7939 return 1;
7888 } 7940 }
@@ -7994,8 +8046,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7994 8046
7995 enter_guest_mode(vcpu); 8047 enter_guest_mode(vcpu);
7996 8048
7997 vmx->nested.nested_run_pending = 1;
7998
7999 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 8049 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
8000 8050
8001 cpu = get_cpu(); 8051 cpu = get_cpu();
@@ -8011,6 +8061,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8011 8061
8012 prepare_vmcs02(vcpu, vmcs12); 8062 prepare_vmcs02(vcpu, vmcs12);
8013 8063
8064 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
8065 return kvm_emulate_halt(vcpu);
8066
8067 vmx->nested.nested_run_pending = 1;
8068
8014 /* 8069 /*
8015 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 8070 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
8016 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet 8071 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -8110,7 +8165,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8110 * exit-information fields only. Other fields are modified by L1 with VMWRITE, 8165 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
8111 * which already writes to vmcs12 directly. 8166 * which already writes to vmcs12 directly.
8112 */ 8167 */
8113static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 8168static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8169 u32 exit_reason, u32 exit_intr_info,
8170 unsigned long exit_qualification)
8114{ 8171{
8115 /* update guest state fields: */ 8172 /* update guest state fields: */
8116 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 8173 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -8162,6 +8219,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8162 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 8219 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
8163 vmcs12->guest_pending_dbg_exceptions = 8220 vmcs12->guest_pending_dbg_exceptions =
8164 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8221 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
8222 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
8223 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
8224 else
8225 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
8165 8226
8166 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8227 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
8167 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8228 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
@@ -8186,7 +8247,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8186 8247
8187 vmcs12->vm_entry_controls = 8248 vmcs12->vm_entry_controls =
8188 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | 8249 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
8189 (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); 8250 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
8190 8251
8191 /* TODO: These cannot have changed unless we have MSR bitmaps and 8252 /* TODO: These cannot have changed unless we have MSR bitmaps and
8192 * the relevant bit asks not to trap the change */ 8253 * the relevant bit asks not to trap the change */
@@ -8201,10 +8262,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8201 8262
8202 /* update exit information fields: */ 8263 /* update exit information fields: */
8203 8264
8204 vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; 8265 vmcs12->vm_exit_reason = exit_reason;
8205 vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 8266 vmcs12->exit_qualification = exit_qualification;
8206 8267
8207 vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 8268 vmcs12->vm_exit_intr_info = exit_intr_info;
8208 if ((vmcs12->vm_exit_intr_info & 8269 if ((vmcs12->vm_exit_intr_info &
8209 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == 8270 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
8210 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) 8271 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
@@ -8370,7 +8431,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8370 * and modify vmcs12 to make it see what it would expect to see there if 8431 * and modify vmcs12 to make it see what it would expect to see there if
8371 * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) 8432 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
8372 */ 8433 */
8373static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) 8434static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8435 u32 exit_intr_info,
8436 unsigned long exit_qualification)
8374{ 8437{
8375 struct vcpu_vmx *vmx = to_vmx(vcpu); 8438 struct vcpu_vmx *vmx = to_vmx(vcpu);
8376 int cpu; 8439 int cpu;
@@ -8380,7 +8443,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8380 WARN_ON_ONCE(vmx->nested.nested_run_pending); 8443 WARN_ON_ONCE(vmx->nested.nested_run_pending);
8381 8444
8382 leave_guest_mode(vcpu); 8445 leave_guest_mode(vcpu);
8383 prepare_vmcs12(vcpu, vmcs12); 8446 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
8447 exit_qualification);
8448
8449 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
8450 vmcs12->exit_qualification,
8451 vmcs12->idt_vectoring_info_field,
8452 vmcs12->vm_exit_intr_info,
8453 vmcs12->vm_exit_intr_error_code,
8454 KVM_ISA_VMX);
8384 8455
8385 cpu = get_cpu(); 8456 cpu = get_cpu();
8386 vmx->loaded_vmcs = &vmx->vmcs01; 8457 vmx->loaded_vmcs = &vmx->vmcs01;
@@ -8389,6 +8460,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8389 vcpu->cpu = cpu; 8460 vcpu->cpu = cpu;
8390 put_cpu(); 8461 put_cpu();
8391 8462
8463 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
8464 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
8392 vmx_segment_cache_clear(vmx); 8465 vmx_segment_cache_clear(vmx);
8393 8466
8394 /* if no vmcs02 cache requested, remove the one we used */ 8467 /* if no vmcs02 cache requested, remove the one we used */
@@ -8424,6 +8497,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8424} 8497}
8425 8498
8426/* 8499/*
8500 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
8501 */
8502static void vmx_leave_nested(struct kvm_vcpu *vcpu)
8503{
8504 if (is_guest_mode(vcpu))
8505 nested_vmx_vmexit(vcpu, -1, 0, 0);
8506 free_nested(to_vmx(vcpu));
8507}
8508
8509/*
8427 * L1's failure to enter L2 is a subset of a normal exit, as explained in 8510 * L1's failure to enter L2 is a subset of a normal exit, as explained in
8428 * 23.7 "VM-entry failures during or after loading guest state" (this also 8511 * 23.7 "VM-entry failures during or after loading guest state" (this also
8429 * lists the acceptable exit-reason and exit-qualification parameters). 8512 * lists the acceptable exit-reason and exit-qualification parameters).
@@ -8486,6 +8569,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
8486 .set_idt = vmx_set_idt, 8569 .set_idt = vmx_set_idt,
8487 .get_gdt = vmx_get_gdt, 8570 .get_gdt = vmx_get_gdt,
8488 .set_gdt = vmx_set_gdt, 8571 .set_gdt = vmx_set_gdt,
8572 .get_dr6 = vmx_get_dr6,
8573 .set_dr6 = vmx_set_dr6,
8489 .set_dr7 = vmx_set_dr7, 8574 .set_dr7 = vmx_set_dr7,
8490 .cache_reg = vmx_cache_reg, 8575 .cache_reg = vmx_cache_reg,
8491 .get_rflags = vmx_get_rflags, 8576 .get_rflags = vmx_get_rflags,
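
Among the vmx.c changes, the vm_{entry,exit}_controls_* helpers introduce a software shadow of the VM-entry and VM-exit control fields, so repeated identical VMWRITEs can be skipped and the current value can be read back without a VMREAD. Below is a compilable sketch of that caching pattern; the counter and single-argument vmcs_write32() are stand-ins for the real VMCS accessors.

    #include <stdint.h>
    #include <stdio.h>

    static unsigned int vmwrite_count;
    static uint32_t fake_vmcs_field;

    /* Stand-in for the (expensive) hardware VMWRITE. */
    static void vmcs_write32(uint32_t val)
    {
        fake_vmcs_field = val;
        vmwrite_count++;
    }

    struct vcpu_vmx_sketch { uint32_t vm_entry_controls_shadow; };

    static void vm_entry_controls_init(struct vcpu_vmx_sketch *vmx, uint32_t val)
    {
        vmcs_write32(val);
        vmx->vm_entry_controls_shadow = val;
    }

    static void vm_entry_controls_set(struct vcpu_vmx_sketch *vmx, uint32_t val)
    {
        if (vmx->vm_entry_controls_shadow != val)   /* skip redundant VMWRITE */
            vm_entry_controls_init(vmx, val);
    }

    static void vm_entry_controls_setbit(struct vcpu_vmx_sketch *vmx, uint32_t bit)
    {
        vm_entry_controls_set(vmx, vmx->vm_entry_controls_shadow | bit);
    }

    int main(void)
    {
        struct vcpu_vmx_sketch vmx;
        vm_entry_controls_init(&vmx, 0x11ff);
        vm_entry_controls_setbit(&vmx, 0x200);   /* one VMWRITE */
        vm_entry_controls_setbit(&vmx, 0x200);   /* already set: no VMWRITE */
        printf("vmwrites: %u, field: %#x\n", vmwrite_count, fake_vmcs_field);
        return 0;
    }

The other notable change in this file is that nested_vmx_vmexit() now takes the exit reason, interrupt info and exit qualification explicitly, so callers such as the nested NMI/interrupt paths and vmx_leave_nested() (which passes -1) can supply synthetic values instead of whatever the VMCS last reported.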
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da1e35d..0c76f7cfdb32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
94static bool ignore_msrs = 0; 94static bool ignore_msrs = 0;
95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); 95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
96 96
97unsigned int min_timer_period_us = 500;
98module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
99
97bool kvm_has_tsc_control; 100bool kvm_has_tsc_control;
98EXPORT_SYMBOL_GPL(kvm_has_tsc_control); 101EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
99u32 kvm_max_guest_tsc_khz; 102u32 kvm_max_guest_tsc_khz;
@@ -719,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
719} 722}
720EXPORT_SYMBOL_GPL(kvm_get_cr8); 723EXPORT_SYMBOL_GPL(kvm_get_cr8);
721 724
725static void kvm_update_dr6(struct kvm_vcpu *vcpu)
726{
727 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
728 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
729}
730
722static void kvm_update_dr7(struct kvm_vcpu *vcpu) 731static void kvm_update_dr7(struct kvm_vcpu *vcpu)
723{ 732{
724 unsigned long dr7; 733 unsigned long dr7;
@@ -747,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
747 if (val & 0xffffffff00000000ULL) 756 if (val & 0xffffffff00000000ULL)
748 return -1; /* #GP */ 757 return -1; /* #GP */
749 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 758 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
759 kvm_update_dr6(vcpu);
750 break; 760 break;
751 case 5: 761 case 5:
752 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 762 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -788,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
788 return 1; 798 return 1;
789 /* fall through */ 799 /* fall through */
790 case 6: 800 case 6:
791 *val = vcpu->arch.dr6; 801 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
802 *val = vcpu->arch.dr6;
803 else
804 *val = kvm_x86_ops->get_dr6(vcpu);
792 break; 805 break;
793 case 5: 806 case 5:
794 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 807 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -836,11 +849,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
836 * kvm-specific. Those are put in the beginning of the list. 849 * kvm-specific. Those are put in the beginning of the list.
837 */ 850 */
838 851
839#define KVM_SAVE_MSRS_BEGIN 10 852#define KVM_SAVE_MSRS_BEGIN 12
840static u32 msrs_to_save[] = { 853static u32 msrs_to_save[] = {
841 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 854 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
842 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 855 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
843 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 856 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
857 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
844 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, 858 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
845 MSR_KVM_PV_EOI_EN, 859 MSR_KVM_PV_EOI_EN,
846 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 860 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1275,8 +1289,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1275 kvm->arch.last_tsc_write = data; 1289 kvm->arch.last_tsc_write = data;
1276 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; 1290 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1277 1291
1278 /* Reset of TSC must disable overshoot protection below */
1279 vcpu->arch.hv_clock.tsc_timestamp = 0;
1280 vcpu->arch.last_guest_tsc = data; 1292 vcpu->arch.last_guest_tsc = data;
1281 1293
1282 /* Keep track of which generation this VCPU has synchronized to */ 1294 /* Keep track of which generation this VCPU has synchronized to */
@@ -1484,7 +1496,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1484 unsigned long flags, this_tsc_khz; 1496 unsigned long flags, this_tsc_khz;
1485 struct kvm_vcpu_arch *vcpu = &v->arch; 1497 struct kvm_vcpu_arch *vcpu = &v->arch;
1486 struct kvm_arch *ka = &v->kvm->arch; 1498 struct kvm_arch *ka = &v->kvm->arch;
1487 s64 kernel_ns, max_kernel_ns; 1499 s64 kernel_ns;
1488 u64 tsc_timestamp, host_tsc; 1500 u64 tsc_timestamp, host_tsc;
1489 struct pvclock_vcpu_time_info guest_hv_clock; 1501 struct pvclock_vcpu_time_info guest_hv_clock;
1490 u8 pvclock_flags; 1502 u8 pvclock_flags;
@@ -1543,37 +1555,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1543 if (!vcpu->pv_time_enabled) 1555 if (!vcpu->pv_time_enabled)
1544 return 0; 1556 return 0;
1545 1557
1546 /*
1547 * Time as measured by the TSC may go backwards when resetting the base
1548 * tsc_timestamp. The reason for this is that the TSC resolution is
1549 * higher than the resolution of the other clock scales. Thus, many
1550 * possible measurments of the TSC correspond to one measurement of any
1551 * other clock, and so a spread of values is possible. This is not a
1552 * problem for the computation of the nanosecond clock; with TSC rates
1553 * around 1GHZ, there can only be a few cycles which correspond to one
1554 * nanosecond value, and any path through this code will inevitably
1555 * take longer than that. However, with the kernel_ns value itself,
1556 * the precision may be much lower, down to HZ granularity. If the
1557 * first sampling of TSC against kernel_ns ends in the low part of the
1558 * range, and the second in the high end of the range, we can get:
1559 *
1560 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
1561 *
1562 * As the sampling errors potentially range in the thousands of cycles,
1563 * it is possible such a time value has already been observed by the
1564 * guest. To protect against this, we must compute the system time as
1565 * observed by the guest and ensure the new system time is greater.
1566 */
1567 max_kernel_ns = 0;
1568 if (vcpu->hv_clock.tsc_timestamp) {
1569 max_kernel_ns = vcpu->last_guest_tsc -
1570 vcpu->hv_clock.tsc_timestamp;
1571 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1572 vcpu->hv_clock.tsc_to_system_mul,
1573 vcpu->hv_clock.tsc_shift);
1574 max_kernel_ns += vcpu->last_kernel_ns;
1575 }
1576
1577 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { 1558 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1578 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, 1559 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1579 &vcpu->hv_clock.tsc_shift, 1560 &vcpu->hv_clock.tsc_shift,
@@ -1581,14 +1562,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1581 vcpu->hw_tsc_khz = this_tsc_khz; 1562 vcpu->hw_tsc_khz = this_tsc_khz;
1582 } 1563 }
1583 1564
1584 /* with a master <monotonic time, tsc value> tuple,
1585 * pvclock clock reads always increase at the (scaled) rate
1586 * of guest TSC - no need to deal with sampling errors.
1587 */
1588 if (!use_master_clock) {
1589 if (max_kernel_ns > kernel_ns)
1590 kernel_ns = max_kernel_ns;
1591 }
1592 /* With all the info we got, fill in the values */ 1565 /* With all the info we got, fill in the values */
1593 vcpu->hv_clock.tsc_timestamp = tsc_timestamp; 1566 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1594 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1567 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
@@ -1826,6 +1799,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
1826 switch (msr) { 1799 switch (msr) {
1827 case HV_X64_MSR_GUEST_OS_ID: 1800 case HV_X64_MSR_GUEST_OS_ID:
1828 case HV_X64_MSR_HYPERCALL: 1801 case HV_X64_MSR_HYPERCALL:
1802 case HV_X64_MSR_REFERENCE_TSC:
1803 case HV_X64_MSR_TIME_REF_COUNT:
1829 r = true; 1804 r = true;
1830 break; 1805 break;
1831 } 1806 }
@@ -1867,6 +1842,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1867 kvm->arch.hv_hypercall = data; 1842 kvm->arch.hv_hypercall = data;
1868 break; 1843 break;
1869 } 1844 }
1845 case HV_X64_MSR_REFERENCE_TSC: {
1846 u64 gfn;
1847 HV_REFERENCE_TSC_PAGE tsc_ref;
1848 memset(&tsc_ref, 0, sizeof(tsc_ref));
1849 kvm->arch.hv_tsc_page = data;
1850 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1851 break;
1852 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1853 if (kvm_write_guest(kvm, data,
1854 &tsc_ref, sizeof(tsc_ref)))
1855 return 1;
1856 mark_page_dirty(kvm, gfn);
1857 break;
1858 }
1870 default: 1859 default:
1871 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1860 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1872 "data 0x%llx\n", msr, data); 1861 "data 0x%llx\n", msr, data);
@@ -2291,6 +2280,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2291 case HV_X64_MSR_HYPERCALL: 2280 case HV_X64_MSR_HYPERCALL:
2292 data = kvm->arch.hv_hypercall; 2281 data = kvm->arch.hv_hypercall;
2293 break; 2282 break;
2283 case HV_X64_MSR_TIME_REF_COUNT: {
2284 data =
2285 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
2286 break;
2287 }
2288 case HV_X64_MSR_REFERENCE_TSC:
2289 data = kvm->arch.hv_tsc_page;
2290 break;
2294 default: 2291 default:
2295 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 2292 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2296 return 1; 2293 return 1;
@@ -2604,6 +2601,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2604#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2601#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2605 case KVM_CAP_ASSIGN_DEV_IRQ: 2602 case KVM_CAP_ASSIGN_DEV_IRQ:
2606 case KVM_CAP_PCI_2_3: 2603 case KVM_CAP_PCI_2_3:
2604 case KVM_CAP_HYPERV_TIME:
2607#endif 2605#endif
2608 r = 1; 2606 r = 1;
2609 break; 2607 break;
@@ -2972,8 +2970,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2972static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2970static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2973 struct kvm_debugregs *dbgregs) 2971 struct kvm_debugregs *dbgregs)
2974{ 2972{
2973 unsigned long val;
2974
2975 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2975 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2976 dbgregs->dr6 = vcpu->arch.dr6; 2976 _kvm_get_dr(vcpu, 6, &val);
2977 dbgregs->dr6 = val;
2977 dbgregs->dr7 = vcpu->arch.dr7; 2978 dbgregs->dr7 = vcpu->arch.dr7;
2978 dbgregs->flags = 0; 2979 dbgregs->flags = 0;
2979 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); 2980 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2987,7 +2988,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2987 2988
2988 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2989 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2989 vcpu->arch.dr6 = dbgregs->dr6; 2990 vcpu->arch.dr6 = dbgregs->dr6;
2991 kvm_update_dr6(vcpu);
2990 vcpu->arch.dr7 = dbgregs->dr7; 2992 vcpu->arch.dr7 = dbgregs->dr7;
2993 kvm_update_dr7(vcpu);
2991 2994
2992 return 0; 2995 return 0;
2993} 2996}
@@ -5834,6 +5837,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5834 kvm_apic_update_tmr(vcpu, tmr); 5837 kvm_apic_update_tmr(vcpu, tmr);
5835} 5838}
5836 5839
5840/*
5841 * Returns 1 to let __vcpu_run() continue the guest execution loop without
5842 * exiting to the userspace. Otherwise, the value will be returned to the
5843 * userspace.
5844 */
5837static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5845static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5838{ 5846{
5839 int r; 5847 int r;
@@ -6089,7 +6097,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
6089 } 6097 }
6090 if (need_resched()) { 6098 if (need_resched()) {
6091 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 6099 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6092 kvm_resched(vcpu); 6100 cond_resched();
6093 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 6101 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6094 } 6102 }
6095 } 6103 }
@@ -6717,6 +6725,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6717 6725
6718 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 6726 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6719 vcpu->arch.dr6 = DR6_FIXED_1; 6727 vcpu->arch.dr6 = DR6_FIXED_1;
6728 kvm_update_dr6(vcpu);
6720 vcpu->arch.dr7 = DR7_FIXED_1; 6729 vcpu->arch.dr7 = DR7_FIXED_1;
6721 kvm_update_dr7(vcpu); 6730 kvm_update_dr7(vcpu);
6722 6731
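
The x86.c hunks above add the Hyper-V reference TSC page and reference time counter MSRs; the counter simply reports kvmclock time in 100 ns units. A minimal userspace sketch of that conversion, with clock_gettime() standing in for get_kernel_ns() plus kvmclock_offset; it illustrates the unit handling only, not the kernel path.

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t nanoseconds_now(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    /* The Hyper-V reference counter is defined in 100 ns ticks. */
    static uint64_t hv_time_ref_count(void)
    {
        return nanoseconds_now() / 100;
    }

    int main(void)
    {
        printf("reference counter: %llu (100ns ticks)\n",
               (unsigned long long)hv_time_ref_count());
        return 0;
    }

The same file also drops the max_kernel_ns clamp from kvm_guest_time_update() and moves min_timer_period_us out of lapic.c, exposing it through x86.h so the i8254 code earlier in the diff can share the same limit.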
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 587fb9ede436..8da5823bcde6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
126extern u64 host_xcr0; 126extern u64 host_xcr0;
127 127
128extern unsigned int min_timer_period_us;
129
128extern struct static_key kvm_no_apic_vcpu; 130extern struct static_key kvm_no_apic_vcpu;
129#endif 131#endif