aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-01-23 00:40:43 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-01-23 00:40:43 -0500
commit 7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (patch)
tree c45acf88b7976dcec117b6a3dbe31a7fe710ef33 /arch/x86/kvm
parent bb1281f2aae08e5ef23eb0692c8833e95579cdf2 (diff)
parent 7650b6870930055426abb32cc47d164ccdea49db (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "First round of KVM updates for 3.14; PPC parts will come next week.

  Nothing major here, just bugfixes all over the place. The most
  interesting part is the ARM guys' virtualized interrupt controller
  overhaul, which lets userspace get/set the state and thus enables
  migration of ARM VMs"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (67 commits)
  kvm: make KVM_MMU_AUDIT help text more readable
  KVM: s390: Fix memory access error detection
  KVM: nVMX: Update guest activity state field on L2 exits
  KVM: nVMX: Fix nested_run_pending on activity state HLT
  KVM: nVMX: Clean up handling of VMX-related MSRs
  KVM: nVMX: Add tracepoints for nested_vmexit and nested_vmexit_inject
  KVM: nVMX: Pass vmexit parameters to nested_vmx_vmexit
  KVM: nVMX: Leave VMX mode on clearing of feature control MSR
  KVM: VMX: Fix DR6 update on #DB exception
  KVM: SVM: Fix reading of DR6
  KVM: x86: Sync DR7 on KVM_SET_DEBUGREGS
  add support for Hyper-V reference time counter
  KVM: remove useless write to vcpu->hv_clock.tsc_timestamp
  KVM: x86: fix tsc catchup issue with tsc scaling
  KVM: x86: limit PIT timer frequency
  KVM: x86: handle invalid root_hpa everywhere
  kvm: Provide kvm_vcpu_eligible_for_directed_yield() stub
  kvm: vfio: silence GCC warning
  KVM: ARM: Remove duplicate include
  arm/arm64: KVM: relax the requirements of VMA alignment for THP
  ...
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- arch/x86/kvm/Kconfig 2
-rw-r--r-- arch/x86/kvm/i8254.c 18
-rw-r--r-- arch/x86/kvm/lapic.c 9
-rw-r--r-- arch/x86/kvm/mmu.c 12
-rw-r--r-- arch/x86/kvm/paging_tmpl.h 8
-rw-r--r-- arch/x86/kvm/svm.c 15
-rw-r--r-- arch/x86/kvm/vmx.c 323
-rw-r--r-- arch/x86/kvm/x86.c 101
-rw-r--r-- arch/x86/kvm/x86.h 2
9 files changed, 318 insertions, 172 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db2b832..287e4c85fff9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT
80 depends on KVM && TRACEPOINTS 80 depends on KVM && TRACEPOINTS
81 ---help--- 81 ---help---
82 This option adds a R/W kVM module parameter 'mmu_audit', which allows 82 This option adds a R/W kVM module parameter 'mmu_audit', which allows
83 audit KVM MMU at runtime. 83 auditing of KVM MMU events at runtime.
84 84
85config KVM_DEVICE_ASSIGNMENT 85config KVM_DEVICE_ASSIGNMENT
86 bool "KVM legacy PCI device assignment support" 86 bool "KVM legacy PCI device assignment support"
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 412a5aa0ef94..518d86471b76 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -37,6 +37,7 @@
37 37
38#include "irq.h" 38#include "irq.h"
39#include "i8254.h" 39#include "i8254.h"
40#include "x86.h"
40 41
41#ifndef CONFIG_X86_64 42#ifndef CONFIG_X86_64
42#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 43#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
349 atomic_set(&ps->pending, 0); 350 atomic_set(&ps->pending, 0);
350 ps->irq_ack = 1; 351 ps->irq_ack = 1;
351 352
353 /*
354 * Do not allow the guest to program periodic timers with small
355 * interval, since the hrtimers are not throttled by the host
356 * scheduler.
357 */
358 if (ps->is_periodic) {
359 s64 min_period = min_timer_period_us * 1000LL;
360
361 if (ps->period < min_period) {
362 pr_info_ratelimited(
363 "kvm: requested %lld ns "
364 "i8254 timer period limited to %lld ns\n",
365 ps->period, min_period);
366 ps->period = min_period;
367 }
368 }
369
352 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), 370 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
353 HRTIMER_MODE_ABS); 371 HRTIMER_MODE_ABS);
354} 372}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 775702f649ca..9736529ade08 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,9 +71,6 @@
71#define VEC_POS(v) ((v) & (32 - 1)) 71#define VEC_POS(v) ((v) & (32 - 1))
72#define REG_POS(v) (((v) >> 5) << 4) 72#define REG_POS(v) (((v) >> 5) << 4)
73 73
74static unsigned int min_timer_period_us = 500;
75module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
76
77static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) 74static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
78{ 75{
79 *((u32 *) (apic->regs + reg_off)) = val; 76 *((u32 *) (apic->regs + reg_off)) = val;
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
435 u8 val; 432 u8 val;
436 if (pv_eoi_get_user(vcpu, &val) < 0) 433 if (pv_eoi_get_user(vcpu, &val) < 0)
437 apic_debug("Can't read EOI MSR value: 0x%llx\n", 434 apic_debug("Can't read EOI MSR value: 0x%llx\n",
438 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 435 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
439 return val & 0x1; 436 return val & 0x1;
440} 437}
441 438
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
443{ 440{
444 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { 441 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
445 apic_debug("Can't set EOI MSR value: 0x%llx\n", 442 apic_debug("Can't set EOI MSR value: 0x%llx\n",
446 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 443 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
447 return; 444 return;
448 } 445 }
449 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 446 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
453{ 450{
454 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { 451 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
455 apic_debug("Can't clear EOI MSR value: 0x%llx\n", 452 apic_debug("Can't clear EOI MSR value: 0x%llx\n",
456 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 453 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
457 return; 454 return;
458 } 455 }
459 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 456 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef0f2b1..e50425d0f5f7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2659 int emulate = 0; 2659 int emulate = 0;
2660 gfn_t pseudo_gfn; 2660 gfn_t pseudo_gfn;
2661 2661
2662 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2663 return 0;
2664
2662 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2665 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2663 if (iterator.level == level) { 2666 if (iterator.level == level) {
2664 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, 2667 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
@@ -2829,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
2829 bool ret = false; 2832 bool ret = false;
2830 u64 spte = 0ull; 2833 u64 spte = 0ull;
2831 2834
2835 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2836 return false;
2837
2832 if (!page_fault_can_be_fast(error_code)) 2838 if (!page_fault_can_be_fast(error_code))
2833 return false; 2839 return false;
2834 2840
@@ -3224,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
3224 struct kvm_shadow_walk_iterator iterator; 3230 struct kvm_shadow_walk_iterator iterator;
3225 u64 spte = 0ull; 3231 u64 spte = 0ull;
3226 3232
3233 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3234 return spte;
3235
3227 walk_shadow_page_lockless_begin(vcpu); 3236 walk_shadow_page_lockless_begin(vcpu);
3228 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) 3237 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
3229 if (!is_shadow_present_pte(spte)) 3238 if (!is_shadow_present_pte(spte))
@@ -4510,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
4510 u64 spte; 4519 u64 spte;
4511 int nr_sptes = 0; 4520 int nr_sptes = 0;
4512 4521
4522 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4523 return nr_sptes;
4524
4513 walk_shadow_page_lockless_begin(vcpu); 4525 walk_shadow_page_lockless_begin(vcpu);
4514 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { 4526 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
4515 sptes[iterator.level-1] = spte; 4527 sptes[iterator.level-1] = spte;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ad75d77999d0..cba218a2f08d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
569 if (FNAME(gpte_changed)(vcpu, gw, top_level)) 569 if (FNAME(gpte_changed)(vcpu, gw, top_level))
570 goto out_gpte_changed; 570 goto out_gpte_changed;
571 571
572 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
573 goto out_gpte_changed;
574
572 for (shadow_walk_init(&it, vcpu, addr); 575 for (shadow_walk_init(&it, vcpu, addr);
573 shadow_walk_okay(&it) && it.level > gw->level; 576 shadow_walk_okay(&it) && it.level > gw->level;
574 shadow_walk_next(&it)) { 577 shadow_walk_next(&it)) {
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
820 */ 823 */
821 mmu_topup_memory_caches(vcpu); 824 mmu_topup_memory_caches(vcpu);
822 825
826 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
827 WARN_ON(1);
828 return;
829 }
830
823 spin_lock(&vcpu->kvm->mmu_lock); 831 spin_lock(&vcpu->kvm->mmu_lock);
824 for_each_shadow_entry(vcpu, gva, iterator) { 832 for_each_shadow_entry(vcpu, gva, iterator) {
825 level = iterator.level; 833 level = iterator.level;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c7168a5cff1b..e81df8fce027 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1671 mark_dirty(svm->vmcb, VMCB_ASID); 1671 mark_dirty(svm->vmcb, VMCB_ASID);
1672} 1672}
1673 1673
1674static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
1675{
1676 return to_svm(vcpu)->vmcb->save.dr6;
1677}
1678
1679static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
1680{
1681 struct vcpu_svm *svm = to_svm(vcpu);
1682
1683 svm->vmcb->save.dr6 = value;
1684 mark_dirty(svm->vmcb, VMCB_DR);
1685}
1686
1674static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1687static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1675{ 1688{
1676 struct vcpu_svm *svm = to_svm(vcpu); 1689 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = {
4286 .set_idt = svm_set_idt, 4299 .set_idt = svm_set_idt,
4287 .get_gdt = svm_get_gdt, 4300 .get_gdt = svm_get_gdt,
4288 .set_gdt = svm_set_gdt, 4301 .set_gdt = svm_set_gdt,
4302 .get_dr6 = svm_get_dr6,
4303 .set_dr6 = svm_set_dr6,
4289 .set_dr7 = svm_set_dr7, 4304 .set_dr7 = svm_set_dr7,
4290 .cache_reg = svm_cache_reg, 4305 .cache_reg = svm_cache_reg,
4291 .get_rflags = svm_get_rflags, 4306 .get_rflags = svm_get_rflags,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index da7837e1349d..5c8879127cfa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,8 @@ struct vcpu_vmx {
418 u64 msr_host_kernel_gs_base; 418 u64 msr_host_kernel_gs_base;
419 u64 msr_guest_kernel_gs_base; 419 u64 msr_guest_kernel_gs_base;
420#endif 420#endif
421 u32 vm_entry_controls_shadow;
422 u32 vm_exit_controls_shadow;
421 /* 423 /*
422 * loaded_vmcs points to the VMCS currently used in this vcpu. For a 424 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
423 * non-nested (L1) guest, it always points to vmcs01. For a nested 425 * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -1056,7 +1058,9 @@ static inline bool is_exception(u32 intr_info)
1056 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); 1058 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
1057} 1059}
1058 1060
1059static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); 1061static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
1062 u32 exit_intr_info,
1063 unsigned long exit_qualification);
1060static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, 1064static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
1061 struct vmcs12 *vmcs12, 1065 struct vmcs12 *vmcs12,
1062 u32 reason, unsigned long qualification); 1066 u32 reason, unsigned long qualification);
@@ -1326,6 +1330,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
1326 vmcs_writel(field, vmcs_readl(field) | mask); 1330 vmcs_writel(field, vmcs_readl(field) | mask);
1327} 1331}
1328 1332
1333static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
1334{
1335 vmcs_write32(VM_ENTRY_CONTROLS, val);
1336 vmx->vm_entry_controls_shadow = val;
1337}
1338
1339static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
1340{
1341 if (vmx->vm_entry_controls_shadow != val)
1342 vm_entry_controls_init(vmx, val);
1343}
1344
1345static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
1346{
1347 return vmx->vm_entry_controls_shadow;
1348}
1349
1350
1351static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1352{
1353 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
1354}
1355
1356static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1357{
1358 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
1359}
1360
1361static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
1362{
1363 vmcs_write32(VM_EXIT_CONTROLS, val);
1364 vmx->vm_exit_controls_shadow = val;
1365}
1366
1367static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
1368{
1369 if (vmx->vm_exit_controls_shadow != val)
1370 vm_exit_controls_init(vmx, val);
1371}
1372
1373static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
1374{
1375 return vmx->vm_exit_controls_shadow;
1376}
1377
1378
1379static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1380{
1381 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
1382}
1383
1384static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1385{
1386 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
1387}
1388
1329static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) 1389static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
1330{ 1390{
1331 vmx->segment_cache.bitmask = 0; 1391 vmx->segment_cache.bitmask = 0;
@@ -1410,11 +1470,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1410 vmcs_write32(EXCEPTION_BITMAP, eb); 1470 vmcs_write32(EXCEPTION_BITMAP, eb);
1411} 1471}
1412 1472
1413static void clear_atomic_switch_msr_special(unsigned long entry, 1473static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1414 unsigned long exit) 1474 unsigned long entry, unsigned long exit)
1415{ 1475{
1416 vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); 1476 vm_entry_controls_clearbit(vmx, entry);
1417 vmcs_clear_bits(VM_EXIT_CONTROLS, exit); 1477 vm_exit_controls_clearbit(vmx, exit);
1418} 1478}
1419 1479
1420static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) 1480static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
@@ -1425,14 +1485,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1425 switch (msr) { 1485 switch (msr) {
1426 case MSR_EFER: 1486 case MSR_EFER:
1427 if (cpu_has_load_ia32_efer) { 1487 if (cpu_has_load_ia32_efer) {
1428 clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1488 clear_atomic_switch_msr_special(vmx,
1489 VM_ENTRY_LOAD_IA32_EFER,
1429 VM_EXIT_LOAD_IA32_EFER); 1490 VM_EXIT_LOAD_IA32_EFER);
1430 return; 1491 return;
1431 } 1492 }
1432 break; 1493 break;
1433 case MSR_CORE_PERF_GLOBAL_CTRL: 1494 case MSR_CORE_PERF_GLOBAL_CTRL:
1434 if (cpu_has_load_perf_global_ctrl) { 1495 if (cpu_has_load_perf_global_ctrl) {
1435 clear_atomic_switch_msr_special( 1496 clear_atomic_switch_msr_special(vmx,
1436 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1497 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1437 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); 1498 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1438 return; 1499 return;
@@ -1453,14 +1514,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1453 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); 1514 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
1454} 1515}
1455 1516
1456static void add_atomic_switch_msr_special(unsigned long entry, 1517static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1457 unsigned long exit, unsigned long guest_val_vmcs, 1518 unsigned long entry, unsigned long exit,
1458 unsigned long host_val_vmcs, u64 guest_val, u64 host_val) 1519 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1520 u64 guest_val, u64 host_val)
1459{ 1521{
1460 vmcs_write64(guest_val_vmcs, guest_val); 1522 vmcs_write64(guest_val_vmcs, guest_val);
1461 vmcs_write64(host_val_vmcs, host_val); 1523 vmcs_write64(host_val_vmcs, host_val);
1462 vmcs_set_bits(VM_ENTRY_CONTROLS, entry); 1524 vm_entry_controls_setbit(vmx, entry);
1463 vmcs_set_bits(VM_EXIT_CONTROLS, exit); 1525 vm_exit_controls_setbit(vmx, exit);
1464} 1526}
1465 1527
1466static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, 1528static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
@@ -1472,7 +1534,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1472 switch (msr) { 1534 switch (msr) {
1473 case MSR_EFER: 1535 case MSR_EFER:
1474 if (cpu_has_load_ia32_efer) { 1536 if (cpu_has_load_ia32_efer) {
1475 add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1537 add_atomic_switch_msr_special(vmx,
1538 VM_ENTRY_LOAD_IA32_EFER,
1476 VM_EXIT_LOAD_IA32_EFER, 1539 VM_EXIT_LOAD_IA32_EFER,
1477 GUEST_IA32_EFER, 1540 GUEST_IA32_EFER,
1478 HOST_IA32_EFER, 1541 HOST_IA32_EFER,
@@ -1482,7 +1545,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1482 break; 1545 break;
1483 case MSR_CORE_PERF_GLOBAL_CTRL: 1546 case MSR_CORE_PERF_GLOBAL_CTRL:
1484 if (cpu_has_load_perf_global_ctrl) { 1547 if (cpu_has_load_perf_global_ctrl) {
1485 add_atomic_switch_msr_special( 1548 add_atomic_switch_msr_special(vmx,
1486 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1549 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1487 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, 1550 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1488 GUEST_IA32_PERF_GLOBAL_CTRL, 1551 GUEST_IA32_PERF_GLOBAL_CTRL,
@@ -1906,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
1906 if (!(vmcs12->exception_bitmap & (1u << nr))) 1969 if (!(vmcs12->exception_bitmap & (1u << nr)))
1907 return 0; 1970 return 0;
1908 1971
1909 nested_vmx_vmexit(vcpu); 1972 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
1973 vmcs_read32(VM_EXIT_INTR_INFO),
1974 vmcs_readl(EXIT_QUALIFICATION));
1910 return 1; 1975 return 1;
1911} 1976}
1912 1977
@@ -2279,6 +2344,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2279 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); 2344 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
2280 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | 2345 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
2281 VMX_MISC_SAVE_EFER_LMA; 2346 VMX_MISC_SAVE_EFER_LMA;
2347 nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
2282 nested_vmx_misc_high = 0; 2348 nested_vmx_misc_high = 0;
2283} 2349}
2284 2350
@@ -2295,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
2295 return low | ((u64)high << 32); 2361 return low | ((u64)high << 32);
2296} 2362}
2297 2363
2298/* 2364/* Returns 0 on success, non-0 otherwise. */
2299 * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
2300 * also let it use VMX-specific MSRs.
2301 * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
2302 * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
2303 * like all other MSRs).
2304 */
2305static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 2365static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2306{ 2366{
2307 if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
2308 msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
2309 /*
2310 * According to the spec, processors which do not support VMX
2311 * should throw a #GP(0) when VMX capability MSRs are read.
2312 */
2313 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
2314 return 1;
2315 }
2316
2317 switch (msr_index) { 2367 switch (msr_index) {
2318 case MSR_IA32_FEATURE_CONTROL:
2319 if (nested_vmx_allowed(vcpu)) {
2320 *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2321 break;
2322 }
2323 return 0;
2324 case MSR_IA32_VMX_BASIC: 2368 case MSR_IA32_VMX_BASIC:
2325 /* 2369 /*
2326 * This MSR reports some information about VMX support. We 2370 * This MSR reports some information about VMX support. We
@@ -2387,34 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2387 *pdata = nested_vmx_ept_caps; 2431 *pdata = nested_vmx_ept_caps;
2388 break; 2432 break;
2389 default: 2433 default:
2390 return 0;
2391 }
2392
2393 return 1;
2394}
2395
2396static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2397{
2398 u32 msr_index = msr_info->index;
2399 u64 data = msr_info->data;
2400 bool host_initialized = msr_info->host_initiated;
2401
2402 if (!nested_vmx_allowed(vcpu))
2403 return 0;
2404
2405 if (msr_index == MSR_IA32_FEATURE_CONTROL) {
2406 if (!host_initialized &&
2407 to_vmx(vcpu)->nested.msr_ia32_feature_control
2408 & FEATURE_CONTROL_LOCKED)
2409 return 0;
2410 to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
2411 return 1; 2434 return 1;
2412 } 2435 }
2413 2436
2414 /*
2415 * No need to treat VMX capability MSRs specially: If we don't handle
2416 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
2417 */
2418 return 0; 2437 return 0;
2419} 2438}
2420 2439
@@ -2460,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2460 case MSR_IA32_SYSENTER_ESP: 2479 case MSR_IA32_SYSENTER_ESP:
2461 data = vmcs_readl(GUEST_SYSENTER_ESP); 2480 data = vmcs_readl(GUEST_SYSENTER_ESP);
2462 break; 2481 break;
2482 case MSR_IA32_FEATURE_CONTROL:
2483 if (!nested_vmx_allowed(vcpu))
2484 return 1;
2485 data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2486 break;
2487 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2488 if (!nested_vmx_allowed(vcpu))
2489 return 1;
2490 return vmx_get_vmx_msr(vcpu, msr_index, pdata);
2463 case MSR_TSC_AUX: 2491 case MSR_TSC_AUX:
2464 if (!to_vmx(vcpu)->rdtscp_enabled) 2492 if (!to_vmx(vcpu)->rdtscp_enabled)
2465 return 1; 2493 return 1;
2466 /* Otherwise falls through */ 2494 /* Otherwise falls through */
2467 default: 2495 default:
2468 if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
2469 return 0;
2470 msr = find_msr_entry(to_vmx(vcpu), msr_index); 2496 msr = find_msr_entry(to_vmx(vcpu), msr_index);
2471 if (msr) { 2497 if (msr) {
2472 data = msr->data; 2498 data = msr->data;
@@ -2479,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2479 return 0; 2505 return 0;
2480} 2506}
2481 2507
2508static void vmx_leave_nested(struct kvm_vcpu *vcpu);
2509
2482/* 2510/*
2483 * Writes msr value into into the appropriate "register". 2511 * Writes msr value into into the appropriate "register".
2484 * Returns 0 on success, non-0 otherwise. 2512 * Returns 0 on success, non-0 otherwise.
@@ -2533,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2533 case MSR_IA32_TSC_ADJUST: 2561 case MSR_IA32_TSC_ADJUST:
2534 ret = kvm_set_msr_common(vcpu, msr_info); 2562 ret = kvm_set_msr_common(vcpu, msr_info);
2535 break; 2563 break;
2564 case MSR_IA32_FEATURE_CONTROL:
2565 if (!nested_vmx_allowed(vcpu) ||
2566 (to_vmx(vcpu)->nested.msr_ia32_feature_control &
2567 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2568 return 1;
2569 vmx->nested.msr_ia32_feature_control = data;
2570 if (msr_info->host_initiated && data == 0)
2571 vmx_leave_nested(vcpu);
2572 break;
2573 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2574 return 1; /* they are read-only */
2536 case MSR_TSC_AUX: 2575 case MSR_TSC_AUX:
2537 if (!vmx->rdtscp_enabled) 2576 if (!vmx->rdtscp_enabled)
2538 return 1; 2577 return 1;
@@ -2541,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2541 return 1; 2580 return 1;
2542 /* Otherwise falls through */ 2581 /* Otherwise falls through */
2543 default: 2582 default:
2544 if (vmx_set_vmx_msr(vcpu, msr_info))
2545 break;
2546 msr = find_msr_entry(vmx, msr_index); 2583 msr = find_msr_entry(vmx, msr_index);
2547 if (msr) { 2584 if (msr) {
2548 msr->data = data; 2585 msr->data = data;
@@ -3182,14 +3219,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3182 vmx_load_host_state(to_vmx(vcpu)); 3219 vmx_load_host_state(to_vmx(vcpu));
3183 vcpu->arch.efer = efer; 3220 vcpu->arch.efer = efer;
3184 if (efer & EFER_LMA) { 3221 if (efer & EFER_LMA) {
3185 vmcs_write32(VM_ENTRY_CONTROLS, 3222 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3186 vmcs_read32(VM_ENTRY_CONTROLS) |
3187 VM_ENTRY_IA32E_MODE);
3188 msr->data = efer; 3223 msr->data = efer;
3189 } else { 3224 } else {
3190 vmcs_write32(VM_ENTRY_CONTROLS, 3225 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3191 vmcs_read32(VM_ENTRY_CONTROLS) &
3192 ~VM_ENTRY_IA32E_MODE);
3193 3226
3194 msr->data = efer & ~EFER_LME; 3227 msr->data = efer & ~EFER_LME;
3195 } 3228 }
@@ -3217,9 +3250,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
3217 3250
3218static void exit_lmode(struct kvm_vcpu *vcpu) 3251static void exit_lmode(struct kvm_vcpu *vcpu)
3219{ 3252{
3220 vmcs_write32(VM_ENTRY_CONTROLS, 3253 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3221 vmcs_read32(VM_ENTRY_CONTROLS)
3222 & ~VM_ENTRY_IA32E_MODE);
3223 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); 3254 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
3224} 3255}
3225 3256
@@ -4346,10 +4377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4346 ++vmx->nmsrs; 4377 ++vmx->nmsrs;
4347 } 4378 }
4348 4379
4349 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 4380
4381 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
4350 4382
4351 /* 22.2.1, 20.8.1 */ 4383 /* 22.2.1, 20.8.1 */
4352 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 4384 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
4353 4385
4354 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 4386 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
4355 set_cr4_guest_host_mask(vmx); 4387 set_cr4_guest_host_mask(vmx);
@@ -4588,15 +4620,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4588static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4620static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4589{ 4621{
4590 if (is_guest_mode(vcpu)) { 4622 if (is_guest_mode(vcpu)) {
4591 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4592
4593 if (to_vmx(vcpu)->nested.nested_run_pending) 4623 if (to_vmx(vcpu)->nested.nested_run_pending)
4594 return 0; 4624 return 0;
4595 if (nested_exit_on_nmi(vcpu)) { 4625 if (nested_exit_on_nmi(vcpu)) {
4596 nested_vmx_vmexit(vcpu); 4626 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
4597 vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; 4627 NMI_VECTOR | INTR_TYPE_NMI_INTR |
4598 vmcs12->vm_exit_intr_info = NMI_VECTOR | 4628 INTR_INFO_VALID_MASK, 0);
4599 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
4600 /* 4629 /*
4601 * The NMI-triggered VM exit counts as injection: 4630 * The NMI-triggered VM exit counts as injection:
4602 * clear this one and block further NMIs. 4631 * clear this one and block further NMIs.
@@ -4618,15 +4647,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4618static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 4647static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4619{ 4648{
4620 if (is_guest_mode(vcpu)) { 4649 if (is_guest_mode(vcpu)) {
4621 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4622
4623 if (to_vmx(vcpu)->nested.nested_run_pending) 4650 if (to_vmx(vcpu)->nested.nested_run_pending)
4624 return 0; 4651 return 0;
4625 if (nested_exit_on_intr(vcpu)) { 4652 if (nested_exit_on_intr(vcpu)) {
4626 nested_vmx_vmexit(vcpu); 4653 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
4627 vmcs12->vm_exit_reason = 4654 0, 0);
4628 EXIT_REASON_EXTERNAL_INTERRUPT;
4629 vmcs12->vm_exit_intr_info = 0;
4630 /* 4655 /*
4631 * fall through to normal code, but now in L1, not L2 4656 * fall through to normal code, but now in L1, not L2
4632 */ 4657 */
@@ -4812,7 +4837,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4812 dr6 = vmcs_readl(EXIT_QUALIFICATION); 4837 dr6 = vmcs_readl(EXIT_QUALIFICATION);
4813 if (!(vcpu->guest_debug & 4838 if (!(vcpu->guest_debug &
4814 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 4839 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4815 vcpu->arch.dr6 = dr6 | DR6_FIXED_1; 4840 vcpu->arch.dr6 &= ~15;
4841 vcpu->arch.dr6 |= dr6;
4816 kvm_queue_exception(vcpu, DB_VECTOR); 4842 kvm_queue_exception(vcpu, DB_VECTOR);
4817 return 1; 4843 return 1;
4818 } 4844 }
@@ -5080,14 +5106,27 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5080 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 5106 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5081 if (exit_qualification & TYPE_MOV_FROM_DR) { 5107 if (exit_qualification & TYPE_MOV_FROM_DR) {
5082 unsigned long val; 5108 unsigned long val;
5083 if (!kvm_get_dr(vcpu, dr, &val)) 5109
5084 kvm_register_write(vcpu, reg, val); 5110 if (kvm_get_dr(vcpu, dr, &val))
5111 return 1;
5112 kvm_register_write(vcpu, reg, val);
5085 } else 5113 } else
5086 kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); 5114 if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
5115 return 1;
5116
5087 skip_emulated_instruction(vcpu); 5117 skip_emulated_instruction(vcpu);
5088 return 1; 5118 return 1;
5089} 5119}
5090 5120
5121static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
5122{
5123 return vcpu->arch.dr6;
5124}
5125
5126static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5127{
5128}
5129
5091static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 5130static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5092{ 5131{
5093 vmcs_writel(GUEST_DR7, val); 5132 vmcs_writel(GUEST_DR7, val);
@@ -6460,11 +6499,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
6460 int size; 6499 int size;
6461 u8 b; 6500 u8 b;
6462 6501
6463 if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
6464 return 1;
6465
6466 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 6502 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
6467 return 0; 6503 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
6468 6504
6469 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 6505 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6470 6506
@@ -6628,6 +6664,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6628 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 6664 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6629 u32 exit_reason = vmx->exit_reason; 6665 u32 exit_reason = vmx->exit_reason;
6630 6666
6667 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
6668 vmcs_readl(EXIT_QUALIFICATION),
6669 vmx->idt_vectoring_info,
6670 intr_info,
6671 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
6672 KVM_ISA_VMX);
6673
6631 if (vmx->nested.nested_run_pending) 6674 if (vmx->nested.nested_run_pending)
6632 return 0; 6675 return 0;
6633 6676
@@ -6777,7 +6820,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6777 return handle_invalid_guest_state(vcpu); 6820 return handle_invalid_guest_state(vcpu);
6778 6821
6779 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6822 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
6780 nested_vmx_vmexit(vcpu); 6823 nested_vmx_vmexit(vcpu, exit_reason,
6824 vmcs_read32(VM_EXIT_INTR_INFO),
6825 vmcs_readl(EXIT_QUALIFICATION));
6781 return 1; 6826 return 1;
6782 } 6827 }
6783 6828
@@ -7332,8 +7377,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7332 struct vcpu_vmx *vmx = to_vmx(vcpu); 7377 struct vcpu_vmx *vmx = to_vmx(vcpu);
7333 7378
7334 free_vpid(vmx); 7379 free_vpid(vmx);
7335 free_nested(vmx);
7336 free_loaded_vmcs(vmx->loaded_vmcs); 7380 free_loaded_vmcs(vmx->loaded_vmcs);
7381 free_nested(vmx);
7337 kfree(vmx->guest_msrs); 7382 kfree(vmx->guest_msrs);
7338 kvm_vcpu_uninit(vcpu); 7383 kvm_vcpu_uninit(vcpu);
7339 kmem_cache_free(kvm_vcpu_cache, vmx); 7384 kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -7518,15 +7563,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7518static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, 7563static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
7519 struct x86_exception *fault) 7564 struct x86_exception *fault)
7520{ 7565{
7521 struct vmcs12 *vmcs12; 7566 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7522 nested_vmx_vmexit(vcpu); 7567 u32 exit_reason;
7523 vmcs12 = get_vmcs12(vcpu);
7524 7568
7525 if (fault->error_code & PFERR_RSVD_MASK) 7569 if (fault->error_code & PFERR_RSVD_MASK)
7526 vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; 7570 exit_reason = EXIT_REASON_EPT_MISCONFIG;
7527 else 7571 else
7528 vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; 7572 exit_reason = EXIT_REASON_EPT_VIOLATION;
7529 vmcs12->exit_qualification = vcpu->arch.exit_qualification; 7573 nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
7530 vmcs12->guest_physical_address = fault->address; 7574 vmcs12->guest_physical_address = fault->address;
7531} 7575}
7532 7576
@@ -7564,7 +7608,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7564 7608
7565 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 7609 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
7566 if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) 7610 if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
7567 nested_vmx_vmexit(vcpu); 7611 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
7612 vmcs_read32(VM_EXIT_INTR_INFO),
7613 vmcs_readl(EXIT_QUALIFICATION));
7568 else 7614 else
7569 kvm_inject_page_fault(vcpu, fault); 7615 kvm_inject_page_fault(vcpu, fault);
7570} 7616}
@@ -7706,6 +7752,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7706 else 7752 else
7707 vmcs_write64(APIC_ACCESS_ADDR, 7753 vmcs_write64(APIC_ACCESS_ADDR,
7708 page_to_phys(vmx->nested.apic_access_page)); 7754 page_to_phys(vmx->nested.apic_access_page));
7755 } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
7756 exec_control |=
7757 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
7758 vmcs_write64(APIC_ACCESS_ADDR,
7759 page_to_phys(vcpu->kvm->arch.apic_access_page));
7709 } 7760 }
7710 7761
7711 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 7762 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -7759,12 +7810,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7759 exit_control = vmcs_config.vmexit_ctrl; 7810 exit_control = vmcs_config.vmexit_ctrl;
7760 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7811 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7761 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 7812 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7762 vmcs_write32(VM_EXIT_CONTROLS, exit_control); 7813 vm_exit_controls_init(vmx, exit_control);
7763 7814
7764 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7815 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7765 * emulated by vmx_set_efer(), below. 7816 * emulated by vmx_set_efer(), below.
7766 */ 7817 */
7767 vmcs_write32(VM_ENTRY_CONTROLS, 7818 vm_entry_controls_init(vmx,
7768 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & 7819 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
7769 ~VM_ENTRY_IA32E_MODE) | 7820 ~VM_ENTRY_IA32E_MODE) |
7770 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); 7821 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -7882,7 +7933,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7882 return 1; 7933 return 1;
7883 } 7934 }
7884 7935
7885 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { 7936 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
7937 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
7886 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 7938 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
7887 return 1; 7939 return 1;
7888 } 7940 }
@@ -7994,8 +8046,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7994 8046
7995 enter_guest_mode(vcpu); 8047 enter_guest_mode(vcpu);
7996 8048
7997 vmx->nested.nested_run_pending = 1;
7998
7999 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 8049 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
8000 8050
8001 cpu = get_cpu(); 8051 cpu = get_cpu();
@@ -8011,6 +8061,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8011 8061
8012 prepare_vmcs02(vcpu, vmcs12); 8062 prepare_vmcs02(vcpu, vmcs12);
8013 8063
8064 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
8065 return kvm_emulate_halt(vcpu);
8066
8067 vmx->nested.nested_run_pending = 1;
8068
8014 /* 8069 /*
8015 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 8070 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
8016 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet 8071 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -8110,7 +8165,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8110 * exit-information fields only. Other fields are modified by L1 with VMWRITE, 8165 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
8111 * which already writes to vmcs12 directly. 8166 * which already writes to vmcs12 directly.
8112 */ 8167 */
8113static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 8168static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8169 u32 exit_reason, u32 exit_intr_info,
8170 unsigned long exit_qualification)
8114{ 8171{
8115 /* update guest state fields: */ 8172 /* update guest state fields: */
8116 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 8173 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -8162,6 +8219,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8162 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 8219 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
8163 vmcs12->guest_pending_dbg_exceptions = 8220 vmcs12->guest_pending_dbg_exceptions =
8164 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8221 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
8222 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
8223 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
8224 else
8225 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
8165 8226
8166 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8227 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
8167 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8228 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
@@ -8186,7 +8247,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8186 8247
8187 vmcs12->vm_entry_controls = 8248 vmcs12->vm_entry_controls =
8188 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | 8249 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
8189 (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); 8250 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
8190 8251
8191 /* TODO: These cannot have changed unless we have MSR bitmaps and 8252 /* TODO: These cannot have changed unless we have MSR bitmaps and
8192 * the relevant bit asks not to trap the change */ 8253 * the relevant bit asks not to trap the change */
@@ -8201,10 +8262,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8201 8262
8202 /* update exit information fields: */ 8263 /* update exit information fields: */
8203 8264
8204 vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; 8265 vmcs12->vm_exit_reason = exit_reason;
8205 vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 8266 vmcs12->exit_qualification = exit_qualification;
8206 8267
8207 vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 8268 vmcs12->vm_exit_intr_info = exit_intr_info;
8208 if ((vmcs12->vm_exit_intr_info & 8269 if ((vmcs12->vm_exit_intr_info &
8209 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == 8270 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
8210 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) 8271 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
@@ -8370,7 +8431,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8370 * and modify vmcs12 to make it see what it would expect to see there if 8431 * and modify vmcs12 to make it see what it would expect to see there if
8371 * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) 8432 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
8372 */ 8433 */
8373static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) 8434static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8435 u32 exit_intr_info,
8436 unsigned long exit_qualification)
8374{ 8437{
8375 struct vcpu_vmx *vmx = to_vmx(vcpu); 8438 struct vcpu_vmx *vmx = to_vmx(vcpu);
8376 int cpu; 8439 int cpu;
@@ -8380,7 +8443,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8380 WARN_ON_ONCE(vmx->nested.nested_run_pending); 8443 WARN_ON_ONCE(vmx->nested.nested_run_pending);
8381 8444
8382 leave_guest_mode(vcpu); 8445 leave_guest_mode(vcpu);
8383 prepare_vmcs12(vcpu, vmcs12); 8446 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
8447 exit_qualification);
8448
8449 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
8450 vmcs12->exit_qualification,
8451 vmcs12->idt_vectoring_info_field,
8452 vmcs12->vm_exit_intr_info,
8453 vmcs12->vm_exit_intr_error_code,
8454 KVM_ISA_VMX);
8384 8455
8385 cpu = get_cpu(); 8456 cpu = get_cpu();
8386 vmx->loaded_vmcs = &vmx->vmcs01; 8457 vmx->loaded_vmcs = &vmx->vmcs01;
@@ -8389,6 +8460,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8389 vcpu->cpu = cpu; 8460 vcpu->cpu = cpu;
8390 put_cpu(); 8461 put_cpu();
8391 8462
8463 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
8464 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
8392 vmx_segment_cache_clear(vmx); 8465 vmx_segment_cache_clear(vmx);
8393 8466
8394 /* if no vmcs02 cache requested, remove the one we used */ 8467 /* if no vmcs02 cache requested, remove the one we used */
@@ -8424,6 +8497,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8424} 8497}
8425 8498
8426/* 8499/*
8500 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
8501 */
8502static void vmx_leave_nested(struct kvm_vcpu *vcpu)
8503{
8504 if (is_guest_mode(vcpu))
8505 nested_vmx_vmexit(vcpu, -1, 0, 0);
8506 free_nested(to_vmx(vcpu));
8507}
8508
8509/*
8427 * L1's failure to enter L2 is a subset of a normal exit, as explained in 8510 * L1's failure to enter L2 is a subset of a normal exit, as explained in
8428 * 23.7 "VM-entry failures during or after loading guest state" (this also 8511 * 23.7 "VM-entry failures during or after loading guest state" (this also
8429 * lists the acceptable exit-reason and exit-qualification parameters). 8512 * lists the acceptable exit-reason and exit-qualification parameters).
@@ -8486,6 +8569,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
8486 .set_idt = vmx_set_idt, 8569 .set_idt = vmx_set_idt,
8487 .get_gdt = vmx_get_gdt, 8570 .get_gdt = vmx_get_gdt,
8488 .set_gdt = vmx_set_gdt, 8571 .set_gdt = vmx_set_gdt,
8572 .get_dr6 = vmx_get_dr6,
8573 .set_dr6 = vmx_set_dr6,
8489 .set_dr7 = vmx_set_dr7, 8574 .set_dr7 = vmx_set_dr7,
8490 .cache_reg = vmx_cache_reg, 8575 .cache_reg = vmx_cache_reg,
8491 .get_rflags = vmx_get_rflags, 8576 .get_rflags = vmx_get_rflags,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da1e35d..0c76f7cfdb32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
94static bool ignore_msrs = 0; 94static bool ignore_msrs = 0;
95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); 95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
96 96
97unsigned int min_timer_period_us = 500;
98module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
99
97bool kvm_has_tsc_control; 100bool kvm_has_tsc_control;
98EXPORT_SYMBOL_GPL(kvm_has_tsc_control); 101EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
99u32 kvm_max_guest_tsc_khz; 102u32 kvm_max_guest_tsc_khz;
@@ -719,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
719} 722}
720EXPORT_SYMBOL_GPL(kvm_get_cr8); 723EXPORT_SYMBOL_GPL(kvm_get_cr8);
721 724
725static void kvm_update_dr6(struct kvm_vcpu *vcpu)
726{
727 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
728 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
729}
730
722static void kvm_update_dr7(struct kvm_vcpu *vcpu) 731static void kvm_update_dr7(struct kvm_vcpu *vcpu)
723{ 732{
724 unsigned long dr7; 733 unsigned long dr7;
@@ -747,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
747 if (val & 0xffffffff00000000ULL) 756 if (val & 0xffffffff00000000ULL)
748 return -1; /* #GP */ 757 return -1; /* #GP */
749 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 758 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
759 kvm_update_dr6(vcpu);
750 break; 760 break;
751 case 5: 761 case 5:
752 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 762 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -788,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
788 return 1; 798 return 1;
789 /* fall through */ 799 /* fall through */
790 case 6: 800 case 6:
791 *val = vcpu->arch.dr6; 801 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
802 *val = vcpu->arch.dr6;
803 else
804 *val = kvm_x86_ops->get_dr6(vcpu);
792 break; 805 break;
793 case 5: 806 case 5:
794 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 807 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -836,11 +849,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
836 * kvm-specific. Those are put in the beginning of the list. 849 * kvm-specific. Those are put in the beginning of the list.
837 */ 850 */
838 851
839#define KVM_SAVE_MSRS_BEGIN 10 852#define KVM_SAVE_MSRS_BEGIN 12
840static u32 msrs_to_save[] = { 853static u32 msrs_to_save[] = {
841 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 854 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
842 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 855 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
843 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 856 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
857 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
844 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, 858 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
845 MSR_KVM_PV_EOI_EN, 859 MSR_KVM_PV_EOI_EN,
846 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 860 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1275,8 +1289,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1275 kvm->arch.last_tsc_write = data; 1289 kvm->arch.last_tsc_write = data;
1276 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; 1290 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1277 1291
1278 /* Reset of TSC must disable overshoot protection below */
1279 vcpu->arch.hv_clock.tsc_timestamp = 0;
1280 vcpu->arch.last_guest_tsc = data; 1292 vcpu->arch.last_guest_tsc = data;
1281 1293
1282 /* Keep track of which generation this VCPU has synchronized to */ 1294 /* Keep track of which generation this VCPU has synchronized to */
@@ -1484,7 +1496,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1484 unsigned long flags, this_tsc_khz; 1496 unsigned long flags, this_tsc_khz;
1485 struct kvm_vcpu_arch *vcpu = &v->arch; 1497 struct kvm_vcpu_arch *vcpu = &v->arch;
1486 struct kvm_arch *ka = &v->kvm->arch; 1498 struct kvm_arch *ka = &v->kvm->arch;
1487 s64 kernel_ns, max_kernel_ns; 1499 s64 kernel_ns;
1488 u64 tsc_timestamp, host_tsc; 1500 u64 tsc_timestamp, host_tsc;
1489 struct pvclock_vcpu_time_info guest_hv_clock; 1501 struct pvclock_vcpu_time_info guest_hv_clock;
1490 u8 pvclock_flags; 1502 u8 pvclock_flags;
@@ -1543,37 +1555,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1543 if (!vcpu->pv_time_enabled) 1555 if (!vcpu->pv_time_enabled)
1544 return 0; 1556 return 0;
1545 1557
1546 /*
1547 * Time as measured by the TSC may go backwards when resetting the base
1548 * tsc_timestamp. The reason for this is that the TSC resolution is
1549 * higher than the resolution of the other clock scales. Thus, many
1550 * possible measurments of the TSC correspond to one measurement of any
1551 * other clock, and so a spread of values is possible. This is not a
1552 * problem for the computation of the nanosecond clock; with TSC rates
1553 * around 1GHZ, there can only be a few cycles which correspond to one
1554 * nanosecond value, and any path through this code will inevitably
1555 * take longer than that. However, with the kernel_ns value itself,
1556 * the precision may be much lower, down to HZ granularity. If the
1557 * first sampling of TSC against kernel_ns ends in the low part of the
1558 * range, and the second in the high end of the range, we can get:
1559 *
1560 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
1561 *
1562 * As the sampling errors potentially range in the thousands of cycles,
1563 * it is possible such a time value has already been observed by the
1564 * guest. To protect against this, we must compute the system time as
1565 * observed by the guest and ensure the new system time is greater.
1566 */
1567 max_kernel_ns = 0;
1568 if (vcpu->hv_clock.tsc_timestamp) {
1569 max_kernel_ns = vcpu->last_guest_tsc -
1570 vcpu->hv_clock.tsc_timestamp;
1571 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1572 vcpu->hv_clock.tsc_to_system_mul,
1573 vcpu->hv_clock.tsc_shift);
1574 max_kernel_ns += vcpu->last_kernel_ns;
1575 }
1576
1577 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { 1558 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1578 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, 1559 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1579 &vcpu->hv_clock.tsc_shift, 1560 &vcpu->hv_clock.tsc_shift,
@@ -1581,14 +1562,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1581 vcpu->hw_tsc_khz = this_tsc_khz; 1562 vcpu->hw_tsc_khz = this_tsc_khz;
1582 } 1563 }
1583 1564
1584 /* with a master <monotonic time, tsc value> tuple,
1585 * pvclock clock reads always increase at the (scaled) rate
1586 * of guest TSC - no need to deal with sampling errors.
1587 */
1588 if (!use_master_clock) {
1589 if (max_kernel_ns > kernel_ns)
1590 kernel_ns = max_kernel_ns;
1591 }
1592 /* With all the info we got, fill in the values */ 1565 /* With all the info we got, fill in the values */
1593 vcpu->hv_clock.tsc_timestamp = tsc_timestamp; 1566 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1594 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1567 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
@@ -1826,6 +1799,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
1826 switch (msr) { 1799 switch (msr) {
1827 case HV_X64_MSR_GUEST_OS_ID: 1800 case HV_X64_MSR_GUEST_OS_ID:
1828 case HV_X64_MSR_HYPERCALL: 1801 case HV_X64_MSR_HYPERCALL:
1802 case HV_X64_MSR_REFERENCE_TSC:
1803 case HV_X64_MSR_TIME_REF_COUNT:
1829 r = true; 1804 r = true;
1830 break; 1805 break;
1831 } 1806 }
@@ -1867,6 +1842,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1867 kvm->arch.hv_hypercall = data; 1842 kvm->arch.hv_hypercall = data;
1868 break; 1843 break;
1869 } 1844 }
1845 case HV_X64_MSR_REFERENCE_TSC: {
1846 u64 gfn;
1847 HV_REFERENCE_TSC_PAGE tsc_ref;
1848 memset(&tsc_ref, 0, sizeof(tsc_ref));
1849 kvm->arch.hv_tsc_page = data;
1850 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1851 break;
1852 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1853 if (kvm_write_guest(kvm, data,
1854 &tsc_ref, sizeof(tsc_ref)))
1855 return 1;
1856 mark_page_dirty(kvm, gfn);
1857 break;
1858 }
1870 default: 1859 default:
1871 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1860 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1872 "data 0x%llx\n", msr, data); 1861 "data 0x%llx\n", msr, data);
@@ -2291,6 +2280,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2291 case HV_X64_MSR_HYPERCALL: 2280 case HV_X64_MSR_HYPERCALL:
2292 data = kvm->arch.hv_hypercall; 2281 data = kvm->arch.hv_hypercall;
2293 break; 2282 break;
2283 case HV_X64_MSR_TIME_REF_COUNT: {
2284 data =
2285 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
2286 break;
2287 }
2288 case HV_X64_MSR_REFERENCE_TSC:
2289 data = kvm->arch.hv_tsc_page;
2290 break;
2294 default: 2291 default:
2295 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 2292 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2296 return 1; 2293 return 1;
@@ -2604,6 +2601,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2604#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2601#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2605 case KVM_CAP_ASSIGN_DEV_IRQ: 2602 case KVM_CAP_ASSIGN_DEV_IRQ:
2606 case KVM_CAP_PCI_2_3: 2603 case KVM_CAP_PCI_2_3:
2604 case KVM_CAP_HYPERV_TIME:
2607#endif 2605#endif
2608 r = 1; 2606 r = 1;
2609 break; 2607 break;
@@ -2972,8 +2970,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2972static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2970static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2973 struct kvm_debugregs *dbgregs) 2971 struct kvm_debugregs *dbgregs)
2974{ 2972{
2973 unsigned long val;
2974
2975 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2975 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2976 dbgregs->dr6 = vcpu->arch.dr6; 2976 _kvm_get_dr(vcpu, 6, &val);
2977 dbgregs->dr6 = val;
2977 dbgregs->dr7 = vcpu->arch.dr7; 2978 dbgregs->dr7 = vcpu->arch.dr7;
2978 dbgregs->flags = 0; 2979 dbgregs->flags = 0;
2979 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); 2980 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2987,7 +2988,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2987 2988
2988 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2989 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2989 vcpu->arch.dr6 = dbgregs->dr6; 2990 vcpu->arch.dr6 = dbgregs->dr6;
2991 kvm_update_dr6(vcpu);
2990 vcpu->arch.dr7 = dbgregs->dr7; 2992 vcpu->arch.dr7 = dbgregs->dr7;
2993 kvm_update_dr7(vcpu);
2991 2994
2992 return 0; 2995 return 0;
2993} 2996}
@@ -5834,6 +5837,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5834 kvm_apic_update_tmr(vcpu, tmr); 5837 kvm_apic_update_tmr(vcpu, tmr);
5835} 5838}
5836 5839
5840/*
5841 * Returns 1 to let __vcpu_run() continue the guest execution loop without
5842 * exiting to the userspace. Otherwise, the value will be returned to the
5843 * userspace.
5844 */
5837static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5845static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5838{ 5846{
5839 int r; 5847 int r;
@@ -6089,7 +6097,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
6089 } 6097 }
6090 if (need_resched()) { 6098 if (need_resched()) {
6091 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 6099 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6092 kvm_resched(vcpu); 6100 cond_resched();
6093 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 6101 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6094 } 6102 }
6095 } 6103 }
@@ -6717,6 +6725,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6717 6725
6718 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 6726 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6719 vcpu->arch.dr6 = DR6_FIXED_1; 6727 vcpu->arch.dr6 = DR6_FIXED_1;
6728 kvm_update_dr6(vcpu);
6720 vcpu->arch.dr7 = DR7_FIXED_1; 6729 vcpu->arch.dr7 = DR7_FIXED_1;
6721 kvm_update_dr7(vcpu); 6730 kvm_update_dr7(vcpu);
6722 6731
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 587fb9ede436..8da5823bcde6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
126extern u64 host_xcr0; 126extern u64 host_xcr0;
127 127
128extern unsigned int min_timer_period_us;
129
128extern struct static_key kvm_no_apic_vcpu; 130extern struct static_key kvm_no_apic_vcpu;
129#endif 131#endif