author     H. Peter Anvin <hpa@linux.intel.com>  2014-02-07 14:27:30 -0500
committer  H. Peter Anvin <hpa@linux.intel.com>  2014-02-07 14:27:30 -0500
commit     a3b072cd180c12e8fe0ece9487b9065808327640 (patch)
tree       62b982041be84748852d77cdf6ca5639ef40858f /arch/x86/kvm
parent     75a1ba5b2c529db60ca49626bcaf0bddf4548438 (diff)
parent     081cd62a010f97b5bc1d2b0cd123c5abc692b68a (diff)
Merge tag 'efi-urgent' into x86/urgent

 * Avoid WARN_ON() when mapping BGRT on Baytrail (EFI 32-bit).

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/Kconfig          2
-rw-r--r--  arch/x86/kvm/cpuid.h          8
-rw-r--r--  arch/x86/kvm/i8254.c         18
-rw-r--r--  arch/x86/kvm/lapic.c          9
-rw-r--r--  arch/x86/kvm/lapic.h          2
-rw-r--r--  arch/x86/kvm/mmu.c           12
-rw-r--r--  arch/x86/kvm/paging_tmpl.h    8
-rw-r--r--  arch/x86/kvm/svm.c           15
-rw-r--r--  arch/x86/kvm/vmx.c          332
-rw-r--r--  arch/x86/kvm/x86.c          140
-rw-r--r--  arch/x86/kvm/x86.h            2
11 files changed, 362 insertions(+), 186 deletions(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db2b832..287e4c85fff9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT
         depends on KVM && TRACEPOINTS
         ---help---
          This option adds a R/W kVM module parameter 'mmu_audit', which allows
-         audit KVM MMU at runtime.
+         auditing of KVM MMU events at runtime.
 
 config KVM_DEVICE_ASSIGNMENT
         bool "KVM legacy PCI device assignment support"
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f1e4895174b2..a2a1bb7ed8c1 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -72,4 +72,12 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
         return best && (best->ecx & bit(X86_FEATURE_PCID));
 }
 
+static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
+{
+        struct kvm_cpuid_entry2 *best;
+
+        best = kvm_find_cpuid_entry(vcpu, 1, 0);
+        return best && (best->ecx & bit(X86_FEATURE_X2APIC));
+}
+
 #endif
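Reviewer note (not part of the patch): the helper added above is what lets the new kvm_set_apic_base() checks in x86.c, further down in this diff, refuse to enable x2APIC mode when the guest's CPUID does not advertise it. As a standalone illustration, the same leaf-1/ECX probe can be done from user space with GCC's <cpuid.h>; X2APIC is bit 21 of CPUID.01H:ECX. A minimal sketch:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* leaf 1: feature flags; __get_cpuid() returns 0 if unsupported */
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;
        printf("x2APIC supported: %s\n", (ecx & (1u << 21)) ? "yes" : "no");
        return 0;
}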
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 412a5aa0ef94..518d86471b76 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -37,6 +37,7 @@
 
 #include "irq.h"
 #include "i8254.h"
+#include "x86.h"
 
 #ifndef CONFIG_X86_64
 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
         atomic_set(&ps->pending, 0);
         ps->irq_ack = 1;
 
+        /*
+         * Do not allow the guest to program periodic timers with small
+         * interval, since the hrtimers are not throttled by the host
+         * scheduler.
+         */
+        if (ps->is_periodic) {
+                s64 min_period = min_timer_period_us * 1000LL;
+
+                if (ps->period < min_period) {
+                        pr_info_ratelimited(
+                            "kvm: requested %lld ns "
+                            "i8254 timer period limited to %lld ns\n",
+                            ps->period, min_period);
+                        ps->period = min_period;
+                }
+        }
+
         hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
                       HRTIMER_MODE_ABS);
 }
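Reviewer note: the clamp exists because the emulated PIT drives a periodic hrtimer on the host, which the scheduler does not throttle, so a guest programming a tiny period could flood the host with callbacks. With the default min_timer_period_us of 500, anything below 500,000 ns is raised to that floor. A runnable sketch of just the rule (names mirror the patch, but this is not kernel code):

#include <stdio.h>

static unsigned int min_timer_period_us = 500;  /* module parameter default */

static long long clamp_period_ns(long long requested_ns)
{
        long long min_period_ns = min_timer_period_us * 1000LL;

        if (requested_ns < min_period_ns) {
                fprintf(stderr, "requested %lld ns limited to %lld ns\n",
                        requested_ns, min_period_ns);
                return min_period_ns;
        }
        return requested_ns;
}

int main(void)
{
        printf("%lld\n", clamp_period_ns(1000));        /* -> 500000 */
        printf("%lld\n", clamp_period_ns(2000000));     /* unchanged */
        return 0;
}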
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 775702f649ca..9736529ade08 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,9 +71,6 @@
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
-static unsigned int min_timer_period_us = 500;
-module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
-
 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
 {
         *((u32 *) (apic->regs + reg_off)) = val;
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
         u8 val;
         if (pv_eoi_get_user(vcpu, &val) < 0)
                 apic_debug("Can't read EOI MSR value: 0x%llx\n",
-                           (unsigned long long)vcpi->arch.pv_eoi.msr_val);
+                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
         return val & 0x1;
 }
 
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 {
         if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
                 apic_debug("Can't set EOI MSR value: 0x%llx\n",
-                           (unsigned long long)vcpi->arch.pv_eoi.msr_val);
+                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                 return;
         }
         __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 {
         if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
                 apic_debug("Can't clear EOI MSR value: 0x%llx\n",
-                           (unsigned long long)vcpi->arch.pv_eoi.msr_val);
+                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                 return;
         }
         __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index c8b0d0d2da5c..6a11845fd8b9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -65,7 +65,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
                 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
                 struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef0f2b1..e50425d0f5f7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
         int emulate = 0;
         gfn_t pseudo_gfn;
 
+        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+                return 0;
+
         for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
                 if (iterator.level == level) {
                         mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
@@ -2829,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
         bool ret = false;
         u64 spte = 0ull;
 
+        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+                return false;
+
         if (!page_fault_can_be_fast(error_code))
                 return false;
 
@@ -3224,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
         struct kvm_shadow_walk_iterator iterator;
         u64 spte = 0ull;
 
+        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+                return spte;
+
         walk_shadow_page_lockless_begin(vcpu);
         for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
                 if (!is_shadow_present_pte(spte))
@@ -4510,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
         u64 spte;
         int nr_sptes = 0;
 
+        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+                return nr_sptes;
+
         walk_shadow_page_lockless_begin(vcpu);
         for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
                 sptes[iterator.level-1] = spte;
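Reviewer note: all four hunks add the same guard — a shadow-page-table walk must not start while mmu.root_hpa holds the INVALID_PAGE sentinel, since the root can be zapped before the walker runs. A reduced sketch of the shape of the fix, with an illustrative sentinel standing in for KVM's INVALID_PAGE (assumption: this is a demo of the guard pattern only, not the kernel logic):

#include <stdbool.h>
#include <stdio.h>

#define INVALID_ROOT (~0ULL)            /* stand-in for INVALID_PAGE */

struct mmu_state {
        unsigned long long root_hpa;    /* root table address or INVALID_ROOT */
};

static bool walk_shadow(struct mmu_state *mmu)
{
        if (mmu->root_hpa == INVALID_ROOT)      /* bail out before walking */
                return false;
        /* ... descend from mmu->root_hpa here ... */
        return true;
}

int main(void)
{
        struct mmu_state mmu = { .root_hpa = INVALID_ROOT };

        printf("walk ran: %s\n", walk_shadow(&mmu) ? "yes" : "no");
        return 0;
}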
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ad75d77999d0..cba218a2f08d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         if (FNAME(gpte_changed)(vcpu, gw, top_level))
                 goto out_gpte_changed;
 
+        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+                goto out_gpte_changed;
+
         for (shadow_walk_init(&it, vcpu, addr);
              shadow_walk_okay(&it) && it.level > gw->level;
              shadow_walk_next(&it)) {
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
          */
         mmu_topup_memory_caches(vcpu);
 
+        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+                WARN_ON(1);
+                return;
+        }
+
         spin_lock(&vcpu->kvm->mmu_lock);
         for_each_shadow_entry(vcpu, gva, iterator) {
                 level = iterator.level;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c7168a5cff1b..e81df8fce027 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
         mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
+{
+        return to_svm(vcpu)->vmcb->save.dr6;
+}
+
+static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
+{
+        struct vcpu_svm *svm = to_svm(vcpu);
+
+        svm->vmcb->save.dr6 = value;
+        mark_dirty(svm->vmcb, VMCB_DR);
+}
+
 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = {
         .set_idt = svm_set_idt,
         .get_gdt = svm_get_gdt,
         .set_gdt = svm_set_gdt,
+        .get_dr6 = svm_get_dr6,
+        .set_dr6 = svm_set_dr6,
         .set_dr7 = svm_set_dr7,
         .cache_reg = svm_cache_reg,
         .get_rflags = svm_get_rflags,
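Reviewer note: the new get_dr6/set_dr6 entries let generic x86 code (kvm_update_dr6() and _kvm_get_dr() later in this diff) reach the vendor's DR6 storage; on SVM that is the VMCB save area, so a write also marks the VMCB_DR clean bits dirty. A sketch of the ops-table indirection itself (illustrative names; the real table is struct kvm_x86_ops):

#include <stdio.h>

struct vcpu {
        unsigned long dr6;      /* backend-private storage in this sketch */
};

struct x86_ops {
        unsigned long (*get_dr6)(struct vcpu *v);
        void (*set_dr6)(struct vcpu *v, unsigned long value);
};

static unsigned long backend_get_dr6(struct vcpu *v)
{
        return v->dr6;
}

static void backend_set_dr6(struct vcpu *v, unsigned long value)
{
        v->dr6 = value;         /* a real backend would also mark state dirty */
}

static const struct x86_ops ops = {
        .get_dr6 = backend_get_dr6,
        .set_dr6 = backend_set_dr6,
};

int main(void)
{
        struct vcpu v = { 0 };

        ops.set_dr6(&v, 0xffff0ff0);    /* DR6 reset value */
        printf("dr6 = 0x%lx\n", ops.get_dr6(&v));
        return 0;
}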
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index da7837e1349d..a06f101ef64b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,8 @@ struct vcpu_vmx {
         u64 msr_host_kernel_gs_base;
         u64 msr_guest_kernel_gs_base;
 #endif
+        u32 vm_entry_controls_shadow;
+        u32 vm_exit_controls_shadow;
         /*
          * loaded_vmcs points to the VMCS currently used in this vcpu. For a
          * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -1056,7 +1058,9 @@ static inline bool is_exception(u32 intr_info)
                 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
 }
 
-static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
+static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+                              u32 exit_intr_info,
+                              unsigned long exit_qualification);
 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
                         struct vmcs12 *vmcs12,
                         u32 reason, unsigned long qualification);
@@ -1326,6 +1330,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
         vmcs_writel(field, vmcs_readl(field) | mask);
 }
 
+static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+        vmcs_write32(VM_ENTRY_CONTROLS, val);
+        vmx->vm_entry_controls_shadow = val;
+}
+
+static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+        if (vmx->vm_entry_controls_shadow != val)
+                vm_entry_controls_init(vmx, val);
+}
+
+static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
+{
+        return vmx->vm_entry_controls_shadow;
+}
+
+
+static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+        vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
+}
+
+static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+        vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
+}
+
+static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+        vmcs_write32(VM_EXIT_CONTROLS, val);
+        vmx->vm_exit_controls_shadow = val;
+}
+
+static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+        if (vmx->vm_exit_controls_shadow != val)
+                vm_exit_controls_init(vmx, val);
+}
+
+static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
+{
+        return vmx->vm_exit_controls_shadow;
+}
+
+
+static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+        vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
+}
+
+static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+        vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
+}
+
 static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 {
         vmx->segment_cache.bitmask = 0;
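Reviewer note: VMREAD/VMWRITE are comparatively expensive instructions, so the helpers above keep a shadow copy of VM_ENTRY_CONTROLS/VM_EXIT_CONTROLS in vcpu_vmx and only touch the VMCS when a value actually changes. The idea in isolation, where hw_write() stands in for the costly VMWRITE (a sketch, not the kernel code):

#include <stdint.h>
#include <stdio.h>

static unsigned int hw_writes;          /* counts "expensive" writes */

static void hw_write(uint32_t val)
{
        (void)val;
        hw_writes++;
}

struct ctl_cache {
        uint32_t shadow;
};

static void ctl_init(struct ctl_cache *c, uint32_t val)
{
        hw_write(val);
        c->shadow = val;
}

static void ctl_set(struct ctl_cache *c, uint32_t val)
{
        if (c->shadow != val)           /* skip hardware when unchanged */
                ctl_init(c, val);
}

int main(void)
{
        struct ctl_cache c;

        ctl_init(&c, 0x1);
        ctl_set(&c, c.shadow | 0x1);    /* bit already set: no write */
        ctl_set(&c, c.shadow & ~0x1);   /* changes: one write */
        printf("hardware writes: %u\n", hw_writes);     /* -> 2 */
        return 0;
}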
@@ -1410,11 +1470,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
         vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
-static void clear_atomic_switch_msr_special(unsigned long entry,
-                unsigned long exit)
+static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+                unsigned long entry, unsigned long exit)
 {
-        vmcs_clear_bits(VM_ENTRY_CONTROLS, entry);
-        vmcs_clear_bits(VM_EXIT_CONTROLS, exit);
+        vm_entry_controls_clearbit(vmx, entry);
+        vm_exit_controls_clearbit(vmx, exit);
 }
 
 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
@@ -1425,14 +1485,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
         switch (msr) {
         case MSR_EFER:
                 if (cpu_has_load_ia32_efer) {
-                        clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                        clear_atomic_switch_msr_special(vmx,
+                                        VM_ENTRY_LOAD_IA32_EFER,
                                         VM_EXIT_LOAD_IA32_EFER);
                         return;
                 }
                 break;
         case MSR_CORE_PERF_GLOBAL_CTRL:
                 if (cpu_has_load_perf_global_ctrl) {
-                        clear_atomic_switch_msr_special(
+                        clear_atomic_switch_msr_special(vmx,
                                         VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
                                         VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
                         return;
@@ -1453,14 +1514,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
         vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
 }
 
-static void add_atomic_switch_msr_special(unsigned long entry,
-                unsigned long exit, unsigned long guest_val_vmcs,
-                unsigned long host_val_vmcs, u64 guest_val, u64 host_val)
+static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+                unsigned long entry, unsigned long exit,
+                unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
+                u64 guest_val, u64 host_val)
 {
         vmcs_write64(guest_val_vmcs, guest_val);
         vmcs_write64(host_val_vmcs, host_val);
-        vmcs_set_bits(VM_ENTRY_CONTROLS, entry);
-        vmcs_set_bits(VM_EXIT_CONTROLS, exit);
+        vm_entry_controls_setbit(vmx, entry);
+        vm_exit_controls_setbit(vmx, exit);
 }
 
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
@@ -1472,7 +1534,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
         switch (msr) {
         case MSR_EFER:
                 if (cpu_has_load_ia32_efer) {
-                        add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                        add_atomic_switch_msr_special(vmx,
+                                        VM_ENTRY_LOAD_IA32_EFER,
                                         VM_EXIT_LOAD_IA32_EFER,
                                         GUEST_IA32_EFER,
                                         HOST_IA32_EFER,
@@ -1482,7 +1545,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                 break;
         case MSR_CORE_PERF_GLOBAL_CTRL:
                 if (cpu_has_load_perf_global_ctrl) {
-                        add_atomic_switch_msr_special(
+                        add_atomic_switch_msr_special(vmx,
                                         VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
                                         VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
                                         GUEST_IA32_PERF_GLOBAL_CTRL,
@@ -1906,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
         if (!(vmcs12->exception_bitmap & (1u << nr)))
                 return 0;
 
-        nested_vmx_vmexit(vcpu);
+        nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+                          vmcs_read32(VM_EXIT_INTR_INFO),
+                          vmcs_readl(EXIT_QUALIFICATION));
         return 1;
 }
 
@@ -2279,6 +2344,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
         rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
         nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
                 VMX_MISC_SAVE_EFER_LMA;
+        nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
         nested_vmx_misc_high = 0;
 }
 
@@ -2295,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
         return low | ((u64)high << 32);
 }
 
-/*
- * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
- * also let it use VMX-specific MSRs.
- * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
- * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
- * like all other MSRs).
- */
+/* Returns 0 on success, non-0 otherwise. */
 static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
-        if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
-            msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
-                /*
-                 * According to the spec, processors which do not support VMX
-                 * should throw a #GP(0) when VMX capability MSRs are read.
-                 */
-                kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-                return 1;
-        }
-
         switch (msr_index) {
-        case MSR_IA32_FEATURE_CONTROL:
-                if (nested_vmx_allowed(vcpu)) {
-                        *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
-                        break;
-                }
-                return 0;
         case MSR_IA32_VMX_BASIC:
                 /*
                  * This MSR reports some information about VMX support. We
@@ -2387,34 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                 *pdata = nested_vmx_ept_caps;
                 break;
         default:
-                return 0;
-        }
-
-        return 1;
-}
-
-static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-{
-        u32 msr_index = msr_info->index;
-        u64 data = msr_info->data;
-        bool host_initialized = msr_info->host_initiated;
-
-        if (!nested_vmx_allowed(vcpu))
-                return 0;
-
-        if (msr_index == MSR_IA32_FEATURE_CONTROL) {
-                if (!host_initialized &&
-                                to_vmx(vcpu)->nested.msr_ia32_feature_control
-                                & FEATURE_CONTROL_LOCKED)
-                        return 0;
-                to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
                 return 1;
         }
 
-        /*
-         * No need to treat VMX capability MSRs specially: If we don't handle
-         * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
-         */
         return 0;
 }
 
@@ -2460,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
         case MSR_IA32_SYSENTER_ESP:
                 data = vmcs_readl(GUEST_SYSENTER_ESP);
                 break;
+        case MSR_IA32_FEATURE_CONTROL:
+                if (!nested_vmx_allowed(vcpu))
+                        return 1;
+                data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+                break;
+        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+                if (!nested_vmx_allowed(vcpu))
+                        return 1;
+                return vmx_get_vmx_msr(vcpu, msr_index, pdata);
         case MSR_TSC_AUX:
                 if (!to_vmx(vcpu)->rdtscp_enabled)
                         return 1;
                 /* Otherwise falls through */
         default:
-                if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
-                        return 0;
                 msr = find_msr_entry(to_vmx(vcpu), msr_index);
                 if (msr) {
                         data = msr->data;
@@ -2479,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
         return 0;
 }
 
+static void vmx_leave_nested(struct kvm_vcpu *vcpu);
+
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -2533,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         case MSR_IA32_TSC_ADJUST:
                 ret = kvm_set_msr_common(vcpu, msr_info);
                 break;
+        case MSR_IA32_FEATURE_CONTROL:
+                if (!nested_vmx_allowed(vcpu) ||
+                    (to_vmx(vcpu)->nested.msr_ia32_feature_control &
+                     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
+                        return 1;
+                vmx->nested.msr_ia32_feature_control = data;
+                if (msr_info->host_initiated && data == 0)
+                        vmx_leave_nested(vcpu);
+                break;
+        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+                return 1; /* they are read-only */
         case MSR_TSC_AUX:
                 if (!vmx->rdtscp_enabled)
                         return 1;
@@ -2541,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                         return 1;
                 /* Otherwise falls through */
         default:
-                if (vmx_set_vmx_msr(vcpu, msr_info))
-                        break;
                 msr = find_msr_entry(vmx, msr_index);
                 if (msr) {
                         msr->data = data;
@@ -3182,14 +3219,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
         vmx_load_host_state(to_vmx(vcpu));
         vcpu->arch.efer = efer;
         if (efer & EFER_LMA) {
-                vmcs_write32(VM_ENTRY_CONTROLS,
-                             vmcs_read32(VM_ENTRY_CONTROLS) |
-                             VM_ENTRY_IA32E_MODE);
+                vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
                 msr->data = efer;
         } else {
-                vmcs_write32(VM_ENTRY_CONTROLS,
-                             vmcs_read32(VM_ENTRY_CONTROLS) &
-                             ~VM_ENTRY_IA32E_MODE);
+                vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
 
                 msr->data = efer & ~EFER_LME;
         }
@@ -3217,9 +3250,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
 {
-        vmcs_write32(VM_ENTRY_CONTROLS,
-                     vmcs_read32(VM_ENTRY_CONTROLS)
-                     & ~VM_ENTRY_IA32E_MODE);
+        vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
         vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
 }
 
@@ -4346,10 +4377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                 ++vmx->nmsrs;
         }
 
-        vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+
+        vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
         /* 22.2.1, 20.8.1 */
-        vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
+        vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
 
         vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
         set_cr4_guest_host_mask(vmx);
@@ -4360,7 +4392,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-        u64 msr;
+        struct msr_data apic_base_msr;
 
         vmx->rmode.vm86_active = 0;
 
@@ -4368,10 +4400,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
         vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
         kvm_set_cr8(&vmx->vcpu, 0);
-        msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+        apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
         if (kvm_vcpu_is_bsp(&vmx->vcpu))
-                msr |= MSR_IA32_APICBASE_BSP;
-        kvm_set_apic_base(&vmx->vcpu, msr);
+                apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+        apic_base_msr.host_initiated = true;
+        kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
 
         vmx_segment_cache_clear(vmx);
 
@@ -4588,15 +4621,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
         if (is_guest_mode(vcpu)) {
-                struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
                 if (to_vmx(vcpu)->nested.nested_run_pending)
                         return 0;
                 if (nested_exit_on_nmi(vcpu)) {
-                        nested_vmx_vmexit(vcpu);
-                        vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
-                        vmcs12->vm_exit_intr_info = NMI_VECTOR |
-                                INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
+                        nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+                                          NMI_VECTOR | INTR_TYPE_NMI_INTR |
+                                          INTR_INFO_VALID_MASK, 0);
                         /*
                          * The NMI-triggered VM exit counts as injection:
                          * clear this one and block further NMIs.
@@ -4618,15 +4648,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
         if (is_guest_mode(vcpu)) {
-                struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
                 if (to_vmx(vcpu)->nested.nested_run_pending)
                         return 0;
                 if (nested_exit_on_intr(vcpu)) {
-                        nested_vmx_vmexit(vcpu);
-                        vmcs12->vm_exit_reason =
-                                EXIT_REASON_EXTERNAL_INTERRUPT;
-                        vmcs12->vm_exit_intr_info = 0;
+                        nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
+                                          0, 0);
                         /*
                          * fall through to normal code, but now in L1, not L2
                          */
@@ -4812,7 +4838,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                 dr6 = vmcs_readl(EXIT_QUALIFICATION);
                 if (!(vcpu->guest_debug &
                       (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
-                        vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+                        vcpu->arch.dr6 &= ~15;
+                        vcpu->arch.dr6 |= dr6;
                         kvm_queue_exception(vcpu, DB_VECTOR);
                         return 1;
                 }
@@ -5080,14 +5107,27 @@ static int handle_dr(struct kvm_vcpu *vcpu)
         reg = DEBUG_REG_ACCESS_REG(exit_qualification);
         if (exit_qualification & TYPE_MOV_FROM_DR) {
                 unsigned long val;
-                if (!kvm_get_dr(vcpu, dr, &val))
-                        kvm_register_write(vcpu, reg, val);
+
+                if (kvm_get_dr(vcpu, dr, &val))
+                        return 1;
+                kvm_register_write(vcpu, reg, val);
         } else
-                kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]);
+                if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
+                        return 1;
+
         skip_emulated_instruction(vcpu);
         return 1;
 }
 
+static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
+{
+        return vcpu->arch.dr6;
+}
+
+static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
+{
+}
+
 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
 {
         vmcs_writel(GUEST_DR7, val);
@@ -6460,11 +6500,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
         int size;
         u8 b;
 
-        if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
-                return 1;
-
         if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
-                return 0;
+                return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
 
         exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
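Reviewer note: the rewrite folds in the architectural precedence — the "unconditional I/O exiting" control only matters when "use I/O bitmaps" is clear; once bitmaps are enabled, the bitmap alone decides. A compact decision-function sketch of that rule (illustrative, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

static bool io_causes_exit(bool use_io_bitmaps, bool uncond_io_exiting,
                           bool bitmap_bit_set_for_port)
{
        if (!use_io_bitmaps)
                return uncond_io_exiting;
        return bitmap_bit_set_for_port; /* unconditional bit is ignored */
}

int main(void)
{
        printf("%d\n", io_causes_exit(true, true, false));      /* -> 0 */
        printf("%d\n", io_causes_exit(false, true, false));     /* -> 1 */
        return 0;
}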
@@ -6628,6 +6665,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
         u32 exit_reason = vmx->exit_reason;
 
+        trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
+                                vmcs_readl(EXIT_QUALIFICATION),
+                                vmx->idt_vectoring_info,
+                                intr_info,
+                                vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+                                KVM_ISA_VMX);
+
         if (vmx->nested.nested_run_pending)
                 return 0;
 
@@ -6777,7 +6821,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
                 return handle_invalid_guest_state(vcpu);
 
         if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
-                nested_vmx_vmexit(vcpu);
+                nested_vmx_vmexit(vcpu, exit_reason,
+                                  vmcs_read32(VM_EXIT_INTR_INFO),
+                                  vmcs_readl(EXIT_QUALIFICATION));
                 return 1;
         }
 
@@ -7332,8 +7378,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
         struct vcpu_vmx *vmx = to_vmx(vcpu);
 
         free_vpid(vmx);
-        free_nested(vmx);
         free_loaded_vmcs(vmx->loaded_vmcs);
+        free_nested(vmx);
         kfree(vmx->guest_msrs);
         kvm_vcpu_uninit(vcpu);
         kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -7518,15 +7564,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
                 struct x86_exception *fault)
 {
-        struct vmcs12 *vmcs12;
-        nested_vmx_vmexit(vcpu);
-        vmcs12 = get_vmcs12(vcpu);
+        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+        u32 exit_reason;
 
         if (fault->error_code & PFERR_RSVD_MASK)
-                vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+                exit_reason = EXIT_REASON_EPT_MISCONFIG;
         else
-                vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
-        vmcs12->exit_qualification = vcpu->arch.exit_qualification;
+                exit_reason = EXIT_REASON_EPT_VIOLATION;
+        nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
         vmcs12->guest_physical_address = fault->address;
 }
 
@@ -7564,7 +7609,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
         /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
         if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
-                nested_vmx_vmexit(vcpu);
+                nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+                                  vmcs_read32(VM_EXIT_INTR_INFO),
+                                  vmcs_readl(EXIT_QUALIFICATION));
         else
                 kvm_inject_page_fault(vcpu, fault);
 }
@@ -7706,6 +7753,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                 else
                         vmcs_write64(APIC_ACCESS_ADDR,
                                 page_to_phys(vmx->nested.apic_access_page));
+        } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
+                exec_control |=
+                        SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+                vmcs_write64(APIC_ACCESS_ADDR,
+                        page_to_phys(vcpu->kvm->arch.apic_access_page));
         }
 
         vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -7759,12 +7811,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         exit_control = vmcs_config.vmexit_ctrl;
         if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
                 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-        vmcs_write32(VM_EXIT_CONTROLS, exit_control);
+        vm_exit_controls_init(vmx, exit_control);
 
         /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
          * emulated by vmx_set_efer(), below.
          */
-        vmcs_write32(VM_ENTRY_CONTROLS,
+        vm_entry_controls_init(vmx,
                 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
                         ~VM_ENTRY_IA32E_MODE) |
                 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -7882,7 +7934,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                 return 1;
         }
 
-        if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+        if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
+            vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
                 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                 return 1;
         }
@@ -7994,8 +8047,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
         enter_guest_mode(vcpu);
 
-        vmx->nested.nested_run_pending = 1;
-
         vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
 
         cpu = get_cpu();
@@ -8011,6 +8062,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
         prepare_vmcs02(vcpu, vmcs12);
 
+        if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+                return kvm_emulate_halt(vcpu);
+
+        vmx->nested.nested_run_pending = 1;
+
         /*
          * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
          * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -8110,7 +8166,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
  * exit-information fields only. Other fields are modified by L1 with VMWRITE,
  * which already writes to vmcs12 directly.
  */
-static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+                           u32 exit_reason, u32 exit_intr_info,
+                           unsigned long exit_qualification)
 {
         /* update guest state fields: */
         vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -8162,6 +8220,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
         vmcs12->guest_pending_dbg_exceptions =
                 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+        if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+                vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
+        else
+                vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
         if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
             (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
@@ -8186,7 +8248,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
         vmcs12->vm_entry_controls =
                 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
-                (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
+                (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
 
         /* TODO: These cannot have changed unless we have MSR bitmaps and
          * the relevant bit asks not to trap the change */
@@ -8201,10 +8263,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
         /* update exit information fields: */
 
-        vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason;
-        vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+        vmcs12->vm_exit_reason = exit_reason;
+        vmcs12->exit_qualification = exit_qualification;
 
-        vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+        vmcs12->vm_exit_intr_info = exit_intr_info;
         if ((vmcs12->vm_exit_intr_info &
              (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
             (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
@@ -8370,7 +8432,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
  * and modify vmcs12 to make it see what it would expect to see there if
  * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
  */
-static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
+static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+                              u32 exit_intr_info,
+                              unsigned long exit_qualification)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         int cpu;
@@ -8380,7 +8444,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
         WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
         leave_guest_mode(vcpu);
-        prepare_vmcs12(vcpu, vmcs12);
+        prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+                       exit_qualification);
+
+        trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+                                       vmcs12->exit_qualification,
+                                       vmcs12->idt_vectoring_info_field,
+                                       vmcs12->vm_exit_intr_info,
+                                       vmcs12->vm_exit_intr_error_code,
+                                       KVM_ISA_VMX);
 
         cpu = get_cpu();
         vmx->loaded_vmcs = &vmx->vmcs01;
@@ -8389,6 +8461,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
         vcpu->cpu = cpu;
         put_cpu();
 
+        vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
+        vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
         vmx_segment_cache_clear(vmx);
 
         /* if no vmcs02 cache requested, remove the one we used */
@@ -8424,6 +8498,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 }
 
 /*
+ * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+ */
+static void vmx_leave_nested(struct kvm_vcpu *vcpu)
+{
+        if (is_guest_mode(vcpu))
+                nested_vmx_vmexit(vcpu, -1, 0, 0);
+        free_nested(to_vmx(vcpu));
+}
+
+/*
  * L1's failure to enter L2 is a subset of a normal exit, as explained in
  * 23.7 "VM-entry failures during or after loading guest state" (this also
  * lists the acceptable exit-reason and exit-qualification parameters).
@@ -8486,6 +8570,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .set_idt = vmx_set_idt,
         .get_gdt = vmx_get_gdt,
         .set_gdt = vmx_set_gdt,
+        .get_dr6 = vmx_get_dr6,
+        .set_dr6 = vmx_set_dr6,
         .set_dr7 = vmx_set_dr7,
         .cache_reg = vmx_cache_reg,
         .get_rflags = vmx_get_rflags,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da1e35d..39c28f09dfd5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 static bool ignore_msrs = 0;
 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
 bool kvm_has_tsc_control;
 EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
 u32 kvm_max_guest_tsc_khz;
@@ -254,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
-        /* TODO: reserve bits check */
-        kvm_lapic_set_base(vcpu, data);
+        u64 old_state = vcpu->arch.apic_base &
+                (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+        u64 new_state = msr_info->data &
+                (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+        u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
+                0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+
+        if (!msr_info->host_initiated &&
+            ((msr_info->data & reserved_bits) != 0 ||
+             new_state == X2APIC_ENABLE ||
+             (new_state == MSR_IA32_APICBASE_ENABLE &&
+              old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
+             (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
+              old_state == 0)))
+                return 1;
+
+        kvm_lapic_set_base(vcpu, msr_info->data);
+        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
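Reviewer note: the rewritten setter validates guest writes — reserved bits (above the guest's MAXPHYADDR, the low 0x2ff mask, and the x2APIC bit when CPUID does not advertise it) must be zero, and a non-host-initiated write may only step the ENABLE/x2APIC state machine one mode at a time. A sketch of just the transition rule, with an illustrative encoding (not the kernel code):

#include <stdbool.h>
#include <stdio.h>

enum apic_mode { APIC_DISABLED, APIC_XAPIC, APIC_X2APIC };

static bool transition_ok(enum apic_mode old, enum apic_mode new)
{
        if (new == APIC_X2APIC && old != APIC_X2APIC)
                return old == APIC_XAPIC;       /* must come from xAPIC */
        if (new == APIC_XAPIC && old == APIC_X2APIC)
                return false;                   /* must disable first */
        return true;
}

int main(void)
{
        printf("%d\n", transition_ok(APIC_XAPIC, APIC_X2APIC));         /* 1 */
        printf("%d\n", transition_ok(APIC_DISABLED, APIC_X2APIC));      /* 0 */
        printf("%d\n", transition_ok(APIC_X2APIC, APIC_XAPIC));         /* 0 */
        return 0;
}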
@@ -719,6 +738,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr6(struct kvm_vcpu *vcpu)
+{
+        if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+                kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
+}
+
 static void kvm_update_dr7(struct kvm_vcpu *vcpu)
 {
         unsigned long dr7;
@@ -747,6 +772,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                 if (val & 0xffffffff00000000ULL)
                         return -1; /* #GP */
                 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+                kvm_update_dr6(vcpu);
                 break;
         case 5:
                 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -788,7 +814,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
                 return 1;
                 /* fall through */
         case 6:
-                *val = vcpu->arch.dr6;
+                if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+                        *val = vcpu->arch.dr6;
+                else
+                        *val = kvm_x86_ops->get_dr6(vcpu);
                 break;
         case 5:
                 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -836,11 +865,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN     10
+#define KVM_SAVE_MSRS_BEGIN     12
 static u32 msrs_to_save[] = {
         MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
         MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
         HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+        HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
         HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
         MSR_KVM_PV_EOI_EN,
         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1275,8 +1305,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
         kvm->arch.last_tsc_write = data;
         kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
 
-        /* Reset of TSC must disable overshoot protection below */
-        vcpu->arch.hv_clock.tsc_timestamp = 0;
         vcpu->arch.last_guest_tsc = data;
 
         /* Keep track of which generation this VCPU has synchronized to */
@@ -1484,7 +1512,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         unsigned long flags, this_tsc_khz;
         struct kvm_vcpu_arch *vcpu = &v->arch;
         struct kvm_arch *ka = &v->kvm->arch;
-        s64 kernel_ns, max_kernel_ns;
+        s64 kernel_ns;
         u64 tsc_timestamp, host_tsc;
         struct pvclock_vcpu_time_info guest_hv_clock;
         u8 pvclock_flags;
@@ -1543,37 +1571,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         if (!vcpu->pv_time_enabled)
                 return 0;
 
-        /*
-         * Time as measured by the TSC may go backwards when resetting the base
-         * tsc_timestamp. The reason for this is that the TSC resolution is
-         * higher than the resolution of the other clock scales. Thus, many
-         * possible measurments of the TSC correspond to one measurement of any
-         * other clock, and so a spread of values is possible. This is not a
-         * problem for the computation of the nanosecond clock; with TSC rates
-         * around 1GHZ, there can only be a few cycles which correspond to one
-         * nanosecond value, and any path through this code will inevitably
-         * take longer than that. However, with the kernel_ns value itself,
-         * the precision may be much lower, down to HZ granularity. If the
-         * first sampling of TSC against kernel_ns ends in the low part of the
-         * range, and the second in the high end of the range, we can get:
-         *
-         * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
-         *
-         * As the sampling errors potentially range in the thousands of cycles,
-         * it is possible such a time value has already been observed by the
-         * guest. To protect against this, we must compute the system time as
-         * observed by the guest and ensure the new system time is greater.
-         */
-        max_kernel_ns = 0;
-        if (vcpu->hv_clock.tsc_timestamp) {
-                max_kernel_ns = vcpu->last_guest_tsc -
-                                vcpu->hv_clock.tsc_timestamp;
-                max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
-                                    vcpu->hv_clock.tsc_to_system_mul,
-                                    vcpu->hv_clock.tsc_shift);
-                max_kernel_ns += vcpu->last_kernel_ns;
-        }
-
         if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
                 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
                                    &vcpu->hv_clock.tsc_shift,
@@ -1581,14 +1578,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
                 vcpu->hw_tsc_khz = this_tsc_khz;
         }
 
-        /* with a master <monotonic time, tsc value> tuple,
-         * pvclock clock reads always increase at the (scaled) rate
-         * of guest TSC - no need to deal with sampling errors.
-         */
-        if (!use_master_clock) {
-                if (max_kernel_ns > kernel_ns)
-                        kernel_ns = max_kernel_ns;
-        }
         /* With all the info we got, fill in the values */
         vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
         vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
@@ -1826,6 +1815,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
         switch (msr) {
         case HV_X64_MSR_GUEST_OS_ID:
         case HV_X64_MSR_HYPERCALL:
+        case HV_X64_MSR_REFERENCE_TSC:
+        case HV_X64_MSR_TIME_REF_COUNT:
                 r = true;
                 break;
         }
@@ -1865,6 +1856,21 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                 if (__copy_to_user((void __user *)addr, instructions, 4))
                         return 1;
                 kvm->arch.hv_hypercall = data;
+                mark_page_dirty(kvm, gfn);
+                break;
+        }
+        case HV_X64_MSR_REFERENCE_TSC: {
+                u64 gfn;
+                HV_REFERENCE_TSC_PAGE tsc_ref;
+                memset(&tsc_ref, 0, sizeof(tsc_ref));
+                kvm->arch.hv_tsc_page = data;
+                if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+                        break;
+                gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+                if (kvm_write_guest(kvm, data,
+                                    &tsc_ref, sizeof(tsc_ref)))
+                        return 1;
+                mark_page_dirty(kvm, gfn);
                 break;
         }
         default:
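Reviewer note: the MSR value packs an enable flag in bit 0 and the guest page-frame number of the reference-TSC page in the upper bits (the address shift is 12 for 4 KiB pages); on enable, KVM publishes a zeroed HV_REFERENCE_TSC_PAGE at that guest page. A sketch of the decode (constant names here are illustrative stand-ins for the Hyper-V definitions):

#include <stdint.h>
#include <stdio.h>

#define TSC_REFERENCE_ENABLE            1ULL
#define TSC_REFERENCE_ADDRESS_SHIFT     12

int main(void)
{
        uint64_t msr = (0x12345ULL << TSC_REFERENCE_ADDRESS_SHIFT) |
                       TSC_REFERENCE_ENABLE;

        printf("enabled=%d gfn=0x%llx\n",
               (int)(msr & TSC_REFERENCE_ENABLE),
               (unsigned long long)(msr >> TSC_REFERENCE_ADDRESS_SHIFT));
        return 0;
}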
@@ -1879,19 +1885,21 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
         switch (msr) {
         case HV_X64_MSR_APIC_ASSIST_PAGE: {
+                u64 gfn;
                 unsigned long addr;
 
                 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
                         vcpu->arch.hv_vapic = data;
                         break;
                 }
-                addr = gfn_to_hva(vcpu->kvm, data >>
-                                  HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
+                gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+                addr = gfn_to_hva(vcpu->kvm, gfn);
                 if (kvm_is_error_hva(addr))
                         return 1;
                 if (__clear_user((void __user *)addr, PAGE_SIZE))
                         return 1;
                 vcpu->arch.hv_vapic = data;
+                mark_page_dirty(vcpu->kvm, gfn);
                 break;
         }
         case HV_X64_MSR_EOI:
@@ -2017,8 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         case 0x200 ... 0x2ff:
                 return set_msr_mtrr(vcpu, msr, data);
         case MSR_IA32_APICBASE:
-                kvm_set_apic_base(vcpu, data);
-                break;
+                return kvm_set_apic_base(vcpu, msr_info);
         case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
                 return kvm_x2apic_msr_write(vcpu, msr, data);
         case MSR_IA32_TSCDEADLINE:
@@ -2291,6 +2298,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
         case HV_X64_MSR_HYPERCALL:
                 data = kvm->arch.hv_hypercall;
                 break;
+        case HV_X64_MSR_TIME_REF_COUNT: {
+                data =
+                     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+                break;
+        }
+        case HV_X64_MSR_REFERENCE_TSC:
+                data = kvm->arch.hv_tsc_page;
+                break;
         default:
                 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
                 return 1;
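Reviewer note: HV_X64_MSR_TIME_REF_COUNT is defined to tick in 100 ns units since VM start, hence the division of the kernel nanosecond clock (plus the kvmclock offset) by 100. The unit conversion on its own, as a trivial sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t ns = 1500000000ULL;    /* 1.5 s in nanoseconds */
        uint64_t ref = ns / 100;        /* Hyper-V 100 ns reference ticks */

        printf("%llu\n", (unsigned long long)ref);      /* -> 15000000 */
        return 0;
}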
@@ -2601,6 +2616,7 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_GET_TSC_KHZ:
         case KVM_CAP_KVMCLOCK_CTRL:
         case KVM_CAP_READONLY_MEM:
+        case KVM_CAP_HYPERV_TIME:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
         case KVM_CAP_ASSIGN_DEV_IRQ:
         case KVM_CAP_PCI_2_3:
@@ -2972,8 +2988,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
                                              struct kvm_debugregs *dbgregs)
 {
+        unsigned long val;
+
         memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-        dbgregs->dr6 = vcpu->arch.dr6;
+        _kvm_get_dr(vcpu, 6, &val);
+        dbgregs->dr6 = val;
         dbgregs->dr7 = vcpu->arch.dr7;
         dbgregs->flags = 0;
         memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2987,7 +3006,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 
         memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
         vcpu->arch.dr6 = dbgregs->dr6;
+        kvm_update_dr6(vcpu);
         vcpu->arch.dr7 = dbgregs->dr7;
+        kvm_update_dr7(vcpu);
 
         return 0;
 }
@@ -5834,6 +5855,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
         kvm_apic_update_tmr(vcpu, tmr);
 }
 
+/*
+ * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * exiting to the userspace. Otherwise, the value will be returned to the
+ * userspace.
+ */
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
         int r;
6089 } 6115 }
6090 if (need_resched()) { 6116 if (need_resched()) {
6091 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 6117 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6092 kvm_resched(vcpu); 6118 cond_resched();
6093 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 6119 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6094 } 6120 }
6095 } 6121 }
@@ -6401,6 +6427,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
6401int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 6427int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6402 struct kvm_sregs *sregs) 6428 struct kvm_sregs *sregs)
6403{ 6429{
6430 struct msr_data apic_base_msr;
6404 int mmu_reset_needed = 0; 6431 int mmu_reset_needed = 0;
6405 int pending_vec, max_bits, idx; 6432 int pending_vec, max_bits, idx;
6406 struct desc_ptr dt; 6433 struct desc_ptr dt;
@@ -6424,7 +6451,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6424 6451
6425 mmu_reset_needed |= vcpu->arch.efer != sregs->efer; 6452 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
6426 kvm_x86_ops->set_efer(vcpu, sregs->efer); 6453 kvm_x86_ops->set_efer(vcpu, sregs->efer);
6427 kvm_set_apic_base(vcpu, sregs->apic_base); 6454 apic_base_msr.data = sregs->apic_base;
6455 apic_base_msr.host_initiated = true;
6456 kvm_set_apic_base(vcpu, &apic_base_msr);
6428 6457
6429 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; 6458 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
6430 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 6459 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
@@ -6717,6 +6746,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 
         memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
         vcpu->arch.dr6 = DR6_FIXED_1;
+        kvm_update_dr6(vcpu);
         vcpu->arch.dr7 = DR7_FIXED_1;
         kvm_update_dr7(vcpu);
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 587fb9ede436..8da5823bcde6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 #define KVM_SUPPORTED_XCR0      (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
 extern u64 host_xcr0;
 
+extern unsigned int min_timer_period_us;
+
 extern struct static_key kvm_no_apic_vcpu;
 #endif