diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 101 |
1 files changed, 55 insertions, 46 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da1e35d..0c76f7cfdb32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
94 | static bool ignore_msrs = 0; | 94 | static bool ignore_msrs = 0; |
95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); | 95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); |
96 | 96 | ||
97 | unsigned int min_timer_period_us = 500; | ||
98 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
99 | |||
97 | bool kvm_has_tsc_control; | 100 | bool kvm_has_tsc_control; |
98 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 101 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
99 | u32 kvm_max_guest_tsc_khz; | 102 | u32 kvm_max_guest_tsc_khz; |
@@ -719,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
719 | } | 722 | } |
720 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 723 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
721 | 724 | ||
725 | static void kvm_update_dr6(struct kvm_vcpu *vcpu) | ||
726 | { | ||
727 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
728 | kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); | ||
729 | } | ||
730 | |||
722 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) | 731 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) |
723 | { | 732 | { |
724 | unsigned long dr7; | 733 | unsigned long dr7; |
@@ -747,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
747 | if (val & 0xffffffff00000000ULL) | 756 | if (val & 0xffffffff00000000ULL) |
748 | return -1; /* #GP */ | 757 | return -1; /* #GP */ |
749 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 758 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
759 | kvm_update_dr6(vcpu); | ||
750 | break; | 760 | break; |
751 | case 5: | 761 | case 5: |
752 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 762 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -788,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
788 | return 1; | 798 | return 1; |
789 | /* fall through */ | 799 | /* fall through */ |
790 | case 6: | 800 | case 6: |
791 | *val = vcpu->arch.dr6; | 801 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
802 | *val = vcpu->arch.dr6; | ||
803 | else | ||
804 | *val = kvm_x86_ops->get_dr6(vcpu); | ||
792 | break; | 805 | break; |
793 | case 5: | 806 | case 5: |
794 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 807 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -836,11 +849,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); | |||
836 | * kvm-specific. Those are put in the beginning of the list. | 849 | * kvm-specific. Those are put in the beginning of the list. |
837 | */ | 850 | */ |
838 | 851 | ||
839 | #define KVM_SAVE_MSRS_BEGIN 10 | 852 | #define KVM_SAVE_MSRS_BEGIN 12 |
840 | static u32 msrs_to_save[] = { | 853 | static u32 msrs_to_save[] = { |
841 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 854 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
842 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 855 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
843 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 856 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
857 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | ||
844 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 858 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
845 | MSR_KVM_PV_EOI_EN, | 859 | MSR_KVM_PV_EOI_EN, |
846 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 860 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
@@ -1275,8 +1289,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1275 | kvm->arch.last_tsc_write = data; | 1289 | kvm->arch.last_tsc_write = data; |
1276 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; | 1290 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; |
1277 | 1291 | ||
1278 | /* Reset of TSC must disable overshoot protection below */ | ||
1279 | vcpu->arch.hv_clock.tsc_timestamp = 0; | ||
1280 | vcpu->arch.last_guest_tsc = data; | 1292 | vcpu->arch.last_guest_tsc = data; |
1281 | 1293 | ||
1282 | /* Keep track of which generation this VCPU has synchronized to */ | 1294 | /* Keep track of which generation this VCPU has synchronized to */ |
@@ -1484,7 +1496,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1484 | unsigned long flags, this_tsc_khz; | 1496 | unsigned long flags, this_tsc_khz; |
1485 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1497 | struct kvm_vcpu_arch *vcpu = &v->arch; |
1486 | struct kvm_arch *ka = &v->kvm->arch; | 1498 | struct kvm_arch *ka = &v->kvm->arch; |
1487 | s64 kernel_ns, max_kernel_ns; | 1499 | s64 kernel_ns; |
1488 | u64 tsc_timestamp, host_tsc; | 1500 | u64 tsc_timestamp, host_tsc; |
1489 | struct pvclock_vcpu_time_info guest_hv_clock; | 1501 | struct pvclock_vcpu_time_info guest_hv_clock; |
1490 | u8 pvclock_flags; | 1502 | u8 pvclock_flags; |
@@ -1543,37 +1555,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1543 | if (!vcpu->pv_time_enabled) | 1555 | if (!vcpu->pv_time_enabled) |
1544 | return 0; | 1556 | return 0; |
1545 | 1557 | ||
1546 | /* | ||
1547 | * Time as measured by the TSC may go backwards when resetting the base | ||
1548 | * tsc_timestamp. The reason for this is that the TSC resolution is | ||
1549 | * higher than the resolution of the other clock scales. Thus, many | ||
1550 | * possible measurements of the TSC correspond to one measurement of any | ||
1551 | * other clock, and so a spread of values is possible. This is not a | ||
1552 | * problem for the computation of the nanosecond clock; with TSC rates | ||
1553 | * around 1GHZ, there can only be a few cycles which correspond to one | ||
1554 | * nanosecond value, and any path through this code will inevitably | ||
1555 | * take longer than that. However, with the kernel_ns value itself, | ||
1556 | * the precision may be much lower, down to HZ granularity. If the | ||
1557 | * first sampling of TSC against kernel_ns ends in the low part of the | ||
1558 | * range, and the second in the high end of the range, we can get: | ||
1559 | * | ||
1560 | * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new | ||
1561 | * | ||
1562 | * As the sampling errors potentially range in the thousands of cycles, | ||
1563 | * it is possible such a time value has already been observed by the | ||
1564 | * guest. To protect against this, we must compute the system time as | ||
1565 | * observed by the guest and ensure the new system time is greater. | ||
1566 | */ | ||
1567 | max_kernel_ns = 0; | ||
1568 | if (vcpu->hv_clock.tsc_timestamp) { | ||
1569 | max_kernel_ns = vcpu->last_guest_tsc - | ||
1570 | vcpu->hv_clock.tsc_timestamp; | ||
1571 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | ||
1572 | vcpu->hv_clock.tsc_to_system_mul, | ||
1573 | vcpu->hv_clock.tsc_shift); | ||
1574 | max_kernel_ns += vcpu->last_kernel_ns; | ||
1575 | } | ||
1576 | |||
1577 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | 1558 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { |
1578 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, | 1559 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, |
1579 | &vcpu->hv_clock.tsc_shift, | 1560 | &vcpu->hv_clock.tsc_shift, |
@@ -1581,14 +1562,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1581 | vcpu->hw_tsc_khz = this_tsc_khz; | 1562 | vcpu->hw_tsc_khz = this_tsc_khz; |
1582 | } | 1563 | } |
1583 | 1564 | ||
1584 | /* with a master <monotonic time, tsc value> tuple, | ||
1585 | * pvclock clock reads always increase at the (scaled) rate | ||
1586 | * of guest TSC - no need to deal with sampling errors. | ||
1587 | */ | ||
1588 | if (!use_master_clock) { | ||
1589 | if (max_kernel_ns > kernel_ns) | ||
1590 | kernel_ns = max_kernel_ns; | ||
1591 | } | ||
1592 | /* With all the info we got, fill in the values */ | 1565 | /* With all the info we got, fill in the values */ |
1593 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; | 1566 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; |
1594 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; | 1567 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; |
@@ -1826,6 +1799,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) | |||
1826 | switch (msr) { | 1799 | switch (msr) { |
1827 | case HV_X64_MSR_GUEST_OS_ID: | 1800 | case HV_X64_MSR_GUEST_OS_ID: |
1828 | case HV_X64_MSR_HYPERCALL: | 1801 | case HV_X64_MSR_HYPERCALL: |
1802 | case HV_X64_MSR_REFERENCE_TSC: | ||
1803 | case HV_X64_MSR_TIME_REF_COUNT: | ||
1829 | r = true; | 1804 | r = true; |
1830 | break; | 1805 | break; |
1831 | } | 1806 | } |
@@ -1867,6 +1842,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1867 | kvm->arch.hv_hypercall = data; | 1842 | kvm->arch.hv_hypercall = data; |
1868 | break; | 1843 | break; |
1869 | } | 1844 | } |
1845 | case HV_X64_MSR_REFERENCE_TSC: { | ||
1846 | u64 gfn; | ||
1847 | HV_REFERENCE_TSC_PAGE tsc_ref; | ||
1848 | memset(&tsc_ref, 0, sizeof(tsc_ref)); | ||
1849 | kvm->arch.hv_tsc_page = data; | ||
1850 | if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) | ||
1851 | break; | ||
1852 | gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; | ||
1853 | if (kvm_write_guest(kvm, data, | ||
1854 | &tsc_ref, sizeof(tsc_ref))) | ||
1855 | return 1; | ||
1856 | mark_page_dirty(kvm, gfn); | ||
1857 | break; | ||
1858 | } | ||
1870 | default: | 1859 | default: |
1871 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | 1860 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " |
1872 | "data 0x%llx\n", msr, data); | 1861 | "data 0x%llx\n", msr, data); |
@@ -2291,6 +2280,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2291 | case HV_X64_MSR_HYPERCALL: | 2280 | case HV_X64_MSR_HYPERCALL: |
2292 | data = kvm->arch.hv_hypercall; | 2281 | data = kvm->arch.hv_hypercall; |
2293 | break; | 2282 | break; |
2283 | case HV_X64_MSR_TIME_REF_COUNT: { | ||
2284 | data = | ||
2285 | div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); | ||
2286 | break; | ||
2287 | } | ||
2288 | case HV_X64_MSR_REFERENCE_TSC: | ||
2289 | data = kvm->arch.hv_tsc_page; | ||
2290 | break; | ||
2294 | default: | 2291 | default: |
2295 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 2292 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
2296 | return 1; | 2293 | return 1; |
@@ -2604,6 +2601,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2604 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2601 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2605 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2602 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2606 | case KVM_CAP_PCI_2_3: | 2603 | case KVM_CAP_PCI_2_3: |
2604 | case KVM_CAP_HYPERV_TIME: | ||
2607 | #endif | 2605 | #endif |
2608 | r = 1; | 2606 | r = 1; |
2609 | break; | 2607 | break; |
@@ -2972,8 +2970,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2972 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | 2970 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, |
2973 | struct kvm_debugregs *dbgregs) | 2971 | struct kvm_debugregs *dbgregs) |
2974 | { | 2972 | { |
2973 | unsigned long val; | ||
2974 | |||
2975 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | 2975 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); |
2976 | dbgregs->dr6 = vcpu->arch.dr6; | 2976 | _kvm_get_dr(vcpu, 6, &val); |
2977 | dbgregs->dr6 = val; | ||
2977 | dbgregs->dr7 = vcpu->arch.dr7; | 2978 | dbgregs->dr7 = vcpu->arch.dr7; |
2978 | dbgregs->flags = 0; | 2979 | dbgregs->flags = 0; |
2979 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); | 2980 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); |
@@ -2987,7 +2988,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
2987 | 2988 | ||
2988 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 2989 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
2989 | vcpu->arch.dr6 = dbgregs->dr6; | 2990 | vcpu->arch.dr6 = dbgregs->dr6; |
2991 | kvm_update_dr6(vcpu); | ||
2990 | vcpu->arch.dr7 = dbgregs->dr7; | 2992 | vcpu->arch.dr7 = dbgregs->dr7; |
2993 | kvm_update_dr7(vcpu); | ||
2991 | 2994 | ||
2992 | return 0; | 2995 | return 0; |
2993 | } | 2996 | } |
@@ -5834,6 +5837,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
5834 | kvm_apic_update_tmr(vcpu, tmr); | 5837 | kvm_apic_update_tmr(vcpu, tmr); |
5835 | } | 5838 | } |
5836 | 5839 | ||
5840 | /* | ||
5841 | * Returns 1 to let __vcpu_run() continue the guest execution loop without | ||
5842 | * exiting to the userspace. Otherwise, the value will be returned to the | ||
5843 | * userspace. | ||
5844 | */ | ||
5837 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5845 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5838 | { | 5846 | { |
5839 | int r; | 5847 | int r; |
@@ -6089,7 +6097,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
6089 | } | 6097 | } |
6090 | if (need_resched()) { | 6098 | if (need_resched()) { |
6091 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6099 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
6092 | kvm_resched(vcpu); | 6100 | cond_resched(); |
6093 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6101 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6094 | } | 6102 | } |
6095 | } | 6103 | } |
@@ -6717,6 +6725,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6717 | 6725 | ||
6718 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 6726 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
6719 | vcpu->arch.dr6 = DR6_FIXED_1; | 6727 | vcpu->arch.dr6 = DR6_FIXED_1; |
6728 | kvm_update_dr6(vcpu); | ||
6720 | vcpu->arch.dr7 = DR7_FIXED_1; | 6729 | vcpu->arch.dr7 = DR7_FIXED_1; |
6721 | kvm_update_dr7(vcpu); | 6730 | kvm_update_dr7(vcpu); |
6722 | 6731 | ||