aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-08 11:02:38 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-08 11:02:38 -0500
commited9216c1717a3f3738a77908aff78995ea69e7ff (patch)
treec6b5ace7c333dabbf1d94074a13a98244bcdfb26 /arch/x86/kvm/x86.c
parentd7fc02c7bae7b1cf69269992cf880a43a350cdaa (diff)
parentd5696725b2a4c59503f5e0bc33adeee7f30cd45b (diff)
Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (84 commits)
  KVM: VMX: Fix comparison of guest efer with stale host value
  KVM: s390: Fix prefix register checking in arch/s390/kvm/sigp.c
  KVM: Drop user return notifier when disabling virtualization on a cpu
  KVM: VMX: Disable unrestricted guest when EPT disabled
  KVM: x86 emulator: limit instructions to 15 bytes
  KVM: s390: Make psw available on all exits, not just a subset
  KVM: x86: Add KVM_GET/SET_VCPU_EVENTS
  KVM: VMX: Report unexpected simultaneous exceptions as internal errors
  KVM: Allow internal errors reported to userspace to carry extra data
  KVM: Reorder IOCTLs in main kvm.h
  KVM: x86: Polish exception injection via KVM_SET_GUEST_DEBUG
  KVM: only clear irq_source_id if irqchip is present
  KVM: x86: disallow KVM_{SET,GET}_LAPIC without allocated in-kernel lapic
  KVM: x86: disallow multiple KVM_CREATE_IRQCHIP
  KVM: VMX: Remove vmx->msr_offset_efer
  KVM: MMU: update invlpg handler comment
  KVM: VMX: move CR3/PDPTR update to vmx_set_cr3
  KVM: remove duplicated task_switch check
  KVM: powerpc: Fix BUILD_BUG_ON condition
  KVM: VMX: Use shared msr infrastructure
  ...

Trivial conflicts due to new Kconfig options in arch/Kconfig and kernel/Makefile
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c550
1 files changed, 441 insertions, 109 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4fc80174191c..9d068966fb2a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -37,6 +37,7 @@
37#include <linux/iommu.h> 37#include <linux/iommu.h>
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
40#include <linux/user-return-notifier.h>
40#include <trace/events/kvm.h> 41#include <trace/events/kvm.h>
41#undef TRACE_INCLUDE_FILE 42#undef TRACE_INCLUDE_FILE
42#define CREATE_TRACE_POINTS 43#define CREATE_TRACE_POINTS
@@ -88,6 +89,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
88int ignore_msrs = 0; 89int ignore_msrs = 0;
89module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); 90module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
90 91
92#define KVM_NR_SHARED_MSRS 16
93
94struct kvm_shared_msrs_global {
95 int nr;
96 struct kvm_shared_msr {
97 u32 msr;
98 u64 value;
99 } msrs[KVM_NR_SHARED_MSRS];
100};
101
102struct kvm_shared_msrs {
103 struct user_return_notifier urn;
104 bool registered;
105 u64 current_value[KVM_NR_SHARED_MSRS];
106};
107
108static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
109static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
110
91struct kvm_stats_debugfs_item debugfs_entries[] = { 111struct kvm_stats_debugfs_item debugfs_entries[] = {
92 { "pf_fixed", VCPU_STAT(pf_fixed) }, 112 { "pf_fixed", VCPU_STAT(pf_fixed) },
93 { "pf_guest", VCPU_STAT(pf_guest) }, 113 { "pf_guest", VCPU_STAT(pf_guest) },
@@ -124,6 +144,72 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
124 { NULL } 144 { NULL }
125}; 145};
126 146
147static void kvm_on_user_return(struct user_return_notifier *urn)
148{
149 unsigned slot;
150 struct kvm_shared_msr *global;
151 struct kvm_shared_msrs *locals
152 = container_of(urn, struct kvm_shared_msrs, urn);
153
154 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
155 global = &shared_msrs_global.msrs[slot];
156 if (global->value != locals->current_value[slot]) {
157 wrmsrl(global->msr, global->value);
158 locals->current_value[slot] = global->value;
159 }
160 }
161 locals->registered = false;
162 user_return_notifier_unregister(urn);
163}
164
165void kvm_define_shared_msr(unsigned slot, u32 msr)
166{
167 int cpu;
168 u64 value;
169
170 if (slot >= shared_msrs_global.nr)
171 shared_msrs_global.nr = slot + 1;
172 shared_msrs_global.msrs[slot].msr = msr;
173 rdmsrl_safe(msr, &value);
174 shared_msrs_global.msrs[slot].value = value;
175 for_each_online_cpu(cpu)
176 per_cpu(shared_msrs, cpu).current_value[slot] = value;
177}
178EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
179
180static void kvm_shared_msr_cpu_online(void)
181{
182 unsigned i;
183 struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
184
185 for (i = 0; i < shared_msrs_global.nr; ++i)
186 locals->current_value[i] = shared_msrs_global.msrs[i].value;
187}
188
189void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
190{
191 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
192
193 if (((value ^ smsr->current_value[slot]) & mask) == 0)
194 return;
195 smsr->current_value[slot] = value;
196 wrmsrl(shared_msrs_global.msrs[slot].msr, value);
197 if (!smsr->registered) {
198 smsr->urn.on_user_return = kvm_on_user_return;
199 user_return_notifier_register(&smsr->urn);
200 smsr->registered = true;
201 }
202}
203EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
204
205static void drop_user_return_notifiers(void *ignore)
206{
207 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
208
209 if (smsr->registered)
210 kvm_on_user_return(&smsr->urn);
211}
212
127unsigned long segment_base(u16 selector) 213unsigned long segment_base(u16 selector)
128{ 214{
129 struct descriptor_table gdt; 215 struct descriptor_table gdt;
@@ -485,16 +571,19 @@ static inline u32 bit(int bitno)
485 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 571 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
486 * 572 *
487 * This list is modified at module load time to reflect the 573 * This list is modified at module load time to reflect the
488 * capabilities of the host cpu. 574 * capabilities of the host cpu. This capabilities test skips MSRs that are
575 * kvm-specific. Those are put in the beginning of the list.
489 */ 576 */
577
578#define KVM_SAVE_MSRS_BEGIN 2
490static u32 msrs_to_save[] = { 579static u32 msrs_to_save[] = {
580 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
491 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 581 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
492 MSR_K6_STAR, 582 MSR_K6_STAR,
493#ifdef CONFIG_X86_64 583#ifdef CONFIG_X86_64
494 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 584 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
495#endif 585#endif
496 MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 586 MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
497 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
498}; 587};
499 588
500static unsigned num_msrs_to_save; 589static unsigned num_msrs_to_save;
@@ -678,7 +767,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
678 /* With all the info we got, fill in the values */ 767 /* With all the info we got, fill in the values */
679 768
680 vcpu->hv_clock.system_time = ts.tv_nsec + 769 vcpu->hv_clock.system_time = ts.tv_nsec +
681 (NSEC_PER_SEC * (u64)ts.tv_sec); 770 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
771
682 /* 772 /*
683 * The interface expects us to write an even number signaling that the 773 * The interface expects us to write an even number signaling that the
684 * update is finished. Since the guest won't see the intermediate 774 * update is finished. Since the guest won't see the intermediate
@@ -836,6 +926,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
836 return 0; 926 return 0;
837} 927}
838 928
929static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
930{
931 struct kvm *kvm = vcpu->kvm;
932 int lm = is_long_mode(vcpu);
933 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
934 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
935 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
936 : kvm->arch.xen_hvm_config.blob_size_32;
937 u32 page_num = data & ~PAGE_MASK;
938 u64 page_addr = data & PAGE_MASK;
939 u8 *page;
940 int r;
941
942 r = -E2BIG;
943 if (page_num >= blob_size)
944 goto out;
945 r = -ENOMEM;
946 page = kzalloc(PAGE_SIZE, GFP_KERNEL);
947 if (!page)
948 goto out;
949 r = -EFAULT;
950 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
951 goto out_free;
952 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
953 goto out_free;
954 r = 0;
955out_free:
956 kfree(page);
957out:
958 return r;
959}
960
839int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 961int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
840{ 962{
841 switch (msr) { 963 switch (msr) {
@@ -951,6 +1073,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
951 "0x%x data 0x%llx\n", msr, data); 1073 "0x%x data 0x%llx\n", msr, data);
952 break; 1074 break;
953 default: 1075 default:
1076 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1077 return xen_hvm_config(vcpu, data);
954 if (!ignore_msrs) { 1078 if (!ignore_msrs) {
955 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", 1079 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
956 msr, data); 1080 msr, data);
@@ -1225,6 +1349,9 @@ int kvm_dev_ioctl_check_extension(long ext)
1225 case KVM_CAP_PIT2: 1349 case KVM_CAP_PIT2:
1226 case KVM_CAP_PIT_STATE2: 1350 case KVM_CAP_PIT_STATE2:
1227 case KVM_CAP_SET_IDENTITY_MAP_ADDR: 1351 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
1352 case KVM_CAP_XEN_HVM:
1353 case KVM_CAP_ADJUST_CLOCK:
1354 case KVM_CAP_VCPU_EVENTS:
1228 r = 1; 1355 r = 1;
1229 break; 1356 break;
1230 case KVM_CAP_COALESCED_MMIO: 1357 case KVM_CAP_COALESCED_MMIO:
@@ -1239,8 +1366,8 @@ int kvm_dev_ioctl_check_extension(long ext)
1239 case KVM_CAP_NR_MEMSLOTS: 1366 case KVM_CAP_NR_MEMSLOTS:
1240 r = KVM_MEMORY_SLOTS; 1367 r = KVM_MEMORY_SLOTS;
1241 break; 1368 break;
1242 case KVM_CAP_PV_MMU: 1369 case KVM_CAP_PV_MMU: /* obsolete */
1243 r = !tdp_enabled; 1370 r = 0;
1244 break; 1371 break;
1245 case KVM_CAP_IOMMU: 1372 case KVM_CAP_IOMMU:
1246 r = iommu_found(); 1373 r = iommu_found();
@@ -1327,6 +1454,12 @@ out:
1327void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1454void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1328{ 1455{
1329 kvm_x86_ops->vcpu_load(vcpu, cpu); 1456 kvm_x86_ops->vcpu_load(vcpu, cpu);
1457 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
1458 unsigned long khz = cpufreq_quick_get(cpu);
1459 if (!khz)
1460 khz = tsc_khz;
1461 per_cpu(cpu_tsc_khz, cpu) = khz;
1462 }
1330 kvm_request_guest_time_update(vcpu); 1463 kvm_request_guest_time_update(vcpu);
1331} 1464}
1332 1465
@@ -1760,6 +1893,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1760 return 0; 1893 return 0;
1761} 1894}
1762 1895
1896static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
1897 struct kvm_vcpu_events *events)
1898{
1899 vcpu_load(vcpu);
1900
1901 events->exception.injected = vcpu->arch.exception.pending;
1902 events->exception.nr = vcpu->arch.exception.nr;
1903 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
1904 events->exception.error_code = vcpu->arch.exception.error_code;
1905
1906 events->interrupt.injected = vcpu->arch.interrupt.pending;
1907 events->interrupt.nr = vcpu->arch.interrupt.nr;
1908 events->interrupt.soft = vcpu->arch.interrupt.soft;
1909
1910 events->nmi.injected = vcpu->arch.nmi_injected;
1911 events->nmi.pending = vcpu->arch.nmi_pending;
1912 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
1913
1914 events->sipi_vector = vcpu->arch.sipi_vector;
1915
1916 events->flags = 0;
1917
1918 vcpu_put(vcpu);
1919}
1920
1921static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
1922 struct kvm_vcpu_events *events)
1923{
1924 if (events->flags)
1925 return -EINVAL;
1926
1927 vcpu_load(vcpu);
1928
1929 vcpu->arch.exception.pending = events->exception.injected;
1930 vcpu->arch.exception.nr = events->exception.nr;
1931 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
1932 vcpu->arch.exception.error_code = events->exception.error_code;
1933
1934 vcpu->arch.interrupt.pending = events->interrupt.injected;
1935 vcpu->arch.interrupt.nr = events->interrupt.nr;
1936 vcpu->arch.interrupt.soft = events->interrupt.soft;
1937 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
1938 kvm_pic_clear_isr_ack(vcpu->kvm);
1939
1940 vcpu->arch.nmi_injected = events->nmi.injected;
1941 vcpu->arch.nmi_pending = events->nmi.pending;
1942 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
1943
1944 vcpu->arch.sipi_vector = events->sipi_vector;
1945
1946 vcpu_put(vcpu);
1947
1948 return 0;
1949}
1950
1763long kvm_arch_vcpu_ioctl(struct file *filp, 1951long kvm_arch_vcpu_ioctl(struct file *filp,
1764 unsigned int ioctl, unsigned long arg) 1952 unsigned int ioctl, unsigned long arg)
1765{ 1953{
@@ -1770,6 +1958,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1770 1958
1771 switch (ioctl) { 1959 switch (ioctl) {
1772 case KVM_GET_LAPIC: { 1960 case KVM_GET_LAPIC: {
1961 r = -EINVAL;
1962 if (!vcpu->arch.apic)
1963 goto out;
1773 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 1964 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
1774 1965
1775 r = -ENOMEM; 1966 r = -ENOMEM;
@@ -1785,6 +1976,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1785 break; 1976 break;
1786 } 1977 }
1787 case KVM_SET_LAPIC: { 1978 case KVM_SET_LAPIC: {
1979 r = -EINVAL;
1980 if (!vcpu->arch.apic)
1981 goto out;
1788 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 1982 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
1789 r = -ENOMEM; 1983 r = -ENOMEM;
1790 if (!lapic) 1984 if (!lapic)
@@ -1911,6 +2105,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1911 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2105 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
1912 break; 2106 break;
1913 } 2107 }
2108 case KVM_GET_VCPU_EVENTS: {
2109 struct kvm_vcpu_events events;
2110
2111 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
2112
2113 r = -EFAULT;
2114 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
2115 break;
2116 r = 0;
2117 break;
2118 }
2119 case KVM_SET_VCPU_EVENTS: {
2120 struct kvm_vcpu_events events;
2121
2122 r = -EFAULT;
2123 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
2124 break;
2125
2126 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2127 break;
2128 }
1914 default: 2129 default:
1915 r = -EINVAL; 2130 r = -EINVAL;
1916 } 2131 }
@@ -2039,9 +2254,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2039 sizeof(struct kvm_pic_state)); 2254 sizeof(struct kvm_pic_state));
2040 break; 2255 break;
2041 case KVM_IRQCHIP_IOAPIC: 2256 case KVM_IRQCHIP_IOAPIC:
2042 memcpy(&chip->chip.ioapic, 2257 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
2043 ioapic_irqchip(kvm),
2044 sizeof(struct kvm_ioapic_state));
2045 break; 2258 break;
2046 default: 2259 default:
2047 r = -EINVAL; 2260 r = -EINVAL;
@@ -2071,11 +2284,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2071 spin_unlock(&pic_irqchip(kvm)->lock); 2284 spin_unlock(&pic_irqchip(kvm)->lock);
2072 break; 2285 break;
2073 case KVM_IRQCHIP_IOAPIC: 2286 case KVM_IRQCHIP_IOAPIC:
2074 mutex_lock(&kvm->irq_lock); 2287 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
2075 memcpy(ioapic_irqchip(kvm),
2076 &chip->chip.ioapic,
2077 sizeof(struct kvm_ioapic_state));
2078 mutex_unlock(&kvm->irq_lock);
2079 break; 2288 break;
2080 default: 2289 default:
2081 r = -EINVAL; 2290 r = -EINVAL;
@@ -2183,7 +2392,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2183{ 2392{
2184 struct kvm *kvm = filp->private_data; 2393 struct kvm *kvm = filp->private_data;
2185 void __user *argp = (void __user *)arg; 2394 void __user *argp = (void __user *)arg;
2186 int r = -EINVAL; 2395 int r = -ENOTTY;
2187 /* 2396 /*
2188 * This union makes it completely explicit to gcc-3.x 2397 * This union makes it completely explicit to gcc-3.x
2189 * that these two variables' stack usage should be 2398 * that these two variables' stack usage should be
@@ -2245,25 +2454,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
2245 if (r) 2454 if (r)
2246 goto out; 2455 goto out;
2247 break; 2456 break;
2248 case KVM_CREATE_IRQCHIP: 2457 case KVM_CREATE_IRQCHIP: {
2458 struct kvm_pic *vpic;
2459
2460 mutex_lock(&kvm->lock);
2461 r = -EEXIST;
2462 if (kvm->arch.vpic)
2463 goto create_irqchip_unlock;
2249 r = -ENOMEM; 2464 r = -ENOMEM;
2250 kvm->arch.vpic = kvm_create_pic(kvm); 2465 vpic = kvm_create_pic(kvm);
2251 if (kvm->arch.vpic) { 2466 if (vpic) {
2252 r = kvm_ioapic_init(kvm); 2467 r = kvm_ioapic_init(kvm);
2253 if (r) { 2468 if (r) {
2254 kfree(kvm->arch.vpic); 2469 kfree(vpic);
2255 kvm->arch.vpic = NULL; 2470 goto create_irqchip_unlock;
2256 goto out;
2257 } 2471 }
2258 } else 2472 } else
2259 goto out; 2473 goto create_irqchip_unlock;
2474 smp_wmb();
2475 kvm->arch.vpic = vpic;
2476 smp_wmb();
2260 r = kvm_setup_default_irq_routing(kvm); 2477 r = kvm_setup_default_irq_routing(kvm);
2261 if (r) { 2478 if (r) {
2479 mutex_lock(&kvm->irq_lock);
2262 kfree(kvm->arch.vpic); 2480 kfree(kvm->arch.vpic);
2263 kfree(kvm->arch.vioapic); 2481 kfree(kvm->arch.vioapic);
2264 goto out; 2482 kvm->arch.vpic = NULL;
2483 kvm->arch.vioapic = NULL;
2484 mutex_unlock(&kvm->irq_lock);
2265 } 2485 }
2486 create_irqchip_unlock:
2487 mutex_unlock(&kvm->lock);
2266 break; 2488 break;
2489 }
2267 case KVM_CREATE_PIT: 2490 case KVM_CREATE_PIT:
2268 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; 2491 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
2269 goto create_pit; 2492 goto create_pit;
@@ -2293,10 +2516,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2293 goto out; 2516 goto out;
2294 if (irqchip_in_kernel(kvm)) { 2517 if (irqchip_in_kernel(kvm)) {
2295 __s32 status; 2518 __s32 status;
2296 mutex_lock(&kvm->irq_lock);
2297 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2519 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
2298 irq_event.irq, irq_event.level); 2520 irq_event.irq, irq_event.level);
2299 mutex_unlock(&kvm->irq_lock);
2300 if (ioctl == KVM_IRQ_LINE_STATUS) { 2521 if (ioctl == KVM_IRQ_LINE_STATUS) {
2301 irq_event.status = status; 2522 irq_event.status = status;
2302 if (copy_to_user(argp, &irq_event, 2523 if (copy_to_user(argp, &irq_event,
@@ -2422,6 +2643,55 @@ long kvm_arch_vm_ioctl(struct file *filp,
2422 r = 0; 2643 r = 0;
2423 break; 2644 break;
2424 } 2645 }
2646 case KVM_XEN_HVM_CONFIG: {
2647 r = -EFAULT;
2648 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
2649 sizeof(struct kvm_xen_hvm_config)))
2650 goto out;
2651 r = -EINVAL;
2652 if (kvm->arch.xen_hvm_config.flags)
2653 goto out;
2654 r = 0;
2655 break;
2656 }
2657 case KVM_SET_CLOCK: {
2658 struct timespec now;
2659 struct kvm_clock_data user_ns;
2660 u64 now_ns;
2661 s64 delta;
2662
2663 r = -EFAULT;
2664 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
2665 goto out;
2666
2667 r = -EINVAL;
2668 if (user_ns.flags)
2669 goto out;
2670
2671 r = 0;
2672 ktime_get_ts(&now);
2673 now_ns = timespec_to_ns(&now);
2674 delta = user_ns.clock - now_ns;
2675 kvm->arch.kvmclock_offset = delta;
2676 break;
2677 }
2678 case KVM_GET_CLOCK: {
2679 struct timespec now;
2680 struct kvm_clock_data user_ns;
2681 u64 now_ns;
2682
2683 ktime_get_ts(&now);
2684 now_ns = timespec_to_ns(&now);
2685 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
2686 user_ns.flags = 0;
2687
2688 r = -EFAULT;
2689 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
2690 goto out;
2691 r = 0;
2692 break;
2693 }
2694
2425 default: 2695 default:
2426 ; 2696 ;
2427 } 2697 }
@@ -2434,7 +2704,8 @@ static void kvm_init_msr_list(void)
2434 u32 dummy[2]; 2704 u32 dummy[2];
2435 unsigned i, j; 2705 unsigned i, j;
2436 2706
2437 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { 2707 /* skip the first msrs in the list. KVM-specific */
2708 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
2438 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) 2709 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
2439 continue; 2710 continue;
2440 if (j < i) 2711 if (j < i)
@@ -2758,13 +3029,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
2758} 3029}
2759 3030
2760int emulate_instruction(struct kvm_vcpu *vcpu, 3031int emulate_instruction(struct kvm_vcpu *vcpu,
2761 struct kvm_run *run,
2762 unsigned long cr2, 3032 unsigned long cr2,
2763 u16 error_code, 3033 u16 error_code,
2764 int emulation_type) 3034 int emulation_type)
2765{ 3035{
2766 int r, shadow_mask; 3036 int r, shadow_mask;
2767 struct decode_cache *c; 3037 struct decode_cache *c;
3038 struct kvm_run *run = vcpu->run;
2768 3039
2769 kvm_clear_exception_queue(vcpu); 3040 kvm_clear_exception_queue(vcpu);
2770 vcpu->arch.mmio_fault_cr2 = cr2; 3041 vcpu->arch.mmio_fault_cr2 = cr2;
@@ -2784,7 +3055,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2784 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3055 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
2785 3056
2786 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3057 vcpu->arch.emulate_ctxt.vcpu = vcpu;
2787 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); 3058 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
2788 vcpu->arch.emulate_ctxt.mode = 3059 vcpu->arch.emulate_ctxt.mode =
2789 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3060 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
2790 ? X86EMUL_MODE_REAL : cs_l 3061 ? X86EMUL_MODE_REAL : cs_l
@@ -2862,7 +3133,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2862 return EMULATE_DO_MMIO; 3133 return EMULATE_DO_MMIO;
2863 } 3134 }
2864 3135
2865 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 3136 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
2866 3137
2867 if (vcpu->mmio_is_write) { 3138 if (vcpu->mmio_is_write) {
2868 vcpu->mmio_needed = 0; 3139 vcpu->mmio_needed = 0;
@@ -2970,8 +3241,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
2970 return r; 3241 return r;
2971} 3242}
2972 3243
2973int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 3244int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
2974 int size, unsigned port)
2975{ 3245{
2976 unsigned long val; 3246 unsigned long val;
2977 3247
@@ -3000,7 +3270,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
3000} 3270}
3001EXPORT_SYMBOL_GPL(kvm_emulate_pio); 3271EXPORT_SYMBOL_GPL(kvm_emulate_pio);
3002 3272
3003int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 3273int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3004 int size, unsigned long count, int down, 3274 int size, unsigned long count, int down,
3005 gva_t address, int rep, unsigned port) 3275 gva_t address, int rep, unsigned port)
3006{ 3276{
@@ -3073,9 +3343,6 @@ static void bounce_off(void *info)
3073 /* nothing */ 3343 /* nothing */
3074} 3344}
3075 3345
3076static unsigned int ref_freq;
3077static unsigned long tsc_khz_ref;
3078
3079static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 3346static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
3080 void *data) 3347 void *data)
3081{ 3348{
@@ -3084,14 +3351,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
3084 struct kvm_vcpu *vcpu; 3351 struct kvm_vcpu *vcpu;
3085 int i, send_ipi = 0; 3352 int i, send_ipi = 0;
3086 3353
3087 if (!ref_freq)
3088 ref_freq = freq->old;
3089
3090 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) 3354 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
3091 return 0; 3355 return 0;
3092 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) 3356 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
3093 return 0; 3357 return 0;
3094 per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); 3358 per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
3095 3359
3096 spin_lock(&kvm_lock); 3360 spin_lock(&kvm_lock);
3097 list_for_each_entry(kvm, &vm_list, vm_list) { 3361 list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3128,9 +3392,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
3128 .notifier_call = kvmclock_cpufreq_notifier 3392 .notifier_call = kvmclock_cpufreq_notifier
3129}; 3393};
3130 3394
3395static void kvm_timer_init(void)
3396{
3397 int cpu;
3398
3399 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3400 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
3401 CPUFREQ_TRANSITION_NOTIFIER);
3402 for_each_online_cpu(cpu) {
3403 unsigned long khz = cpufreq_get(cpu);
3404 if (!khz)
3405 khz = tsc_khz;
3406 per_cpu(cpu_tsc_khz, cpu) = khz;
3407 }
3408 } else {
3409 for_each_possible_cpu(cpu)
3410 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
3411 }
3412}
3413
3131int kvm_arch_init(void *opaque) 3414int kvm_arch_init(void *opaque)
3132{ 3415{
3133 int r, cpu; 3416 int r;
3134 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; 3417 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
3135 3418
3136 if (kvm_x86_ops) { 3419 if (kvm_x86_ops) {
@@ -3162,13 +3445,7 @@ int kvm_arch_init(void *opaque)
3162 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 3445 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
3163 PT_DIRTY_MASK, PT64_NX_MASK, 0); 3446 PT_DIRTY_MASK, PT64_NX_MASK, 0);
3164 3447
3165 for_each_possible_cpu(cpu) 3448 kvm_timer_init();
3166 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
3167 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3168 tsc_khz_ref = tsc_khz;
3169 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
3170 CPUFREQ_TRANSITION_NOTIFIER);
3171 }
3172 3449
3173 return 0; 3450 return 0;
3174 3451
@@ -3296,7 +3573,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
3296 unsigned long *rflags) 3573 unsigned long *rflags)
3297{ 3574{
3298 kvm_lmsw(vcpu, msw); 3575 kvm_lmsw(vcpu, msw);
3299 *rflags = kvm_x86_ops->get_rflags(vcpu); 3576 *rflags = kvm_get_rflags(vcpu);
3300} 3577}
3301 3578
3302unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 3579unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
@@ -3334,7 +3611,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3334 switch (cr) { 3611 switch (cr) {
3335 case 0: 3612 case 0:
3336 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 3613 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
3337 *rflags = kvm_x86_ops->get_rflags(vcpu); 3614 *rflags = kvm_get_rflags(vcpu);
3338 break; 3615 break;
3339 case 2: 3616 case 2:
3340 vcpu->arch.cr2 = val; 3617 vcpu->arch.cr2 = val;
@@ -3454,18 +3731,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
3454 * 3731 *
3455 * No need to exit to userspace if we already have an interrupt queued. 3732 * No need to exit to userspace if we already have an interrupt queued.
3456 */ 3733 */
3457static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, 3734static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
3458 struct kvm_run *kvm_run)
3459{ 3735{
3460 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && 3736 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
3461 kvm_run->request_interrupt_window && 3737 vcpu->run->request_interrupt_window &&
3462 kvm_arch_interrupt_allowed(vcpu)); 3738 kvm_arch_interrupt_allowed(vcpu));
3463} 3739}
3464 3740
3465static void post_kvm_run_save(struct kvm_vcpu *vcpu, 3741static void post_kvm_run_save(struct kvm_vcpu *vcpu)
3466 struct kvm_run *kvm_run)
3467{ 3742{
3468 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; 3743 struct kvm_run *kvm_run = vcpu->run;
3744
3745 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
3469 kvm_run->cr8 = kvm_get_cr8(vcpu); 3746 kvm_run->cr8 = kvm_get_cr8(vcpu);
3470 kvm_run->apic_base = kvm_get_apic_base(vcpu); 3747 kvm_run->apic_base = kvm_get_apic_base(vcpu);
3471 if (irqchip_in_kernel(vcpu->kvm)) 3748 if (irqchip_in_kernel(vcpu->kvm))
@@ -3526,7 +3803,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
3526 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); 3803 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
3527} 3804}
3528 3805
3529static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3806static void inject_pending_event(struct kvm_vcpu *vcpu)
3530{ 3807{
3531 /* try to reinject previous events if any */ 3808 /* try to reinject previous events if any */
3532 if (vcpu->arch.exception.pending) { 3809 if (vcpu->arch.exception.pending) {
@@ -3562,11 +3839,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3562 } 3839 }
3563} 3840}
3564 3841
3565static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3842static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3566{ 3843{
3567 int r; 3844 int r;
3568 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 3845 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
3569 kvm_run->request_interrupt_window; 3846 vcpu->run->request_interrupt_window;
3570 3847
3571 if (vcpu->requests) 3848 if (vcpu->requests)
3572 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 3849 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3587,12 +3864,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3587 kvm_x86_ops->tlb_flush(vcpu); 3864 kvm_x86_ops->tlb_flush(vcpu);
3588 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 3865 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
3589 &vcpu->requests)) { 3866 &vcpu->requests)) {
3590 kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; 3867 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
3591 r = 0; 3868 r = 0;
3592 goto out; 3869 goto out;
3593 } 3870 }
3594 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { 3871 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
3595 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 3872 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
3596 r = 0; 3873 r = 0;
3597 goto out; 3874 goto out;
3598 } 3875 }
@@ -3616,7 +3893,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3616 goto out; 3893 goto out;
3617 } 3894 }
3618 3895
3619 inject_pending_event(vcpu, kvm_run); 3896 inject_pending_event(vcpu);
3620 3897
3621 /* enable NMI/IRQ window open exits if needed */ 3898 /* enable NMI/IRQ window open exits if needed */
3622 if (vcpu->arch.nmi_pending) 3899 if (vcpu->arch.nmi_pending)
@@ -3642,7 +3919,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3642 } 3919 }
3643 3920
3644 trace_kvm_entry(vcpu->vcpu_id); 3921 trace_kvm_entry(vcpu->vcpu_id);
3645 kvm_x86_ops->run(vcpu, kvm_run); 3922 kvm_x86_ops->run(vcpu);
3646 3923
3647 /* 3924 /*
3648 * If the guest has used debug registers, at least dr7 3925 * If the guest has used debug registers, at least dr7
@@ -3684,13 +3961,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3684 3961
3685 kvm_lapic_sync_from_vapic(vcpu); 3962 kvm_lapic_sync_from_vapic(vcpu);
3686 3963
3687 r = kvm_x86_ops->handle_exit(kvm_run, vcpu); 3964 r = kvm_x86_ops->handle_exit(vcpu);
3688out: 3965out:
3689 return r; 3966 return r;
3690} 3967}
3691 3968
3692 3969
3693static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3970static int __vcpu_run(struct kvm_vcpu *vcpu)
3694{ 3971{
3695 int r; 3972 int r;
3696 3973
@@ -3710,7 +3987,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3710 r = 1; 3987 r = 1;
3711 while (r > 0) { 3988 while (r > 0) {
3712 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 3989 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
3713 r = vcpu_enter_guest(vcpu, kvm_run); 3990 r = vcpu_enter_guest(vcpu);
3714 else { 3991 else {
3715 up_read(&vcpu->kvm->slots_lock); 3992 up_read(&vcpu->kvm->slots_lock);
3716 kvm_vcpu_block(vcpu); 3993 kvm_vcpu_block(vcpu);
@@ -3738,14 +4015,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3738 if (kvm_cpu_has_pending_timer(vcpu)) 4015 if (kvm_cpu_has_pending_timer(vcpu))
3739 kvm_inject_pending_timer_irqs(vcpu); 4016 kvm_inject_pending_timer_irqs(vcpu);
3740 4017
3741 if (dm_request_for_irq_injection(vcpu, kvm_run)) { 4018 if (dm_request_for_irq_injection(vcpu)) {
3742 r = -EINTR; 4019 r = -EINTR;
3743 kvm_run->exit_reason = KVM_EXIT_INTR; 4020 vcpu->run->exit_reason = KVM_EXIT_INTR;
3744 ++vcpu->stat.request_irq_exits; 4021 ++vcpu->stat.request_irq_exits;
3745 } 4022 }
3746 if (signal_pending(current)) { 4023 if (signal_pending(current)) {
3747 r = -EINTR; 4024 r = -EINTR;
3748 kvm_run->exit_reason = KVM_EXIT_INTR; 4025 vcpu->run->exit_reason = KVM_EXIT_INTR;
3749 ++vcpu->stat.signal_exits; 4026 ++vcpu->stat.signal_exits;
3750 } 4027 }
3751 if (need_resched()) { 4028 if (need_resched()) {
@@ -3756,7 +4033,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3756 } 4033 }
3757 4034
3758 up_read(&vcpu->kvm->slots_lock); 4035 up_read(&vcpu->kvm->slots_lock);
3759 post_kvm_run_save(vcpu, kvm_run); 4036 post_kvm_run_save(vcpu);
3760 4037
3761 vapic_exit(vcpu); 4038 vapic_exit(vcpu);
3762 4039
@@ -3789,15 +4066,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3789 if (r) 4066 if (r)
3790 goto out; 4067 goto out;
3791 } 4068 }
3792#if CONFIG_HAS_IOMEM
3793 if (vcpu->mmio_needed) { 4069 if (vcpu->mmio_needed) {
3794 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 4070 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
3795 vcpu->mmio_read_completed = 1; 4071 vcpu->mmio_read_completed = 1;
3796 vcpu->mmio_needed = 0; 4072 vcpu->mmio_needed = 0;
3797 4073
3798 down_read(&vcpu->kvm->slots_lock); 4074 down_read(&vcpu->kvm->slots_lock);
3799 r = emulate_instruction(vcpu, kvm_run, 4075 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
3800 vcpu->arch.mmio_fault_cr2, 0,
3801 EMULTYPE_NO_DECODE); 4076 EMULTYPE_NO_DECODE);
3802 up_read(&vcpu->kvm->slots_lock); 4077 up_read(&vcpu->kvm->slots_lock);
3803 if (r == EMULATE_DO_MMIO) { 4078 if (r == EMULATE_DO_MMIO) {
@@ -3808,12 +4083,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3808 goto out; 4083 goto out;
3809 } 4084 }
3810 } 4085 }
3811#endif
3812 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) 4086 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
3813 kvm_register_write(vcpu, VCPU_REGS_RAX, 4087 kvm_register_write(vcpu, VCPU_REGS_RAX,
3814 kvm_run->hypercall.ret); 4088 kvm_run->hypercall.ret);
3815 4089
3816 r = __vcpu_run(vcpu, kvm_run); 4090 r = __vcpu_run(vcpu);
3817 4091
3818out: 4092out:
3819 if (vcpu->sigset_active) 4093 if (vcpu->sigset_active)
@@ -3847,13 +4121,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3847#endif 4121#endif
3848 4122
3849 regs->rip = kvm_rip_read(vcpu); 4123 regs->rip = kvm_rip_read(vcpu);
3850 regs->rflags = kvm_x86_ops->get_rflags(vcpu); 4124 regs->rflags = kvm_get_rflags(vcpu);
3851
3852 /*
3853 * Don't leak debug flags in case they were set for guest debugging
3854 */
3855 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3856 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
3857 4125
3858 vcpu_put(vcpu); 4126 vcpu_put(vcpu);
3859 4127
@@ -3881,12 +4149,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3881 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); 4149 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
3882 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); 4150 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
3883 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); 4151 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
3884
3885#endif 4152#endif
3886 4153
3887 kvm_rip_write(vcpu, regs->rip); 4154 kvm_rip_write(vcpu, regs->rip);
3888 kvm_x86_ops->set_rflags(vcpu, regs->rflags); 4155 kvm_set_rflags(vcpu, regs->rflags);
3889
3890 4156
3891 vcpu->arch.exception.pending = false; 4157 vcpu->arch.exception.pending = false;
3892 4158
@@ -4105,7 +4371,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4105{ 4371{
4106 return (seg != VCPU_SREG_LDTR) && 4372 return (seg != VCPU_SREG_LDTR) &&
4107 (seg != VCPU_SREG_TR) && 4373 (seg != VCPU_SREG_TR) &&
4108 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM); 4374 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4109} 4375}
4110 4376
4111int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4377int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
@@ -4133,7 +4399,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4133{ 4399{
4134 tss->cr3 = vcpu->arch.cr3; 4400 tss->cr3 = vcpu->arch.cr3;
4135 tss->eip = kvm_rip_read(vcpu); 4401 tss->eip = kvm_rip_read(vcpu);
4136 tss->eflags = kvm_x86_ops->get_rflags(vcpu); 4402 tss->eflags = kvm_get_rflags(vcpu);
4137 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4403 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4138 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4404 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4139 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4405 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4157,7 +4423,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4157 kvm_set_cr3(vcpu, tss->cr3); 4423 kvm_set_cr3(vcpu, tss->cr3);
4158 4424
4159 kvm_rip_write(vcpu, tss->eip); 4425 kvm_rip_write(vcpu, tss->eip);
4160 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); 4426 kvm_set_rflags(vcpu, tss->eflags | 2);
4161 4427
4162 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); 4428 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
4163 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); 4429 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
@@ -4195,7 +4461,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
4195 struct tss_segment_16 *tss) 4461 struct tss_segment_16 *tss)
4196{ 4462{
4197 tss->ip = kvm_rip_read(vcpu); 4463 tss->ip = kvm_rip_read(vcpu);
4198 tss->flag = kvm_x86_ops->get_rflags(vcpu); 4464 tss->flag = kvm_get_rflags(vcpu);
4199 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4465 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4200 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4466 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4201 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4467 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4210,14 +4476,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
4210 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); 4476 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
4211 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); 4477 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
4212 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4478 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4213 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
4214} 4479}
4215 4480
4216static int load_state_from_tss16(struct kvm_vcpu *vcpu, 4481static int load_state_from_tss16(struct kvm_vcpu *vcpu,
4217 struct tss_segment_16 *tss) 4482 struct tss_segment_16 *tss)
4218{ 4483{
4219 kvm_rip_write(vcpu, tss->ip); 4484 kvm_rip_write(vcpu, tss->ip);
4220 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); 4485 kvm_set_rflags(vcpu, tss->flag | 2);
4221 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); 4486 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
4222 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); 4487 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
4223 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); 4488 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
@@ -4363,8 +4628,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4363 } 4628 }
4364 4629
4365 if (reason == TASK_SWITCH_IRET) { 4630 if (reason == TASK_SWITCH_IRET) {
4366 u32 eflags = kvm_x86_ops->get_rflags(vcpu); 4631 u32 eflags = kvm_get_rflags(vcpu);
4367 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); 4632 kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
4368 } 4633 }
4369 4634
4370 /* set back link to prev task only if NT bit is set in eflags 4635 /* set back link to prev task only if NT bit is set in eflags
@@ -4372,11 +4637,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4372 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) 4637 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
4373 old_tss_sel = 0xffff; 4638 old_tss_sel = 0xffff;
4374 4639
4375 /* set back link to prev task only if NT bit is set in eflags
4376 note that old_tss_sel is not used afetr this point */
4377 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
4378 old_tss_sel = 0xffff;
4379
4380 if (nseg_desc.type & 8) 4640 if (nseg_desc.type & 8)
4381 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, 4641 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
4382 old_tss_base, &nseg_desc); 4642 old_tss_base, &nseg_desc);
@@ -4385,8 +4645,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4385 old_tss_base, &nseg_desc); 4645 old_tss_base, &nseg_desc);
4386 4646
4387 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { 4647 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
4388 u32 eflags = kvm_x86_ops->get_rflags(vcpu); 4648 u32 eflags = kvm_get_rflags(vcpu);
4389 kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); 4649 kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
4390 } 4650 }
4391 4651
4392 if (reason != TASK_SWITCH_IRET) { 4652 if (reason != TASK_SWITCH_IRET) {
@@ -4438,8 +4698,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4438 4698
4439 mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; 4699 mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
4440 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 4700 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
4441 if (!is_long_mode(vcpu) && is_pae(vcpu)) 4701 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
4442 load_pdptrs(vcpu, vcpu->arch.cr3); 4702 load_pdptrs(vcpu, vcpu->arch.cr3);
4703 mmu_reset_needed = 1;
4704 }
4443 4705
4444 if (mmu_reset_needed) 4706 if (mmu_reset_needed)
4445 kvm_mmu_reset_context(vcpu); 4707 kvm_mmu_reset_context(vcpu);
@@ -4480,12 +4742,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4480int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 4742int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4481 struct kvm_guest_debug *dbg) 4743 struct kvm_guest_debug *dbg)
4482{ 4744{
4745 unsigned long rflags;
4483 int i, r; 4746 int i, r;
4484 4747
4485 vcpu_load(vcpu); 4748 vcpu_load(vcpu);
4486 4749
4487 if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == 4750 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
4488 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { 4751 r = -EBUSY;
4752 if (vcpu->arch.exception.pending)
4753 goto unlock_out;
4754 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
4755 kvm_queue_exception(vcpu, DB_VECTOR);
4756 else
4757 kvm_queue_exception(vcpu, BP_VECTOR);
4758 }
4759
4760 /*
4761 * Read rflags as long as potentially injected trace flags are still
4762 * filtered out.
4763 */
4764 rflags = kvm_get_rflags(vcpu);
4765
4766 vcpu->guest_debug = dbg->control;
4767 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
4768 vcpu->guest_debug = 0;
4769
4770 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
4489 for (i = 0; i < KVM_NR_DB_REGS; ++i) 4771 for (i = 0; i < KVM_NR_DB_REGS; ++i)
4490 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; 4772 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
4491 vcpu->arch.switch_db_regs = 4773 vcpu->arch.switch_db_regs =
@@ -4496,13 +4778,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4496 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 4778 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
4497 } 4779 }
4498 4780
4499 r = kvm_x86_ops->set_guest_debug(vcpu, dbg); 4781 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
4782 vcpu->arch.singlestep_cs =
4783 get_segment_selector(vcpu, VCPU_SREG_CS);
4784 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
4785 }
4786
4787 /*
4788 * Trigger an rflags update that will inject or remove the trace
4789 * flags.
4790 */
4791 kvm_set_rflags(vcpu, rflags);
4792
4793 kvm_x86_ops->set_guest_debug(vcpu, dbg);
4500 4794
4501 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 4795 r = 0;
4502 kvm_queue_exception(vcpu, DB_VECTOR);
4503 else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
4504 kvm_queue_exception(vcpu, BP_VECTOR);
4505 4796
4797unlock_out:
4506 vcpu_put(vcpu); 4798 vcpu_put(vcpu);
4507 4799
4508 return r; 4800 return r;
@@ -4703,14 +4995,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
4703 return kvm_x86_ops->vcpu_reset(vcpu); 4995 return kvm_x86_ops->vcpu_reset(vcpu);
4704} 4996}
4705 4997
4706void kvm_arch_hardware_enable(void *garbage) 4998int kvm_arch_hardware_enable(void *garbage)
4707{ 4999{
4708 kvm_x86_ops->hardware_enable(garbage); 5000 /*
5001 * Since this may be called from a hotplug notifcation,
5002 * we can't get the CPU frequency directly.
5003 */
5004 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5005 int cpu = raw_smp_processor_id();
5006 per_cpu(cpu_tsc_khz, cpu) = 0;
5007 }
5008
5009 kvm_shared_msr_cpu_online();
5010
5011 return kvm_x86_ops->hardware_enable(garbage);
4709} 5012}
4710 5013
4711void kvm_arch_hardware_disable(void *garbage) 5014void kvm_arch_hardware_disable(void *garbage)
4712{ 5015{
4713 kvm_x86_ops->hardware_disable(garbage); 5016 kvm_x86_ops->hardware_disable(garbage);
5017 drop_user_return_notifiers(garbage);
4714} 5018}
4715 5019
4716int kvm_arch_hardware_setup(void) 5020int kvm_arch_hardware_setup(void)
@@ -4948,8 +5252,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
4948 return kvm_x86_ops->interrupt_allowed(vcpu); 5252 return kvm_x86_ops->interrupt_allowed(vcpu);
4949} 5253}
4950 5254
5255unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
5256{
5257 unsigned long rflags;
5258
5259 rflags = kvm_x86_ops->get_rflags(vcpu);
5260 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5261 rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
5262 return rflags;
5263}
5264EXPORT_SYMBOL_GPL(kvm_get_rflags);
5265
5266void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
5267{
5268 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
5269 vcpu->arch.singlestep_cs ==
5270 get_segment_selector(vcpu, VCPU_SREG_CS) &&
5271 vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
5272 rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
5273 kvm_x86_ops->set_rflags(vcpu, rflags);
5274}
5275EXPORT_SYMBOL_GPL(kvm_set_rflags);
5276
4951EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 5277EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
4952EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 5278EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
4953EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); 5279EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
4954EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); 5280EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
4955EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); 5281EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
5282EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
5283EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
5284EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
5285EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
5286EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
5287EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);