Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c  1599
1 files changed, 640 insertions, 959 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dd9bc8fb81ab..05d571f6f196 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,7 +42,7 @@
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/perf_event.h> 43#include <linux/perf_event.h>
44#include <trace/events/kvm.h> 44#include <trace/events/kvm.h>
45#undef TRACE_INCLUDE_FILE 45
46#define CREATE_TRACE_POINTS 46#define CREATE_TRACE_POINTS
47#include "trace.h" 47#include "trace.h"
48 48
@@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore)
224 kvm_on_user_return(&smsr->urn); 224 kvm_on_user_return(&smsr->urn);
225} 225}
226 226
227unsigned long segment_base(u16 selector)
228{
229 struct descriptor_table gdt;
230 struct desc_struct *d;
231 unsigned long table_base;
232 unsigned long v;
233
234 if (selector == 0)
235 return 0;
236
237 kvm_get_gdt(&gdt);
238 table_base = gdt.base;
239
240 if (selector & 4) { /* from ldt */
241 u16 ldt_selector = kvm_read_ldt();
242
243 table_base = segment_base(ldt_selector);
244 }
245 d = (struct desc_struct *)(table_base + (selector & ~7));
246 v = get_desc_base(d);
247#ifdef CONFIG_X86_64
248 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
249 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
250#endif
251 return v;
252}
253EXPORT_SYMBOL_GPL(segment_base);
254
255u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) 227u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
256{ 228{
257 if (irqchip_in_kernel(vcpu->kvm)) 229 if (irqchip_in_kernel(vcpu->kvm))
@@ -293,7 +265,8 @@ static int exception_class(int vector)
293} 265}
294 266
295static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 267static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
296 unsigned nr, bool has_error, u32 error_code) 268 unsigned nr, bool has_error, u32 error_code,
269 bool reinject)
297{ 270{
298 u32 prev_nr; 271 u32 prev_nr;
299 int class1, class2; 272 int class1, class2;
@@ -304,6 +277,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
304 vcpu->arch.exception.has_error_code = has_error; 277 vcpu->arch.exception.has_error_code = has_error;
305 vcpu->arch.exception.nr = nr; 278 vcpu->arch.exception.nr = nr;
306 vcpu->arch.exception.error_code = error_code; 279 vcpu->arch.exception.error_code = error_code;
280 vcpu->arch.exception.reinject = reinject;
307 return; 281 return;
308 } 282 }
309 283
@@ -332,10 +306,16 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
332 306
333void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 307void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
334{ 308{
335 kvm_multiple_exception(vcpu, nr, false, 0); 309 kvm_multiple_exception(vcpu, nr, false, 0, false);
336} 310}
337EXPORT_SYMBOL_GPL(kvm_queue_exception); 311EXPORT_SYMBOL_GPL(kvm_queue_exception);
338 312
313void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
314{
315 kvm_multiple_exception(vcpu, nr, false, 0, true);
316}
317EXPORT_SYMBOL_GPL(kvm_requeue_exception);
318
339void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, 319void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
340 u32 error_code) 320 u32 error_code)
341{ 321{
@@ -352,10 +332,16 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
352 332
353void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 333void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
354{ 334{
355 kvm_multiple_exception(vcpu, nr, true, error_code); 335 kvm_multiple_exception(vcpu, nr, true, error_code, false);
356} 336}
357EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 337EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
358 338
339void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
340{
341 kvm_multiple_exception(vcpu, nr, true, error_code, true);
342}
343EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
344
359/* 345/*
360 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue 346 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue
361 * a #GP and return false. 347 * a #GP and return false.
@@ -476,7 +462,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
476 } 462 }
477 463
478 kvm_x86_ops->set_cr0(vcpu, cr0); 464 kvm_x86_ops->set_cr0(vcpu, cr0);
479 vcpu->arch.cr0 = cr0;
480 465
481 kvm_mmu_reset_context(vcpu); 466 kvm_mmu_reset_context(vcpu);
482 return; 467 return;
@@ -485,7 +470,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
485 470
486void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 471void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
487{ 472{
488 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); 473 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
489} 474}
490EXPORT_SYMBOL_GPL(kvm_lmsw); 475EXPORT_SYMBOL_GPL(kvm_lmsw);
491 476
@@ -517,7 +502,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
517 } 502 }
518 kvm_x86_ops->set_cr4(vcpu, cr4); 503 kvm_x86_ops->set_cr4(vcpu, cr4);
519 vcpu->arch.cr4 = cr4; 504 vcpu->arch.cr4 = cr4;
520 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
521 kvm_mmu_reset_context(vcpu); 505 kvm_mmu_reset_context(vcpu);
522} 506}
523EXPORT_SYMBOL_GPL(kvm_set_cr4); 507EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -592,6 +576,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
592} 576}
593EXPORT_SYMBOL_GPL(kvm_get_cr8); 577EXPORT_SYMBOL_GPL(kvm_get_cr8);
594 578
579int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
580{
581 switch (dr) {
582 case 0 ... 3:
583 vcpu->arch.db[dr] = val;
584 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
585 vcpu->arch.eff_db[dr] = val;
586 break;
587 case 4:
588 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
589 kvm_queue_exception(vcpu, UD_VECTOR);
590 return 1;
591 }
592 /* fall through */
593 case 6:
594 if (val & 0xffffffff00000000ULL) {
595 kvm_inject_gp(vcpu, 0);
596 return 1;
597 }
598 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
599 break;
600 case 5:
601 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
602 kvm_queue_exception(vcpu, UD_VECTOR);
603 return 1;
604 }
605 /* fall through */
606 default: /* 7 */
607 if (val & 0xffffffff00000000ULL) {
608 kvm_inject_gp(vcpu, 0);
609 return 1;
610 }
611 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
612 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
613 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
614 vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
615 }
616 break;
617 }
618
619 return 0;
620}
621EXPORT_SYMBOL_GPL(kvm_set_dr);
622
623int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
624{
625 switch (dr) {
626 case 0 ... 3:
627 *val = vcpu->arch.db[dr];
628 break;
629 case 4:
630 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
631 kvm_queue_exception(vcpu, UD_VECTOR);
632 return 1;
633 }
634 /* fall through */
635 case 6:
636 *val = vcpu->arch.dr6;
637 break;
638 case 5:
639 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
640 kvm_queue_exception(vcpu, UD_VECTOR);
641 return 1;
642 }
643 /* fall through */
644 default: /* 7 */
645 *val = vcpu->arch.dr7;
646 break;
647 }
648
649 return 0;
650}
651EXPORT_SYMBOL_GPL(kvm_get_dr);
652
595static inline u32 bit(int bitno) 653static inline u32 bit(int bitno)
596{ 654{
597 return 1 << (bitno & 31); 655 return 1 << (bitno & 31);
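
The kvm_set_dr()/kvm_get_dr() helpers introduced in the hunk above centralize the architectural debug-register rules: DR4/DR5 alias DR6/DR7 only while CR4.DE is clear (otherwise #UD), and the upper 32 bits of DR6/DR7 must be zero (#GP). As a rough standalone illustration of just that dispatch, here is a userspace sketch; the dr_state struct and the DR_OK/DR_UD/DR_GP codes are made up for the example, and the DR6_FIXED_1/DR7_FIXED_1 masking is left out:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the checks in the kvm_set_dr() hunk above. */
struct dr_state {
	uint64_t db[4];		/* DR0-DR3 */
	uint64_t dr6, dr7;
	int cr4_de;		/* CR4.DE: debugging extensions enabled */
};

enum { DR_OK = 0, DR_UD, DR_GP };

static int set_dr(struct dr_state *s, int dr, uint64_t val)
{
	switch (dr) {
	case 0: case 1: case 2: case 3:
		s->db[dr] = val;
		return DR_OK;
	case 4:				/* alias of DR6 unless CR4.DE is set */
		if (s->cr4_de)
			return DR_UD;
		/* fall through */
	case 6:
		if (val >> 32)
			return DR_GP;	/* upper half must be zero */
		s->dr6 = val;
		return DR_OK;
	case 5:				/* alias of DR7 unless CR4.DE is set */
		if (s->cr4_de)
			return DR_UD;
		/* fall through */
	default:			/* 7 */
		if (val >> 32)
			return DR_GP;
		s->dr7 = val;
		return DR_OK;
	}
}

int main(void)
{
	struct dr_state s = { .cr4_de = 0 };

	printf("dr4 write -> %d (aliases dr6)\n", set_dr(&s, 4, 0x1));
	s.cr4_de = 1;
	printf("dr4 write -> %d (#UD expected)\n", set_dr(&s, 4, 0x1));
	return 0;
}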
@@ -606,9 +664,10 @@ static inline u32 bit(int bitno)
606 * kvm-specific. Those are put in the beginning of the list. 664 * kvm-specific. Those are put in the beginning of the list.
607 */ 665 */
608 666
609#define KVM_SAVE_MSRS_BEGIN 5 667#define KVM_SAVE_MSRS_BEGIN 7
610static u32 msrs_to_save[] = { 668static u32 msrs_to_save[] = {
611 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 669 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
670 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
612 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 671 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
613 HV_X64_MSR_APIC_ASSIST_PAGE, 672 HV_X64_MSR_APIC_ASSIST_PAGE,
614 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 673 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -625,48 +684,42 @@ static u32 emulated_msrs[] = {
625 MSR_IA32_MISC_ENABLE, 684 MSR_IA32_MISC_ENABLE,
626}; 685};
627 686
628static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 687static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
629{ 688{
630 if (efer & efer_reserved_bits) { 689 if (efer & efer_reserved_bits)
631 kvm_inject_gp(vcpu, 0); 690 return 1;
632 return;
633 }
634 691
635 if (is_paging(vcpu) 692 if (is_paging(vcpu)
636 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 693 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
637 kvm_inject_gp(vcpu, 0); 694 return 1;
638 return;
639 }
640 695
641 if (efer & EFER_FFXSR) { 696 if (efer & EFER_FFXSR) {
642 struct kvm_cpuid_entry2 *feat; 697 struct kvm_cpuid_entry2 *feat;
643 698
644 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 699 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
645 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 700 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
646 kvm_inject_gp(vcpu, 0); 701 return 1;
647 return;
648 }
649 } 702 }
650 703
651 if (efer & EFER_SVME) { 704 if (efer & EFER_SVME) {
652 struct kvm_cpuid_entry2 *feat; 705 struct kvm_cpuid_entry2 *feat;
653 706
654 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 707 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
655 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 708 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
656 kvm_inject_gp(vcpu, 0); 709 return 1;
657 return;
658 }
659 } 710 }
660 711
661 kvm_x86_ops->set_efer(vcpu, efer);
662
663 efer &= ~EFER_LMA; 712 efer &= ~EFER_LMA;
664 efer |= vcpu->arch.efer & EFER_LMA; 713 efer |= vcpu->arch.efer & EFER_LMA;
665 714
715 kvm_x86_ops->set_efer(vcpu, efer);
716
666 vcpu->arch.efer = efer; 717 vcpu->arch.efer = efer;
667 718
668 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 719 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
669 kvm_mmu_reset_context(vcpu); 720 kvm_mmu_reset_context(vcpu);
721
722 return 0;
670} 723}
671 724
672void kvm_enable_efer_bits(u64 mask) 725void kvm_enable_efer_bits(u64 mask)
@@ -696,14 +749,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
696 749
697static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 750static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
698{ 751{
699 static int version; 752 int version;
753 int r;
700 struct pvclock_wall_clock wc; 754 struct pvclock_wall_clock wc;
701 struct timespec boot; 755 struct timespec boot;
702 756
703 if (!wall_clock) 757 if (!wall_clock)
704 return; 758 return;
705 759
706 version++; 760 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
761 if (r)
762 return;
763
764 if (version & 1)
765 ++version; /* first time write, random junk */
766
767 ++version;
707 768
708 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 769 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
709 770
@@ -796,6 +857,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
796 vcpu->hv_clock.system_time = ts.tv_nsec + 857 vcpu->hv_clock.system_time = ts.tv_nsec +
797 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; 858 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
798 859
860 vcpu->hv_clock.flags = 0;
861
799 /* 862 /*
800 * The interface expects us to write an even number signaling that the 863 * The interface expects us to write an even number signaling that the
801 * update is finished. Since the guest won't see the intermediate 864 * update is finished. Since the guest won't see the intermediate
@@ -1087,10 +1150,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1087{ 1150{
1088 switch (msr) { 1151 switch (msr) {
1089 case MSR_EFER: 1152 case MSR_EFER:
1090 set_efer(vcpu, data); 1153 return set_efer(vcpu, data);
1091 break;
1092 case MSR_K7_HWCR: 1154 case MSR_K7_HWCR:
1093 data &= ~(u64)0x40; /* ignore flush filter disable */ 1155 data &= ~(u64)0x40; /* ignore flush filter disable */
1156 data &= ~(u64)0x100; /* ignore ignne emulation enable */
1094 if (data != 0) { 1157 if (data != 0) {
1095 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1158 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1096 data); 1159 data);
@@ -1133,10 +1196,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1133 case MSR_IA32_MISC_ENABLE: 1196 case MSR_IA32_MISC_ENABLE:
1134 vcpu->arch.ia32_misc_enable_msr = data; 1197 vcpu->arch.ia32_misc_enable_msr = data;
1135 break; 1198 break;
1199 case MSR_KVM_WALL_CLOCK_NEW:
1136 case MSR_KVM_WALL_CLOCK: 1200 case MSR_KVM_WALL_CLOCK:
1137 vcpu->kvm->arch.wall_clock = data; 1201 vcpu->kvm->arch.wall_clock = data;
1138 kvm_write_wall_clock(vcpu->kvm, data); 1202 kvm_write_wall_clock(vcpu->kvm, data);
1139 break; 1203 break;
1204 case MSR_KVM_SYSTEM_TIME_NEW:
1140 case MSR_KVM_SYSTEM_TIME: { 1205 case MSR_KVM_SYSTEM_TIME: {
1141 if (vcpu->arch.time_page) { 1206 if (vcpu->arch.time_page) {
1142 kvm_release_page_dirty(vcpu->arch.time_page); 1207 kvm_release_page_dirty(vcpu->arch.time_page);
@@ -1408,9 +1473,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1408 data = vcpu->arch.efer; 1473 data = vcpu->arch.efer;
1409 break; 1474 break;
1410 case MSR_KVM_WALL_CLOCK: 1475 case MSR_KVM_WALL_CLOCK:
1476 case MSR_KVM_WALL_CLOCK_NEW:
1411 data = vcpu->kvm->arch.wall_clock; 1477 data = vcpu->kvm->arch.wall_clock;
1412 break; 1478 break;
1413 case MSR_KVM_SYSTEM_TIME: 1479 case MSR_KVM_SYSTEM_TIME:
1480 case MSR_KVM_SYSTEM_TIME_NEW:
1414 data = vcpu->arch.time; 1481 data = vcpu->arch.time;
1415 break; 1482 break;
1416 case MSR_IA32_P5_MC_ADDR: 1483 case MSR_IA32_P5_MC_ADDR:
@@ -1549,6 +1616,7 @@ int kvm_dev_ioctl_check_extension(long ext)
1549 case KVM_CAP_HYPERV_VAPIC: 1616 case KVM_CAP_HYPERV_VAPIC:
1550 case KVM_CAP_HYPERV_SPIN: 1617 case KVM_CAP_HYPERV_SPIN:
1551 case KVM_CAP_PCI_SEGMENT: 1618 case KVM_CAP_PCI_SEGMENT:
1619 case KVM_CAP_DEBUGREGS:
1552 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1620 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1553 r = 1; 1621 r = 1;
1554 break; 1622 break;
@@ -1769,6 +1837,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1769{ 1837{
1770 int r; 1838 int r;
1771 1839
1840 vcpu_load(vcpu);
1772 r = -E2BIG; 1841 r = -E2BIG;
1773 if (cpuid->nent < vcpu->arch.cpuid_nent) 1842 if (cpuid->nent < vcpu->arch.cpuid_nent)
1774 goto out; 1843 goto out;
@@ -1780,6 +1849,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1780 1849
1781out: 1850out:
1782 cpuid->nent = vcpu->arch.cpuid_nent; 1851 cpuid->nent = vcpu->arch.cpuid_nent;
1852 vcpu_put(vcpu);
1783 return r; 1853 return r;
1784} 1854}
1785 1855
@@ -1910,6 +1980,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1910 } 1980 }
1911 break; 1981 break;
1912 } 1982 }
1983 case KVM_CPUID_SIGNATURE: {
1984 char signature[12] = "KVMKVMKVM\0\0";
1985 u32 *sigptr = (u32 *)signature;
1986 entry->eax = 0;
1987 entry->ebx = sigptr[0];
1988 entry->ecx = sigptr[1];
1989 entry->edx = sigptr[2];
1990 break;
1991 }
1992 case KVM_CPUID_FEATURES:
1993 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
1994 (1 << KVM_FEATURE_NOP_IO_DELAY) |
1995 (1 << KVM_FEATURE_CLOCKSOURCE2) |
1996 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
1997 entry->ebx = 0;
1998 entry->ecx = 0;
1999 entry->edx = 0;
2000 break;
1913 case 0x80000000: 2001 case 0x80000000:
1914 entry->eax = min(entry->eax, 0x8000001a); 2002 entry->eax = min(entry->eax, 0x8000001a);
1915 break; 2003 break;
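
The KVM_CPUID_SIGNATURE and KVM_CPUID_FEATURES entries synthesized above are what a guest probes to learn that it runs under KVM and which paravirtual clocks it may enable. A rough guest-side probe is sketched below; it assumes the customary hypervisor base leaf 0x40000000 for the signature and 0x40000001 for the feature bits, and is x86-only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* x86 only: raw CPUID, no compiler intrinsics assumed. */
static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	__asm__ volatile("cpuid"
			 : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
			 : "a"(leaf), "c"(0));
}

int main(void)
{
	uint32_t eax, ebx, ecx, edx;
	char sig[13];

	/* 0x40000000: hypervisor base leaf (KVM_CPUID_SIGNATURE). */
	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';

	if (strcmp(sig, "KVMKVMKVM") != 0) {
		printf("not running on KVM (signature \"%s\")\n", sig);
		return 0;
	}

	/* 0x40000001 (KVM_CPUID_FEATURES): paravirt feature bits in EAX. */
	cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
	printf("KVM detected, feature bits 0x%x\n", eax);
	return 0;
}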
@@ -1918,6 +2006,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1918 entry->ecx &= kvm_supported_word6_x86_features; 2006 entry->ecx &= kvm_supported_word6_x86_features;
1919 break; 2007 break;
1920 } 2008 }
2009
2010 kvm_x86_ops->set_supported_cpuid(function, entry);
2011
1921 put_cpu(); 2012 put_cpu();
1922} 2013}
1923 2014
@@ -1953,6 +2044,23 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1953 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 2044 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
1954 do_cpuid_ent(&cpuid_entries[nent], func, 0, 2045 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1955 &nent, cpuid->nent); 2046 &nent, cpuid->nent);
2047
2048
2049
2050 r = -E2BIG;
2051 if (nent >= cpuid->nent)
2052 goto out_free;
2053
2054 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
2055 cpuid->nent);
2056
2057 r = -E2BIG;
2058 if (nent >= cpuid->nent)
2059 goto out_free;
2060
2061 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
2062 cpuid->nent);
2063
1956 r = -E2BIG; 2064 r = -E2BIG;
1957 if (nent >= cpuid->nent) 2065 if (nent >= cpuid->nent)
1958 goto out_free; 2066 goto out_free;
@@ -2032,6 +2140,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2032 int r; 2140 int r;
2033 unsigned bank_num = mcg_cap & 0xff, bank; 2141 unsigned bank_num = mcg_cap & 0xff, bank;
2034 2142
2143 vcpu_load(vcpu);
2035 r = -EINVAL; 2144 r = -EINVAL;
2036 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2145 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2037 goto out; 2146 goto out;
@@ -2046,6 +2155,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2046 for (bank = 0; bank < bank_num; bank++) 2155 for (bank = 0; bank < bank_num; bank++)
2047 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2156 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2048out: 2157out:
2158 vcpu_put(vcpu);
2049 return r; 2159 return r;
2050} 2160}
2051 2161
@@ -2105,14 +2215,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2105{ 2215{
2106 vcpu_load(vcpu); 2216 vcpu_load(vcpu);
2107 2217
2108 events->exception.injected = vcpu->arch.exception.pending; 2218 events->exception.injected =
2219 vcpu->arch.exception.pending &&
2220 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2109 events->exception.nr = vcpu->arch.exception.nr; 2221 events->exception.nr = vcpu->arch.exception.nr;
2110 events->exception.has_error_code = vcpu->arch.exception.has_error_code; 2222 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2111 events->exception.error_code = vcpu->arch.exception.error_code; 2223 events->exception.error_code = vcpu->arch.exception.error_code;
2112 2224
2113 events->interrupt.injected = vcpu->arch.interrupt.pending; 2225 events->interrupt.injected =
2226 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2114 events->interrupt.nr = vcpu->arch.interrupt.nr; 2227 events->interrupt.nr = vcpu->arch.interrupt.nr;
2115 events->interrupt.soft = vcpu->arch.interrupt.soft; 2228 events->interrupt.soft = 0;
2229 events->interrupt.shadow =
2230 kvm_x86_ops->get_interrupt_shadow(vcpu,
2231 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2116 2232
2117 events->nmi.injected = vcpu->arch.nmi_injected; 2233 events->nmi.injected = vcpu->arch.nmi_injected;
2118 events->nmi.pending = vcpu->arch.nmi_pending; 2234 events->nmi.pending = vcpu->arch.nmi_pending;
@@ -2121,7 +2237,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2121 events->sipi_vector = vcpu->arch.sipi_vector; 2237 events->sipi_vector = vcpu->arch.sipi_vector;
2122 2238
2123 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2239 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2124 | KVM_VCPUEVENT_VALID_SIPI_VECTOR); 2240 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2241 | KVM_VCPUEVENT_VALID_SHADOW);
2125 2242
2126 vcpu_put(vcpu); 2243 vcpu_put(vcpu);
2127} 2244}
@@ -2130,7 +2247,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2130 struct kvm_vcpu_events *events) 2247 struct kvm_vcpu_events *events)
2131{ 2248{
2132 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING 2249 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2133 | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) 2250 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2251 | KVM_VCPUEVENT_VALID_SHADOW))
2134 return -EINVAL; 2252 return -EINVAL;
2135 2253
2136 vcpu_load(vcpu); 2254 vcpu_load(vcpu);
@@ -2145,6 +2263,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2145 vcpu->arch.interrupt.soft = events->interrupt.soft; 2263 vcpu->arch.interrupt.soft = events->interrupt.soft;
2146 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) 2264 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
2147 kvm_pic_clear_isr_ack(vcpu->kvm); 2265 kvm_pic_clear_isr_ack(vcpu->kvm);
2266 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2267 kvm_x86_ops->set_interrupt_shadow(vcpu,
2268 events->interrupt.shadow);
2148 2269
2149 vcpu->arch.nmi_injected = events->nmi.injected; 2270 vcpu->arch.nmi_injected = events->nmi.injected;
2150 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) 2271 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
@@ -2159,6 +2280,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2159 return 0; 2280 return 0;
2160} 2281}
2161 2282
2283static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2284 struct kvm_debugregs *dbgregs)
2285{
2286 vcpu_load(vcpu);
2287
2288 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2289 dbgregs->dr6 = vcpu->arch.dr6;
2290 dbgregs->dr7 = vcpu->arch.dr7;
2291 dbgregs->flags = 0;
2292
2293 vcpu_put(vcpu);
2294}
2295
2296static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2297 struct kvm_debugregs *dbgregs)
2298{
2299 if (dbgregs->flags)
2300 return -EINVAL;
2301
2302 vcpu_load(vcpu);
2303
2304 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2305 vcpu->arch.dr6 = dbgregs->dr6;
2306 vcpu->arch.dr7 = dbgregs->dr7;
2307
2308 vcpu_put(vcpu);
2309
2310 return 0;
2311}
2312
2162long kvm_arch_vcpu_ioctl(struct file *filp, 2313long kvm_arch_vcpu_ioctl(struct file *filp,
2163 unsigned int ioctl, unsigned long arg) 2314 unsigned int ioctl, unsigned long arg)
2164{ 2315{
@@ -2313,7 +2464,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2313 r = -EFAULT; 2464 r = -EFAULT;
2314 if (copy_from_user(&mce, argp, sizeof mce)) 2465 if (copy_from_user(&mce, argp, sizeof mce))
2315 goto out; 2466 goto out;
2467 vcpu_load(vcpu);
2316 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2468 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
2469 vcpu_put(vcpu);
2317 break; 2470 break;
2318 } 2471 }
2319 case KVM_GET_VCPU_EVENTS: { 2472 case KVM_GET_VCPU_EVENTS: {
@@ -2337,6 +2490,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2337 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); 2490 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2338 break; 2491 break;
2339 } 2492 }
2493 case KVM_GET_DEBUGREGS: {
2494 struct kvm_debugregs dbgregs;
2495
2496 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
2497
2498 r = -EFAULT;
2499 if (copy_to_user(argp, &dbgregs,
2500 sizeof(struct kvm_debugregs)))
2501 break;
2502 r = 0;
2503 break;
2504 }
2505 case KVM_SET_DEBUGREGS: {
2506 struct kvm_debugregs dbgregs;
2507
2508 r = -EFAULT;
2509 if (copy_from_user(&dbgregs, argp,
2510 sizeof(struct kvm_debugregs)))
2511 break;
2512
2513 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
2514 break;
2515 }
2340 default: 2516 default:
2341 r = -EINVAL; 2517 r = -EINVAL;
2342 } 2518 }
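
The KVM_GET_DEBUGREGS/KVM_SET_DEBUGREGS cases added above are ordinary vcpu ioctls, so userspace use is direct. A hedged sketch follows, assuming an already-created vcpu file descriptor and the struct kvm_debugregs layout from <linux/kvm.h> (db[4], dr6, dr7, flags); note the kernel rejects any non-zero flags:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: dump and clear the guest debug registers of an existing vcpu fd. */
static int clear_guest_breakpoints(int vcpu_fd)
{
	struct kvm_debugregs dbg;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0) {
		perror("KVM_GET_DEBUGREGS");
		return -1;
	}

	printf("dr6=%llx dr7=%llx\n",
	       (unsigned long long)dbg.dr6, (unsigned long long)dbg.dr7);

	memset(dbg.db, 0, sizeof(dbg.db));
	dbg.dr7 = 0;
	dbg.flags = 0;		/* must be zero, or the ioctl returns -EINVAL */

	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
}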
@@ -2390,7 +2566,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2390 struct kvm_mem_alias *alias; 2566 struct kvm_mem_alias *alias;
2391 struct kvm_mem_aliases *aliases; 2567 struct kvm_mem_aliases *aliases;
2392 2568
2393 aliases = rcu_dereference(kvm->arch.aliases); 2569 aliases = kvm_aliases(kvm);
2394 2570
2395 for (i = 0; i < aliases->naliases; ++i) { 2571 for (i = 0; i < aliases->naliases; ++i) {
2396 alias = &aliases->aliases[i]; 2572 alias = &aliases->aliases[i];
@@ -2409,7 +2585,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2409 struct kvm_mem_alias *alias; 2585 struct kvm_mem_alias *alias;
2410 struct kvm_mem_aliases *aliases; 2586 struct kvm_mem_aliases *aliases;
2411 2587
2412 aliases = rcu_dereference(kvm->arch.aliases); 2588 aliases = kvm_aliases(kvm);
2413 2589
2414 for (i = 0; i < aliases->naliases; ++i) { 2590 for (i = 0; i < aliases->naliases; ++i) {
2415 alias = &aliases->aliases[i]; 2591 alias = &aliases->aliases[i];
@@ -2804,11 +2980,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
2804 r = -EFAULT; 2980 r = -EFAULT;
2805 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 2981 if (copy_from_user(&irq_event, argp, sizeof irq_event))
2806 goto out; 2982 goto out;
2983 r = -ENXIO;
2807 if (irqchip_in_kernel(kvm)) { 2984 if (irqchip_in_kernel(kvm)) {
2808 __s32 status; 2985 __s32 status;
2809 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2986 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
2810 irq_event.irq, irq_event.level); 2987 irq_event.irq, irq_event.level);
2811 if (ioctl == KVM_IRQ_LINE_STATUS) { 2988 if (ioctl == KVM_IRQ_LINE_STATUS) {
2989 r = -EFAULT;
2812 irq_event.status = status; 2990 irq_event.status = status;
2813 if (copy_to_user(argp, &irq_event, 2991 if (copy_to_user(argp, &irq_event,
2814 sizeof irq_event)) 2992 sizeof irq_event))
@@ -3024,6 +3202,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3024 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3202 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3025} 3203}
3026 3204
3205static void kvm_set_segment(struct kvm_vcpu *vcpu,
3206 struct kvm_segment *var, int seg)
3207{
3208 kvm_x86_ops->set_segment(vcpu, var, seg);
3209}
3210
3211void kvm_get_segment(struct kvm_vcpu *vcpu,
3212 struct kvm_segment *var, int seg)
3213{
3214 kvm_x86_ops->get_segment(vcpu, var, seg);
3215}
3216
3027gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3217gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3028{ 3218{
3029 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3219 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
@@ -3104,14 +3294,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3104 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); 3294 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3105} 3295}
3106 3296
3107static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3297static int kvm_write_guest_virt_system(gva_t addr, void *val,
3108 struct kvm_vcpu *vcpu, u32 *error) 3298 unsigned int bytes,
3299 struct kvm_vcpu *vcpu,
3300 u32 *error)
3109{ 3301{
3110 void *data = val; 3302 void *data = val;
3111 int r = X86EMUL_CONTINUE; 3303 int r = X86EMUL_CONTINUE;
3112 3304
3113 while (bytes) { 3305 while (bytes) {
3114 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); 3306 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr,
3307 PFERR_WRITE_MASK, error);
3115 unsigned offset = addr & (PAGE_SIZE-1); 3308 unsigned offset = addr & (PAGE_SIZE-1);
3116 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3309 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3117 int ret; 3310 int ret;
@@ -3134,7 +3327,6 @@ out:
3134 return r; 3327 return r;
3135} 3328}
3136 3329
3137
3138static int emulator_read_emulated(unsigned long addr, 3330static int emulator_read_emulated(unsigned long addr,
3139 void *val, 3331 void *val,
3140 unsigned int bytes, 3332 unsigned int bytes,
@@ -3237,9 +3429,9 @@ mmio:
3237} 3429}
3238 3430
3239int emulator_write_emulated(unsigned long addr, 3431int emulator_write_emulated(unsigned long addr,
3240 const void *val, 3432 const void *val,
3241 unsigned int bytes, 3433 unsigned int bytes,
3242 struct kvm_vcpu *vcpu) 3434 struct kvm_vcpu *vcpu)
3243{ 3435{
3244 /* Crossing a page boundary? */ 3436 /* Crossing a page boundary? */
3245 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3437 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
@@ -3257,45 +3449,150 @@ int emulator_write_emulated(unsigned long addr,
3257} 3449}
3258EXPORT_SYMBOL_GPL(emulator_write_emulated); 3450EXPORT_SYMBOL_GPL(emulator_write_emulated);
3259 3451
3452#define CMPXCHG_TYPE(t, ptr, old, new) \
3453 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
3454
3455#ifdef CONFIG_X86_64
3456# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
3457#else
3458# define CMPXCHG64(ptr, old, new) \
3459 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
3460#endif
3461
3260static int emulator_cmpxchg_emulated(unsigned long addr, 3462static int emulator_cmpxchg_emulated(unsigned long addr,
3261 const void *old, 3463 const void *old,
3262 const void *new, 3464 const void *new,
3263 unsigned int bytes, 3465 unsigned int bytes,
3264 struct kvm_vcpu *vcpu) 3466 struct kvm_vcpu *vcpu)
3265{ 3467{
3266 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3468 gpa_t gpa;
3267#ifndef CONFIG_X86_64 3469 struct page *page;
3268 /* guests cmpxchg8b have to be emulated atomically */ 3470 char *kaddr;
3269 if (bytes == 8) { 3471 bool exchanged;
3270 gpa_t gpa;
3271 struct page *page;
3272 char *kaddr;
3273 u64 val;
3274 3472
3275 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3473 /* guests cmpxchg8b have to be emulated atomically */
3474 if (bytes > 8 || (bytes & (bytes - 1)))
3475 goto emul_write;
3276 3476
3277 if (gpa == UNMAPPED_GVA || 3477 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
3278 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3279 goto emul_write;
3280 3478
3281 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) 3479 if (gpa == UNMAPPED_GVA ||
3282 goto emul_write; 3480 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3481 goto emul_write;
3283 3482
3284 val = *(u64 *)new; 3483 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3484 goto emul_write;
3285 3485
3286 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3486 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3287 3487
3288 kaddr = kmap_atomic(page, KM_USER0); 3488 kaddr = kmap_atomic(page, KM_USER0);
3289 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 3489 kaddr += offset_in_page(gpa);
3290 kunmap_atomic(kaddr, KM_USER0); 3490 switch (bytes) {
3291 kvm_release_page_dirty(page); 3491 case 1:
3492 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3493 break;
3494 case 2:
3495 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
3496 break;
3497 case 4:
3498 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
3499 break;
3500 case 8:
3501 exchanged = CMPXCHG64(kaddr, old, new);
3502 break;
3503 default:
3504 BUG();
3292 } 3505 }
3506 kunmap_atomic(kaddr, KM_USER0);
3507 kvm_release_page_dirty(page);
3508
3509 if (!exchanged)
3510 return X86EMUL_CMPXCHG_FAILED;
3511
3512 kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
3513
3514 return X86EMUL_CONTINUE;
3515
3293emul_write: 3516emul_write:
3294#endif 3517 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3295 3518
3296 return emulator_write_emulated(addr, new, bytes, vcpu); 3519 return emulator_write_emulated(addr, new, bytes, vcpu);
3297} 3520}
3298 3521
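
The rewritten emulator_cmpxchg_emulated() above maps the guest page and performs a genuine compare-and-swap of the operand size rather than degrading every cmpxchg to a plain write. The same size-dispatch pattern can be shown in ordinary userspace C, with the GCC/Clang __sync builtins standing in for the kernel's cmpxchg()/cmpxchg64(); this is an analogue, not the kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Returns true if *ptr still held *old and was atomically replaced by *new. */
static bool cmpxchg_sized(void *ptr, const void *old, const void *new, unsigned bytes)
{
	/* Only power-of-two sizes up to 8 bytes can be done atomically. */
	if (bytes > 8 || (bytes & (bytes - 1)))
		return false;

	switch (bytes) {
	case 1: {
		uint8_t o, n;
		memcpy(&o, old, 1); memcpy(&n, new, 1);
		return __sync_bool_compare_and_swap((uint8_t *)ptr, o, n);
	}
	case 2: {
		uint16_t o, n;
		memcpy(&o, old, 2); memcpy(&n, new, 2);
		return __sync_bool_compare_and_swap((uint16_t *)ptr, o, n);
	}
	case 4: {
		uint32_t o, n;
		memcpy(&o, old, 4); memcpy(&n, new, 4);
		return __sync_bool_compare_and_swap((uint32_t *)ptr, o, n);
	}
	default: {
		uint64_t o, n;
		memcpy(&o, old, 8); memcpy(&n, new, 8);
		return __sync_bool_compare_and_swap((uint64_t *)ptr, o, n);
	}
	}
}

int main(void)
{
	uint32_t word = 0x1234, old = 0x1234, new = 0x5678;

	printf("exchanged: %d, word now 0x%x\n",
	       cmpxchg_sized(&word, &old, &new, sizeof(word)), word);
	return 0;
}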
3522static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3523{
3524 /* TODO: String I/O for in kernel device */
3525 int r;
3526
3527 if (vcpu->arch.pio.in)
3528 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3529 vcpu->arch.pio.size, pd);
3530 else
3531 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3532 vcpu->arch.pio.port, vcpu->arch.pio.size,
3533 pd);
3534 return r;
3535}
3536
3537
3538static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
3539 unsigned int count, struct kvm_vcpu *vcpu)
3540{
3541 if (vcpu->arch.pio.count)
3542 goto data_avail;
3543
3544 trace_kvm_pio(1, port, size, 1);
3545
3546 vcpu->arch.pio.port = port;
3547 vcpu->arch.pio.in = 1;
3548 vcpu->arch.pio.count = count;
3549 vcpu->arch.pio.size = size;
3550
3551 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3552 data_avail:
3553 memcpy(val, vcpu->arch.pio_data, size * count);
3554 vcpu->arch.pio.count = 0;
3555 return 1;
3556 }
3557
3558 vcpu->run->exit_reason = KVM_EXIT_IO;
3559 vcpu->run->io.direction = KVM_EXIT_IO_IN;
3560 vcpu->run->io.size = size;
3561 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3562 vcpu->run->io.count = count;
3563 vcpu->run->io.port = port;
3564
3565 return 0;
3566}
3567
3568static int emulator_pio_out_emulated(int size, unsigned short port,
3569 const void *val, unsigned int count,
3570 struct kvm_vcpu *vcpu)
3571{
3572 trace_kvm_pio(0, port, size, 1);
3573
3574 vcpu->arch.pio.port = port;
3575 vcpu->arch.pio.in = 0;
3576 vcpu->arch.pio.count = count;
3577 vcpu->arch.pio.size = size;
3578
3579 memcpy(vcpu->arch.pio_data, val, size * count);
3580
3581 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3582 vcpu->arch.pio.count = 0;
3583 return 1;
3584 }
3585
3586 vcpu->run->exit_reason = KVM_EXIT_IO;
3587 vcpu->run->io.direction = KVM_EXIT_IO_OUT;
3588 vcpu->run->io.size = size;
3589 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3590 vcpu->run->io.count = count;
3591 vcpu->run->io.port = port;
3592
3593 return 0;
3594}
3595
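
emulator_pio_in_emulated()/emulator_pio_out_emulated() above either satisfy the port access against an in-kernel device or fill in vcpu->run->io and exit to userspace with KVM_EXIT_IO, with the data placed at KVM_PIO_PAGE_OFFSET inside the kvm_run mapping. The userspace half of that contract might look roughly like the sketch below (run is assumed to point at the mmap'ed kvm_run area; the serial-port handling is a toy):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <linux/kvm.h>

/* Sketch: service a single KVM_EXIT_IO exit; real VMMs dispatch on io.port. */
static void handle_io_exit(struct kvm_run *run)
{
	uint8_t *data = (uint8_t *)run + run->io.data_offset;
	uint32_t i;

	for (i = 0; i < run->io.count; i++, data += run->io.size) {
		if (run->io.direction == KVM_EXIT_IO_OUT) {
			if (run->io.port == 0x3f8 && run->io.size == 1)
				putchar(*data);		/* toy serial console */
		} else {
			/* KVM_EXIT_IO_IN: fill in what the guest will read. */
			memset(data, 0xff, run->io.size);
		}
	}
}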
3299static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 3596static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
3300{ 3597{
3301 return kvm_x86_ops->get_segment_base(vcpu, seg); 3598 return kvm_x86_ops->get_segment_base(vcpu, seg);
@@ -3316,14 +3613,14 @@ int emulate_clts(struct kvm_vcpu *vcpu)
3316 3613
3317int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3614int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
3318{ 3615{
3319 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); 3616 return kvm_get_dr(ctxt->vcpu, dr, dest);
3320} 3617}
3321 3618
3322int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3619int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
3323{ 3620{
3324 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3621 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
3325 3622
3326 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); 3623 return kvm_set_dr(ctxt->vcpu, dr, value & mask);
3327} 3624}
3328 3625
3329void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3626void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -3344,12 +3641,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3344} 3641}
3345EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3642EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3346 3643
3644static u64 mk_cr_64(u64 curr_cr, u32 new_val)
3645{
3646 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
3647}
3648
3649static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
3650{
3651 unsigned long value;
3652
3653 switch (cr) {
3654 case 0:
3655 value = kvm_read_cr0(vcpu);
3656 break;
3657 case 2:
3658 value = vcpu->arch.cr2;
3659 break;
3660 case 3:
3661 value = vcpu->arch.cr3;
3662 break;
3663 case 4:
3664 value = kvm_read_cr4(vcpu);
3665 break;
3666 case 8:
3667 value = kvm_get_cr8(vcpu);
3668 break;
3669 default:
3670 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3671 return 0;
3672 }
3673
3674 return value;
3675}
3676
3677static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
3678{
3679 switch (cr) {
3680 case 0:
3681 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3682 break;
3683 case 2:
3684 vcpu->arch.cr2 = val;
3685 break;
3686 case 3:
3687 kvm_set_cr3(vcpu, val);
3688 break;
3689 case 4:
3690 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3691 break;
3692 case 8:
3693 kvm_set_cr8(vcpu, val & 0xfUL);
3694 break;
3695 default:
3696 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3697 }
3698}
3699
3700static int emulator_get_cpl(struct kvm_vcpu *vcpu)
3701{
3702 return kvm_x86_ops->get_cpl(vcpu);
3703}
3704
3705static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
3706{
3707 kvm_x86_ops->get_gdt(vcpu, dt);
3708}
3709
3710static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
3711 struct kvm_vcpu *vcpu)
3712{
3713 struct kvm_segment var;
3714
3715 kvm_get_segment(vcpu, &var, seg);
3716
3717 if (var.unusable)
3718 return false;
3719
3720 if (var.g)
3721 var.limit >>= 12;
3722 set_desc_limit(desc, var.limit);
3723 set_desc_base(desc, (unsigned long)var.base);
3724 desc->type = var.type;
3725 desc->s = var.s;
3726 desc->dpl = var.dpl;
3727 desc->p = var.present;
3728 desc->avl = var.avl;
3729 desc->l = var.l;
3730 desc->d = var.db;
3731 desc->g = var.g;
3732
3733 return true;
3734}
3735
3736static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
3737 struct kvm_vcpu *vcpu)
3738{
3739 struct kvm_segment var;
3740
3741 /* needed to preserve selector */
3742 kvm_get_segment(vcpu, &var, seg);
3743
3744 var.base = get_desc_base(desc);
3745 var.limit = get_desc_limit(desc);
3746 if (desc->g)
3747 var.limit = (var.limit << 12) | 0xfff;
3748 var.type = desc->type;
3749 var.present = desc->p;
3750 var.dpl = desc->dpl;
3751 var.db = desc->d;
3752 var.s = desc->s;
3753 var.l = desc->l;
3754 var.g = desc->g;
3755 var.avl = desc->avl;
3756 var.present = desc->p;
3757 var.unusable = !var.present;
3758 var.padding = 0;
3759
3760 kvm_set_segment(vcpu, &var, seg);
3761 return;
3762}
3763
3764static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
3765{
3766 struct kvm_segment kvm_seg;
3767
3768 kvm_get_segment(vcpu, &kvm_seg, seg);
3769 return kvm_seg.selector;
3770}
3771
3772static void emulator_set_segment_selector(u16 sel, int seg,
3773 struct kvm_vcpu *vcpu)
3774{
3775 struct kvm_segment kvm_seg;
3776
3777 kvm_get_segment(vcpu, &kvm_seg, seg);
3778 kvm_seg.selector = sel;
3779 kvm_set_segment(vcpu, &kvm_seg, seg);
3780}
3781
3782static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
3783{
3784 kvm_x86_ops->set_rflags(vcpu, rflags);
3785}
3786
3347static struct x86_emulate_ops emulate_ops = { 3787static struct x86_emulate_ops emulate_ops = {
3348 .read_std = kvm_read_guest_virt_system, 3788 .read_std = kvm_read_guest_virt_system,
3789 .write_std = kvm_write_guest_virt_system,
3349 .fetch = kvm_fetch_guest_virt, 3790 .fetch = kvm_fetch_guest_virt,
3350 .read_emulated = emulator_read_emulated, 3791 .read_emulated = emulator_read_emulated,
3351 .write_emulated = emulator_write_emulated, 3792 .write_emulated = emulator_write_emulated,
3352 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3793 .cmpxchg_emulated = emulator_cmpxchg_emulated,
3794 .pio_in_emulated = emulator_pio_in_emulated,
3795 .pio_out_emulated = emulator_pio_out_emulated,
3796 .get_cached_descriptor = emulator_get_cached_descriptor,
3797 .set_cached_descriptor = emulator_set_cached_descriptor,
3798 .get_segment_selector = emulator_get_segment_selector,
3799 .set_segment_selector = emulator_set_segment_selector,
3800 .get_gdt = emulator_get_gdt,
3801 .get_cr = emulator_get_cr,
3802 .set_cr = emulator_set_cr,
3803 .cpl = emulator_get_cpl,
3804 .set_rflags = emulator_set_rflags,
3353}; 3805};
3354 3806
3355static void cache_all_regs(struct kvm_vcpu *vcpu) 3807static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -3380,14 +3832,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3380 cache_all_regs(vcpu); 3832 cache_all_regs(vcpu);
3381 3833
3382 vcpu->mmio_is_write = 0; 3834 vcpu->mmio_is_write = 0;
3383 vcpu->arch.pio.string = 0;
3384 3835
3385 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3836 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
3386 int cs_db, cs_l; 3837 int cs_db, cs_l;
3387 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3838 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3388 3839
3389 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3840 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3390 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3841 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
3842 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
3391 vcpu->arch.emulate_ctxt.mode = 3843 vcpu->arch.emulate_ctxt.mode =
3392 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 3844 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3393 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3845 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
@@ -3396,6 +3848,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3396 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3848 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3397 3849
3398 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3850 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3851 trace_kvm_emulate_insn_start(vcpu);
3399 3852
3400 /* Only allow emulation of specific instructions on #UD 3853 /* Only allow emulation of specific instructions on #UD
3401 * (namely VMMCALL, sysenter, sysexit, syscall)*/ 3854 * (namely VMMCALL, sysenter, sysexit, syscall)*/
@@ -3428,6 +3881,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3428 ++vcpu->stat.insn_emulation; 3881 ++vcpu->stat.insn_emulation;
3429 if (r) { 3882 if (r) {
3430 ++vcpu->stat.insn_emulation_fail; 3883 ++vcpu->stat.insn_emulation_fail;
3884 trace_kvm_emulate_insn_failed(vcpu);
3431 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3885 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3432 return EMULATE_DONE; 3886 return EMULATE_DONE;
3433 return EMULATE_FAIL; 3887 return EMULATE_FAIL;
@@ -3439,16 +3893,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3439 return EMULATE_DONE; 3893 return EMULATE_DONE;
3440 } 3894 }
3441 3895
3896restart:
3442 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3897 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3443 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; 3898 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
3444 3899
3445 if (r == 0) 3900 if (r == 0)
3446 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); 3901 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
3447 3902
3448 if (vcpu->arch.pio.string) 3903 if (vcpu->arch.pio.count) {
3904 if (!vcpu->arch.pio.in)
3905 vcpu->arch.pio.count = 0;
3449 return EMULATE_DO_MMIO; 3906 return EMULATE_DO_MMIO;
3907 }
3450 3908
3451 if ((r || vcpu->mmio_is_write) && run) { 3909 if (r || vcpu->mmio_is_write) {
3452 run->exit_reason = KVM_EXIT_MMIO; 3910 run->exit_reason = KVM_EXIT_MMIO;
3453 run->mmio.phys_addr = vcpu->mmio_phys_addr; 3911 run->mmio.phys_addr = vcpu->mmio_phys_addr;
3454 memcpy(run->mmio.data, vcpu->mmio_data, 8); 3912 memcpy(run->mmio.data, vcpu->mmio_data, 8);
@@ -3458,222 +3916,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3458 3916
3459 if (r) { 3917 if (r) {
3460 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3918 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3461 return EMULATE_DONE; 3919 goto done;
3462 if (!vcpu->mmio_needed) { 3920 if (!vcpu->mmio_needed) {
3921 ++vcpu->stat.insn_emulation_fail;
3922 trace_kvm_emulate_insn_failed(vcpu);
3463 kvm_report_emulation_failure(vcpu, "mmio"); 3923 kvm_report_emulation_failure(vcpu, "mmio");
3464 return EMULATE_FAIL; 3924 return EMULATE_FAIL;
3465 } 3925 }
3466 return EMULATE_DO_MMIO; 3926 return EMULATE_DO_MMIO;
3467 } 3927 }
3468 3928
3469 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
3470
3471 if (vcpu->mmio_is_write) { 3929 if (vcpu->mmio_is_write) {
3472 vcpu->mmio_needed = 0; 3930 vcpu->mmio_needed = 0;
3473 return EMULATE_DO_MMIO; 3931 return EMULATE_DO_MMIO;
3474 } 3932 }
3475 3933
3476 return EMULATE_DONE; 3934done:
3477} 3935 if (vcpu->arch.exception.pending)
3478EXPORT_SYMBOL_GPL(emulate_instruction); 3936 vcpu->arch.emulate_ctxt.restart = false;
3479
3480static int pio_copy_data(struct kvm_vcpu *vcpu)
3481{
3482 void *p = vcpu->arch.pio_data;
3483 gva_t q = vcpu->arch.pio.guest_gva;
3484 unsigned bytes;
3485 int ret;
3486 u32 error_code;
3487
3488 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
3489 if (vcpu->arch.pio.in)
3490 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
3491 else
3492 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3493
3494 if (ret == X86EMUL_PROPAGATE_FAULT)
3495 kvm_inject_page_fault(vcpu, q, error_code);
3496
3497 return ret;
3498}
3499
3500int complete_pio(struct kvm_vcpu *vcpu)
3501{
3502 struct kvm_pio_request *io = &vcpu->arch.pio;
3503 long delta;
3504 int r;
3505 unsigned long val;
3506
3507 if (!io->string) {
3508 if (io->in) {
3509 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3510 memcpy(&val, vcpu->arch.pio_data, io->size);
3511 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
3512 }
3513 } else {
3514 if (io->in) {
3515 r = pio_copy_data(vcpu);
3516 if (r)
3517 goto out;
3518 }
3519
3520 delta = 1;
3521 if (io->rep) {
3522 delta *= io->cur_count;
3523 /*
3524 * The size of the register should really depend on
3525 * current address size.
3526 */
3527 val = kvm_register_read(vcpu, VCPU_REGS_RCX);
3528 val -= delta;
3529 kvm_register_write(vcpu, VCPU_REGS_RCX, val);
3530 }
3531 if (io->down)
3532 delta = -delta;
3533 delta *= io->size;
3534 if (io->in) {
3535 val = kvm_register_read(vcpu, VCPU_REGS_RDI);
3536 val += delta;
3537 kvm_register_write(vcpu, VCPU_REGS_RDI, val);
3538 } else {
3539 val = kvm_register_read(vcpu, VCPU_REGS_RSI);
3540 val += delta;
3541 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
3542 }
3543 }
3544out:
3545 io->count -= io->cur_count;
3546 io->cur_count = 0;
3547
3548 return 0;
3549}
3550
3551static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3552{
3553 /* TODO: String I/O for in kernel device */
3554 int r;
3555
3556 if (vcpu->arch.pio.in)
3557 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3558 vcpu->arch.pio.size, pd);
3559 else
3560 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3561 vcpu->arch.pio.port, vcpu->arch.pio.size,
3562 pd);
3563 return r;
3564}
3565 3937
3566static int pio_string_write(struct kvm_vcpu *vcpu) 3938 if (vcpu->arch.emulate_ctxt.restart)
3567{ 3939 goto restart;
3568 struct kvm_pio_request *io = &vcpu->arch.pio;
3569 void *pd = vcpu->arch.pio_data;
3570 int i, r = 0;
3571 3940
3572 for (i = 0; i < io->cur_count; i++) { 3941 return EMULATE_DONE;
3573 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3574 io->port, io->size, pd)) {
3575 r = -EOPNOTSUPP;
3576 break;
3577 }
3578 pd += io->size;
3579 }
3580 return r;
3581}
3582
3583int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3584{
3585 unsigned long val;
3586
3587 trace_kvm_pio(!in, port, size, 1);
3588
3589 vcpu->run->exit_reason = KVM_EXIT_IO;
3590 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3591 vcpu->run->io.size = vcpu->arch.pio.size = size;
3592 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3593 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
3594 vcpu->run->io.port = vcpu->arch.pio.port = port;
3595 vcpu->arch.pio.in = in;
3596 vcpu->arch.pio.string = 0;
3597 vcpu->arch.pio.down = 0;
3598 vcpu->arch.pio.rep = 0;
3599
3600 if (!vcpu->arch.pio.in) {
3601 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3602 memcpy(vcpu->arch.pio_data, &val, 4);
3603 }
3604
3605 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3606 complete_pio(vcpu);
3607 return 1;
3608 }
3609 return 0;
3610} 3942}
3611EXPORT_SYMBOL_GPL(kvm_emulate_pio); 3943EXPORT_SYMBOL_GPL(emulate_instruction);
3612 3944
3613int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, 3945int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
3614 int size, unsigned long count, int down,
3615 gva_t address, int rep, unsigned port)
3616{ 3946{
3617 unsigned now, in_page; 3947 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3618 int ret = 0; 3948 int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
3619 3949 /* do not return to emulator after return from userspace */
3620 trace_kvm_pio(!in, port, size, count); 3950 vcpu->arch.pio.count = 0;
3621
3622 vcpu->run->exit_reason = KVM_EXIT_IO;
3623 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3624 vcpu->run->io.size = vcpu->arch.pio.size = size;
3625 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3626 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
3627 vcpu->run->io.port = vcpu->arch.pio.port = port;
3628 vcpu->arch.pio.in = in;
3629 vcpu->arch.pio.string = 1;
3630 vcpu->arch.pio.down = down;
3631 vcpu->arch.pio.rep = rep;
3632
3633 if (!count) {
3634 kvm_x86_ops->skip_emulated_instruction(vcpu);
3635 return 1;
3636 }
3637
3638 if (!down)
3639 in_page = PAGE_SIZE - offset_in_page(address);
3640 else
3641 in_page = offset_in_page(address) + size;
3642 now = min(count, (unsigned long)in_page / size);
3643 if (!now)
3644 now = 1;
3645 if (down) {
3646 /*
3647 * String I/O in reverse. Yuck. Kill the guest, fix later.
3648 */
3649 pr_unimpl(vcpu, "guest string pio down\n");
3650 kvm_inject_gp(vcpu, 0);
3651 return 1;
3652 }
3653 vcpu->run->io.count = now;
3654 vcpu->arch.pio.cur_count = now;
3655
3656 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
3657 kvm_x86_ops->skip_emulated_instruction(vcpu);
3658
3659 vcpu->arch.pio.guest_gva = address;
3660
3661 if (!vcpu->arch.pio.in) {
3662 /* string PIO write */
3663 ret = pio_copy_data(vcpu);
3664 if (ret == X86EMUL_PROPAGATE_FAULT)
3665 return 1;
3666 if (ret == 0 && !pio_string_write(vcpu)) {
3667 complete_pio(vcpu);
3668 if (vcpu->arch.pio.count == 0)
3669 ret = 1;
3670 }
3671 }
3672 /* no string PIO read support yet */
3673
3674 return ret; 3951 return ret;
3675} 3952}
3676EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 3953EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
3677 3954
3678static void bounce_off(void *info) 3955static void bounce_off(void *info)
3679{ 3956{
@@ -3996,85 +4273,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3996 return emulator_write_emulated(rip, instruction, 3, vcpu); 4273 return emulator_write_emulated(rip, instruction, 3, vcpu);
3997} 4274}
3998 4275
3999static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4000{
4001 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4002}
4003
4004void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4276void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4005{ 4277{
4006 struct descriptor_table dt = { limit, base }; 4278 struct desc_ptr dt = { limit, base };
4007 4279
4008 kvm_x86_ops->set_gdt(vcpu, &dt); 4280 kvm_x86_ops->set_gdt(vcpu, &dt);
4009} 4281}
4010 4282
4011void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4283void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4012{ 4284{
4013 struct descriptor_table dt = { limit, base }; 4285 struct desc_ptr dt = { limit, base };
4014 4286
4015 kvm_x86_ops->set_idt(vcpu, &dt); 4287 kvm_x86_ops->set_idt(vcpu, &dt);
4016} 4288}
4017 4289
4018void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
4019 unsigned long *rflags)
4020{
4021 kvm_lmsw(vcpu, msw);
4022 *rflags = kvm_get_rflags(vcpu);
4023}
4024
4025unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
4026{
4027 unsigned long value;
4028
4029 switch (cr) {
4030 case 0:
4031 value = kvm_read_cr0(vcpu);
4032 break;
4033 case 2:
4034 value = vcpu->arch.cr2;
4035 break;
4036 case 3:
4037 value = vcpu->arch.cr3;
4038 break;
4039 case 4:
4040 value = kvm_read_cr4(vcpu);
4041 break;
4042 case 8:
4043 value = kvm_get_cr8(vcpu);
4044 break;
4045 default:
4046 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4047 return 0;
4048 }
4049
4050 return value;
4051}
4052
4053void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
4054 unsigned long *rflags)
4055{
4056 switch (cr) {
4057 case 0:
4058 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4059 *rflags = kvm_get_rflags(vcpu);
4060 break;
4061 case 2:
4062 vcpu->arch.cr2 = val;
4063 break;
4064 case 3:
4065 kvm_set_cr3(vcpu, val);
4066 break;
4067 case 4:
4068 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4069 break;
4070 case 8:
4071 kvm_set_cr8(vcpu, val & 0xfUL);
4072 break;
4073 default:
4074 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4075 }
4076}
4077
4078static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) 4290static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
4079{ 4291{
4080 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; 4292 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
@@ -4138,9 +4350,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
4138{ 4350{
4139 struct kvm_cpuid_entry2 *best; 4351 struct kvm_cpuid_entry2 *best;
4140 4352
4353 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
4354 if (!best || best->eax < 0x80000008)
4355 goto not_found;
4141 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); 4356 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
4142 if (best) 4357 if (best)
4143 return best->eax & 0xff; 4358 return best->eax & 0xff;
4359not_found:
4144 return 36; 4360 return 36;
4145} 4361}
4146 4362
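
The cpuid_maxphyaddr() change above stops trusting leaf 0x80000008 unless leaf 0x80000000 reports that it exists, falling back to the 36-bit default. The same probe as a guest or userspace tool would perform it (x86-only, raw CPUID as in the earlier sketch):

#include <stdint.h>
#include <stdio.h>

static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	__asm__ volatile("cpuid"
			 : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
			 : "a"(leaf), "c"(0));
}

static int max_phys_addr_bits(void)
{
	uint32_t a, b, c, d;

	cpuid(0x80000000, &a, &b, &c, &d);
	if (a < 0x80000008)
		return 36;		/* leaf absent: fall back to 36 bits */

	cpuid(0x80000008, &a, &b, &c, &d);
	return a & 0xff;		/* EAX[7:0] = physical address width */
}

int main(void)
{
	printf("MAXPHYADDR = %d bits\n", max_phys_addr_bits());
	return 0;
}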
@@ -4254,9 +4470,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
4254{ 4470{
4255 /* try to reinject previous events if any */ 4471 /* try to reinject previous events if any */
4256 if (vcpu->arch.exception.pending) { 4472 if (vcpu->arch.exception.pending) {
4473 trace_kvm_inj_exception(vcpu->arch.exception.nr,
4474 vcpu->arch.exception.has_error_code,
4475 vcpu->arch.exception.error_code);
4257 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 4476 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
4258 vcpu->arch.exception.has_error_code, 4477 vcpu->arch.exception.has_error_code,
4259 vcpu->arch.exception.error_code); 4478 vcpu->arch.exception.error_code,
4479 vcpu->arch.exception.reinject);
4260 return; 4480 return;
4261 } 4481 }
4262 4482
@@ -4486,7 +4706,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
4486 } 4706 }
4487 4707
4488 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4708 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4489 post_kvm_run_save(vcpu);
4490 4709
4491 vapic_exit(vcpu); 4710 vapic_exit(vcpu);
4492 4711
@@ -4514,26 +4733,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4514 if (!irqchip_in_kernel(vcpu->kvm)) 4733 if (!irqchip_in_kernel(vcpu->kvm))
4515 kvm_set_cr8(vcpu, kvm_run->cr8); 4734 kvm_set_cr8(vcpu, kvm_run->cr8);
4516 4735
4517 if (vcpu->arch.pio.cur_count) { 4736 if (vcpu->arch.pio.count || vcpu->mmio_needed ||
4518 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4737 vcpu->arch.emulate_ctxt.restart) {
4519 r = complete_pio(vcpu); 4738 if (vcpu->mmio_needed) {
4520 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4739 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
4521 if (r) 4740 vcpu->mmio_read_completed = 1;
4522 goto out; 4741 vcpu->mmio_needed = 0;
4523 } 4742 }
4524 if (vcpu->mmio_needed) {
4525 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
4526 vcpu->mmio_read_completed = 1;
4527 vcpu->mmio_needed = 0;
4528
4529 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4743 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4530 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4744 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
4531 EMULTYPE_NO_DECODE);
4532 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4745 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4533 if (r == EMULATE_DO_MMIO) { 4746 if (r == EMULATE_DO_MMIO) {
4534 /*
4535 * Read-modify-write. Back to userspace.
4536 */
4537 r = 0; 4747 r = 0;
4538 goto out; 4748 goto out;
4539 } 4749 }
@@ -4545,6 +4755,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4545 r = __vcpu_run(vcpu); 4755 r = __vcpu_run(vcpu);
4546 4756
4547out: 4757out:
4758 post_kvm_run_save(vcpu);
4548 if (vcpu->sigset_active) 4759 if (vcpu->sigset_active)
4549 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4760 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
4550 4761
@@ -4616,12 +4827,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4616 return 0; 4827 return 0;
4617} 4828}
4618 4829
4619void kvm_get_segment(struct kvm_vcpu *vcpu,
4620 struct kvm_segment *var, int seg)
4621{
4622 kvm_x86_ops->get_segment(vcpu, var, seg);
4623}
4624
4625void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 4830void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4626{ 4831{
4627 struct kvm_segment cs; 4832 struct kvm_segment cs;
@@ -4635,7 +4840,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
4635int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4840int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4636 struct kvm_sregs *sregs) 4841 struct kvm_sregs *sregs)
4637{ 4842{
4638 struct descriptor_table dt; 4843 struct desc_ptr dt;
4639 4844
4640 vcpu_load(vcpu); 4845 vcpu_load(vcpu);
4641 4846
@@ -4650,11 +4855,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4650 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 4855 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
4651 4856
4652 kvm_x86_ops->get_idt(vcpu, &dt); 4857 kvm_x86_ops->get_idt(vcpu, &dt);
4653 sregs->idt.limit = dt.limit; 4858 sregs->idt.limit = dt.size;
4654 sregs->idt.base = dt.base; 4859 sregs->idt.base = dt.address;
4655 kvm_x86_ops->get_gdt(vcpu, &dt); 4860 kvm_x86_ops->get_gdt(vcpu, &dt);
4656 sregs->gdt.limit = dt.limit; 4861 sregs->gdt.limit = dt.size;
4657 sregs->gdt.base = dt.base; 4862 sregs->gdt.base = dt.address;
4658 4863
4659 sregs->cr0 = kvm_read_cr0(vcpu); 4864 sregs->cr0 = kvm_read_cr0(vcpu);
4660 sregs->cr2 = vcpu->arch.cr2; 4865 sregs->cr2 = vcpu->arch.cr2;
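Here the private struct descriptor_table (base/limit) gives way to the architecture's struct desc_ptr, whose members are named address and size; only the field names change, not the values copied into the ABI-stable struct kvm_dtable. A sketch of the mapping; the helper name is hypothetical, and struct desc_ptr is assumed to be the asm/desc_defs.h definition.

#include <asm/desc_defs.h>	/* struct desc_ptr: 16-bit size, unsigned long address */
#include <linux/kvm.h>		/* struct kvm_dtable: base, limit */

/* Hypothetical helper showing the rename: size -> limit, address -> base. */
static void desc_ptr_to_kvm_dtable(const struct desc_ptr *dt,
				   struct kvm_dtable *out)
{
	out->limit = dt->size;		/* 16-bit table limit */
	out->base  = dt->address;	/* linear base address */
}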
@@ -4693,563 +4898,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4693 return 0; 4898 return 0;
4694} 4899}
4695 4900
4696static void kvm_set_segment(struct kvm_vcpu *vcpu, 4901int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
4697 struct kvm_segment *var, int seg) 4902 bool has_error_code, u32 error_code)
4698{
4699 kvm_x86_ops->set_segment(vcpu, var, seg);
4700}
4701
4702static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
4703 struct kvm_segment *kvm_desct)
4704{
4705 kvm_desct->base = get_desc_base(seg_desc);
4706 kvm_desct->limit = get_desc_limit(seg_desc);
4707 if (seg_desc->g) {
4708 kvm_desct->limit <<= 12;
4709 kvm_desct->limit |= 0xfff;
4710 }
4711 kvm_desct->selector = selector;
4712 kvm_desct->type = seg_desc->type;
4713 kvm_desct->present = seg_desc->p;
4714 kvm_desct->dpl = seg_desc->dpl;
4715 kvm_desct->db = seg_desc->d;
4716 kvm_desct->s = seg_desc->s;
4717 kvm_desct->l = seg_desc->l;
4718 kvm_desct->g = seg_desc->g;
4719 kvm_desct->avl = seg_desc->avl;
4720 if (!selector)
4721 kvm_desct->unusable = 1;
4722 else
4723 kvm_desct->unusable = 0;
4724 kvm_desct->padding = 0;
4725}
4726
4727static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
4728 u16 selector,
4729 struct descriptor_table *dtable)
4730{
4731 if (selector & 1 << 2) {
4732 struct kvm_segment kvm_seg;
4733
4734 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
4735
4736 if (kvm_seg.unusable)
4737 dtable->limit = 0;
4738 else
4739 dtable->limit = kvm_seg.limit;
4740 dtable->base = kvm_seg.base;
4741 }
4742 else
4743 kvm_x86_ops->get_gdt(vcpu, dtable);
4744}
4745
4746/* allowed just for 8-byte segments */
4747static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4748 struct desc_struct *seg_desc)
4749{
4750 struct descriptor_table dtable;
4751 u16 index = selector >> 3;
4752 int ret;
4753 u32 err;
4754 gva_t addr;
4755
4756 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4757
4758 if (dtable.limit < index * 8 + 7) {
4759 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4760 return X86EMUL_PROPAGATE_FAULT;
4761 }
4762 addr = dtable.base + index * 8;
4763 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4764 vcpu, &err);
4765 if (ret == X86EMUL_PROPAGATE_FAULT)
4766 kvm_inject_page_fault(vcpu, addr, err);
4767
4768 return ret;
4769}
4770
4771/* allowed just for 8-byte segments */
4772static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4773 struct desc_struct *seg_desc)
4774{
4775 struct descriptor_table dtable;
4776 u16 index = selector >> 3;
4777
4778 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4779
4780 if (dtable.limit < index * 8 + 7)
4781 return 1;
4782 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4783}
4784
4785static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4786 struct desc_struct *seg_desc)
4787{
4788 u32 base_addr = get_desc_base(seg_desc);
4789
4790 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4791}
4792
4793static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4794 struct desc_struct *seg_desc)
4795{
4796 u32 base_addr = get_desc_base(seg_desc);
4797
4798 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4799}
4800
4801static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4802{
4803 struct kvm_segment kvm_seg;
4804
4805 kvm_get_segment(vcpu, &kvm_seg, seg);
4806 return kvm_seg.selector;
4807}
4808
4809static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4810{
4811 struct kvm_segment segvar = {
4812 .base = selector << 4,
4813 .limit = 0xffff,
4814 .selector = selector,
4815 .type = 3,
4816 .present = 1,
4817 .dpl = 3,
4818 .db = 0,
4819 .s = 1,
4820 .l = 0,
4821 .g = 0,
4822 .avl = 0,
4823 .unusable = 0,
4824 };
4825 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4826 return X86EMUL_CONTINUE;
4827}
4828
4829static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4830{ 4903{
4831 return (seg != VCPU_SREG_LDTR) && 4904 int cs_db, cs_l, ret;
4832 (seg != VCPU_SREG_TR) && 4905 cache_all_regs(vcpu);
4833 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4834}
4835
4836int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4837{
4838 struct kvm_segment kvm_seg;
4839 struct desc_struct seg_desc;
4840 u8 dpl, rpl, cpl;
4841 unsigned err_vec = GP_VECTOR;
4842 u32 err_code = 0;
4843 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4844 int ret;
4845 4906
4846 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) 4907 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4847 return kvm_load_realmode_segment(vcpu, selector, seg);
4848 4908
4849 /* NULL selector is not valid for TR, CS and SS */ 4909 vcpu->arch.emulate_ctxt.vcpu = vcpu;
4850 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) 4910 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
4851 && null_selector) 4911 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
4852 goto exception; 4912 vcpu->arch.emulate_ctxt.mode =
4913 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4914 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
4915 ? X86EMUL_MODE_VM86 : cs_l
4916 ? X86EMUL_MODE_PROT64 : cs_db
4917 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
4853 4918
4854 /* TR should be in GDT only */ 4919 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
4855 if (seg == VCPU_SREG_TR && (selector & (1 << 2))) 4920 tss_selector, reason, has_error_code,
4856 goto exception; 4921 error_code);
4857 4922
4858 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4859 if (ret) 4923 if (ret)
4860 return ret; 4924 return EMULATE_FAIL;
4861
4862 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4863
4864 if (null_selector) { /* for NULL selector skip all following checks */
4865 kvm_seg.unusable = 1;
4866 goto load;
4867 }
4868
4869 err_code = selector & 0xfffc;
4870 err_vec = GP_VECTOR;
4871
4872 /* can't load a system descriptor into a segment selector */
4873 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4874 goto exception;
4875
4876 if (!kvm_seg.present) {
4877 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4878 goto exception;
4879 }
4880
4881 rpl = selector & 3;
4882 dpl = kvm_seg.dpl;
4883 cpl = kvm_x86_ops->get_cpl(vcpu);
4884
4885 switch (seg) {
4886 case VCPU_SREG_SS:
4887 /*
4888 * segment is not a writable data segment, or the segment
4889 * selector's RPL != CPL, or the descriptor's DPL != CPL
4890 */
4891 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4892 goto exception;
4893 break;
4894 case VCPU_SREG_CS:
4895 if (!(kvm_seg.type & 8))
4896 goto exception;
4897
4898 if (kvm_seg.type & 4) {
4899 /* conforming */
4900 if (dpl > cpl)
4901 goto exception;
4902 } else {
4903 /* nonconforming */
4904 if (rpl > cpl || dpl != cpl)
4905 goto exception;
4906 }
4907 /* CS(RPL) <- CPL */
4908 selector = (selector & 0xfffc) | cpl;
4909 break;
4910 case VCPU_SREG_TR:
4911 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4912 goto exception;
4913 break;
4914 case VCPU_SREG_LDTR:
4915 if (kvm_seg.s || kvm_seg.type != 2)
4916 goto exception;
4917 break;
4918 default: /* DS, ES, FS, or GS */
4919 /*
4920 * segment is not a data or readable code segment or
4921 * ((segment is a data or nonconforming code segment)
4922 * and (both RPL and CPL > DPL))
4923 */
4924 if ((kvm_seg.type & 0xa) == 0x8 ||
4925 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4926 goto exception;
4927 break;
4928 }
4929
4930 if (!kvm_seg.unusable && kvm_seg.s) {
4931 /* mark segment as accessed */
4932 kvm_seg.type |= 1;
4933 seg_desc.type |= 1;
4934 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4935 }
4936load:
4937 kvm_set_segment(vcpu, &kvm_seg, seg);
4938 return X86EMUL_CONTINUE;
4939exception:
4940 kvm_queue_exception_e(vcpu, err_vec, err_code);
4941 return X86EMUL_PROPAGATE_FAULT;
4942}
4943
4944static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4945 struct tss_segment_32 *tss)
4946{
4947 tss->cr3 = vcpu->arch.cr3;
4948 tss->eip = kvm_rip_read(vcpu);
4949 tss->eflags = kvm_get_rflags(vcpu);
4950 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4951 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4952 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
4953 tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4954 tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
4955 tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
4956 tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
4957 tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
4958 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
4959 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
4960 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
4961 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
4962 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
4963 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
4964 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4965}
4966
4967static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4968{
4969 struct kvm_segment kvm_seg;
4970 kvm_get_segment(vcpu, &kvm_seg, seg);
4971 kvm_seg.selector = sel;
4972 kvm_set_segment(vcpu, &kvm_seg, seg);
4973}
4974
4975static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4976 struct tss_segment_32 *tss)
4977{
4978 kvm_set_cr3(vcpu, tss->cr3);
4979
4980 kvm_rip_write(vcpu, tss->eip);
4981 kvm_set_rflags(vcpu, tss->eflags | 2);
4982
4983 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
4984 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
4985 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
4986 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
4987 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
4988 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
4989 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
4990 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
4991
4992 /*
4993 * SDM says that segment selectors are loaded before segment
4994 * descriptors
4995 */
4996 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
4997 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
4998 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
4999 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5000 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5001 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
5002 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
5003
5004 /*
5005 * Now load segment descriptors. If a fault happens at this stage
5006 * it is handled in the context of the new task
5007 */
5008 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
5009 return 1;
5010
5011 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
5012 return 1;
5013 4925
5014 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 4926 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
5015 return 1; 4927 return EMULATE_DONE;
5016
5017 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
5018 return 1;
5019
5020 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
5021 return 1;
5022
5023 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
5024 return 1;
5025
5026 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
5027 return 1;
5028 return 0;
5029}
5030
5031static void save_state_to_tss16(struct kvm_vcpu *vcpu,
5032 struct tss_segment_16 *tss)
5033{
5034 tss->ip = kvm_rip_read(vcpu);
5035 tss->flag = kvm_get_rflags(vcpu);
5036 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5037 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5038 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5039 tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5040 tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
5041 tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
5042 tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
5043 tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
5044
5045 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
5046 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
5047 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
5048 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
5049 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
5050}
5051
5052static int load_state_from_tss16(struct kvm_vcpu *vcpu,
5053 struct tss_segment_16 *tss)
5054{
5055 kvm_rip_write(vcpu, tss->ip);
5056 kvm_set_rflags(vcpu, tss->flag | 2);
5057 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
5058 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
5059 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
5060 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
5061 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
5062 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
5063 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
5064 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
5065
5066 /*
5067 * SDM says that segment selectors are loaded before segment
5068 * descriptors
5069 */
5070 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5071 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5072 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5073 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5074 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5075
5076 /*
5077 * Now load segment descriptors. If a fault happens at this stage
5078 * it is handled in the context of the new task
5079 */
5080 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
5081 return 1;
5082
5083 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
5084 return 1;
5085
5086 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
5087 return 1;
5088
5089 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
5090 return 1;
5091
5092 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
5093 return 1;
5094 return 0;
5095}
5096
5097static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
5098 u16 old_tss_sel, u32 old_tss_base,
5099 struct desc_struct *nseg_desc)
5100{
5101 struct tss_segment_16 tss_segment_16;
5102 int ret = 0;
5103
5104 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5105 sizeof tss_segment_16))
5106 goto out;
5107
5108 save_state_to_tss16(vcpu, &tss_segment_16);
5109
5110 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5111 sizeof tss_segment_16))
5112 goto out;
5113
5114 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5115 &tss_segment_16, sizeof tss_segment_16))
5116 goto out;
5117
5118 if (old_tss_sel != 0xffff) {
5119 tss_segment_16.prev_task_link = old_tss_sel;
5120
5121 if (kvm_write_guest(vcpu->kvm,
5122 get_tss_base_addr_write(vcpu, nseg_desc),
5123 &tss_segment_16.prev_task_link,
5124 sizeof tss_segment_16.prev_task_link))
5125 goto out;
5126 }
5127
5128 if (load_state_from_tss16(vcpu, &tss_segment_16))
5129 goto out;
5130
5131 ret = 1;
5132out:
5133 return ret;
5134}
5135
5136static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
5137 u16 old_tss_sel, u32 old_tss_base,
5138 struct desc_struct *nseg_desc)
5139{
5140 struct tss_segment_32 tss_segment_32;
5141 int ret = 0;
5142
5143 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5144 sizeof tss_segment_32))
5145 goto out;
5146
5147 save_state_to_tss32(vcpu, &tss_segment_32);
5148
5149 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5150 sizeof tss_segment_32))
5151 goto out;
5152
5153 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5154 &tss_segment_32, sizeof tss_segment_32))
5155 goto out;
5156
5157 if (old_tss_sel != 0xffff) {
5158 tss_segment_32.prev_task_link = old_tss_sel;
5159
5160 if (kvm_write_guest(vcpu->kvm,
5161 get_tss_base_addr_write(vcpu, nseg_desc),
5162 &tss_segment_32.prev_task_link,
5163 sizeof tss_segment_32.prev_task_link))
5164 goto out;
5165 }
5166
5167 if (load_state_from_tss32(vcpu, &tss_segment_32))
5168 goto out;
5169
5170 ret = 1;
5171out:
5172 return ret;
5173}
5174
5175int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5176{
5177 struct kvm_segment tr_seg;
5178 struct desc_struct cseg_desc;
5179 struct desc_struct nseg_desc;
5180 int ret = 0;
5181 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
5182 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5183 u32 desc_limit;
5184
5185 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
5186
5187 /* FIXME: Handle errors. Failure to read either TSS or its
5188 * descriptor should generate a page fault.
5189 */
5190 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
5191 goto out;
5192
5193 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
5194 goto out;
5195
5196 if (reason != TASK_SWITCH_IRET) {
5197 int cpl;
5198
5199 cpl = kvm_x86_ops->get_cpl(vcpu);
5200 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
5201 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
5202 return 1;
5203 }
5204 }
5205
5206 desc_limit = get_desc_limit(&nseg_desc);
5207 if (!nseg_desc.p ||
5208 ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
5209 desc_limit < 0x2b)) {
5210 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
5211 return 1;
5212 }
5213
5214 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
5215 cseg_desc.type &= ~(1 << 1); /* clear the B (busy) flag */
5216 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
5217 }
5218
5219 if (reason == TASK_SWITCH_IRET) {
5220 u32 eflags = kvm_get_rflags(vcpu);
5221 kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
5222 }
5223
5224 /* set the back link to the previous task only if the NT bit is set in
5225 eflags; note that old_tss_sel is not used after this point */
5226 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
5227 old_tss_sel = 0xffff;
5228
5229 if (nseg_desc.type & 8)
5230 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
5231 old_tss_base, &nseg_desc);
5232 else
5233 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
5234 old_tss_base, &nseg_desc);
5235
5236 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
5237 u32 eflags = kvm_get_rflags(vcpu);
5238 kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
5239 }
5240
5241 if (reason != TASK_SWITCH_IRET) {
5242 nseg_desc.type |= (1 << 1);
5243 save_guest_segment_descriptor(vcpu, tss_selector,
5244 &nseg_desc);
5245 }
5246
5247 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
5248 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
5249 tr_seg.type = 11;
5250 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
5251out:
5252 return ret;
5253} 4928}
5254EXPORT_SYMBOL_GPL(kvm_task_switch); 4929EXPORT_SYMBOL_GPL(kvm_task_switch);
5255 4930
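The open-coded 16/32-bit TSS handling above is gone; kvm_task_switch() is now a thin wrapper that fills vcpu->arch.emulate_ctxt, picks the emulation mode from the protection/VM86/CS state, and hands the heavy lifting to emulator_task_switch(), with callers supplying has_error_code/error_code. A hedged sketch of how a vendor intercept handler might use the new signature; the handler name and the internal-error reporting are illustrative, not the actual VMX/SVM code.

/* Sketch of a task-switch intercept handler built on the new API. */
static int handle_task_switch_sketch(struct kvm_vcpu *vcpu, u16 tss_selector,
				     int reason, bool has_error_code,
				     u32 error_code)
{
	if (kvm_task_switch(vcpu, tss_selector, reason,
			    has_error_code, error_code) == EMULATE_FAIL) {
		/* Emulation failed: bounce the problem to userspace. */
		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return 0;	/* exit to userspace */
	}
	return 1;	/* keep running the guest */
}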
@@ -5258,15 +4933,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5258{ 4933{
5259 int mmu_reset_needed = 0; 4934 int mmu_reset_needed = 0;
5260 int pending_vec, max_bits; 4935 int pending_vec, max_bits;
5261 struct descriptor_table dt; 4936 struct desc_ptr dt;
5262 4937
5263 vcpu_load(vcpu); 4938 vcpu_load(vcpu);
5264 4939
5265 dt.limit = sregs->idt.limit; 4940 dt.size = sregs->idt.limit;
5266 dt.base = sregs->idt.base; 4941 dt.address = sregs->idt.base;
5267 kvm_x86_ops->set_idt(vcpu, &dt); 4942 kvm_x86_ops->set_idt(vcpu, &dt);
5268 dt.limit = sregs->gdt.limit; 4943 dt.size = sregs->gdt.limit;
5269 dt.base = sregs->gdt.base; 4944 dt.address = sregs->gdt.base;
5270 kvm_x86_ops->set_gdt(vcpu, &dt); 4945 kvm_x86_ops->set_gdt(vcpu, &dt);
5271 4946
5272 vcpu->arch.cr2 = sregs->cr2; 4947 vcpu->arch.cr2 = sregs->cr2;
@@ -5365,11 +5040,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5365 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 5040 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
5366 } 5041 }
5367 5042
5368 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 5043 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5369 vcpu->arch.singlestep_cs = 5044 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
5370 get_segment_selector(vcpu, VCPU_SREG_CS); 5045 get_segment_base(vcpu, VCPU_SREG_CS);
5371 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
5372 }
5373 5046
5374 /* 5047 /*
5375 * Trigger an rflags update that will inject or remove the trace 5048 * Trigger an rflags update that will inject or remove the trace
@@ -5860,13 +5533,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
5860 return kvm_x86_ops->interrupt_allowed(vcpu); 5533 return kvm_x86_ops->interrupt_allowed(vcpu);
5861} 5534}
5862 5535
5536bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
5537{
5538 unsigned long current_rip = kvm_rip_read(vcpu) +
5539 get_segment_base(vcpu, VCPU_SREG_CS);
5540
5541 return current_rip == linear_rip;
5542}
5543EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
5544
5863unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) 5545unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
5864{ 5546{
5865 unsigned long rflags; 5547 unsigned long rflags;
5866 5548
5867 rflags = kvm_x86_ops->get_rflags(vcpu); 5549 rflags = kvm_x86_ops->get_rflags(vcpu);
5868 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5550 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5869 rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); 5551 rflags &= ~X86_EFLAGS_TF;
5870 return rflags; 5552 return rflags;
5871} 5553}
5872EXPORT_SYMBOL_GPL(kvm_get_rflags); 5554EXPORT_SYMBOL_GPL(kvm_get_rflags);
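Single-step tracking is likewise simplified: instead of a CS selector plus RIP pair, the guest-debug path records one linear value (RIP plus the CS base), and kvm_is_linear_rip() is the shared test used by kvm_set_rflags() below. A small sketch of the record/check pairing, assuming only the fields shown in these hunks.

/* Record the single-step site as a single linear address ... */
static void record_singlestep_site(struct kvm_vcpu *vcpu)
{
	vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
				    get_segment_base(vcpu, VCPU_SREG_CS);
}

/* ... and later ask whether the vCPU is still at that site. */
static bool at_singlestep_site(struct kvm_vcpu *vcpu)
{
	return kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip);
}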
@@ -5874,10 +5556,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags);
5874void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 5556void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
5875{ 5557{
5876 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 5558 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
5877 vcpu->arch.singlestep_cs == 5559 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
5878 get_segment_selector(vcpu, VCPU_SREG_CS) && 5560 rflags |= X86_EFLAGS_TF;
5879 vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
5880 rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
5881 kvm_x86_ops->set_rflags(vcpu, rflags); 5561 kvm_x86_ops->set_rflags(vcpu, rflags);
5882} 5562}
5883EXPORT_SYMBOL_GPL(kvm_set_rflags); 5563EXPORT_SYMBOL_GPL(kvm_set_rflags);
@@ -5893,3 +5573,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
5893EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); 5573EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
5894EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); 5574EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
5895EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 5575EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
5576EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);