Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c  | 1506
1 file changed, 552 insertions, 954 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21b9b6aa3e88..848c814e8c3c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,7 +42,7 @@
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/perf_event.h> 43#include <linux/perf_event.h>
44#include <trace/events/kvm.h> 44#include <trace/events/kvm.h>
45#undef TRACE_INCLUDE_FILE 45
46#define CREATE_TRACE_POINTS 46#define CREATE_TRACE_POINTS
47#include "trace.h" 47#include "trace.h"
48 48
@@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore)
224 kvm_on_user_return(&smsr->urn); 224 kvm_on_user_return(&smsr->urn);
225} 225}
226 226
227unsigned long segment_base(u16 selector)
228{
229 struct descriptor_table gdt;
230 struct desc_struct *d;
231 unsigned long table_base;
232 unsigned long v;
233
234 if (selector == 0)
235 return 0;
236
237 kvm_get_gdt(&gdt);
238 table_base = gdt.base;
239
240 if (selector & 4) { /* from ldt */
241 u16 ldt_selector = kvm_read_ldt();
242
243 table_base = segment_base(ldt_selector);
244 }
245 d = (struct desc_struct *)(table_base + (selector & ~7));
246 v = get_desc_base(d);
247#ifdef CONFIG_X86_64
248 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
249 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
250#endif
251 return v;
252}
253EXPORT_SYMBOL_GPL(segment_base);
254
255u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) 227u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
256{ 228{
257 if (irqchip_in_kernel(vcpu->kvm)) 229 if (irqchip_in_kernel(vcpu->kvm))
@@ -434,8 +406,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
434 406
435#ifdef CONFIG_X86_64 407#ifdef CONFIG_X86_64
436 if (cr0 & 0xffffffff00000000UL) { 408 if (cr0 & 0xffffffff00000000UL) {
437 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
438 cr0, kvm_read_cr0(vcpu));
439 kvm_inject_gp(vcpu, 0); 409 kvm_inject_gp(vcpu, 0);
440 return; 410 return;
441 } 411 }
@@ -444,14 +414,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
444 cr0 &= ~CR0_RESERVED_BITS; 414 cr0 &= ~CR0_RESERVED_BITS;
445 415
446 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 416 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
447 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
448 kvm_inject_gp(vcpu, 0); 417 kvm_inject_gp(vcpu, 0);
449 return; 418 return;
450 } 419 }
451 420
452 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 421 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
453 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
454 "and a clear PE flag\n");
455 kvm_inject_gp(vcpu, 0); 422 kvm_inject_gp(vcpu, 0);
456 return; 423 return;
457 } 424 }
@@ -462,15 +429,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
462 int cs_db, cs_l; 429 int cs_db, cs_l;
463 430
464 if (!is_pae(vcpu)) { 431 if (!is_pae(vcpu)) {
465 printk(KERN_DEBUG "set_cr0: #GP, start paging "
466 "in long mode while PAE is disabled\n");
467 kvm_inject_gp(vcpu, 0); 432 kvm_inject_gp(vcpu, 0);
468 return; 433 return;
469 } 434 }
470 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 435 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
471 if (cs_l) { 436 if (cs_l) {
472 printk(KERN_DEBUG "set_cr0: #GP, start paging "
473 "in long mode while CS.L == 1\n");
474 kvm_inject_gp(vcpu, 0); 437 kvm_inject_gp(vcpu, 0);
475 return; 438
476 439
@@ -478,8 +441,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
478 } else 441 } else
479#endif 442#endif
480 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 443 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
481 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
482 "reserved bits\n");
483 kvm_inject_gp(vcpu, 0); 444 kvm_inject_gp(vcpu, 0);
484 return; 445 return;
485 } 446 }
@@ -487,7 +448,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
487 } 448 }
488 449
489 kvm_x86_ops->set_cr0(vcpu, cr0); 450 kvm_x86_ops->set_cr0(vcpu, cr0);
490 vcpu->arch.cr0 = cr0;
491 451
492 kvm_mmu_reset_context(vcpu); 452 kvm_mmu_reset_context(vcpu);
493 return; 453 return;
@@ -506,34 +466,28 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
506 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 466 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
507 467
508 if (cr4 & CR4_RESERVED_BITS) { 468 if (cr4 & CR4_RESERVED_BITS) {
509 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
510 kvm_inject_gp(vcpu, 0); 469 kvm_inject_gp(vcpu, 0);
511 return; 470 return;
512 } 471 }
513 472
514 if (is_long_mode(vcpu)) { 473 if (is_long_mode(vcpu)) {
515 if (!(cr4 & X86_CR4_PAE)) { 474 if (!(cr4 & X86_CR4_PAE)) {
516 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
517 "in long mode\n");
518 kvm_inject_gp(vcpu, 0); 475 kvm_inject_gp(vcpu, 0);
519 return; 476 return;
520 } 477 }
521 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 478 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
522 && ((cr4 ^ old_cr4) & pdptr_bits) 479 && ((cr4 ^ old_cr4) & pdptr_bits)
523 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 480 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
524 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
525 kvm_inject_gp(vcpu, 0); 481 kvm_inject_gp(vcpu, 0);
526 return; 482 return;
527 } 483 }
528 484
529 if (cr4 & X86_CR4_VMXE) { 485 if (cr4 & X86_CR4_VMXE) {
530 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
531 kvm_inject_gp(vcpu, 0); 486 kvm_inject_gp(vcpu, 0);
532 return; 487 return;
533 } 488 }
534 kvm_x86_ops->set_cr4(vcpu, cr4); 489 kvm_x86_ops->set_cr4(vcpu, cr4);
535 vcpu->arch.cr4 = cr4; 490 vcpu->arch.cr4 = cr4;
536 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
537 kvm_mmu_reset_context(vcpu); 491 kvm_mmu_reset_context(vcpu);
538} 492}
539EXPORT_SYMBOL_GPL(kvm_set_cr4); 493EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -548,21 +502,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
548 502
549 if (is_long_mode(vcpu)) { 503 if (is_long_mode(vcpu)) {
550 if (cr3 & CR3_L_MODE_RESERVED_BITS) { 504 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
551 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
552 kvm_inject_gp(vcpu, 0); 505 kvm_inject_gp(vcpu, 0);
553 return; 506 return;
554 } 507 }
555 } else { 508 } else {
556 if (is_pae(vcpu)) { 509 if (is_pae(vcpu)) {
557 if (cr3 & CR3_PAE_RESERVED_BITS) { 510 if (cr3 & CR3_PAE_RESERVED_BITS) {
558 printk(KERN_DEBUG
559 "set_cr3: #GP, reserved bits\n");
560 kvm_inject_gp(vcpu, 0); 511 kvm_inject_gp(vcpu, 0);
561 return; 512 return;
562 } 513 }
563 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { 514 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
564 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
565 "reserved bits\n");
566 kvm_inject_gp(vcpu, 0); 515 kvm_inject_gp(vcpu, 0);
567 return; 516 return;
568 } 517 }
@@ -594,7 +543,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
594void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 543void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
595{ 544{
596 if (cr8 & CR8_RESERVED_BITS) { 545 if (cr8 & CR8_RESERVED_BITS) {
597 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
598 kvm_inject_gp(vcpu, 0); 546 kvm_inject_gp(vcpu, 0);
599 return; 547 return;
600 } 548 }
@@ -614,6 +562,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
614} 562}
615EXPORT_SYMBOL_GPL(kvm_get_cr8); 563EXPORT_SYMBOL_GPL(kvm_get_cr8);
616 564
565int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
566{
567 switch (dr) {
568 case 0 ... 3:
569 vcpu->arch.db[dr] = val;
570 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
571 vcpu->arch.eff_db[dr] = val;
572 break;
573 case 4:
574 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
575 kvm_queue_exception(vcpu, UD_VECTOR);
576 return 1;
577 }
578 /* fall through */
579 case 6:
580 if (val & 0xffffffff00000000ULL) {
581 kvm_inject_gp(vcpu, 0);
582 return 1;
583 }
584 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
585 break;
586 case 5:
587 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
588 kvm_queue_exception(vcpu, UD_VECTOR);
589 return 1;
590 }
591 /* fall through */
592 default: /* 7 */
593 if (val & 0xffffffff00000000ULL) {
594 kvm_inject_gp(vcpu, 0);
595 return 1;
596 }
597 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
598 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
599 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
600 vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
601 }
602 break;
603 }
604
605 return 0;
606}
607EXPORT_SYMBOL_GPL(kvm_set_dr);
608
609int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
610{
611 switch (dr) {
612 case 0 ... 3:
613 *val = vcpu->arch.db[dr];
614 break;
615 case 4:
616 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
617 kvm_queue_exception(vcpu, UD_VECTOR);
618 return 1;
619 }
620 /* fall through */
621 case 6:
622 *val = vcpu->arch.dr6;
623 break;
624 case 5:
625 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
626 kvm_queue_exception(vcpu, UD_VECTOR);
627 return 1;
628 }
629 /* fall through */
630 default: /* 7 */
631 *val = vcpu->arch.dr7;
632 break;
633 }
634
635 return 0;
636}
637EXPORT_SYMBOL_GPL(kvm_get_dr);
638
617static inline u32 bit(int bitno) 639static inline u32 bit(int bitno)
618{ 640{
619 return 1 << (bitno & 31); 641 return 1 << (bitno & 31);
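The kvm_set_dr()/kvm_get_dr() helpers added above give the emulator and the vendor modules one shared path for guest debug-register access, including the DR4/DR5 aliasing rules: with CR4.DE clear they alias DR6/DR7, with CR4.DE set they raise #UD. A minimal caller sketch (the handler name is illustrative, not part of this patch), relying on the convention that a non-zero return means the exception has already been queued:

static int handle_mov_to_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	if (kvm_set_dr(vcpu, dr, val))
		return 1;	/* #UD or #GP already injected, just resume the guest */
	kvm_x86_ops->skip_emulated_instruction(vcpu);
	return 1;
}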
@@ -650,15 +672,12 @@ static u32 emulated_msrs[] = {
650static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 672static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
651{ 673{
652 if (efer & efer_reserved_bits) { 674 if (efer & efer_reserved_bits) {
653 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
654 efer);
655 kvm_inject_gp(vcpu, 0); 675 kvm_inject_gp(vcpu, 0);
656 return; 676 return;
657 } 677 }
658 678
659 if (is_paging(vcpu) 679 if (is_paging(vcpu)
660 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 680 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
661 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
662 kvm_inject_gp(vcpu, 0); 681 kvm_inject_gp(vcpu, 0);
663 return; 682 return;
664 } 683 }
@@ -668,7 +687,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
668 687
669 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 688 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
670 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 689 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
671 printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
672 kvm_inject_gp(vcpu, 0); 690 kvm_inject_gp(vcpu, 0);
673 return; 691 return;
674 } 692 }
@@ -679,7 +697,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
679 697
680 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 698 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
681 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 699 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
682 printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
683 kvm_inject_gp(vcpu, 0); 700 kvm_inject_gp(vcpu, 0);
684 return; 701 return;
685 } 702 }
@@ -968,9 +985,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
968 if (msr >= MSR_IA32_MC0_CTL && 985 if (msr >= MSR_IA32_MC0_CTL &&
969 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 986 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
970 u32 offset = msr - MSR_IA32_MC0_CTL; 987 u32 offset = msr - MSR_IA32_MC0_CTL;
971 /* only 0 or all 1s can be written to IA32_MCi_CTL */ 988 /* only 0 or all 1s can be written to IA32_MCi_CTL
989 * some Linux kernels though clear bit 10 in bank 4 to
990 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
991 * this to avoid an uncatched #GP in the guest
992 */
972 if ((offset & 0x3) == 0 && 993 if ((offset & 0x3) == 0 &&
973 data != 0 && data != ~(u64)0) 994 data != 0 && (data | (1 << 10)) != ~(u64)0)
974 return -1; 995 return -1;
975 vcpu->arch.mce_banks[offset] = data; 996 vcpu->arch.mce_banks[offset] = data;
976 break; 997 break;
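The relaxed IA32_MCi_CTL check above is needed because Linux guests that detect an AMD K8 apply a GART TLB-walk erratum workaround by clearing bit 10 of MC4_CTL; rejecting that write would inject an unexpected #GP during guest boot. The accepted values, restated as a standalone predicate (illustrative only, mirroring the condition above):

/* Accept 0, all ones, or all ones with bit 10 cleared; anything else is
 * still rejected with -1 as before. */
static bool mci_ctl_write_allowed(u64 data)
{
	return data == 0 || (data | (1ULL << 10)) == ~(u64)0;
}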
@@ -1114,6 +1135,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1114 break; 1135 break;
1115 case MSR_K7_HWCR: 1136 case MSR_K7_HWCR:
1116 data &= ~(u64)0x40; /* ignore flush filter disable */ 1137 data &= ~(u64)0x40; /* ignore flush filter disable */
1138 data &= ~(u64)0x100; /* ignore ignne emulation enable */
1117 if (data != 0) { 1139 if (data != 0) {
1118 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1140 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1119 data); 1141 data);
@@ -1572,6 +1594,7 @@ int kvm_dev_ioctl_check_extension(long ext)
1572 case KVM_CAP_HYPERV_VAPIC: 1594 case KVM_CAP_HYPERV_VAPIC:
1573 case KVM_CAP_HYPERV_SPIN: 1595 case KVM_CAP_HYPERV_SPIN:
1574 case KVM_CAP_PCI_SEGMENT: 1596 case KVM_CAP_PCI_SEGMENT:
1597 case KVM_CAP_DEBUGREGS:
1575 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1598 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1576 r = 1; 1599 r = 1;
1577 break; 1600 break;
@@ -2124,14 +2147,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2124{ 2147{
2125 vcpu_load(vcpu); 2148 vcpu_load(vcpu);
2126 2149
2127 events->exception.injected = vcpu->arch.exception.pending; 2150 events->exception.injected =
2151 vcpu->arch.exception.pending &&
2152 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2128 events->exception.nr = vcpu->arch.exception.nr; 2153 events->exception.nr = vcpu->arch.exception.nr;
2129 events->exception.has_error_code = vcpu->arch.exception.has_error_code; 2154 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2130 events->exception.error_code = vcpu->arch.exception.error_code; 2155 events->exception.error_code = vcpu->arch.exception.error_code;
2131 2156
2132 events->interrupt.injected = vcpu->arch.interrupt.pending; 2157 events->interrupt.injected =
2158 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2133 events->interrupt.nr = vcpu->arch.interrupt.nr; 2159 events->interrupt.nr = vcpu->arch.interrupt.nr;
2134 events->interrupt.soft = vcpu->arch.interrupt.soft; 2160 events->interrupt.soft = 0;
2161 events->interrupt.shadow =
2162 kvm_x86_ops->get_interrupt_shadow(vcpu,
2163 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2135 2164
2136 events->nmi.injected = vcpu->arch.nmi_injected; 2165 events->nmi.injected = vcpu->arch.nmi_injected;
2137 events->nmi.pending = vcpu->arch.nmi_pending; 2166 events->nmi.pending = vcpu->arch.nmi_pending;
@@ -2140,7 +2169,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2140 events->sipi_vector = vcpu->arch.sipi_vector; 2169 events->sipi_vector = vcpu->arch.sipi_vector;
2141 2170
2142 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2171 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2143 | KVM_VCPUEVENT_VALID_SIPI_VECTOR); 2172 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2173 | KVM_VCPUEVENT_VALID_SHADOW);
2144 2174
2145 vcpu_put(vcpu); 2175 vcpu_put(vcpu);
2146} 2176}
@@ -2149,7 +2179,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2149 struct kvm_vcpu_events *events) 2179 struct kvm_vcpu_events *events)
2150{ 2180{
2151 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING 2181 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2152 | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) 2182 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2183 | KVM_VCPUEVENT_VALID_SHADOW))
2153 return -EINVAL; 2184 return -EINVAL;
2154 2185
2155 vcpu_load(vcpu); 2186 vcpu_load(vcpu);
@@ -2164,6 +2195,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2164 vcpu->arch.interrupt.soft = events->interrupt.soft; 2195 vcpu->arch.interrupt.soft = events->interrupt.soft;
2165 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) 2196 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
2166 kvm_pic_clear_isr_ack(vcpu->kvm); 2197 kvm_pic_clear_isr_ack(vcpu->kvm);
2198 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2199 kvm_x86_ops->set_interrupt_shadow(vcpu,
2200 events->interrupt.shadow);
2167 2201
2168 vcpu->arch.nmi_injected = events->nmi.injected; 2202 vcpu->arch.nmi_injected = events->nmi.injected;
2169 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) 2203 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
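With KVM_VCPUEVENT_VALID_SHADOW the STI/MOV-SS interrupt shadow becomes part of the migratable event state instead of being lost across save/restore, and soft-injected events are no longer reported as pending hardware injections. A hypothetical userspace round trip (fragment only; assumes an open vcpu fd and <linux/kvm.h>, <sys/ioctl.h>, <err.h>):

struct kvm_vcpu_events ev;

if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &ev) < 0)
	err(1, "KVM_GET_VCPU_EVENTS");
/* ev.interrupt.shadow carries the KVM_X86_SHADOW_INT_MOV_SS/STI bits;
 * ev.flags already has KVM_VCPUEVENT_VALID_SHADOW set by the kernel,
 * so passing the structure straight back restores the shadow as well. */
if (ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &ev) < 0)
	err(1, "KVM_SET_VCPU_EVENTS");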
@@ -2178,6 +2212,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2178 return 0; 2212 return 0;
2179} 2213}
2180 2214
2215static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2216 struct kvm_debugregs *dbgregs)
2217{
2218 vcpu_load(vcpu);
2219
2220 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2221 dbgregs->dr6 = vcpu->arch.dr6;
2222 dbgregs->dr7 = vcpu->arch.dr7;
2223 dbgregs->flags = 0;
2224
2225 vcpu_put(vcpu);
2226}
2227
2228static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2229 struct kvm_debugregs *dbgregs)
2230{
2231 if (dbgregs->flags)
2232 return -EINVAL;
2233
2234 vcpu_load(vcpu);
2235
2236 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2237 vcpu->arch.dr6 = dbgregs->dr6;
2238 vcpu->arch.dr7 = dbgregs->dr7;
2239
2240 vcpu_put(vcpu);
2241
2242 return 0;
2243}
2244
2181long kvm_arch_vcpu_ioctl(struct file *filp, 2245long kvm_arch_vcpu_ioctl(struct file *filp,
2182 unsigned int ioctl, unsigned long arg) 2246 unsigned int ioctl, unsigned long arg)
2183{ 2247{
@@ -2356,6 +2420,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2356 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); 2420 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2357 break; 2421 break;
2358 } 2422 }
2423 case KVM_GET_DEBUGREGS: {
2424 struct kvm_debugregs dbgregs;
2425
2426 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
2427
2428 r = -EFAULT;
2429 if (copy_to_user(argp, &dbgregs,
2430 sizeof(struct kvm_debugregs)))
2431 break;
2432 r = 0;
2433 break;
2434 }
2435 case KVM_SET_DEBUGREGS: {
2436 struct kvm_debugregs dbgregs;
2437
2438 r = -EFAULT;
2439 if (copy_from_user(&dbgregs, argp,
2440 sizeof(struct kvm_debugregs)))
2441 break;
2442
2443 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
2444 break;
2445 }
2359 default: 2446 default:
2360 r = -EINVAL; 2447 r = -EINVAL;
2361 } 2448 }
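KVM_GET_DEBUGREGS/KVM_SET_DEBUGREGS expose db[0-3], dr6 and dr7 so userspace can checkpoint or migrate hardware-breakpoint state; flags must be zero or the set path returns -EINVAL. A hypothetical userspace fragment (vcpu_fd open, <linux/kvm.h> providing struct kvm_debugregs):

struct kvm_debugregs dbg;

if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
	err(1, "KVM_GET_DEBUGREGS");
/* ... transfer dbg to the destination ... */
dbg.flags = 0;			/* anything else is rejected with -EINVAL */
if (ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg) < 0)
	err(1, "KVM_SET_DEBUGREGS");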
@@ -2409,7 +2496,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2409 struct kvm_mem_alias *alias; 2496 struct kvm_mem_alias *alias;
2410 struct kvm_mem_aliases *aliases; 2497 struct kvm_mem_aliases *aliases;
2411 2498
2412 aliases = rcu_dereference(kvm->arch.aliases); 2499 aliases = kvm_aliases(kvm);
2413 2500
2414 for (i = 0; i < aliases->naliases; ++i) { 2501 for (i = 0; i < aliases->naliases; ++i) {
2415 alias = &aliases->aliases[i]; 2502 alias = &aliases->aliases[i];
@@ -2428,7 +2515,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2428 struct kvm_mem_alias *alias; 2515 struct kvm_mem_alias *alias;
2429 struct kvm_mem_aliases *aliases; 2516 struct kvm_mem_aliases *aliases;
2430 2517
2431 aliases = rcu_dereference(kvm->arch.aliases); 2518 aliases = kvm_aliases(kvm);
2432 2519
2433 for (i = 0; i < aliases->naliases; ++i) { 2520 for (i = 0; i < aliases->naliases; ++i) {
2434 alias = &aliases->aliases[i]; 2521 alias = &aliases->aliases[i];
@@ -2636,8 +2723,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2636int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2723int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2637 struct kvm_dirty_log *log) 2724 struct kvm_dirty_log *log)
2638{ 2725{
2639 int r, n, i; 2726 int r, i;
2640 struct kvm_memory_slot *memslot; 2727 struct kvm_memory_slot *memslot;
2728 unsigned long n;
2641 unsigned long is_dirty = 0; 2729 unsigned long is_dirty = 0;
2642 unsigned long *dirty_bitmap = NULL; 2730 unsigned long *dirty_bitmap = NULL;
2643 2731
@@ -2652,7 +2740,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2652 if (!memslot->dirty_bitmap) 2740 if (!memslot->dirty_bitmap)
2653 goto out; 2741 goto out;
2654 2742
2655 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2743 n = kvm_dirty_bitmap_bytes(memslot);
2656 2744
2657 r = -ENOMEM; 2745 r = -ENOMEM;
2658 dirty_bitmap = vmalloc(n); 2746 dirty_bitmap = vmalloc(n);
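The bitmap size now comes from kvm_dirty_bitmap_bytes() as an unsigned long, so the vmalloc() here and the clearing path derive the size from the same arithmetic. Worked example with illustrative numbers: a 1 TiB slot has 2^28 pages, giving ALIGN(2^28, BITS_PER_LONG) / 8 = 32 MiB of bitmap; keeping the count in unsigned long matches memslot->npages and removes any int truncation concern for larger slots.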
@@ -2822,11 +2910,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
2822 r = -EFAULT; 2910 r = -EFAULT;
2823 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 2911 if (copy_from_user(&irq_event, argp, sizeof irq_event))
2824 goto out; 2912 goto out;
2913 r = -ENXIO;
2825 if (irqchip_in_kernel(kvm)) { 2914 if (irqchip_in_kernel(kvm)) {
2826 __s32 status; 2915 __s32 status;
2827 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2916 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
2828 irq_event.irq, irq_event.level); 2917 irq_event.irq, irq_event.level);
2829 if (ioctl == KVM_IRQ_LINE_STATUS) { 2918 if (ioctl == KVM_IRQ_LINE_STATUS) {
2919 r = -EFAULT;
2830 irq_event.status = status; 2920 irq_event.status = status;
2831 if (copy_to_user(argp, &irq_event, 2921 if (copy_to_user(argp, &irq_event,
2832 sizeof irq_event)) 2922 sizeof irq_event))
@@ -3042,6 +3132,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3042 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3132 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3043} 3133}
3044 3134
3135static void kvm_set_segment(struct kvm_vcpu *vcpu,
3136 struct kvm_segment *var, int seg)
3137{
3138 kvm_x86_ops->set_segment(vcpu, var, seg);
3139}
3140
3141void kvm_get_segment(struct kvm_vcpu *vcpu,
3142 struct kvm_segment *var, int seg)
3143{
3144 kvm_x86_ops->get_segment(vcpu, var, seg);
3145}
3146
3045gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3147gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3046{ 3148{
3047 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3149 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
@@ -3122,14 +3224,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3122 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); 3224 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3123} 3225}
3124 3226
3125static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3227static int kvm_write_guest_virt_system(gva_t addr, void *val,
3126 struct kvm_vcpu *vcpu, u32 *error) 3228 unsigned int bytes,
3229 struct kvm_vcpu *vcpu,
3230 u32 *error)
3127{ 3231{
3128 void *data = val; 3232 void *data = val;
3129 int r = X86EMUL_CONTINUE; 3233 int r = X86EMUL_CONTINUE;
3130 3234
3131 while (bytes) { 3235 while (bytes) {
3132 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); 3236 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr,
3237 PFERR_WRITE_MASK, error);
3133 unsigned offset = addr & (PAGE_SIZE-1); 3238 unsigned offset = addr & (PAGE_SIZE-1);
3134 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3239 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3135 int ret; 3240 int ret;
@@ -3152,7 +3257,6 @@ out:
3152 return r; 3257 return r;
3153} 3258}
3154 3259
3155
3156static int emulator_read_emulated(unsigned long addr, 3260static int emulator_read_emulated(unsigned long addr,
3157 void *val, 3261 void *val,
3158 unsigned int bytes, 3262 unsigned int bytes,
@@ -3255,9 +3359,9 @@ mmio:
3255} 3359}
3256 3360
3257int emulator_write_emulated(unsigned long addr, 3361int emulator_write_emulated(unsigned long addr,
3258 const void *val, 3362 const void *val,
3259 unsigned int bytes, 3363 unsigned int bytes,
3260 struct kvm_vcpu *vcpu) 3364 struct kvm_vcpu *vcpu)
3261{ 3365{
3262 /* Crossing a page boundary? */ 3366 /* Crossing a page boundary? */
3263 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3367 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
@@ -3275,45 +3379,150 @@ int emulator_write_emulated(unsigned long addr,
3275} 3379}
3276EXPORT_SYMBOL_GPL(emulator_write_emulated); 3380EXPORT_SYMBOL_GPL(emulator_write_emulated);
3277 3381
3382#define CMPXCHG_TYPE(t, ptr, old, new) \
3383 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
3384
3385#ifdef CONFIG_X86_64
3386# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
3387#else
3388# define CMPXCHG64(ptr, old, new) \
3389 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
3390#endif
3391
3278static int emulator_cmpxchg_emulated(unsigned long addr, 3392static int emulator_cmpxchg_emulated(unsigned long addr,
3279 const void *old, 3393 const void *old,
3280 const void *new, 3394 const void *new,
3281 unsigned int bytes, 3395 unsigned int bytes,
3282 struct kvm_vcpu *vcpu) 3396 struct kvm_vcpu *vcpu)
3283{ 3397{
3284 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3398 gpa_t gpa;
3285#ifndef CONFIG_X86_64 3399 struct page *page;
3286 /* guests cmpxchg8b have to be emulated atomically */ 3400 char *kaddr;
3287 if (bytes == 8) { 3401 bool exchanged;
3288 gpa_t gpa;
3289 struct page *page;
3290 char *kaddr;
3291 u64 val;
3292 3402
3293 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3403 /* guests cmpxchg8b have to be emulated atomically */
3404 if (bytes > 8 || (bytes & (bytes - 1)))
3405 goto emul_write;
3294 3406
3295 if (gpa == UNMAPPED_GVA || 3407 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
3296 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3297 goto emul_write;
3298 3408
3299 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) 3409 if (gpa == UNMAPPED_GVA ||
3300 goto emul_write; 3410 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3411 goto emul_write;
3301 3412
3302 val = *(u64 *)new; 3413 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3414 goto emul_write;
3303 3415
3304 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3416 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3305 3417
3306 kaddr = kmap_atomic(page, KM_USER0); 3418 kaddr = kmap_atomic(page, KM_USER0);
3307 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 3419 kaddr += offset_in_page(gpa);
3308 kunmap_atomic(kaddr, KM_USER0); 3420 switch (bytes) {
3309 kvm_release_page_dirty(page); 3421 case 1:
3422 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3423 break;
3424 case 2:
3425 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
3426 break;
3427 case 4:
3428 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
3429 break;
3430 case 8:
3431 exchanged = CMPXCHG64(kaddr, old, new);
3432 break;
3433 default:
3434 BUG();
3310 } 3435 }
3436 kunmap_atomic(kaddr, KM_USER0);
3437 kvm_release_page_dirty(page);
3438
3439 if (!exchanged)
3440 return X86EMUL_CMPXCHG_FAILED;
3441
3442 kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
3443
3444 return X86EMUL_CONTINUE;
3445
3311emul_write: 3446emul_write:
3312#endif 3447 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3313 3448
3314 return emulator_write_emulated(addr, new, bytes, vcpu); 3449 return emulator_write_emulated(addr, new, bytes, vcpu);
3315} 3450}
3316 3451
3452static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3453{
3454 /* TODO: String I/O for in kernel device */
3455 int r;
3456
3457 if (vcpu->arch.pio.in)
3458 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3459 vcpu->arch.pio.size, pd);
3460 else
3461 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3462 vcpu->arch.pio.port, vcpu->arch.pio.size,
3463 pd);
3464 return r;
3465}
3466
3467
3468static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
3469 unsigned int count, struct kvm_vcpu *vcpu)
3470{
3471 if (vcpu->arch.pio.count)
3472 goto data_avail;
3473
3474 trace_kvm_pio(1, port, size, 1);
3475
3476 vcpu->arch.pio.port = port;
3477 vcpu->arch.pio.in = 1;
3478 vcpu->arch.pio.count = count;
3479 vcpu->arch.pio.size = size;
3480
3481 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3482 data_avail:
3483 memcpy(val, vcpu->arch.pio_data, size * count);
3484 vcpu->arch.pio.count = 0;
3485 return 1;
3486 }
3487
3488 vcpu->run->exit_reason = KVM_EXIT_IO;
3489 vcpu->run->io.direction = KVM_EXIT_IO_IN;
3490 vcpu->run->io.size = size;
3491 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3492 vcpu->run->io.count = count;
3493 vcpu->run->io.port = port;
3494
3495 return 0;
3496}
3497
3498static int emulator_pio_out_emulated(int size, unsigned short port,
3499 const void *val, unsigned int count,
3500 struct kvm_vcpu *vcpu)
3501{
3502 trace_kvm_pio(0, port, size, 1);
3503
3504 vcpu->arch.pio.port = port;
3505 vcpu->arch.pio.in = 0;
3506 vcpu->arch.pio.count = count;
3507 vcpu->arch.pio.size = size;
3508
3509 memcpy(vcpu->arch.pio_data, val, size * count);
3510
3511 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3512 vcpu->arch.pio.count = 0;
3513 return 1;
3514 }
3515
3516 vcpu->run->exit_reason = KVM_EXIT_IO;
3517 vcpu->run->io.direction = KVM_EXIT_IO_OUT;
3518 vcpu->run->io.size = size;
3519 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3520 vcpu->run->io.count = count;
3521 vcpu->run->io.port = port;
3522
3523 return 0;
3524}
3525
3317static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 3526static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
3318{ 3527{
3319 return kvm_x86_ops->get_segment_base(vcpu, seg); 3528 return kvm_x86_ops->get_segment_base(vcpu, seg);
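emulator_cmpxchg_emulated() now performs a real atomic compare-and-exchange on the mapped guest page for naturally sized operands, and only falls back to emulating the exchange as a plain write when it cannot (operand larger than 8 bytes or not a power of two, unmapped GVA, the local APIC page, or a page-crossing access). The size gate, restated on its own (sketch mirroring the check above):

static bool cmpxchg_fast_path_ok(unsigned int bytes)
{
	/* 1, 2, 4 or 8 bytes: power of two, at most cmpxchg8b-sized */
	return bytes <= 8 && !(bytes & (bytes - 1));
}

If the in-memory value no longer matches the expected old value, the function reports X86EMUL_CMPXCHG_FAILED instead of silently storing the new value, which is what makes guest-side locked cmpxchg sequences behave correctly under SMP.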
@@ -3334,14 +3543,14 @@ int emulate_clts(struct kvm_vcpu *vcpu)
3334 3543
3335int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3544int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
3336{ 3545{
3337 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); 3546 return kvm_get_dr(ctxt->vcpu, dr, dest);
3338} 3547}
3339 3548
3340int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3549int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
3341{ 3550{
3342 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3551 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
3343 3552
3344 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); 3553 return kvm_set_dr(ctxt->vcpu, dr, value & mask);
3345} 3554}
3346 3555
3347void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3556void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -3362,12 +3571,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3362} 3571}
3363EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3572EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3364 3573
3574static u64 mk_cr_64(u64 curr_cr, u32 new_val)
3575{
3576 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
3577}
3578
3579static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
3580{
3581 unsigned long value;
3582
3583 switch (cr) {
3584 case 0:
3585 value = kvm_read_cr0(vcpu);
3586 break;
3587 case 2:
3588 value = vcpu->arch.cr2;
3589 break;
3590 case 3:
3591 value = vcpu->arch.cr3;
3592 break;
3593 case 4:
3594 value = kvm_read_cr4(vcpu);
3595 break;
3596 case 8:
3597 value = kvm_get_cr8(vcpu);
3598 break;
3599 default:
3600 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3601 return 0;
3602 }
3603
3604 return value;
3605}
3606
3607static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
3608{
3609 switch (cr) {
3610 case 0:
3611 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3612 break;
3613 case 2:
3614 vcpu->arch.cr2 = val;
3615 break;
3616 case 3:
3617 kvm_set_cr3(vcpu, val);
3618 break;
3619 case 4:
3620 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3621 break;
3622 case 8:
3623 kvm_set_cr8(vcpu, val & 0xfUL);
3624 break;
3625 default:
3626 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3627 }
3628}
3629
3630static int emulator_get_cpl(struct kvm_vcpu *vcpu)
3631{
3632 return kvm_x86_ops->get_cpl(vcpu);
3633}
3634
3635static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
3636{
3637 kvm_x86_ops->get_gdt(vcpu, dt);
3638}
3639
3640static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
3641 struct kvm_vcpu *vcpu)
3642{
3643 struct kvm_segment var;
3644
3645 kvm_get_segment(vcpu, &var, seg);
3646
3647 if (var.unusable)
3648 return false;
3649
3650 if (var.g)
3651 var.limit >>= 12;
3652 set_desc_limit(desc, var.limit);
3653 set_desc_base(desc, (unsigned long)var.base);
3654 desc->type = var.type;
3655 desc->s = var.s;
3656 desc->dpl = var.dpl;
3657 desc->p = var.present;
3658 desc->avl = var.avl;
3659 desc->l = var.l;
3660 desc->d = var.db;
3661 desc->g = var.g;
3662
3663 return true;
3664}
3665
3666static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
3667 struct kvm_vcpu *vcpu)
3668{
3669 struct kvm_segment var;
3670
3671 /* needed to preserve selector */
3672 kvm_get_segment(vcpu, &var, seg);
3673
3674 var.base = get_desc_base(desc);
3675 var.limit = get_desc_limit(desc);
3676 if (desc->g)
3677 var.limit = (var.limit << 12) | 0xfff;
3678 var.type = desc->type;
3679 var.present = desc->p;
3680 var.dpl = desc->dpl;
3681 var.db = desc->d;
3682 var.s = desc->s;
3683 var.l = desc->l;
3684 var.g = desc->g;
3685 var.avl = desc->avl;
3686 var.present = desc->p;
3687 var.unusable = !var.present;
3688 var.padding = 0;
3689
3690 kvm_set_segment(vcpu, &var, seg);
3691 return;
3692}
3693
3694static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
3695{
3696 struct kvm_segment kvm_seg;
3697
3698 kvm_get_segment(vcpu, &kvm_seg, seg);
3699 return kvm_seg.selector;
3700}
3701
3702static void emulator_set_segment_selector(u16 sel, int seg,
3703 struct kvm_vcpu *vcpu)
3704{
3705 struct kvm_segment kvm_seg;
3706
3707 kvm_get_segment(vcpu, &kvm_seg, seg);
3708 kvm_seg.selector = sel;
3709 kvm_set_segment(vcpu, &kvm_seg, seg);
3710}
3711
3712static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
3713{
3714 kvm_x86_ops->set_rflags(vcpu, rflags);
3715}
3716
3365static struct x86_emulate_ops emulate_ops = { 3717static struct x86_emulate_ops emulate_ops = {
3366 .read_std = kvm_read_guest_virt_system, 3718 .read_std = kvm_read_guest_virt_system,
3719 .write_std = kvm_write_guest_virt_system,
3367 .fetch = kvm_fetch_guest_virt, 3720 .fetch = kvm_fetch_guest_virt,
3368 .read_emulated = emulator_read_emulated, 3721 .read_emulated = emulator_read_emulated,
3369 .write_emulated = emulator_write_emulated, 3722 .write_emulated = emulator_write_emulated,
3370 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3723 .cmpxchg_emulated = emulator_cmpxchg_emulated,
3724 .pio_in_emulated = emulator_pio_in_emulated,
3725 .pio_out_emulated = emulator_pio_out_emulated,
3726 .get_cached_descriptor = emulator_get_cached_descriptor,
3727 .set_cached_descriptor = emulator_set_cached_descriptor,
3728 .get_segment_selector = emulator_get_segment_selector,
3729 .set_segment_selector = emulator_set_segment_selector,
3730 .get_gdt = emulator_get_gdt,
3731 .get_cr = emulator_get_cr,
3732 .set_cr = emulator_set_cr,
3733 .cpl = emulator_get_cpl,
3734 .set_rflags = emulator_set_rflags,
3371}; 3735};
3372 3736
3373static void cache_all_regs(struct kvm_vcpu *vcpu) 3737static void cache_all_regs(struct kvm_vcpu *vcpu)
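The new x86_emulate_ops callbacks let the emulator manipulate control registers, segments and descriptors without reaching into kvm_x86_ops directly, which the rewritten task-switch path further down relies on. One detail worth noting is the limit granularity handling in the cached-descriptor pair: the get path shifts a page-granular limit down by 12 bits to fit the descriptor's limit field, and the set path shifts it back up and ors in 0xfff (illustrative round trip):

/* round trip of a page-granular segment limit through the callbacks */
u32 arch_limit = 0xffffffff;		/* 4 GiB - 1, G bit set */
u32 field      = arch_limit >> 12;	/* 0xfffff stored in the descriptor */
u32 restored   = (field << 12) | 0xfff;	/* == arch_limit again */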
@@ -3398,14 +3762,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3398 cache_all_regs(vcpu); 3762 cache_all_regs(vcpu);
3399 3763
3400 vcpu->mmio_is_write = 0; 3764 vcpu->mmio_is_write = 0;
3401 vcpu->arch.pio.string = 0;
3402 3765
3403 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3766 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
3404 int cs_db, cs_l; 3767 int cs_db, cs_l;
3405 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3768 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3406 3769
3407 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3770 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3408 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3771 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
3772 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
3409 vcpu->arch.emulate_ctxt.mode = 3773 vcpu->arch.emulate_ctxt.mode =
3410 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 3774 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3411 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3775 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
@@ -3414,6 +3778,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3414 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3778 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3415 3779
3416 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3780 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3781 trace_kvm_emulate_insn_start(vcpu);
3417 3782
3418 /* Only allow emulation of specific instructions on #UD 3783 /* Only allow emulation of specific instructions on #UD
3419 * (namely VMMCALL, sysenter, sysexit, syscall)*/ 3784 * (namely VMMCALL, sysenter, sysexit, syscall)*/
@@ -3446,6 +3811,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3446 ++vcpu->stat.insn_emulation; 3811 ++vcpu->stat.insn_emulation;
3447 if (r) { 3812 if (r) {
3448 ++vcpu->stat.insn_emulation_fail; 3813 ++vcpu->stat.insn_emulation_fail;
3814 trace_kvm_emulate_insn_failed(vcpu);
3449 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3815 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3450 return EMULATE_DONE; 3816 return EMULATE_DONE;
3451 return EMULATE_FAIL; 3817 return EMULATE_FAIL;
@@ -3457,16 +3823,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3457 return EMULATE_DONE; 3823 return EMULATE_DONE;
3458 } 3824 }
3459 3825
3826restart:
3460 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3827 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3461 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; 3828 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
3462 3829
3463 if (r == 0) 3830 if (r == 0)
3464 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); 3831 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
3465 3832
3466 if (vcpu->arch.pio.string) 3833 if (vcpu->arch.pio.count) {
3834 if (!vcpu->arch.pio.in)
3835 vcpu->arch.pio.count = 0;
3467 return EMULATE_DO_MMIO; 3836 return EMULATE_DO_MMIO;
3837 }
3468 3838
3469 if ((r || vcpu->mmio_is_write) && run) { 3839 if (r || vcpu->mmio_is_write) {
3470 run->exit_reason = KVM_EXIT_MMIO; 3840 run->exit_reason = KVM_EXIT_MMIO;
3471 run->mmio.phys_addr = vcpu->mmio_phys_addr; 3841 run->mmio.phys_addr = vcpu->mmio_phys_addr;
3472 memcpy(run->mmio.data, vcpu->mmio_data, 8); 3842 memcpy(run->mmio.data, vcpu->mmio_data, 8);
@@ -3476,222 +3846,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3476 3846
3477 if (r) { 3847 if (r) {
3478 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3848 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3479 return EMULATE_DONE; 3849 goto done;
3480 if (!vcpu->mmio_needed) { 3850 if (!vcpu->mmio_needed) {
3851 ++vcpu->stat.insn_emulation_fail;
3852 trace_kvm_emulate_insn_failed(vcpu);
3481 kvm_report_emulation_failure(vcpu, "mmio"); 3853 kvm_report_emulation_failure(vcpu, "mmio");
3482 return EMULATE_FAIL; 3854 return EMULATE_FAIL;
3483 } 3855 }
3484 return EMULATE_DO_MMIO; 3856 return EMULATE_DO_MMIO;
3485 } 3857 }
3486 3858
3487 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
3488
3489 if (vcpu->mmio_is_write) { 3859 if (vcpu->mmio_is_write) {
3490 vcpu->mmio_needed = 0; 3860 vcpu->mmio_needed = 0;
3491 return EMULATE_DO_MMIO; 3861 return EMULATE_DO_MMIO;
3492 } 3862 }
3493 3863
3494 return EMULATE_DONE; 3864done:
3495} 3865 if (vcpu->arch.exception.pending)
3496EXPORT_SYMBOL_GPL(emulate_instruction); 3866 vcpu->arch.emulate_ctxt.restart = false;
3497
3498static int pio_copy_data(struct kvm_vcpu *vcpu)
3499{
3500 void *p = vcpu->arch.pio_data;
3501 gva_t q = vcpu->arch.pio.guest_gva;
3502 unsigned bytes;
3503 int ret;
3504 u32 error_code;
3505
3506 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
3507 if (vcpu->arch.pio.in)
3508 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
3509 else
3510 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3511
3512 if (ret == X86EMUL_PROPAGATE_FAULT)
3513 kvm_inject_page_fault(vcpu, q, error_code);
3514
3515 return ret;
3516}
3517
3518int complete_pio(struct kvm_vcpu *vcpu)
3519{
3520 struct kvm_pio_request *io = &vcpu->arch.pio;
3521 long delta;
3522 int r;
3523 unsigned long val;
3524
3525 if (!io->string) {
3526 if (io->in) {
3527 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3528 memcpy(&val, vcpu->arch.pio_data, io->size);
3529 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
3530 }
3531 } else {
3532 if (io->in) {
3533 r = pio_copy_data(vcpu);
3534 if (r)
3535 goto out;
3536 }
3537
3538 delta = 1;
3539 if (io->rep) {
3540 delta *= io->cur_count;
3541 /*
3542 * The size of the register should really depend on
3543 * current address size.
3544 */
3545 val = kvm_register_read(vcpu, VCPU_REGS_RCX);
3546 val -= delta;
3547 kvm_register_write(vcpu, VCPU_REGS_RCX, val);
3548 }
3549 if (io->down)
3550 delta = -delta;
3551 delta *= io->size;
3552 if (io->in) {
3553 val = kvm_register_read(vcpu, VCPU_REGS_RDI);
3554 val += delta;
3555 kvm_register_write(vcpu, VCPU_REGS_RDI, val);
3556 } else {
3557 val = kvm_register_read(vcpu, VCPU_REGS_RSI);
3558 val += delta;
3559 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
3560 }
3561 }
3562out:
3563 io->count -= io->cur_count;
3564 io->cur_count = 0;
3565
3566 return 0;
3567}
3568 3867
3569static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) 3868 if (vcpu->arch.emulate_ctxt.restart)
3570{ 3869 goto restart;
3571 /* TODO: String I/O for in kernel device */
3572 int r;
3573 3870
3574 if (vcpu->arch.pio.in) 3871 return EMULATE_DONE;
3575 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3576 vcpu->arch.pio.size, pd);
3577 else
3578 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3579 vcpu->arch.pio.port, vcpu->arch.pio.size,
3580 pd);
3581 return r;
3582}
3583
3584static int pio_string_write(struct kvm_vcpu *vcpu)
3585{
3586 struct kvm_pio_request *io = &vcpu->arch.pio;
3587 void *pd = vcpu->arch.pio_data;
3588 int i, r = 0;
3589
3590 for (i = 0; i < io->cur_count; i++) {
3591 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3592 io->port, io->size, pd)) {
3593 r = -EOPNOTSUPP;
3594 break;
3595 }
3596 pd += io->size;
3597 }
3598 return r;
3599}
3600
3601int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3602{
3603 unsigned long val;
3604
3605 trace_kvm_pio(!in, port, size, 1);
3606
3607 vcpu->run->exit_reason = KVM_EXIT_IO;
3608 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3609 vcpu->run->io.size = vcpu->arch.pio.size = size;
3610 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3611 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
3612 vcpu->run->io.port = vcpu->arch.pio.port = port;
3613 vcpu->arch.pio.in = in;
3614 vcpu->arch.pio.string = 0;
3615 vcpu->arch.pio.down = 0;
3616 vcpu->arch.pio.rep = 0;
3617
3618 if (!vcpu->arch.pio.in) {
3619 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3620 memcpy(vcpu->arch.pio_data, &val, 4);
3621 }
3622
3623 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3624 complete_pio(vcpu);
3625 return 1;
3626 }
3627 return 0;
3628} 3872}
3629EXPORT_SYMBOL_GPL(kvm_emulate_pio); 3873EXPORT_SYMBOL_GPL(emulate_instruction);
3630 3874
3631int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, 3875int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
3632 int size, unsigned long count, int down,
3633 gva_t address, int rep, unsigned port)
3634{ 3876{
3635 unsigned now, in_page; 3877 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3636 int ret = 0; 3878 int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
3637 3879 /* do not return to emulator after return from userspace */
3638 trace_kvm_pio(!in, port, size, count); 3880 vcpu->arch.pio.count = 0;
3639
3640 vcpu->run->exit_reason = KVM_EXIT_IO;
3641 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3642 vcpu->run->io.size = vcpu->arch.pio.size = size;
3643 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3644 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
3645 vcpu->run->io.port = vcpu->arch.pio.port = port;
3646 vcpu->arch.pio.in = in;
3647 vcpu->arch.pio.string = 1;
3648 vcpu->arch.pio.down = down;
3649 vcpu->arch.pio.rep = rep;
3650
3651 if (!count) {
3652 kvm_x86_ops->skip_emulated_instruction(vcpu);
3653 return 1;
3654 }
3655
3656 if (!down)
3657 in_page = PAGE_SIZE - offset_in_page(address);
3658 else
3659 in_page = offset_in_page(address) + size;
3660 now = min(count, (unsigned long)in_page / size);
3661 if (!now)
3662 now = 1;
3663 if (down) {
3664 /*
3665 * String I/O in reverse. Yuck. Kill the guest, fix later.
3666 */
3667 pr_unimpl(vcpu, "guest string pio down\n");
3668 kvm_inject_gp(vcpu, 0);
3669 return 1;
3670 }
3671 vcpu->run->io.count = now;
3672 vcpu->arch.pio.cur_count = now;
3673
3674 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
3675 kvm_x86_ops->skip_emulated_instruction(vcpu);
3676
3677 vcpu->arch.pio.guest_gva = address;
3678
3679 if (!vcpu->arch.pio.in) {
3680 /* string PIO write */
3681 ret = pio_copy_data(vcpu);
3682 if (ret == X86EMUL_PROPAGATE_FAULT)
3683 return 1;
3684 if (ret == 0 && !pio_string_write(vcpu)) {
3685 complete_pio(vcpu);
3686 if (vcpu->arch.pio.count == 0)
3687 ret = 1;
3688 }
3689 }
3690 /* no string PIO read support yet */
3691
3692 return ret; 3881 return ret;
3693} 3882}
3694EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 3883EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
3695 3884
3696static void bounce_off(void *info) 3885static void bounce_off(void *info)
3697{ 3886{
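kvm_fast_pio_out() lets the vendor intercepts satisfy a single OUT from RAX through the new emulator PIO path without running the full instruction emulator; when it returns 0 the exit state has already been set up as KVM_EXIT_IO for userspace. A hypothetical caller (names illustrative, not the actual vmx.c/svm.c handlers from this series):

static int handle_port_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
{
	/* RIP is advanced up front: kvm_fast_pio_out() clears pio.count, so the
	 * instruction is never re-entered after userspace completes the I/O. */
	kvm_x86_ops->skip_emulated_instruction(vcpu);
	return kvm_fast_pio_out(vcpu, size, port);	/* 0 = exit with KVM_EXIT_IO */
}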
@@ -4014,85 +4203,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
4014 return emulator_write_emulated(rip, instruction, 3, vcpu); 4203 return emulator_write_emulated(rip, instruction, 3, vcpu);
4015} 4204}
4016 4205
4017static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4018{
4019 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4020}
4021
4022void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4206void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4023{ 4207{
4024 struct descriptor_table dt = { limit, base }; 4208 struct desc_ptr dt = { limit, base };
4025 4209
4026 kvm_x86_ops->set_gdt(vcpu, &dt); 4210 kvm_x86_ops->set_gdt(vcpu, &dt);
4027} 4211}
4028 4212
4029void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4213void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4030{ 4214{
4031 struct descriptor_table dt = { limit, base }; 4215 struct desc_ptr dt = { limit, base };
4032 4216
4033 kvm_x86_ops->set_idt(vcpu, &dt); 4217 kvm_x86_ops->set_idt(vcpu, &dt);
4034} 4218}
4035 4219
4036void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
4037 unsigned long *rflags)
4038{
4039 kvm_lmsw(vcpu, msw);
4040 *rflags = kvm_get_rflags(vcpu);
4041}
4042
4043unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
4044{
4045 unsigned long value;
4046
4047 switch (cr) {
4048 case 0:
4049 value = kvm_read_cr0(vcpu);
4050 break;
4051 case 2:
4052 value = vcpu->arch.cr2;
4053 break;
4054 case 3:
4055 value = vcpu->arch.cr3;
4056 break;
4057 case 4:
4058 value = kvm_read_cr4(vcpu);
4059 break;
4060 case 8:
4061 value = kvm_get_cr8(vcpu);
4062 break;
4063 default:
4064 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4065 return 0;
4066 }
4067
4068 return value;
4069}
4070
4071void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
4072 unsigned long *rflags)
4073{
4074 switch (cr) {
4075 case 0:
4076 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4077 *rflags = kvm_get_rflags(vcpu);
4078 break;
4079 case 2:
4080 vcpu->arch.cr2 = val;
4081 break;
4082 case 3:
4083 kvm_set_cr3(vcpu, val);
4084 break;
4085 case 4:
4086 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4087 break;
4088 case 8:
4089 kvm_set_cr8(vcpu, val & 0xfUL);
4090 break;
4091 default:
4092 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4093 }
4094}
4095
4096static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) 4220static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
4097{ 4221{
4098 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; 4222 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
@@ -4156,9 +4280,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
4156{ 4280{
4157 struct kvm_cpuid_entry2 *best; 4281 struct kvm_cpuid_entry2 *best;
4158 4282
4283 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
4284 if (!best || best->eax < 0x80000008)
4285 goto not_found;
4159 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); 4286 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
4160 if (best) 4287 if (best)
4161 return best->eax & 0xff; 4288 return best->eax & 0xff;
4289not_found:
4162 return 36; 4290 return 36;
4163} 4291}
4164 4292
@@ -4272,6 +4400,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
4272{ 4400{
4273 /* try to reinject previous events if any */ 4401 /* try to reinject previous events if any */
4274 if (vcpu->arch.exception.pending) { 4402 if (vcpu->arch.exception.pending) {
4403 trace_kvm_inj_exception(vcpu->arch.exception.nr,
4404 vcpu->arch.exception.has_error_code,
4405 vcpu->arch.exception.error_code);
4275 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 4406 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
4276 vcpu->arch.exception.has_error_code, 4407 vcpu->arch.exception.has_error_code,
4277 vcpu->arch.exception.error_code); 4408 vcpu->arch.exception.error_code);
@@ -4532,24 +4663,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4532 if (!irqchip_in_kernel(vcpu->kvm)) 4663 if (!irqchip_in_kernel(vcpu->kvm))
4533 kvm_set_cr8(vcpu, kvm_run->cr8); 4664 kvm_set_cr8(vcpu, kvm_run->cr8);
4534 4665
4535 if (vcpu->arch.pio.cur_count) { 4666 if (vcpu->arch.pio.count || vcpu->mmio_needed ||
4536 r = complete_pio(vcpu); 4667 vcpu->arch.emulate_ctxt.restart) {
4537 if (r) 4668 if (vcpu->mmio_needed) {
4538 goto out; 4669 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
4539 } 4670 vcpu->mmio_read_completed = 1;
4540 if (vcpu->mmio_needed) { 4671 vcpu->mmio_needed = 0;
4541 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 4672 }
4542 vcpu->mmio_read_completed = 1;
4543 vcpu->mmio_needed = 0;
4544
4545 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4673 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4546 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4674 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
4547 EMULTYPE_NO_DECODE);
4548 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4675 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4549 if (r == EMULATE_DO_MMIO) { 4676 if (r == EMULATE_DO_MMIO) {
4550 /*
4551 * Read-modify-write. Back to userspace.
4552 */
4553 r = 0; 4677 r = 0;
4554 goto out; 4678 goto out;
4555 } 4679 }
@@ -4632,12 +4756,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4632 return 0; 4756 return 0;
4633} 4757}
4634 4758
4635void kvm_get_segment(struct kvm_vcpu *vcpu,
4636 struct kvm_segment *var, int seg)
4637{
4638 kvm_x86_ops->get_segment(vcpu, var, seg);
4639}
4640
4641void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 4759void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4642{ 4760{
4643 struct kvm_segment cs; 4761 struct kvm_segment cs;
@@ -4651,7 +4769,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
4651int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4769int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4652 struct kvm_sregs *sregs) 4770 struct kvm_sregs *sregs)
4653{ 4771{
4654 struct descriptor_table dt; 4772 struct desc_ptr dt;
4655 4773
4656 vcpu_load(vcpu); 4774 vcpu_load(vcpu);
4657 4775
@@ -4666,11 +4784,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4666 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 4784 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
4667 4785
4668 kvm_x86_ops->get_idt(vcpu, &dt); 4786 kvm_x86_ops->get_idt(vcpu, &dt);
4669 sregs->idt.limit = dt.limit; 4787 sregs->idt.limit = dt.size;
4670 sregs->idt.base = dt.base; 4788 sregs->idt.base = dt.address;
4671 kvm_x86_ops->get_gdt(vcpu, &dt); 4789 kvm_x86_ops->get_gdt(vcpu, &dt);
4672 sregs->gdt.limit = dt.limit; 4790 sregs->gdt.limit = dt.size;
4673 sregs->gdt.base = dt.base; 4791 sregs->gdt.base = dt.address;
4674 4792
4675 sregs->cr0 = kvm_read_cr0(vcpu); 4793 sregs->cr0 = kvm_read_cr0(vcpu);
4676 sregs->cr2 = vcpu->arch.cr2; 4794 sregs->cr2 = vcpu->arch.cr2;
@@ -4709,559 +4827,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4709 return 0; 4827 return 0;
4710} 4828}
4711 4829
4712static void kvm_set_segment(struct kvm_vcpu *vcpu, 4830int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
4713 struct kvm_segment *var, int seg) 4831 bool has_error_code, u32 error_code)
4714{
4715 kvm_x86_ops->set_segment(vcpu, var, seg);
4716}
4717
4718static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
4719 struct kvm_segment *kvm_desct)
4720{
4721 kvm_desct->base = get_desc_base(seg_desc);
4722 kvm_desct->limit = get_desc_limit(seg_desc);
4723 if (seg_desc->g) {
4724 kvm_desct->limit <<= 12;
4725 kvm_desct->limit |= 0xfff;
4726 }
4727 kvm_desct->selector = selector;
4728 kvm_desct->type = seg_desc->type;
4729 kvm_desct->present = seg_desc->p;
4730 kvm_desct->dpl = seg_desc->dpl;
4731 kvm_desct->db = seg_desc->d;
4732 kvm_desct->s = seg_desc->s;
4733 kvm_desct->l = seg_desc->l;
4734 kvm_desct->g = seg_desc->g;
4735 kvm_desct->avl = seg_desc->avl;
4736 if (!selector)
4737 kvm_desct->unusable = 1;
4738 else
4739 kvm_desct->unusable = 0;
4740 kvm_desct->padding = 0;
4741}
4742
4743static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
4744 u16 selector,
4745 struct descriptor_table *dtable)
4746{
4747 if (selector & 1 << 2) {
4748 struct kvm_segment kvm_seg;
4749
4750 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
4751
4752 if (kvm_seg.unusable)
4753 dtable->limit = 0;
4754 else
4755 dtable->limit = kvm_seg.limit;
4756 dtable->base = kvm_seg.base;
4757 }
4758 else
4759 kvm_x86_ops->get_gdt(vcpu, dtable);
4760}
4761
4762/* allowed just for 8 bytes segments */
4763static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4764 struct desc_struct *seg_desc)
4765{
4766 struct descriptor_table dtable;
4767 u16 index = selector >> 3;
4768 int ret;
4769 u32 err;
4770 gva_t addr;
4771
4772 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4773
4774 if (dtable.limit < index * 8 + 7) {
4775 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4776 return X86EMUL_PROPAGATE_FAULT;
4777 }
4778 addr = dtable.base + index * 8;
4779 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4780 vcpu, &err);
4781 if (ret == X86EMUL_PROPAGATE_FAULT)
4782 kvm_inject_page_fault(vcpu, addr, err);
4783
4784 return ret;
4785}
4786
4787/* allowed just for 8 bytes segments */
4788static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4789 struct desc_struct *seg_desc)
4790{
4791 struct descriptor_table dtable;
4792 u16 index = selector >> 3;
4793
4794 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4795
4796 if (dtable.limit < index * 8 + 7)
4797 return 1;
4798 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4799}
4800
4801static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4802 struct desc_struct *seg_desc)
4803{
4804 u32 base_addr = get_desc_base(seg_desc);
4805
4806 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4807}
4808
4809static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4810 struct desc_struct *seg_desc)
4811{
4812 u32 base_addr = get_desc_base(seg_desc);
4813
4814 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4815}
4816
4817static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4818{
4819 struct kvm_segment kvm_seg;
4820
4821 kvm_get_segment(vcpu, &kvm_seg, seg);
4822 return kvm_seg.selector;
4823}
4824
4825static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4826{
4827 struct kvm_segment segvar = {
4828 .base = selector << 4,
4829 .limit = 0xffff,
4830 .selector = selector,
4831 .type = 3,
4832 .present = 1,
4833 .dpl = 3,
4834 .db = 0,
4835 .s = 1,
4836 .l = 0,
4837 .g = 0,
4838 .avl = 0,
4839 .unusable = 0,
4840 };
4841 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4842 return X86EMUL_CONTINUE;
4843}
4844
4832{
4833 int cs_db, cs_l, ret;
4834 cache_all_regs(vcpu);
4835
4836 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4837
4838 vcpu->arch.emulate_ctxt.vcpu = vcpu;
4839 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
4840 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
4841 vcpu->arch.emulate_ctxt.mode =
4842 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4843 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
4844 ? X86EMUL_MODE_VM86 : cs_l
4845 ? X86EMUL_MODE_PROT64 : cs_db
4846 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
4847
4848 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
4849 tss_selector, reason, has_error_code,
4850 error_code);
4851
4852 if (ret)
4853 return EMULATE_FAIL;
4845static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4846{
4847 return (seg != VCPU_SREG_LDTR) &&
4848 (seg != VCPU_SREG_TR) &&
4849 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4850}
4851
4852int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4853{
4854 struct kvm_segment kvm_seg;
4855 struct desc_struct seg_desc;
4856 u8 dpl, rpl, cpl;
4857 unsigned err_vec = GP_VECTOR;
4858 u32 err_code = 0;
4859 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4860 int ret;
4861
4862 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
4863 return kvm_load_realmode_segment(vcpu, selector, seg);
4864
4865 /* NULL selector is not valid for TR, CS and SS */
4866 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
4867 && null_selector)
4868 goto exception;
4869
4870 /* TR should be in GDT only */
4871 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
4872 goto exception;
4873
4874 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4875 if (ret)
4876 return ret;
4877
4878 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4879
4880 if (null_selector) { /* for NULL selector skip all following checks */
4881 kvm_seg.unusable = 1;
4882 goto load;
4883 }
4884
4885 err_code = selector & 0xfffc;
4886 err_vec = GP_VECTOR;
4887
4888 /* can't load a system descriptor into a segment register */
4889 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4890 goto exception;
4891
4892 if (!kvm_seg.present) {
4893 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4894 goto exception;
4895 }
4896
4897 rpl = selector & 3;
4898 dpl = kvm_seg.dpl;
4899 cpl = kvm_x86_ops->get_cpl(vcpu);
4900
4901 switch (seg) {
4902 case VCPU_SREG_SS:
4903 /*
4904 * segment is not a writable data segment, or the segment
4905 * selector's RPL != CPL, or the descriptor's DPL != CPL
4906 */
4907 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4908 goto exception;
4909 break;
4910 case VCPU_SREG_CS:
4911 if (!(kvm_seg.type & 8))
4912 goto exception;
4913
4914 if (kvm_seg.type & 4) {
4915 /* conforming */
4916 if (dpl > cpl)
4917 goto exception;
4918 } else {
4919 /* nonconforming */
4920 if (rpl > cpl || dpl != cpl)
4921 goto exception;
4922 }
4923 /* CS(RPL) <- CPL */
4924 selector = (selector & 0xfffc) | cpl;
4925 break;
4926 case VCPU_SREG_TR:
4927 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4928 goto exception;
4929 break;
4930 case VCPU_SREG_LDTR:
4931 if (kvm_seg.s || kvm_seg.type != 2)
4932 goto exception;
4933 break;
4934 default: /* DS, ES, FS, or GS */
4935 /*
4936 * segment is not a data or readable code segment or
4937 * ((segment is a data or nonconforming code segment)
4938 * and (both RPL and CPL > DPL))
4939 */
4940 if ((kvm_seg.type & 0xa) == 0x8 ||
4941 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4942 goto exception;
4943 break;
4944 }
4945
4946 if (!kvm_seg.unusable && kvm_seg.s) {
4947 /* mark segment as accessed */
4948 kvm_seg.type |= 1;
4949 seg_desc.type |= 1;
4950 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4951 }
4952load:
4953 kvm_set_segment(vcpu, &kvm_seg, seg);
4954 return X86EMUL_CONTINUE;
4955exception:
4956 kvm_queue_exception_e(vcpu, err_vec, err_code);
4957 return X86EMUL_PROPAGATE_FAULT;
4958}
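
As one concrete example of the checks above, the SS case accepts only a writable data segment whose selector RPL and descriptor DPL both equal the CPL. A self-contained sketch of that predicate, with the 4-bit type field passed in directly (illustrative, not the kernel helper):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the SS-load check in isolation.  For SS the segment
 * must be writable data (type bit 3 clear = data, bit 1 set = writable),
 * and both the selector RPL and the descriptor DPL must equal the CPL.
 */
static bool ss_load_ok(uint8_t type, uint8_t rpl, uint8_t dpl, uint8_t cpl)
{
        if (rpl != cpl || dpl != cpl)
                return false;
        return (type & 0xa) == 0x2;
}

int main(void)
{
        printf("%d\n", ss_load_ok(0x3, 0, 0, 0)); /* read/write data, ring 0: ok */
        printf("%d\n", ss_load_ok(0xb, 0, 0, 0)); /* code segment: rejected      */
        printf("%d\n", ss_load_ok(0x3, 3, 0, 0)); /* RPL != CPL: rejected        */
        return 0;
}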
4959
4960static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4961 struct tss_segment_32 *tss)
4962{
4963 tss->cr3 = vcpu->arch.cr3;
4964 tss->eip = kvm_rip_read(vcpu);
4965 tss->eflags = kvm_get_rflags(vcpu);
4966 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4967 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4968 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
4969 tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4970 tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
4971 tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
4972 tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
4973 tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
4974 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
4975 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
4976 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
4977 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
4978 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
4979 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
4980 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4981}
4982
4983static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4984{
4985 struct kvm_segment kvm_seg;
4986 kvm_get_segment(vcpu, &kvm_seg, seg);
4987 kvm_seg.selector = sel;
4988 kvm_set_segment(vcpu, &kvm_seg, seg);
4989}
4990
4991static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4992 struct tss_segment_32 *tss)
4993{
4994 kvm_set_cr3(vcpu, tss->cr3);
4995
4996 kvm_rip_write(vcpu, tss->eip);
4997 kvm_set_rflags(vcpu, tss->eflags | 2);
4998
4999 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
5000 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
5001 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
5002 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
5003 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
5004 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
5005 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
5006 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
5007
5008 /*
5009 * SDM says that segment selectors are loaded before segment
5010 * descriptors
5011 */
5012 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
5013 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5014 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5015 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5016 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5017 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
5018 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
5019
5020 /*
5021 * Now load the segment descriptors. If a fault happens at this
5022 * stage it is handled in the context of the new task.
5023 */
5024 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
5025 return 1;
5026
5027 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
5028 return 1;
5029
5030 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
5031 return 1;
5032
5033 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
5034 return 1;
5035
5036 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
5037 return 1;
5038
5039 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
5040 return 1;
5041
5042 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
5043 return 1;
5044 return 0;
5045}
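
The 32-bit TSS image being saved and restored here mirrors the hardware layout from the Intel SDM: 104 (0x68) bytes, which is also why kvm_task_switch() below rejects a target TSS descriptor whose limit is under 0x67. A sketch of that layout with a few offsets asserted at compile time (descriptive field names, not the kernel's struct tss_segment_32):

#include <stddef.h>
#include <stdint.h>

/* Illustrative only: hardware layout of a 32-bit TSS per the Intel SDM.
 * Selector fields occupy 32-bit slots whose upper halves are reserved.
 */
struct hw_tss32 {
        uint32_t prev_task_link;
        uint32_t esp0, ss0, esp1, ss1, esp2, ss2;
        uint32_t cr3, eip, eflags;
        uint32_t eax, ecx, edx, ebx, esp, ebp, esi, edi;
        uint32_t es, cs, ss, ds, fs, gs;
        uint32_t ldt_selector;
        uint16_t trace;          /* bit 0: debug trap on task switch */
        uint16_t io_map_base;
};

_Static_assert(sizeof(struct hw_tss32) == 0x68, "32-bit TSS is 104 bytes");
_Static_assert(offsetof(struct hw_tss32, cr3) == 0x1c, "CR3 at 0x1c");
_Static_assert(offsetof(struct hw_tss32, ldt_selector) == 0x60, "LDTR at 0x60");

int main(void) { return 0; }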
5046
5047static void save_state_to_tss16(struct kvm_vcpu *vcpu,
5048 struct tss_segment_16 *tss)
5049{
5050 tss->ip = kvm_rip_read(vcpu);
5051 tss->flag = kvm_get_rflags(vcpu);
5052 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5053 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5054 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5055 tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5056 tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
5057 tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
5058 tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
5059 tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
5060
5061 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
5062 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
5063 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
5064 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
5065 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
5066}
5067
5068static int load_state_from_tss16(struct kvm_vcpu *vcpu,
5069 struct tss_segment_16 *tss)
5070{
5071 kvm_rip_write(vcpu, tss->ip);
5072 kvm_set_rflags(vcpu, tss->flag | 2);
5073 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
5074 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
5075 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
5076 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
5077 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
5078 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
5079 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
5080 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
5081
5082 /*
5083 * SDM says that segment selectors are loaded before segment
5084 * descriptors
5085 */
5086 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5087 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5088 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5089 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5090 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5091
5092 /*
5093 * Now load the segment descriptors. If a fault happens at this
5094 * stage it is handled in the context of the new task.
5095 */
5096 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
5097 return 1;
5098
5099 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
5100 return 1;
5101
5102 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
5103 return 1;
5104
5105 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
5106 return 1;
5107
5108 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
5109 return 1;
5110 return 0;
5111}
5112
5113static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
5114 u16 old_tss_sel, u32 old_tss_base,
5115 struct desc_struct *nseg_desc)
5116{
5117 struct tss_segment_16 tss_segment_16;
5118 int ret = 0;
5119
5120 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5121 sizeof tss_segment_16))
5122 goto out;
5123
5124 save_state_to_tss16(vcpu, &tss_segment_16);
5125
5126 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5127 sizeof tss_segment_16))
5128 goto out;
5129
5130 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5131 &tss_segment_16, sizeof tss_segment_16))
5132 goto out;
5133
5134 if (old_tss_sel != 0xffff) {
5135 tss_segment_16.prev_task_link = old_tss_sel;
5136
5137 if (kvm_write_guest(vcpu->kvm,
5138 get_tss_base_addr_write(vcpu, nseg_desc),
5139 &tss_segment_16.prev_task_link,
5140 sizeof tss_segment_16.prev_task_link))
5141 goto out;
5142 }
5143
5144 if (load_state_from_tss16(vcpu, &tss_segment_16))
5145 goto out;
5146
5147 ret = 1;
5148out:
5149 return ret;
5150}
5151
5152static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
5153 u16 old_tss_sel, u32 old_tss_base,
5154 struct desc_struct *nseg_desc)
5155{
5156 struct tss_segment_32 tss_segment_32;
5157 int ret = 0;
5158
5159 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5160 sizeof tss_segment_32))
5161 goto out;
5162
5163 save_state_to_tss32(vcpu, &tss_segment_32);
5164
5165 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5166 sizeof tss_segment_32))
5167 goto out;
5168
5169 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5170 &tss_segment_32, sizeof tss_segment_32))
5171 goto out;
5172
5173 if (old_tss_sel != 0xffff) {
5174 tss_segment_32.prev_task_link = old_tss_sel;
5175
5176 if (kvm_write_guest(vcpu->kvm,
5177 get_tss_base_addr_write(vcpu, nseg_desc),
5178 &tss_segment_32.prev_task_link,
5179 sizeof tss_segment_32.prev_task_link))
5180 goto out;
5181 }
5182
5183 if (load_state_from_tss32(vcpu, &tss_segment_32))
5184 goto out;
5185
5186 ret = 1;
5187out:
5188 return ret;
5189}
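
Both helpers above write the back link only when old_tss_sel is not 0xffff, i.e. when the switch nests tasks. A small sketch of the rule the caller applies: CALL and task-gate switches record the outgoing TSS selector in the new TSS's previous-task link (and later set EFLAGS.NT) so a later IRET can return, while JMP and IRET do not. The constants below are local to the sketch, not KVM's TASK_SWITCH_* values:

#include <stdbool.h>
#include <stdio.h>

enum switch_reason { SWITCH_CALL, SWITCH_GATE, SWITCH_JMP, SWITCH_IRET };

/* Illustrative only: which task-switch reasons create a back link. */
static bool creates_back_link(enum switch_reason reason)
{
        return reason == SWITCH_CALL || reason == SWITCH_GATE;
}

int main(void)
{
        printf("call: %d, jmp: %d, iret: %d\n",
               creates_back_link(SWITCH_CALL),
               creates_back_link(SWITCH_JMP),
               creates_back_link(SWITCH_IRET));
        return 0;
}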
5190
5191int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5192{
5193 struct kvm_segment tr_seg;
5194 struct desc_struct cseg_desc;
5195 struct desc_struct nseg_desc;
5196 int ret = 0;
5197 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
5198 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5199
5200 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
5201
5202 /* FIXME: Handle errors. Failure to read either TSS or their
5203 * descriptors should generate a page fault.
5204 */
5205 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
5206 goto out;
5207
5208 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
5209 goto out;
5210
5211 if (reason != TASK_SWITCH_IRET) {
5212 int cpl;
5213
5214 cpl = kvm_x86_ops->get_cpl(vcpu);
5215 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
5216 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
5217 return 1;
5218 }
5219 }
5220
5221 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
5222 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
5223 return 1;
5224 }
5225
5226 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
5227 cseg_desc.type &= ~(1 << 1); /* clear the busy (B) flag */
5228 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
5229 }
5230
5231 if (reason == TASK_SWITCH_IRET) {
5232 u32 eflags = kvm_get_rflags(vcpu);
5233 kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
5234 }
5235
5236 /* set back link to prev task only if NT bit is set in eflags;
5237 note that old_tss_sel is not used after this point */
5238 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
5239 old_tss_sel = 0xffff;
5240
5241 if (nseg_desc.type & 8)
5242 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
5243 old_tss_base, &nseg_desc);
5244 else
5245 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
5246 old_tss_base, &nseg_desc);
5247
5248 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
5249 u32 eflags = kvm_get_rflags(vcpu);
5250 kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
5251 }
5252
5253 if (reason != TASK_SWITCH_IRET) {
5254 nseg_desc.type |= (1 << 1);
5255 save_guest_segment_descriptor(vcpu, tss_selector,
5256 &nseg_desc);
5257 }
5258
5259 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
5260 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
5261 tr_seg.type = 11;
5262 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
5263out:
5264 return ret;
5265}
4854
4855 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
4856 return EMULATE_DONE;
4857}
5266EXPORT_SYMBOL_GPL(kvm_task_switch); 4858EXPORT_SYMBOL_GPL(kvm_task_switch);
5267 4859
@@ -5270,15 +4862,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5270{ 4862{
5271 int mmu_reset_needed = 0; 4863 int mmu_reset_needed = 0;
5272 int pending_vec, max_bits; 4864 int pending_vec, max_bits;
5273 struct descriptor_table dt; 4865 struct desc_ptr dt;
5274 4866
5275 vcpu_load(vcpu); 4867 vcpu_load(vcpu);
5276 4868
5277 dt.limit = sregs->idt.limit; 4869 dt.size = sregs->idt.limit;
5278 dt.base = sregs->idt.base; 4870 dt.address = sregs->idt.base;
5279 kvm_x86_ops->set_idt(vcpu, &dt); 4871 kvm_x86_ops->set_idt(vcpu, &dt);
5280 dt.limit = sregs->gdt.limit; 4872 dt.size = sregs->gdt.limit;
5281 dt.base = sregs->gdt.base; 4873 dt.address = sregs->gdt.base;
5282 kvm_x86_ops->set_gdt(vcpu, &dt); 4874 kvm_x86_ops->set_gdt(vcpu, &dt);
5283 4875
5284 vcpu->arch.cr2 = sregs->cr2; 4876 vcpu->arch.cr2 = sregs->cr2;
@@ -5377,11 +4969,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5377 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 4969 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
5378 } 4970 }
5379 4971
5380 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 4972 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5381 vcpu->arch.singlestep_cs = 4973 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
5382 get_segment_selector(vcpu, VCPU_SREG_CS); 4974 get_segment_base(vcpu, VCPU_SREG_CS);
5383 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
5384 }
5385 4975
5386 /* 4976 /*
5387 * Trigger an rflags update that will inject or remove the trace 4977 * Trigger an rflags update that will inject or remove the trace
@@ -5872,13 +5462,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
5872 return kvm_x86_ops->interrupt_allowed(vcpu); 5462 return kvm_x86_ops->interrupt_allowed(vcpu);
5873} 5463}
5874 5464
5465bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
5466{
5467 unsigned long current_rip = kvm_rip_read(vcpu) +
5468 get_segment_base(vcpu, VCPU_SREG_CS);
5469
5470 return current_rip == linear_rip;
5471}
5472EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
5473
5875unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) 5474unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
5876{ 5475{
5877 unsigned long rflags; 5476 unsigned long rflags;
5878 5477
5879 rflags = kvm_x86_ops->get_rflags(vcpu); 5478 rflags = kvm_x86_ops->get_rflags(vcpu);
5880 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5479 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5881 rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); 5480 rflags &= ~X86_EFLAGS_TF;
5882 return rflags; 5481 return rflags;
5883} 5482}
5884EXPORT_SYMBOL_GPL(kvm_get_rflags); 5483EXPORT_SYMBOL_GPL(kvm_get_rflags);
@@ -5886,10 +5485,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags);
5886void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 5485void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
5887{ 5486{
5888 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 5487 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
5889 vcpu->arch.singlestep_cs == 5488 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
5890 get_segment_selector(vcpu, VCPU_SREG_CS) && 5489 rflags |= X86_EFLAGS_TF;
5891 vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
5892 rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
5893 kvm_x86_ops->set_rflags(vcpu, rflags); 5490 kvm_x86_ops->set_rflags(vcpu, rflags);
5894} 5491}
5895EXPORT_SYMBOL_GPL(kvm_set_rflags); 5492EXPORT_SYMBOL_GPL(kvm_set_rflags);
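
The single-step handling above masks EFLAGS.TF out of the reported flags while KVM_GUESTDBG_SINGLESTEP is active and re-arms it only when the guest is still at the linear RIP (CS base + RIP) recorded when stepping was requested, which is what kvm_is_linear_rip() checks. A user-space sketch of that pattern (the structure and helpers are illustrative, not KVM's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_TF (1u << 8)  /* x86 EFLAGS.TF */

/* Illustrative only: record the linear RIP when single-stepping is armed,
 * hide TF while it is armed, and re-inject TF only at that same RIP.
 */
struct dbg_state {
        bool     singlestep;
        uint64_t singlestep_linear_rip;
};

static uint64_t get_flags(const struct dbg_state *s, uint64_t hw_flags)
{
        return s->singlestep ? hw_flags & ~(uint64_t)FLAG_TF : hw_flags;
}

static uint64_t set_flags(const struct dbg_state *s, uint64_t flags,
                          uint64_t cs_base, uint64_t rip)
{
        if (s->singlestep && cs_base + rip == s->singlestep_linear_rip)
                flags |= FLAG_TF;
        return flags;
}

int main(void)
{
        struct dbg_state s = { .singlestep = true,
                               .singlestep_linear_rip = 0x1000 };

        printf("0x%llx\n", (unsigned long long)get_flags(&s, FLAG_TF | 0x2));
        printf("0x%llx\n", (unsigned long long)set_flags(&s, 0x2, 0x0, 0x1000));
        return 0;
}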
@@ -5905,3 +5502,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
5905EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); 5502EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
5906EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); 5503EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
5907EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 5504EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
5505EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);