aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/kvm/kvm.h10
-rw-r--r--drivers/kvm/kvm_main.c442
-rw-r--r--drivers/kvm/x86.c427
3 files changed, 443 insertions, 436 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 469ca42c2a19..3f5ffc37480d 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -636,6 +636,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm);
636int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); 636int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
637int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); 637int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
638 638
639int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
640int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
641int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
642 struct kvm_sregs *sregs);
643int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
644 struct kvm_sregs *sregs);
645int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
646 struct kvm_debug_guest *dbg);
647int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
648
639__init void kvm_arch_init(void); 649__init void kvm_arch_init(void);
640 650
641static inline void kvm_guest_enter(void) 651static inline void kvm_guest_enter(void)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7230f48ba08e..8665531d9287 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -779,422 +779,6 @@ void kvm_resched(struct kvm_vcpu *vcpu)
779EXPORT_SYMBOL_GPL(kvm_resched); 779EXPORT_SYMBOL_GPL(kvm_resched);
780 780
781/* 781/*
782 * Check if userspace requested an interrupt window, and that the
783 * interrupt window is open.
784 *
785 * No need to exit to userspace if we already have an interrupt queued.
786 */
787static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
788 struct kvm_run *kvm_run)
789{
790 return (!vcpu->irq_summary &&
791 kvm_run->request_interrupt_window &&
792 vcpu->interrupt_window_open &&
793 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
794}
795
796static void post_kvm_run_save(struct kvm_vcpu *vcpu,
797 struct kvm_run *kvm_run)
798{
799 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
800 kvm_run->cr8 = get_cr8(vcpu);
801 kvm_run->apic_base = kvm_get_apic_base(vcpu);
802 if (irqchip_in_kernel(vcpu->kvm))
803 kvm_run->ready_for_interrupt_injection = 1;
804 else
805 kvm_run->ready_for_interrupt_injection =
806 (vcpu->interrupt_window_open &&
807 vcpu->irq_summary == 0);
808}
809
810static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
811{
812 int r;
813
814 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
815 pr_debug("vcpu %d received sipi with vector # %x\n",
816 vcpu->vcpu_id, vcpu->sipi_vector);
817 kvm_lapic_reset(vcpu);
818 r = kvm_x86_ops->vcpu_reset(vcpu);
819 if (r)
820 return r;
821 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
822 }
823
824preempted:
825 if (vcpu->guest_debug.enabled)
826 kvm_x86_ops->guest_debug_pre(vcpu);
827
828again:
829 r = kvm_mmu_reload(vcpu);
830 if (unlikely(r))
831 goto out;
832
833 kvm_inject_pending_timer_irqs(vcpu);
834
835 preempt_disable();
836
837 kvm_x86_ops->prepare_guest_switch(vcpu);
838 kvm_load_guest_fpu(vcpu);
839
840 local_irq_disable();
841
842 if (signal_pending(current)) {
843 local_irq_enable();
844 preempt_enable();
845 r = -EINTR;
846 kvm_run->exit_reason = KVM_EXIT_INTR;
847 ++vcpu->stat.signal_exits;
848 goto out;
849 }
850
851 if (irqchip_in_kernel(vcpu->kvm))
852 kvm_x86_ops->inject_pending_irq(vcpu);
853 else if (!vcpu->mmio_read_completed)
854 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
855
856 vcpu->guest_mode = 1;
857 kvm_guest_enter();
858
859 if (vcpu->requests)
860 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
861 kvm_x86_ops->tlb_flush(vcpu);
862
863 kvm_x86_ops->run(vcpu, kvm_run);
864
865 vcpu->guest_mode = 0;
866 local_irq_enable();
867
868 ++vcpu->stat.exits;
869
870 /*
871 * We must have an instruction between local_irq_enable() and
872 * kvm_guest_exit(), so the timer interrupt isn't delayed by
873 * the interrupt shadow. The stat.exits increment will do nicely.
874 * But we need to prevent reordering, hence this barrier():
875 */
876 barrier();
877
878 kvm_guest_exit();
879
880 preempt_enable();
881
882 /*
883 * Profile KVM exit RIPs:
884 */
885 if (unlikely(prof_on == KVM_PROFILING)) {
886 kvm_x86_ops->cache_regs(vcpu);
887 profile_hit(KVM_PROFILING, (void *)vcpu->rip);
888 }
889
890 r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
891
892 if (r > 0) {
893 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
894 r = -EINTR;
895 kvm_run->exit_reason = KVM_EXIT_INTR;
896 ++vcpu->stat.request_irq_exits;
897 goto out;
898 }
899 if (!need_resched()) {
900 ++vcpu->stat.light_exits;
901 goto again;
902 }
903 }
904
905out:
906 if (r > 0) {
907 kvm_resched(vcpu);
908 goto preempted;
909 }
910
911 post_kvm_run_save(vcpu, kvm_run);
912
913 return r;
914}
915
916
917static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
918{
919 int r;
920 sigset_t sigsaved;
921
922 vcpu_load(vcpu);
923
924 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
925 kvm_vcpu_block(vcpu);
926 vcpu_put(vcpu);
927 return -EAGAIN;
928 }
929
930 if (vcpu->sigset_active)
931 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
932
933 /* re-sync apic's tpr */
934 if (!irqchip_in_kernel(vcpu->kvm))
935 set_cr8(vcpu, kvm_run->cr8);
936
937 if (vcpu->pio.cur_count) {
938 r = complete_pio(vcpu);
939 if (r)
940 goto out;
941 }
942#if CONFIG_HAS_IOMEM
943 if (vcpu->mmio_needed) {
944 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
945 vcpu->mmio_read_completed = 1;
946 vcpu->mmio_needed = 0;
947 r = emulate_instruction(vcpu, kvm_run,
948 vcpu->mmio_fault_cr2, 0, 1);
949 if (r == EMULATE_DO_MMIO) {
950 /*
951 * Read-modify-write. Back to userspace.
952 */
953 r = 0;
954 goto out;
955 }
956 }
957#endif
958 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
959 kvm_x86_ops->cache_regs(vcpu);
960 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
961 kvm_x86_ops->decache_regs(vcpu);
962 }
963
964 r = __vcpu_run(vcpu, kvm_run);
965
966out:
967 if (vcpu->sigset_active)
968 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
969
970 vcpu_put(vcpu);
971 return r;
972}
973
974static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
975 struct kvm_regs *regs)
976{
977 vcpu_load(vcpu);
978
979 kvm_x86_ops->cache_regs(vcpu);
980
981 regs->rax = vcpu->regs[VCPU_REGS_RAX];
982 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
983 regs->rcx = vcpu->regs[VCPU_REGS_RCX];
984 regs->rdx = vcpu->regs[VCPU_REGS_RDX];
985 regs->rsi = vcpu->regs[VCPU_REGS_RSI];
986 regs->rdi = vcpu->regs[VCPU_REGS_RDI];
987 regs->rsp = vcpu->regs[VCPU_REGS_RSP];
988 regs->rbp = vcpu->regs[VCPU_REGS_RBP];
989#ifdef CONFIG_X86_64
990 regs->r8 = vcpu->regs[VCPU_REGS_R8];
991 regs->r9 = vcpu->regs[VCPU_REGS_R9];
992 regs->r10 = vcpu->regs[VCPU_REGS_R10];
993 regs->r11 = vcpu->regs[VCPU_REGS_R11];
994 regs->r12 = vcpu->regs[VCPU_REGS_R12];
995 regs->r13 = vcpu->regs[VCPU_REGS_R13];
996 regs->r14 = vcpu->regs[VCPU_REGS_R14];
997 regs->r15 = vcpu->regs[VCPU_REGS_R15];
998#endif
999
1000 regs->rip = vcpu->rip;
1001 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
1002
1003 /*
1004 * Don't leak debug flags in case they were set for guest debugging
1005 */
1006 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
1007 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1008
1009 vcpu_put(vcpu);
1010
1011 return 0;
1012}
1013
1014static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1015 struct kvm_regs *regs)
1016{
1017 vcpu_load(vcpu);
1018
1019 vcpu->regs[VCPU_REGS_RAX] = regs->rax;
1020 vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
1021 vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
1022 vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
1023 vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
1024 vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
1025 vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
1026 vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
1027#ifdef CONFIG_X86_64
1028 vcpu->regs[VCPU_REGS_R8] = regs->r8;
1029 vcpu->regs[VCPU_REGS_R9] = regs->r9;
1030 vcpu->regs[VCPU_REGS_R10] = regs->r10;
1031 vcpu->regs[VCPU_REGS_R11] = regs->r11;
1032 vcpu->regs[VCPU_REGS_R12] = regs->r12;
1033 vcpu->regs[VCPU_REGS_R13] = regs->r13;
1034 vcpu->regs[VCPU_REGS_R14] = regs->r14;
1035 vcpu->regs[VCPU_REGS_R15] = regs->r15;
1036#endif
1037
1038 vcpu->rip = regs->rip;
1039 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
1040
1041 kvm_x86_ops->decache_regs(vcpu);
1042
1043 vcpu_put(vcpu);
1044
1045 return 0;
1046}
1047
1048static void get_segment(struct kvm_vcpu *vcpu,
1049 struct kvm_segment *var, int seg)
1050{
1051 return kvm_x86_ops->get_segment(vcpu, var, seg);
1052}
1053
1054static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1055 struct kvm_sregs *sregs)
1056{
1057 struct descriptor_table dt;
1058 int pending_vec;
1059
1060 vcpu_load(vcpu);
1061
1062 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1063 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
1064 get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
1065 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
1066 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
1067 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
1068
1069 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
1070 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
1071
1072 kvm_x86_ops->get_idt(vcpu, &dt);
1073 sregs->idt.limit = dt.limit;
1074 sregs->idt.base = dt.base;
1075 kvm_x86_ops->get_gdt(vcpu, &dt);
1076 sregs->gdt.limit = dt.limit;
1077 sregs->gdt.base = dt.base;
1078
1079 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
1080 sregs->cr0 = vcpu->cr0;
1081 sregs->cr2 = vcpu->cr2;
1082 sregs->cr3 = vcpu->cr3;
1083 sregs->cr4 = vcpu->cr4;
1084 sregs->cr8 = get_cr8(vcpu);
1085 sregs->efer = vcpu->shadow_efer;
1086 sregs->apic_base = kvm_get_apic_base(vcpu);
1087
1088 if (irqchip_in_kernel(vcpu->kvm)) {
1089 memset(sregs->interrupt_bitmap, 0,
1090 sizeof sregs->interrupt_bitmap);
1091 pending_vec = kvm_x86_ops->get_irq(vcpu);
1092 if (pending_vec >= 0)
1093 set_bit(pending_vec,
1094 (unsigned long *)sregs->interrupt_bitmap);
1095 } else
1096 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
1097 sizeof sregs->interrupt_bitmap);
1098
1099 vcpu_put(vcpu);
1100
1101 return 0;
1102}
1103
1104static void set_segment(struct kvm_vcpu *vcpu,
1105 struct kvm_segment *var, int seg)
1106{
1107 return kvm_x86_ops->set_segment(vcpu, var, seg);
1108}
1109
1110static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1111 struct kvm_sregs *sregs)
1112{
1113 int mmu_reset_needed = 0;
1114 int i, pending_vec, max_bits;
1115 struct descriptor_table dt;
1116
1117 vcpu_load(vcpu);
1118
1119 dt.limit = sregs->idt.limit;
1120 dt.base = sregs->idt.base;
1121 kvm_x86_ops->set_idt(vcpu, &dt);
1122 dt.limit = sregs->gdt.limit;
1123 dt.base = sregs->gdt.base;
1124 kvm_x86_ops->set_gdt(vcpu, &dt);
1125
1126 vcpu->cr2 = sregs->cr2;
1127 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
1128 vcpu->cr3 = sregs->cr3;
1129
1130 set_cr8(vcpu, sregs->cr8);
1131
1132 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
1133#ifdef CONFIG_X86_64
1134 kvm_x86_ops->set_efer(vcpu, sregs->efer);
1135#endif
1136 kvm_set_apic_base(vcpu, sregs->apic_base);
1137
1138 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
1139
1140 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
1141 vcpu->cr0 = sregs->cr0;
1142 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
1143
1144 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
1145 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
1146 if (!is_long_mode(vcpu) && is_pae(vcpu))
1147 load_pdptrs(vcpu, vcpu->cr3);
1148
1149 if (mmu_reset_needed)
1150 kvm_mmu_reset_context(vcpu);
1151
1152 if (!irqchip_in_kernel(vcpu->kvm)) {
1153 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
1154 sizeof vcpu->irq_pending);
1155 vcpu->irq_summary = 0;
1156 for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
1157 if (vcpu->irq_pending[i])
1158 __set_bit(i, &vcpu->irq_summary);
1159 } else {
1160 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
1161 pending_vec = find_first_bit(
1162 (const unsigned long *)sregs->interrupt_bitmap,
1163 max_bits);
1164 /* Only pending external irq is handled here */
1165 if (pending_vec < max_bits) {
1166 kvm_x86_ops->set_irq(vcpu, pending_vec);
1167 pr_debug("Set back pending irq %d\n",
1168 pending_vec);
1169 }
1170 }
1171
1172 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1173 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
1174 set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
1175 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
1176 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
1177 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
1178
1179 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
1180 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
1181
1182 vcpu_put(vcpu);
1183
1184 return 0;
1185}
1186
1187void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
1188{
1189 struct kvm_segment cs;
1190
1191 get_segment(vcpu, &cs, VCPU_SREG_CS);
1192 *db = cs.db;
1193 *l = cs.l;
1194}
1195EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
1196
1197/*
1198 * Translate a guest virtual address to a guest physical address. 782 * Translate a guest virtual address to a guest physical address.
1199 */ 783 */
1200static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 784static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -1233,20 +817,6 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1233 return 0; 817 return 0;
1234} 818}
1235 819
1236static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
1237 struct kvm_debug_guest *dbg)
1238{
1239 int r;
1240
1241 vcpu_load(vcpu);
1242
1243 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
1244
1245 vcpu_put(vcpu);
1246
1247 return r;
1248}
1249
1250static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, 820static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
1251 unsigned long address, 821 unsigned long address,
1252 int *type) 822 int *type)
@@ -1392,13 +962,13 @@ static long kvm_vcpu_ioctl(struct file *filp,
1392 r = -EINVAL; 962 r = -EINVAL;
1393 if (arg) 963 if (arg)
1394 goto out; 964 goto out;
1395 r = kvm_vcpu_ioctl_run(vcpu, vcpu->run); 965 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
1396 break; 966 break;
1397 case KVM_GET_REGS: { 967 case KVM_GET_REGS: {
1398 struct kvm_regs kvm_regs; 968 struct kvm_regs kvm_regs;
1399 969
1400 memset(&kvm_regs, 0, sizeof kvm_regs); 970 memset(&kvm_regs, 0, sizeof kvm_regs);
1401 r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs); 971 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
1402 if (r) 972 if (r)
1403 goto out; 973 goto out;
1404 r = -EFAULT; 974 r = -EFAULT;
@@ -1413,7 +983,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
1413 r = -EFAULT; 983 r = -EFAULT;
1414 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) 984 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
1415 goto out; 985 goto out;
1416 r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs); 986 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
1417 if (r) 987 if (r)
1418 goto out; 988 goto out;
1419 r = 0; 989 r = 0;
@@ -1423,7 +993,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
1423 struct kvm_sregs kvm_sregs; 993 struct kvm_sregs kvm_sregs;
1424 994
1425 memset(&kvm_sregs, 0, sizeof kvm_sregs); 995 memset(&kvm_sregs, 0, sizeof kvm_sregs);
1426 r = kvm_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs); 996 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
1427 if (r) 997 if (r)
1428 goto out; 998 goto out;
1429 r = -EFAULT; 999 r = -EFAULT;
@@ -1438,7 +1008,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
1438 r = -EFAULT; 1008 r = -EFAULT;
1439 if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) 1009 if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
1440 goto out; 1010 goto out;
1441 r = kvm_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs); 1011 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
1442 if (r) 1012 if (r)
1443 goto out; 1013 goto out;
1444 r = 0; 1014 r = 0;
@@ -1477,7 +1047,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
1477 r = -EFAULT; 1047 r = -EFAULT;
1478 if (copy_from_user(&dbg, argp, sizeof dbg)) 1048 if (copy_from_user(&dbg, argp, sizeof dbg))
1479 goto out; 1049 goto out;
1480 r = kvm_vcpu_ioctl_debug_guest(vcpu, &dbg); 1050 r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
1481 if (r) 1051 if (r)
1482 goto out; 1052 goto out;
1483 r = 0; 1053 r = 0;
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index ef1661f10b48..394da6605364 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -1787,6 +1787,433 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1787EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 1787EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
1788 1788
1789/* 1789/*
1790 * Check if userspace requested an interrupt window, and that the
1791 * interrupt window is open.
1792 *
1793 * No need to exit to userspace if we already have an interrupt queued.
1794 */
1795static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1796 struct kvm_run *kvm_run)
1797{
1798 return (!vcpu->irq_summary &&
1799 kvm_run->request_interrupt_window &&
1800 vcpu->interrupt_window_open &&
1801 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
1802}
1803
1804static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1805 struct kvm_run *kvm_run)
1806{
1807 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
1808 kvm_run->cr8 = get_cr8(vcpu);
1809 kvm_run->apic_base = kvm_get_apic_base(vcpu);
1810 if (irqchip_in_kernel(vcpu->kvm))
1811 kvm_run->ready_for_interrupt_injection = 1;
1812 else
1813 kvm_run->ready_for_interrupt_injection =
1814 (vcpu->interrupt_window_open &&
1815 vcpu->irq_summary == 0);
1816}
1817
1818static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1819{
1820 int r;
1821
1822 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
1823 pr_debug("vcpu %d received sipi with vector # %x\n",
1824 vcpu->vcpu_id, vcpu->sipi_vector);
1825 kvm_lapic_reset(vcpu);
1826 r = kvm_x86_ops->vcpu_reset(vcpu);
1827 if (r)
1828 return r;
1829 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
1830 }
1831
1832preempted:
1833 if (vcpu->guest_debug.enabled)
1834 kvm_x86_ops->guest_debug_pre(vcpu);
1835
1836again:
1837 r = kvm_mmu_reload(vcpu);
1838 if (unlikely(r))
1839 goto out;
1840
1841 kvm_inject_pending_timer_irqs(vcpu);
1842
1843 preempt_disable();
1844
1845 kvm_x86_ops->prepare_guest_switch(vcpu);
1846 kvm_load_guest_fpu(vcpu);
1847
1848 local_irq_disable();
1849
1850 if (signal_pending(current)) {
1851 local_irq_enable();
1852 preempt_enable();
1853 r = -EINTR;
1854 kvm_run->exit_reason = KVM_EXIT_INTR;
1855 ++vcpu->stat.signal_exits;
1856 goto out;
1857 }
1858
1859 if (irqchip_in_kernel(vcpu->kvm))
1860 kvm_x86_ops->inject_pending_irq(vcpu);
1861 else if (!vcpu->mmio_read_completed)
1862 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
1863
1864 vcpu->guest_mode = 1;
1865 kvm_guest_enter();
1866
1867 if (vcpu->requests)
1868 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
1869 kvm_x86_ops->tlb_flush(vcpu);
1870
1871 kvm_x86_ops->run(vcpu, kvm_run);
1872
1873 vcpu->guest_mode = 0;
1874 local_irq_enable();
1875
1876 ++vcpu->stat.exits;
1877
1878 /*
1879 * We must have an instruction between local_irq_enable() and
1880 * kvm_guest_exit(), so the timer interrupt isn't delayed by
1881 * the interrupt shadow. The stat.exits increment will do nicely.
1882 * But we need to prevent reordering, hence this barrier():
1883 */
1884 barrier();
1885
1886 kvm_guest_exit();
1887
1888 preempt_enable();
1889
1890 /*
1891 * Profile KVM exit RIPs:
1892 */
1893 if (unlikely(prof_on == KVM_PROFILING)) {
1894 kvm_x86_ops->cache_regs(vcpu);
1895 profile_hit(KVM_PROFILING, (void *)vcpu->rip);
1896 }
1897
1898 r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
1899
1900 if (r > 0) {
1901 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
1902 r = -EINTR;
1903 kvm_run->exit_reason = KVM_EXIT_INTR;
1904 ++vcpu->stat.request_irq_exits;
1905 goto out;
1906 }
1907 if (!need_resched()) {
1908 ++vcpu->stat.light_exits;
1909 goto again;
1910 }
1911 }
1912
1913out:
1914 if (r > 0) {
1915 kvm_resched(vcpu);
1916 goto preempted;
1917 }
1918
1919 post_kvm_run_save(vcpu, kvm_run);
1920
1921 return r;
1922}
1923
1924int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1925{
1926 int r;
1927 sigset_t sigsaved;
1928
1929 vcpu_load(vcpu);
1930
1931 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
1932 kvm_vcpu_block(vcpu);
1933 vcpu_put(vcpu);
1934 return -EAGAIN;
1935 }
1936
1937 if (vcpu->sigset_active)
1938 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1939
1940 /* re-sync apic's tpr */
1941 if (!irqchip_in_kernel(vcpu->kvm))
1942 set_cr8(vcpu, kvm_run->cr8);
1943
1944 if (vcpu->pio.cur_count) {
1945 r = complete_pio(vcpu);
1946 if (r)
1947 goto out;
1948 }
1949#if CONFIG_HAS_IOMEM
1950 if (vcpu->mmio_needed) {
1951 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
1952 vcpu->mmio_read_completed = 1;
1953 vcpu->mmio_needed = 0;
1954 r = emulate_instruction(vcpu, kvm_run,
1955 vcpu->mmio_fault_cr2, 0, 1);
1956 if (r == EMULATE_DO_MMIO) {
1957 /*
1958 * Read-modify-write. Back to userspace.
1959 */
1960 r = 0;
1961 goto out;
1962 }
1963 }
1964#endif
1965 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
1966 kvm_x86_ops->cache_regs(vcpu);
1967 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
1968 kvm_x86_ops->decache_regs(vcpu);
1969 }
1970
1971 r = __vcpu_run(vcpu, kvm_run);
1972
1973out:
1974 if (vcpu->sigset_active)
1975 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1976
1977 vcpu_put(vcpu);
1978 return r;
1979}
1980
1981int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1982{
1983 vcpu_load(vcpu);
1984
1985 kvm_x86_ops->cache_regs(vcpu);
1986
1987 regs->rax = vcpu->regs[VCPU_REGS_RAX];
1988 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
1989 regs->rcx = vcpu->regs[VCPU_REGS_RCX];
1990 regs->rdx = vcpu->regs[VCPU_REGS_RDX];
1991 regs->rsi = vcpu->regs[VCPU_REGS_RSI];
1992 regs->rdi = vcpu->regs[VCPU_REGS_RDI];
1993 regs->rsp = vcpu->regs[VCPU_REGS_RSP];
1994 regs->rbp = vcpu->regs[VCPU_REGS_RBP];
1995#ifdef CONFIG_X86_64
1996 regs->r8 = vcpu->regs[VCPU_REGS_R8];
1997 regs->r9 = vcpu->regs[VCPU_REGS_R9];
1998 regs->r10 = vcpu->regs[VCPU_REGS_R10];
1999 regs->r11 = vcpu->regs[VCPU_REGS_R11];
2000 regs->r12 = vcpu->regs[VCPU_REGS_R12];
2001 regs->r13 = vcpu->regs[VCPU_REGS_R13];
2002 regs->r14 = vcpu->regs[VCPU_REGS_R14];
2003 regs->r15 = vcpu->regs[VCPU_REGS_R15];
2004#endif
2005
2006 regs->rip = vcpu->rip;
2007 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
2008
2009 /*
2010 * Don't leak debug flags in case they were set for guest debugging
2011 */
2012 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
2013 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
2014
2015 vcpu_put(vcpu);
2016
2017 return 0;
2018}
2019
2020int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2021{
2022 vcpu_load(vcpu);
2023
2024 vcpu->regs[VCPU_REGS_RAX] = regs->rax;
2025 vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
2026 vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
2027 vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
2028 vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
2029 vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
2030 vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
2031 vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
2032#ifdef CONFIG_X86_64
2033 vcpu->regs[VCPU_REGS_R8] = regs->r8;
2034 vcpu->regs[VCPU_REGS_R9] = regs->r9;
2035 vcpu->regs[VCPU_REGS_R10] = regs->r10;
2036 vcpu->regs[VCPU_REGS_R11] = regs->r11;
2037 vcpu->regs[VCPU_REGS_R12] = regs->r12;
2038 vcpu->regs[VCPU_REGS_R13] = regs->r13;
2039 vcpu->regs[VCPU_REGS_R14] = regs->r14;
2040 vcpu->regs[VCPU_REGS_R15] = regs->r15;
2041#endif
2042
2043 vcpu->rip = regs->rip;
2044 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
2045
2046 kvm_x86_ops->decache_regs(vcpu);
2047
2048 vcpu_put(vcpu);
2049
2050 return 0;
2051}
2052
2053static void get_segment(struct kvm_vcpu *vcpu,
2054 struct kvm_segment *var, int seg)
2055{
2056 return kvm_x86_ops->get_segment(vcpu, var, seg);
2057}
2058
2059void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
2060{
2061 struct kvm_segment cs;
2062
2063 get_segment(vcpu, &cs, VCPU_SREG_CS);
2064 *db = cs.db;
2065 *l = cs.l;
2066}
2067EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
2068
2069int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2070 struct kvm_sregs *sregs)
2071{
2072 struct descriptor_table dt;
2073 int pending_vec;
2074
2075 vcpu_load(vcpu);
2076
2077 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2078 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2079 get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2080 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2081 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2082 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2083
2084 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2085 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2086
2087 kvm_x86_ops->get_idt(vcpu, &dt);
2088 sregs->idt.limit = dt.limit;
2089 sregs->idt.base = dt.base;
2090 kvm_x86_ops->get_gdt(vcpu, &dt);
2091 sregs->gdt.limit = dt.limit;
2092 sregs->gdt.base = dt.base;
2093
2094 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2095 sregs->cr0 = vcpu->cr0;
2096 sregs->cr2 = vcpu->cr2;
2097 sregs->cr3 = vcpu->cr3;
2098 sregs->cr4 = vcpu->cr4;
2099 sregs->cr8 = get_cr8(vcpu);
2100 sregs->efer = vcpu->shadow_efer;
2101 sregs->apic_base = kvm_get_apic_base(vcpu);
2102
2103 if (irqchip_in_kernel(vcpu->kvm)) {
2104 memset(sregs->interrupt_bitmap, 0,
2105 sizeof sregs->interrupt_bitmap);
2106 pending_vec = kvm_x86_ops->get_irq(vcpu);
2107 if (pending_vec >= 0)
2108 set_bit(pending_vec,
2109 (unsigned long *)sregs->interrupt_bitmap);
2110 } else
2111 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
2112 sizeof sregs->interrupt_bitmap);
2113
2114 vcpu_put(vcpu);
2115
2116 return 0;
2117}
2118
2119static void set_segment(struct kvm_vcpu *vcpu,
2120 struct kvm_segment *var, int seg)
2121{
2122 return kvm_x86_ops->set_segment(vcpu, var, seg);
2123}
2124
2125int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2126 struct kvm_sregs *sregs)
2127{
2128 int mmu_reset_needed = 0;
2129 int i, pending_vec, max_bits;
2130 struct descriptor_table dt;
2131
2132 vcpu_load(vcpu);
2133
2134 dt.limit = sregs->idt.limit;
2135 dt.base = sregs->idt.base;
2136 kvm_x86_ops->set_idt(vcpu, &dt);
2137 dt.limit = sregs->gdt.limit;
2138 dt.base = sregs->gdt.base;
2139 kvm_x86_ops->set_gdt(vcpu, &dt);
2140
2141 vcpu->cr2 = sregs->cr2;
2142 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
2143 vcpu->cr3 = sregs->cr3;
2144
2145 set_cr8(vcpu, sregs->cr8);
2146
2147 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
2148#ifdef CONFIG_X86_64
2149 kvm_x86_ops->set_efer(vcpu, sregs->efer);
2150#endif
2151 kvm_set_apic_base(vcpu, sregs->apic_base);
2152
2153 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2154
2155 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
2156 vcpu->cr0 = sregs->cr0;
2157 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
2158
2159 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
2160 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
2161 if (!is_long_mode(vcpu) && is_pae(vcpu))
2162 load_pdptrs(vcpu, vcpu->cr3);
2163
2164 if (mmu_reset_needed)
2165 kvm_mmu_reset_context(vcpu);
2166
2167 if (!irqchip_in_kernel(vcpu->kvm)) {
2168 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
2169 sizeof vcpu->irq_pending);
2170 vcpu->irq_summary = 0;
2171 for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
2172 if (vcpu->irq_pending[i])
2173 __set_bit(i, &vcpu->irq_summary);
2174 } else {
2175 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
2176 pending_vec = find_first_bit(
2177 (const unsigned long *)sregs->interrupt_bitmap,
2178 max_bits);
2179 /* Only pending external irq is handled here */
2180 if (pending_vec < max_bits) {
2181 kvm_x86_ops->set_irq(vcpu, pending_vec);
2182 pr_debug("Set back pending irq %d\n",
2183 pending_vec);
2184 }
2185 }
2186
2187 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2188 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2189 set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2190 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2191 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2192 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2193
2194 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2195 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2196
2197 vcpu_put(vcpu);
2198
2199 return 0;
2200}
2201
2202int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
2203 struct kvm_debug_guest *dbg)
2204{
2205 int r;
2206
2207 vcpu_load(vcpu);
2208
2209 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
2210
2211 vcpu_put(vcpu);
2212
2213 return r;
2214}
2215
2216/*
1790 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when 2217 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
1791 * we have asm/x86/processor.h 2218 * we have asm/x86/processor.h
1792 */ 2219 */