path: root/arch/x86/kvm/svm.c
author    Linus Torvalds <torvalds@linux-foundation.org>    2012-03-28 17:35:31 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2012-03-28 17:35:31 -0400
commit    2e7580b0e75d771d93e24e681031a165b1d31071 (patch)
tree      d9449702609eeaab28913a43b5a4434667e09d43 /arch/x86/kvm/svm.c
parent    d25413efa9536e2f425ea45c7720598035c597bc (diff)
parent    cf9eeac46350b8b43730b7dc5e999757bed089a4 (diff)
Merge branch 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Avi Kivity:
 "Changes include timekeeping improvements, support for assigning host
  PCI devices that share interrupt lines, s390 user-controlled guests, a
  large ppc update, and random fixes."

This is with the sign-offs fixed, hopefully next merge window we won't
have rebased commits.

* 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits)
  KVM: Convert intx_mask_lock to spin lock
  KVM: x86: fix kvm_write_tsc() TSC matching thinko
  x86: kvmclock: abstract save/restore sched_clock_state
  KVM: nVMX: Fix erroneous exception bitmap check
  KVM: Ignore the writes to MSR_K7_HWCR(3)
  KVM: MMU: make use of ->root_level in reset_rsvds_bits_mask
  KVM: PMU: add proper support for fixed counter 2
  KVM: PMU: Fix raw event check
  KVM: PMU: warn when pin control is set in eventsel msr
  KVM: VMX: Fix delayed load of shared MSRs
  KVM: use correct tlbs dirty type in cmpxchg
  KVM: Allow host IRQ sharing for assigned PCI 2.3 devices
  KVM: Ensure all vcpus are consistent with in-kernel irqchip settings
  KVM: x86 emulator: Allow PM/VM86 switch during task switch
  KVM: SVM: Fix CPL updates
  KVM: x86 emulator: VM86 segments must have DPL 3
  KVM: x86 emulator: Fix task switch privilege checks
  arch/powerpc/kvm/book3s_hv.c: included linux/sched.h twice
  KVM: x86 emulator: correctly mask pmc index bits in RDPMC instruction emulation
  KVM: mmu_notifier: Flush TLBs before releasing mmu_lock
  ...
Diffstat (limited to 'arch/x86/kvm/svm.c')
 -rw-r--r--  arch/x86/kvm/svm.c | 119
 1 file changed, 104 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e385214711cb..e334389e1c75 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -111,6 +111,12 @@ struct nested_state {
 #define MSRPM_OFFSETS 16
 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 
+/*
+ * Set osvw_len to higher value when updated Revision Guides
+ * are published and we know what the new status bits are
+ */
+static uint64_t osvw_len = 4, osvw_status;
+
 struct vcpu_svm {
 	struct kvm_vcpu vcpu;
 	struct vmcb *vmcb;
@@ -177,11 +183,13 @@ static bool npt_enabled = true;
177#else 183#else
178static bool npt_enabled; 184static bool npt_enabled;
179#endif 185#endif
180static int npt = 1;
181 186
187/* allow nested paging (virtualized MMU) for all guests */
188static int npt = true;
182module_param(npt, int, S_IRUGO); 189module_param(npt, int, S_IRUGO);
183 190
184static int nested = 1; 191/* allow nested virtualization in KVM/SVM */
192static int nested = true;
185module_param(nested, int, S_IRUGO); 193module_param(nested, int, S_IRUGO);
186 194
187static void svm_flush_tlb(struct kvm_vcpu *vcpu); 195static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -557,6 +565,27 @@ static void svm_init_erratum_383(void)
 	erratum_383_found = true;
 }
 
+static void svm_init_osvw(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Guests should see errata 400 and 415 as fixed (assuming that
+	 * HLT and IO instructions are intercepted).
+	 */
+	vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
+	vcpu->arch.osvw.status = osvw_status & ~(6ULL);
+
+	/*
+	 * By increasing VCPU's osvw.length to 3 we are telling the guest that
+	 * all osvw.status bits inside that length, including bit 0 (which is
+	 * reserved for erratum 298), are valid. However, if host processor's
+	 * osvw_len is 0 then osvw_status[0] carries no information. We need to
+	 * be conservative here and therefore we tell the guest that erratum 298
+	 * is present (because we really don't know).
+	 */
+	if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
+		vcpu->arch.osvw.status |= 1;
+}
+
 static int has_svm(void)
 {
 	const char *msg;
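
The ~(6ULL) mask in svm_init_osvw() clears status bits 1 and 2; together with the comments, that pins down the bit layout: bit 0 is erratum 298, and bits 1 and 2 are the errata 400 and 415 that guests should see as fixed. A minimal sketch of that masking; the enum and guest_osvw_status() are illustrative names, not kernel definitions:

#include <stdint.h>

/* OSVW status bit positions as implied by the comments above. */
enum osvw_bit {
	OSVW_ERRATUM_298 = 0,	/* only meaningful if osvw_len > 0 */
	OSVW_ERRATUM_400 = 1,	/* reported fixed to guests */
	OSVW_ERRATUM_415 = 2,	/* reported fixed to guests */
};

/* ~6ULL == ~((1ULL << OSVW_ERRATUM_400) | (1ULL << OSVW_ERRATUM_415)) */
static uint64_t guest_osvw_status(uint64_t host_status)
{
	return host_status & ~((1ULL << OSVW_ERRATUM_400) |
			       (1ULL << OSVW_ERRATUM_415));
}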
@@ -623,6 +652,36 @@ static int svm_hardware_enable(void *garbage)
 		__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
 	}
 
+
+	/*
+	 * Get OSVW bits.
+	 *
+	 * Note that it is possible to have a system with mixed processor
+	 * revisions and therefore different OSVW bits. If bits are not the same
+	 * on different processors then choose the worst case (i.e. if erratum
+	 * is present on one processor and not on another then assume that the
+	 * erratum is present everywhere).
+	 */
+	if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
+		uint64_t len, status = 0;
+		int err;
+
+		len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
+		if (!err)
+			status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
+						      &err);
+
+		if (err)
+			osvw_status = osvw_len = 0;
+		else {
+			if (len < osvw_len)
+				osvw_len = len;
+			osvw_status |= status;
+			osvw_status &= (1ULL << osvw_len) - 1;
+		}
+	} else
+		osvw_status = osvw_len = 0;
+
 	svm_init_erratum_383();
 
 	amd_pmu_enable_virt();
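
The merge policy in this hunk is worth restating: across mixed processor revisions, the effective OSVW length is the minimum of the per-CPU lengths (trust only the bits every CPU reports), while the status bits (1 = erratum present) are OR-ed together, so an erratum seen anywhere is assumed everywhere. A standalone sketch under those assumptions; struct osvw and osvw_merge() are hypothetical names, not kernel code:

#include <stdint.h>

/* Hypothetical per-CPU OSVW state. */
struct osvw {
	uint64_t len;		/* number of valid status bits (< 64 assumed) */
	uint64_t status;	/* bit n set => erratum n present */
};

/* Worst-case merge, mirroring the hunk above. */
static void osvw_merge(struct osvw *acc, const struct osvw *cpu)
{
	if (cpu->len < acc->len)
		acc->len = cpu->len;		/* trust only common bits */
	acc->status |= cpu->status;		/* affected anywhere => affected */
	acc->status &= (1ULL << acc->len) - 1;	/* drop bits past len */
}

Seeded with the file-scope defaults (len = 4, status = 0) and folded over every online CPU, this reproduces the behavior of the per-CPU path above.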
@@ -910,20 +969,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
 	return _tsc;
 }
 
-static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 ratio;
 	u64 khz;
 
-	/* TSC scaling supported? */
-	if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
+	/* Guest TSC same frequency as host TSC? */
+	if (!scale) {
+		svm->tsc_ratio = TSC_RATIO_DEFAULT;
 		return;
+	}
 
-	/* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
-	if (user_tsc_khz == 0) {
-		vcpu->arch.virtual_tsc_khz = 0;
-		svm->tsc_ratio = TSC_RATIO_DEFAULT;
+	/* TSC scaling supported? */
+	if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+		if (user_tsc_khz > tsc_khz) {
+			vcpu->arch.tsc_catchup = 1;
+			vcpu->arch.tsc_always_catchup = 1;
+		} else
+			WARN(1, "user requested TSC rate below hardware speed\n");
 		return;
 	}
 
@@ -938,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
 				user_tsc_khz);
 		return;
 	}
-	vcpu->arch.virtual_tsc_khz = user_tsc_khz;
 	svm->tsc_ratio = ratio;
 }
 
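
For context, TSC_RATIO_DEFAULT is 1.0 in the TSC ratio MSR's 8.32 fixed-point format (integer part in bits 39:32, fraction in bits 31:0), and the ratio stored above is guest kHz over host kHz in that format. A hedged userspace sketch of the arithmetic; tsc_ratio_from_khz() and scale_tsc() are illustrative names, and the GCC/Clang unsigned __int128 stands in for the kernel's widening-multiply helpers:

#include <stdint.h>

#define TSC_RATIO_DEFAULT	(1ULL << 32)	/* 1.0 in 8.32 fixed point */

/* guest_khz / host_khz as an 8.32 fixed-point ratio. */
static uint64_t tsc_ratio_from_khz(uint32_t guest_khz, uint32_t host_khz)
{
	return ((uint64_t)guest_khz << 32) / host_khz;
}

/* Scale a host TSC value to the guest's rate: (tsc * ratio) >> 32. */
static uint64_t scale_tsc(uint64_t tsc, uint64_t ratio)
{
	return (uint64_t)(((unsigned __int128)tsc * ratio) >> 32);
}

For example, a 2000000 kHz guest on a 2600000 kHz host yields a ratio just below 0.77 * 2^32, comfortably inside the MSR's 8-bit integer range.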
@@ -958,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	WARN_ON(adjustment < 0);
+	if (host)
+		adjustment = svm_scale_tsc(vcpu, adjustment);
+
 	svm->vmcb->control.tsc_offset += adjustment;
 	if (is_guest_mode(vcpu))
 		svm->nested.hsave->control.tsc_offset += adjustment;
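
The host flag exists because, with AMD TSC scaling, hardware effectively computes guest_tsc = host_tsc * ratio + tsc_offset: a correction measured in host cycles has to be converted to guest cycles before it is folded into the offset, while guest-originated adjustments are already in guest units. A one-line sketch of that conversion (scale_host_delta is an illustrative name, not kernel code):

#include <stdint.h>

/* Host-cycle delta -> guest-cycle delta via the 8.32 ratio. */
static uint64_t scale_host_delta(uint64_t host_delta, uint64_t ratio)
{
	return (uint64_t)(((unsigned __int128)host_delta * ratio) >> 32);
}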
@@ -1191,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (kvm_vcpu_is_bsp(&svm->vcpu))
 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
+	svm_init_osvw(&svm->vcpu);
+
 	return &svm->vcpu;
 
 free_page4:
@@ -1268,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 }
 
+static void svm_update_cpl(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	int cpl;
+
+	if (!is_protmode(vcpu))
+		cpl = 0;
+	else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
+		cpl = 3;
+	else
+		cpl = svm->vmcb->save.cs.selector & 0x3;
+
+	svm->vmcb->save.cpl = cpl;
+}
+
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 {
 	return to_svm(vcpu)->vmcb->save.rflags;
@@ -1275,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 
 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+	unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
+
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
+	if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
+		svm_update_cpl(vcpu);
 }
 
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
@@ -1543,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
 	}
 	if (seg == VCPU_SREG_CS)
-		svm->vmcb->save.cpl
-			= (svm->vmcb->save.cs.attrib
-			   >> SVM_SELECTOR_DPL_SHIFT) & 3;
+		svm_update_cpl(vcpu);
 
 	mark_dirty(svm->vmcb, VMCB_SEG);
 }
@@ -2735,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm)
 	    (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
 		skip_emulated_instruction(&svm->vcpu);
 
-	if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
+	if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
+		int_vec = -1;
+
+	if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
 			    has_error_code, error_code) == EMULATE_FAIL) {
 		svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
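
The new guard matches the reworked kvm_task_switch() signature from the task-switch privilege-check fix in this merge: the emulator only has an IDT entry to validate when the task switch was triggered by a software-initiated event (the SVM_EXITINTINFO_TYPE_SOFT case), so every other source passes -1 to mean "nothing to check". A trivial hedged restatement; task_switch_int_vec() is an illustrative helper, not kernel code:

#include <stdbool.h>

/* -1 tells the emulator there is no vector to validate against the IDT. */
static int task_switch_int_vec(bool soft_event, int int_vec)
{
	return soft_event ? int_vec : -1;
}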